diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,170703 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 20.601336302895323, + "eval_steps": 250, + "global_step": 9250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0022271714922048997, + "grad_norm": 48.25035095214844, + "learning_rate": 1e-06, + "loss": 4.3999, + "num_input_tokens_seen": 55440, + "step": 1 + }, + { + "epoch": 0.0022271714922048997, + "loss": 4.068600654602051, + "loss_ce": 1.3654756546020508, + "loss_iou": 1.1171875, + "loss_num": 0.09423828125, + "loss_xval": 2.703125, + "num_input_tokens_seen": 55440, + "step": 1 + }, + { + "epoch": 0.004454342984409799, + "grad_norm": 130.20863342285156, + "learning_rate": 1e-06, + "loss": 3.4252, + "num_input_tokens_seen": 111516, + "step": 2 + }, + { + "epoch": 0.004454342984409799, + "loss": 3.1070704460144043, + "loss_ce": 0.6607813835144043, + "loss_iou": 1.0390625, + "loss_num": 0.072265625, + "loss_xval": 2.453125, + "num_input_tokens_seen": 111516, + "step": 2 + }, + { + "epoch": 0.0066815144766146995, + "grad_norm": 219.82708740234375, + "learning_rate": 1e-06, + "loss": 4.3295, + "num_input_tokens_seen": 167164, + "step": 3 + }, + { + "epoch": 0.0066815144766146995, + "loss": 4.420684814453125, + "loss_ce": 1.4587706327438354, + "loss_iou": 1.2109375, + "loss_num": 0.10888671875, + "loss_xval": 2.96875, + "num_input_tokens_seen": 167164, + "step": 3 + }, + { + "epoch": 0.008908685968819599, + "grad_norm": 42.13670349121094, + "learning_rate": 1e-06, + "loss": 4.1998, + "num_input_tokens_seen": 221656, + "step": 4 + }, + { + "epoch": 0.008908685968819599, + "loss": 4.317120552062988, + "loss_ce": 1.9479798078536987, + "loss_iou": 0.91015625, + "loss_num": 0.10986328125, + "loss_xval": 2.375, + "num_input_tokens_seen": 221656, + "step": 4 + }, + { + "epoch": 0.011135857461024499, + "grad_norm": 45.899261474609375, + "learning_rate": 1e-06, + "loss": 3.8544, + "num_input_tokens_seen": 279772, + "step": 5 + }, + { + "epoch": 0.011135857461024499, + "eval_seeclick_web_CIoU": 0.627178281545639, + "eval_seeclick_web_GIoU": 0.6259045600891113, + "eval_seeclick_web_IoU": 0.6463199555873871, + "eval_seeclick_web_MAE_all": 0.015169001650065184, + "eval_seeclick_web_MAE_h": 0.008642073255032301, + "eval_seeclick_web_MAE_w": 0.0126046072691679, + "eval_seeclick_web_MAE_x_boxes": 0.00952292513102293, + "eval_seeclick_web_MAE_y_boxes": 0.022719496861100197, + "eval_seeclick_web_inside_bbox": 0.8732638955116272, + "eval_seeclick_web_loss": 0.8245152831077576, + "eval_seeclick_web_loss_ce": 0.00025572007871232927, + "eval_seeclick_web_loss_iou": 0.3670654296875, + "eval_seeclick_web_loss_num": 0.011660575866699219, + "eval_seeclick_web_loss_xval": 0.7923583984375, + "eval_seeclick_web_runtime": 17.4436, + "eval_seeclick_web_samples_per_second": 2.866, + "eval_seeclick_web_steps_per_second": 0.115, + "num_input_tokens_seen": 279772, + "step": 5 + }, + { + "epoch": 0.011135857461024499, + "eval_icons_CIoU": 0.4010816812515259, + "eval_icons_GIoU": 0.4324014186859131, + "eval_icons_IoU": 0.49571336805820465, + "eval_icons_MAE_all": 0.05109180323779583, + "eval_icons_MAE_h": 0.020274315029382706, + "eval_icons_MAE_w": 0.07078993320465088, + "eval_icons_MAE_x_boxes": 0.05979071371257305, + "eval_icons_MAE_y_boxes": 0.02338168118149042, + "eval_icons_inside_bbox": 0.7361111044883728, + "eval_icons_loss": 1.3772772550582886, + "eval_icons_loss_ce": 9.112022598856129e-05, + "eval_icons_loss_iou": 0.539794921875, + "eval_icons_loss_num": 0.04870033264160156, + "eval_icons_loss_xval": 1.323486328125, + "eval_icons_runtime": 16.5509, + "eval_icons_samples_per_second": 3.021, + "eval_icons_steps_per_second": 0.121, + "num_input_tokens_seen": 279772, + "step": 5 + }, + { + "epoch": 0.011135857461024499, + "eval_screenspot_CIoU": 0.10782323777675629, + "eval_screenspot_GIoU": 0.12048953274885814, + "eval_screenspot_IoU": 0.26688433190186817, + "eval_screenspot_MAE_all": 0.1258778969446818, + "eval_screenspot_MAE_h": 0.07123650113741557, + "eval_screenspot_MAE_w": 0.16419320305188498, + "eval_screenspot_MAE_x_boxes": 0.1336704045534134, + "eval_screenspot_MAE_y_boxes": 0.13144449392954508, + "eval_screenspot_inside_bbox": 0.451666663090388, + "eval_screenspot_loss": 2.3829233646392822, + "eval_screenspot_loss_ce": 0.0014819869732794662, + "eval_screenspot_loss_iou": 0.888671875, + "eval_screenspot_loss_num": 0.1301116943359375, + "eval_screenspot_loss_xval": 2.427734375, + "eval_screenspot_runtime": 26.6936, + "eval_screenspot_samples_per_second": 3.334, + "eval_screenspot_steps_per_second": 0.112, + "num_input_tokens_seen": 279772, + "step": 5 + }, + { + "epoch": 0.011135857461024499, + "eval_compot_CIoU": 0.3970271050930023, + "eval_compot_GIoU": 0.4374036639928818, + "eval_compot_IoU": 0.45621901750564575, + "eval_compot_MAE_all": 0.01914477813988924, + "eval_compot_MAE_h": 0.005190690280869603, + "eval_compot_MAE_w": 0.027808972634375095, + "eval_compot_MAE_x_boxes": 0.03283216618001461, + "eval_compot_MAE_y_boxes": 0.0045464420691132545, + "eval_compot_inside_bbox": 0.6458333432674408, + "eval_compot_loss": 1.301793098449707, + "eval_compot_loss_ce": 0.0011670971289277077, + "eval_compot_loss_iou": 0.60400390625, + "eval_compot_loss_num": 0.018798828125, + "eval_compot_loss_xval": 1.302001953125, + "eval_compot_runtime": 16.8489, + "eval_compot_samples_per_second": 2.968, + "eval_compot_steps_per_second": 0.119, + "num_input_tokens_seen": 279772, + "step": 5 + }, + { + "epoch": 0.011135857461024499, + "eval_custom_ui_val_CIoU": -0.2649508896801207, + "eval_custom_ui_val_GIoU": -0.28727365616295075, + "eval_custom_ui_val_IoU": 0.02925413821099533, + "eval_custom_ui_val_MAE_all": 0.09884808709224065, + "eval_custom_ui_val_MAE_h": 0.0829097247785992, + "eval_custom_ui_val_MAE_w": 0.06122648136483298, + "eval_custom_ui_val_MAE_x_boxes": 0.05147905213137468, + "eval_custom_ui_val_MAE_y_boxes": 0.1948982576529185, + "eval_custom_ui_val_inside_bbox": 0.034722222222222224, + "eval_custom_ui_val_loss": 3.0896787643432617, + "eval_custom_ui_val_loss_ce": 0.0023468418318467834, + "eval_custom_ui_val_loss_iou": 1.2951931423611112, + "eval_custom_ui_val_loss_num": 0.09839375813802083, + "eval_custom_ui_val_loss_xval": 3.082248263888889, + "eval_custom_ui_val_runtime": 54.828, + "eval_custom_ui_val_samples_per_second": 4.833, + "eval_custom_ui_val_steps_per_second": 0.164, + "num_input_tokens_seen": 279772, + "step": 5 + }, + { + "epoch": 0.011135857461024499, + "loss": 2.992961883544922, + "loss_ce": 0.004680817015469074, + "loss_iou": 1.3046875, + "loss_num": 0.07470703125, + "loss_xval": 2.984375, + "num_input_tokens_seen": 279772, + "step": 5 + }, + { + "epoch": 0.013363028953229399, + "grad_norm": 51.13984298706055, + "learning_rate": 1e-06, + "loss": 3.8227, + "num_input_tokens_seen": 336424, + "step": 6 + }, + { + "epoch": 0.013363028953229399, + "loss": 3.419996738433838, + "loss_ce": 1.4341567754745483, + "loss_iou": 0.7578125, + "loss_num": 0.09375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 336424, + "step": 6 + }, + { + "epoch": 0.015590200445434299, + "grad_norm": 43.559844970703125, + "learning_rate": 1e-06, + "loss": 3.5541, + "num_input_tokens_seen": 391856, + "step": 7 + }, + { + "epoch": 0.015590200445434299, + "loss": 3.7645998001098633, + "loss_ce": 0.9286624193191528, + "loss_iou": 1.140625, + "loss_num": 0.1123046875, + "loss_xval": 2.84375, + "num_input_tokens_seen": 391856, + "step": 7 + }, + { + "epoch": 0.017817371937639197, + "grad_norm": 58.651023864746094, + "learning_rate": 1e-06, + "loss": 4.0592, + "num_input_tokens_seen": 446076, + "step": 8 + }, + { + "epoch": 0.017817371937639197, + "loss": 3.763951301574707, + "loss_ce": 1.845982551574707, + "loss_iou": 0.69140625, + "loss_num": 0.107421875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 446076, + "step": 8 + }, + { + "epoch": 0.0200445434298441, + "grad_norm": 46.13543701171875, + "learning_rate": 1e-06, + "loss": 3.4206, + "num_input_tokens_seen": 503600, + "step": 9 + }, + { + "epoch": 0.0200445434298441, + "loss": 3.4812936782836914, + "loss_ce": 1.1902780532836914, + "loss_iou": 0.90234375, + "loss_num": 0.0966796875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 503600, + "step": 9 + }, + { + "epoch": 0.022271714922048998, + "grad_norm": 42.953372955322266, + "learning_rate": 1e-06, + "loss": 4.2745, + "num_input_tokens_seen": 557604, + "step": 10 + }, + { + "epoch": 0.022271714922048998, + "loss": 4.187029838562012, + "loss_ce": 1.5722839832305908, + "loss_iou": 1.0390625, + "loss_num": 0.107421875, + "loss_xval": 2.609375, + "num_input_tokens_seen": 557604, + "step": 10 + }, + { + "epoch": 0.024498886414253896, + "grad_norm": 53.018638610839844, + "learning_rate": 1e-06, + "loss": 4.4355, + "num_input_tokens_seen": 614780, + "step": 11 + }, + { + "epoch": 0.024498886414253896, + "loss": 3.7926254272460938, + "loss_ce": 1.1353989839553833, + "loss_iou": 1.0703125, + "loss_num": 0.103515625, + "loss_xval": 2.65625, + "num_input_tokens_seen": 614780, + "step": 11 + }, + { + "epoch": 0.026726057906458798, + "grad_norm": 52.364288330078125, + "learning_rate": 1e-06, + "loss": 3.4929, + "num_input_tokens_seen": 672264, + "step": 12 + }, + { + "epoch": 0.026726057906458798, + "loss": 3.3297152519226074, + "loss_ce": 0.778934121131897, + "loss_iou": 1.0859375, + "loss_num": 0.0751953125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 672264, + "step": 12 + }, + { + "epoch": 0.028953229398663696, + "grad_norm": 160.2591552734375, + "learning_rate": 1e-06, + "loss": 4.0254, + "num_input_tokens_seen": 728504, + "step": 13 + }, + { + "epoch": 0.028953229398663696, + "loss": 4.575099945068359, + "loss_ce": 1.8211936950683594, + "loss_iou": 1.125, + "loss_num": 0.10107421875, + "loss_xval": 2.75, + "num_input_tokens_seen": 728504, + "step": 13 + }, + { + "epoch": 0.031180400890868598, + "grad_norm": 48.962005615234375, + "learning_rate": 1e-06, + "loss": 3.9505, + "num_input_tokens_seen": 783692, + "step": 14 + }, + { + "epoch": 0.031180400890868598, + "loss": 4.143004417419434, + "loss_ce": 1.8163442611694336, + "loss_iou": 0.94921875, + "loss_num": 0.08642578125, + "loss_xval": 2.328125, + "num_input_tokens_seen": 783692, + "step": 14 + }, + { + "epoch": 0.0334075723830735, + "grad_norm": 39.02128982543945, + "learning_rate": 1e-06, + "loss": 4.0185, + "num_input_tokens_seen": 838168, + "step": 15 + }, + { + "epoch": 0.0334075723830735, + "loss": 4.348506927490234, + "loss_ce": 2.2537801265716553, + "loss_iou": 0.78125, + "loss_num": 0.1064453125, + "loss_xval": 2.09375, + "num_input_tokens_seen": 838168, + "step": 15 + }, + { + "epoch": 0.035634743875278395, + "grad_norm": 35.96154022216797, + "learning_rate": 1e-06, + "loss": 3.6832, + "num_input_tokens_seen": 895276, + "step": 16 + }, + { + "epoch": 0.035634743875278395, + "loss": 3.568532943725586, + "loss_ce": 1.519704818725586, + "loss_iou": 0.83203125, + "loss_num": 0.0771484375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 895276, + "step": 16 + }, + { + "epoch": 0.0378619153674833, + "grad_norm": 49.04829788208008, + "learning_rate": 1e-06, + "loss": 3.519, + "num_input_tokens_seen": 951180, + "step": 17 + }, + { + "epoch": 0.0378619153674833, + "loss": 3.1685400009155273, + "loss_ce": 0.9458837509155273, + "loss_iou": 0.88671875, + "loss_num": 0.0908203125, + "loss_xval": 2.21875, + "num_input_tokens_seen": 951180, + "step": 17 + }, + { + "epoch": 0.0400890868596882, + "grad_norm": 105.61235809326172, + "learning_rate": 1e-06, + "loss": 3.7186, + "num_input_tokens_seen": 1008684, + "step": 18 + }, + { + "epoch": 0.0400890868596882, + "loss": 3.8322296142578125, + "loss_ce": 1.353714108467102, + "loss_iou": 0.96484375, + "loss_num": 0.10986328125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 1008684, + "step": 18 + }, + { + "epoch": 0.042316258351893093, + "grad_norm": 114.26968383789062, + "learning_rate": 1e-06, + "loss": 4.0211, + "num_input_tokens_seen": 1062212, + "step": 19 + }, + { + "epoch": 0.042316258351893093, + "loss": 3.5515711307525635, + "loss_ce": 1.0281336307525635, + "loss_iou": 1.0234375, + "loss_num": 0.09521484375, + "loss_xval": 2.53125, + "num_input_tokens_seen": 1062212, + "step": 19 + }, + { + "epoch": 0.044543429844097995, + "grad_norm": 53.97627258300781, + "learning_rate": 1e-06, + "loss": 4.2972, + "num_input_tokens_seen": 1114576, + "step": 20 + }, + { + "epoch": 0.044543429844097995, + "loss": 4.559757709503174, + "loss_ce": 1.2492105960845947, + "loss_iou": 1.3046875, + "loss_num": 0.138671875, + "loss_xval": 3.3125, + "num_input_tokens_seen": 1114576, + "step": 20 + }, + { + "epoch": 0.0467706013363029, + "grad_norm": 38.33933639526367, + "learning_rate": 1e-06, + "loss": 3.5725, + "num_input_tokens_seen": 1169836, + "step": 21 + }, + { + "epoch": 0.0467706013363029, + "loss": 3.7388784885406494, + "loss_ce": 1.3677849769592285, + "loss_iou": 0.98828125, + "loss_num": 0.0791015625, + "loss_xval": 2.375, + "num_input_tokens_seen": 1169836, + "step": 21 + }, + { + "epoch": 0.04899777282850779, + "grad_norm": 59.448753356933594, + "learning_rate": 1e-06, + "loss": 3.8666, + "num_input_tokens_seen": 1228308, + "step": 22 + }, + { + "epoch": 0.04899777282850779, + "loss": 3.630049705505371, + "loss_ce": 1.1505573987960815, + "loss_iou": 0.9921875, + "loss_num": 0.0986328125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 1228308, + "step": 22 + }, + { + "epoch": 0.051224944320712694, + "grad_norm": 52.553958892822266, + "learning_rate": 1e-06, + "loss": 3.362, + "num_input_tokens_seen": 1287564, + "step": 23 + }, + { + "epoch": 0.051224944320712694, + "loss": 2.8519859313964844, + "loss_ce": 1.0302085876464844, + "loss_iou": 0.75390625, + "loss_num": 0.0625, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 1287564, + "step": 23 + }, + { + "epoch": 0.053452115812917596, + "grad_norm": 49.04064178466797, + "learning_rate": 1e-06, + "loss": 4.1017, + "num_input_tokens_seen": 1343120, + "step": 24 + }, + { + "epoch": 0.053452115812917596, + "loss": 4.231411933898926, + "loss_ce": 1.8066072463989258, + "loss_iou": 0.96875, + "loss_num": 0.09716796875, + "loss_xval": 2.421875, + "num_input_tokens_seen": 1343120, + "step": 24 + }, + { + "epoch": 0.0556792873051225, + "grad_norm": 56.048038482666016, + "learning_rate": 1e-06, + "loss": 4.3803, + "num_input_tokens_seen": 1396700, + "step": 25 + }, + { + "epoch": 0.0556792873051225, + "loss": 4.276466369628906, + "loss_ce": 1.6143567562103271, + "loss_iou": 1.0234375, + "loss_num": 0.1240234375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 1396700, + "step": 25 + }, + { + "epoch": 0.05790645879732739, + "grad_norm": 39.70867919921875, + "learning_rate": 1e-06, + "loss": 3.3416, + "num_input_tokens_seen": 1453876, + "step": 26 + }, + { + "epoch": 0.05790645879732739, + "loss": 3.4961788654327393, + "loss_ce": 1.0899291038513184, + "loss_iou": 1.0234375, + "loss_num": 0.0732421875, + "loss_xval": 2.40625, + "num_input_tokens_seen": 1453876, + "step": 26 + }, + { + "epoch": 0.060133630289532294, + "grad_norm": 60.43865966796875, + "learning_rate": 1e-06, + "loss": 3.5507, + "num_input_tokens_seen": 1509556, + "step": 27 + }, + { + "epoch": 0.060133630289532294, + "loss": 3.679036855697632, + "loss_ce": 1.376302719116211, + "loss_iou": 0.890625, + "loss_num": 0.103515625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 1509556, + "step": 27 + }, + { + "epoch": 0.062360801781737196, + "grad_norm": 33.97861099243164, + "learning_rate": 1e-06, + "loss": 3.5827, + "num_input_tokens_seen": 1566848, + "step": 28 + }, + { + "epoch": 0.062360801781737196, + "loss": 3.9071435928344727, + "loss_ce": 1.2176905870437622, + "loss_iou": 1.0703125, + "loss_num": 0.1103515625, + "loss_xval": 2.6875, + "num_input_tokens_seen": 1566848, + "step": 28 + }, + { + "epoch": 0.0645879732739421, + "grad_norm": 71.39791107177734, + "learning_rate": 1e-06, + "loss": 3.9493, + "num_input_tokens_seen": 1622132, + "step": 29 + }, + { + "epoch": 0.0645879732739421, + "loss": 4.430097579956055, + "loss_ce": 1.3754103183746338, + "loss_iou": 1.203125, + "loss_num": 0.1298828125, + "loss_xval": 3.0625, + "num_input_tokens_seen": 1622132, + "step": 29 + }, + { + "epoch": 0.066815144766147, + "grad_norm": 111.8135757446289, + "learning_rate": 1e-06, + "loss": 3.5729, + "num_input_tokens_seen": 1680504, + "step": 30 + }, + { + "epoch": 0.066815144766147, + "loss": 3.6753127574920654, + "loss_ce": 1.620625376701355, + "loss_iou": 0.76953125, + "loss_num": 0.10302734375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 1680504, + "step": 30 + }, + { + "epoch": 0.06904231625835189, + "grad_norm": 48.25176239013672, + "learning_rate": 1e-06, + "loss": 3.9768, + "num_input_tokens_seen": 1735984, + "step": 31 + }, + { + "epoch": 0.06904231625835189, + "loss": 4.000043869018555, + "loss_ce": 1.2251414060592651, + "loss_iou": 1.1171875, + "loss_num": 0.107421875, + "loss_xval": 2.78125, + "num_input_tokens_seen": 1735984, + "step": 31 + }, + { + "epoch": 0.07126948775055679, + "grad_norm": 105.31439208984375, + "learning_rate": 1e-06, + "loss": 4.4586, + "num_input_tokens_seen": 1791036, + "step": 32 + }, + { + "epoch": 0.07126948775055679, + "loss": 4.315090656280518, + "loss_ce": 1.6012234687805176, + "loss_iou": 1.078125, + "loss_num": 0.1103515625, + "loss_xval": 2.71875, + "num_input_tokens_seen": 1791036, + "step": 32 + }, + { + "epoch": 0.07349665924276169, + "grad_norm": 61.652164459228516, + "learning_rate": 1e-06, + "loss": 4.2882, + "num_input_tokens_seen": 1846348, + "step": 33 + }, + { + "epoch": 0.07349665924276169, + "loss": 3.7140510082244873, + "loss_ce": 1.3263558149337769, + "loss_iou": 0.9375, + "loss_num": 0.1025390625, + "loss_xval": 2.390625, + "num_input_tokens_seen": 1846348, + "step": 33 + }, + { + "epoch": 0.0757238307349666, + "grad_norm": 40.629302978515625, + "learning_rate": 1e-06, + "loss": 3.8681, + "num_input_tokens_seen": 1902912, + "step": 34 + }, + { + "epoch": 0.0757238307349666, + "loss": 3.7667856216430664, + "loss_ce": 1.078309178352356, + "loss_iou": 1.109375, + "loss_num": 0.09375, + "loss_xval": 2.6875, + "num_input_tokens_seen": 1902912, + "step": 34 + }, + { + "epoch": 0.0779510022271715, + "grad_norm": 68.22655487060547, + "learning_rate": 1e-06, + "loss": 4.0071, + "num_input_tokens_seen": 1958520, + "step": 35 + }, + { + "epoch": 0.0779510022271715, + "loss": 3.5187363624572754, + "loss_ce": 1.068541169166565, + "loss_iou": 0.9296875, + "loss_num": 0.11865234375, + "loss_xval": 2.453125, + "num_input_tokens_seen": 1958520, + "step": 35 + }, + { + "epoch": 0.0801781737193764, + "grad_norm": 76.44446563720703, + "learning_rate": 1e-06, + "loss": 4.2565, + "num_input_tokens_seen": 2011292, + "step": 36 + }, + { + "epoch": 0.0801781737193764, + "loss": 3.761260509490967, + "loss_ce": 1.2253718376159668, + "loss_iou": 1.015625, + "loss_num": 0.10205078125, + "loss_xval": 2.53125, + "num_input_tokens_seen": 2011292, + "step": 36 + }, + { + "epoch": 0.08240534521158129, + "grad_norm": 40.924400329589844, + "learning_rate": 1e-06, + "loss": 3.7774, + "num_input_tokens_seen": 2068200, + "step": 37 + }, + { + "epoch": 0.08240534521158129, + "loss": 3.715888500213623, + "loss_ce": 1.778876781463623, + "loss_iou": 0.75390625, + "loss_num": 0.0859375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 2068200, + "step": 37 + }, + { + "epoch": 0.08463251670378619, + "grad_norm": 63.25859069824219, + "learning_rate": 1e-06, + "loss": 3.4187, + "num_input_tokens_seen": 2124620, + "step": 38 + }, + { + "epoch": 0.08463251670378619, + "loss": 3.6475610733032227, + "loss_ce": 1.252053141593933, + "loss_iou": 0.953125, + "loss_num": 0.09765625, + "loss_xval": 2.390625, + "num_input_tokens_seen": 2124620, + "step": 38 + }, + { + "epoch": 0.08685968819599109, + "grad_norm": 53.20210266113281, + "learning_rate": 1e-06, + "loss": 3.69, + "num_input_tokens_seen": 2180648, + "step": 39 + }, + { + "epoch": 0.08685968819599109, + "loss": 3.29498291015625, + "loss_ce": 1.070373773574829, + "loss_iou": 0.921875, + "loss_num": 0.076171875, + "loss_xval": 2.21875, + "num_input_tokens_seen": 2180648, + "step": 39 + }, + { + "epoch": 0.08908685968819599, + "grad_norm": 56.693077087402344, + "learning_rate": 1e-06, + "loss": 3.8869, + "num_input_tokens_seen": 2239320, + "step": 40 + }, + { + "epoch": 0.08908685968819599, + "loss": 4.501180648803711, + "loss_ce": 1.5011805295944214, + "loss_iou": 1.1796875, + "loss_num": 0.1298828125, + "loss_xval": 3.0, + "num_input_tokens_seen": 2239320, + "step": 40 + }, + { + "epoch": 0.09131403118040089, + "grad_norm": 40.52269744873047, + "learning_rate": 1e-06, + "loss": 3.3113, + "num_input_tokens_seen": 2294136, + "step": 41 + }, + { + "epoch": 0.09131403118040089, + "loss": 3.6476950645446777, + "loss_ce": 1.1711325645446777, + "loss_iou": 0.9609375, + "loss_num": 0.1103515625, + "loss_xval": 2.46875, + "num_input_tokens_seen": 2294136, + "step": 41 + }, + { + "epoch": 0.0935412026726058, + "grad_norm": 45.07380676269531, + "learning_rate": 1e-06, + "loss": 4.2714, + "num_input_tokens_seen": 2349572, + "step": 42 + }, + { + "epoch": 0.0935412026726058, + "loss": 4.451573371887207, + "loss_ce": 1.497471570968628, + "loss_iou": 1.1953125, + "loss_num": 0.11279296875, + "loss_xval": 2.953125, + "num_input_tokens_seen": 2349572, + "step": 42 + }, + { + "epoch": 0.0957683741648107, + "grad_norm": 43.15333557128906, + "learning_rate": 1e-06, + "loss": 4.3193, + "num_input_tokens_seen": 2405120, + "step": 43 + }, + { + "epoch": 0.0957683741648107, + "loss": 4.689324378967285, + "loss_ce": 1.3651058673858643, + "loss_iou": 1.34375, + "loss_num": 0.125, + "loss_xval": 3.328125, + "num_input_tokens_seen": 2405120, + "step": 43 + }, + { + "epoch": 0.09799554565701558, + "grad_norm": 43.444942474365234, + "learning_rate": 1e-06, + "loss": 3.2982, + "num_input_tokens_seen": 2459840, + "step": 44 + }, + { + "epoch": 0.09799554565701558, + "loss": 3.244344472885132, + "loss_ce": 0.8068445920944214, + "loss_iou": 1.03125, + "loss_num": 0.07470703125, + "loss_xval": 2.4375, + "num_input_tokens_seen": 2459840, + "step": 44 + }, + { + "epoch": 0.10022271714922049, + "grad_norm": 43.407318115234375, + "learning_rate": 1e-06, + "loss": 3.8361, + "num_input_tokens_seen": 2513464, + "step": 45 + }, + { + "epoch": 0.10022271714922049, + "loss": 3.987287998199463, + "loss_ce": 1.3007644414901733, + "loss_iou": 1.0625, + "loss_num": 0.111328125, + "loss_xval": 2.6875, + "num_input_tokens_seen": 2513464, + "step": 45 + }, + { + "epoch": 0.10244988864142539, + "grad_norm": 47.13986587524414, + "learning_rate": 1e-06, + "loss": 3.9219, + "num_input_tokens_seen": 2572040, + "step": 46 + }, + { + "epoch": 0.10244988864142539, + "loss": 4.833956718444824, + "loss_ce": 1.9491908550262451, + "loss_iou": 1.0703125, + "loss_num": 0.1484375, + "loss_xval": 2.890625, + "num_input_tokens_seen": 2572040, + "step": 46 + }, + { + "epoch": 0.10467706013363029, + "grad_norm": 48.692970275878906, + "learning_rate": 1e-06, + "loss": 3.7242, + "num_input_tokens_seen": 2627832, + "step": 47 + }, + { + "epoch": 0.10467706013363029, + "loss": 3.7583022117614746, + "loss_ce": 1.0444347858428955, + "loss_iou": 1.078125, + "loss_num": 0.11181640625, + "loss_xval": 2.71875, + "num_input_tokens_seen": 2627832, + "step": 47 + }, + { + "epoch": 0.10690423162583519, + "grad_norm": 39.18439483642578, + "learning_rate": 1e-06, + "loss": 4.2013, + "num_input_tokens_seen": 2682316, + "step": 48 + }, + { + "epoch": 0.10690423162583519, + "loss": 4.251529216766357, + "loss_ce": 1.5435214042663574, + "loss_iou": 1.0625, + "loss_num": 0.1162109375, + "loss_xval": 2.703125, + "num_input_tokens_seen": 2682316, + "step": 48 + }, + { + "epoch": 0.1091314031180401, + "grad_norm": 40.1721305847168, + "learning_rate": 1e-06, + "loss": 3.7247, + "num_input_tokens_seen": 2738948, + "step": 49 + }, + { + "epoch": 0.1091314031180401, + "loss": 3.429368734359741, + "loss_ce": 1.0699937343597412, + "loss_iou": 0.96875, + "loss_num": 0.0849609375, + "loss_xval": 2.359375, + "num_input_tokens_seen": 2738948, + "step": 49 + }, + { + "epoch": 0.111358574610245, + "grad_norm": 53.2962646484375, + "learning_rate": 1e-06, + "loss": 3.3685, + "num_input_tokens_seen": 2795512, + "step": 50 + }, + { + "epoch": 0.111358574610245, + "loss": 3.372053861618042, + "loss_ce": 1.0507646799087524, + "loss_iou": 0.9375, + "loss_num": 0.08935546875, + "loss_xval": 2.328125, + "num_input_tokens_seen": 2795512, + "step": 50 + }, + { + "epoch": 0.11358574610244988, + "grad_norm": 39.22996139526367, + "learning_rate": 1e-06, + "loss": 3.5006, + "num_input_tokens_seen": 2853264, + "step": 51 + }, + { + "epoch": 0.11358574610244988, + "loss": 3.352569580078125, + "loss_ce": 0.9648742079734802, + "loss_iou": 0.9609375, + "loss_num": 0.09423828125, + "loss_xval": 2.390625, + "num_input_tokens_seen": 2853264, + "step": 51 + }, + { + "epoch": 0.11581291759465479, + "grad_norm": 51.9208869934082, + "learning_rate": 1e-06, + "loss": 4.002, + "num_input_tokens_seen": 2909744, + "step": 52 + }, + { + "epoch": 0.11581291759465479, + "loss": 3.849562168121338, + "loss_ce": 1.003859043121338, + "loss_iou": 1.0546875, + "loss_num": 0.1455078125, + "loss_xval": 2.84375, + "num_input_tokens_seen": 2909744, + "step": 52 + }, + { + "epoch": 0.11804008908685969, + "grad_norm": 36.589988708496094, + "learning_rate": 1e-06, + "loss": 3.8423, + "num_input_tokens_seen": 2963684, + "step": 53 + }, + { + "epoch": 0.11804008908685969, + "loss": 3.315175771713257, + "loss_ce": 1.3342187404632568, + "loss_iou": 0.77734375, + "loss_num": 0.0859375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 2963684, + "step": 53 + }, + { + "epoch": 0.12026726057906459, + "grad_norm": 93.62321472167969, + "learning_rate": 1e-06, + "loss": 3.6344, + "num_input_tokens_seen": 3019120, + "step": 54 + }, + { + "epoch": 0.12026726057906459, + "loss": 3.678225040435791, + "loss_ce": 1.3032249212265015, + "loss_iou": 0.95703125, + "loss_num": 0.09228515625, + "loss_xval": 2.375, + "num_input_tokens_seen": 3019120, + "step": 54 + }, + { + "epoch": 0.12249443207126949, + "grad_norm": 73.76739501953125, + "learning_rate": 1e-06, + "loss": 4.5231, + "num_input_tokens_seen": 3074692, + "step": 55 + }, + { + "epoch": 0.12249443207126949, + "loss": 4.42036247253418, + "loss_ce": 1.8959481716156006, + "loss_iou": 0.91796875, + "loss_num": 0.1376953125, + "loss_xval": 2.53125, + "num_input_tokens_seen": 3074692, + "step": 55 + }, + { + "epoch": 0.12472160356347439, + "grad_norm": 58.37528991699219, + "learning_rate": 1e-06, + "loss": 4.4363, + "num_input_tokens_seen": 3128300, + "step": 56 + }, + { + "epoch": 0.12472160356347439, + "loss": 4.376946926116943, + "loss_ce": 1.451165795326233, + "loss_iou": 1.140625, + "loss_num": 0.12890625, + "loss_xval": 2.921875, + "num_input_tokens_seen": 3128300, + "step": 56 + }, + { + "epoch": 0.12694877505567928, + "grad_norm": 50.1368293762207, + "learning_rate": 1e-06, + "loss": 3.0486, + "num_input_tokens_seen": 3186576, + "step": 57 + }, + { + "epoch": 0.12694877505567928, + "loss": 3.0530099868774414, + "loss_ce": 0.8830881118774414, + "loss_iou": 0.828125, + "loss_num": 0.1025390625, + "loss_xval": 2.171875, + "num_input_tokens_seen": 3186576, + "step": 57 + }, + { + "epoch": 0.1291759465478842, + "grad_norm": 88.61648559570312, + "learning_rate": 1e-06, + "loss": 3.6031, + "num_input_tokens_seen": 3243908, + "step": 58 + }, + { + "epoch": 0.1291759465478842, + "loss": 3.6260769367218018, + "loss_ce": 1.0352567434310913, + "loss_iou": 1.0546875, + "loss_num": 0.095703125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 3243908, + "step": 58 + }, + { + "epoch": 0.13140311804008908, + "grad_norm": 366.4051818847656, + "learning_rate": 1e-06, + "loss": 2.9703, + "num_input_tokens_seen": 3301940, + "step": 59 + }, + { + "epoch": 0.13140311804008908, + "loss": 2.9112725257873535, + "loss_ce": 0.6681084632873535, + "loss_iou": 0.890625, + "loss_num": 0.09326171875, + "loss_xval": 2.25, + "num_input_tokens_seen": 3301940, + "step": 59 + }, + { + "epoch": 0.133630289532294, + "grad_norm": 60.00054931640625, + "learning_rate": 1e-06, + "loss": 3.4228, + "num_input_tokens_seen": 3357440, + "step": 60 + }, + { + "epoch": 0.133630289532294, + "loss": 4.004701137542725, + "loss_ce": 1.8259902000427246, + "loss_iou": 0.8125, + "loss_num": 0.10986328125, + "loss_xval": 2.171875, + "num_input_tokens_seen": 3357440, + "step": 60 + }, + { + "epoch": 0.1358574610244989, + "grad_norm": 75.00879669189453, + "learning_rate": 1e-06, + "loss": 3.1458, + "num_input_tokens_seen": 3416064, + "step": 61 + }, + { + "epoch": 0.1358574610244989, + "loss": 3.1031904220581055, + "loss_ce": 1.0651044845581055, + "loss_iou": 0.828125, + "loss_num": 0.076171875, + "loss_xval": 2.03125, + "num_input_tokens_seen": 3416064, + "step": 61 + }, + { + "epoch": 0.13808463251670378, + "grad_norm": 44.26047134399414, + "learning_rate": 1e-06, + "loss": 3.4534, + "num_input_tokens_seen": 3474300, + "step": 62 + }, + { + "epoch": 0.13808463251670378, + "loss": 3.3606114387512207, + "loss_ce": 0.8108068704605103, + "loss_iou": 0.98828125, + "loss_num": 0.1142578125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 3474300, + "step": 62 + }, + { + "epoch": 0.1403118040089087, + "grad_norm": 43.13284683227539, + "learning_rate": 1e-06, + "loss": 4.0539, + "num_input_tokens_seen": 3526756, + "step": 63 + }, + { + "epoch": 0.1403118040089087, + "loss": 3.9055140018463135, + "loss_ce": 1.0422327518463135, + "loss_iou": 1.1015625, + "loss_num": 0.1328125, + "loss_xval": 2.859375, + "num_input_tokens_seen": 3526756, + "step": 63 + }, + { + "epoch": 0.14253897550111358, + "grad_norm": 48.860225677490234, + "learning_rate": 1e-06, + "loss": 3.7684, + "num_input_tokens_seen": 3582620, + "step": 64 + }, + { + "epoch": 0.14253897550111358, + "loss": 3.5178382396698, + "loss_ce": 0.9514319896697998, + "loss_iou": 1.0234375, + "loss_num": 0.10400390625, + "loss_xval": 2.5625, + "num_input_tokens_seen": 3582620, + "step": 64 + }, + { + "epoch": 0.1447661469933185, + "grad_norm": 51.4886474609375, + "learning_rate": 1e-06, + "loss": 4.0096, + "num_input_tokens_seen": 3636136, + "step": 65 + }, + { + "epoch": 0.1447661469933185, + "loss": 4.009946823120117, + "loss_ce": 1.5128767490386963, + "loss_iou": 0.9765625, + "loss_num": 0.10888671875, + "loss_xval": 2.5, + "num_input_tokens_seen": 3636136, + "step": 65 + }, + { + "epoch": 0.14699331848552338, + "grad_norm": 75.6802978515625, + "learning_rate": 1e-06, + "loss": 4.0291, + "num_input_tokens_seen": 3692172, + "step": 66 + }, + { + "epoch": 0.14699331848552338, + "loss": 3.9964442253112793, + "loss_ce": 1.5921471118927002, + "loss_iou": 0.84765625, + "loss_num": 0.1416015625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 3692172, + "step": 66 + }, + { + "epoch": 0.1492204899777283, + "grad_norm": 44.05568313598633, + "learning_rate": 1e-06, + "loss": 2.7517, + "num_input_tokens_seen": 3750032, + "step": 67 + }, + { + "epoch": 0.1492204899777283, + "loss": 2.7267603874206543, + "loss_ce": 0.6720730662345886, + "loss_iou": 0.8671875, + "loss_num": 0.06396484375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 3750032, + "step": 67 + }, + { + "epoch": 0.1514476614699332, + "grad_norm": 33.830841064453125, + "learning_rate": 1e-06, + "loss": 3.3022, + "num_input_tokens_seen": 3804556, + "step": 68 + }, + { + "epoch": 0.1514476614699332, + "loss": 3.4905872344970703, + "loss_ce": 0.7855091094970703, + "loss_iou": 1.046875, + "loss_num": 0.12451171875, + "loss_xval": 2.703125, + "num_input_tokens_seen": 3804556, + "step": 68 + }, + { + "epoch": 0.15367483296213807, + "grad_norm": 47.166080474853516, + "learning_rate": 1e-06, + "loss": 3.6395, + "num_input_tokens_seen": 3860224, + "step": 69 + }, + { + "epoch": 0.15367483296213807, + "loss": 3.921863555908203, + "loss_ce": 1.5263557434082031, + "loss_iou": 0.8828125, + "loss_num": 0.1259765625, + "loss_xval": 2.390625, + "num_input_tokens_seen": 3860224, + "step": 69 + }, + { + "epoch": 0.155902004454343, + "grad_norm": 60.735130310058594, + "learning_rate": 1e-06, + "loss": 4.3284, + "num_input_tokens_seen": 3911976, + "step": 70 + }, + { + "epoch": 0.155902004454343, + "loss": 4.273301124572754, + "loss_ce": 1.4632422924041748, + "loss_iou": 1.1328125, + "loss_num": 0.10791015625, + "loss_xval": 2.8125, + "num_input_tokens_seen": 3911976, + "step": 70 + }, + { + "epoch": 0.15812917594654788, + "grad_norm": 40.85783004760742, + "learning_rate": 1e-06, + "loss": 3.3411, + "num_input_tokens_seen": 3969416, + "step": 71 + }, + { + "epoch": 0.15812917594654788, + "loss": 3.9168148040771484, + "loss_ce": 1.2000181674957275, + "loss_iou": 0.99609375, + "loss_num": 0.1455078125, + "loss_xval": 2.71875, + "num_input_tokens_seen": 3969416, + "step": 71 + }, + { + "epoch": 0.1603563474387528, + "grad_norm": 37.3762092590332, + "learning_rate": 1e-06, + "loss": 4.0877, + "num_input_tokens_seen": 4024764, + "step": 72 + }, + { + "epoch": 0.1603563474387528, + "loss": 3.9297492504119873, + "loss_ce": 1.3613898754119873, + "loss_iou": 1.0390625, + "loss_num": 0.09912109375, + "loss_xval": 2.5625, + "num_input_tokens_seen": 4024764, + "step": 72 + }, + { + "epoch": 0.16258351893095768, + "grad_norm": 37.51104736328125, + "learning_rate": 1e-06, + "loss": 3.6427, + "num_input_tokens_seen": 4081604, + "step": 73 + }, + { + "epoch": 0.16258351893095768, + "loss": 3.7234535217285156, + "loss_ce": 1.1570473909378052, + "loss_iou": 1.0, + "loss_num": 0.11328125, + "loss_xval": 2.5625, + "num_input_tokens_seen": 4081604, + "step": 73 + }, + { + "epoch": 0.16481069042316257, + "grad_norm": 58.83037567138672, + "learning_rate": 1e-06, + "loss": 3.6275, + "num_input_tokens_seen": 4138792, + "step": 74 + }, + { + "epoch": 0.16481069042316257, + "loss": 3.8506052494049072, + "loss_ce": 1.0537302494049072, + "loss_iou": 1.078125, + "loss_num": 0.12890625, + "loss_xval": 2.796875, + "num_input_tokens_seen": 4138792, + "step": 74 + }, + { + "epoch": 0.16703786191536749, + "grad_norm": 74.164794921875, + "learning_rate": 1e-06, + "loss": 3.8793, + "num_input_tokens_seen": 4195448, + "step": 75 + }, + { + "epoch": 0.16703786191536749, + "loss": 4.057165145874023, + "loss_ce": 0.9848995208740234, + "loss_iou": 1.1796875, + "loss_num": 0.1416015625, + "loss_xval": 3.078125, + "num_input_tokens_seen": 4195448, + "step": 75 + }, + { + "epoch": 0.16926503340757237, + "grad_norm": 103.62609100341797, + "learning_rate": 1e-06, + "loss": 3.7588, + "num_input_tokens_seen": 4247988, + "step": 76 + }, + { + "epoch": 0.16926503340757237, + "loss": 3.4790072441101074, + "loss_ce": 1.3315460681915283, + "loss_iou": 0.83203125, + "loss_num": 0.0966796875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 4247988, + "step": 76 + }, + { + "epoch": 0.1714922048997773, + "grad_norm": 36.90887451171875, + "learning_rate": 1e-06, + "loss": 3.8448, + "num_input_tokens_seen": 4301000, + "step": 77 + }, + { + "epoch": 0.1714922048997773, + "loss": 4.019002437591553, + "loss_ce": 1.0971274375915527, + "loss_iou": 1.1328125, + "loss_num": 0.1328125, + "loss_xval": 2.921875, + "num_input_tokens_seen": 4301000, + "step": 77 + }, + { + "epoch": 0.17371937639198218, + "grad_norm": 60.749755859375, + "learning_rate": 1e-06, + "loss": 3.6311, + "num_input_tokens_seen": 4358284, + "step": 78 + }, + { + "epoch": 0.17371937639198218, + "loss": 3.9924979209899902, + "loss_ce": 1.3831228017807007, + "loss_iou": 1.0390625, + "loss_num": 0.10693359375, + "loss_xval": 2.609375, + "num_input_tokens_seen": 4358284, + "step": 78 + }, + { + "epoch": 0.1759465478841871, + "grad_norm": 43.58686065673828, + "learning_rate": 1e-06, + "loss": 3.7598, + "num_input_tokens_seen": 4412984, + "step": 79 + }, + { + "epoch": 0.1759465478841871, + "loss": 4.099183559417725, + "loss_ce": 1.1939101219177246, + "loss_iou": 1.0859375, + "loss_num": 0.1474609375, + "loss_xval": 2.90625, + "num_input_tokens_seen": 4412984, + "step": 79 + }, + { + "epoch": 0.17817371937639198, + "grad_norm": 46.925628662109375, + "learning_rate": 1e-06, + "loss": 3.6665, + "num_input_tokens_seen": 4469440, + "step": 80 + }, + { + "epoch": 0.17817371937639198, + "loss": 3.7074530124664307, + "loss_ce": 1.1742497682571411, + "loss_iou": 0.9609375, + "loss_num": 0.12109375, + "loss_xval": 2.53125, + "num_input_tokens_seen": 4469440, + "step": 80 + }, + { + "epoch": 0.18040089086859687, + "grad_norm": 37.318233489990234, + "learning_rate": 1e-06, + "loss": 3.108, + "num_input_tokens_seen": 4528044, + "step": 81 + }, + { + "epoch": 0.18040089086859687, + "loss": 3.302272319793701, + "loss_ce": 0.8784441947937012, + "loss_iou": 0.91796875, + "loss_num": 0.11767578125, + "loss_xval": 2.421875, + "num_input_tokens_seen": 4528044, + "step": 81 + }, + { + "epoch": 0.18262806236080179, + "grad_norm": 89.16364288330078, + "learning_rate": 1e-06, + "loss": 2.8823, + "num_input_tokens_seen": 4585856, + "step": 82 + }, + { + "epoch": 0.18262806236080179, + "loss": 2.5211141109466553, + "loss_ce": 0.4742392301559448, + "loss_iou": 0.84765625, + "loss_num": 0.06982421875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 4585856, + "step": 82 + }, + { + "epoch": 0.18485523385300667, + "grad_norm": 75.21393585205078, + "learning_rate": 1e-06, + "loss": 3.7455, + "num_input_tokens_seen": 4640456, + "step": 83 + }, + { + "epoch": 0.18485523385300667, + "loss": 4.00370979309082, + "loss_ce": 1.2458975315093994, + "loss_iou": 1.09375, + "loss_num": 0.11328125, + "loss_xval": 2.75, + "num_input_tokens_seen": 4640456, + "step": 83 + }, + { + "epoch": 0.1870824053452116, + "grad_norm": 47.31288146972656, + "learning_rate": 1e-06, + "loss": 3.7102, + "num_input_tokens_seen": 4695604, + "step": 84 + }, + { + "epoch": 0.1870824053452116, + "loss": 3.4363784790039062, + "loss_ce": 1.1434097290039062, + "loss_iou": 0.8671875, + "loss_num": 0.1123046875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 4695604, + "step": 84 + }, + { + "epoch": 0.18930957683741648, + "grad_norm": 103.16661071777344, + "learning_rate": 1e-06, + "loss": 3.9615, + "num_input_tokens_seen": 4750568, + "step": 85 + }, + { + "epoch": 0.18930957683741648, + "loss": 3.598466396331787, + "loss_ce": 0.9617477655410767, + "loss_iou": 0.98046875, + "loss_num": 0.134765625, + "loss_xval": 2.640625, + "num_input_tokens_seen": 4750568, + "step": 85 + }, + { + "epoch": 0.1915367483296214, + "grad_norm": 30.83519172668457, + "learning_rate": 1e-06, + "loss": 3.4149, + "num_input_tokens_seen": 4802048, + "step": 86 + }, + { + "epoch": 0.1915367483296214, + "loss": 3.000054359436035, + "loss_ce": 0.6123592257499695, + "loss_iou": 0.890625, + "loss_num": 0.12158203125, + "loss_xval": 2.390625, + "num_input_tokens_seen": 4802048, + "step": 86 + }, + { + "epoch": 0.19376391982182628, + "grad_norm": 44.40241241455078, + "learning_rate": 1e-06, + "loss": 3.8462, + "num_input_tokens_seen": 4857292, + "step": 87 + }, + { + "epoch": 0.19376391982182628, + "loss": 3.8453128337860107, + "loss_ce": 1.0943361520767212, + "loss_iou": 1.0546875, + "loss_num": 0.1279296875, + "loss_xval": 2.75, + "num_input_tokens_seen": 4857292, + "step": 87 + }, + { + "epoch": 0.19599109131403117, + "grad_norm": 36.97007751464844, + "learning_rate": 1e-06, + "loss": 3.4149, + "num_input_tokens_seen": 4914020, + "step": 88 + }, + { + "epoch": 0.19599109131403117, + "loss": 3.3789005279541016, + "loss_ce": 1.074212908744812, + "loss_iou": 0.84375, + "loss_num": 0.1220703125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 4914020, + "step": 88 + }, + { + "epoch": 0.19821826280623608, + "grad_norm": 100.41455841064453, + "learning_rate": 1e-06, + "loss": 3.7294, + "num_input_tokens_seen": 4970596, + "step": 89 + }, + { + "epoch": 0.19821826280623608, + "loss": 3.8161191940307617, + "loss_ce": 1.1676816940307617, + "loss_iou": 1.046875, + "loss_num": 0.1123046875, + "loss_xval": 2.65625, + "num_input_tokens_seen": 4970596, + "step": 89 + }, + { + "epoch": 0.20044543429844097, + "grad_norm": 56.474788665771484, + "learning_rate": 1e-06, + "loss": 3.628, + "num_input_tokens_seen": 5022084, + "step": 90 + }, + { + "epoch": 0.20044543429844097, + "loss": 3.246203899383545, + "loss_ce": 0.770129919052124, + "loss_iou": 0.89453125, + "loss_num": 0.13671875, + "loss_xval": 2.46875, + "num_input_tokens_seen": 5022084, + "step": 90 + }, + { + "epoch": 0.2026726057906459, + "grad_norm": 45.0289192199707, + "learning_rate": 1e-06, + "loss": 2.9406, + "num_input_tokens_seen": 5076540, + "step": 91 + }, + { + "epoch": 0.2026726057906459, + "loss": 2.8975300788879395, + "loss_ce": 0.491280198097229, + "loss_iou": 0.9296875, + "loss_num": 0.109375, + "loss_xval": 2.40625, + "num_input_tokens_seen": 5076540, + "step": 91 + }, + { + "epoch": 0.20489977728285078, + "grad_norm": 33.83285140991211, + "learning_rate": 1e-06, + "loss": 2.8553, + "num_input_tokens_seen": 5132312, + "step": 92 + }, + { + "epoch": 0.20489977728285078, + "loss": 2.73876953125, + "loss_ce": 0.7338864803314209, + "loss_iou": 0.75390625, + "loss_num": 0.09912109375, + "loss_xval": 2.0, + "num_input_tokens_seen": 5132312, + "step": 92 + }, + { + "epoch": 0.2071269487750557, + "grad_norm": 55.93543243408203, + "learning_rate": 1e-06, + "loss": 3.3996, + "num_input_tokens_seen": 5188188, + "step": 93 + }, + { + "epoch": 0.2071269487750557, + "loss": 3.369142770767212, + "loss_ce": 0.7919942140579224, + "loss_iou": 0.95703125, + "loss_num": 0.1318359375, + "loss_xval": 2.578125, + "num_input_tokens_seen": 5188188, + "step": 93 + }, + { + "epoch": 0.20935412026726058, + "grad_norm": 42.92362976074219, + "learning_rate": 1e-06, + "loss": 3.3957, + "num_input_tokens_seen": 5245580, + "step": 94 + }, + { + "epoch": 0.20935412026726058, + "loss": 3.3484046459198, + "loss_ce": 0.7302405834197998, + "loss_iou": 0.9609375, + "loss_num": 0.1396484375, + "loss_xval": 2.625, + "num_input_tokens_seen": 5245580, + "step": 94 + }, + { + "epoch": 0.21158129175946547, + "grad_norm": 38.73051452636719, + "learning_rate": 1e-06, + "loss": 2.8372, + "num_input_tokens_seen": 5302720, + "step": 95 + }, + { + "epoch": 0.21158129175946547, + "loss": 3.1018481254577637, + "loss_ce": 0.6829028129577637, + "loss_iou": 0.90625, + "loss_num": 0.12109375, + "loss_xval": 2.421875, + "num_input_tokens_seen": 5302720, + "step": 95 + }, + { + "epoch": 0.21380846325167038, + "grad_norm": 51.393985748291016, + "learning_rate": 1e-06, + "loss": 3.4311, + "num_input_tokens_seen": 5359268, + "step": 96 + }, + { + "epoch": 0.21380846325167038, + "loss": 3.3091301918029785, + "loss_ce": 1.068895697593689, + "loss_iou": 0.796875, + "loss_num": 0.1298828125, + "loss_xval": 2.234375, + "num_input_tokens_seen": 5359268, + "step": 96 + }, + { + "epoch": 0.21603563474387527, + "grad_norm": 34.125511169433594, + "learning_rate": 1e-06, + "loss": 3.3903, + "num_input_tokens_seen": 5415716, + "step": 97 + }, + { + "epoch": 0.21603563474387527, + "loss": 3.3011648654937744, + "loss_ce": 0.8431569933891296, + "loss_iou": 0.8828125, + "loss_num": 0.138671875, + "loss_xval": 2.453125, + "num_input_tokens_seen": 5415716, + "step": 97 + }, + { + "epoch": 0.2182628062360802, + "grad_norm": 47.519710540771484, + "learning_rate": 1e-06, + "loss": 2.9414, + "num_input_tokens_seen": 5473288, + "step": 98 + }, + { + "epoch": 0.2182628062360802, + "loss": 3.0692644119262695, + "loss_ce": 0.71477210521698, + "loss_iou": 0.88671875, + "loss_num": 0.11669921875, + "loss_xval": 2.359375, + "num_input_tokens_seen": 5473288, + "step": 98 + }, + { + "epoch": 0.22048997772828507, + "grad_norm": 26.452730178833008, + "learning_rate": 1e-06, + "loss": 3.3211, + "num_input_tokens_seen": 5528484, + "step": 99 + }, + { + "epoch": 0.22048997772828507, + "loss": 3.3662614822387695, + "loss_ce": 0.9463395476341248, + "loss_iou": 0.875, + "loss_num": 0.1328125, + "loss_xval": 2.421875, + "num_input_tokens_seen": 5528484, + "step": 99 + }, + { + "epoch": 0.22271714922049, + "grad_norm": 60.19447708129883, + "learning_rate": 1e-06, + "loss": 3.2708, + "num_input_tokens_seen": 5584888, + "step": 100 + }, + { + "epoch": 0.22271714922049, + "loss": 3.1879868507385254, + "loss_ce": 0.9902329444885254, + "loss_iou": 0.79296875, + "loss_num": 0.1220703125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 5584888, + "step": 100 + }, + { + "epoch": 0.22494432071269488, + "grad_norm": 70.54967498779297, + "learning_rate": 1e-06, + "loss": 2.7226, + "num_input_tokens_seen": 5642204, + "step": 101 + }, + { + "epoch": 0.22494432071269488, + "loss": 2.864590644836426, + "loss_ce": 0.7434969544410706, + "loss_iou": 0.765625, + "loss_num": 0.11865234375, + "loss_xval": 2.125, + "num_input_tokens_seen": 5642204, + "step": 101 + }, + { + "epoch": 0.22717149220489977, + "grad_norm": 30.121597290039062, + "learning_rate": 1e-06, + "loss": 3.6091, + "num_input_tokens_seen": 5696676, + "step": 102 + }, + { + "epoch": 0.22717149220489977, + "loss": 3.1128854751586914, + "loss_ce": 0.7256782054901123, + "loss_iou": 0.8828125, + "loss_num": 0.12353515625, + "loss_xval": 2.390625, + "num_input_tokens_seen": 5696676, + "step": 102 + }, + { + "epoch": 0.22939866369710468, + "grad_norm": 38.39320373535156, + "learning_rate": 1e-06, + "loss": 2.9509, + "num_input_tokens_seen": 5751512, + "step": 103 + }, + { + "epoch": 0.22939866369710468, + "loss": 3.0732927322387695, + "loss_ce": 0.5625505447387695, + "loss_iou": 0.90234375, + "loss_num": 0.140625, + "loss_xval": 2.515625, + "num_input_tokens_seen": 5751512, + "step": 103 + }, + { + "epoch": 0.23162583518930957, + "grad_norm": 38.619789123535156, + "learning_rate": 1e-06, + "loss": 3.2933, + "num_input_tokens_seen": 5806704, + "step": 104 + }, + { + "epoch": 0.23162583518930957, + "loss": 3.1526646614074707, + "loss_ce": 0.8069615960121155, + "loss_iou": 0.89453125, + "loss_num": 0.11083984375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 5806704, + "step": 104 + }, + { + "epoch": 0.23385300668151449, + "grad_norm": 39.458309173583984, + "learning_rate": 1e-06, + "loss": 2.705, + "num_input_tokens_seen": 5864592, + "step": 105 + }, + { + "epoch": 0.23385300668151449, + "loss": 2.9264371395111084, + "loss_ce": 0.6334683895111084, + "loss_iou": 0.859375, + "loss_num": 0.11474609375, + "loss_xval": 2.296875, + "num_input_tokens_seen": 5864592, + "step": 105 + }, + { + "epoch": 0.23608017817371937, + "grad_norm": 91.0936050415039, + "learning_rate": 1e-06, + "loss": 2.6971, + "num_input_tokens_seen": 5920932, + "step": 106 + }, + { + "epoch": 0.23608017817371937, + "loss": 2.743122100830078, + "loss_ce": 0.6098213195800781, + "loss_iou": 0.75, + "loss_num": 0.126953125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 5920932, + "step": 106 + }, + { + "epoch": 0.2383073496659243, + "grad_norm": 58.84850311279297, + "learning_rate": 1e-06, + "loss": 3.047, + "num_input_tokens_seen": 5978660, + "step": 107 + }, + { + "epoch": 0.2383073496659243, + "loss": 2.7701728343963623, + "loss_ce": 0.4518134593963623, + "loss_iou": 0.8125, + "loss_num": 0.138671875, + "loss_xval": 2.3125, + "num_input_tokens_seen": 5978660, + "step": 107 + }, + { + "epoch": 0.24053452115812918, + "grad_norm": 37.72629928588867, + "learning_rate": 1e-06, + "loss": 3.1347, + "num_input_tokens_seen": 6032308, + "step": 108 + }, + { + "epoch": 0.24053452115812918, + "loss": 3.364459991455078, + "loss_ce": 0.720416784286499, + "loss_iou": 0.98828125, + "loss_num": 0.1337890625, + "loss_xval": 2.640625, + "num_input_tokens_seen": 6032308, + "step": 108 + }, + { + "epoch": 0.24276169265033407, + "grad_norm": 35.40589141845703, + "learning_rate": 1e-06, + "loss": 2.8689, + "num_input_tokens_seen": 6087012, + "step": 109 + }, + { + "epoch": 0.24276169265033407, + "loss": 2.9507226943969727, + "loss_ce": 0.40775376558303833, + "loss_iou": 0.9453125, + "loss_num": 0.1298828125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 6087012, + "step": 109 + }, + { + "epoch": 0.24498886414253898, + "grad_norm": 42.018192291259766, + "learning_rate": 1e-06, + "loss": 2.9651, + "num_input_tokens_seen": 6144992, + "step": 110 + }, + { + "epoch": 0.24498886414253898, + "loss": 2.959064483642578, + "loss_ce": 0.5215646624565125, + "loss_iou": 0.88671875, + "loss_num": 0.1318359375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 6144992, + "step": 110 + }, + { + "epoch": 0.24721603563474387, + "grad_norm": 80.21202850341797, + "learning_rate": 1e-06, + "loss": 2.9912, + "num_input_tokens_seen": 6201808, + "step": 111 + }, + { + "epoch": 0.24721603563474387, + "loss": 3.193042755126953, + "loss_ce": 0.9244881868362427, + "loss_iou": 0.875, + "loss_num": 0.10400390625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 6201808, + "step": 111 + }, + { + "epoch": 0.24944320712694878, + "grad_norm": 54.696163177490234, + "learning_rate": 1e-06, + "loss": 3.2172, + "num_input_tokens_seen": 6253116, + "step": 112 + }, + { + "epoch": 0.24944320712694878, + "loss": 3.254600763320923, + "loss_ce": 0.6891711354255676, + "loss_iou": 0.94140625, + "loss_num": 0.13671875, + "loss_xval": 2.5625, + "num_input_tokens_seen": 6253116, + "step": 112 + }, + { + "epoch": 0.2516703786191537, + "grad_norm": 42.921669006347656, + "learning_rate": 1e-06, + "loss": 3.186, + "num_input_tokens_seen": 6308684, + "step": 113 + }, + { + "epoch": 0.2516703786191537, + "loss": 3.4225220680236816, + "loss_ce": 0.48892807960510254, + "loss_iou": 1.046875, + "loss_num": 0.16796875, + "loss_xval": 2.9375, + "num_input_tokens_seen": 6308684, + "step": 113 + }, + { + "epoch": 0.25389755011135856, + "grad_norm": 78.24114227294922, + "learning_rate": 1e-06, + "loss": 3.0514, + "num_input_tokens_seen": 6364580, + "step": 114 + }, + { + "epoch": 0.25389755011135856, + "loss": 3.6505331993103027, + "loss_ce": 0.7813925743103027, + "loss_iou": 1.0546875, + "loss_num": 0.1533203125, + "loss_xval": 2.875, + "num_input_tokens_seen": 6364580, + "step": 114 + }, + { + "epoch": 0.2561247216035635, + "grad_norm": 43.09880447387695, + "learning_rate": 1e-06, + "loss": 3.0621, + "num_input_tokens_seen": 6418640, + "step": 115 + }, + { + "epoch": 0.2561247216035635, + "loss": 2.922844648361206, + "loss_ce": 0.6381767392158508, + "loss_iou": 0.8125, + "loss_num": 0.1328125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 6418640, + "step": 115 + }, + { + "epoch": 0.2583518930957684, + "grad_norm": 78.85059356689453, + "learning_rate": 1e-06, + "loss": 2.7225, + "num_input_tokens_seen": 6476332, + "step": 116 + }, + { + "epoch": 0.2583518930957684, + "loss": 2.682021141052246, + "loss_ce": 0.4828024208545685, + "loss_iou": 0.8046875, + "loss_num": 0.119140625, + "loss_xval": 2.203125, + "num_input_tokens_seen": 6476332, + "step": 116 + }, + { + "epoch": 0.26057906458797325, + "grad_norm": 39.069583892822266, + "learning_rate": 1e-06, + "loss": 2.7889, + "num_input_tokens_seen": 6531328, + "step": 117 + }, + { + "epoch": 0.26057906458797325, + "loss": 2.9253664016723633, + "loss_ce": 0.6001709699630737, + "loss_iou": 0.859375, + "loss_num": 0.12060546875, + "loss_xval": 2.328125, + "num_input_tokens_seen": 6531328, + "step": 117 + }, + { + "epoch": 0.26280623608017817, + "grad_norm": 40.309391021728516, + "learning_rate": 1e-06, + "loss": 2.8465, + "num_input_tokens_seen": 6589548, + "step": 118 + }, + { + "epoch": 0.26280623608017817, + "loss": 2.8932766914367676, + "loss_ce": 0.43429216742515564, + "loss_iou": 0.87890625, + "loss_num": 0.1396484375, + "loss_xval": 2.453125, + "num_input_tokens_seen": 6589548, + "step": 118 + }, + { + "epoch": 0.2650334075723831, + "grad_norm": 31.454940795898438, + "learning_rate": 1e-06, + "loss": 2.6428, + "num_input_tokens_seen": 6647160, + "step": 119 + }, + { + "epoch": 0.2650334075723831, + "loss": 2.7675888538360596, + "loss_ce": 0.48755955696105957, + "loss_iou": 0.78515625, + "loss_num": 0.142578125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 6647160, + "step": 119 + }, + { + "epoch": 0.267260579064588, + "grad_norm": 108.88129425048828, + "learning_rate": 1e-06, + "loss": 2.7364, + "num_input_tokens_seen": 6705976, + "step": 120 + }, + { + "epoch": 0.267260579064588, + "loss": 2.662890911102295, + "loss_ce": 0.4177738428115845, + "loss_iou": 0.796875, + "loss_num": 0.1298828125, + "loss_xval": 2.25, + "num_input_tokens_seen": 6705976, + "step": 120 + }, + { + "epoch": 0.26948775055679286, + "grad_norm": 63.05554962158203, + "learning_rate": 1e-06, + "loss": 3.2316, + "num_input_tokens_seen": 6760876, + "step": 121 + }, + { + "epoch": 0.26948775055679286, + "loss": 3.1148786544799805, + "loss_ce": 0.5191754102706909, + "loss_iou": 0.96875, + "loss_num": 0.130859375, + "loss_xval": 2.59375, + "num_input_tokens_seen": 6760876, + "step": 121 + }, + { + "epoch": 0.2717149220489978, + "grad_norm": 39.52189254760742, + "learning_rate": 1e-06, + "loss": 3.0243, + "num_input_tokens_seen": 6815624, + "step": 122 + }, + { + "epoch": 0.2717149220489978, + "loss": 2.8611388206481934, + "loss_ce": 0.37090444564819336, + "loss_iou": 0.91796875, + "loss_num": 0.1298828125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 6815624, + "step": 122 + }, + { + "epoch": 0.2739420935412027, + "grad_norm": 44.9962158203125, + "learning_rate": 1e-06, + "loss": 2.9217, + "num_input_tokens_seen": 6870556, + "step": 123 + }, + { + "epoch": 0.2739420935412027, + "loss": 3.207249164581299, + "loss_ce": 0.7170149087905884, + "loss_iou": 0.94921875, + "loss_num": 0.11865234375, + "loss_xval": 2.484375, + "num_input_tokens_seen": 6870556, + "step": 123 + }, + { + "epoch": 0.27616926503340755, + "grad_norm": 25.661319732666016, + "learning_rate": 1e-06, + "loss": 2.9669, + "num_input_tokens_seen": 6925992, + "step": 124 + }, + { + "epoch": 0.27616926503340755, + "loss": 2.76767897605896, + "loss_ce": 0.3028353154659271, + "loss_iou": 0.91015625, + "loss_num": 0.1298828125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 6925992, + "step": 124 + }, + { + "epoch": 0.27839643652561247, + "grad_norm": 41.912784576416016, + "learning_rate": 1e-06, + "loss": 2.6341, + "num_input_tokens_seen": 6983364, + "step": 125 + }, + { + "epoch": 0.27839643652561247, + "loss": 2.6220178604125977, + "loss_ce": 0.4081503748893738, + "loss_iou": 0.86328125, + "loss_num": 0.09765625, + "loss_xval": 2.21875, + "num_input_tokens_seen": 6983364, + "step": 125 + }, + { + "epoch": 0.2806236080178174, + "grad_norm": 60.407127380371094, + "learning_rate": 1e-06, + "loss": 2.6668, + "num_input_tokens_seen": 7037256, + "step": 126 + }, + { + "epoch": 0.2806236080178174, + "loss": 2.8176770210266113, + "loss_ce": 0.3831067681312561, + "loss_iou": 0.86328125, + "loss_num": 0.140625, + "loss_xval": 2.4375, + "num_input_tokens_seen": 7037256, + "step": 126 + }, + { + "epoch": 0.2828507795100223, + "grad_norm": 102.43517303466797, + "learning_rate": 1e-06, + "loss": 2.8805, + "num_input_tokens_seen": 7091296, + "step": 127 + }, + { + "epoch": 0.2828507795100223, + "loss": 2.8854475021362305, + "loss_ce": 0.7106426954269409, + "loss_iou": 0.79296875, + "loss_num": 0.1171875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 7091296, + "step": 127 + }, + { + "epoch": 0.28507795100222716, + "grad_norm": 44.01148223876953, + "learning_rate": 1e-06, + "loss": 2.8418, + "num_input_tokens_seen": 7146940, + "step": 128 + }, + { + "epoch": 0.28507795100222716, + "loss": 2.973205089569092, + "loss_ce": 0.391173779964447, + "loss_iou": 0.90625, + "loss_num": 0.154296875, + "loss_xval": 2.578125, + "num_input_tokens_seen": 7146940, + "step": 128 + }, + { + "epoch": 0.2873051224944321, + "grad_norm": 116.4285659790039, + "learning_rate": 1e-06, + "loss": 2.5866, + "num_input_tokens_seen": 7203656, + "step": 129 + }, + { + "epoch": 0.2873051224944321, + "loss": 3.1285247802734375, + "loss_ce": 0.44102469086647034, + "loss_iou": 0.9609375, + "loss_num": 0.15234375, + "loss_xval": 2.6875, + "num_input_tokens_seen": 7203656, + "step": 129 + }, + { + "epoch": 0.289532293986637, + "grad_norm": 24.682891845703125, + "learning_rate": 1e-06, + "loss": 2.6812, + "num_input_tokens_seen": 7258748, + "step": 130 + }, + { + "epoch": 0.289532293986637, + "loss": 2.730166435241699, + "loss_ce": 0.5348541140556335, + "loss_iou": 0.8046875, + "loss_num": 0.11767578125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 7258748, + "step": 130 + }, + { + "epoch": 0.29175946547884185, + "grad_norm": 34.84796142578125, + "learning_rate": 1e-06, + "loss": 2.5544, + "num_input_tokens_seen": 7315644, + "step": 131 + }, + { + "epoch": 0.29175946547884185, + "loss": 2.3853559494018555, + "loss_ce": 0.3248090147972107, + "loss_iou": 0.78515625, + "loss_num": 0.09716796875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 7315644, + "step": 131 + }, + { + "epoch": 0.29398663697104677, + "grad_norm": 33.59828186035156, + "learning_rate": 1e-06, + "loss": 2.6018, + "num_input_tokens_seen": 7371296, + "step": 132 + }, + { + "epoch": 0.29398663697104677, + "loss": 2.7992615699768066, + "loss_ce": 0.5941836833953857, + "loss_iou": 0.78125, + "loss_num": 0.12890625, + "loss_xval": 2.203125, + "num_input_tokens_seen": 7371296, + "step": 132 + }, + { + "epoch": 0.2962138084632517, + "grad_norm": 26.216712951660156, + "learning_rate": 1e-06, + "loss": 2.4007, + "num_input_tokens_seen": 7427892, + "step": 133 + }, + { + "epoch": 0.2962138084632517, + "loss": 2.259786605834961, + "loss_ce": 0.35256001353263855, + "loss_iou": 0.73828125, + "loss_num": 0.0859375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 7427892, + "step": 133 + }, + { + "epoch": 0.2984409799554566, + "grad_norm": 148.89346313476562, + "learning_rate": 1e-06, + "loss": 2.645, + "num_input_tokens_seen": 7484960, + "step": 134 + }, + { + "epoch": 0.2984409799554566, + "loss": 2.5535106658935547, + "loss_ce": 0.44901835918426514, + "loss_iou": 0.80078125, + "loss_num": 0.099609375, + "loss_xval": 2.109375, + "num_input_tokens_seen": 7484960, + "step": 134 + }, + { + "epoch": 0.30066815144766146, + "grad_norm": 29.48415184020996, + "learning_rate": 1e-06, + "loss": 2.4559, + "num_input_tokens_seen": 7543972, + "step": 135 + }, + { + "epoch": 0.30066815144766146, + "loss": 2.225175380706787, + "loss_ce": 0.2398238629102707, + "loss_iou": 0.7421875, + "loss_num": 0.10009765625, + "loss_xval": 1.984375, + "num_input_tokens_seen": 7543972, + "step": 135 + }, + { + "epoch": 0.3028953229398664, + "grad_norm": 50.64659881591797, + "learning_rate": 1e-06, + "loss": 2.7228, + "num_input_tokens_seen": 7600460, + "step": 136 + }, + { + "epoch": 0.3028953229398664, + "loss": 2.643749475479126, + "loss_ce": 0.2501947581768036, + "loss_iou": 0.8515625, + "loss_num": 0.1376953125, + "loss_xval": 2.390625, + "num_input_tokens_seen": 7600460, + "step": 136 + }, + { + "epoch": 0.3051224944320713, + "grad_norm": 27.465696334838867, + "learning_rate": 1e-06, + "loss": 2.5444, + "num_input_tokens_seen": 7657896, + "step": 137 + }, + { + "epoch": 0.3051224944320713, + "loss": 2.5869100093841553, + "loss_ce": 0.3886679410934448, + "loss_iou": 0.796875, + "loss_num": 0.12158203125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 7657896, + "step": 137 + }, + { + "epoch": 0.30734966592427615, + "grad_norm": 30.95069122314453, + "learning_rate": 1e-06, + "loss": 2.6368, + "num_input_tokens_seen": 7710764, + "step": 138 + }, + { + "epoch": 0.30734966592427615, + "loss": 2.530655860900879, + "loss_ce": 0.41444480419158936, + "loss_iou": 0.70703125, + "loss_num": 0.140625, + "loss_xval": 2.109375, + "num_input_tokens_seen": 7710764, + "step": 138 + }, + { + "epoch": 0.30957683741648107, + "grad_norm": 27.392942428588867, + "learning_rate": 1e-06, + "loss": 2.3889, + "num_input_tokens_seen": 7767452, + "step": 139 + }, + { + "epoch": 0.30957683741648107, + "loss": 2.26643705368042, + "loss_ce": 0.3201477825641632, + "loss_iou": 0.7421875, + "loss_num": 0.0927734375, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 7767452, + "step": 139 + }, + { + "epoch": 0.311804008908686, + "grad_norm": 52.42769241333008, + "learning_rate": 1e-06, + "loss": 2.5987, + "num_input_tokens_seen": 7821504, + "step": 140 + }, + { + "epoch": 0.311804008908686, + "loss": 2.6357715129852295, + "loss_ce": 0.3486621379852295, + "loss_iou": 0.8671875, + "loss_num": 0.10986328125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 7821504, + "step": 140 + }, + { + "epoch": 0.31403118040089084, + "grad_norm": 23.9682674407959, + "learning_rate": 1e-06, + "loss": 2.6711, + "num_input_tokens_seen": 7875900, + "step": 141 + }, + { + "epoch": 0.31403118040089084, + "loss": 2.9198169708251953, + "loss_ce": 0.44423115253448486, + "loss_iou": 0.9140625, + "loss_num": 0.12890625, + "loss_xval": 2.46875, + "num_input_tokens_seen": 7875900, + "step": 141 + }, + { + "epoch": 0.31625835189309576, + "grad_norm": 103.93788146972656, + "learning_rate": 1e-06, + "loss": 2.5893, + "num_input_tokens_seen": 7931732, + "step": 142 + }, + { + "epoch": 0.31625835189309576, + "loss": 2.553285837173462, + "loss_ce": 0.41266071796417236, + "loss_iou": 0.82421875, + "loss_num": 0.09814453125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 7931732, + "step": 142 + }, + { + "epoch": 0.3184855233853007, + "grad_norm": 44.492820739746094, + "learning_rate": 1e-06, + "loss": 2.4701, + "num_input_tokens_seen": 7987608, + "step": 143 + }, + { + "epoch": 0.3184855233853007, + "loss": 2.3122029304504395, + "loss_ce": 0.3502885699272156, + "loss_iou": 0.7265625, + "loss_num": 0.1025390625, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 7987608, + "step": 143 + }, + { + "epoch": 0.3207126948775056, + "grad_norm": 23.186241149902344, + "learning_rate": 1e-06, + "loss": 2.4726, + "num_input_tokens_seen": 8043360, + "step": 144 + }, + { + "epoch": 0.3207126948775056, + "loss": 2.7117819786071777, + "loss_ce": 0.3328755497932434, + "loss_iou": 0.88671875, + "loss_num": 0.12060546875, + "loss_xval": 2.375, + "num_input_tokens_seen": 8043360, + "step": 144 + }, + { + "epoch": 0.32293986636971045, + "grad_norm": 60.27910614013672, + "learning_rate": 1e-06, + "loss": 2.6172, + "num_input_tokens_seen": 8098064, + "step": 145 + }, + { + "epoch": 0.32293986636971045, + "loss": 2.32407283782959, + "loss_ce": 0.28696349263191223, + "loss_iou": 0.796875, + "loss_num": 0.08837890625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 8098064, + "step": 145 + }, + { + "epoch": 0.32516703786191536, + "grad_norm": 28.506025314331055, + "learning_rate": 1e-06, + "loss": 2.534, + "num_input_tokens_seen": 8153264, + "step": 146 + }, + { + "epoch": 0.32516703786191536, + "loss": 2.474531650543213, + "loss_ce": 0.3632035255432129, + "loss_iou": 0.7421875, + "loss_num": 0.1259765625, + "loss_xval": 2.109375, + "num_input_tokens_seen": 8153264, + "step": 146 + }, + { + "epoch": 0.3273942093541203, + "grad_norm": 55.345096588134766, + "learning_rate": 1e-06, + "loss": 2.5464, + "num_input_tokens_seen": 8209784, + "step": 147 + }, + { + "epoch": 0.3273942093541203, + "loss": 2.5160865783691406, + "loss_ce": 0.48581331968307495, + "loss_iou": 0.7578125, + "loss_num": 0.10205078125, + "loss_xval": 2.03125, + "num_input_tokens_seen": 8209784, + "step": 147 + }, + { + "epoch": 0.32962138084632514, + "grad_norm": 44.24598693847656, + "learning_rate": 1e-06, + "loss": 2.7815, + "num_input_tokens_seen": 8262396, + "step": 148 + }, + { + "epoch": 0.32962138084632514, + "loss": 2.644756317138672, + "loss_ce": 0.3185845911502838, + "loss_iou": 0.88671875, + "loss_num": 0.11083984375, + "loss_xval": 2.328125, + "num_input_tokens_seen": 8262396, + "step": 148 + }, + { + "epoch": 0.33184855233853006, + "grad_norm": 39.58821105957031, + "learning_rate": 1e-06, + "loss": 2.3255, + "num_input_tokens_seen": 8316924, + "step": 149 + }, + { + "epoch": 0.33184855233853006, + "loss": 2.2807397842407227, + "loss_ce": 0.3002711534500122, + "loss_iou": 0.73046875, + "loss_num": 0.103515625, + "loss_xval": 1.984375, + "num_input_tokens_seen": 8316924, + "step": 149 + }, + { + "epoch": 0.33407572383073497, + "grad_norm": 30.283483505249023, + "learning_rate": 1e-06, + "loss": 2.3857, + "num_input_tokens_seen": 8372780, + "step": 150 + }, + { + "epoch": 0.33407572383073497, + "loss": 2.6044678688049316, + "loss_ce": 0.25729984045028687, + "loss_iou": 0.84375, + "loss_num": 0.1318359375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 8372780, + "step": 150 + }, + { + "epoch": 0.3363028953229399, + "grad_norm": 30.648853302001953, + "learning_rate": 1e-06, + "loss": 2.5253, + "num_input_tokens_seen": 8426544, + "step": 151 + }, + { + "epoch": 0.3363028953229399, + "loss": 2.3786497116088867, + "loss_ce": 0.2819698452949524, + "loss_iou": 0.7734375, + "loss_num": 0.10986328125, + "loss_xval": 2.09375, + "num_input_tokens_seen": 8426544, + "step": 151 + }, + { + "epoch": 0.33853006681514475, + "grad_norm": 41.66792297363281, + "learning_rate": 1e-06, + "loss": 2.3784, + "num_input_tokens_seen": 8483968, + "step": 152 + }, + { + "epoch": 0.33853006681514475, + "loss": 2.490370273590088, + "loss_ce": 0.3272841274738312, + "loss_iou": 0.79296875, + "loss_num": 0.115234375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 8483968, + "step": 152 + }, + { + "epoch": 0.34075723830734966, + "grad_norm": 39.65934753417969, + "learning_rate": 1e-06, + "loss": 2.9073, + "num_input_tokens_seen": 8537416, + "step": 153 + }, + { + "epoch": 0.34075723830734966, + "loss": 3.037890911102295, + "loss_ce": 0.40898463129997253, + "loss_iou": 1.03125, + "loss_num": 0.11328125, + "loss_xval": 2.625, + "num_input_tokens_seen": 8537416, + "step": 153 + }, + { + "epoch": 0.3429844097995546, + "grad_norm": 43.49128341674805, + "learning_rate": 1e-06, + "loss": 2.6995, + "num_input_tokens_seen": 8592392, + "step": 154 + }, + { + "epoch": 0.3429844097995546, + "loss": 2.778602123260498, + "loss_ce": 0.24051626026630402, + "loss_iou": 0.8984375, + "loss_num": 0.146484375, + "loss_xval": 2.53125, + "num_input_tokens_seen": 8592392, + "step": 154 + }, + { + "epoch": 0.34521158129175944, + "grad_norm": 158.33724975585938, + "learning_rate": 1e-06, + "loss": 2.5526, + "num_input_tokens_seen": 8647228, + "step": 155 + }, + { + "epoch": 0.34521158129175944, + "loss": 2.642277717590332, + "loss_ce": 0.29755133390426636, + "loss_iou": 0.89453125, + "loss_num": 0.11083984375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 8647228, + "step": 155 + }, + { + "epoch": 0.34743875278396436, + "grad_norm": 173.70913696289062, + "learning_rate": 1e-06, + "loss": 2.8128, + "num_input_tokens_seen": 8701864, + "step": 156 + }, + { + "epoch": 0.34743875278396436, + "loss": 2.8622255325317383, + "loss_ce": 0.30753791332244873, + "loss_iou": 0.921875, + "loss_num": 0.1416015625, + "loss_xval": 2.5625, + "num_input_tokens_seen": 8701864, + "step": 156 + }, + { + "epoch": 0.34966592427616927, + "grad_norm": 410.6438903808594, + "learning_rate": 1e-06, + "loss": 2.4924, + "num_input_tokens_seen": 8759524, + "step": 157 + }, + { + "epoch": 0.34966592427616927, + "loss": 2.3690528869628906, + "loss_ce": 0.3465919494628906, + "loss_iou": 0.7578125, + "loss_num": 0.1025390625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 8759524, + "step": 157 + }, + { + "epoch": 0.3518930957683742, + "grad_norm": 40.04865646362305, + "learning_rate": 1e-06, + "loss": 2.43, + "num_input_tokens_seen": 8814804, + "step": 158 + }, + { + "epoch": 0.3518930957683742, + "loss": 2.7295665740966797, + "loss_ce": 0.2725353240966797, + "loss_iou": 0.94921875, + "loss_num": 0.111328125, + "loss_xval": 2.453125, + "num_input_tokens_seen": 8814804, + "step": 158 + }, + { + "epoch": 0.35412026726057905, + "grad_norm": 62.50952911376953, + "learning_rate": 1e-06, + "loss": 2.4288, + "num_input_tokens_seen": 8870392, + "step": 159 + }, + { + "epoch": 0.35412026726057905, + "loss": 2.16571307182312, + "loss_ce": 0.24676772952079773, + "loss_iou": 0.7734375, + "loss_num": 0.07568359375, + "loss_xval": 1.921875, + "num_input_tokens_seen": 8870392, + "step": 159 + }, + { + "epoch": 0.35634743875278396, + "grad_norm": 28.229555130004883, + "learning_rate": 1e-06, + "loss": 2.3698, + "num_input_tokens_seen": 8927972, + "step": 160 + }, + { + "epoch": 0.35634743875278396, + "loss": 2.608736753463745, + "loss_ce": 0.24057263135910034, + "loss_iou": 0.8828125, + "loss_num": 0.12060546875, + "loss_xval": 2.375, + "num_input_tokens_seen": 8927972, + "step": 160 + }, + { + "epoch": 0.3585746102449889, + "grad_norm": 21.421159744262695, + "learning_rate": 1e-06, + "loss": 2.3587, + "num_input_tokens_seen": 8983156, + "step": 161 + }, + { + "epoch": 0.3585746102449889, + "loss": 1.9863007068634033, + "loss_ce": 0.13766781985759735, + "loss_iou": 0.75390625, + "loss_num": 0.0673828125, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 8983156, + "step": 161 + }, + { + "epoch": 0.36080178173719374, + "grad_norm": 23.95926284790039, + "learning_rate": 1e-06, + "loss": 2.4225, + "num_input_tokens_seen": 9038652, + "step": 162 + }, + { + "epoch": 0.36080178173719374, + "loss": 2.301337480545044, + "loss_ce": 0.2510446012020111, + "loss_iou": 0.7734375, + "loss_num": 0.10009765625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 9038652, + "step": 162 + }, + { + "epoch": 0.36302895322939865, + "grad_norm": 47.72632598876953, + "learning_rate": 1e-06, + "loss": 2.5188, + "num_input_tokens_seen": 9096844, + "step": 163 + }, + { + "epoch": 0.36302895322939865, + "loss": 2.3073041439056396, + "loss_ce": 0.20134715735912323, + "loss_iou": 0.75, + "loss_num": 0.12060546875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 9096844, + "step": 163 + }, + { + "epoch": 0.36525612472160357, + "grad_norm": 75.89527893066406, + "learning_rate": 1e-06, + "loss": 2.5799, + "num_input_tokens_seen": 9151260, + "step": 164 + }, + { + "epoch": 0.36525612472160357, + "loss": 2.6282100677490234, + "loss_ce": 0.3332880735397339, + "loss_iou": 0.87890625, + "loss_num": 0.10693359375, + "loss_xval": 2.296875, + "num_input_tokens_seen": 9151260, + "step": 164 + }, + { + "epoch": 0.3674832962138085, + "grad_norm": 44.563873291015625, + "learning_rate": 1e-06, + "loss": 2.5729, + "num_input_tokens_seen": 9206156, + "step": 165 + }, + { + "epoch": 0.3674832962138085, + "loss": 2.5874228477478027, + "loss_ce": 0.30128994584083557, + "loss_iou": 0.90625, + "loss_num": 0.0947265625, + "loss_xval": 2.28125, + "num_input_tokens_seen": 9206156, + "step": 165 + }, + { + "epoch": 0.36971046770601335, + "grad_norm": 57.38001251220703, + "learning_rate": 1e-06, + "loss": 2.588, + "num_input_tokens_seen": 9259864, + "step": 166 + }, + { + "epoch": 0.36971046770601335, + "loss": 2.448063373565674, + "loss_ce": 0.2586103677749634, + "loss_iou": 0.875, + "loss_num": 0.08740234375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 9259864, + "step": 166 + }, + { + "epoch": 0.37193763919821826, + "grad_norm": 53.09568405151367, + "learning_rate": 1e-06, + "loss": 2.5351, + "num_input_tokens_seen": 9317272, + "step": 167 + }, + { + "epoch": 0.37193763919821826, + "loss": 2.441850185394287, + "loss_ce": 0.3842327892780304, + "loss_iou": 0.7890625, + "loss_num": 0.0966796875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 9317272, + "step": 167 + }, + { + "epoch": 0.3741648106904232, + "grad_norm": 36.94442367553711, + "learning_rate": 1e-06, + "loss": 2.1125, + "num_input_tokens_seen": 9372720, + "step": 168 + }, + { + "epoch": 0.3741648106904232, + "loss": 2.203955888748169, + "loss_ce": 0.25278398394584656, + "loss_iou": 0.74609375, + "loss_num": 0.091796875, + "loss_xval": 1.953125, + "num_input_tokens_seen": 9372720, + "step": 168 + }, + { + "epoch": 0.37639198218262804, + "grad_norm": 61.23322296142578, + "learning_rate": 1e-06, + "loss": 2.5849, + "num_input_tokens_seen": 9429244, + "step": 169 + }, + { + "epoch": 0.37639198218262804, + "loss": 2.3757333755493164, + "loss_ce": 0.22045986354351044, + "loss_iou": 0.81640625, + "loss_num": 0.1044921875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 9429244, + "step": 169 + }, + { + "epoch": 0.37861915367483295, + "grad_norm": 44.59577178955078, + "learning_rate": 1e-06, + "loss": 2.3184, + "num_input_tokens_seen": 9485716, + "step": 170 + }, + { + "epoch": 0.37861915367483295, + "loss": 2.2477307319641113, + "loss_ce": 0.21745747327804565, + "loss_iou": 0.83203125, + "loss_num": 0.0732421875, + "loss_xval": 2.03125, + "num_input_tokens_seen": 9485716, + "step": 170 + }, + { + "epoch": 0.38084632516703787, + "grad_norm": 26.56584358215332, + "learning_rate": 1e-06, + "loss": 2.3085, + "num_input_tokens_seen": 9542880, + "step": 171 + }, + { + "epoch": 0.38084632516703787, + "loss": 2.289217948913574, + "loss_ce": 0.17203053832054138, + "loss_iou": 0.859375, + "loss_num": 0.080078125, + "loss_xval": 2.125, + "num_input_tokens_seen": 9542880, + "step": 171 + }, + { + "epoch": 0.3830734966592428, + "grad_norm": 40.40160369873047, + "learning_rate": 1e-06, + "loss": 2.4104, + "num_input_tokens_seen": 9599024, + "step": 172 + }, + { + "epoch": 0.3830734966592428, + "loss": 2.4211387634277344, + "loss_ce": 0.155513733625412, + "loss_iou": 0.84765625, + "loss_num": 0.11376953125, + "loss_xval": 2.265625, + "num_input_tokens_seen": 9599024, + "step": 172 + }, + { + "epoch": 0.38530066815144765, + "grad_norm": 23.74785614013672, + "learning_rate": 1e-06, + "loss": 2.5009, + "num_input_tokens_seen": 9651488, + "step": 173 + }, + { + "epoch": 0.38530066815144765, + "loss": 2.5731005668640137, + "loss_ce": 0.2664598822593689, + "loss_iou": 0.890625, + "loss_num": 0.1044921875, + "loss_xval": 2.3125, + "num_input_tokens_seen": 9651488, + "step": 173 + }, + { + "epoch": 0.38752783964365256, + "grad_norm": 51.20659255981445, + "learning_rate": 1e-06, + "loss": 2.7359, + "num_input_tokens_seen": 9708124, + "step": 174 + }, + { + "epoch": 0.38752783964365256, + "loss": 2.863722324371338, + "loss_ce": 0.31489402055740356, + "loss_iou": 0.95703125, + "loss_num": 0.126953125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 9708124, + "step": 174 + }, + { + "epoch": 0.3897550111358575, + "grad_norm": 18.966318130493164, + "learning_rate": 1e-06, + "loss": 2.3377, + "num_input_tokens_seen": 9766496, + "step": 175 + }, + { + "epoch": 0.3897550111358575, + "loss": 2.530661106109619, + "loss_ce": 0.2992156147956848, + "loss_iou": 0.8515625, + "loss_num": 0.10546875, + "loss_xval": 2.234375, + "num_input_tokens_seen": 9766496, + "step": 175 + }, + { + "epoch": 0.39198218262806234, + "grad_norm": 43.686466217041016, + "learning_rate": 1e-06, + "loss": 2.5387, + "num_input_tokens_seen": 9823256, + "step": 176 + }, + { + "epoch": 0.39198218262806234, + "loss": 2.393760919570923, + "loss_ce": 0.18672963976860046, + "loss_iou": 0.8203125, + "loss_num": 0.11279296875, + "loss_xval": 2.203125, + "num_input_tokens_seen": 9823256, + "step": 176 + }, + { + "epoch": 0.39420935412026725, + "grad_norm": 117.024658203125, + "learning_rate": 1e-06, + "loss": 2.5409, + "num_input_tokens_seen": 9881900, + "step": 177 + }, + { + "epoch": 0.39420935412026725, + "loss": 2.7967357635498047, + "loss_ce": 0.2508372664451599, + "loss_iou": 0.90234375, + "loss_num": 0.1474609375, + "loss_xval": 2.546875, + "num_input_tokens_seen": 9881900, + "step": 177 + }, + { + "epoch": 0.39643652561247217, + "grad_norm": 77.61784362792969, + "learning_rate": 1e-06, + "loss": 2.5997, + "num_input_tokens_seen": 9937420, + "step": 178 + }, + { + "epoch": 0.39643652561247217, + "loss": 2.7027268409729004, + "loss_ce": 0.18905505537986755, + "loss_iou": 0.9453125, + "loss_num": 0.125, + "loss_xval": 2.515625, + "num_input_tokens_seen": 9937420, + "step": 178 + }, + { + "epoch": 0.3986636971046771, + "grad_norm": 22.932493209838867, + "learning_rate": 1e-06, + "loss": 2.0248, + "num_input_tokens_seen": 9994316, + "step": 179 + }, + { + "epoch": 0.3986636971046771, + "loss": 2.1930716037750244, + "loss_ce": 0.20479029417037964, + "loss_iou": 0.76953125, + "loss_num": 0.08984375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 9994316, + "step": 179 + }, + { + "epoch": 0.40089086859688194, + "grad_norm": 61.33550262451172, + "learning_rate": 1e-06, + "loss": 2.55, + "num_input_tokens_seen": 10046516, + "step": 180 + }, + { + "epoch": 0.40089086859688194, + "loss": 2.5461134910583496, + "loss_ce": 0.25509777665138245, + "loss_iou": 0.91796875, + "loss_num": 0.0908203125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 10046516, + "step": 180 + }, + { + "epoch": 0.40311804008908686, + "grad_norm": 29.507160186767578, + "learning_rate": 1e-06, + "loss": 2.4409, + "num_input_tokens_seen": 10103372, + "step": 181 + }, + { + "epoch": 0.40311804008908686, + "loss": 2.6029810905456543, + "loss_ce": 0.23091065883636475, + "loss_iou": 0.8984375, + "loss_num": 0.11572265625, + "loss_xval": 2.375, + "num_input_tokens_seen": 10103372, + "step": 181 + }, + { + "epoch": 0.4053452115812918, + "grad_norm": 90.82682037353516, + "learning_rate": 1e-06, + "loss": 2.5212, + "num_input_tokens_seen": 10159204, + "step": 182 + }, + { + "epoch": 0.4053452115812918, + "loss": 2.695434331893921, + "loss_ce": 0.3145750164985657, + "loss_iou": 0.8828125, + "loss_num": 0.123046875, + "loss_xval": 2.375, + "num_input_tokens_seen": 10159204, + "step": 182 + }, + { + "epoch": 0.40757238307349664, + "grad_norm": 27.87862205505371, + "learning_rate": 1e-06, + "loss": 2.2165, + "num_input_tokens_seen": 10214768, + "step": 183 + }, + { + "epoch": 0.40757238307349664, + "loss": 2.1518983840942383, + "loss_ce": 0.1807069331407547, + "loss_iou": 0.73828125, + "loss_num": 0.09912109375, + "loss_xval": 1.96875, + "num_input_tokens_seen": 10214768, + "step": 183 + }, + { + "epoch": 0.40979955456570155, + "grad_norm": 72.37561798095703, + "learning_rate": 1e-06, + "loss": 2.2339, + "num_input_tokens_seen": 10270868, + "step": 184 + }, + { + "epoch": 0.40979955456570155, + "loss": 2.1870460510253906, + "loss_ce": 0.20511233806610107, + "loss_iou": 0.7734375, + "loss_num": 0.08642578125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 10270868, + "step": 184 + }, + { + "epoch": 0.41202672605790647, + "grad_norm": 22.659595489501953, + "learning_rate": 1e-06, + "loss": 2.4928, + "num_input_tokens_seen": 10322388, + "step": 185 + }, + { + "epoch": 0.41202672605790647, + "loss": 2.4523706436157227, + "loss_ce": 0.2468043565750122, + "loss_iou": 0.83984375, + "loss_num": 0.1044921875, + "loss_xval": 2.203125, + "num_input_tokens_seen": 10322388, + "step": 185 + }, + { + "epoch": 0.4142538975501114, + "grad_norm": 27.231279373168945, + "learning_rate": 1e-06, + "loss": 2.2191, + "num_input_tokens_seen": 10379116, + "step": 186 + }, + { + "epoch": 0.4142538975501114, + "loss": 2.0813474655151367, + "loss_ce": 0.14189457893371582, + "loss_iou": 0.765625, + "loss_num": 0.08203125, + "loss_xval": 1.9375, + "num_input_tokens_seen": 10379116, + "step": 186 + }, + { + "epoch": 0.41648106904231624, + "grad_norm": 20.43091583251953, + "learning_rate": 1e-06, + "loss": 2.2453, + "num_input_tokens_seen": 10434552, + "step": 187 + }, + { + "epoch": 0.41648106904231624, + "loss": 2.5419094562530518, + "loss_ce": 0.25821810960769653, + "loss_iou": 0.796875, + "loss_num": 0.138671875, + "loss_xval": 2.28125, + "num_input_tokens_seen": 10434552, + "step": 187 + }, + { + "epoch": 0.41870824053452116, + "grad_norm": 25.056270599365234, + "learning_rate": 1e-06, + "loss": 2.2543, + "num_input_tokens_seen": 10493740, + "step": 188 + }, + { + "epoch": 0.41870824053452116, + "loss": 2.3587117195129395, + "loss_ce": 0.16730527579784393, + "loss_iou": 0.8203125, + "loss_num": 0.109375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 10493740, + "step": 188 + }, + { + "epoch": 0.4209354120267261, + "grad_norm": 26.688255310058594, + "learning_rate": 1e-06, + "loss": 2.2537, + "num_input_tokens_seen": 10548228, + "step": 189 + }, + { + "epoch": 0.4209354120267261, + "loss": 2.1556687355041504, + "loss_ce": 0.173246830701828, + "loss_iou": 0.7890625, + "loss_num": 0.08203125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 10548228, + "step": 189 + }, + { + "epoch": 0.42316258351893093, + "grad_norm": 56.197242736816406, + "learning_rate": 1e-06, + "loss": 2.0274, + "num_input_tokens_seen": 10604476, + "step": 190 + }, + { + "epoch": 0.42316258351893093, + "loss": 2.1501314640045166, + "loss_ce": 0.188217431306839, + "loss_iou": 0.734375, + "loss_num": 0.09716796875, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 10604476, + "step": 190 + }, + { + "epoch": 0.42538975501113585, + "grad_norm": 32.006675720214844, + "learning_rate": 1e-06, + "loss": 2.4196, + "num_input_tokens_seen": 10660088, + "step": 191 + }, + { + "epoch": 0.42538975501113585, + "loss": 2.4907290935516357, + "loss_ce": 0.21826806664466858, + "loss_iou": 0.859375, + "loss_num": 0.1103515625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 10660088, + "step": 191 + }, + { + "epoch": 0.42761692650334077, + "grad_norm": 23.622976303100586, + "learning_rate": 1e-06, + "loss": 1.8407, + "num_input_tokens_seen": 10719304, + "step": 192 + }, + { + "epoch": 0.42761692650334077, + "loss": 1.853776454925537, + "loss_ce": 0.0735030248761177, + "loss_iou": 0.76171875, + "loss_num": 0.05078125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 10719304, + "step": 192 + }, + { + "epoch": 0.4298440979955457, + "grad_norm": 132.57778930664062, + "learning_rate": 1e-06, + "loss": 1.9273, + "num_input_tokens_seen": 10776824, + "step": 193 + }, + { + "epoch": 0.4298440979955457, + "loss": 2.2028584480285645, + "loss_ce": 0.19748730957508087, + "loss_iou": 0.74609375, + "loss_num": 0.10302734375, + "loss_xval": 2.0, + "num_input_tokens_seen": 10776824, + "step": 193 + }, + { + "epoch": 0.43207126948775054, + "grad_norm": 78.81751251220703, + "learning_rate": 1e-06, + "loss": 2.282, + "num_input_tokens_seen": 10832888, + "step": 194 + }, + { + "epoch": 0.43207126948775054, + "loss": 2.0263118743896484, + "loss_ce": 0.08978863805532455, + "loss_iou": 0.78515625, + "loss_num": 0.0732421875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 10832888, + "step": 194 + }, + { + "epoch": 0.43429844097995546, + "grad_norm": 33.7293586730957, + "learning_rate": 1e-06, + "loss": 1.9715, + "num_input_tokens_seen": 10889816, + "step": 195 + }, + { + "epoch": 0.43429844097995546, + "loss": 1.9463962316513062, + "loss_ce": 0.15049774944782257, + "loss_iou": 0.71484375, + "loss_num": 0.07373046875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 10889816, + "step": 195 + }, + { + "epoch": 0.4365256124721604, + "grad_norm": 42.51150131225586, + "learning_rate": 1e-06, + "loss": 2.2224, + "num_input_tokens_seen": 10947644, + "step": 196 + }, + { + "epoch": 0.4365256124721604, + "loss": 2.052577495574951, + "loss_ce": 0.1531633585691452, + "loss_iou": 0.7734375, + "loss_num": 0.0703125, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 10947644, + "step": 196 + }, + { + "epoch": 0.43875278396436523, + "grad_norm": 21.027219772338867, + "learning_rate": 1e-06, + "loss": 2.0492, + "num_input_tokens_seen": 11004624, + "step": 197 + }, + { + "epoch": 0.43875278396436523, + "loss": 2.039621353149414, + "loss_ce": 0.1343478262424469, + "loss_iou": 0.77734375, + "loss_num": 0.0703125, + "loss_xval": 1.90625, + "num_input_tokens_seen": 11004624, + "step": 197 + }, + { + "epoch": 0.44097995545657015, + "grad_norm": 30.52896499633789, + "learning_rate": 1e-06, + "loss": 2.0472, + "num_input_tokens_seen": 11060372, + "step": 198 + }, + { + "epoch": 0.44097995545657015, + "loss": 2.0091304779052734, + "loss_ce": 0.12045860290527344, + "loss_iou": 0.76953125, + "loss_num": 0.0703125, + "loss_xval": 1.890625, + "num_input_tokens_seen": 11060372, + "step": 198 + }, + { + "epoch": 0.44320712694877507, + "grad_norm": 40.47418975830078, + "learning_rate": 1e-06, + "loss": 2.1817, + "num_input_tokens_seen": 11117700, + "step": 199 + }, + { + "epoch": 0.44320712694877507, + "loss": 2.1524715423583984, + "loss_ce": 0.1846981942653656, + "loss_iou": 0.75, + "loss_num": 0.09375, + "loss_xval": 1.96875, + "num_input_tokens_seen": 11117700, + "step": 199 + }, + { + "epoch": 0.44543429844098, + "grad_norm": 42.97262191772461, + "learning_rate": 1e-06, + "loss": 2.1472, + "num_input_tokens_seen": 11176308, + "step": 200 + }, + { + "epoch": 0.44543429844098, + "loss": 2.278196096420288, + "loss_ce": 0.12389924377202988, + "loss_iou": 0.8359375, + "loss_num": 0.09716796875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 11176308, + "step": 200 + }, + { + "epoch": 0.44766146993318484, + "grad_norm": 46.24988555908203, + "learning_rate": 1e-06, + "loss": 2.2836, + "num_input_tokens_seen": 11232984, + "step": 201 + }, + { + "epoch": 0.44766146993318484, + "loss": 2.1692562103271484, + "loss_ce": 0.16632673144340515, + "loss_iou": 0.76953125, + "loss_num": 0.09375, + "loss_xval": 2.0, + "num_input_tokens_seen": 11232984, + "step": 201 + }, + { + "epoch": 0.44988864142538976, + "grad_norm": 19.978219985961914, + "learning_rate": 1e-06, + "loss": 1.9362, + "num_input_tokens_seen": 11289736, + "step": 202 + }, + { + "epoch": 0.44988864142538976, + "loss": 2.002622127532959, + "loss_ce": 0.14520032703876495, + "loss_iou": 0.75, + "loss_num": 0.07080078125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 11289736, + "step": 202 + }, + { + "epoch": 0.4521158129175947, + "grad_norm": 40.11283493041992, + "learning_rate": 1e-06, + "loss": 2.0214, + "num_input_tokens_seen": 11347688, + "step": 203 + }, + { + "epoch": 0.4521158129175947, + "loss": 2.089003801345825, + "loss_ce": 0.12367182970046997, + "loss_iou": 0.76171875, + "loss_num": 0.087890625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 11347688, + "step": 203 + }, + { + "epoch": 0.45434298440979953, + "grad_norm": 45.47671890258789, + "learning_rate": 1e-06, + "loss": 1.7514, + "num_input_tokens_seen": 11405500, + "step": 204 + }, + { + "epoch": 0.45434298440979953, + "loss": 1.679763913154602, + "loss_ce": 0.08992011845111847, + "loss_iou": 0.6328125, + "loss_num": 0.06494140625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 11405500, + "step": 204 + }, + { + "epoch": 0.45657015590200445, + "grad_norm": 37.67365646362305, + "learning_rate": 1e-06, + "loss": 2.1156, + "num_input_tokens_seen": 11463076, + "step": 205 + }, + { + "epoch": 0.45657015590200445, + "loss": 1.912656545639038, + "loss_ce": 0.1499611884355545, + "loss_iou": 0.63671875, + "loss_num": 0.09814453125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 11463076, + "step": 205 + }, + { + "epoch": 0.45879732739420936, + "grad_norm": 22.140178680419922, + "learning_rate": 1e-06, + "loss": 2.2171, + "num_input_tokens_seen": 11517904, + "step": 206 + }, + { + "epoch": 0.45879732739420936, + "loss": 2.3471362590789795, + "loss_ce": 0.13815191388130188, + "loss_iou": 0.875, + "loss_num": 0.0908203125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 11517904, + "step": 206 + }, + { + "epoch": 0.4610244988864143, + "grad_norm": 42.47764205932617, + "learning_rate": 1e-06, + "loss": 1.9804, + "num_input_tokens_seen": 11572968, + "step": 207 + }, + { + "epoch": 0.4610244988864143, + "loss": 2.3084888458251953, + "loss_ce": 0.2538011372089386, + "loss_iou": 0.796875, + "loss_num": 0.09228515625, + "loss_xval": 2.0625, + "num_input_tokens_seen": 11572968, + "step": 207 + }, + { + "epoch": 0.46325167037861914, + "grad_norm": 32.766109466552734, + "learning_rate": 1e-06, + "loss": 1.8334, + "num_input_tokens_seen": 11629228, + "step": 208 + }, + { + "epoch": 0.46325167037861914, + "loss": 1.8067898750305176, + "loss_ce": 0.10049097239971161, + "loss_iou": 0.67578125, + "loss_num": 0.07177734375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 11629228, + "step": 208 + }, + { + "epoch": 0.46547884187082406, + "grad_norm": 21.969463348388672, + "learning_rate": 1e-06, + "loss": 1.7864, + "num_input_tokens_seen": 11687208, + "step": 209 + }, + { + "epoch": 0.46547884187082406, + "loss": 1.507708191871643, + "loss_ce": 0.09462223947048187, + "loss_iou": 0.56640625, + "loss_num": 0.055419921875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 11687208, + "step": 209 + }, + { + "epoch": 0.46770601336302897, + "grad_norm": 66.28128814697266, + "learning_rate": 1e-06, + "loss": 2.1212, + "num_input_tokens_seen": 11744372, + "step": 210 + }, + { + "epoch": 0.46770601336302897, + "loss": 1.94266939163208, + "loss_ce": 0.18949554860591888, + "loss_iou": 0.66015625, + "loss_num": 0.0869140625, + "loss_xval": 1.75, + "num_input_tokens_seen": 11744372, + "step": 210 + }, + { + "epoch": 0.46993318485523383, + "grad_norm": 27.227609634399414, + "learning_rate": 1e-06, + "loss": 2.1373, + "num_input_tokens_seen": 11800196, + "step": 211 + }, + { + "epoch": 0.46993318485523383, + "loss": 2.1795947551727295, + "loss_ce": 0.12490727007389069, + "loss_iou": 0.79296875, + "loss_num": 0.09375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 11800196, + "step": 211 + }, + { + "epoch": 0.47216035634743875, + "grad_norm": 27.855268478393555, + "learning_rate": 1e-06, + "loss": 2.2184, + "num_input_tokens_seen": 11857088, + "step": 212 + }, + { + "epoch": 0.47216035634743875, + "loss": 2.045684337615967, + "loss_ce": 0.13650476932525635, + "loss_iou": 0.765625, + "loss_num": 0.076171875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 11857088, + "step": 212 + }, + { + "epoch": 0.47438752783964366, + "grad_norm": 18.745811462402344, + "learning_rate": 1e-06, + "loss": 1.8398, + "num_input_tokens_seen": 11908736, + "step": 213 + }, + { + "epoch": 0.47438752783964366, + "loss": 1.898270845413208, + "loss_ce": 0.1253216713666916, + "loss_iou": 0.71875, + "loss_num": 0.06689453125, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 11908736, + "step": 213 + }, + { + "epoch": 0.4766146993318486, + "grad_norm": 50.83546447753906, + "learning_rate": 1e-06, + "loss": 1.8401, + "num_input_tokens_seen": 11963016, + "step": 214 + }, + { + "epoch": 0.4766146993318486, + "loss": 1.990050196647644, + "loss_ce": 0.1697378158569336, + "loss_iou": 0.671875, + "loss_num": 0.0947265625, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 11963016, + "step": 214 + }, + { + "epoch": 0.47884187082405344, + "grad_norm": 15.607227325439453, + "learning_rate": 1e-06, + "loss": 2.0483, + "num_input_tokens_seen": 12019844, + "step": 215 + }, + { + "epoch": 0.47884187082405344, + "loss": 1.621078610420227, + "loss_ce": 0.10240183770656586, + "loss_iou": 0.6328125, + "loss_num": 0.05078125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 12019844, + "step": 215 + }, + { + "epoch": 0.48106904231625836, + "grad_norm": 561.2518920898438, + "learning_rate": 1e-06, + "loss": 2.0352, + "num_input_tokens_seen": 12079540, + "step": 216 + }, + { + "epoch": 0.48106904231625836, + "loss": 2.1394166946411133, + "loss_ce": 0.18922138214111328, + "loss_iou": 0.71484375, + "loss_num": 0.1044921875, + "loss_xval": 1.953125, + "num_input_tokens_seen": 12079540, + "step": 216 + }, + { + "epoch": 0.48329621380846327, + "grad_norm": 75.96478271484375, + "learning_rate": 1e-06, + "loss": 1.9004, + "num_input_tokens_seen": 12136556, + "step": 217 + }, + { + "epoch": 0.48329621380846327, + "loss": 2.064810276031494, + "loss_ce": 0.13121652603149414, + "loss_iou": 0.72265625, + "loss_num": 0.09765625, + "loss_xval": 1.9375, + "num_input_tokens_seen": 12136556, + "step": 217 + }, + { + "epoch": 0.48552338530066813, + "grad_norm": 21.790206909179688, + "learning_rate": 1e-06, + "loss": 1.9754, + "num_input_tokens_seen": 12190060, + "step": 218 + }, + { + "epoch": 0.48552338530066813, + "loss": 2.044926404953003, + "loss_ce": 0.13574674725532532, + "loss_iou": 0.70703125, + "loss_num": 0.09912109375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 12190060, + "step": 218 + }, + { + "epoch": 0.48775055679287305, + "grad_norm": 127.96965026855469, + "learning_rate": 1e-06, + "loss": 2.0309, + "num_input_tokens_seen": 12247900, + "step": 219 + }, + { + "epoch": 0.48775055679287305, + "loss": 2.0088348388671875, + "loss_ce": 0.12211604416370392, + "loss_iou": 0.74609375, + "loss_num": 0.0791015625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 12247900, + "step": 219 + }, + { + "epoch": 0.48997772828507796, + "grad_norm": 37.08074951171875, + "learning_rate": 1e-06, + "loss": 1.8653, + "num_input_tokens_seen": 12303552, + "step": 220 + }, + { + "epoch": 0.48997772828507796, + "loss": 1.8836941719055176, + "loss_ce": 0.10244403779506683, + "loss_iou": 0.72265625, + "loss_num": 0.0673828125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 12303552, + "step": 220 + }, + { + "epoch": 0.4922048997772829, + "grad_norm": 32.377262115478516, + "learning_rate": 1e-06, + "loss": 1.8778, + "num_input_tokens_seen": 12359432, + "step": 221 + }, + { + "epoch": 0.4922048997772829, + "loss": 1.9408328533172607, + "loss_ce": 0.10782508552074432, + "loss_iou": 0.7421875, + "loss_num": 0.06884765625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 12359432, + "step": 221 + }, + { + "epoch": 0.49443207126948774, + "grad_norm": 45.29218673706055, + "learning_rate": 1e-06, + "loss": 2.1275, + "num_input_tokens_seen": 12415808, + "step": 222 + }, + { + "epoch": 0.49443207126948774, + "loss": 2.1390140056610107, + "loss_ce": 0.1058109700679779, + "loss_iou": 0.77734375, + "loss_num": 0.09619140625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 12415808, + "step": 222 + }, + { + "epoch": 0.49665924276169265, + "grad_norm": 23.249635696411133, + "learning_rate": 1e-06, + "loss": 2.2482, + "num_input_tokens_seen": 12468328, + "step": 223 + }, + { + "epoch": 0.49665924276169265, + "loss": 2.2763051986694336, + "loss_ce": 0.13763317465782166, + "loss_iou": 0.8125, + "loss_num": 0.10302734375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 12468328, + "step": 223 + }, + { + "epoch": 0.49888641425389757, + "grad_norm": 26.10118293762207, + "learning_rate": 1e-06, + "loss": 1.745, + "num_input_tokens_seen": 12526672, + "step": 224 + }, + { + "epoch": 0.49888641425389757, + "loss": 1.4877617359161377, + "loss_ce": 0.06539853662252426, + "loss_iou": 0.57421875, + "loss_num": 0.055419921875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 12526672, + "step": 224 + }, + { + "epoch": 0.5011135857461024, + "grad_norm": 28.74011993408203, + "learning_rate": 1e-06, + "loss": 1.8918, + "num_input_tokens_seen": 12582496, + "step": 225 + }, + { + "epoch": 0.5011135857461024, + "loss": 2.0152716636657715, + "loss_ce": 0.0636114627122879, + "loss_iou": 0.77734375, + "loss_num": 0.0791015625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 12582496, + "step": 225 + }, + { + "epoch": 0.5033407572383074, + "grad_norm": 39.25506591796875, + "learning_rate": 1e-06, + "loss": 2.072, + "num_input_tokens_seen": 12637180, + "step": 226 + }, + { + "epoch": 0.5033407572383074, + "loss": 2.0042176246643066, + "loss_ce": 0.0706239864230156, + "loss_iou": 0.76171875, + "loss_num": 0.08154296875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 12637180, + "step": 226 + }, + { + "epoch": 0.5055679287305123, + "grad_norm": 58.19468307495117, + "learning_rate": 1e-06, + "loss": 1.9587, + "num_input_tokens_seen": 12696256, + "step": 227 + }, + { + "epoch": 0.5055679287305123, + "loss": 2.141087532043457, + "loss_ce": 0.10886099934577942, + "loss_iou": 0.80078125, + "loss_num": 0.08642578125, + "loss_xval": 2.03125, + "num_input_tokens_seen": 12696256, + "step": 227 + }, + { + "epoch": 0.5077951002227171, + "grad_norm": 26.209997177124023, + "learning_rate": 1e-06, + "loss": 2.0101, + "num_input_tokens_seen": 12751124, + "step": 228 + }, + { + "epoch": 0.5077951002227171, + "loss": 1.975238561630249, + "loss_ce": 0.10316816717386246, + "loss_iou": 0.76953125, + "loss_num": 0.06689453125, + "loss_xval": 1.875, + "num_input_tokens_seen": 12751124, + "step": 228 + }, + { + "epoch": 0.5100222717149221, + "grad_norm": 29.183605194091797, + "learning_rate": 1e-06, + "loss": 1.8975, + "num_input_tokens_seen": 12807368, + "step": 229 + }, + { + "epoch": 0.5100222717149221, + "loss": 2.0033857822418213, + "loss_ce": 0.1225263923406601, + "loss_iou": 0.71875, + "loss_num": 0.08837890625, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 12807368, + "step": 229 + }, + { + "epoch": 0.512249443207127, + "grad_norm": 32.19590377807617, + "learning_rate": 1e-06, + "loss": 1.9127, + "num_input_tokens_seen": 12866044, + "step": 230 + }, + { + "epoch": 0.512249443207127, + "loss": 1.722662329673767, + "loss_ce": 0.10645144432783127, + "loss_iou": 0.63671875, + "loss_num": 0.068359375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 12866044, + "step": 230 + }, + { + "epoch": 0.5144766146993318, + "grad_norm": 88.9583740234375, + "learning_rate": 1e-06, + "loss": 1.8966, + "num_input_tokens_seen": 12919828, + "step": 231 + }, + { + "epoch": 0.5144766146993318, + "loss": 1.7427191734313965, + "loss_ce": 0.08256293088197708, + "loss_iou": 0.66796875, + "loss_num": 0.06494140625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 12919828, + "step": 231 + }, + { + "epoch": 0.5167037861915368, + "grad_norm": 38.6852912902832, + "learning_rate": 1e-06, + "loss": 1.8535, + "num_input_tokens_seen": 12976452, + "step": 232 + }, + { + "epoch": 0.5167037861915368, + "loss": 1.5481868982315063, + "loss_ce": 0.09603846073150635, + "loss_iou": 0.60546875, + "loss_num": 0.04833984375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 12976452, + "step": 232 + }, + { + "epoch": 0.5189309576837416, + "grad_norm": 40.1108512878418, + "learning_rate": 1e-06, + "loss": 1.7714, + "num_input_tokens_seen": 13032624, + "step": 233 + }, + { + "epoch": 0.5189309576837416, + "loss": 1.8302245140075684, + "loss_ce": 0.043115146458148956, + "loss_iou": 0.765625, + "loss_num": 0.05126953125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 13032624, + "step": 233 + }, + { + "epoch": 0.5211581291759465, + "grad_norm": 35.25422286987305, + "learning_rate": 1e-06, + "loss": 1.9139, + "num_input_tokens_seen": 13089524, + "step": 234 + }, + { + "epoch": 0.5211581291759465, + "loss": 2.1705551147460938, + "loss_ce": 0.11000814288854599, + "loss_iou": 0.8203125, + "loss_num": 0.083984375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 13089524, + "step": 234 + }, + { + "epoch": 0.5233853006681515, + "grad_norm": 92.00837707519531, + "learning_rate": 1e-06, + "loss": 2.0319, + "num_input_tokens_seen": 13146916, + "step": 235 + }, + { + "epoch": 0.5233853006681515, + "loss": 1.8411858081817627, + "loss_ce": 0.04724044352769852, + "loss_iou": 0.7578125, + "loss_num": 0.05517578125, + "loss_xval": 1.796875, + "num_input_tokens_seen": 13146916, + "step": 235 + }, + { + "epoch": 0.5256124721603563, + "grad_norm": 61.55561065673828, + "learning_rate": 1e-06, + "loss": 1.938, + "num_input_tokens_seen": 13206136, + "step": 236 + }, + { + "epoch": 0.5256124721603563, + "loss": 1.859898328781128, + "loss_ce": 0.0937848836183548, + "loss_iou": 0.6875, + "loss_num": 0.078125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 13206136, + "step": 236 + }, + { + "epoch": 0.5278396436525612, + "grad_norm": 24.729263305664062, + "learning_rate": 1e-06, + "loss": 1.6522, + "num_input_tokens_seen": 13262240, + "step": 237 + }, + { + "epoch": 0.5278396436525612, + "loss": 1.7375231981277466, + "loss_ce": 0.05246463418006897, + "loss_iou": 0.671875, + "loss_num": 0.06884765625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 13262240, + "step": 237 + }, + { + "epoch": 0.5300668151447662, + "grad_norm": 46.568321228027344, + "learning_rate": 1e-06, + "loss": 1.8631, + "num_input_tokens_seen": 13317024, + "step": 238 + }, + { + "epoch": 0.5300668151447662, + "loss": 1.6511387825012207, + "loss_ce": 0.043716952204704285, + "loss_iou": 0.66796875, + "loss_num": 0.05517578125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 13317024, + "step": 238 + }, + { + "epoch": 0.532293986636971, + "grad_norm": 433.0378112792969, + "learning_rate": 1e-06, + "loss": 1.9798, + "num_input_tokens_seen": 13370596, + "step": 239 + }, + { + "epoch": 0.532293986636971, + "loss": 1.9186971187591553, + "loss_ce": 0.052486199885606766, + "loss_iou": 0.7421875, + "loss_num": 0.076171875, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 13370596, + "step": 239 + }, + { + "epoch": 0.534521158129176, + "grad_norm": 25.267301559448242, + "learning_rate": 1e-06, + "loss": 1.6908, + "num_input_tokens_seen": 13427652, + "step": 240 + }, + { + "epoch": 0.534521158129176, + "loss": 1.6441099643707275, + "loss_ce": 0.04157081991434097, + "loss_iou": 0.625, + "loss_num": 0.06982421875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 13427652, + "step": 240 + }, + { + "epoch": 0.5367483296213809, + "grad_norm": 23.9971981048584, + "learning_rate": 1e-06, + "loss": 1.944, + "num_input_tokens_seen": 13484652, + "step": 241 + }, + { + "epoch": 0.5367483296213809, + "loss": 1.9427827596664429, + "loss_ce": 0.0628998875617981, + "loss_iou": 0.78515625, + "loss_num": 0.061279296875, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 13484652, + "step": 241 + }, + { + "epoch": 0.5389755011135857, + "grad_norm": 28.519479751586914, + "learning_rate": 1e-06, + "loss": 1.7093, + "num_input_tokens_seen": 13539076, + "step": 242 + }, + { + "epoch": 0.5389755011135857, + "loss": 1.5857552289962769, + "loss_ce": 0.030091160908341408, + "loss_iou": 0.61328125, + "loss_num": 0.06494140625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 13539076, + "step": 242 + }, + { + "epoch": 0.5412026726057907, + "grad_norm": 25.626075744628906, + "learning_rate": 1e-06, + "loss": 1.8502, + "num_input_tokens_seen": 13596724, + "step": 243 + }, + { + "epoch": 0.5412026726057907, + "loss": 1.6758081912994385, + "loss_ce": 0.09328873455524445, + "loss_iou": 0.6640625, + "loss_num": 0.05029296875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 13596724, + "step": 243 + }, + { + "epoch": 0.5434298440979956, + "grad_norm": 41.99237823486328, + "learning_rate": 1e-06, + "loss": 1.6974, + "num_input_tokens_seen": 13652844, + "step": 244 + }, + { + "epoch": 0.5434298440979956, + "loss": 1.7878342866897583, + "loss_ce": 0.05052957311272621, + "loss_iou": 0.7421875, + "loss_num": 0.05029296875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 13652844, + "step": 244 + }, + { + "epoch": 0.5456570155902004, + "grad_norm": 36.3553352355957, + "learning_rate": 1e-06, + "loss": 1.9218, + "num_input_tokens_seen": 13707612, + "step": 245 + }, + { + "epoch": 0.5456570155902004, + "loss": 1.8159754276275635, + "loss_ce": 0.04546765610575676, + "loss_iou": 0.76171875, + "loss_num": 0.04931640625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 13707612, + "step": 245 + }, + { + "epoch": 0.5478841870824054, + "grad_norm": 25.488935470581055, + "learning_rate": 1e-06, + "loss": 1.7327, + "num_input_tokens_seen": 13764768, + "step": 246 + }, + { + "epoch": 0.5478841870824054, + "loss": 1.66471266746521, + "loss_ce": 0.040689267218112946, + "loss_iou": 0.65625, + "loss_num": 0.06298828125, + "loss_xval": 1.625, + "num_input_tokens_seen": 13764768, + "step": 246 + }, + { + "epoch": 0.5501113585746102, + "grad_norm": 43.198577880859375, + "learning_rate": 1e-06, + "loss": 2.2297, + "num_input_tokens_seen": 13817324, + "step": 247 + }, + { + "epoch": 0.5501113585746102, + "loss": 2.3575921058654785, + "loss_ce": 0.06559999287128448, + "loss_iou": 0.859375, + "loss_num": 0.115234375, + "loss_xval": 2.296875, + "num_input_tokens_seen": 13817324, + "step": 247 + }, + { + "epoch": 0.5523385300668151, + "grad_norm": 40.075462341308594, + "learning_rate": 1e-06, + "loss": 1.6199, + "num_input_tokens_seen": 13873804, + "step": 248 + }, + { + "epoch": 0.5523385300668151, + "loss": 1.5571107864379883, + "loss_ce": 0.028302079066634178, + "loss_iou": 0.61328125, + "loss_num": 0.060546875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 13873804, + "step": 248 + }, + { + "epoch": 0.5545657015590201, + "grad_norm": 36.94553756713867, + "learning_rate": 1e-06, + "loss": 1.4597, + "num_input_tokens_seen": 13932404, + "step": 249 + }, + { + "epoch": 0.5545657015590201, + "loss": 1.564407229423523, + "loss_ce": 0.05122363194823265, + "loss_iou": 0.64453125, + "loss_num": 0.044677734375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 13932404, + "step": 249 + }, + { + "epoch": 0.5567928730512249, + "grad_norm": 48.372737884521484, + "learning_rate": 1e-06, + "loss": 1.6582, + "num_input_tokens_seen": 13988996, + "step": 250 + }, + { + "epoch": 0.5567928730512249, + "eval_seeclick_web_CIoU": 0.45204322040081024, + "eval_seeclick_web_GIoU": 0.446009561419487, + "eval_seeclick_web_IoU": 0.48508621752262115, + "eval_seeclick_web_MAE_all": 0.015304200816899538, + "eval_seeclick_web_MAE_h": 0.01132917869836092, + "eval_seeclick_web_MAE_w": 0.01562582701444626, + "eval_seeclick_web_MAE_x_boxes": 0.010999062564224005, + "eval_seeclick_web_MAE_y_boxes": 0.02148408070206642, + "eval_seeclick_web_inside_bbox": 0.8263888955116272, + "eval_seeclick_web_loss": 1.2039486169815063, + "eval_seeclick_web_loss_ce": 0.0032457184279337525, + "eval_seeclick_web_loss_iou": 0.554443359375, + "eval_seeclick_web_loss_num": 0.013319015502929688, + "eval_seeclick_web_loss_xval": 1.176025390625, + "eval_seeclick_web_runtime": 17.9468, + "eval_seeclick_web_samples_per_second": 2.786, + "eval_seeclick_web_steps_per_second": 0.111, + "num_input_tokens_seen": 13988996, + "step": 250 + }, + { + "epoch": 0.5567928730512249, + "eval_icons_CIoU": 0.16195975244045258, + "eval_icons_GIoU": 0.18568327277898788, + "eval_icons_IoU": 0.2861369401216507, + "eval_icons_MAE_all": 0.05986557714641094, + "eval_icons_MAE_h": 0.0345042385160923, + "eval_icons_MAE_w": 0.07578632980585098, + "eval_icons_MAE_x_boxes": 0.06385871395468712, + "eval_icons_MAE_y_boxes": 0.02914611343294382, + "eval_icons_inside_bbox": 0.4322916716337204, + "eval_icons_loss": 1.9311987161636353, + "eval_icons_loss_ce": 0.01602194458246231, + "eval_icons_loss_iou": 0.7734375, + "eval_icons_loss_num": 0.05643463134765625, + "eval_icons_loss_xval": 1.828125, + "eval_icons_runtime": 16.6678, + "eval_icons_samples_per_second": 3.0, + "eval_icons_steps_per_second": 0.12, + "num_input_tokens_seen": 13988996, + "step": 250 + }, + { + "epoch": 0.5567928730512249, + "eval_screenspot_CIoU": 0.13644553472598395, + "eval_screenspot_GIoU": 0.13833926369746527, + "eval_screenspot_IoU": 0.27226150035858154, + "eval_screenspot_MAE_all": 0.11441413809855779, + "eval_screenspot_MAE_h": 0.06734236205617587, + "eval_screenspot_MAE_w": 0.15528815736373267, + "eval_screenspot_MAE_x_boxes": 0.13489964107672373, + "eval_screenspot_MAE_y_boxes": 0.08852330843607585, + "eval_screenspot_inside_bbox": 0.4887500007947286, + "eval_screenspot_loss": 2.3626174926757812, + "eval_screenspot_loss_ce": 0.06506530692179997, + "eval_screenspot_loss_iou": 0.8843587239583334, + "eval_screenspot_loss_num": 0.11939748128255208, + "eval_screenspot_loss_xval": 2.365234375, + "eval_screenspot_runtime": 26.8445, + "eval_screenspot_samples_per_second": 3.315, + "eval_screenspot_steps_per_second": 0.112, + "num_input_tokens_seen": 13988996, + "step": 250 + }, + { + "epoch": 0.5567928730512249, + "eval_compot_CIoU": 0.2119811549782753, + "eval_compot_GIoU": 0.22996322065591812, + "eval_compot_IoU": 0.2962482124567032, + "eval_compot_MAE_all": 0.029227093793451786, + "eval_compot_MAE_h": 0.021410066168755293, + "eval_compot_MAE_w": 0.032643974758684635, + "eval_compot_MAE_x_boxes": 0.03914828971028328, + "eval_compot_MAE_y_boxes": 0.012671195901930332, + "eval_compot_inside_bbox": 0.46875, + "eval_compot_loss": 1.7058837413787842, + "eval_compot_loss_ce": 0.0029119880637153983, + "eval_compot_loss_iou": 0.745849609375, + "eval_compot_loss_num": 0.02825927734375, + "eval_compot_loss_xval": 1.6337890625, + "eval_compot_runtime": 18.0854, + "eval_compot_samples_per_second": 2.765, + "eval_compot_steps_per_second": 0.111, + "num_input_tokens_seen": 13988996, + "step": 250 + }, + { + "epoch": 0.5567928730512249, + "eval_custom_ui_val_CIoU": 0.18997877753443188, + "eval_custom_ui_val_GIoU": 0.199455168719093, + "eval_custom_ui_val_IoU": 0.26526735723018646, + "eval_custom_ui_val_MAE_all": 0.06052247662511137, + "eval_custom_ui_val_MAE_h": 0.03889055632882648, + "eval_custom_ui_val_MAE_w": 0.07028790439168613, + "eval_custom_ui_val_MAE_x_boxes": 0.058271253067586154, + "eval_custom_ui_val_MAE_y_boxes": 0.050626704883244306, + "eval_custom_ui_val_inside_bbox": 0.5111882719728682, + "eval_custom_ui_val_loss": 1.941453218460083, + "eval_custom_ui_val_loss_ce": 0.020650964023338422, + "eval_custom_ui_val_loss_iou": 0.7940809461805556, + "eval_custom_ui_val_loss_num": 0.05839665730794271, + "eval_custom_ui_val_loss_xval": 1.8801540798611112, + "eval_custom_ui_val_runtime": 55.8224, + "eval_custom_ui_val_samples_per_second": 4.747, + "eval_custom_ui_val_steps_per_second": 0.161, + "num_input_tokens_seen": 13988996, + "step": 250 + }, + { + "epoch": 0.5567928730512249, + "loss": 1.535244107246399, + "loss_ce": 0.023525364696979523, + "loss_iou": 0.66015625, + "loss_num": 0.038330078125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 13988996, + "step": 250 + }, + { + "epoch": 0.5590200445434298, + "grad_norm": 25.04877281188965, + "learning_rate": 1e-06, + "loss": 1.6859, + "num_input_tokens_seen": 14044712, + "step": 251 + }, + { + "epoch": 0.5590200445434298, + "loss": 1.5795611143112183, + "loss_ce": 0.04782275855541229, + "loss_iou": 0.609375, + "loss_num": 0.06298828125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 14044712, + "step": 251 + }, + { + "epoch": 0.5612472160356348, + "grad_norm": 88.63025665283203, + "learning_rate": 1e-06, + "loss": 1.3939, + "num_input_tokens_seen": 14101200, + "step": 252 + }, + { + "epoch": 0.5612472160356348, + "loss": 1.3761667013168335, + "loss_ce": 0.0680612325668335, + "loss_iou": 0.54296875, + "loss_num": 0.044921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 14101200, + "step": 252 + }, + { + "epoch": 0.5634743875278396, + "grad_norm": 28.615373611450195, + "learning_rate": 1e-06, + "loss": 1.5713, + "num_input_tokens_seen": 14158672, + "step": 253 + }, + { + "epoch": 0.5634743875278396, + "loss": 1.540935754776001, + "loss_ce": 0.01456859614700079, + "loss_iou": 0.64453125, + "loss_num": 0.046875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 14158672, + "step": 253 + }, + { + "epoch": 0.5657015590200446, + "grad_norm": 39.433143615722656, + "learning_rate": 1e-06, + "loss": 1.6708, + "num_input_tokens_seen": 14216228, + "step": 254 + }, + { + "epoch": 0.5657015590200446, + "loss": 1.7911148071289062, + "loss_ce": 0.05381014943122864, + "loss_iou": 0.71484375, + "loss_num": 0.060546875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 14216228, + "step": 254 + }, + { + "epoch": 0.5679287305122495, + "grad_norm": 30.27321434020996, + "learning_rate": 1e-06, + "loss": 1.9151, + "num_input_tokens_seen": 14270952, + "step": 255 + }, + { + "epoch": 0.5679287305122495, + "loss": 2.0560014247894287, + "loss_ce": 0.04672405868768692, + "loss_iou": 0.79296875, + "loss_num": 0.08447265625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 14270952, + "step": 255 + }, + { + "epoch": 0.5701559020044543, + "grad_norm": 25.623844146728516, + "learning_rate": 1e-06, + "loss": 1.5495, + "num_input_tokens_seen": 14327860, + "step": 256 + }, + { + "epoch": 0.5701559020044543, + "loss": 1.5679357051849365, + "loss_ce": 0.032779376953840256, + "loss_iou": 0.6015625, + "loss_num": 0.0654296875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 14327860, + "step": 256 + }, + { + "epoch": 0.5723830734966593, + "grad_norm": 34.12678909301758, + "learning_rate": 1e-06, + "loss": 1.5758, + "num_input_tokens_seen": 14383192, + "step": 257 + }, + { + "epoch": 0.5723830734966593, + "loss": 1.398708701133728, + "loss_ce": 0.030056362971663475, + "loss_iou": 0.54296875, + "loss_num": 0.056396484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 14383192, + "step": 257 + }, + { + "epoch": 0.5746102449888641, + "grad_norm": 22.465286254882812, + "learning_rate": 1e-06, + "loss": 1.618, + "num_input_tokens_seen": 14438224, + "step": 258 + }, + { + "epoch": 0.5746102449888641, + "loss": 1.6812279224395752, + "loss_ce": 0.019118648022413254, + "loss_iou": 0.72265625, + "loss_num": 0.04345703125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 14438224, + "step": 258 + }, + { + "epoch": 0.576837416481069, + "grad_norm": 45.041263580322266, + "learning_rate": 1e-06, + "loss": 1.7445, + "num_input_tokens_seen": 14490804, + "step": 259 + }, + { + "epoch": 0.576837416481069, + "loss": 1.58518648147583, + "loss_ce": 0.05833101272583008, + "loss_iou": 0.62890625, + "loss_num": 0.053955078125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 14490804, + "step": 259 + }, + { + "epoch": 0.579064587973274, + "grad_norm": 32.25990295410156, + "learning_rate": 1e-06, + "loss": 1.7383, + "num_input_tokens_seen": 14548204, + "step": 260 + }, + { + "epoch": 0.579064587973274, + "loss": 1.8021881580352783, + "loss_ce": 0.019961677491664886, + "loss_iou": 0.73046875, + "loss_num": 0.06396484375, + "loss_xval": 1.78125, + "num_input_tokens_seen": 14548204, + "step": 260 + }, + { + "epoch": 0.5812917594654788, + "grad_norm": 24.800201416015625, + "learning_rate": 1e-06, + "loss": 1.9518, + "num_input_tokens_seen": 14602976, + "step": 261 + }, + { + "epoch": 0.5812917594654788, + "loss": 1.7187559604644775, + "loss_ce": 0.05420520156621933, + "loss_iou": 0.70703125, + "loss_num": 0.04931640625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 14602976, + "step": 261 + }, + { + "epoch": 0.5835189309576837, + "grad_norm": 38.99089431762695, + "learning_rate": 1e-06, + "loss": 1.3557, + "num_input_tokens_seen": 14659812, + "step": 262 + }, + { + "epoch": 0.5835189309576837, + "loss": 1.3884127140045166, + "loss_ce": 0.003158772364258766, + "loss_iou": 0.609375, + "loss_num": 0.032470703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 14659812, + "step": 262 + }, + { + "epoch": 0.5857461024498887, + "grad_norm": 36.124290466308594, + "learning_rate": 1e-06, + "loss": 1.9584, + "num_input_tokens_seen": 14716812, + "step": 263 + }, + { + "epoch": 0.5857461024498887, + "loss": 2.1227617263793945, + "loss_ce": 0.07393358647823334, + "loss_iou": 0.83203125, + "loss_num": 0.07763671875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 14716812, + "step": 263 + }, + { + "epoch": 0.5879732739420935, + "grad_norm": 27.93045997619629, + "learning_rate": 1e-06, + "loss": 1.6202, + "num_input_tokens_seen": 14770856, + "step": 264 + }, + { + "epoch": 0.5879732739420935, + "loss": 1.2804536819458008, + "loss_ce": 0.0348481610417366, + "loss_iou": 0.51953125, + "loss_num": 0.041748046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 14770856, + "step": 264 + }, + { + "epoch": 0.5902004454342984, + "grad_norm": 25.890186309814453, + "learning_rate": 1e-06, + "loss": 1.7844, + "num_input_tokens_seen": 14828476, + "step": 265 + }, + { + "epoch": 0.5902004454342984, + "loss": 1.8109703063964844, + "loss_ce": 0.03265002369880676, + "loss_iou": 0.7421875, + "loss_num": 0.059326171875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 14828476, + "step": 265 + }, + { + "epoch": 0.5924276169265034, + "grad_norm": 44.33429718017578, + "learning_rate": 1e-06, + "loss": 1.8674, + "num_input_tokens_seen": 14880352, + "step": 266 + }, + { + "epoch": 0.5924276169265034, + "loss": 1.7880139350891113, + "loss_ce": 0.041920170187950134, + "loss_iou": 0.70703125, + "loss_num": 0.06640625, + "loss_xval": 1.75, + "num_input_tokens_seen": 14880352, + "step": 266 + }, + { + "epoch": 0.5946547884187082, + "grad_norm": 46.44974136352539, + "learning_rate": 1e-06, + "loss": 1.6601, + "num_input_tokens_seen": 14934660, + "step": 267 + }, + { + "epoch": 0.5946547884187082, + "loss": 1.5294612646102905, + "loss_ce": 0.031414370983839035, + "loss_iou": 0.6171875, + "loss_num": 0.052978515625, + "loss_xval": 1.5, + "num_input_tokens_seen": 14934660, + "step": 267 + }, + { + "epoch": 0.5968819599109132, + "grad_norm": 50.88404846191406, + "learning_rate": 1e-06, + "loss": 1.4847, + "num_input_tokens_seen": 14990032, + "step": 268 + }, + { + "epoch": 0.5968819599109132, + "loss": 1.1937447786331177, + "loss_ce": 0.016986995935440063, + "loss_iou": 0.5, + "loss_num": 0.03564453125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 14990032, + "step": 268 + }, + { + "epoch": 0.5991091314031181, + "grad_norm": 27.827131271362305, + "learning_rate": 1e-06, + "loss": 1.5246, + "num_input_tokens_seen": 15048824, + "step": 269 + }, + { + "epoch": 0.5991091314031181, + "loss": 1.5418243408203125, + "loss_ce": 0.0242463368922472, + "loss_iou": 0.63671875, + "loss_num": 0.049072265625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 15048824, + "step": 269 + }, + { + "epoch": 0.6013363028953229, + "grad_norm": 19.647504806518555, + "learning_rate": 1e-06, + "loss": 1.637, + "num_input_tokens_seen": 15107692, + "step": 270 + }, + { + "epoch": 0.6013363028953229, + "loss": 1.218379259109497, + "loss_ce": 0.010371430777013302, + "loss_iou": 0.53125, + "loss_num": 0.0284423828125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 15107692, + "step": 270 + }, + { + "epoch": 0.6035634743875279, + "grad_norm": 24.345657348632812, + "learning_rate": 1e-06, + "loss": 1.8571, + "num_input_tokens_seen": 15161936, + "step": 271 + }, + { + "epoch": 0.6035634743875279, + "loss": 1.8928604125976562, + "loss_ce": 0.01151271816343069, + "loss_iou": 0.80078125, + "loss_num": 0.055908203125, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 15161936, + "step": 271 + }, + { + "epoch": 0.6057906458797327, + "grad_norm": 28.753704071044922, + "learning_rate": 1e-06, + "loss": 1.4391, + "num_input_tokens_seen": 15218400, + "step": 272 + }, + { + "epoch": 0.6057906458797327, + "loss": 1.046234130859375, + "loss_ce": 0.030609235167503357, + "loss_iou": 0.431640625, + "loss_num": 0.0303955078125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 15218400, + "step": 272 + }, + { + "epoch": 0.6080178173719376, + "grad_norm": 28.32134437561035, + "learning_rate": 1e-06, + "loss": 1.6383, + "num_input_tokens_seen": 15271292, + "step": 273 + }, + { + "epoch": 0.6080178173719376, + "loss": 1.7744327783584595, + "loss_ce": 0.0346866175532341, + "loss_iou": 0.7578125, + "loss_num": 0.044921875, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 15271292, + "step": 273 + }, + { + "epoch": 0.6102449888641426, + "grad_norm": 26.63081932067871, + "learning_rate": 1e-06, + "loss": 1.6534, + "num_input_tokens_seen": 15324252, + "step": 274 + }, + { + "epoch": 0.6102449888641426, + "loss": 1.8621397018432617, + "loss_ce": 0.015460037626326084, + "loss_iou": 0.77734375, + "loss_num": 0.05859375, + "loss_xval": 1.84375, + "num_input_tokens_seen": 15324252, + "step": 274 + }, + { + "epoch": 0.6124721603563474, + "grad_norm": 159.9815673828125, + "learning_rate": 1e-06, + "loss": 1.6268, + "num_input_tokens_seen": 15380232, + "step": 275 + }, + { + "epoch": 0.6124721603563474, + "loss": 1.5270476341247559, + "loss_ce": 0.024606265127658844, + "loss_iou": 0.6015625, + "loss_num": 0.060546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 15380232, + "step": 275 + }, + { + "epoch": 0.6146993318485523, + "grad_norm": 16.36348533630371, + "learning_rate": 1e-06, + "loss": 1.8404, + "num_input_tokens_seen": 15434716, + "step": 276 + }, + { + "epoch": 0.6146993318485523, + "loss": 2.1927452087402344, + "loss_ce": 0.04479604959487915, + "loss_iou": 0.81640625, + "loss_num": 0.10400390625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 15434716, + "step": 276 + }, + { + "epoch": 0.6169265033407573, + "grad_norm": 35.74172592163086, + "learning_rate": 1e-06, + "loss": 1.4253, + "num_input_tokens_seen": 15491324, + "step": 277 + }, + { + "epoch": 0.6169265033407573, + "loss": 1.4241523742675781, + "loss_ce": 0.007160228211432695, + "loss_iou": 0.59765625, + "loss_num": 0.045166015625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 15491324, + "step": 277 + }, + { + "epoch": 0.6191536748329621, + "grad_norm": 30.04996681213379, + "learning_rate": 1e-06, + "loss": 1.7018, + "num_input_tokens_seen": 15545848, + "step": 278 + }, + { + "epoch": 0.6191536748329621, + "loss": 1.6422159671783447, + "loss_ce": 0.028934601694345474, + "loss_iou": 0.67578125, + "loss_num": 0.051513671875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 15545848, + "step": 278 + }, + { + "epoch": 0.621380846325167, + "grad_norm": 32.703819274902344, + "learning_rate": 1e-06, + "loss": 1.6324, + "num_input_tokens_seen": 15603716, + "step": 279 + }, + { + "epoch": 0.621380846325167, + "loss": 1.5165987014770508, + "loss_ce": 0.0112276840955019, + "loss_iou": 0.6484375, + "loss_num": 0.041015625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 15603716, + "step": 279 + }, + { + "epoch": 0.623608017817372, + "grad_norm": 35.6143798828125, + "learning_rate": 1e-06, + "loss": 1.6699, + "num_input_tokens_seen": 15658112, + "step": 280 + }, + { + "epoch": 0.623608017817372, + "loss": 1.777186393737793, + "loss_ce": 0.02865123562514782, + "loss_iou": 0.6796875, + "loss_num": 0.07763671875, + "loss_xval": 1.75, + "num_input_tokens_seen": 15658112, + "step": 280 + }, + { + "epoch": 0.6258351893095768, + "grad_norm": 28.780593872070312, + "learning_rate": 1e-06, + "loss": 1.6865, + "num_input_tokens_seen": 15716760, + "step": 281 + }, + { + "epoch": 0.6258351893095768, + "loss": 1.422225832939148, + "loss_ce": 0.011092978529632092, + "loss_iou": 0.6328125, + "loss_num": 0.0284423828125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 15716760, + "step": 281 + }, + { + "epoch": 0.6280623608017817, + "grad_norm": 39.134151458740234, + "learning_rate": 1e-06, + "loss": 2.0141, + "num_input_tokens_seen": 15773952, + "step": 282 + }, + { + "epoch": 0.6280623608017817, + "loss": 2.0597357749938965, + "loss_ce": 0.03629831597208977, + "loss_iou": 0.79296875, + "loss_num": 0.087890625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 15773952, + "step": 282 + }, + { + "epoch": 0.6302895322939867, + "grad_norm": 21.123931884765625, + "learning_rate": 1e-06, + "loss": 1.4487, + "num_input_tokens_seen": 15832456, + "step": 283 + }, + { + "epoch": 0.6302895322939867, + "loss": 1.4959959983825684, + "loss_ce": 0.018945157527923584, + "loss_iou": 0.6328125, + "loss_num": 0.0419921875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 15832456, + "step": 283 + }, + { + "epoch": 0.6325167037861915, + "grad_norm": 137.96141052246094, + "learning_rate": 1e-06, + "loss": 1.5979, + "num_input_tokens_seen": 15890888, + "step": 284 + }, + { + "epoch": 0.6325167037861915, + "loss": 1.8737279176712036, + "loss_ce": 0.05341540277004242, + "loss_iou": 0.71875, + "loss_num": 0.07763671875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 15890888, + "step": 284 + }, + { + "epoch": 0.6347438752783965, + "grad_norm": 26.675518035888672, + "learning_rate": 1e-06, + "loss": 1.7712, + "num_input_tokens_seen": 15945736, + "step": 285 + }, + { + "epoch": 0.6347438752783965, + "loss": 1.6105928421020508, + "loss_ce": 0.03197958692908287, + "loss_iou": 0.65234375, + "loss_num": 0.054931640625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 15945736, + "step": 285 + }, + { + "epoch": 0.6369710467706013, + "grad_norm": 24.4492130279541, + "learning_rate": 1e-06, + "loss": 1.5191, + "num_input_tokens_seen": 16002312, + "step": 286 + }, + { + "epoch": 0.6369710467706013, + "loss": 1.766524314880371, + "loss_ce": 0.0673055648803711, + "loss_iou": 0.6796875, + "loss_num": 0.0673828125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 16002312, + "step": 286 + }, + { + "epoch": 0.6391982182628062, + "grad_norm": 30.305042266845703, + "learning_rate": 1e-06, + "loss": 1.4741, + "num_input_tokens_seen": 16059496, + "step": 287 + }, + { + "epoch": 0.6391982182628062, + "loss": 1.4600698947906494, + "loss_ce": 0.04307776317000389, + "loss_iou": 0.53125, + "loss_num": 0.06982421875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 16059496, + "step": 287 + }, + { + "epoch": 0.6414253897550112, + "grad_norm": 43.9190788269043, + "learning_rate": 1e-06, + "loss": 2.0036, + "num_input_tokens_seen": 16114212, + "step": 288 + }, + { + "epoch": 0.6414253897550112, + "loss": 2.116396188735962, + "loss_ce": 0.04706018790602684, + "loss_iou": 0.7890625, + "loss_num": 0.09912109375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 16114212, + "step": 288 + }, + { + "epoch": 0.643652561247216, + "grad_norm": 32.69955825805664, + "learning_rate": 1e-06, + "loss": 1.4369, + "num_input_tokens_seen": 16171136, + "step": 289 + }, + { + "epoch": 0.643652561247216, + "loss": 1.4707211256027222, + "loss_ce": 0.019060947000980377, + "loss_iou": 0.6015625, + "loss_num": 0.049560546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 16171136, + "step": 289 + }, + { + "epoch": 0.6458797327394209, + "grad_norm": 23.189529418945312, + "learning_rate": 1e-06, + "loss": 1.6011, + "num_input_tokens_seen": 16227256, + "step": 290 + }, + { + "epoch": 0.6458797327394209, + "loss": 1.716025948524475, + "loss_ce": 0.02217838540673256, + "loss_iou": 0.7109375, + "loss_num": 0.0537109375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 16227256, + "step": 290 + }, + { + "epoch": 0.6481069042316259, + "grad_norm": 83.02802276611328, + "learning_rate": 1e-06, + "loss": 1.5053, + "num_input_tokens_seen": 16283164, + "step": 291 + }, + { + "epoch": 0.6481069042316259, + "loss": 1.8076732158660889, + "loss_ce": 0.03325919434428215, + "loss_iou": 0.7265625, + "loss_num": 0.06494140625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 16283164, + "step": 291 + }, + { + "epoch": 0.6503340757238307, + "grad_norm": 26.41913604736328, + "learning_rate": 1e-06, + "loss": 1.482, + "num_input_tokens_seen": 16341224, + "step": 292 + }, + { + "epoch": 0.6503340757238307, + "loss": 1.52240788936615, + "loss_ce": 0.02973209135234356, + "loss_iou": 0.60546875, + "loss_num": 0.055908203125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 16341224, + "step": 292 + }, + { + "epoch": 0.6525612472160356, + "grad_norm": 24.008352279663086, + "learning_rate": 1e-06, + "loss": 1.2855, + "num_input_tokens_seen": 16399488, + "step": 293 + }, + { + "epoch": 0.6525612472160356, + "loss": 1.323553442955017, + "loss_ce": 0.02570188418030739, + "loss_iou": 0.515625, + "loss_num": 0.052490234375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 16399488, + "step": 293 + }, + { + "epoch": 0.6547884187082406, + "grad_norm": 40.153358459472656, + "learning_rate": 1e-06, + "loss": 2.0784, + "num_input_tokens_seen": 16452992, + "step": 294 + }, + { + "epoch": 0.6547884187082406, + "loss": 2.15143084526062, + "loss_ce": 0.020571384578943253, + "loss_iou": 0.828125, + "loss_num": 0.0947265625, + "loss_xval": 2.125, + "num_input_tokens_seen": 16452992, + "step": 294 + }, + { + "epoch": 0.6570155902004454, + "grad_norm": 19.569934844970703, + "learning_rate": 1e-06, + "loss": 1.6381, + "num_input_tokens_seen": 16510300, + "step": 295 + }, + { + "epoch": 0.6570155902004454, + "loss": 1.743638515472412, + "loss_ce": 0.018052466213703156, + "loss_iou": 0.7265625, + "loss_num": 0.053466796875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 16510300, + "step": 295 + }, + { + "epoch": 0.6592427616926503, + "grad_norm": 24.097932815551758, + "learning_rate": 1e-06, + "loss": 1.4483, + "num_input_tokens_seen": 16565272, + "step": 296 + }, + { + "epoch": 0.6592427616926503, + "loss": 1.252844214439392, + "loss_ce": 0.0033324414398521185, + "loss_iou": 0.49609375, + "loss_num": 0.051513671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 16565272, + "step": 296 + }, + { + "epoch": 0.6614699331848553, + "grad_norm": 20.85422706604004, + "learning_rate": 1e-06, + "loss": 1.4426, + "num_input_tokens_seen": 16620292, + "step": 297 + }, + { + "epoch": 0.6614699331848553, + "loss": 1.5032652616500854, + "loss_ce": 0.02230823040008545, + "loss_iou": 0.6015625, + "loss_num": 0.055419921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 16620292, + "step": 297 + }, + { + "epoch": 0.6636971046770601, + "grad_norm": 31.075130462646484, + "learning_rate": 1e-06, + "loss": 1.7549, + "num_input_tokens_seen": 16677532, + "step": 298 + }, + { + "epoch": 0.6636971046770601, + "loss": 1.6268385648727417, + "loss_ce": 0.030647173523902893, + "loss_iou": 0.64453125, + "loss_num": 0.0615234375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 16677532, + "step": 298 + }, + { + "epoch": 0.6659242761692651, + "grad_norm": 179.63589477539062, + "learning_rate": 1e-06, + "loss": 1.5358, + "num_input_tokens_seen": 16731944, + "step": 299 + }, + { + "epoch": 0.6659242761692651, + "loss": 1.3143196105957031, + "loss_ce": 0.01890948787331581, + "loss_iou": 0.52734375, + "loss_num": 0.04736328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 16731944, + "step": 299 + }, + { + "epoch": 0.6681514476614699, + "grad_norm": 78.36969757080078, + "learning_rate": 1e-06, + "loss": 1.6417, + "num_input_tokens_seen": 16785832, + "step": 300 + }, + { + "epoch": 0.6681514476614699, + "loss": 1.405898928642273, + "loss_ce": 0.023086415603756905, + "loss_iou": 0.5546875, + "loss_num": 0.054931640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 16785832, + "step": 300 + }, + { + "epoch": 0.6703786191536748, + "grad_norm": 20.544160842895508, + "learning_rate": 1e-06, + "loss": 1.3546, + "num_input_tokens_seen": 16843352, + "step": 301 + }, + { + "epoch": 0.6703786191536748, + "loss": 1.2849873304367065, + "loss_ce": 0.00910840556025505, + "loss_iou": 0.55859375, + "loss_num": 0.03125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 16843352, + "step": 301 + }, + { + "epoch": 0.6726057906458798, + "grad_norm": 33.55524826049805, + "learning_rate": 1e-06, + "loss": 1.5479, + "num_input_tokens_seen": 16899880, + "step": 302 + }, + { + "epoch": 0.6726057906458798, + "loss": 1.4881852865219116, + "loss_ce": 0.01504078321158886, + "loss_iou": 0.625, + "loss_num": 0.04443359375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 16899880, + "step": 302 + }, + { + "epoch": 0.6748329621380846, + "grad_norm": 25.37139320373535, + "learning_rate": 1e-06, + "loss": 1.3608, + "num_input_tokens_seen": 16955144, + "step": 303 + }, + { + "epoch": 0.6748329621380846, + "loss": 1.1685500144958496, + "loss_ce": 0.01718279719352722, + "loss_iou": 0.484375, + "loss_num": 0.03662109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 16955144, + "step": 303 + }, + { + "epoch": 0.6770601336302895, + "grad_norm": 26.81342887878418, + "learning_rate": 1e-06, + "loss": 1.3982, + "num_input_tokens_seen": 17011748, + "step": 304 + }, + { + "epoch": 0.6770601336302895, + "loss": 1.2066997289657593, + "loss_ce": 0.01138727180659771, + "loss_iou": 0.53125, + "loss_num": 0.02587890625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 17011748, + "step": 304 + }, + { + "epoch": 0.6792873051224945, + "grad_norm": 27.548656463623047, + "learning_rate": 1e-06, + "loss": 1.5679, + "num_input_tokens_seen": 17068112, + "step": 305 + }, + { + "epoch": 0.6792873051224945, + "loss": 1.7167648077011108, + "loss_ce": 0.04781951755285263, + "loss_iou": 0.671875, + "loss_num": 0.06396484375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 17068112, + "step": 305 + }, + { + "epoch": 0.6815144766146993, + "grad_norm": 40.20294952392578, + "learning_rate": 1e-06, + "loss": 1.4371, + "num_input_tokens_seen": 17122104, + "step": 306 + }, + { + "epoch": 0.6815144766146993, + "loss": 1.477067470550537, + "loss_ce": 0.01710660383105278, + "loss_iou": 0.58984375, + "loss_num": 0.055419921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 17122104, + "step": 306 + }, + { + "epoch": 0.6837416481069042, + "grad_norm": 31.41501808166504, + "learning_rate": 1e-06, + "loss": 1.4743, + "num_input_tokens_seen": 17179616, + "step": 307 + }, + { + "epoch": 0.6837416481069042, + "loss": 1.279785394668579, + "loss_ce": 0.05468768998980522, + "loss_iou": 0.515625, + "loss_num": 0.03857421875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 17179616, + "step": 307 + }, + { + "epoch": 0.6859688195991092, + "grad_norm": 31.942686080932617, + "learning_rate": 1e-06, + "loss": 1.2797, + "num_input_tokens_seen": 17236176, + "step": 308 + }, + { + "epoch": 0.6859688195991092, + "loss": 1.2551771402359009, + "loss_ce": 0.027149761095643044, + "loss_iou": 0.5390625, + "loss_num": 0.029541015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 17236176, + "step": 308 + }, + { + "epoch": 0.688195991091314, + "grad_norm": 30.1185302734375, + "learning_rate": 1e-06, + "loss": 1.3682, + "num_input_tokens_seen": 17293040, + "step": 309 + }, + { + "epoch": 0.688195991091314, + "loss": 1.479234218597412, + "loss_ce": 0.037827931344509125, + "loss_iou": 0.60546875, + "loss_num": 0.0458984375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 17293040, + "step": 309 + }, + { + "epoch": 0.6904231625835189, + "grad_norm": 17.059524536132812, + "learning_rate": 1e-06, + "loss": 1.5945, + "num_input_tokens_seen": 17349008, + "step": 310 + }, + { + "epoch": 0.6904231625835189, + "loss": 1.7185901403427124, + "loss_ce": 0.01155892200767994, + "loss_iou": 0.71484375, + "loss_num": 0.054443359375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 17349008, + "step": 310 + }, + { + "epoch": 0.6926503340757239, + "grad_norm": 20.258180618286133, + "learning_rate": 1e-06, + "loss": 1.4631, + "num_input_tokens_seen": 17404740, + "step": 311 + }, + { + "epoch": 0.6926503340757239, + "loss": 1.595271348953247, + "loss_ce": 0.05718539282679558, + "loss_iou": 0.625, + "loss_num": 0.058349609375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 17404740, + "step": 311 + }, + { + "epoch": 0.6948775055679287, + "grad_norm": 51.31781005859375, + "learning_rate": 1e-06, + "loss": 1.9344, + "num_input_tokens_seen": 17460352, + "step": 312 + }, + { + "epoch": 0.6948775055679287, + "loss": 1.8748557567596436, + "loss_ce": 0.0301292035728693, + "loss_iou": 0.7734375, + "loss_num": 0.0595703125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 17460352, + "step": 312 + }, + { + "epoch": 0.6971046770601337, + "grad_norm": 42.53439712524414, + "learning_rate": 1e-06, + "loss": 1.2032, + "num_input_tokens_seen": 17518148, + "step": 313 + }, + { + "epoch": 0.6971046770601337, + "loss": 1.2251746654510498, + "loss_ce": 0.024491025134921074, + "loss_iou": 0.51171875, + "loss_num": 0.03515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 17518148, + "step": 313 + }, + { + "epoch": 0.6993318485523385, + "grad_norm": 190.15823364257812, + "learning_rate": 1e-06, + "loss": 1.5881, + "num_input_tokens_seen": 17574740, + "step": 314 + }, + { + "epoch": 0.6993318485523385, + "loss": 1.6861159801483154, + "loss_ce": 0.023518286645412445, + "loss_iou": 0.6875, + "loss_num": 0.056640625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 17574740, + "step": 314 + }, + { + "epoch": 0.7015590200445434, + "grad_norm": 23.019386291503906, + "learning_rate": 1e-06, + "loss": 1.336, + "num_input_tokens_seen": 17630988, + "step": 315 + }, + { + "epoch": 0.7015590200445434, + "loss": 1.2523000240325928, + "loss_ce": 0.0032766717486083508, + "loss_iou": 0.5078125, + "loss_num": 0.047119140625, + "loss_xval": 1.25, + "num_input_tokens_seen": 17630988, + "step": 315 + }, + { + "epoch": 0.7037861915367484, + "grad_norm": 33.44746017456055, + "learning_rate": 1e-06, + "loss": 1.4686, + "num_input_tokens_seen": 17688132, + "step": 316 + }, + { + "epoch": 0.7037861915367484, + "loss": 1.3316755294799805, + "loss_ce": 0.024058308452367783, + "loss_iou": 0.53125, + "loss_num": 0.049560546875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 17688132, + "step": 316 + }, + { + "epoch": 0.7060133630289532, + "grad_norm": 26.666362762451172, + "learning_rate": 1e-06, + "loss": 1.4387, + "num_input_tokens_seen": 17744136, + "step": 317 + }, + { + "epoch": 0.7060133630289532, + "loss": 1.4408788681030273, + "loss_ce": 0.038046881556510925, + "loss_iou": 0.58984375, + "loss_num": 0.044677734375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 17744136, + "step": 317 + }, + { + "epoch": 0.7082405345211581, + "grad_norm": 29.52345848083496, + "learning_rate": 1e-06, + "loss": 1.6314, + "num_input_tokens_seen": 17802324, + "step": 318 + }, + { + "epoch": 0.7082405345211581, + "loss": 1.7159836292266846, + "loss_ce": 0.018229741603136063, + "loss_iou": 0.6875, + "loss_num": 0.06396484375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 17802324, + "step": 318 + }, + { + "epoch": 0.7104677060133631, + "grad_norm": 31.622154235839844, + "learning_rate": 1e-06, + "loss": 1.765, + "num_input_tokens_seen": 17860648, + "step": 319 + }, + { + "epoch": 0.7104677060133631, + "loss": 1.6791552305221558, + "loss_ce": 0.010209913365542889, + "loss_iou": 0.671875, + "loss_num": 0.06494140625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 17860648, + "step": 319 + }, + { + "epoch": 0.7126948775055679, + "grad_norm": 38.03776931762695, + "learning_rate": 1e-06, + "loss": 1.3441, + "num_input_tokens_seen": 17915988, + "step": 320 + }, + { + "epoch": 0.7126948775055679, + "loss": 1.6516163349151611, + "loss_ce": 0.017338957637548447, + "loss_iou": 0.65234375, + "loss_num": 0.06640625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 17915988, + "step": 320 + }, + { + "epoch": 0.7149220489977728, + "grad_norm": 30.399452209472656, + "learning_rate": 1e-06, + "loss": 1.4676, + "num_input_tokens_seen": 17974328, + "step": 321 + }, + { + "epoch": 0.7149220489977728, + "loss": 1.5550212860107422, + "loss_ce": 0.01888836920261383, + "loss_iou": 0.62109375, + "loss_num": 0.05908203125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 17974328, + "step": 321 + }, + { + "epoch": 0.7171492204899778, + "grad_norm": 34.999053955078125, + "learning_rate": 1e-06, + "loss": 1.5055, + "num_input_tokens_seen": 18027768, + "step": 322 + }, + { + "epoch": 0.7171492204899778, + "loss": 1.5647876262664795, + "loss_ce": 0.014494719915091991, + "loss_iou": 0.65625, + "loss_num": 0.048095703125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 18027768, + "step": 322 + }, + { + "epoch": 0.7193763919821826, + "grad_norm": 48.161949157714844, + "learning_rate": 1e-06, + "loss": 1.5941, + "num_input_tokens_seen": 18086484, + "step": 323 + }, + { + "epoch": 0.7193763919821826, + "loss": 1.713207483291626, + "loss_ce": 0.02326606959104538, + "loss_iou": 0.70703125, + "loss_num": 0.05517578125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 18086484, + "step": 323 + }, + { + "epoch": 0.7216035634743875, + "grad_norm": 68.18085479736328, + "learning_rate": 1e-06, + "loss": 1.604, + "num_input_tokens_seen": 18142296, + "step": 324 + }, + { + "epoch": 0.7216035634743875, + "loss": 1.4922294616699219, + "loss_ce": 0.012249022722244263, + "loss_iou": 0.59375, + "loss_num": 0.05810546875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 18142296, + "step": 324 + }, + { + "epoch": 0.7238307349665924, + "grad_norm": 23.24517250061035, + "learning_rate": 1e-06, + "loss": 1.6705, + "num_input_tokens_seen": 18201016, + "step": 325 + }, + { + "epoch": 0.7238307349665924, + "loss": 1.8428983688354492, + "loss_ce": 0.0069608502089977264, + "loss_iou": 0.77734375, + "loss_num": 0.056396484375, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 18201016, + "step": 325 + }, + { + "epoch": 0.7260579064587973, + "grad_norm": 95.58949279785156, + "learning_rate": 1e-06, + "loss": 1.7858, + "num_input_tokens_seen": 18256504, + "step": 326 + }, + { + "epoch": 0.7260579064587973, + "loss": 1.7894692420959473, + "loss_ce": 0.013102035038173199, + "loss_iou": 0.69921875, + "loss_num": 0.07568359375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 18256504, + "step": 326 + }, + { + "epoch": 0.7282850779510023, + "grad_norm": 23.247690200805664, + "learning_rate": 1e-06, + "loss": 1.5796, + "num_input_tokens_seen": 18310452, + "step": 327 + }, + { + "epoch": 0.7282850779510023, + "loss": 1.59755539894104, + "loss_ce": 0.010641279630362988, + "loss_iou": 0.65234375, + "loss_num": 0.05615234375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 18310452, + "step": 327 + }, + { + "epoch": 0.7305122494432071, + "grad_norm": 67.12216186523438, + "learning_rate": 1e-06, + "loss": 1.4647, + "num_input_tokens_seen": 18367084, + "step": 328 + }, + { + "epoch": 0.7305122494432071, + "loss": 1.48179030418396, + "loss_ce": 0.014016897417604923, + "loss_iou": 0.5859375, + "loss_num": 0.05908203125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 18367084, + "step": 328 + }, + { + "epoch": 0.732739420935412, + "grad_norm": 31.022777557373047, + "learning_rate": 1e-06, + "loss": 1.4833, + "num_input_tokens_seen": 18424920, + "step": 329 + }, + { + "epoch": 0.732739420935412, + "loss": 1.486365795135498, + "loss_ce": 0.014686101116240025, + "loss_iou": 0.62109375, + "loss_num": 0.0458984375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 18424920, + "step": 329 + }, + { + "epoch": 0.734966592427617, + "grad_norm": 29.17088508605957, + "learning_rate": 1e-06, + "loss": 1.1757, + "num_input_tokens_seen": 18478976, + "step": 330 + }, + { + "epoch": 0.734966592427617, + "loss": 1.1004878282546997, + "loss_ce": 0.005761317443102598, + "loss_iou": 0.453125, + "loss_num": 0.037841796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 18478976, + "step": 330 + }, + { + "epoch": 0.7371937639198218, + "grad_norm": 20.834753036499023, + "learning_rate": 1e-06, + "loss": 1.1889, + "num_input_tokens_seen": 18536144, + "step": 331 + }, + { + "epoch": 0.7371937639198218, + "loss": 1.3901407718658447, + "loss_ce": 0.0014689104864373803, + "loss_iou": 0.578125, + "loss_num": 0.04638671875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 18536144, + "step": 331 + }, + { + "epoch": 0.7394209354120267, + "grad_norm": 68.39385986328125, + "learning_rate": 1e-06, + "loss": 1.5586, + "num_input_tokens_seen": 18593860, + "step": 332 + }, + { + "epoch": 0.7394209354120267, + "loss": 1.5588513612747192, + "loss_ce": 0.005140438210219145, + "loss_iou": 0.6484375, + "loss_num": 0.052001953125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 18593860, + "step": 332 + }, + { + "epoch": 0.7416481069042317, + "grad_norm": 19.545167922973633, + "learning_rate": 1e-06, + "loss": 1.5776, + "num_input_tokens_seen": 18647892, + "step": 333 + }, + { + "epoch": 0.7416481069042317, + "loss": 1.6709346771240234, + "loss_ce": 0.017126010730862617, + "loss_iou": 0.640625, + "loss_num": 0.07421875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 18647892, + "step": 333 + }, + { + "epoch": 0.7438752783964365, + "grad_norm": 47.12783432006836, + "learning_rate": 1e-06, + "loss": 1.503, + "num_input_tokens_seen": 18704784, + "step": 334 + }, + { + "epoch": 0.7438752783964365, + "loss": 1.865045428276062, + "loss_ce": 0.012994609773159027, + "loss_iou": 0.796875, + "loss_num": 0.052001953125, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 18704784, + "step": 334 + }, + { + "epoch": 0.7461024498886414, + "grad_norm": 25.380210876464844, + "learning_rate": 1e-06, + "loss": 1.1623, + "num_input_tokens_seen": 18761996, + "step": 335 + }, + { + "epoch": 0.7461024498886414, + "loss": 1.0759522914886475, + "loss_ce": 0.015649594366550446, + "loss_iou": 0.423828125, + "loss_num": 0.042724609375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 18761996, + "step": 335 + }, + { + "epoch": 0.7483296213808464, + "grad_norm": 19.682802200317383, + "learning_rate": 1e-06, + "loss": 1.4553, + "num_input_tokens_seen": 18819652, + "step": 336 + }, + { + "epoch": 0.7483296213808464, + "loss": 1.3878556489944458, + "loss_ce": 0.011879058554768562, + "loss_iou": 0.5546875, + "loss_num": 0.053466796875, + "loss_xval": 1.375, + "num_input_tokens_seen": 18819652, + "step": 336 + }, + { + "epoch": 0.7505567928730512, + "grad_norm": 91.77301025390625, + "learning_rate": 1e-06, + "loss": 1.6139, + "num_input_tokens_seen": 18877576, + "step": 337 + }, + { + "epoch": 0.7505567928730512, + "loss": 1.334415316581726, + "loss_ce": 0.008731753565371037, + "loss_iou": 0.546875, + "loss_num": 0.0458984375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 18877576, + "step": 337 + }, + { + "epoch": 0.7527839643652561, + "grad_norm": 28.926156997680664, + "learning_rate": 1e-06, + "loss": 1.5567, + "num_input_tokens_seen": 18935300, + "step": 338 + }, + { + "epoch": 0.7527839643652561, + "loss": 1.7415505647659302, + "loss_ce": 0.01205837819725275, + "loss_iou": 0.671875, + "loss_num": 0.076171875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 18935300, + "step": 338 + }, + { + "epoch": 0.755011135857461, + "grad_norm": 36.80242919921875, + "learning_rate": 1e-06, + "loss": 1.783, + "num_input_tokens_seen": 18989472, + "step": 339 + }, + { + "epoch": 0.755011135857461, + "loss": 1.8453316688537598, + "loss_ce": 0.012812146916985512, + "loss_iou": 0.77734375, + "loss_num": 0.0546875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 18989472, + "step": 339 + }, + { + "epoch": 0.7572383073496659, + "grad_norm": 19.47812271118164, + "learning_rate": 1e-06, + "loss": 1.5968, + "num_input_tokens_seen": 19041164, + "step": 340 + }, + { + "epoch": 0.7572383073496659, + "loss": 1.3463078737258911, + "loss_ce": 0.017938785254955292, + "loss_iou": 0.56640625, + "loss_num": 0.039306640625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 19041164, + "step": 340 + }, + { + "epoch": 0.7594654788418709, + "grad_norm": 21.5255126953125, + "learning_rate": 1e-06, + "loss": 1.2247, + "num_input_tokens_seen": 19098404, + "step": 341 + }, + { + "epoch": 0.7594654788418709, + "loss": 1.1793166399002075, + "loss_ce": 0.015254099853336811, + "loss_iou": 0.470703125, + "loss_num": 0.04443359375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 19098404, + "step": 341 + }, + { + "epoch": 0.7616926503340757, + "grad_norm": 21.701622009277344, + "learning_rate": 1e-06, + "loss": 1.4873, + "num_input_tokens_seen": 19152172, + "step": 342 + }, + { + "epoch": 0.7616926503340757, + "loss": 1.5115392208099365, + "loss_ce": 0.016422055661678314, + "loss_iou": 0.5859375, + "loss_num": 0.0634765625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 19152172, + "step": 342 + }, + { + "epoch": 0.7639198218262806, + "grad_norm": 24.85000228881836, + "learning_rate": 1e-06, + "loss": 1.3042, + "num_input_tokens_seen": 19208312, + "step": 343 + }, + { + "epoch": 0.7639198218262806, + "loss": 1.3061310052871704, + "loss_ce": 0.011697422713041306, + "loss_iou": 0.54296875, + "loss_num": 0.041259765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 19208312, + "step": 343 + }, + { + "epoch": 0.7661469933184856, + "grad_norm": 28.263137817382812, + "learning_rate": 1e-06, + "loss": 1.4686, + "num_input_tokens_seen": 19263288, + "step": 344 + }, + { + "epoch": 0.7661469933184856, + "loss": 1.6164791584014893, + "loss_ce": 0.008080787025392056, + "loss_iou": 0.64453125, + "loss_num": 0.064453125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 19263288, + "step": 344 + }, + { + "epoch": 0.7683741648106904, + "grad_norm": 26.359323501586914, + "learning_rate": 1e-06, + "loss": 1.5675, + "num_input_tokens_seen": 19322276, + "step": 345 + }, + { + "epoch": 0.7683741648106904, + "loss": 1.6035141944885254, + "loss_ce": 0.007078767288476229, + "loss_iou": 0.6484375, + "loss_num": 0.0595703125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 19322276, + "step": 345 + }, + { + "epoch": 0.7706013363028953, + "grad_norm": 28.58954429626465, + "learning_rate": 1e-06, + "loss": 1.4959, + "num_input_tokens_seen": 19377088, + "step": 346 + }, + { + "epoch": 0.7706013363028953, + "loss": 1.7983763217926025, + "loss_ce": 0.02982161194086075, + "loss_iou": 0.65234375, + "loss_num": 0.09326171875, + "loss_xval": 1.765625, + "num_input_tokens_seen": 19377088, + "step": 346 + }, + { + "epoch": 0.7728285077951003, + "grad_norm": 32.53110885620117, + "learning_rate": 1e-06, + "loss": 1.468, + "num_input_tokens_seen": 19432868, + "step": 347 + }, + { + "epoch": 0.7728285077951003, + "loss": 1.644303560256958, + "loss_ce": 0.006120047532021999, + "loss_iou": 0.703125, + "loss_num": 0.04638671875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 19432868, + "step": 347 + }, + { + "epoch": 0.7750556792873051, + "grad_norm": 19.344207763671875, + "learning_rate": 1e-06, + "loss": 1.4323, + "num_input_tokens_seen": 19490488, + "step": 348 + }, + { + "epoch": 0.7750556792873051, + "loss": 1.4876625537872314, + "loss_ce": 0.005729038268327713, + "loss_iou": 0.5546875, + "loss_num": 0.07373046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 19490488, + "step": 348 + }, + { + "epoch": 0.77728285077951, + "grad_norm": 142.5430145263672, + "learning_rate": 1e-06, + "loss": 1.4212, + "num_input_tokens_seen": 19546108, + "step": 349 + }, + { + "epoch": 0.77728285077951, + "loss": 1.081338882446289, + "loss_ce": 0.0024814759381115437, + "loss_iou": 0.4453125, + "loss_num": 0.037841796875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 19546108, + "step": 349 + }, + { + "epoch": 0.779510022271715, + "grad_norm": 27.834339141845703, + "learning_rate": 1e-06, + "loss": 1.4574, + "num_input_tokens_seen": 19602608, + "step": 350 + }, + { + "epoch": 0.779510022271715, + "loss": 1.220694899559021, + "loss_ce": 0.0029214350506663322, + "loss_iou": 0.494140625, + "loss_num": 0.0458984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 19602608, + "step": 350 + }, + { + "epoch": 0.7817371937639198, + "grad_norm": 51.78019332885742, + "learning_rate": 1e-06, + "loss": 1.3015, + "num_input_tokens_seen": 19659844, + "step": 351 + }, + { + "epoch": 0.7817371937639198, + "loss": 0.9219905138015747, + "loss_ce": 0.007439759094268084, + "loss_iou": 0.388671875, + "loss_num": 0.0277099609375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 19659844, + "step": 351 + }, + { + "epoch": 0.7839643652561247, + "grad_norm": 19.256717681884766, + "learning_rate": 1e-06, + "loss": 1.552, + "num_input_tokens_seen": 19716640, + "step": 352 + }, + { + "epoch": 0.7839643652561247, + "loss": 1.5087199211120605, + "loss_ce": 0.00481368275359273, + "loss_iou": 0.6171875, + "loss_num": 0.0537109375, + "loss_xval": 1.5, + "num_input_tokens_seen": 19716640, + "step": 352 + }, + { + "epoch": 0.7861915367483296, + "grad_norm": 25.72173500061035, + "learning_rate": 1e-06, + "loss": 1.4851, + "num_input_tokens_seen": 19771512, + "step": 353 + }, + { + "epoch": 0.7861915367483296, + "loss": 1.3254516124725342, + "loss_ce": 0.028576675802469254, + "loss_iou": 0.51953125, + "loss_num": 0.051025390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 19771512, + "step": 353 + }, + { + "epoch": 0.7884187082405345, + "grad_norm": 27.796306610107422, + "learning_rate": 1e-06, + "loss": 1.1745, + "num_input_tokens_seen": 19827200, + "step": 354 + }, + { + "epoch": 0.7884187082405345, + "loss": 1.0663065910339355, + "loss_ce": 0.0013651238987222314, + "loss_iou": 0.458984375, + "loss_num": 0.029296875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 19827200, + "step": 354 + }, + { + "epoch": 0.7906458797327395, + "grad_norm": 29.534481048583984, + "learning_rate": 1e-06, + "loss": 1.2979, + "num_input_tokens_seen": 19883360, + "step": 355 + }, + { + "epoch": 0.7906458797327395, + "loss": 1.285064697265625, + "loss_ce": 0.014556895941495895, + "loss_iou": 0.484375, + "loss_num": 0.060791015625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 19883360, + "step": 355 + }, + { + "epoch": 0.7928730512249443, + "grad_norm": 20.099090576171875, + "learning_rate": 1e-06, + "loss": 1.4715, + "num_input_tokens_seen": 19940216, + "step": 356 + }, + { + "epoch": 0.7928730512249443, + "loss": 1.6332509517669678, + "loss_ce": 0.00483296625316143, + "loss_iou": 0.6640625, + "loss_num": 0.060546875, + "loss_xval": 1.625, + "num_input_tokens_seen": 19940216, + "step": 356 + }, + { + "epoch": 0.7951002227171492, + "grad_norm": 23.28925323486328, + "learning_rate": 1e-06, + "loss": 1.4499, + "num_input_tokens_seen": 19996716, + "step": 357 + }, + { + "epoch": 0.7951002227171492, + "loss": 1.103065848350525, + "loss_ce": 0.007850958965718746, + "loss_iou": 0.453125, + "loss_num": 0.038330078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 19996716, + "step": 357 + }, + { + "epoch": 0.7973273942093542, + "grad_norm": 26.855409622192383, + "learning_rate": 1e-06, + "loss": 1.1852, + "num_input_tokens_seen": 20052192, + "step": 358 + }, + { + "epoch": 0.7973273942093542, + "loss": 1.3929839134216309, + "loss_ce": 0.003823714330792427, + "loss_iou": 0.5703125, + "loss_num": 0.050537109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 20052192, + "step": 358 + }, + { + "epoch": 0.799554565701559, + "grad_norm": 32.07172393798828, + "learning_rate": 1e-06, + "loss": 1.5933, + "num_input_tokens_seen": 20110068, + "step": 359 + }, + { + "epoch": 0.799554565701559, + "loss": 1.588189721107483, + "loss_ce": 0.005181873217225075, + "loss_iou": 0.65234375, + "loss_num": 0.05517578125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 20110068, + "step": 359 + }, + { + "epoch": 0.8017817371937639, + "grad_norm": 32.26838684082031, + "learning_rate": 1e-06, + "loss": 1.451, + "num_input_tokens_seen": 20166760, + "step": 360 + }, + { + "epoch": 0.8017817371937639, + "loss": 1.4686858654022217, + "loss_ce": 0.017513982951641083, + "loss_iou": 0.609375, + "loss_num": 0.04638671875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 20166760, + "step": 360 + }, + { + "epoch": 0.8040089086859689, + "grad_norm": 31.04205894470215, + "learning_rate": 1e-06, + "loss": 1.3697, + "num_input_tokens_seen": 20223556, + "step": 361 + }, + { + "epoch": 0.8040089086859689, + "loss": 1.2945581674575806, + "loss_ce": 0.006228114478290081, + "loss_iou": 0.515625, + "loss_num": 0.05078125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 20223556, + "step": 361 + }, + { + "epoch": 0.8062360801781737, + "grad_norm": 28.388748168945312, + "learning_rate": 1e-06, + "loss": 1.4442, + "num_input_tokens_seen": 20283264, + "step": 362 + }, + { + "epoch": 0.8062360801781737, + "loss": 1.3582143783569336, + "loss_ce": 0.002745626959949732, + "loss_iou": 0.5625, + "loss_num": 0.04541015625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 20283264, + "step": 362 + }, + { + "epoch": 0.8084632516703786, + "grad_norm": 49.78532028198242, + "learning_rate": 1e-06, + "loss": 1.3255, + "num_input_tokens_seen": 20337404, + "step": 363 + }, + { + "epoch": 0.8084632516703786, + "loss": 1.1450395584106445, + "loss_ce": 0.015156792476773262, + "loss_iou": 0.412109375, + "loss_num": 0.061767578125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 20337404, + "step": 363 + }, + { + "epoch": 0.8106904231625836, + "grad_norm": 48.754234313964844, + "learning_rate": 1e-06, + "loss": 1.2584, + "num_input_tokens_seen": 20393236, + "step": 364 + }, + { + "epoch": 0.8106904231625836, + "loss": 1.1171329021453857, + "loss_ce": 0.007513846270740032, + "loss_iou": 0.4453125, + "loss_num": 0.04345703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 20393236, + "step": 364 + }, + { + "epoch": 0.8129175946547884, + "grad_norm": 49.22020721435547, + "learning_rate": 1e-06, + "loss": 1.4507, + "num_input_tokens_seen": 20451716, + "step": 365 + }, + { + "epoch": 0.8129175946547884, + "loss": 1.6032345294952393, + "loss_ce": 0.046593837440013885, + "loss_iou": 0.6171875, + "loss_num": 0.064453125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 20451716, + "step": 365 + }, + { + "epoch": 0.8151447661469933, + "grad_norm": 81.98351287841797, + "learning_rate": 1e-06, + "loss": 1.5286, + "num_input_tokens_seen": 20507752, + "step": 366 + }, + { + "epoch": 0.8151447661469933, + "loss": 1.5305746793746948, + "loss_ce": 0.0032309277448803186, + "loss_iou": 0.62890625, + "loss_num": 0.054443359375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 20507752, + "step": 366 + }, + { + "epoch": 0.8173719376391982, + "grad_norm": 20.340085983276367, + "learning_rate": 1e-06, + "loss": 1.245, + "num_input_tokens_seen": 20561536, + "step": 367 + }, + { + "epoch": 0.8173719376391982, + "loss": 1.0740153789520264, + "loss_ce": 0.01566578447818756, + "loss_iou": 0.400390625, + "loss_num": 0.05126953125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 20561536, + "step": 367 + }, + { + "epoch": 0.8195991091314031, + "grad_norm": 28.017810821533203, + "learning_rate": 1e-06, + "loss": 1.4366, + "num_input_tokens_seen": 20614508, + "step": 368 + }, + { + "epoch": 0.8195991091314031, + "loss": 1.5188279151916504, + "loss_ce": 0.033476315438747406, + "loss_iou": 0.6015625, + "loss_num": 0.05615234375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 20614508, + "step": 368 + }, + { + "epoch": 0.821826280623608, + "grad_norm": 26.68250846862793, + "learning_rate": 1e-06, + "loss": 1.1561, + "num_input_tokens_seen": 20669580, + "step": 369 + }, + { + "epoch": 0.821826280623608, + "loss": 1.2030680179595947, + "loss_ce": 0.0014078648528084159, + "loss_iou": 0.5, + "loss_num": 0.040283203125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 20669580, + "step": 369 + }, + { + "epoch": 0.8240534521158129, + "grad_norm": 19.57592010498047, + "learning_rate": 1e-06, + "loss": 1.2601, + "num_input_tokens_seen": 20723132, + "step": 370 + }, + { + "epoch": 0.8240534521158129, + "loss": 1.2386668920516968, + "loss_ce": 0.013080945238471031, + "loss_iou": 0.5390625, + "loss_num": 0.030029296875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 20723132, + "step": 370 + }, + { + "epoch": 0.8262806236080178, + "grad_norm": 38.58708190917969, + "learning_rate": 1e-06, + "loss": 1.5873, + "num_input_tokens_seen": 20779144, + "step": 371 + }, + { + "epoch": 0.8262806236080178, + "loss": 1.2857810258865356, + "loss_ce": 0.013320127502083778, + "loss_iou": 0.49609375, + "loss_num": 0.055908203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 20779144, + "step": 371 + }, + { + "epoch": 0.8285077951002228, + "grad_norm": 27.13821792602539, + "learning_rate": 1e-06, + "loss": 1.7652, + "num_input_tokens_seen": 20835076, + "step": 372 + }, + { + "epoch": 0.8285077951002228, + "loss": 1.7804100513458252, + "loss_ce": 0.006972476840019226, + "loss_iou": 0.69921875, + "loss_num": 0.0751953125, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 20835076, + "step": 372 + }, + { + "epoch": 0.8307349665924276, + "grad_norm": 18.151918411254883, + "learning_rate": 1e-06, + "loss": 1.4218, + "num_input_tokens_seen": 20891112, + "step": 373 + }, + { + "epoch": 0.8307349665924276, + "loss": 1.4063541889190674, + "loss_ce": 0.005475334823131561, + "loss_iou": 0.56640625, + "loss_num": 0.052978515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 20891112, + "step": 373 + }, + { + "epoch": 0.8329621380846325, + "grad_norm": 20.646316528320312, + "learning_rate": 1e-06, + "loss": 1.2937, + "num_input_tokens_seen": 20947924, + "step": 374 + }, + { + "epoch": 0.8329621380846325, + "loss": 1.1752688884735107, + "loss_ce": 0.02146025560796261, + "loss_iou": 0.466796875, + "loss_num": 0.043701171875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 20947924, + "step": 374 + }, + { + "epoch": 0.8351893095768375, + "grad_norm": 65.90545654296875, + "learning_rate": 1e-06, + "loss": 1.1793, + "num_input_tokens_seen": 21003736, + "step": 375 + }, + { + "epoch": 0.8351893095768375, + "loss": 1.0895462036132812, + "loss_ce": 0.006538336630910635, + "loss_iou": 0.4609375, + "loss_num": 0.0322265625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 21003736, + "step": 375 + }, + { + "epoch": 0.8374164810690423, + "grad_norm": 30.747459411621094, + "learning_rate": 1e-06, + "loss": 1.4332, + "num_input_tokens_seen": 21058432, + "step": 376 + }, + { + "epoch": 0.8374164810690423, + "loss": 1.4749113321304321, + "loss_ce": 0.005673029460012913, + "loss_iou": 0.54296875, + "loss_num": 0.076171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 21058432, + "step": 376 + }, + { + "epoch": 0.8396436525612472, + "grad_norm": 48.859676361083984, + "learning_rate": 1e-06, + "loss": 1.288, + "num_input_tokens_seen": 21114724, + "step": 377 + }, + { + "epoch": 0.8396436525612472, + "loss": 1.3059370517730713, + "loss_ce": 0.02029254473745823, + "loss_iou": 0.53515625, + "loss_num": 0.0439453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 21114724, + "step": 377 + }, + { + "epoch": 0.8418708240534521, + "grad_norm": 34.50962448120117, + "learning_rate": 1e-06, + "loss": 1.237, + "num_input_tokens_seen": 21169136, + "step": 378 + }, + { + "epoch": 0.8418708240534521, + "loss": 1.1840803623199463, + "loss_ce": 0.0024397093802690506, + "loss_iou": 0.44921875, + "loss_num": 0.05712890625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 21169136, + "step": 378 + }, + { + "epoch": 0.844097995545657, + "grad_norm": 50.90530776977539, + "learning_rate": 1e-06, + "loss": 1.3962, + "num_input_tokens_seen": 21225176, + "step": 379 + }, + { + "epoch": 0.844097995545657, + "loss": 1.6650288105010986, + "loss_ce": 0.02660096064209938, + "loss_iou": 0.65625, + "loss_num": 0.0654296875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 21225176, + "step": 379 + }, + { + "epoch": 0.8463251670378619, + "grad_norm": 30.996505737304688, + "learning_rate": 1e-06, + "loss": 1.2789, + "num_input_tokens_seen": 21284272, + "step": 380 + }, + { + "epoch": 0.8463251670378619, + "loss": 1.3432170152664185, + "loss_ce": 0.0019084562081843615, + "loss_iou": 0.52734375, + "loss_num": 0.056884765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 21284272, + "step": 380 + }, + { + "epoch": 0.8485523385300668, + "grad_norm": 20.33319854736328, + "learning_rate": 1e-06, + "loss": 1.1925, + "num_input_tokens_seen": 21339980, + "step": 381 + }, + { + "epoch": 0.8485523385300668, + "loss": 1.164018154144287, + "loss_ce": 0.0019087546970695257, + "loss_iou": 0.47265625, + "loss_num": 0.043212890625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 21339980, + "step": 381 + }, + { + "epoch": 0.8507795100222717, + "grad_norm": 47.0328369140625, + "learning_rate": 1e-06, + "loss": 1.4929, + "num_input_tokens_seen": 21397864, + "step": 382 + }, + { + "epoch": 0.8507795100222717, + "loss": 1.355049729347229, + "loss_ce": 0.005440343637019396, + "loss_iou": 0.55859375, + "loss_num": 0.046142578125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 21397864, + "step": 382 + }, + { + "epoch": 0.8530066815144766, + "grad_norm": 84.89624786376953, + "learning_rate": 1e-06, + "loss": 1.4523, + "num_input_tokens_seen": 21453536, + "step": 383 + }, + { + "epoch": 0.8530066815144766, + "loss": 1.6871811151504517, + "loss_ce": 0.00944666936993599, + "loss_iou": 0.65625, + "loss_num": 0.0732421875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 21453536, + "step": 383 + }, + { + "epoch": 0.8552338530066815, + "grad_norm": 17.764833450317383, + "learning_rate": 1e-06, + "loss": 1.4082, + "num_input_tokens_seen": 21508716, + "step": 384 + }, + { + "epoch": 0.8552338530066815, + "loss": 1.6076984405517578, + "loss_ce": 0.015901662409305573, + "loss_iou": 0.66015625, + "loss_num": 0.0546875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 21508716, + "step": 384 + }, + { + "epoch": 0.8574610244988864, + "grad_norm": 21.195423126220703, + "learning_rate": 1e-06, + "loss": 1.1628, + "num_input_tokens_seen": 21563776, + "step": 385 + }, + { + "epoch": 0.8574610244988864, + "loss": 1.30448317527771, + "loss_ce": 0.010049499571323395, + "loss_iou": 0.546875, + "loss_num": 0.039794921875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 21563776, + "step": 385 + }, + { + "epoch": 0.8596881959910914, + "grad_norm": 31.222137451171875, + "learning_rate": 1e-06, + "loss": 1.2988, + "num_input_tokens_seen": 21617956, + "step": 386 + }, + { + "epoch": 0.8596881959910914, + "loss": 1.2785615921020508, + "loss_ce": 0.0092744380235672, + "loss_iou": 0.49609375, + "loss_num": 0.05517578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 21617956, + "step": 386 + }, + { + "epoch": 0.8619153674832962, + "grad_norm": 43.02681350708008, + "learning_rate": 1e-06, + "loss": 1.5262, + "num_input_tokens_seen": 21673748, + "step": 387 + }, + { + "epoch": 0.8619153674832962, + "loss": 1.531335711479187, + "loss_ce": 0.003991919104009867, + "loss_iou": 0.58203125, + "loss_num": 0.07177734375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 21673748, + "step": 387 + }, + { + "epoch": 0.8641425389755011, + "grad_norm": 41.39469909667969, + "learning_rate": 1e-06, + "loss": 1.2424, + "num_input_tokens_seen": 21729848, + "step": 388 + }, + { + "epoch": 0.8641425389755011, + "loss": 1.398338794708252, + "loss_ce": 0.0013661817647516727, + "loss_iou": 0.55859375, + "loss_num": 0.055419921875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 21729848, + "step": 388 + }, + { + "epoch": 0.8663697104677061, + "grad_norm": 24.085895538330078, + "learning_rate": 1e-06, + "loss": 1.3652, + "num_input_tokens_seen": 21789184, + "step": 389 + }, + { + "epoch": 0.8663697104677061, + "loss": 1.4368267059326172, + "loss_ce": 0.012022039853036404, + "loss_iou": 0.57421875, + "loss_num": 0.0546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 21789184, + "step": 389 + }, + { + "epoch": 0.8685968819599109, + "grad_norm": 21.240936279296875, + "learning_rate": 1e-06, + "loss": 1.1302, + "num_input_tokens_seen": 21845372, + "step": 390 + }, + { + "epoch": 0.8685968819599109, + "loss": 1.243898868560791, + "loss_ce": 0.01001210231333971, + "loss_iou": 0.515625, + "loss_num": 0.040283203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 21845372, + "step": 390 + }, + { + "epoch": 0.8708240534521158, + "grad_norm": 26.22933578491211, + "learning_rate": 1e-06, + "loss": 1.2685, + "num_input_tokens_seen": 21898080, + "step": 391 + }, + { + "epoch": 0.8708240534521158, + "loss": 1.6092109680175781, + "loss_ce": 0.0179024338722229, + "loss_iou": 0.65234375, + "loss_num": 0.05810546875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 21898080, + "step": 391 + }, + { + "epoch": 0.8730512249443207, + "grad_norm": 68.2789077758789, + "learning_rate": 1e-06, + "loss": 1.4469, + "num_input_tokens_seen": 21954884, + "step": 392 + }, + { + "epoch": 0.8730512249443207, + "loss": 1.568161964416504, + "loss_ce": 0.002732297871261835, + "loss_iou": 0.64453125, + "loss_num": 0.0556640625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 21954884, + "step": 392 + }, + { + "epoch": 0.8752783964365256, + "grad_norm": 27.22370719909668, + "learning_rate": 1e-06, + "loss": 1.2137, + "num_input_tokens_seen": 22008924, + "step": 393 + }, + { + "epoch": 0.8752783964365256, + "loss": 1.0929490327835083, + "loss_ce": 0.004081860650330782, + "loss_iou": 0.44921875, + "loss_num": 0.038330078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 22008924, + "step": 393 + }, + { + "epoch": 0.8775055679287305, + "grad_norm": 82.91708374023438, + "learning_rate": 1e-06, + "loss": 1.6931, + "num_input_tokens_seen": 22064736, + "step": 394 + }, + { + "epoch": 0.8775055679287305, + "loss": 1.915067434310913, + "loss_ce": 0.007840840145945549, + "loss_iou": 0.765625, + "loss_num": 0.07568359375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 22064736, + "step": 394 + }, + { + "epoch": 0.8797327394209354, + "grad_norm": 25.599044799804688, + "learning_rate": 1e-06, + "loss": 1.3419, + "num_input_tokens_seen": 22119196, + "step": 395 + }, + { + "epoch": 0.8797327394209354, + "loss": 1.3939309120178223, + "loss_ce": 0.003794201649725437, + "loss_iou": 0.55078125, + "loss_num": 0.05712890625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 22119196, + "step": 395 + }, + { + "epoch": 0.8819599109131403, + "grad_norm": 38.43741226196289, + "learning_rate": 1e-06, + "loss": 1.3195, + "num_input_tokens_seen": 22175900, + "step": 396 + }, + { + "epoch": 0.8819599109131403, + "loss": 1.2486144304275513, + "loss_ce": 0.013262815773487091, + "loss_iou": 0.4921875, + "loss_num": 0.050048828125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 22175900, + "step": 396 + }, + { + "epoch": 0.8841870824053452, + "grad_norm": 51.51115417480469, + "learning_rate": 1e-06, + "loss": 1.3812, + "num_input_tokens_seen": 22232828, + "step": 397 + }, + { + "epoch": 0.8841870824053452, + "loss": 1.6601974964141846, + "loss_ce": 0.029338089749217033, + "loss_iou": 0.6171875, + "loss_num": 0.080078125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 22232828, + "step": 397 + }, + { + "epoch": 0.8864142538975501, + "grad_norm": 29.84437370300293, + "learning_rate": 1e-06, + "loss": 1.145, + "num_input_tokens_seen": 22290996, + "step": 398 + }, + { + "epoch": 0.8864142538975501, + "loss": 1.0916481018066406, + "loss_ce": 0.0008277894230559468, + "loss_iou": 0.412109375, + "loss_num": 0.053466796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 22290996, + "step": 398 + }, + { + "epoch": 0.888641425389755, + "grad_norm": 34.59022903442383, + "learning_rate": 1e-06, + "loss": 1.1759, + "num_input_tokens_seen": 22345820, + "step": 399 + }, + { + "epoch": 0.888641425389755, + "loss": 1.1103270053863525, + "loss_ce": 0.0009520421735942364, + "loss_iou": 0.46875, + "loss_num": 0.0341796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 22345820, + "step": 399 + }, + { + "epoch": 0.89086859688196, + "grad_norm": 36.79861068725586, + "learning_rate": 1e-06, + "loss": 1.3186, + "num_input_tokens_seen": 22403684, + "step": 400 + }, + { + "epoch": 0.89086859688196, + "loss": 1.1463134288787842, + "loss_ce": 0.005688320379704237, + "loss_iou": 0.47265625, + "loss_num": 0.0390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 22403684, + "step": 400 + }, + { + "epoch": 0.8930957683741648, + "grad_norm": 21.335620880126953, + "learning_rate": 1e-06, + "loss": 1.3037, + "num_input_tokens_seen": 22461644, + "step": 401 + }, + { + "epoch": 0.8930957683741648, + "loss": 1.3235667943954468, + "loss_ce": 0.007160472683608532, + "loss_iou": 0.5390625, + "loss_num": 0.048095703125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 22461644, + "step": 401 + }, + { + "epoch": 0.8953229398663697, + "grad_norm": 50.855709075927734, + "learning_rate": 1e-06, + "loss": 1.6207, + "num_input_tokens_seen": 22518540, + "step": 402 + }, + { + "epoch": 0.8953229398663697, + "loss": 1.8505743741989136, + "loss_ce": 0.002918071812018752, + "loss_iou": 0.71875, + "loss_num": 0.08203125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 22518540, + "step": 402 + }, + { + "epoch": 0.8975501113585747, + "grad_norm": 25.960264205932617, + "learning_rate": 1e-06, + "loss": 1.412, + "num_input_tokens_seen": 22575984, + "step": 403 + }, + { + "epoch": 0.8975501113585747, + "loss": 1.4437758922576904, + "loss_ce": 0.0028578725177794695, + "loss_iou": 0.59375, + "loss_num": 0.05126953125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 22575984, + "step": 403 + }, + { + "epoch": 0.8997772828507795, + "grad_norm": 61.44157791137695, + "learning_rate": 1e-06, + "loss": 1.1071, + "num_input_tokens_seen": 22632724, + "step": 404 + }, + { + "epoch": 0.8997772828507795, + "loss": 1.1888924837112427, + "loss_ce": 0.002613153774291277, + "loss_iou": 0.498046875, + "loss_num": 0.0380859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 22632724, + "step": 404 + }, + { + "epoch": 0.9020044543429844, + "grad_norm": 32.13711929321289, + "learning_rate": 1e-06, + "loss": 1.3325, + "num_input_tokens_seen": 22689348, + "step": 405 + }, + { + "epoch": 0.9020044543429844, + "loss": 1.1436662673950195, + "loss_ce": 0.002064717700704932, + "loss_iou": 0.494140625, + "loss_num": 0.030517578125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 22689348, + "step": 405 + }, + { + "epoch": 0.9042316258351893, + "grad_norm": 22.586368560791016, + "learning_rate": 1e-06, + "loss": 1.3851, + "num_input_tokens_seen": 22744560, + "step": 406 + }, + { + "epoch": 0.9042316258351893, + "loss": 1.374314546585083, + "loss_ce": 0.010056735947728157, + "loss_iou": 0.5390625, + "loss_num": 0.056884765625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 22744560, + "step": 406 + }, + { + "epoch": 0.9064587973273942, + "grad_norm": 19.449066162109375, + "learning_rate": 1e-06, + "loss": 1.4187, + "num_input_tokens_seen": 22796036, + "step": 407 + }, + { + "epoch": 0.9064587973273942, + "loss": 1.272933006286621, + "loss_ce": 0.0038900894578546286, + "loss_iou": 0.5390625, + "loss_num": 0.03857421875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 22796036, + "step": 407 + }, + { + "epoch": 0.9086859688195991, + "grad_norm": 17.967327117919922, + "learning_rate": 1e-06, + "loss": 1.101, + "num_input_tokens_seen": 22851848, + "step": 408 + }, + { + "epoch": 0.9086859688195991, + "loss": 0.8791401386260986, + "loss_ce": 0.009511251002550125, + "loss_iou": 0.35546875, + "loss_num": 0.0311279296875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 22851848, + "step": 408 + }, + { + "epoch": 0.910913140311804, + "grad_norm": 34.27125549316406, + "learning_rate": 1e-06, + "loss": 1.279, + "num_input_tokens_seen": 22906068, + "step": 409 + }, + { + "epoch": 0.910913140311804, + "loss": 1.3037530183792114, + "loss_ce": 0.008098645135760307, + "loss_iou": 0.546875, + "loss_num": 0.041015625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 22906068, + "step": 409 + }, + { + "epoch": 0.9131403118040089, + "grad_norm": 21.108455657958984, + "learning_rate": 1e-06, + "loss": 1.3891, + "num_input_tokens_seen": 22963800, + "step": 410 + }, + { + "epoch": 0.9131403118040089, + "loss": 1.512423038482666, + "loss_ce": 0.009981658309698105, + "loss_iou": 0.6640625, + "loss_num": 0.034912109375, + "loss_xval": 1.5, + "num_input_tokens_seen": 22963800, + "step": 410 + }, + { + "epoch": 0.9153674832962138, + "grad_norm": 26.992612838745117, + "learning_rate": 1e-06, + "loss": 1.39, + "num_input_tokens_seen": 23019888, + "step": 411 + }, + { + "epoch": 0.9153674832962138, + "loss": 1.4112193584442139, + "loss_ce": 0.0020396006293594837, + "loss_iou": 0.5703125, + "loss_num": 0.052490234375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 23019888, + "step": 411 + }, + { + "epoch": 0.9175946547884187, + "grad_norm": 25.17546272277832, + "learning_rate": 1e-06, + "loss": 1.2354, + "num_input_tokens_seen": 23077784, + "step": 412 + }, + { + "epoch": 0.9175946547884187, + "loss": 1.2591149806976318, + "loss_ce": 0.02327517233788967, + "loss_iou": 0.515625, + "loss_num": 0.04052734375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 23077784, + "step": 412 + }, + { + "epoch": 0.9198218262806236, + "grad_norm": 50.736148834228516, + "learning_rate": 1e-06, + "loss": 1.1223, + "num_input_tokens_seen": 23133656, + "step": 413 + }, + { + "epoch": 0.9198218262806236, + "loss": 1.0429635047912598, + "loss_ce": 0.0029244269244372845, + "loss_iou": 0.404296875, + "loss_num": 0.046630859375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 23133656, + "step": 413 + }, + { + "epoch": 0.9220489977728286, + "grad_norm": 49.43931579589844, + "learning_rate": 1e-06, + "loss": 1.1577, + "num_input_tokens_seen": 23190832, + "step": 414 + }, + { + "epoch": 0.9220489977728286, + "loss": 1.1161892414093018, + "loss_ce": 0.009988099336624146, + "loss_iou": 0.431640625, + "loss_num": 0.04833984375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 23190832, + "step": 414 + }, + { + "epoch": 0.9242761692650334, + "grad_norm": 31.02313232421875, + "learning_rate": 1e-06, + "loss": 1.4122, + "num_input_tokens_seen": 23247304, + "step": 415 + }, + { + "epoch": 0.9242761692650334, + "loss": 1.5239001512527466, + "loss_ce": 0.003880674485117197, + "loss_iou": 0.640625, + "loss_num": 0.04833984375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 23247304, + "step": 415 + }, + { + "epoch": 0.9265033407572383, + "grad_norm": 24.66490936279297, + "learning_rate": 1e-06, + "loss": 1.3191, + "num_input_tokens_seen": 23302872, + "step": 416 + }, + { + "epoch": 0.9265033407572383, + "loss": 1.2839527130126953, + "loss_ce": 0.00563238188624382, + "loss_iou": 0.51953125, + "loss_num": 0.04833984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 23302872, + "step": 416 + }, + { + "epoch": 0.9287305122494433, + "grad_norm": 29.012544631958008, + "learning_rate": 1e-06, + "loss": 1.4372, + "num_input_tokens_seen": 23361228, + "step": 417 + }, + { + "epoch": 0.9287305122494433, + "loss": 1.4470521211624146, + "loss_ce": 0.008087254129350185, + "loss_iou": 0.578125, + "loss_num": 0.056640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 23361228, + "step": 417 + }, + { + "epoch": 0.9309576837416481, + "grad_norm": 29.133703231811523, + "learning_rate": 1e-06, + "loss": 1.3035, + "num_input_tokens_seen": 23417480, + "step": 418 + }, + { + "epoch": 0.9309576837416481, + "loss": 1.2170838117599487, + "loss_ce": 0.001263482728973031, + "loss_iou": 0.482421875, + "loss_num": 0.050537109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 23417480, + "step": 418 + }, + { + "epoch": 0.933184855233853, + "grad_norm": 24.859573364257812, + "learning_rate": 1e-06, + "loss": 1.6284, + "num_input_tokens_seen": 23474768, + "step": 419 + }, + { + "epoch": 0.933184855233853, + "loss": 1.9370301961898804, + "loss_ce": 0.03078020177781582, + "loss_iou": 0.76171875, + "loss_num": 0.0771484375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 23474768, + "step": 419 + }, + { + "epoch": 0.9354120267260579, + "grad_norm": 25.39378547668457, + "learning_rate": 1e-06, + "loss": 1.1607, + "num_input_tokens_seen": 23530896, + "step": 420 + }, + { + "epoch": 0.9354120267260579, + "loss": 1.2405550479888916, + "loss_ce": 0.00471511110663414, + "loss_iou": 0.53515625, + "loss_num": 0.033935546875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 23530896, + "step": 420 + }, + { + "epoch": 0.9376391982182628, + "grad_norm": 39.46179962158203, + "learning_rate": 1e-06, + "loss": 1.4029, + "num_input_tokens_seen": 23587564, + "step": 421 + }, + { + "epoch": 0.9376391982182628, + "loss": 1.2013354301452637, + "loss_ce": 0.0026050377637147903, + "loss_iou": 0.466796875, + "loss_num": 0.053466796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 23587564, + "step": 421 + }, + { + "epoch": 0.9398663697104677, + "grad_norm": 20.98834991455078, + "learning_rate": 1e-06, + "loss": 1.4864, + "num_input_tokens_seen": 23641996, + "step": 422 + }, + { + "epoch": 0.9398663697104677, + "loss": 1.4933122396469116, + "loss_ce": 0.005030961707234383, + "loss_iou": 0.6015625, + "loss_num": 0.057373046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 23641996, + "step": 422 + }, + { + "epoch": 0.9420935412026726, + "grad_norm": 28.44550895690918, + "learning_rate": 1e-06, + "loss": 1.3014, + "num_input_tokens_seen": 23695464, + "step": 423 + }, + { + "epoch": 0.9420935412026726, + "loss": 1.5061261653900146, + "loss_ce": 0.021751180291175842, + "loss_iou": 0.61328125, + "loss_num": 0.05224609375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 23695464, + "step": 423 + }, + { + "epoch": 0.9443207126948775, + "grad_norm": 24.33575439453125, + "learning_rate": 1e-06, + "loss": 1.104, + "num_input_tokens_seen": 23747676, + "step": 424 + }, + { + "epoch": 0.9443207126948775, + "loss": 1.0683302879333496, + "loss_ce": 0.0016799941658973694, + "loss_iou": 0.439453125, + "loss_num": 0.03759765625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 23747676, + "step": 424 + }, + { + "epoch": 0.9465478841870824, + "grad_norm": 29.6976375579834, + "learning_rate": 1e-06, + "loss": 1.1804, + "num_input_tokens_seen": 23805080, + "step": 425 + }, + { + "epoch": 0.9465478841870824, + "loss": 1.1131433248519897, + "loss_ce": 0.00132695899810642, + "loss_iou": 0.453125, + "loss_num": 0.041748046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 23805080, + "step": 425 + }, + { + "epoch": 0.9487750556792873, + "grad_norm": 24.570606231689453, + "learning_rate": 1e-06, + "loss": 1.0864, + "num_input_tokens_seen": 23862348, + "step": 426 + }, + { + "epoch": 0.9487750556792873, + "loss": 0.9711905717849731, + "loss_ce": 0.000975710921920836, + "loss_iou": 0.416015625, + "loss_num": 0.0277099609375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 23862348, + "step": 426 + }, + { + "epoch": 0.9510022271714922, + "grad_norm": 34.240631103515625, + "learning_rate": 1e-06, + "loss": 1.1878, + "num_input_tokens_seen": 23919428, + "step": 427 + }, + { + "epoch": 0.9510022271714922, + "loss": 1.3001813888549805, + "loss_ce": 0.0015973602421581745, + "loss_iou": 0.53125, + "loss_num": 0.04736328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 23919428, + "step": 427 + }, + { + "epoch": 0.9532293986636972, + "grad_norm": 22.28962516784668, + "learning_rate": 1e-06, + "loss": 1.0995, + "num_input_tokens_seen": 23975564, + "step": 428 + }, + { + "epoch": 0.9532293986636972, + "loss": 1.0272424221038818, + "loss_ce": 0.01918586902320385, + "loss_iou": 0.41796875, + "loss_num": 0.0341796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 23975564, + "step": 428 + }, + { + "epoch": 0.955456570155902, + "grad_norm": 50.77242660522461, + "learning_rate": 1e-06, + "loss": 1.4387, + "num_input_tokens_seen": 24032664, + "step": 429 + }, + { + "epoch": 0.955456570155902, + "loss": 1.2324780225753784, + "loss_ce": 0.019099093973636627, + "loss_iou": 0.451171875, + "loss_num": 0.0625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 24032664, + "step": 429 + }, + { + "epoch": 0.9576837416481069, + "grad_norm": 17.031267166137695, + "learning_rate": 1e-06, + "loss": 1.0689, + "num_input_tokens_seen": 24087656, + "step": 430 + }, + { + "epoch": 0.9576837416481069, + "loss": 1.104911208152771, + "loss_ce": 0.008475645445287228, + "loss_iou": 0.4296875, + "loss_num": 0.0478515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 24087656, + "step": 430 + }, + { + "epoch": 0.9599109131403119, + "grad_norm": 29.331024169921875, + "learning_rate": 1e-06, + "loss": 1.404, + "num_input_tokens_seen": 24141584, + "step": 431 + }, + { + "epoch": 0.9599109131403119, + "loss": 1.457468032836914, + "loss_ce": 0.027536382898688316, + "loss_iou": 0.55859375, + "loss_num": 0.06298828125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 24141584, + "step": 431 + }, + { + "epoch": 0.9621380846325167, + "grad_norm": 23.636056900024414, + "learning_rate": 1e-06, + "loss": 1.379, + "num_input_tokens_seen": 24197980, + "step": 432 + }, + { + "epoch": 0.9621380846325167, + "loss": 1.2647449970245361, + "loss_ce": 0.002049737609922886, + "loss_iou": 0.51171875, + "loss_num": 0.04833984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 24197980, + "step": 432 + }, + { + "epoch": 0.9643652561247216, + "grad_norm": 94.4756851196289, + "learning_rate": 1e-06, + "loss": 1.3875, + "num_input_tokens_seen": 24252980, + "step": 433 + }, + { + "epoch": 0.9643652561247216, + "loss": 1.2504823207855225, + "loss_ce": 0.0009705987758934498, + "loss_iou": 0.474609375, + "loss_num": 0.059814453125, + "loss_xval": 1.25, + "num_input_tokens_seen": 24252980, + "step": 433 + }, + { + "epoch": 0.9665924276169265, + "grad_norm": 29.35396957397461, + "learning_rate": 1e-06, + "loss": 1.313, + "num_input_tokens_seen": 24310388, + "step": 434 + }, + { + "epoch": 0.9665924276169265, + "loss": 1.3304047584533691, + "loss_ce": 0.015951739624142647, + "loss_iou": 0.484375, + "loss_num": 0.06884765625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 24310388, + "step": 434 + }, + { + "epoch": 0.9688195991091314, + "grad_norm": 19.95311737060547, + "learning_rate": 1e-06, + "loss": 1.0571, + "num_input_tokens_seen": 24367988, + "step": 435 + }, + { + "epoch": 0.9688195991091314, + "loss": 0.8543475270271301, + "loss_ce": 0.0008319243206642568, + "loss_iou": 0.345703125, + "loss_num": 0.03271484375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 24367988, + "step": 435 + }, + { + "epoch": 0.9710467706013363, + "grad_norm": 17.18592071533203, + "learning_rate": 1e-06, + "loss": 1.4076, + "num_input_tokens_seen": 24421072, + "step": 436 + }, + { + "epoch": 0.9710467706013363, + "loss": 1.2863093614578247, + "loss_ce": 0.0038386958185583353, + "loss_iou": 0.5078125, + "loss_num": 0.053466796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 24421072, + "step": 436 + }, + { + "epoch": 0.9732739420935412, + "grad_norm": 45.679954528808594, + "learning_rate": 1e-06, + "loss": 1.4846, + "num_input_tokens_seen": 24476364, + "step": 437 + }, + { + "epoch": 0.9732739420935412, + "loss": 1.2888422012329102, + "loss_ce": 0.0017329129623249173, + "loss_iou": 0.5078125, + "loss_num": 0.0537109375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 24476364, + "step": 437 + }, + { + "epoch": 0.9755011135857461, + "grad_norm": 26.56547737121582, + "learning_rate": 1e-06, + "loss": 1.4019, + "num_input_tokens_seen": 24532516, + "step": 438 + }, + { + "epoch": 0.9755011135857461, + "loss": 1.554681420326233, + "loss_ce": 0.003900158451870084, + "loss_iou": 0.609375, + "loss_num": 0.06640625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 24532516, + "step": 438 + }, + { + "epoch": 0.977728285077951, + "grad_norm": 37.293983459472656, + "learning_rate": 1e-06, + "loss": 1.2445, + "num_input_tokens_seen": 24586972, + "step": 439 + }, + { + "epoch": 0.977728285077951, + "loss": 1.148227572441101, + "loss_ce": 0.0100439777597785, + "loss_iou": 0.4140625, + "loss_num": 0.06201171875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 24586972, + "step": 439 + }, + { + "epoch": 0.9799554565701559, + "grad_norm": 29.18305778503418, + "learning_rate": 1e-06, + "loss": 1.1585, + "num_input_tokens_seen": 24643012, + "step": 440 + }, + { + "epoch": 0.9799554565701559, + "loss": 1.3668557405471802, + "loss_ce": 0.004306901711970568, + "loss_iou": 0.5625, + "loss_num": 0.0478515625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 24643012, + "step": 440 + }, + { + "epoch": 0.9821826280623608, + "grad_norm": 47.63218688964844, + "learning_rate": 1e-06, + "loss": 1.4442, + "num_input_tokens_seen": 24697816, + "step": 441 + }, + { + "epoch": 0.9821826280623608, + "loss": 1.5267517566680908, + "loss_ce": 0.0018494933610782027, + "loss_iou": 0.60546875, + "loss_num": 0.0625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 24697816, + "step": 441 + }, + { + "epoch": 0.9844097995545658, + "grad_norm": 21.678260803222656, + "learning_rate": 1e-06, + "loss": 1.3717, + "num_input_tokens_seen": 24752152, + "step": 442 + }, + { + "epoch": 0.9844097995545658, + "loss": 1.6792147159576416, + "loss_ce": 0.005386614240705967, + "loss_iou": 0.60546875, + "loss_num": 0.0927734375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 24752152, + "step": 442 + }, + { + "epoch": 0.9866369710467706, + "grad_norm": 52.10433578491211, + "learning_rate": 1e-06, + "loss": 1.3047, + "num_input_tokens_seen": 24810384, + "step": 443 + }, + { + "epoch": 0.9866369710467706, + "loss": 1.4768739938735962, + "loss_ce": 0.005682523362338543, + "loss_iou": 0.58203125, + "loss_num": 0.06201171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 24810384, + "step": 443 + }, + { + "epoch": 0.9888641425389755, + "grad_norm": 35.90444564819336, + "learning_rate": 1e-06, + "loss": 1.4123, + "num_input_tokens_seen": 24864948, + "step": 444 + }, + { + "epoch": 0.9888641425389755, + "loss": 1.7221654653549194, + "loss_ce": 0.0039037105161696672, + "loss_iou": 0.69921875, + "loss_num": 0.06494140625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 24864948, + "step": 444 + }, + { + "epoch": 0.9910913140311804, + "grad_norm": 20.43412208557129, + "learning_rate": 1e-06, + "loss": 1.3585, + "num_input_tokens_seen": 24919520, + "step": 445 + }, + { + "epoch": 0.9910913140311804, + "loss": 1.4697803258895874, + "loss_ce": 0.01421389076858759, + "loss_iou": 0.58984375, + "loss_num": 0.05517578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 24919520, + "step": 445 + }, + { + "epoch": 0.9933184855233853, + "grad_norm": 48.228004455566406, + "learning_rate": 1e-06, + "loss": 1.6479, + "num_input_tokens_seen": 24977308, + "step": 446 + }, + { + "epoch": 0.9933184855233853, + "loss": 1.6608734130859375, + "loss_ce": 0.0016936406027525663, + "loss_iou": 0.67578125, + "loss_num": 0.0615234375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 24977308, + "step": 446 + }, + { + "epoch": 0.9955456570155902, + "grad_norm": 43.12616729736328, + "learning_rate": 1e-06, + "loss": 1.6302, + "num_input_tokens_seen": 25033208, + "step": 447 + }, + { + "epoch": 0.9955456570155902, + "loss": 1.7152773141860962, + "loss_ce": 0.004828128032386303, + "loss_iou": 0.61328125, + "loss_num": 0.0966796875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 25033208, + "step": 447 + }, + { + "epoch": 0.9977728285077951, + "grad_norm": 32.99639892578125, + "learning_rate": 1e-06, + "loss": 1.43, + "num_input_tokens_seen": 25089796, + "step": 448 + }, + { + "epoch": 0.9977728285077951, + "loss": 1.0105788707733154, + "loss_ce": 0.005207820795476437, + "loss_iou": 0.423828125, + "loss_num": 0.031494140625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 25089796, + "step": 448 + }, + { + "epoch": 1.0, + "grad_norm": 22.11530303955078, + "learning_rate": 1e-06, + "loss": 1.0883, + "num_input_tokens_seen": 25146032, + "step": 449 + }, + { + "epoch": 1.0, + "loss": 1.1784168481826782, + "loss_ce": 0.006053549237549305, + "loss_iou": 0.4609375, + "loss_num": 0.05029296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 25146032, + "step": 449 + }, + { + "epoch": 1.0022271714922049, + "grad_norm": 18.13511848449707, + "learning_rate": 1e-06, + "loss": 1.2001, + "num_input_tokens_seen": 25204016, + "step": 450 + }, + { + "epoch": 1.0022271714922049, + "loss": 1.3148114681243896, + "loss_ce": 0.0042645614594221115, + "loss_iou": 0.55078125, + "loss_num": 0.042236328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 25204016, + "step": 450 + }, + { + "epoch": 1.0044543429844097, + "grad_norm": 72.61564636230469, + "learning_rate": 1e-06, + "loss": 1.3781, + "num_input_tokens_seen": 25258404, + "step": 451 + }, + { + "epoch": 1.0044543429844097, + "loss": 1.4480741024017334, + "loss_ce": 0.015945199877023697, + "loss_iou": 0.55078125, + "loss_num": 0.06689453125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 25258404, + "step": 451 + }, + { + "epoch": 1.0066815144766148, + "grad_norm": 40.180419921875, + "learning_rate": 1e-06, + "loss": 1.4411, + "num_input_tokens_seen": 25315080, + "step": 452 + }, + { + "epoch": 1.0066815144766148, + "loss": 1.4863982200622559, + "loss_ce": 0.005441202782094479, + "loss_iou": 0.5703125, + "loss_num": 0.068359375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 25315080, + "step": 452 + }, + { + "epoch": 1.0089086859688197, + "grad_norm": 28.12888526916504, + "learning_rate": 1e-06, + "loss": 1.1451, + "num_input_tokens_seen": 25370644, + "step": 453 + }, + { + "epoch": 1.0089086859688197, + "loss": 1.1297539472579956, + "loss_ce": 0.01940234750509262, + "loss_iou": 0.4453125, + "loss_num": 0.043701171875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 25370644, + "step": 453 + }, + { + "epoch": 1.0111358574610245, + "grad_norm": 26.153356552124023, + "learning_rate": 1e-06, + "loss": 1.2351, + "num_input_tokens_seen": 25425148, + "step": 454 + }, + { + "epoch": 1.0111358574610245, + "loss": 1.6115047931671143, + "loss_ce": 0.002618103986606002, + "loss_iou": 0.5859375, + "loss_num": 0.08837890625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 25425148, + "step": 454 + }, + { + "epoch": 1.0133630289532294, + "grad_norm": 41.62250900268555, + "learning_rate": 1e-06, + "loss": 1.4871, + "num_input_tokens_seen": 25478780, + "step": 455 + }, + { + "epoch": 1.0133630289532294, + "loss": 1.8347097635269165, + "loss_ce": 0.004143323749303818, + "loss_iou": 0.703125, + "loss_num": 0.08544921875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 25478780, + "step": 455 + }, + { + "epoch": 1.0155902004454342, + "grad_norm": 54.628746032714844, + "learning_rate": 1e-06, + "loss": 1.2763, + "num_input_tokens_seen": 25533068, + "step": 456 + }, + { + "epoch": 1.0155902004454342, + "loss": 1.017959713935852, + "loss_ce": 0.007217581383883953, + "loss_iou": 0.384765625, + "loss_num": 0.048583984375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 25533068, + "step": 456 + }, + { + "epoch": 1.017817371937639, + "grad_norm": 19.17976951599121, + "learning_rate": 1e-06, + "loss": 1.1245, + "num_input_tokens_seen": 25588116, + "step": 457 + }, + { + "epoch": 1.017817371937639, + "loss": 1.1571969985961914, + "loss_ce": 0.0024117890279740095, + "loss_iou": 0.5078125, + "loss_num": 0.02734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 25588116, + "step": 457 + }, + { + "epoch": 1.0200445434298442, + "grad_norm": 59.96177291870117, + "learning_rate": 1e-06, + "loss": 1.213, + "num_input_tokens_seen": 25645984, + "step": 458 + }, + { + "epoch": 1.0200445434298442, + "loss": 1.2157493829727173, + "loss_ce": 0.006276742089539766, + "loss_iou": 0.455078125, + "loss_num": 0.06005859375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 25645984, + "step": 458 + }, + { + "epoch": 1.022271714922049, + "grad_norm": 22.60318946838379, + "learning_rate": 1e-06, + "loss": 1.5715, + "num_input_tokens_seen": 25701456, + "step": 459 + }, + { + "epoch": 1.022271714922049, + "loss": 1.8298416137695312, + "loss_ce": 0.006599403452128172, + "loss_iou": 0.75390625, + "loss_num": 0.06298828125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 25701456, + "step": 459 + }, + { + "epoch": 1.024498886414254, + "grad_norm": 30.009733200073242, + "learning_rate": 1e-06, + "loss": 1.2751, + "num_input_tokens_seen": 25760304, + "step": 460 + }, + { + "epoch": 1.024498886414254, + "loss": 0.9823180437088013, + "loss_ce": 0.008441124111413956, + "loss_iou": 0.388671875, + "loss_num": 0.0390625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 25760304, + "step": 460 + }, + { + "epoch": 1.0267260579064588, + "grad_norm": 29.346010208129883, + "learning_rate": 1e-06, + "loss": 1.0664, + "num_input_tokens_seen": 25812724, + "step": 461 + }, + { + "epoch": 1.0267260579064588, + "loss": 0.9884135723114014, + "loss_ce": 0.006968258880078793, + "loss_iou": 0.396484375, + "loss_num": 0.03759765625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 25812724, + "step": 461 + }, + { + "epoch": 1.0289532293986636, + "grad_norm": 75.70279693603516, + "learning_rate": 1e-06, + "loss": 1.2436, + "num_input_tokens_seen": 25866680, + "step": 462 + }, + { + "epoch": 1.0289532293986636, + "loss": 1.3562824726104736, + "loss_ce": 0.010579358786344528, + "loss_iou": 0.5625, + "loss_num": 0.04443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 25866680, + "step": 462 + }, + { + "epoch": 1.0311804008908685, + "grad_norm": 54.88785171508789, + "learning_rate": 1e-06, + "loss": 1.2655, + "num_input_tokens_seen": 25920128, + "step": 463 + }, + { + "epoch": 1.0311804008908685, + "loss": 1.265822172164917, + "loss_ce": 0.002150336978957057, + "loss_iou": 0.486328125, + "loss_num": 0.05810546875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 25920128, + "step": 463 + }, + { + "epoch": 1.0334075723830736, + "grad_norm": 24.761333465576172, + "learning_rate": 1e-06, + "loss": 1.2714, + "num_input_tokens_seen": 25973036, + "step": 464 + }, + { + "epoch": 1.0334075723830736, + "loss": 1.3659954071044922, + "loss_ce": 0.0012494358234107494, + "loss_iou": 0.515625, + "loss_num": 0.0673828125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 25973036, + "step": 464 + }, + { + "epoch": 1.0356347438752784, + "grad_norm": 23.98271369934082, + "learning_rate": 1e-06, + "loss": 1.3715, + "num_input_tokens_seen": 26025288, + "step": 465 + }, + { + "epoch": 1.0356347438752784, + "loss": 1.3842101097106934, + "loss_ce": 0.007867315784096718, + "loss_iou": 0.546875, + "loss_num": 0.056640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 26025288, + "step": 465 + }, + { + "epoch": 1.0378619153674833, + "grad_norm": 21.38801383972168, + "learning_rate": 1e-06, + "loss": 1.2095, + "num_input_tokens_seen": 26082480, + "step": 466 + }, + { + "epoch": 1.0378619153674833, + "loss": 1.2851924896240234, + "loss_ce": 0.0010128666181117296, + "loss_iou": 0.50390625, + "loss_num": 0.054931640625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 26082480, + "step": 466 + }, + { + "epoch": 1.0400890868596881, + "grad_norm": 25.396892547607422, + "learning_rate": 1e-06, + "loss": 1.0663, + "num_input_tokens_seen": 26139504, + "step": 467 + }, + { + "epoch": 1.0400890868596881, + "loss": 1.0588932037353516, + "loss_ce": 0.0007877358002588153, + "loss_iou": 0.44140625, + "loss_num": 0.03466796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 26139504, + "step": 467 + }, + { + "epoch": 1.042316258351893, + "grad_norm": 18.479541778564453, + "learning_rate": 1e-06, + "loss": 1.2228, + "num_input_tokens_seen": 26196448, + "step": 468 + }, + { + "epoch": 1.042316258351893, + "loss": 1.2266954183578491, + "loss_ce": 0.0011094606015831232, + "loss_iou": 0.482421875, + "loss_num": 0.052001953125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 26196448, + "step": 468 + }, + { + "epoch": 1.044543429844098, + "grad_norm": 19.95421028137207, + "learning_rate": 1e-06, + "loss": 1.1337, + "num_input_tokens_seen": 26250848, + "step": 469 + }, + { + "epoch": 1.044543429844098, + "loss": 1.1349093914031982, + "loss_ce": 0.0011203193571418524, + "loss_iou": 0.421875, + "loss_num": 0.05859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 26250848, + "step": 469 + }, + { + "epoch": 1.046770601336303, + "grad_norm": 29.218976974487305, + "learning_rate": 1e-06, + "loss": 1.0506, + "num_input_tokens_seen": 26304848, + "step": 470 + }, + { + "epoch": 1.046770601336303, + "loss": 0.7857218980789185, + "loss_ce": 0.0015422652941197157, + "loss_iou": 0.267578125, + "loss_num": 0.049560546875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 26304848, + "step": 470 + }, + { + "epoch": 1.0489977728285078, + "grad_norm": 26.657873153686523, + "learning_rate": 1e-06, + "loss": 1.2585, + "num_input_tokens_seen": 26362040, + "step": 471 + }, + { + "epoch": 1.0489977728285078, + "loss": 1.1070148944854736, + "loss_ce": 0.0015461579896509647, + "loss_iou": 0.4453125, + "loss_num": 0.04345703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 26362040, + "step": 471 + }, + { + "epoch": 1.0512249443207127, + "grad_norm": 24.098112106323242, + "learning_rate": 1e-06, + "loss": 1.2629, + "num_input_tokens_seen": 26418744, + "step": 472 + }, + { + "epoch": 1.0512249443207127, + "loss": 1.3726317882537842, + "loss_ce": 0.008373986929655075, + "loss_iou": 0.53125, + "loss_num": 0.0595703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 26418744, + "step": 472 + }, + { + "epoch": 1.0534521158129175, + "grad_norm": 18.4561767578125, + "learning_rate": 1e-06, + "loss": 1.0389, + "num_input_tokens_seen": 26476200, + "step": 473 + }, + { + "epoch": 1.0534521158129175, + "loss": 1.1621673107147217, + "loss_ce": 0.0010345308110117912, + "loss_iou": 0.453125, + "loss_num": 0.051025390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 26476200, + "step": 473 + }, + { + "epoch": 1.0556792873051224, + "grad_norm": 22.120088577270508, + "learning_rate": 1e-06, + "loss": 1.2004, + "num_input_tokens_seen": 26532420, + "step": 474 + }, + { + "epoch": 1.0556792873051224, + "loss": 1.0202289819717407, + "loss_ce": 0.0014302136842161417, + "loss_iou": 0.384765625, + "loss_num": 0.050048828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 26532420, + "step": 474 + }, + { + "epoch": 1.0579064587973275, + "grad_norm": 28.964649200439453, + "learning_rate": 1e-06, + "loss": 1.2759, + "num_input_tokens_seen": 26585900, + "step": 475 + }, + { + "epoch": 1.0579064587973275, + "loss": 1.5081238746643066, + "loss_ce": 0.0017761469352990389, + "loss_iou": 0.57421875, + "loss_num": 0.072265625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 26585900, + "step": 475 + }, + { + "epoch": 1.0601336302895323, + "grad_norm": 18.955963134765625, + "learning_rate": 1e-06, + "loss": 1.2483, + "num_input_tokens_seen": 26640324, + "step": 476 + }, + { + "epoch": 1.0601336302895323, + "loss": 1.3863422870635986, + "loss_ce": 0.0040180860087275505, + "loss_iou": 0.6015625, + "loss_num": 0.03564453125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 26640324, + "step": 476 + }, + { + "epoch": 1.0623608017817372, + "grad_norm": 29.491914749145508, + "learning_rate": 1e-06, + "loss": 1.2475, + "num_input_tokens_seen": 26694532, + "step": 477 + }, + { + "epoch": 1.0623608017817372, + "loss": 0.9849135875701904, + "loss_ce": 0.0007827761583030224, + "loss_iou": 0.369140625, + "loss_num": 0.048828125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 26694532, + "step": 477 + }, + { + "epoch": 1.064587973273942, + "grad_norm": 65.8954849243164, + "learning_rate": 1e-06, + "loss": 1.4668, + "num_input_tokens_seen": 26749932, + "step": 478 + }, + { + "epoch": 1.064587973273942, + "loss": 1.4816560745239258, + "loss_ce": 0.001187233254313469, + "loss_iou": 0.5703125, + "loss_num": 0.06787109375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 26749932, + "step": 478 + }, + { + "epoch": 1.066815144766147, + "grad_norm": 19.884841918945312, + "learning_rate": 1e-06, + "loss": 1.1807, + "num_input_tokens_seen": 26804456, + "step": 479 + }, + { + "epoch": 1.066815144766147, + "loss": 1.2711074352264404, + "loss_ce": 0.002064512576907873, + "loss_iou": 0.5234375, + "loss_num": 0.044189453125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 26804456, + "step": 479 + }, + { + "epoch": 1.069042316258352, + "grad_norm": 22.818063735961914, + "learning_rate": 1e-06, + "loss": 1.3075, + "num_input_tokens_seen": 26860280, + "step": 480 + }, + { + "epoch": 1.069042316258352, + "loss": 1.333120584487915, + "loss_ce": 0.0010893936268985271, + "loss_iou": 0.5546875, + "loss_num": 0.045166015625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 26860280, + "step": 480 + }, + { + "epoch": 1.0712694877505569, + "grad_norm": 21.60323715209961, + "learning_rate": 1e-06, + "loss": 1.1227, + "num_input_tokens_seen": 26915848, + "step": 481 + }, + { + "epoch": 1.0712694877505569, + "loss": 1.1749460697174072, + "loss_ce": 0.002582841319963336, + "loss_iou": 0.498046875, + "loss_num": 0.03564453125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 26915848, + "step": 481 + }, + { + "epoch": 1.0734966592427617, + "grad_norm": 17.057666778564453, + "learning_rate": 1e-06, + "loss": 0.963, + "num_input_tokens_seen": 26973348, + "step": 482 + }, + { + "epoch": 1.0734966592427617, + "loss": 1.024942398071289, + "loss_ce": 0.001016615773551166, + "loss_iou": 0.4296875, + "loss_num": 0.033203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 26973348, + "step": 482 + }, + { + "epoch": 1.0757238307349666, + "grad_norm": 26.440898895263672, + "learning_rate": 1e-06, + "loss": 1.2304, + "num_input_tokens_seen": 27030864, + "step": 483 + }, + { + "epoch": 1.0757238307349666, + "loss": 1.3861130475997925, + "loss_ce": 0.00134742702357471, + "loss_iou": 0.52734375, + "loss_num": 0.06640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 27030864, + "step": 483 + }, + { + "epoch": 1.0779510022271714, + "grad_norm": 19.473052978515625, + "learning_rate": 1e-06, + "loss": 1.494, + "num_input_tokens_seen": 27084476, + "step": 484 + }, + { + "epoch": 1.0779510022271714, + "loss": 1.7679762840270996, + "loss_ce": 0.0008863758994266391, + "loss_iou": 0.70703125, + "loss_num": 0.07080078125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 27084476, + "step": 484 + }, + { + "epoch": 1.0801781737193763, + "grad_norm": 22.302900314331055, + "learning_rate": 1e-06, + "loss": 1.3245, + "num_input_tokens_seen": 27138164, + "step": 485 + }, + { + "epoch": 1.0801781737193763, + "loss": 1.4490606784820557, + "loss_ce": 0.0008185390033759177, + "loss_iou": 0.6171875, + "loss_num": 0.042724609375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 27138164, + "step": 485 + }, + { + "epoch": 1.0824053452115814, + "grad_norm": 27.68915557861328, + "learning_rate": 1e-06, + "loss": 0.9771, + "num_input_tokens_seen": 27195028, + "step": 486 + }, + { + "epoch": 1.0824053452115814, + "loss": 0.880718469619751, + "loss_ce": 0.0018122631590813398, + "loss_iou": 0.369140625, + "loss_num": 0.0281982421875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 27195028, + "step": 486 + }, + { + "epoch": 1.0846325167037862, + "grad_norm": 27.754865646362305, + "learning_rate": 1e-06, + "loss": 1.3244, + "num_input_tokens_seen": 27250504, + "step": 487 + }, + { + "epoch": 1.0846325167037862, + "loss": 1.085121750831604, + "loss_ce": 0.0006490740925073624, + "loss_iou": 0.4296875, + "loss_num": 0.044677734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 27250504, + "step": 487 + }, + { + "epoch": 1.086859688195991, + "grad_norm": 29.184783935546875, + "learning_rate": 1e-06, + "loss": 1.131, + "num_input_tokens_seen": 27306092, + "step": 488 + }, + { + "epoch": 1.086859688195991, + "loss": 1.2078437805175781, + "loss_ce": 0.007404354866594076, + "loss_iou": 0.453125, + "loss_num": 0.058837890625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 27306092, + "step": 488 + }, + { + "epoch": 1.089086859688196, + "grad_norm": 16.471162796020508, + "learning_rate": 1e-06, + "loss": 1.1771, + "num_input_tokens_seen": 27362708, + "step": 489 + }, + { + "epoch": 1.089086859688196, + "loss": 1.2551052570343018, + "loss_ce": 0.0007107860874384642, + "loss_iou": 0.52734375, + "loss_num": 0.040283203125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 27362708, + "step": 489 + }, + { + "epoch": 1.0913140311804008, + "grad_norm": 36.433353424072266, + "learning_rate": 1e-06, + "loss": 1.1646, + "num_input_tokens_seen": 27421108, + "step": 490 + }, + { + "epoch": 1.0913140311804008, + "loss": 1.1176813840866089, + "loss_ce": 0.0014704548520967364, + "loss_iou": 0.46484375, + "loss_num": 0.037841796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 27421108, + "step": 490 + }, + { + "epoch": 1.093541202672606, + "grad_norm": 21.38726806640625, + "learning_rate": 1e-06, + "loss": 1.1923, + "num_input_tokens_seen": 27475860, + "step": 491 + }, + { + "epoch": 1.093541202672606, + "loss": 1.053163766860962, + "loss_ce": 0.0058004953898489475, + "loss_iou": 0.41796875, + "loss_num": 0.042236328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 27475860, + "step": 491 + }, + { + "epoch": 1.0957683741648108, + "grad_norm": 32.688072204589844, + "learning_rate": 1e-06, + "loss": 1.0674, + "num_input_tokens_seen": 27533372, + "step": 492 + }, + { + "epoch": 1.0957683741648108, + "loss": 0.7940797805786133, + "loss_ce": 0.01527119055390358, + "loss_iou": 0.3359375, + "loss_num": 0.021240234375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 27533372, + "step": 492 + }, + { + "epoch": 1.0979955456570156, + "grad_norm": 20.838420867919922, + "learning_rate": 1e-06, + "loss": 1.4187, + "num_input_tokens_seen": 27588776, + "step": 493 + }, + { + "epoch": 1.0979955456570156, + "loss": 1.4859492778778076, + "loss_ce": 0.0015742178075015545, + "loss_iou": 0.5625, + "loss_num": 0.0712890625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 27588776, + "step": 493 + }, + { + "epoch": 1.1002227171492205, + "grad_norm": 48.34200668334961, + "learning_rate": 1e-06, + "loss": 1.3155, + "num_input_tokens_seen": 27646412, + "step": 494 + }, + { + "epoch": 1.1002227171492205, + "loss": 1.6553877592086792, + "loss_ce": 0.004997197538614273, + "loss_iou": 0.65234375, + "loss_num": 0.06884765625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 27646412, + "step": 494 + }, + { + "epoch": 1.1024498886414253, + "grad_norm": 23.207468032836914, + "learning_rate": 1e-06, + "loss": 1.1153, + "num_input_tokens_seen": 27702240, + "step": 495 + }, + { + "epoch": 1.1024498886414253, + "loss": 1.4165308475494385, + "loss_ce": 0.003933266270905733, + "loss_iou": 0.57421875, + "loss_num": 0.05322265625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 27702240, + "step": 495 + }, + { + "epoch": 1.1046770601336302, + "grad_norm": 15.092357635498047, + "learning_rate": 1e-06, + "loss": 0.9602, + "num_input_tokens_seen": 27759140, + "step": 496 + }, + { + "epoch": 1.1046770601336302, + "loss": 1.09522545337677, + "loss_ce": 0.002940312959253788, + "loss_iou": 0.439453125, + "loss_num": 0.04248046875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 27759140, + "step": 496 + }, + { + "epoch": 1.1069042316258353, + "grad_norm": 23.996767044067383, + "learning_rate": 1e-06, + "loss": 1.2124, + "num_input_tokens_seen": 27816900, + "step": 497 + }, + { + "epoch": 1.1069042316258353, + "loss": 1.2346551418304443, + "loss_ce": 0.004186346661299467, + "loss_iou": 0.53125, + "loss_num": 0.033447265625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 27816900, + "step": 497 + }, + { + "epoch": 1.1091314031180401, + "grad_norm": 154.3430633544922, + "learning_rate": 1e-06, + "loss": 1.2454, + "num_input_tokens_seen": 27875428, + "step": 498 + }, + { + "epoch": 1.1091314031180401, + "loss": 1.4324363470077515, + "loss_ce": 0.0007956642657518387, + "loss_iou": 0.59765625, + "loss_num": 0.047607421875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 27875428, + "step": 498 + }, + { + "epoch": 1.111358574610245, + "grad_norm": 30.189279556274414, + "learning_rate": 1e-06, + "loss": 1.4237, + "num_input_tokens_seen": 27930704, + "step": 499 + }, + { + "epoch": 1.111358574610245, + "loss": 1.6740535497665405, + "loss_ce": 0.0021784906275570393, + "loss_iou": 0.62109375, + "loss_num": 0.08642578125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 27930704, + "step": 499 + }, + { + "epoch": 1.1135857461024499, + "grad_norm": 49.67326354980469, + "learning_rate": 1e-06, + "loss": 1.2495, + "num_input_tokens_seen": 27984260, + "step": 500 + }, + { + "epoch": 1.1135857461024499, + "eval_seeclick_web_CIoU": 0.48640576004981995, + "eval_seeclick_web_GIoU": 0.47375138103961945, + "eval_seeclick_web_IoU": 0.5116834044456482, + "eval_seeclick_web_MAE_all": 0.01661441382020712, + "eval_seeclick_web_MAE_h": 0.01039309287443757, + "eval_seeclick_web_MAE_w": 0.017452615778893232, + "eval_seeclick_web_MAE_x_boxes": 0.010556747671216726, + "eval_seeclick_web_MAE_y_boxes": 0.020513739669695497, + "eval_seeclick_web_inside_bbox": 0.8576388955116272, + "eval_seeclick_web_loss": 1.1193333864212036, + "eval_seeclick_web_loss_ce": 0.0005516507662832737, + "eval_seeclick_web_loss_iou": 0.519775390625, + "eval_seeclick_web_loss_num": 0.014141082763671875, + "eval_seeclick_web_loss_xval": 1.110107421875, + "eval_seeclick_web_runtime": 19.1191, + "eval_seeclick_web_samples_per_second": 2.615, + "eval_seeclick_web_steps_per_second": 0.105, + "num_input_tokens_seen": 27984260, + "step": 500 + }, + { + "epoch": 1.1135857461024499, + "eval_icons_CIoU": 0.3478478938341141, + "eval_icons_GIoU": 0.39389291405677795, + "eval_icons_IoU": 0.4208338111639023, + "eval_icons_MAE_all": 0.06633740290999413, + "eval_icons_MAE_h": 0.03294991096481681, + "eval_icons_MAE_w": 0.06817016191780567, + "eval_icons_MAE_x_boxes": 0.06793619319796562, + "eval_icons_MAE_y_boxes": 0.033266451209783554, + "eval_icons_inside_bbox": 0.7048611044883728, + "eval_icons_loss": 1.5932660102844238, + "eval_icons_loss_ce": 0.0014974797377362847, + "eval_icons_loss_iou": 0.614501953125, + "eval_icons_loss_num": 0.06303977966308594, + "eval_icons_loss_xval": 1.544677734375, + "eval_icons_runtime": 16.8196, + "eval_icons_samples_per_second": 2.973, + "eval_icons_steps_per_second": 0.119, + "num_input_tokens_seen": 27984260, + "step": 500 + }, + { + "epoch": 1.1135857461024499, + "eval_screenspot_CIoU": 0.2469616780678431, + "eval_screenspot_GIoU": 0.2680433491865794, + "eval_screenspot_IoU": 0.33675894141197205, + "eval_screenspot_MAE_all": 0.09882631152868271, + "eval_screenspot_MAE_h": 0.06025169417262077, + "eval_screenspot_MAE_w": 0.10315311948458354, + "eval_screenspot_MAE_x_boxes": 0.11736861368020375, + "eval_screenspot_MAE_y_boxes": 0.0693823571006457, + "eval_screenspot_inside_bbox": 0.5570833285649618, + "eval_screenspot_loss": 1.9871058464050293, + "eval_screenspot_loss_ce": 0.006458223797380924, + "eval_screenspot_loss_iou": 0.7589518229166666, + "eval_screenspot_loss_num": 0.10675303141276042, + "eval_screenspot_loss_xval": 2.0504557291666665, + "eval_screenspot_runtime": 27.6396, + "eval_screenspot_samples_per_second": 3.22, + "eval_screenspot_steps_per_second": 0.109, + "num_input_tokens_seen": 27984260, + "step": 500 + }, + { + "epoch": 1.1135857461024499, + "eval_compot_CIoU": 0.2672244608402252, + "eval_compot_GIoU": 0.2929573655128479, + "eval_compot_IoU": 0.333434134721756, + "eval_compot_MAE_all": 0.03275496046990156, + "eval_compot_MAE_h": 0.015514453873038292, + "eval_compot_MAE_w": 0.043023936450481415, + "eval_compot_MAE_x_boxes": 0.04298969078809023, + "eval_compot_MAE_y_boxes": 0.009457055712118745, + "eval_compot_inside_bbox": 0.5590277910232544, + "eval_compot_loss": 1.6115740537643433, + "eval_compot_loss_ce": 0.0006396450917236507, + "eval_compot_loss_iou": 0.701904296875, + "eval_compot_loss_num": 0.02944183349609375, + "eval_compot_loss_xval": 1.55078125, + "eval_compot_runtime": 18.007, + "eval_compot_samples_per_second": 2.777, + "eval_compot_steps_per_second": 0.111, + "num_input_tokens_seen": 27984260, + "step": 500 + }, + { + "epoch": 1.1135857461024499, + "eval_custom_ui_val_CIoU": 0.3800675223271052, + "eval_custom_ui_val_GIoU": 0.4050305353270637, + "eval_custom_ui_val_IoU": 0.43855932023790145, + "eval_custom_ui_val_MAE_all": 0.046337926760315895, + "eval_custom_ui_val_MAE_h": 0.026893117154637974, + "eval_custom_ui_val_MAE_w": 0.05207906601329645, + "eval_custom_ui_val_MAE_x_boxes": 0.046116245082683034, + "eval_custom_ui_val_MAE_y_boxes": 0.030045698396861553, + "eval_custom_ui_val_inside_bbox": 0.6608796318372091, + "eval_custom_ui_val_loss": 1.4429850578308105, + "eval_custom_ui_val_loss_ce": 0.0016363520019998152, + "eval_custom_ui_val_loss_iou": 0.5924343532986112, + "eval_custom_ui_val_loss_num": 0.04485532972547743, + "eval_custom_ui_val_loss_xval": 1.4086642795138888, + "eval_custom_ui_val_runtime": 55.8893, + "eval_custom_ui_val_samples_per_second": 4.742, + "eval_custom_ui_val_steps_per_second": 0.161, + "num_input_tokens_seen": 27984260, + "step": 500 + }, + { + "epoch": 1.1135857461024499, + "loss": 1.1450822353363037, + "loss_ce": 0.0012833788059651852, + "loss_iou": 0.482421875, + "loss_num": 0.0361328125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 27984260, + "step": 500 + }, + { + "epoch": 1.1158129175946547, + "grad_norm": 604.319580078125, + "learning_rate": 1e-06, + "loss": 1.4076, + "num_input_tokens_seen": 28041640, + "step": 501 + }, + { + "epoch": 1.1158129175946547, + "loss": 1.3554702997207642, + "loss_ce": 0.0037856735289096832, + "loss_iou": 0.55078125, + "loss_num": 0.0498046875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 28041640, + "step": 501 + }, + { + "epoch": 1.1180400890868596, + "grad_norm": 25.75654411315918, + "learning_rate": 1e-06, + "loss": 1.1217, + "num_input_tokens_seen": 28095584, + "step": 502 + }, + { + "epoch": 1.1180400890868596, + "loss": 1.170928955078125, + "loss_ce": 0.01809690147638321, + "loss_iou": 0.45703125, + "loss_num": 0.047607421875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 28095584, + "step": 502 + }, + { + "epoch": 1.1202672605790647, + "grad_norm": 25.978004455566406, + "learning_rate": 1e-06, + "loss": 1.3928, + "num_input_tokens_seen": 28152476, + "step": 503 + }, + { + "epoch": 1.1202672605790647, + "loss": 1.4479464292526245, + "loss_ce": 0.001657373970374465, + "loss_iou": 0.5625, + "loss_num": 0.06396484375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 28152476, + "step": 503 + }, + { + "epoch": 1.1224944320712695, + "grad_norm": 26.65591049194336, + "learning_rate": 1e-06, + "loss": 1.2271, + "num_input_tokens_seen": 28207068, + "step": 504 + }, + { + "epoch": 1.1224944320712695, + "loss": 1.1432322263717651, + "loss_ce": 0.007001784630119801, + "loss_iou": 0.455078125, + "loss_num": 0.045654296875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 28207068, + "step": 504 + }, + { + "epoch": 1.1247216035634744, + "grad_norm": 54.66550064086914, + "learning_rate": 1e-06, + "loss": 1.1682, + "num_input_tokens_seen": 28259960, + "step": 505 + }, + { + "epoch": 1.1247216035634744, + "loss": 1.2690539360046387, + "loss_ce": 0.0009875732939690351, + "loss_iou": 0.51953125, + "loss_num": 0.0458984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 28259960, + "step": 505 + }, + { + "epoch": 1.1269487750556793, + "grad_norm": 47.31111145019531, + "learning_rate": 1e-06, + "loss": 1.1786, + "num_input_tokens_seen": 28320020, + "step": 506 + }, + { + "epoch": 1.1269487750556793, + "loss": 1.4288506507873535, + "loss_ce": 0.0045341607183218, + "loss_iou": 0.58984375, + "loss_num": 0.04931640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 28320020, + "step": 506 + }, + { + "epoch": 1.1291759465478841, + "grad_norm": 19.817325592041016, + "learning_rate": 1e-06, + "loss": 1.2798, + "num_input_tokens_seen": 28376252, + "step": 507 + }, + { + "epoch": 1.1291759465478841, + "loss": 0.7173053622245789, + "loss_ce": 0.0009967784862965345, + "loss_iou": 0.294921875, + "loss_num": 0.025634765625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 28376252, + "step": 507 + }, + { + "epoch": 1.131403118040089, + "grad_norm": 29.8699951171875, + "learning_rate": 1e-06, + "loss": 1.0068, + "num_input_tokens_seen": 28428956, + "step": 508 + }, + { + "epoch": 1.131403118040089, + "loss": 1.0889060497283936, + "loss_ce": 0.0010153307812288404, + "loss_iou": 0.46484375, + "loss_num": 0.031982421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 28428956, + "step": 508 + }, + { + "epoch": 1.133630289532294, + "grad_norm": 33.13240051269531, + "learning_rate": 1e-06, + "loss": 1.3036, + "num_input_tokens_seen": 28484500, + "step": 509 + }, + { + "epoch": 1.133630289532294, + "loss": 1.068528652191162, + "loss_ce": 0.003343157237395644, + "loss_iou": 0.4375, + "loss_num": 0.037841796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 28484500, + "step": 509 + }, + { + "epoch": 1.135857461024499, + "grad_norm": 28.9699764251709, + "learning_rate": 1e-06, + "loss": 0.9363, + "num_input_tokens_seen": 28537936, + "step": 510 + }, + { + "epoch": 1.135857461024499, + "loss": 0.9706677198410034, + "loss_ce": 0.005823939107358456, + "loss_iou": 0.419921875, + "loss_num": 0.0252685546875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 28537936, + "step": 510 + }, + { + "epoch": 1.1380846325167038, + "grad_norm": 24.593473434448242, + "learning_rate": 1e-06, + "loss": 1.2115, + "num_input_tokens_seen": 28595228, + "step": 511 + }, + { + "epoch": 1.1380846325167038, + "loss": 1.0602378845214844, + "loss_ce": 0.0006675816257484257, + "loss_iou": 0.455078125, + "loss_num": 0.0294189453125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 28595228, + "step": 511 + }, + { + "epoch": 1.1403118040089086, + "grad_norm": 24.511798858642578, + "learning_rate": 1e-06, + "loss": 1.2554, + "num_input_tokens_seen": 28650712, + "step": 512 + }, + { + "epoch": 1.1403118040089086, + "loss": 1.273465871810913, + "loss_ce": 0.0049112411215901375, + "loss_iou": 0.55859375, + "loss_num": 0.0306396484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 28650712, + "step": 512 + }, + { + "epoch": 1.1425389755011135, + "grad_norm": 23.49129295349121, + "learning_rate": 1e-06, + "loss": 1.1208, + "num_input_tokens_seen": 28708008, + "step": 513 + }, + { + "epoch": 1.1425389755011135, + "loss": 1.1443027257919312, + "loss_ce": 0.005630870349705219, + "loss_iou": 0.46875, + "loss_num": 0.040283203125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 28708008, + "step": 513 + }, + { + "epoch": 1.1447661469933186, + "grad_norm": 22.09214973449707, + "learning_rate": 1e-06, + "loss": 1.2605, + "num_input_tokens_seen": 28762704, + "step": 514 + }, + { + "epoch": 1.1447661469933186, + "loss": 1.2734076976776123, + "loss_ce": 0.018280737102031708, + "loss_iou": 0.484375, + "loss_num": 0.05712890625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 28762704, + "step": 514 + }, + { + "epoch": 1.1469933184855234, + "grad_norm": 36.97422790527344, + "learning_rate": 1e-06, + "loss": 1.2453, + "num_input_tokens_seen": 28820084, + "step": 515 + }, + { + "epoch": 1.1469933184855234, + "loss": 1.241039514541626, + "loss_ce": 0.0012934368569403887, + "loss_iou": 0.52734375, + "loss_num": 0.037109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 28820084, + "step": 515 + }, + { + "epoch": 1.1492204899777283, + "grad_norm": 19.85295867919922, + "learning_rate": 1e-06, + "loss": 1.346, + "num_input_tokens_seen": 28874500, + "step": 516 + }, + { + "epoch": 1.1492204899777283, + "loss": 1.231957197189331, + "loss_ce": 0.0005119675770401955, + "loss_iou": 0.5, + "loss_num": 0.046630859375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 28874500, + "step": 516 + }, + { + "epoch": 1.1514476614699332, + "grad_norm": 34.39645767211914, + "learning_rate": 1e-06, + "loss": 1.3985, + "num_input_tokens_seen": 28931164, + "step": 517 + }, + { + "epoch": 1.1514476614699332, + "loss": 1.2988076210021973, + "loss_ce": 0.0019326311303302646, + "loss_iou": 0.50390625, + "loss_num": 0.05810546875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 28931164, + "step": 517 + }, + { + "epoch": 1.153674832962138, + "grad_norm": 20.85076141357422, + "learning_rate": 1e-06, + "loss": 1.3304, + "num_input_tokens_seen": 28987576, + "step": 518 + }, + { + "epoch": 1.153674832962138, + "loss": 1.326310157775879, + "loss_ce": 0.005509458482265472, + "loss_iou": 0.5234375, + "loss_num": 0.0546875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 28987576, + "step": 518 + }, + { + "epoch": 1.1559020044543429, + "grad_norm": 25.91316795349121, + "learning_rate": 1e-06, + "loss": 1.0788, + "num_input_tokens_seen": 29045416, + "step": 519 + }, + { + "epoch": 1.1559020044543429, + "loss": 0.9822598695755005, + "loss_ce": 0.0008145694737322628, + "loss_iou": 0.4296875, + "loss_num": 0.0242919921875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 29045416, + "step": 519 + }, + { + "epoch": 1.158129175946548, + "grad_norm": 19.741249084472656, + "learning_rate": 1e-06, + "loss": 1.179, + "num_input_tokens_seen": 29100408, + "step": 520 + }, + { + "epoch": 1.158129175946548, + "loss": 1.2618613243103027, + "loss_ce": 0.0020956983789801598, + "loss_iou": 0.486328125, + "loss_num": 0.05712890625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 29100408, + "step": 520 + }, + { + "epoch": 1.1603563474387528, + "grad_norm": 18.815519332885742, + "learning_rate": 1e-06, + "loss": 1.0117, + "num_input_tokens_seen": 29158568, + "step": 521 + }, + { + "epoch": 1.1603563474387528, + "loss": 1.15644371509552, + "loss_ce": 0.00605308311060071, + "loss_iou": 0.482421875, + "loss_num": 0.037109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 29158568, + "step": 521 + }, + { + "epoch": 1.1625835189309577, + "grad_norm": 37.82133865356445, + "learning_rate": 1e-06, + "loss": 1.617, + "num_input_tokens_seen": 29216180, + "step": 522 + }, + { + "epoch": 1.1625835189309577, + "loss": 1.670715570449829, + "loss_ce": 0.003723357105627656, + "loss_iou": 0.63671875, + "loss_num": 0.0791015625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 29216180, + "step": 522 + }, + { + "epoch": 1.1648106904231625, + "grad_norm": 31.2655029296875, + "learning_rate": 1e-06, + "loss": 1.1777, + "num_input_tokens_seen": 29270996, + "step": 523 + }, + { + "epoch": 1.1648106904231625, + "loss": 1.0179691314697266, + "loss_ce": 0.0013676062226295471, + "loss_iou": 0.419921875, + "loss_num": 0.035400390625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 29270996, + "step": 523 + }, + { + "epoch": 1.1670378619153674, + "grad_norm": 32.841373443603516, + "learning_rate": 1e-06, + "loss": 1.4131, + "num_input_tokens_seen": 29327600, + "step": 524 + }, + { + "epoch": 1.1670378619153674, + "loss": 1.0806894302368164, + "loss_ce": 0.0006113001727499068, + "loss_iou": 0.439453125, + "loss_num": 0.039794921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 29327600, + "step": 524 + }, + { + "epoch": 1.1692650334075725, + "grad_norm": 14.620657920837402, + "learning_rate": 1e-06, + "loss": 1.228, + "num_input_tokens_seen": 29383468, + "step": 525 + }, + { + "epoch": 1.1692650334075725, + "loss": 1.1438016891479492, + "loss_ce": 0.0007352291722781956, + "loss_iou": 0.49609375, + "loss_num": 0.030517578125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 29383468, + "step": 525 + }, + { + "epoch": 1.1714922048997773, + "grad_norm": 38.983917236328125, + "learning_rate": 1e-06, + "loss": 1.0997, + "num_input_tokens_seen": 29438716, + "step": 526 + }, + { + "epoch": 1.1714922048997773, + "loss": 1.1212483644485474, + "loss_ce": 0.005525712855160236, + "loss_iou": 0.447265625, + "loss_num": 0.04443359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 29438716, + "step": 526 + }, + { + "epoch": 1.1737193763919822, + "grad_norm": 38.69413375854492, + "learning_rate": 1e-06, + "loss": 1.2774, + "num_input_tokens_seen": 29494100, + "step": 527 + }, + { + "epoch": 1.1737193763919822, + "loss": 0.9909718036651611, + "loss_ce": 0.0007374268025159836, + "loss_iou": 0.375, + "loss_num": 0.0478515625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 29494100, + "step": 527 + }, + { + "epoch": 1.175946547884187, + "grad_norm": 20.045331954956055, + "learning_rate": 1e-06, + "loss": 1.0815, + "num_input_tokens_seen": 29549828, + "step": 528 + }, + { + "epoch": 1.175946547884187, + "loss": 0.9373371005058289, + "loss_ce": 0.004231642000377178, + "loss_iou": 0.376953125, + "loss_num": 0.03564453125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 29549828, + "step": 528 + }, + { + "epoch": 1.178173719376392, + "grad_norm": 29.08791732788086, + "learning_rate": 1e-06, + "loss": 1.0875, + "num_input_tokens_seen": 29605928, + "step": 529 + }, + { + "epoch": 1.178173719376392, + "loss": 1.0599498748779297, + "loss_ce": 0.0018443877343088388, + "loss_iou": 0.404296875, + "loss_num": 0.050537109375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 29605928, + "step": 529 + }, + { + "epoch": 1.1804008908685968, + "grad_norm": 19.786157608032227, + "learning_rate": 1e-06, + "loss": 1.2501, + "num_input_tokens_seen": 29661192, + "step": 530 + }, + { + "epoch": 1.1804008908685968, + "loss": 1.2494618892669678, + "loss_ce": 0.0014150183415040374, + "loss_iou": 0.46484375, + "loss_num": 0.0634765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 29661192, + "step": 530 + }, + { + "epoch": 1.1826280623608019, + "grad_norm": 56.539695739746094, + "learning_rate": 1e-06, + "loss": 1.2396, + "num_input_tokens_seen": 29716704, + "step": 531 + }, + { + "epoch": 1.1826280623608019, + "loss": 1.2581884860992432, + "loss_ce": 0.0008642220636829734, + "loss_iou": 0.546875, + "loss_num": 0.033447265625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 29716704, + "step": 531 + }, + { + "epoch": 1.1848552338530067, + "grad_norm": 15.872360229492188, + "learning_rate": 1e-06, + "loss": 1.0118, + "num_input_tokens_seen": 29775024, + "step": 532 + }, + { + "epoch": 1.1848552338530067, + "loss": 0.7084531784057617, + "loss_ce": 0.0009336533839814365, + "loss_iou": 0.296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 29775024, + "step": 532 + }, + { + "epoch": 1.1870824053452116, + "grad_norm": 20.187984466552734, + "learning_rate": 1e-06, + "loss": 1.2427, + "num_input_tokens_seen": 29830880, + "step": 533 + }, + { + "epoch": 1.1870824053452116, + "loss": 1.1666477918624878, + "loss_ce": 0.001608791295439005, + "loss_iou": 0.50390625, + "loss_num": 0.03125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 29830880, + "step": 533 + }, + { + "epoch": 1.1893095768374164, + "grad_norm": 30.441001892089844, + "learning_rate": 1e-06, + "loss": 0.9994, + "num_input_tokens_seen": 29888668, + "step": 534 + }, + { + "epoch": 1.1893095768374164, + "loss": 1.0735273361206055, + "loss_ce": 0.00077339808922261, + "loss_iou": 0.453125, + "loss_num": 0.033203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 29888668, + "step": 534 + }, + { + "epoch": 1.1915367483296213, + "grad_norm": 24.198322296142578, + "learning_rate": 1e-06, + "loss": 0.9697, + "num_input_tokens_seen": 29946592, + "step": 535 + }, + { + "epoch": 1.1915367483296213, + "loss": 0.9628783464431763, + "loss_ce": 0.0009642738732509315, + "loss_iou": 0.376953125, + "loss_num": 0.04150390625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 29946592, + "step": 535 + }, + { + "epoch": 1.1937639198218264, + "grad_norm": 30.70039939880371, + "learning_rate": 1e-06, + "loss": 1.2909, + "num_input_tokens_seen": 30004576, + "step": 536 + }, + { + "epoch": 1.1937639198218264, + "loss": 1.1397030353546143, + "loss_ce": 0.0010311845690011978, + "loss_iou": 0.45703125, + "loss_num": 0.04541015625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 30004576, + "step": 536 + }, + { + "epoch": 1.1959910913140313, + "grad_norm": 40.41281509399414, + "learning_rate": 1e-06, + "loss": 1.3186, + "num_input_tokens_seen": 30056972, + "step": 537 + }, + { + "epoch": 1.1959910913140313, + "loss": 1.1098320484161377, + "loss_ce": 0.0004569512093439698, + "loss_iou": 0.490234375, + "loss_num": 0.0260009765625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 30056972, + "step": 537 + }, + { + "epoch": 1.1982182628062361, + "grad_norm": 25.64579963684082, + "learning_rate": 1e-06, + "loss": 1.2267, + "num_input_tokens_seen": 30111776, + "step": 538 + }, + { + "epoch": 1.1982182628062361, + "loss": 1.4003875255584717, + "loss_ce": 0.002682518446817994, + "loss_iou": 0.5625, + "loss_num": 0.05517578125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 30111776, + "step": 538 + }, + { + "epoch": 1.200445434298441, + "grad_norm": 62.563392639160156, + "learning_rate": 1e-06, + "loss": 1.1343, + "num_input_tokens_seen": 30168204, + "step": 539 + }, + { + "epoch": 1.200445434298441, + "loss": 1.2498741149902344, + "loss_ce": 0.00353623297996819, + "loss_iou": 0.48828125, + "loss_num": 0.053955078125, + "loss_xval": 1.25, + "num_input_tokens_seen": 30168204, + "step": 539 + }, + { + "epoch": 1.2026726057906458, + "grad_norm": 24.099092483520508, + "learning_rate": 1e-06, + "loss": 1.5677, + "num_input_tokens_seen": 30222172, + "step": 540 + }, + { + "epoch": 1.2026726057906458, + "loss": 1.189444899559021, + "loss_ce": 0.001456591533496976, + "loss_iou": 0.470703125, + "loss_num": 0.049560546875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 30222172, + "step": 540 + }, + { + "epoch": 1.2048997772828507, + "grad_norm": 14.803204536437988, + "learning_rate": 1e-06, + "loss": 1.2895, + "num_input_tokens_seen": 30277260, + "step": 541 + }, + { + "epoch": 1.2048997772828507, + "loss": 1.3928616046905518, + "loss_ce": 0.0027249492704868317, + "loss_iou": 0.5625, + "loss_num": 0.052490234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 30277260, + "step": 541 + }, + { + "epoch": 1.2071269487750558, + "grad_norm": 56.08818054199219, + "learning_rate": 1e-06, + "loss": 1.4108, + "num_input_tokens_seen": 30333120, + "step": 542 + }, + { + "epoch": 1.2071269487750558, + "loss": 1.4470124244689941, + "loss_ce": 0.001211727038025856, + "loss_iou": 0.59765625, + "loss_num": 0.049560546875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 30333120, + "step": 542 + }, + { + "epoch": 1.2093541202672606, + "grad_norm": 33.44521713256836, + "learning_rate": 1e-06, + "loss": 1.0997, + "num_input_tokens_seen": 30385752, + "step": 543 + }, + { + "epoch": 1.2093541202672606, + "loss": 0.8862332105636597, + "loss_ce": 0.0009793277131393552, + "loss_iou": 0.375, + "loss_num": 0.027099609375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 30385752, + "step": 543 + }, + { + "epoch": 1.2115812917594655, + "grad_norm": 20.260774612426758, + "learning_rate": 1e-06, + "loss": 1.3352, + "num_input_tokens_seen": 30441532, + "step": 544 + }, + { + "epoch": 1.2115812917594655, + "loss": 1.403501033782959, + "loss_ce": 0.0006689808797091246, + "loss_iou": 0.5234375, + "loss_num": 0.0712890625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 30441532, + "step": 544 + }, + { + "epoch": 1.2138084632516704, + "grad_norm": 20.101301193237305, + "learning_rate": 1e-06, + "loss": 1.0824, + "num_input_tokens_seen": 30495056, + "step": 545 + }, + { + "epoch": 1.2138084632516704, + "loss": 1.0261245965957642, + "loss_ce": 0.0004898015176877379, + "loss_iou": 0.431640625, + "loss_num": 0.0322265625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 30495056, + "step": 545 + }, + { + "epoch": 1.2160356347438752, + "grad_norm": 27.02719497680664, + "learning_rate": 1e-06, + "loss": 1.1653, + "num_input_tokens_seen": 30550680, + "step": 546 + }, + { + "epoch": 1.2160356347438752, + "loss": 1.1834897994995117, + "loss_ce": 0.0008725329535081983, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 30550680, + "step": 546 + }, + { + "epoch": 1.2182628062360803, + "grad_norm": 17.25623893737793, + "learning_rate": 1e-06, + "loss": 1.4252, + "num_input_tokens_seen": 30606964, + "step": 547 + }, + { + "epoch": 1.2182628062360803, + "loss": 1.1636087894439697, + "loss_ce": 0.0036967378109693527, + "loss_iou": 0.45703125, + "loss_num": 0.049072265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 30606964, + "step": 547 + }, + { + "epoch": 1.2204899777282852, + "grad_norm": 19.519060134887695, + "learning_rate": 1e-06, + "loss": 1.1066, + "num_input_tokens_seen": 30663460, + "step": 548 + }, + { + "epoch": 1.2204899777282852, + "loss": 1.1835308074951172, + "loss_ce": 0.0018901234725490212, + "loss_iou": 0.50390625, + "loss_num": 0.034912109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 30663460, + "step": 548 + }, + { + "epoch": 1.22271714922049, + "grad_norm": 25.55568504333496, + "learning_rate": 1e-06, + "loss": 1.2514, + "num_input_tokens_seen": 30721628, + "step": 549 + }, + { + "epoch": 1.22271714922049, + "loss": 1.3971433639526367, + "loss_ce": 0.0011472116457298398, + "loss_iou": 0.5625, + "loss_num": 0.0546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 30721628, + "step": 549 + }, + { + "epoch": 1.2249443207126949, + "grad_norm": 31.964738845825195, + "learning_rate": 1e-06, + "loss": 1.3476, + "num_input_tokens_seen": 30777708, + "step": 550 + }, + { + "epoch": 1.2249443207126949, + "loss": 1.316117286682129, + "loss_ce": 0.0006876069819554687, + "loss_iou": 0.5546875, + "loss_num": 0.041259765625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 30777708, + "step": 550 + }, + { + "epoch": 1.2271714922048997, + "grad_norm": 19.710111618041992, + "learning_rate": 1e-06, + "loss": 1.3899, + "num_input_tokens_seen": 30829420, + "step": 551 + }, + { + "epoch": 1.2271714922048997, + "loss": 1.4299864768981934, + "loss_ce": 0.01006464846432209, + "loss_iou": 0.57421875, + "loss_num": 0.053955078125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 30829420, + "step": 551 + }, + { + "epoch": 1.2293986636971046, + "grad_norm": 49.47341537475586, + "learning_rate": 1e-06, + "loss": 1.1639, + "num_input_tokens_seen": 30887668, + "step": 552 + }, + { + "epoch": 1.2293986636971046, + "loss": 1.0400408506393433, + "loss_ce": 0.0014665467897430062, + "loss_iou": 0.396484375, + "loss_num": 0.04931640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 30887668, + "step": 552 + }, + { + "epoch": 1.2316258351893095, + "grad_norm": 21.37660789489746, + "learning_rate": 1e-06, + "loss": 1.2537, + "num_input_tokens_seen": 30942632, + "step": 553 + }, + { + "epoch": 1.2316258351893095, + "loss": 1.3482673168182373, + "loss_ce": 0.0020758796017616987, + "loss_iou": 0.53515625, + "loss_num": 0.0556640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 30942632, + "step": 553 + }, + { + "epoch": 1.2338530066815145, + "grad_norm": 15.992125511169434, + "learning_rate": 1e-06, + "loss": 1.1998, + "num_input_tokens_seen": 31000952, + "step": 554 + }, + { + "epoch": 1.2338530066815145, + "loss": 1.3668153285980225, + "loss_ce": 0.0006044998299330473, + "loss_iou": 0.5859375, + "loss_num": 0.039306640625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 31000952, + "step": 554 + }, + { + "epoch": 1.2360801781737194, + "grad_norm": 22.379846572875977, + "learning_rate": 1e-06, + "loss": 1.3172, + "num_input_tokens_seen": 31058164, + "step": 555 + }, + { + "epoch": 1.2360801781737194, + "loss": 1.7307417392730713, + "loss_ce": 0.004179192706942558, + "loss_iou": 0.71484375, + "loss_num": 0.0595703125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 31058164, + "step": 555 + }, + { + "epoch": 1.2383073496659243, + "grad_norm": 17.48279571533203, + "learning_rate": 1e-06, + "loss": 1.0391, + "num_input_tokens_seen": 31113116, + "step": 556 + }, + { + "epoch": 1.2383073496659243, + "loss": 1.0837606191635132, + "loss_ce": 0.0009969680104404688, + "loss_iou": 0.380859375, + "loss_num": 0.06396484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 31113116, + "step": 556 + }, + { + "epoch": 1.2405345211581291, + "grad_norm": 26.064071655273438, + "learning_rate": 1e-06, + "loss": 1.2376, + "num_input_tokens_seen": 31172128, + "step": 557 + }, + { + "epoch": 1.2405345211581291, + "loss": 1.2718275785446167, + "loss_ce": 0.0022963983938097954, + "loss_iou": 0.52734375, + "loss_num": 0.04248046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 31172128, + "step": 557 + }, + { + "epoch": 1.242761692650334, + "grad_norm": 27.61939811706543, + "learning_rate": 1e-06, + "loss": 0.9737, + "num_input_tokens_seen": 31227572, + "step": 558 + }, + { + "epoch": 1.242761692650334, + "loss": 1.1241004467010498, + "loss_ce": 0.0010536747286096215, + "loss_iou": 0.47265625, + "loss_num": 0.03564453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 31227572, + "step": 558 + }, + { + "epoch": 1.244988864142539, + "grad_norm": 23.97410774230957, + "learning_rate": 1e-06, + "loss": 1.2226, + "num_input_tokens_seen": 31282092, + "step": 559 + }, + { + "epoch": 1.244988864142539, + "loss": 1.2522716522216797, + "loss_ce": 0.000806744210422039, + "loss_iou": 0.484375, + "loss_num": 0.056396484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 31282092, + "step": 559 + }, + { + "epoch": 1.247216035634744, + "grad_norm": 18.71125602722168, + "learning_rate": 1e-06, + "loss": 1.0672, + "num_input_tokens_seen": 31340076, + "step": 560 + }, + { + "epoch": 1.247216035634744, + "loss": 1.1468454599380493, + "loss_ce": 0.0008493656641803682, + "loss_iou": 0.482421875, + "loss_num": 0.036376953125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 31340076, + "step": 560 + }, + { + "epoch": 1.2494432071269488, + "grad_norm": 70.7640151977539, + "learning_rate": 1e-06, + "loss": 1.2216, + "num_input_tokens_seen": 31396448, + "step": 561 + }, + { + "epoch": 1.2494432071269488, + "loss": 1.2371618747711182, + "loss_ce": 0.013528996147215366, + "loss_iou": 0.515625, + "loss_num": 0.038330078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 31396448, + "step": 561 + }, + { + "epoch": 1.2516703786191536, + "grad_norm": 19.904014587402344, + "learning_rate": 1e-06, + "loss": 1.354, + "num_input_tokens_seen": 31453460, + "step": 562 + }, + { + "epoch": 1.2516703786191536, + "loss": 1.3166056871414185, + "loss_ce": 0.00117595330812037, + "loss_iou": 0.5234375, + "loss_num": 0.05322265625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 31453460, + "step": 562 + }, + { + "epoch": 1.2538975501113585, + "grad_norm": 24.498027801513672, + "learning_rate": 1e-06, + "loss": 1.0603, + "num_input_tokens_seen": 31509260, + "step": 563 + }, + { + "epoch": 1.2538975501113585, + "loss": 1.2601665258407593, + "loss_ce": 0.000645035644993186, + "loss_iou": 0.53515625, + "loss_num": 0.037841796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 31509260, + "step": 563 + }, + { + "epoch": 1.2561247216035634, + "grad_norm": 17.997339248657227, + "learning_rate": 1e-06, + "loss": 1.2419, + "num_input_tokens_seen": 31561832, + "step": 564 + }, + { + "epoch": 1.2561247216035634, + "loss": 1.050682783126831, + "loss_ce": 0.002831157995387912, + "loss_iou": 0.4453125, + "loss_num": 0.031494140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 31561832, + "step": 564 + }, + { + "epoch": 1.2583518930957684, + "grad_norm": 25.669340133666992, + "learning_rate": 1e-06, + "loss": 1.0692, + "num_input_tokens_seen": 31619832, + "step": 565 + }, + { + "epoch": 1.2583518930957684, + "loss": 1.0688444375991821, + "loss_ce": 0.0009733220795169473, + "loss_iou": 0.4453125, + "loss_num": 0.035888671875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 31619832, + "step": 565 + }, + { + "epoch": 1.2605790645879733, + "grad_norm": 16.837244033813477, + "learning_rate": 1e-06, + "loss": 1.2095, + "num_input_tokens_seen": 31676760, + "step": 566 + }, + { + "epoch": 1.2605790645879733, + "loss": 1.536379337310791, + "loss_ce": 0.0017113613430410624, + "loss_iou": 0.61328125, + "loss_num": 0.0615234375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 31676760, + "step": 566 + }, + { + "epoch": 1.2628062360801782, + "grad_norm": 16.443872451782227, + "learning_rate": 1e-06, + "loss": 1.4209, + "num_input_tokens_seen": 31730956, + "step": 567 + }, + { + "epoch": 1.2628062360801782, + "loss": 1.459883213043213, + "loss_ce": 0.0011430047452449799, + "loss_iou": 0.56640625, + "loss_num": 0.06591796875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 31730956, + "step": 567 + }, + { + "epoch": 1.265033407572383, + "grad_norm": 24.123031616210938, + "learning_rate": 1e-06, + "loss": 1.0963, + "num_input_tokens_seen": 31786932, + "step": 568 + }, + { + "epoch": 1.265033407572383, + "loss": 1.188434362411499, + "loss_ce": 0.0009342934936285019, + "loss_iou": 0.47265625, + "loss_num": 0.048828125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 31786932, + "step": 568 + }, + { + "epoch": 1.267260579064588, + "grad_norm": 20.135202407836914, + "learning_rate": 1e-06, + "loss": 1.1357, + "num_input_tokens_seen": 31843536, + "step": 569 + }, + { + "epoch": 1.267260579064588, + "loss": 1.4373984336853027, + "loss_ce": 0.001607370562851429, + "loss_iou": 0.53125, + "loss_num": 0.07470703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 31843536, + "step": 569 + }, + { + "epoch": 1.269487750556793, + "grad_norm": 34.270626068115234, + "learning_rate": 1e-06, + "loss": 1.2551, + "num_input_tokens_seen": 31898080, + "step": 570 + }, + { + "epoch": 1.269487750556793, + "loss": 1.2200250625610352, + "loss_ce": 0.0007868170505389571, + "loss_iou": 0.478515625, + "loss_num": 0.05224609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 31898080, + "step": 570 + }, + { + "epoch": 1.2717149220489978, + "grad_norm": 19.55361557006836, + "learning_rate": 1e-06, + "loss": 1.1386, + "num_input_tokens_seen": 31951824, + "step": 571 + }, + { + "epoch": 1.2717149220489978, + "loss": 1.242485761642456, + "loss_ce": 0.0007865370716899633, + "loss_iou": 0.5078125, + "loss_num": 0.045654296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 31951824, + "step": 571 + }, + { + "epoch": 1.2739420935412027, + "grad_norm": 64.2752456665039, + "learning_rate": 1e-06, + "loss": 1.3344, + "num_input_tokens_seen": 32006760, + "step": 572 + }, + { + "epoch": 1.2739420935412027, + "loss": 0.9924131035804749, + "loss_ce": 0.002422844059765339, + "loss_iou": 0.41796875, + "loss_num": 0.03076171875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 32006760, + "step": 572 + }, + { + "epoch": 1.2761692650334076, + "grad_norm": 18.450927734375, + "learning_rate": 1e-06, + "loss": 1.1749, + "num_input_tokens_seen": 32065980, + "step": 573 + }, + { + "epoch": 1.2761692650334076, + "loss": 1.0965838432312012, + "loss_ce": 0.0023455810733139515, + "loss_iou": 0.42578125, + "loss_num": 0.04833984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 32065980, + "step": 573 + }, + { + "epoch": 1.2783964365256124, + "grad_norm": 20.623170852661133, + "learning_rate": 1e-06, + "loss": 1.0463, + "num_input_tokens_seen": 32120756, + "step": 574 + }, + { + "epoch": 1.2783964365256124, + "loss": 1.1162598133087158, + "loss_ce": 0.0005371532752178609, + "loss_iou": 0.48046875, + "loss_num": 0.0306396484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 32120756, + "step": 574 + }, + { + "epoch": 1.2806236080178173, + "grad_norm": 30.17593002319336, + "learning_rate": 1e-06, + "loss": 1.0251, + "num_input_tokens_seen": 32175548, + "step": 575 + }, + { + "epoch": 1.2806236080178173, + "loss": 1.20136296749115, + "loss_ce": 0.000679333577863872, + "loss_iou": 0.5078125, + "loss_num": 0.037353515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 32175548, + "step": 575 + }, + { + "epoch": 1.2828507795100224, + "grad_norm": 24.09958839416504, + "learning_rate": 1e-06, + "loss": 1.2353, + "num_input_tokens_seen": 32230428, + "step": 576 + }, + { + "epoch": 1.2828507795100224, + "loss": 1.3486398458480835, + "loss_ce": 0.002448498737066984, + "loss_iou": 0.5625, + "loss_num": 0.0439453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 32230428, + "step": 576 + }, + { + "epoch": 1.2850779510022272, + "grad_norm": 25.068777084350586, + "learning_rate": 1e-06, + "loss": 1.1451, + "num_input_tokens_seen": 32282324, + "step": 577 + }, + { + "epoch": 1.2850779510022272, + "loss": 1.3363144397735596, + "loss_ce": 0.0011094561778008938, + "loss_iou": 0.498046875, + "loss_num": 0.06787109375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 32282324, + "step": 577 + }, + { + "epoch": 1.287305122494432, + "grad_norm": 21.740449905395508, + "learning_rate": 1e-06, + "loss": 0.9655, + "num_input_tokens_seen": 32339156, + "step": 578 + }, + { + "epoch": 1.287305122494432, + "loss": 0.8180431127548218, + "loss_ce": 0.0009044220205396414, + "loss_iou": 0.33984375, + "loss_num": 0.02783203125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 32339156, + "step": 578 + }, + { + "epoch": 1.289532293986637, + "grad_norm": 70.63128662109375, + "learning_rate": 1e-06, + "loss": 1.2932, + "num_input_tokens_seen": 32394596, + "step": 579 + }, + { + "epoch": 1.289532293986637, + "loss": 1.4371894598007202, + "loss_ce": 0.0006660318467766047, + "loss_iou": 0.5546875, + "loss_num": 0.06640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 32394596, + "step": 579 + }, + { + "epoch": 1.2917594654788418, + "grad_norm": 23.278512954711914, + "learning_rate": 1e-06, + "loss": 1.1461, + "num_input_tokens_seen": 32452512, + "step": 580 + }, + { + "epoch": 1.2917594654788418, + "loss": 1.3186371326446533, + "loss_ce": 0.003695748746395111, + "loss_iou": 0.494140625, + "loss_num": 0.0654296875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 32452512, + "step": 580 + }, + { + "epoch": 1.2939866369710469, + "grad_norm": 21.15460205078125, + "learning_rate": 1e-06, + "loss": 1.1878, + "num_input_tokens_seen": 32507568, + "step": 581 + }, + { + "epoch": 1.2939866369710469, + "loss": 1.161703109741211, + "loss_ce": 0.0027676241006702185, + "loss_iou": 0.474609375, + "loss_num": 0.0419921875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 32507568, + "step": 581 + }, + { + "epoch": 1.2962138084632517, + "grad_norm": 20.2482852935791, + "learning_rate": 1e-06, + "loss": 1.0799, + "num_input_tokens_seen": 32564432, + "step": 582 + }, + { + "epoch": 1.2962138084632517, + "loss": 1.0239415168762207, + "loss_ce": 0.0012363542336970568, + "loss_iou": 0.419921875, + "loss_num": 0.036376953125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 32564432, + "step": 582 + }, + { + "epoch": 1.2984409799554566, + "grad_norm": 39.36937713623047, + "learning_rate": 1e-06, + "loss": 0.9524, + "num_input_tokens_seen": 32618096, + "step": 583 + }, + { + "epoch": 1.2984409799554566, + "loss": 0.9080791473388672, + "loss_ce": 0.0008525372250005603, + "loss_iou": 0.34375, + "loss_num": 0.044189453125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 32618096, + "step": 583 + }, + { + "epoch": 1.3006681514476615, + "grad_norm": 21.902511596679688, + "learning_rate": 1e-06, + "loss": 1.3062, + "num_input_tokens_seen": 32674700, + "step": 584 + }, + { + "epoch": 1.3006681514476615, + "loss": 1.2775499820709229, + "loss_ce": 0.004112505819648504, + "loss_iou": 0.5078125, + "loss_num": 0.051025390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 32674700, + "step": 584 + }, + { + "epoch": 1.3028953229398663, + "grad_norm": 18.999221801757812, + "learning_rate": 1e-06, + "loss": 1.1638, + "num_input_tokens_seen": 32732620, + "step": 585 + }, + { + "epoch": 1.3028953229398663, + "loss": 1.1583878993988037, + "loss_ce": 0.0018937456188723445, + "loss_iou": 0.431640625, + "loss_num": 0.05859375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 32732620, + "step": 585 + }, + { + "epoch": 1.3051224944320712, + "grad_norm": 29.080703735351562, + "learning_rate": 1e-06, + "loss": 1.1476, + "num_input_tokens_seen": 32790356, + "step": 586 + }, + { + "epoch": 1.3051224944320712, + "loss": 1.168489933013916, + "loss_ce": 0.0005212133983150125, + "loss_iou": 0.48828125, + "loss_num": 0.0380859375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 32790356, + "step": 586 + }, + { + "epoch": 1.307349665924276, + "grad_norm": 26.149927139282227, + "learning_rate": 1e-06, + "loss": 1.1816, + "num_input_tokens_seen": 32846304, + "step": 587 + }, + { + "epoch": 1.307349665924276, + "loss": 1.3775864839553833, + "loss_ce": 0.0006334002828225493, + "loss_iou": 0.56640625, + "loss_num": 0.04931640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 32846304, + "step": 587 + }, + { + "epoch": 1.3095768374164811, + "grad_norm": 15.33914566040039, + "learning_rate": 1e-06, + "loss": 0.9549, + "num_input_tokens_seen": 32901992, + "step": 588 + }, + { + "epoch": 1.3095768374164811, + "loss": 0.846169114112854, + "loss_ce": 0.001198451267555356, + "loss_iou": 0.345703125, + "loss_num": 0.03125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 32901992, + "step": 588 + }, + { + "epoch": 1.311804008908686, + "grad_norm": 45.818695068359375, + "learning_rate": 1e-06, + "loss": 1.2682, + "num_input_tokens_seen": 32961136, + "step": 589 + }, + { + "epoch": 1.311804008908686, + "loss": 1.3090804815292358, + "loss_ce": 0.00048678729217499495, + "loss_iou": 0.5703125, + "loss_num": 0.033203125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 32961136, + "step": 589 + }, + { + "epoch": 1.3140311804008908, + "grad_norm": 31.00553321838379, + "learning_rate": 1e-06, + "loss": 1.1881, + "num_input_tokens_seen": 33015672, + "step": 590 + }, + { + "epoch": 1.3140311804008908, + "loss": 1.2077548503875732, + "loss_ce": 0.0007235349621623755, + "loss_iou": 0.48828125, + "loss_num": 0.04541015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 33015672, + "step": 590 + }, + { + "epoch": 1.3162583518930957, + "grad_norm": 32.82173156738281, + "learning_rate": 1e-06, + "loss": 1.4302, + "num_input_tokens_seen": 33071504, + "step": 591 + }, + { + "epoch": 1.3162583518930957, + "loss": 1.373945713043213, + "loss_ce": 0.00041063432581722736, + "loss_iou": 0.546875, + "loss_num": 0.056640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 33071504, + "step": 591 + }, + { + "epoch": 1.3184855233853008, + "grad_norm": 19.010250091552734, + "learning_rate": 1e-06, + "loss": 0.9531, + "num_input_tokens_seen": 33126464, + "step": 592 + }, + { + "epoch": 1.3184855233853008, + "loss": 0.9081621766090393, + "loss_ce": 0.002156274626031518, + "loss_iou": 0.37109375, + "loss_num": 0.032958984375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 33126464, + "step": 592 + }, + { + "epoch": 1.3207126948775056, + "grad_norm": 19.802019119262695, + "learning_rate": 1e-06, + "loss": 1.3826, + "num_input_tokens_seen": 33182964, + "step": 593 + }, + { + "epoch": 1.3207126948775056, + "loss": 1.3006547689437866, + "loss_ce": 0.0008500526309944689, + "loss_iou": 0.52734375, + "loss_num": 0.049560546875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 33182964, + "step": 593 + }, + { + "epoch": 1.3229398663697105, + "grad_norm": 19.390378952026367, + "learning_rate": 1e-06, + "loss": 1.3825, + "num_input_tokens_seen": 33237572, + "step": 594 + }, + { + "epoch": 1.3229398663697105, + "loss": 1.126906156539917, + "loss_ce": 0.0004412978305481374, + "loss_iou": 0.486328125, + "loss_num": 0.031005859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 33237572, + "step": 594 + }, + { + "epoch": 1.3251670378619154, + "grad_norm": 17.320398330688477, + "learning_rate": 1e-06, + "loss": 0.8407, + "num_input_tokens_seen": 33292472, + "step": 595 + }, + { + "epoch": 1.3251670378619154, + "loss": 0.7980577945709229, + "loss_ce": 0.00679800333455205, + "loss_iou": 0.33984375, + "loss_num": 0.0223388671875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 33292472, + "step": 595 + }, + { + "epoch": 1.3273942093541202, + "grad_norm": 22.934152603149414, + "learning_rate": 1e-06, + "loss": 1.2177, + "num_input_tokens_seen": 33349680, + "step": 596 + }, + { + "epoch": 1.3273942093541202, + "loss": 1.237870216369629, + "loss_ce": 0.000809596327599138, + "loss_iou": 0.50390625, + "loss_num": 0.046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 33349680, + "step": 596 + }, + { + "epoch": 1.329621380846325, + "grad_norm": 19.58114242553711, + "learning_rate": 1e-06, + "loss": 0.9529, + "num_input_tokens_seen": 33407408, + "step": 597 + }, + { + "epoch": 1.329621380846325, + "loss": 1.1202585697174072, + "loss_ce": 0.0006297538056969643, + "loss_iou": 0.443359375, + "loss_num": 0.046630859375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 33407408, + "step": 597 + }, + { + "epoch": 1.33184855233853, + "grad_norm": 34.02892303466797, + "learning_rate": 1e-06, + "loss": 1.4897, + "num_input_tokens_seen": 33461988, + "step": 598 + }, + { + "epoch": 1.33184855233853, + "loss": 1.4317845106124878, + "loss_ce": 0.0016087474068626761, + "loss_iou": 0.59765625, + "loss_num": 0.04736328125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 33461988, + "step": 598 + }, + { + "epoch": 1.334075723830735, + "grad_norm": 51.48478698730469, + "learning_rate": 1e-06, + "loss": 1.156, + "num_input_tokens_seen": 33517444, + "step": 599 + }, + { + "epoch": 1.334075723830735, + "loss": 0.9608005881309509, + "loss_ce": 0.0010837747249752283, + "loss_iou": 0.400390625, + "loss_num": 0.03173828125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 33517444, + "step": 599 + }, + { + "epoch": 1.3363028953229399, + "grad_norm": 24.541122436523438, + "learning_rate": 1e-06, + "loss": 1.1167, + "num_input_tokens_seen": 33573432, + "step": 600 + }, + { + "epoch": 1.3363028953229399, + "loss": 1.3346822261810303, + "loss_ce": 0.0014302851632237434, + "loss_iou": 0.578125, + "loss_num": 0.035888671875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 33573432, + "step": 600 + }, + { + "epoch": 1.3385300668151447, + "grad_norm": 15.516777038574219, + "learning_rate": 1e-06, + "loss": 1.2706, + "num_input_tokens_seen": 33627888, + "step": 601 + }, + { + "epoch": 1.3385300668151447, + "loss": 1.197658896446228, + "loss_ce": 0.0008815636392682791, + "loss_iou": 0.484375, + "loss_num": 0.04541015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 33627888, + "step": 601 + }, + { + "epoch": 1.3407572383073496, + "grad_norm": 22.7963924407959, + "learning_rate": 1e-06, + "loss": 1.2063, + "num_input_tokens_seen": 33685960, + "step": 602 + }, + { + "epoch": 1.3407572383073496, + "loss": 1.1104426383972168, + "loss_ce": 0.0008235453860834241, + "loss_iou": 0.46484375, + "loss_num": 0.0361328125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 33685960, + "step": 602 + }, + { + "epoch": 1.3429844097995547, + "grad_norm": 32.18126678466797, + "learning_rate": 1e-06, + "loss": 1.2209, + "num_input_tokens_seen": 33742816, + "step": 603 + }, + { + "epoch": 1.3429844097995547, + "loss": 1.1211140155792236, + "loss_ce": 0.0007527406560257077, + "loss_iou": 0.453125, + "loss_num": 0.042724609375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 33742816, + "step": 603 + }, + { + "epoch": 1.3452115812917596, + "grad_norm": 24.347137451171875, + "learning_rate": 1e-06, + "loss": 1.1429, + "num_input_tokens_seen": 33800708, + "step": 604 + }, + { + "epoch": 1.3452115812917596, + "loss": 1.146965742111206, + "loss_ce": 0.0012137566227465868, + "loss_iou": 0.453125, + "loss_num": 0.047607421875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 33800708, + "step": 604 + }, + { + "epoch": 1.3474387527839644, + "grad_norm": 27.303401947021484, + "learning_rate": 1e-06, + "loss": 1.1817, + "num_input_tokens_seen": 33853544, + "step": 605 + }, + { + "epoch": 1.3474387527839644, + "loss": 1.0051369667053223, + "loss_ce": 0.006113563664257526, + "loss_iou": 0.390625, + "loss_num": 0.04345703125, + "loss_xval": 1.0, + "num_input_tokens_seen": 33853544, + "step": 605 + }, + { + "epoch": 1.3496659242761693, + "grad_norm": 24.224193572998047, + "learning_rate": 1e-06, + "loss": 1.0283, + "num_input_tokens_seen": 33908172, + "step": 606 + }, + { + "epoch": 1.3496659242761693, + "loss": 1.0841628313064575, + "loss_ce": 0.0006667570560239255, + "loss_iou": 0.41015625, + "loss_num": 0.05224609375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 33908172, + "step": 606 + }, + { + "epoch": 1.3518930957683741, + "grad_norm": 23.933223724365234, + "learning_rate": 1e-06, + "loss": 1.3531, + "num_input_tokens_seen": 33964128, + "step": 607 + }, + { + "epoch": 1.3518930957683741, + "loss": 1.0083250999450684, + "loss_ce": 0.0005124981980770826, + "loss_iou": 0.42578125, + "loss_num": 0.03125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 33964128, + "step": 607 + }, + { + "epoch": 1.354120267260579, + "grad_norm": 49.8543815612793, + "learning_rate": 1e-06, + "loss": 0.9476, + "num_input_tokens_seen": 34018964, + "step": 608 + }, + { + "epoch": 1.354120267260579, + "loss": 1.1164300441741943, + "loss_ce": 0.0011956640519201756, + "loss_iou": 0.447265625, + "loss_num": 0.043701171875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 34018964, + "step": 608 + }, + { + "epoch": 1.3563474387527839, + "grad_norm": 20.391830444335938, + "learning_rate": 1e-06, + "loss": 1.0838, + "num_input_tokens_seen": 34074960, + "step": 609 + }, + { + "epoch": 1.3563474387527839, + "loss": 1.0016207695007324, + "loss_ce": 0.0008883203845471144, + "loss_iou": 0.390625, + "loss_num": 0.044189453125, + "loss_xval": 1.0, + "num_input_tokens_seen": 34074960, + "step": 609 + }, + { + "epoch": 1.358574610244989, + "grad_norm": 21.543336868286133, + "learning_rate": 1e-06, + "loss": 1.0178, + "num_input_tokens_seen": 34131548, + "step": 610 + }, + { + "epoch": 1.358574610244989, + "loss": 0.7443816661834717, + "loss_ce": 0.0036590369418263435, + "loss_iou": 0.2890625, + "loss_num": 0.03271484375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 34131548, + "step": 610 + }, + { + "epoch": 1.3608017817371938, + "grad_norm": 205.24061584472656, + "learning_rate": 1e-06, + "loss": 1.1072, + "num_input_tokens_seen": 34187164, + "step": 611 + }, + { + "epoch": 1.3608017817371938, + "loss": 1.3798058032989502, + "loss_ce": 0.0013879577163606882, + "loss_iou": 0.5625, + "loss_num": 0.05078125, + "loss_xval": 1.375, + "num_input_tokens_seen": 34187164, + "step": 611 + }, + { + "epoch": 1.3630289532293987, + "grad_norm": 50.390380859375, + "learning_rate": 1e-06, + "loss": 1.4146, + "num_input_tokens_seen": 34240892, + "step": 612 + }, + { + "epoch": 1.3630289532293987, + "loss": 1.688087821006775, + "loss_ce": 0.00644719647243619, + "loss_iou": 0.6328125, + "loss_num": 0.08251953125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 34240892, + "step": 612 + }, + { + "epoch": 1.3652561247216035, + "grad_norm": 22.16488265991211, + "learning_rate": 1e-06, + "loss": 1.0745, + "num_input_tokens_seen": 34296472, + "step": 613 + }, + { + "epoch": 1.3652561247216035, + "loss": 1.2694756984710693, + "loss_ce": 0.0006769584724679589, + "loss_iou": 0.5078125, + "loss_num": 0.050537109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 34296472, + "step": 613 + }, + { + "epoch": 1.3674832962138086, + "grad_norm": 44.119998931884766, + "learning_rate": 1e-06, + "loss": 1.1704, + "num_input_tokens_seen": 34352164, + "step": 614 + }, + { + "epoch": 1.3674832962138086, + "loss": 1.0769069194793701, + "loss_ce": 0.0012233321322128177, + "loss_iou": 0.431640625, + "loss_num": 0.04248046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 34352164, + "step": 614 + }, + { + "epoch": 1.3697104677060135, + "grad_norm": 38.12681198120117, + "learning_rate": 1e-06, + "loss": 1.2304, + "num_input_tokens_seen": 34408956, + "step": 615 + }, + { + "epoch": 1.3697104677060135, + "loss": 1.0864746570587158, + "loss_ce": 0.0005371640436351299, + "loss_iou": 0.447265625, + "loss_num": 0.038330078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 34408956, + "step": 615 + }, + { + "epoch": 1.3719376391982183, + "grad_norm": 22.228532791137695, + "learning_rate": 1e-06, + "loss": 1.0929, + "num_input_tokens_seen": 34462868, + "step": 616 + }, + { + "epoch": 1.3719376391982183, + "loss": 1.1881909370422363, + "loss_ce": 0.0011792225996032357, + "loss_iou": 0.458984375, + "loss_num": 0.053955078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 34462868, + "step": 616 + }, + { + "epoch": 1.3741648106904232, + "grad_norm": 16.62993812561035, + "learning_rate": 1e-06, + "loss": 0.8739, + "num_input_tokens_seen": 34519148, + "step": 617 + }, + { + "epoch": 1.3741648106904232, + "loss": 0.939282238483429, + "loss_ce": 0.0008056251681409776, + "loss_iou": 0.3828125, + "loss_num": 0.0341796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 34519148, + "step": 617 + }, + { + "epoch": 1.376391982182628, + "grad_norm": 26.92157745361328, + "learning_rate": 1e-06, + "loss": 1.16, + "num_input_tokens_seen": 34576480, + "step": 618 + }, + { + "epoch": 1.376391982182628, + "loss": 1.1556055545806885, + "loss_ce": 0.002041085623204708, + "loss_iou": 0.494140625, + "loss_num": 0.033447265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 34576480, + "step": 618 + }, + { + "epoch": 1.378619153674833, + "grad_norm": 78.8863525390625, + "learning_rate": 1e-06, + "loss": 0.8745, + "num_input_tokens_seen": 34631844, + "step": 619 + }, + { + "epoch": 1.378619153674833, + "loss": 0.6508793234825134, + "loss_ce": 0.0004887055838480592, + "loss_iou": 0.263671875, + "loss_num": 0.0242919921875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 34631844, + "step": 619 + }, + { + "epoch": 1.3808463251670378, + "grad_norm": 18.949325561523438, + "learning_rate": 1e-06, + "loss": 0.8934, + "num_input_tokens_seen": 34687740, + "step": 620 + }, + { + "epoch": 1.3808463251670378, + "loss": 0.8919047117233276, + "loss_ce": 0.000791468657553196, + "loss_iou": 0.373046875, + "loss_num": 0.029052734375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 34687740, + "step": 620 + }, + { + "epoch": 1.3830734966592428, + "grad_norm": 21.888832092285156, + "learning_rate": 1e-06, + "loss": 1.0868, + "num_input_tokens_seen": 34744896, + "step": 621 + }, + { + "epoch": 1.3830734966592428, + "loss": 1.120755910873413, + "loss_ce": 0.0006387863541021943, + "loss_iou": 0.486328125, + "loss_num": 0.0291748046875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 34744896, + "step": 621 + }, + { + "epoch": 1.3853006681514477, + "grad_norm": 62.403011322021484, + "learning_rate": 1e-06, + "loss": 0.8124, + "num_input_tokens_seen": 34800684, + "step": 622 + }, + { + "epoch": 1.3853006681514477, + "loss": 0.9201839566230774, + "loss_ce": 0.0012385983718559146, + "loss_iou": 0.3828125, + "loss_num": 0.031005859375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 34800684, + "step": 622 + }, + { + "epoch": 1.3875278396436526, + "grad_norm": 28.83528709411621, + "learning_rate": 1e-06, + "loss": 1.2444, + "num_input_tokens_seen": 34856832, + "step": 623 + }, + { + "epoch": 1.3875278396436526, + "loss": 1.0172494649887085, + "loss_ce": 0.00113617442548275, + "loss_iou": 0.41796875, + "loss_num": 0.0361328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 34856832, + "step": 623 + }, + { + "epoch": 1.3897550111358574, + "grad_norm": 62.715023040771484, + "learning_rate": 1e-06, + "loss": 1.3, + "num_input_tokens_seen": 34914236, + "step": 624 + }, + { + "epoch": 1.3897550111358574, + "loss": 1.4728524684906006, + "loss_ce": 0.002149291103705764, + "loss_iou": 0.5859375, + "loss_num": 0.060302734375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 34914236, + "step": 624 + }, + { + "epoch": 1.3919821826280623, + "grad_norm": 22.02700424194336, + "learning_rate": 1e-06, + "loss": 1.218, + "num_input_tokens_seen": 34970880, + "step": 625 + }, + { + "epoch": 1.3919821826280623, + "loss": 1.4092342853546143, + "loss_ce": 0.0010311320656910539, + "loss_iou": 0.59375, + "loss_num": 0.044189453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 34970880, + "step": 625 + }, + { + "epoch": 1.3942093541202674, + "grad_norm": 82.06532287597656, + "learning_rate": 1e-06, + "loss": 1.2257, + "num_input_tokens_seen": 35026436, + "step": 626 + }, + { + "epoch": 1.3942093541202674, + "loss": 1.3661468029022217, + "loss_ce": 0.0018889284692704678, + "loss_iou": 0.4921875, + "loss_num": 0.0751953125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 35026436, + "step": 626 + }, + { + "epoch": 1.3964365256124722, + "grad_norm": 21.959980010986328, + "learning_rate": 1e-06, + "loss": 1.0485, + "num_input_tokens_seen": 35083640, + "step": 627 + }, + { + "epoch": 1.3964365256124722, + "loss": 1.2543073892593384, + "loss_ce": 0.0008894825004972517, + "loss_iou": 0.47265625, + "loss_num": 0.061767578125, + "loss_xval": 1.25, + "num_input_tokens_seen": 35083640, + "step": 627 + }, + { + "epoch": 1.398663697104677, + "grad_norm": 69.06570434570312, + "learning_rate": 1e-06, + "loss": 1.1878, + "num_input_tokens_seen": 35139632, + "step": 628 + }, + { + "epoch": 1.398663697104677, + "loss": 1.040403127670288, + "loss_ce": 0.003171587362885475, + "loss_iou": 0.4375, + "loss_num": 0.032958984375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 35139632, + "step": 628 + }, + { + "epoch": 1.400890868596882, + "grad_norm": 32.16525650024414, + "learning_rate": 1e-06, + "loss": 1.4466, + "num_input_tokens_seen": 35194568, + "step": 629 + }, + { + "epoch": 1.400890868596882, + "loss": 1.3845007419586182, + "loss_ce": 0.006571032106876373, + "loss_iou": 0.474609375, + "loss_num": 0.0849609375, + "loss_xval": 1.375, + "num_input_tokens_seen": 35194568, + "step": 629 + }, + { + "epoch": 1.4031180400890868, + "grad_norm": 19.882808685302734, + "learning_rate": 1e-06, + "loss": 0.7151, + "num_input_tokens_seen": 35252680, + "step": 630 + }, + { + "epoch": 1.4031180400890868, + "loss": 0.6794017553329468, + "loss_ce": 0.00044663704466074705, + "loss_iou": 0.251953125, + "loss_num": 0.034912109375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 35252680, + "step": 630 + }, + { + "epoch": 1.4053452115812917, + "grad_norm": 23.41751480102539, + "learning_rate": 1e-06, + "loss": 0.8731, + "num_input_tokens_seen": 35308480, + "step": 631 + }, + { + "epoch": 1.4053452115812917, + "loss": 0.9022737741470337, + "loss_ce": 0.0009065663907676935, + "loss_iou": 0.376953125, + "loss_num": 0.02978515625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 35308480, + "step": 631 + }, + { + "epoch": 1.4075723830734965, + "grad_norm": 23.227956771850586, + "learning_rate": 1e-06, + "loss": 1.2639, + "num_input_tokens_seen": 35364808, + "step": 632 + }, + { + "epoch": 1.4075723830734965, + "loss": 1.2337548732757568, + "loss_ce": 0.0023096189834177494, + "loss_iou": 0.5, + "loss_num": 0.045654296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 35364808, + "step": 632 + }, + { + "epoch": 1.4097995545657016, + "grad_norm": 55.23591232299805, + "learning_rate": 1e-06, + "loss": 1.1625, + "num_input_tokens_seen": 35420704, + "step": 633 + }, + { + "epoch": 1.4097995545657016, + "loss": 0.9497365951538086, + "loss_ce": 0.0005178386345505714, + "loss_iou": 0.365234375, + "loss_num": 0.043701171875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 35420704, + "step": 633 + }, + { + "epoch": 1.4120267260579065, + "grad_norm": 26.891746520996094, + "learning_rate": 1e-06, + "loss": 1.1738, + "num_input_tokens_seen": 35473184, + "step": 634 + }, + { + "epoch": 1.4120267260579065, + "loss": 1.1008825302124023, + "loss_ce": 0.0012731605675071478, + "loss_iou": 0.453125, + "loss_num": 0.03857421875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 35473184, + "step": 634 + }, + { + "epoch": 1.4142538975501113, + "grad_norm": 17.678611755371094, + "learning_rate": 1e-06, + "loss": 1.1334, + "num_input_tokens_seen": 35530148, + "step": 635 + }, + { + "epoch": 1.4142538975501113, + "loss": 1.0327593088150024, + "loss_ce": 0.0005327487015165389, + "loss_iou": 0.443359375, + "loss_num": 0.0294189453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 35530148, + "step": 635 + }, + { + "epoch": 1.4164810690423162, + "grad_norm": 313.0267028808594, + "learning_rate": 1e-06, + "loss": 1.3829, + "num_input_tokens_seen": 35583856, + "step": 636 + }, + { + "epoch": 1.4164810690423162, + "loss": 1.357082486152649, + "loss_ce": 0.0023461126256734133, + "loss_iou": 0.5625, + "loss_num": 0.046142578125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 35583856, + "step": 636 + }, + { + "epoch": 1.4187082405345213, + "grad_norm": 20.430328369140625, + "learning_rate": 1e-06, + "loss": 1.2449, + "num_input_tokens_seen": 35640836, + "step": 637 + }, + { + "epoch": 1.4187082405345213, + "loss": 1.4005608558654785, + "loss_ce": 0.0006584060029126704, + "loss_iou": 0.56640625, + "loss_num": 0.052978515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 35640836, + "step": 637 + }, + { + "epoch": 1.4209354120267261, + "grad_norm": 23.457950592041016, + "learning_rate": 1e-06, + "loss": 0.8546, + "num_input_tokens_seen": 35695288, + "step": 638 + }, + { + "epoch": 1.4209354120267261, + "loss": 0.8239110112190247, + "loss_ce": 0.0017674551345407963, + "loss_iou": 0.34375, + "loss_num": 0.02685546875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 35695288, + "step": 638 + }, + { + "epoch": 1.423162583518931, + "grad_norm": 22.952999114990234, + "learning_rate": 1e-06, + "loss": 0.8464, + "num_input_tokens_seen": 35751468, + "step": 639 + }, + { + "epoch": 1.423162583518931, + "loss": 1.025329351425171, + "loss_ce": 0.0009152949205599725, + "loss_iou": 0.423828125, + "loss_num": 0.03515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 35751468, + "step": 639 + }, + { + "epoch": 1.4253897550111359, + "grad_norm": 17.500404357910156, + "learning_rate": 1e-06, + "loss": 0.8751, + "num_input_tokens_seen": 35809384, + "step": 640 + }, + { + "epoch": 1.4253897550111359, + "loss": 0.8379029035568237, + "loss_ce": 0.0005006167921237648, + "loss_iou": 0.34765625, + "loss_num": 0.0286865234375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 35809384, + "step": 640 + }, + { + "epoch": 1.4276169265033407, + "grad_norm": 30.681381225585938, + "learning_rate": 1e-06, + "loss": 1.1528, + "num_input_tokens_seen": 35865544, + "step": 641 + }, + { + "epoch": 1.4276169265033407, + "loss": 1.2849559783935547, + "loss_ce": 0.0005320889176800847, + "loss_iou": 0.55078125, + "loss_num": 0.037353515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 35865544, + "step": 641 + }, + { + "epoch": 1.4298440979955456, + "grad_norm": 19.769329071044922, + "learning_rate": 1e-06, + "loss": 1.2354, + "num_input_tokens_seen": 35921472, + "step": 642 + }, + { + "epoch": 1.4298440979955456, + "loss": 1.344813346862793, + "loss_ce": 0.0008191849919967353, + "loss_iou": 0.5703125, + "loss_num": 0.041259765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 35921472, + "step": 642 + }, + { + "epoch": 1.4320712694877504, + "grad_norm": 166.1317901611328, + "learning_rate": 1e-06, + "loss": 1.3808, + "num_input_tokens_seen": 35978028, + "step": 643 + }, + { + "epoch": 1.4320712694877504, + "loss": 1.7419133186340332, + "loss_ce": 0.002655471907928586, + "loss_iou": 0.68359375, + "loss_num": 0.0751953125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 35978028, + "step": 643 + }, + { + "epoch": 1.4342984409799555, + "grad_norm": 21.949459075927734, + "learning_rate": 1e-06, + "loss": 0.9716, + "num_input_tokens_seen": 36033152, + "step": 644 + }, + { + "epoch": 1.4342984409799555, + "loss": 0.8689587116241455, + "loss_ce": 0.0007946894620545208, + "loss_iou": 0.376953125, + "loss_num": 0.022705078125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 36033152, + "step": 644 + }, + { + "epoch": 1.4365256124721604, + "grad_norm": 53.461631774902344, + "learning_rate": 1e-06, + "loss": 1.3388, + "num_input_tokens_seen": 36088644, + "step": 645 + }, + { + "epoch": 1.4365256124721604, + "loss": 1.3191097974777222, + "loss_ce": 0.0017270214157178998, + "loss_iou": 0.490234375, + "loss_num": 0.0673828125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 36088644, + "step": 645 + }, + { + "epoch": 1.4387527839643652, + "grad_norm": 17.125713348388672, + "learning_rate": 1e-06, + "loss": 1.1615, + "num_input_tokens_seen": 36147156, + "step": 646 + }, + { + "epoch": 1.4387527839643652, + "loss": 1.204886555671692, + "loss_ce": 0.0012732010800391436, + "loss_iou": 0.46875, + "loss_num": 0.052978515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 36147156, + "step": 646 + }, + { + "epoch": 1.44097995545657, + "grad_norm": 20.655792236328125, + "learning_rate": 1e-06, + "loss": 0.8804, + "num_input_tokens_seen": 36205528, + "step": 647 + }, + { + "epoch": 1.44097995545657, + "loss": 0.9184082746505737, + "loss_ce": 0.0004395167634356767, + "loss_iou": 0.36328125, + "loss_num": 0.037841796875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 36205528, + "step": 647 + }, + { + "epoch": 1.4432071269487752, + "grad_norm": 27.488887786865234, + "learning_rate": 1e-06, + "loss": 1.0496, + "num_input_tokens_seen": 36263608, + "step": 648 + }, + { + "epoch": 1.4432071269487752, + "loss": 1.052678108215332, + "loss_ce": 0.000431923137512058, + "loss_iou": 0.423828125, + "loss_num": 0.041259765625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 36263608, + "step": 648 + }, + { + "epoch": 1.44543429844098, + "grad_norm": 19.03914451599121, + "learning_rate": 1e-06, + "loss": 1.0058, + "num_input_tokens_seen": 36320740, + "step": 649 + }, + { + "epoch": 1.44543429844098, + "loss": 1.1877658367156982, + "loss_ce": 0.0014864858239889145, + "loss_iou": 0.46484375, + "loss_num": 0.05126953125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 36320740, + "step": 649 + }, + { + "epoch": 1.447661469933185, + "grad_norm": 30.350322723388672, + "learning_rate": 1e-06, + "loss": 1.1674, + "num_input_tokens_seen": 36375948, + "step": 650 + }, + { + "epoch": 1.447661469933185, + "loss": 1.6142632961273193, + "loss_ce": 0.0014703237684443593, + "loss_iou": 0.6484375, + "loss_num": 0.0625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 36375948, + "step": 650 + }, + { + "epoch": 1.4498886414253898, + "grad_norm": 31.322311401367188, + "learning_rate": 1e-06, + "loss": 1.0709, + "num_input_tokens_seen": 36433352, + "step": 651 + }, + { + "epoch": 1.4498886414253898, + "loss": 1.1504631042480469, + "loss_ce": 0.0005606971681118011, + "loss_iou": 0.482421875, + "loss_num": 0.037109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 36433352, + "step": 651 + }, + { + "epoch": 1.4521158129175946, + "grad_norm": 58.9162712097168, + "learning_rate": 1e-06, + "loss": 1.1301, + "num_input_tokens_seen": 36489816, + "step": 652 + }, + { + "epoch": 1.4521158129175946, + "loss": 1.2538143396377563, + "loss_ce": 0.001372913713566959, + "loss_iou": 0.494140625, + "loss_num": 0.052490234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 36489816, + "step": 652 + }, + { + "epoch": 1.4543429844097995, + "grad_norm": 17.665956497192383, + "learning_rate": 1e-06, + "loss": 1.1987, + "num_input_tokens_seen": 36542532, + "step": 653 + }, + { + "epoch": 1.4543429844097995, + "loss": 1.2465870380401611, + "loss_ce": 0.0004933135933242738, + "loss_iou": 0.50390625, + "loss_num": 0.047607421875, + "loss_xval": 1.25, + "num_input_tokens_seen": 36542532, + "step": 653 + }, + { + "epoch": 1.4565701559020043, + "grad_norm": 63.455204010009766, + "learning_rate": 1e-06, + "loss": 1.0736, + "num_input_tokens_seen": 36599520, + "step": 654 + }, + { + "epoch": 1.4565701559020043, + "loss": 1.2124230861663818, + "loss_ce": 0.0005090509075671434, + "loss_iou": 0.4765625, + "loss_num": 0.0517578125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 36599520, + "step": 654 + }, + { + "epoch": 1.4587973273942094, + "grad_norm": 26.223678588867188, + "learning_rate": 1e-06, + "loss": 1.0227, + "num_input_tokens_seen": 36656080, + "step": 655 + }, + { + "epoch": 1.4587973273942094, + "loss": 0.9443738460540771, + "loss_ce": 0.000526183401234448, + "loss_iou": 0.353515625, + "loss_num": 0.047119140625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 36656080, + "step": 655 + }, + { + "epoch": 1.4610244988864143, + "grad_norm": 41.6849365234375, + "learning_rate": 1e-06, + "loss": 1.0155, + "num_input_tokens_seen": 36712280, + "step": 656 + }, + { + "epoch": 1.4610244988864143, + "loss": 1.218764066696167, + "loss_ce": 0.008314890787005424, + "loss_iou": 0.47265625, + "loss_num": 0.052978515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 36712280, + "step": 656 + }, + { + "epoch": 1.4632516703786191, + "grad_norm": 26.14752197265625, + "learning_rate": 1e-06, + "loss": 1.06, + "num_input_tokens_seen": 36769340, + "step": 657 + }, + { + "epoch": 1.4632516703786191, + "loss": 1.0224517583847046, + "loss_ce": 0.0007232209318317473, + "loss_iou": 0.39453125, + "loss_num": 0.04638671875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 36769340, + "step": 657 + }, + { + "epoch": 1.465478841870824, + "grad_norm": 25.274215698242188, + "learning_rate": 1e-06, + "loss": 1.2905, + "num_input_tokens_seen": 36824936, + "step": 658 + }, + { + "epoch": 1.465478841870824, + "loss": 1.3570466041564941, + "loss_ce": 0.0010896207531914115, + "loss_iou": 0.57421875, + "loss_num": 0.041748046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 36824936, + "step": 658 + }, + { + "epoch": 1.467706013363029, + "grad_norm": 18.70472526550293, + "learning_rate": 1e-06, + "loss": 0.9063, + "num_input_tokens_seen": 36883996, + "step": 659 + }, + { + "epoch": 1.467706013363029, + "loss": 0.9597538709640503, + "loss_ce": 0.005408107303082943, + "loss_iou": 0.376953125, + "loss_num": 0.0400390625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 36883996, + "step": 659 + }, + { + "epoch": 1.469933184855234, + "grad_norm": 24.98056411743164, + "learning_rate": 1e-06, + "loss": 0.9572, + "num_input_tokens_seen": 36936316, + "step": 660 + }, + { + "epoch": 1.469933184855234, + "loss": 0.7073796391487122, + "loss_ce": 0.006451886147260666, + "loss_iou": 0.283203125, + "loss_num": 0.02685546875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 36936316, + "step": 660 + }, + { + "epoch": 1.4721603563474388, + "grad_norm": 22.775863647460938, + "learning_rate": 1e-06, + "loss": 1.0446, + "num_input_tokens_seen": 36991328, + "step": 661 + }, + { + "epoch": 1.4721603563474388, + "loss": 1.084336757659912, + "loss_ce": 0.0008406001143157482, + "loss_iou": 0.4296875, + "loss_num": 0.044921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 36991328, + "step": 661 + }, + { + "epoch": 1.4743875278396437, + "grad_norm": 25.190065383911133, + "learning_rate": 1e-06, + "loss": 1.2837, + "num_input_tokens_seen": 37044136, + "step": 662 + }, + { + "epoch": 1.4743875278396437, + "loss": 1.5641846656799316, + "loss_ce": 0.0026612321380525827, + "loss_iou": 0.609375, + "loss_num": 0.06787109375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 37044136, + "step": 662 + }, + { + "epoch": 1.4766146993318485, + "grad_norm": 28.881362915039062, + "learning_rate": 1e-06, + "loss": 0.8291, + "num_input_tokens_seen": 37100336, + "step": 663 + }, + { + "epoch": 1.4766146993318485, + "loss": 0.8968852758407593, + "loss_ce": 0.003330609295517206, + "loss_iou": 0.35546875, + "loss_num": 0.036376953125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 37100336, + "step": 663 + }, + { + "epoch": 1.4788418708240534, + "grad_norm": 24.36873435974121, + "learning_rate": 1e-06, + "loss": 1.2756, + "num_input_tokens_seen": 37154992, + "step": 664 + }, + { + "epoch": 1.4788418708240534, + "loss": 1.3726325035095215, + "loss_ce": 0.0030035879462957382, + "loss_iou": 0.5546875, + "loss_num": 0.052001953125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 37154992, + "step": 664 + }, + { + "epoch": 1.4810690423162582, + "grad_norm": 40.89757537841797, + "learning_rate": 1e-06, + "loss": 1.1327, + "num_input_tokens_seen": 37208516, + "step": 665 + }, + { + "epoch": 1.4810690423162582, + "loss": 0.8330258131027222, + "loss_ce": 0.0005062957643531263, + "loss_iou": 0.35546875, + "loss_num": 0.024169921875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 37208516, + "step": 665 + }, + { + "epoch": 1.4832962138084633, + "grad_norm": 33.48430252075195, + "learning_rate": 1e-06, + "loss": 1.0491, + "num_input_tokens_seen": 37263476, + "step": 666 + }, + { + "epoch": 1.4832962138084633, + "loss": 1.249821424484253, + "loss_ce": 0.001530370325781405, + "loss_iou": 0.55078125, + "loss_num": 0.02978515625, + "loss_xval": 1.25, + "num_input_tokens_seen": 37263476, + "step": 666 + }, + { + "epoch": 1.4855233853006682, + "grad_norm": 31.53962516784668, + "learning_rate": 1e-06, + "loss": 0.8733, + "num_input_tokens_seen": 37319864, + "step": 667 + }, + { + "epoch": 1.4855233853006682, + "loss": 0.6637530326843262, + "loss_ce": 0.00213192543014884, + "loss_iou": 0.27734375, + "loss_num": 0.0218505859375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 37319864, + "step": 667 + }, + { + "epoch": 1.487750556792873, + "grad_norm": 19.535804748535156, + "learning_rate": 1e-06, + "loss": 1.0933, + "num_input_tokens_seen": 37378472, + "step": 668 + }, + { + "epoch": 1.487750556792873, + "loss": 1.066902756690979, + "loss_ce": 0.000496495165862143, + "loss_iou": 0.451171875, + "loss_num": 0.03271484375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 37378472, + "step": 668 + }, + { + "epoch": 1.489977728285078, + "grad_norm": 35.252281188964844, + "learning_rate": 1e-06, + "loss": 1.4544, + "num_input_tokens_seen": 37433128, + "step": 669 + }, + { + "epoch": 1.489977728285078, + "loss": 1.5701709985733032, + "loss_ce": 0.0008350461139343679, + "loss_iou": 0.640625, + "loss_num": 0.05712890625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 37433128, + "step": 669 + }, + { + "epoch": 1.492204899777283, + "grad_norm": 22.478748321533203, + "learning_rate": 1e-06, + "loss": 1.0887, + "num_input_tokens_seen": 37487148, + "step": 670 + }, + { + "epoch": 1.492204899777283, + "loss": 0.9821290373802185, + "loss_ce": 0.0026368550024926662, + "loss_iou": 0.333984375, + "loss_num": 0.06201171875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 37487148, + "step": 670 + }, + { + "epoch": 1.4944320712694878, + "grad_norm": 26.4521484375, + "learning_rate": 1e-06, + "loss": 1.0069, + "num_input_tokens_seen": 37545644, + "step": 671 + }, + { + "epoch": 1.4944320712694878, + "loss": 1.1284525394439697, + "loss_ce": 0.0176127590239048, + "loss_iou": 0.423828125, + "loss_num": 0.052734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 37545644, + "step": 671 + }, + { + "epoch": 1.4966592427616927, + "grad_norm": 20.00921058654785, + "learning_rate": 1e-06, + "loss": 1.1729, + "num_input_tokens_seen": 37602744, + "step": 672 + }, + { + "epoch": 1.4966592427616927, + "loss": 1.4309265613555908, + "loss_ce": 0.0017273698467761278, + "loss_iou": 0.57421875, + "loss_num": 0.05615234375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 37602744, + "step": 672 + }, + { + "epoch": 1.4988864142538976, + "grad_norm": 21.240440368652344, + "learning_rate": 1e-06, + "loss": 0.9773, + "num_input_tokens_seen": 37658080, + "step": 673 + }, + { + "epoch": 1.4988864142538976, + "loss": 1.0557523965835571, + "loss_ce": 0.0005766096874140203, + "loss_iou": 0.42578125, + "loss_num": 0.040771484375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 37658080, + "step": 673 + }, + { + "epoch": 1.5011135857461024, + "grad_norm": 20.119047164916992, + "learning_rate": 1e-06, + "loss": 0.8459, + "num_input_tokens_seen": 37713888, + "step": 674 + }, + { + "epoch": 1.5011135857461024, + "loss": 0.8226086497306824, + "loss_ce": 0.0013196287909522653, + "loss_iou": 0.34375, + "loss_num": 0.0263671875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 37713888, + "step": 674 + }, + { + "epoch": 1.5033407572383073, + "grad_norm": 29.80697250366211, + "learning_rate": 1e-06, + "loss": 0.9022, + "num_input_tokens_seen": 37768756, + "step": 675 + }, + { + "epoch": 1.5033407572383073, + "loss": 0.9731925129890442, + "loss_ce": 0.001024498138576746, + "loss_iou": 0.423828125, + "loss_num": 0.0247802734375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 37768756, + "step": 675 + }, + { + "epoch": 1.5055679287305122, + "grad_norm": 17.169862747192383, + "learning_rate": 1e-06, + "loss": 1.2668, + "num_input_tokens_seen": 37821224, + "step": 676 + }, + { + "epoch": 1.5055679287305122, + "loss": 1.339667558670044, + "loss_ce": 0.0008004190749488771, + "loss_iou": 0.5390625, + "loss_num": 0.052490234375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 37821224, + "step": 676 + }, + { + "epoch": 1.507795100222717, + "grad_norm": 48.648136138916016, + "learning_rate": 1e-06, + "loss": 1.1283, + "num_input_tokens_seen": 37876864, + "step": 677 + }, + { + "epoch": 1.507795100222717, + "loss": 1.0091381072998047, + "loss_ce": 0.00047117803478613496, + "loss_iou": 0.412109375, + "loss_num": 0.03662109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 37876864, + "step": 677 + }, + { + "epoch": 1.510022271714922, + "grad_norm": 31.37755012512207, + "learning_rate": 1e-06, + "loss": 1.0842, + "num_input_tokens_seen": 37934036, + "step": 678 + }, + { + "epoch": 1.510022271714922, + "loss": 0.8647042512893677, + "loss_ce": 0.0004463824152480811, + "loss_iou": 0.37109375, + "loss_num": 0.0245361328125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 37934036, + "step": 678 + }, + { + "epoch": 1.512249443207127, + "grad_norm": 75.93875885009766, + "learning_rate": 1e-06, + "loss": 1.3985, + "num_input_tokens_seen": 37987492, + "step": 679 + }, + { + "epoch": 1.512249443207127, + "loss": 1.21080482006073, + "loss_ce": 0.0013321400620043278, + "loss_iou": 0.5, + "loss_num": 0.041015625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 37987492, + "step": 679 + }, + { + "epoch": 1.5144766146993318, + "grad_norm": 28.709468841552734, + "learning_rate": 1e-06, + "loss": 1.0242, + "num_input_tokens_seen": 38046268, + "step": 680 + }, + { + "epoch": 1.5144766146993318, + "loss": 0.7978775501251221, + "loss_ce": 0.0007584316190332174, + "loss_iou": 0.3046875, + "loss_num": 0.0380859375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 38046268, + "step": 680 + }, + { + "epoch": 1.516703786191537, + "grad_norm": 15.11223030090332, + "learning_rate": 1e-06, + "loss": 1.0458, + "num_input_tokens_seen": 38096816, + "step": 681 + }, + { + "epoch": 1.516703786191537, + "loss": 0.8654755353927612, + "loss_ce": 0.0004853087302763015, + "loss_iou": 0.3515625, + "loss_num": 0.032470703125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 38096816, + "step": 681 + }, + { + "epoch": 1.5189309576837418, + "grad_norm": 28.030420303344727, + "learning_rate": 1e-06, + "loss": 1.1786, + "num_input_tokens_seen": 38152668, + "step": 682 + }, + { + "epoch": 1.5189309576837418, + "loss": 0.9609849452972412, + "loss_ce": 0.0010240338742733002, + "loss_iou": 0.392578125, + "loss_num": 0.034912109375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 38152668, + "step": 682 + }, + { + "epoch": 1.5211581291759466, + "grad_norm": 18.825443267822266, + "learning_rate": 1e-06, + "loss": 1.1633, + "num_input_tokens_seen": 38208028, + "step": 683 + }, + { + "epoch": 1.5211581291759466, + "loss": 1.3014280796051025, + "loss_ce": 0.0006468580104410648, + "loss_iou": 0.5390625, + "loss_num": 0.04541015625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 38208028, + "step": 683 + }, + { + "epoch": 1.5233853006681515, + "grad_norm": 33.09317398071289, + "learning_rate": 1e-06, + "loss": 1.0005, + "num_input_tokens_seen": 38266628, + "step": 684 + }, + { + "epoch": 1.5233853006681515, + "loss": 0.8423627614974976, + "loss_ce": 0.0005658682784996927, + "loss_iou": 0.326171875, + "loss_num": 0.037841796875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 38266628, + "step": 684 + }, + { + "epoch": 1.5256124721603563, + "grad_norm": 21.19887924194336, + "learning_rate": 1e-06, + "loss": 1.2159, + "num_input_tokens_seen": 38321396, + "step": 685 + }, + { + "epoch": 1.5256124721603563, + "loss": 1.512146234512329, + "loss_ce": 0.001403960632160306, + "loss_iou": 0.6015625, + "loss_num": 0.06201171875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 38321396, + "step": 685 + }, + { + "epoch": 1.5278396436525612, + "grad_norm": 108.64661407470703, + "learning_rate": 1e-06, + "loss": 1.1361, + "num_input_tokens_seen": 38376092, + "step": 686 + }, + { + "epoch": 1.5278396436525612, + "loss": 1.0967187881469727, + "loss_ce": 0.0010156568605452776, + "loss_iou": 0.453125, + "loss_num": 0.038330078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 38376092, + "step": 686 + }, + { + "epoch": 1.530066815144766, + "grad_norm": 18.550582885742188, + "learning_rate": 1e-06, + "loss": 1.2406, + "num_input_tokens_seen": 38432308, + "step": 687 + }, + { + "epoch": 1.530066815144766, + "loss": 1.231999158859253, + "loss_ce": 0.0005538459517993033, + "loss_iou": 0.515625, + "loss_num": 0.040283203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 38432308, + "step": 687 + }, + { + "epoch": 1.532293986636971, + "grad_norm": 54.29788589477539, + "learning_rate": 1e-06, + "loss": 0.9656, + "num_input_tokens_seen": 38488264, + "step": 688 + }, + { + "epoch": 1.532293986636971, + "loss": 0.8684132099151611, + "loss_ce": 0.0007374430424533784, + "loss_iou": 0.318359375, + "loss_num": 0.046142578125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 38488264, + "step": 688 + }, + { + "epoch": 1.534521158129176, + "grad_norm": 26.847867965698242, + "learning_rate": 1e-06, + "loss": 1.318, + "num_input_tokens_seen": 38544928, + "step": 689 + }, + { + "epoch": 1.534521158129176, + "loss": 1.185499906539917, + "loss_ce": 0.0019061192870140076, + "loss_iou": 0.49609375, + "loss_num": 0.03857421875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 38544928, + "step": 689 + }, + { + "epoch": 1.5367483296213809, + "grad_norm": 17.0640926361084, + "learning_rate": 1e-06, + "loss": 1.1527, + "num_input_tokens_seen": 38600752, + "step": 690 + }, + { + "epoch": 1.5367483296213809, + "loss": 1.185793399810791, + "loss_ce": 0.0007348479703068733, + "loss_iou": 0.484375, + "loss_num": 0.04345703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 38600752, + "step": 690 + }, + { + "epoch": 1.5389755011135857, + "grad_norm": 125.89933013916016, + "learning_rate": 1e-06, + "loss": 1.053, + "num_input_tokens_seen": 38656252, + "step": 691 + }, + { + "epoch": 1.5389755011135857, + "loss": 1.032954454421997, + "loss_ce": 0.0009720420930534601, + "loss_iou": 0.39453125, + "loss_num": 0.048583984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 38656252, + "step": 691 + }, + { + "epoch": 1.5412026726057908, + "grad_norm": 50.515708923339844, + "learning_rate": 1e-06, + "loss": 1.1837, + "num_input_tokens_seen": 38712256, + "step": 692 + }, + { + "epoch": 1.5412026726057908, + "loss": 1.369874358177185, + "loss_ce": 0.001466137240640819, + "loss_iou": 0.53515625, + "loss_num": 0.05908203125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 38712256, + "step": 692 + }, + { + "epoch": 1.5434298440979957, + "grad_norm": 25.205232620239258, + "learning_rate": 1e-06, + "loss": 1.059, + "num_input_tokens_seen": 38769176, + "step": 693 + }, + { + "epoch": 1.5434298440979957, + "loss": 1.1137714385986328, + "loss_ce": 0.0004902197397314012, + "loss_iou": 0.462890625, + "loss_num": 0.037353515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 38769176, + "step": 693 + }, + { + "epoch": 1.5456570155902005, + "grad_norm": 17.223934173583984, + "learning_rate": 1e-06, + "loss": 1.1755, + "num_input_tokens_seen": 38825140, + "step": 694 + }, + { + "epoch": 1.5456570155902005, + "loss": 1.2859344482421875, + "loss_ce": 0.0017547393217682838, + "loss_iou": 0.48828125, + "loss_num": 0.0615234375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 38825140, + "step": 694 + }, + { + "epoch": 1.5478841870824054, + "grad_norm": 36.42241287231445, + "learning_rate": 1e-06, + "loss": 1.0896, + "num_input_tokens_seen": 38881164, + "step": 695 + }, + { + "epoch": 1.5478841870824054, + "loss": 0.9976003170013428, + "loss_ce": 0.0010183160193264484, + "loss_iou": 0.416015625, + "loss_num": 0.033203125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 38881164, + "step": 695 + }, + { + "epoch": 1.5501113585746102, + "grad_norm": 53.83267593383789, + "learning_rate": 1e-06, + "loss": 1.0186, + "num_input_tokens_seen": 38937620, + "step": 696 + }, + { + "epoch": 1.5501113585746102, + "loss": 1.0366376638412476, + "loss_ce": 0.0014814026653766632, + "loss_iou": 0.435546875, + "loss_num": 0.032958984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 38937620, + "step": 696 + }, + { + "epoch": 1.552338530066815, + "grad_norm": 26.581382751464844, + "learning_rate": 1e-06, + "loss": 0.9886, + "num_input_tokens_seen": 38990964, + "step": 697 + }, + { + "epoch": 1.552338530066815, + "loss": 1.0419948101043701, + "loss_ce": 0.0004909674171358347, + "loss_iou": 0.42578125, + "loss_num": 0.0380859375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 38990964, + "step": 697 + }, + { + "epoch": 1.55456570155902, + "grad_norm": 20.961688995361328, + "learning_rate": 1e-06, + "loss": 1.2405, + "num_input_tokens_seen": 39046004, + "step": 698 + }, + { + "epoch": 1.55456570155902, + "loss": 1.3255722522735596, + "loss_ce": 0.0023301024921238422, + "loss_iou": 0.57421875, + "loss_num": 0.035400390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 39046004, + "step": 698 + }, + { + "epoch": 1.5567928730512248, + "grad_norm": 18.719867706298828, + "learning_rate": 1e-06, + "loss": 1.0962, + "num_input_tokens_seen": 39104440, + "step": 699 + }, + { + "epoch": 1.5567928730512248, + "loss": 0.9274605512619019, + "loss_ce": 0.0011909835739061236, + "loss_iou": 0.38671875, + "loss_num": 0.0311279296875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 39104440, + "step": 699 + }, + { + "epoch": 1.5590200445434297, + "grad_norm": 27.276357650756836, + "learning_rate": 1e-06, + "loss": 1.0064, + "num_input_tokens_seen": 39161992, + "step": 700 + }, + { + "epoch": 1.5590200445434297, + "loss": 0.9984862208366394, + "loss_ce": 0.0011718107853084803, + "loss_iou": 0.408203125, + "loss_num": 0.036376953125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 39161992, + "step": 700 + }, + { + "epoch": 1.5612472160356348, + "grad_norm": 17.313262939453125, + "learning_rate": 1e-06, + "loss": 1.0717, + "num_input_tokens_seen": 39216700, + "step": 701 + }, + { + "epoch": 1.5612472160356348, + "loss": 0.8593438863754272, + "loss_ce": 0.0034632175229489803, + "loss_iou": 0.35546875, + "loss_num": 0.0289306640625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 39216700, + "step": 701 + }, + { + "epoch": 1.5634743875278396, + "grad_norm": 26.77741241455078, + "learning_rate": 1e-06, + "loss": 0.9991, + "num_input_tokens_seen": 39273188, + "step": 702 + }, + { + "epoch": 1.5634743875278396, + "loss": 1.1402983665466309, + "loss_ce": 0.0006498623406514525, + "loss_iou": 0.48828125, + "loss_num": 0.032958984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 39273188, + "step": 702 + }, + { + "epoch": 1.5657015590200447, + "grad_norm": 25.94588851928711, + "learning_rate": 1e-06, + "loss": 1.2285, + "num_input_tokens_seen": 39326596, + "step": 703 + }, + { + "epoch": 1.5657015590200447, + "loss": 1.107351541519165, + "loss_ce": 0.0016386474017053843, + "loss_iou": 0.392578125, + "loss_num": 0.064453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 39326596, + "step": 703 + }, + { + "epoch": 1.5679287305122496, + "grad_norm": 108.70765686035156, + "learning_rate": 1e-06, + "loss": 1.2865, + "num_input_tokens_seen": 39381292, + "step": 704 + }, + { + "epoch": 1.5679287305122496, + "loss": 1.2144091129302979, + "loss_ce": 0.0024950681254267693, + "loss_iou": 0.4765625, + "loss_num": 0.051513671875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 39381292, + "step": 704 + }, + { + "epoch": 1.5701559020044544, + "grad_norm": 27.831453323364258, + "learning_rate": 1e-06, + "loss": 1.1255, + "num_input_tokens_seen": 39438056, + "step": 705 + }, + { + "epoch": 1.5701559020044544, + "loss": 1.3021111488342285, + "loss_ce": 0.0015739843947812915, + "loss_iou": 0.5078125, + "loss_num": 0.056884765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 39438056, + "step": 705 + }, + { + "epoch": 1.5723830734966593, + "grad_norm": 35.32048797607422, + "learning_rate": 1e-06, + "loss": 0.9482, + "num_input_tokens_seen": 39492676, + "step": 706 + }, + { + "epoch": 1.5723830734966593, + "loss": 0.9542930722236633, + "loss_ce": 0.0006797942915000021, + "loss_iou": 0.390625, + "loss_num": 0.03515625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 39492676, + "step": 706 + }, + { + "epoch": 1.5746102449888641, + "grad_norm": 65.93453216552734, + "learning_rate": 1e-06, + "loss": 1.0207, + "num_input_tokens_seen": 39551388, + "step": 707 + }, + { + "epoch": 1.5746102449888641, + "loss": 0.9525820016860962, + "loss_ce": 0.0006776798400096595, + "loss_iou": 0.421875, + "loss_num": 0.021484375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 39551388, + "step": 707 + }, + { + "epoch": 1.576837416481069, + "grad_norm": 14.823631286621094, + "learning_rate": 1e-06, + "loss": 1.1039, + "num_input_tokens_seen": 39607732, + "step": 708 + }, + { + "epoch": 1.576837416481069, + "loss": 0.9811808466911316, + "loss_ce": 0.00046793223009444773, + "loss_iou": 0.404296875, + "loss_num": 0.03466796875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 39607732, + "step": 708 + }, + { + "epoch": 1.5790645879732739, + "grad_norm": 33.79949188232422, + "learning_rate": 1e-06, + "loss": 1.0272, + "num_input_tokens_seen": 39664892, + "step": 709 + }, + { + "epoch": 1.5790645879732739, + "loss": 0.8985086679458618, + "loss_ce": 0.0005594115937128663, + "loss_iou": 0.392578125, + "loss_num": 0.0228271484375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 39664892, + "step": 709 + }, + { + "epoch": 1.5812917594654787, + "grad_norm": 44.01935958862305, + "learning_rate": 1e-06, + "loss": 1.2837, + "num_input_tokens_seen": 39720468, + "step": 710 + }, + { + "epoch": 1.5812917594654787, + "loss": 1.2381861209869385, + "loss_ce": 0.0008813057793304324, + "loss_iou": 0.435546875, + "loss_num": 0.0732421875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 39720468, + "step": 710 + }, + { + "epoch": 1.5835189309576836, + "grad_norm": 16.639955520629883, + "learning_rate": 1e-06, + "loss": 1.2093, + "num_input_tokens_seen": 39776680, + "step": 711 + }, + { + "epoch": 1.5835189309576836, + "loss": 1.147930383682251, + "loss_ce": 0.001445973408408463, + "loss_iou": 0.453125, + "loss_num": 0.04833984375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 39776680, + "step": 711 + }, + { + "epoch": 1.5857461024498887, + "grad_norm": 21.6805477142334, + "learning_rate": 1e-06, + "loss": 1.0205, + "num_input_tokens_seen": 39830636, + "step": 712 + }, + { + "epoch": 1.5857461024498887, + "loss": 1.0399500131607056, + "loss_ce": 0.0016199484234675765, + "loss_iou": 0.427734375, + "loss_num": 0.036376953125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 39830636, + "step": 712 + }, + { + "epoch": 1.5879732739420935, + "grad_norm": 50.981754302978516, + "learning_rate": 1e-06, + "loss": 1.0619, + "num_input_tokens_seen": 39887336, + "step": 713 + }, + { + "epoch": 1.5879732739420935, + "loss": 1.1285226345062256, + "loss_ce": 0.0005930241313762963, + "loss_iou": 0.43359375, + "loss_num": 0.052001953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 39887336, + "step": 713 + }, + { + "epoch": 1.5902004454342984, + "grad_norm": 1745.5362548828125, + "learning_rate": 1e-06, + "loss": 1.0216, + "num_input_tokens_seen": 39945364, + "step": 714 + }, + { + "epoch": 1.5902004454342984, + "loss": 0.8426003456115723, + "loss_ce": 0.0008034344646148384, + "loss_iou": 0.34375, + "loss_num": 0.031005859375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 39945364, + "step": 714 + }, + { + "epoch": 1.5924276169265035, + "grad_norm": 141.83999633789062, + "learning_rate": 1e-06, + "loss": 0.9186, + "num_input_tokens_seen": 39998372, + "step": 715 + }, + { + "epoch": 1.5924276169265035, + "loss": 0.9666658639907837, + "loss_ce": 0.00035729241790249944, + "loss_iou": 0.376953125, + "loss_num": 0.04296875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 39998372, + "step": 715 + }, + { + "epoch": 1.5946547884187083, + "grad_norm": 16.73200225830078, + "learning_rate": 1e-06, + "loss": 1.0519, + "num_input_tokens_seen": 40053772, + "step": 716 + }, + { + "epoch": 1.5946547884187083, + "loss": 1.3080520629882812, + "loss_ce": 0.005317714065313339, + "loss_iou": 0.55078125, + "loss_num": 0.040771484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 40053772, + "step": 716 + }, + { + "epoch": 1.5968819599109132, + "grad_norm": 19.21235466003418, + "learning_rate": 1e-06, + "loss": 0.903, + "num_input_tokens_seen": 40111152, + "step": 717 + }, + { + "epoch": 1.5968819599109132, + "loss": 0.8331512808799744, + "loss_ce": 0.00649112556129694, + "loss_iou": 0.341796875, + "loss_num": 0.0283203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 40111152, + "step": 717 + }, + { + "epoch": 1.599109131403118, + "grad_norm": 26.08073616027832, + "learning_rate": 1e-06, + "loss": 1.0622, + "num_input_tokens_seen": 40170092, + "step": 718 + }, + { + "epoch": 1.599109131403118, + "loss": 1.0961337089538574, + "loss_ce": 0.0006747127044945955, + "loss_iou": 0.43359375, + "loss_num": 0.045654296875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 40170092, + "step": 718 + }, + { + "epoch": 1.601336302895323, + "grad_norm": 38.96022033691406, + "learning_rate": 1e-06, + "loss": 1.0349, + "num_input_tokens_seen": 40222380, + "step": 719 + }, + { + "epoch": 1.601336302895323, + "loss": 1.0462639331817627, + "loss_ce": 0.00048747030086815357, + "loss_iou": 0.43359375, + "loss_num": 0.03564453125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 40222380, + "step": 719 + }, + { + "epoch": 1.6035634743875278, + "grad_norm": 15.093145370483398, + "learning_rate": 1e-06, + "loss": 1.0315, + "num_input_tokens_seen": 40279864, + "step": 720 + }, + { + "epoch": 1.6035634743875278, + "loss": 1.010891079902649, + "loss_ce": 0.0040551056154072285, + "loss_iou": 0.416015625, + "loss_num": 0.034912109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 40279864, + "step": 720 + }, + { + "epoch": 1.6057906458797326, + "grad_norm": 24.701709747314453, + "learning_rate": 1e-06, + "loss": 1.314, + "num_input_tokens_seen": 40334520, + "step": 721 + }, + { + "epoch": 1.6057906458797326, + "loss": 1.5099828243255615, + "loss_ce": 0.0011937202652916312, + "loss_iou": 0.5625, + "loss_num": 0.07763671875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 40334520, + "step": 721 + }, + { + "epoch": 1.6080178173719375, + "grad_norm": 18.33966064453125, + "learning_rate": 1e-06, + "loss": 1.0704, + "num_input_tokens_seen": 40389100, + "step": 722 + }, + { + "epoch": 1.6080178173719375, + "loss": 0.9851089715957642, + "loss_ce": 0.0004898360930383205, + "loss_iou": 0.43359375, + "loss_num": 0.0233154296875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 40389100, + "step": 722 + }, + { + "epoch": 1.6102449888641426, + "grad_norm": 51.41325759887695, + "learning_rate": 1e-06, + "loss": 0.8882, + "num_input_tokens_seen": 40446016, + "step": 723 + }, + { + "epoch": 1.6102449888641426, + "loss": 1.013216257095337, + "loss_ce": 0.0005209506489336491, + "loss_iou": 0.431640625, + "loss_num": 0.0296630859375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 40446016, + "step": 723 + }, + { + "epoch": 1.6124721603563474, + "grad_norm": 22.967365264892578, + "learning_rate": 1e-06, + "loss": 1.1492, + "num_input_tokens_seen": 40500448, + "step": 724 + }, + { + "epoch": 1.6124721603563474, + "loss": 1.168558120727539, + "loss_ce": 0.0005894272471778095, + "loss_iou": 0.4453125, + "loss_num": 0.0556640625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 40500448, + "step": 724 + }, + { + "epoch": 1.6146993318485523, + "grad_norm": 54.2728157043457, + "learning_rate": 1e-06, + "loss": 1.0244, + "num_input_tokens_seen": 40557300, + "step": 725 + }, + { + "epoch": 1.6146993318485523, + "loss": 0.8771815896034241, + "loss_ce": 0.0009608692489564419, + "loss_iou": 0.353515625, + "loss_num": 0.033935546875, + "loss_xval": 0.875, + "num_input_tokens_seen": 40557300, + "step": 725 + }, + { + "epoch": 1.6169265033407574, + "grad_norm": 55.31407928466797, + "learning_rate": 1e-06, + "loss": 1.0433, + "num_input_tokens_seen": 40613104, + "step": 726 + }, + { + "epoch": 1.6169265033407574, + "loss": 1.0284351110458374, + "loss_ce": 0.003044519107788801, + "loss_iou": 0.4140625, + "loss_num": 0.0390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 40613104, + "step": 726 + }, + { + "epoch": 1.6191536748329622, + "grad_norm": 24.382633209228516, + "learning_rate": 1e-06, + "loss": 0.9911, + "num_input_tokens_seen": 40667412, + "step": 727 + }, + { + "epoch": 1.6191536748329622, + "loss": 0.7859052419662476, + "loss_ce": 0.0005047998856753111, + "loss_iou": 0.33984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 40667412, + "step": 727 + }, + { + "epoch": 1.621380846325167, + "grad_norm": 22.375062942504883, + "learning_rate": 1e-06, + "loss": 0.8341, + "num_input_tokens_seen": 40724616, + "step": 728 + }, + { + "epoch": 1.621380846325167, + "loss": 0.7646250128746033, + "loss_ce": 0.0004648742906283587, + "loss_iou": 0.328125, + "loss_num": 0.021728515625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 40724616, + "step": 728 + }, + { + "epoch": 1.623608017817372, + "grad_norm": 15.496011734008789, + "learning_rate": 1e-06, + "loss": 1.1697, + "num_input_tokens_seen": 40780212, + "step": 729 + }, + { + "epoch": 1.623608017817372, + "loss": 1.1705524921417236, + "loss_ce": 0.0006306255236268044, + "loss_iou": 0.484375, + "loss_num": 0.04052734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 40780212, + "step": 729 + }, + { + "epoch": 1.6258351893095768, + "grad_norm": 24.535715103149414, + "learning_rate": 1e-06, + "loss": 1.0096, + "num_input_tokens_seen": 40837012, + "step": 730 + }, + { + "epoch": 1.6258351893095768, + "loss": 0.9559507369995117, + "loss_ce": 0.0008726270170882344, + "loss_iou": 0.39453125, + "loss_num": 0.033203125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 40837012, + "step": 730 + }, + { + "epoch": 1.6280623608017817, + "grad_norm": 16.106016159057617, + "learning_rate": 1e-06, + "loss": 1.144, + "num_input_tokens_seen": 40891652, + "step": 731 + }, + { + "epoch": 1.6280623608017817, + "loss": 1.1018345355987549, + "loss_ce": 0.000516209052875638, + "loss_iou": 0.482421875, + "loss_num": 0.0274658203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 40891652, + "step": 731 + }, + { + "epoch": 1.6302895322939865, + "grad_norm": 34.02077102661133, + "learning_rate": 1e-06, + "loss": 1.173, + "num_input_tokens_seen": 40947216, + "step": 732 + }, + { + "epoch": 1.6302895322939865, + "loss": 1.2337870597839355, + "loss_ce": 0.003562505356967449, + "loss_iou": 0.45703125, + "loss_num": 0.06298828125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 40947216, + "step": 732 + }, + { + "epoch": 1.6325167037861914, + "grad_norm": 23.39971351623535, + "learning_rate": 1e-06, + "loss": 0.7367, + "num_input_tokens_seen": 41005556, + "step": 733 + }, + { + "epoch": 1.6325167037861914, + "loss": 0.6531933546066284, + "loss_ce": 0.000361293728929013, + "loss_iou": 0.2578125, + "loss_num": 0.02783203125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 41005556, + "step": 733 + }, + { + "epoch": 1.6347438752783965, + "grad_norm": 15.164018630981445, + "learning_rate": 1e-06, + "loss": 1.2361, + "num_input_tokens_seen": 41060208, + "step": 734 + }, + { + "epoch": 1.6347438752783965, + "loss": 1.2857666015625, + "loss_ce": 0.001342757255770266, + "loss_iou": 0.484375, + "loss_num": 0.06298828125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 41060208, + "step": 734 + }, + { + "epoch": 1.6369710467706013, + "grad_norm": 24.00090980529785, + "learning_rate": 1e-06, + "loss": 1.0606, + "num_input_tokens_seen": 41116160, + "step": 735 + }, + { + "epoch": 1.6369710467706013, + "loss": 0.6998146772384644, + "loss_ce": 0.001450410927645862, + "loss_iou": 0.283203125, + "loss_num": 0.0262451171875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 41116160, + "step": 735 + }, + { + "epoch": 1.6391982182628062, + "grad_norm": 42.26320266723633, + "learning_rate": 1e-06, + "loss": 0.9948, + "num_input_tokens_seen": 41173264, + "step": 736 + }, + { + "epoch": 1.6391982182628062, + "loss": 0.8883851766586304, + "loss_ce": 0.0006898957653902471, + "loss_iou": 0.380859375, + "loss_num": 0.0252685546875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 41173264, + "step": 736 + }, + { + "epoch": 1.6414253897550113, + "grad_norm": 17.52292823791504, + "learning_rate": 1e-06, + "loss": 1.1069, + "num_input_tokens_seen": 41228928, + "step": 737 + }, + { + "epoch": 1.6414253897550113, + "loss": 1.1075830459594727, + "loss_ce": 0.001137720886617899, + "loss_iou": 0.427734375, + "loss_num": 0.050048828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 41228928, + "step": 737 + }, + { + "epoch": 1.6436525612472161, + "grad_norm": 17.622894287109375, + "learning_rate": 1e-06, + "loss": 1.1064, + "num_input_tokens_seen": 41285968, + "step": 738 + }, + { + "epoch": 1.6436525612472161, + "loss": 0.900823712348938, + "loss_ce": 0.0004331024829298258, + "loss_iou": 0.37890625, + "loss_num": 0.0281982421875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 41285968, + "step": 738 + }, + { + "epoch": 1.645879732739421, + "grad_norm": 40.00740051269531, + "learning_rate": 1e-06, + "loss": 0.9503, + "num_input_tokens_seen": 41341372, + "step": 739 + }, + { + "epoch": 1.645879732739421, + "loss": 0.6320021152496338, + "loss_ce": 0.0004103146493434906, + "loss_iou": 0.2734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 41341372, + "step": 739 + }, + { + "epoch": 1.6481069042316259, + "grad_norm": 23.410932540893555, + "learning_rate": 1e-06, + "loss": 1.1302, + "num_input_tokens_seen": 41394872, + "step": 740 + }, + { + "epoch": 1.6481069042316259, + "loss": 1.1568620204925537, + "loss_ce": 0.001100304420106113, + "loss_iou": 0.43359375, + "loss_num": 0.05810546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 41394872, + "step": 740 + }, + { + "epoch": 1.6503340757238307, + "grad_norm": 15.308507919311523, + "learning_rate": 1e-06, + "loss": 1.0673, + "num_input_tokens_seen": 41451676, + "step": 741 + }, + { + "epoch": 1.6503340757238307, + "loss": 1.1007647514343262, + "loss_ce": 0.0033525261096656322, + "loss_iou": 0.416015625, + "loss_num": 0.052734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 41451676, + "step": 741 + }, + { + "epoch": 1.6525612472160356, + "grad_norm": 18.964244842529297, + "learning_rate": 1e-06, + "loss": 1.0727, + "num_input_tokens_seen": 41509472, + "step": 742 + }, + { + "epoch": 1.6525612472160356, + "loss": 0.9488592743873596, + "loss_ce": 0.001105397124774754, + "loss_iou": 0.404296875, + "loss_num": 0.028076171875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 41509472, + "step": 742 + }, + { + "epoch": 1.6547884187082404, + "grad_norm": 25.588586807250977, + "learning_rate": 1e-06, + "loss": 0.9794, + "num_input_tokens_seen": 41565372, + "step": 743 + }, + { + "epoch": 1.6547884187082404, + "loss": 0.8954076766967773, + "loss_ce": 0.00038817772292532027, + "loss_iou": 0.3125, + "loss_num": 0.054443359375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 41565372, + "step": 743 + }, + { + "epoch": 1.6570155902004453, + "grad_norm": 58.01374435424805, + "learning_rate": 1e-06, + "loss": 1.0396, + "num_input_tokens_seen": 41622096, + "step": 744 + }, + { + "epoch": 1.6570155902004453, + "loss": 1.068095088005066, + "loss_ce": 0.00046813933295197785, + "loss_iou": 0.44140625, + "loss_num": 0.036865234375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 41622096, + "step": 744 + }, + { + "epoch": 1.6592427616926502, + "grad_norm": 19.471540451049805, + "learning_rate": 1e-06, + "loss": 1.2403, + "num_input_tokens_seen": 41679236, + "step": 745 + }, + { + "epoch": 1.6592427616926502, + "loss": 1.3742785453796387, + "loss_ce": 0.00123158423230052, + "loss_iou": 0.5546875, + "loss_num": 0.05322265625, + "loss_xval": 1.375, + "num_input_tokens_seen": 41679236, + "step": 745 + }, + { + "epoch": 1.6614699331848553, + "grad_norm": 28.21709442138672, + "learning_rate": 1e-06, + "loss": 1.0587, + "num_input_tokens_seen": 41736156, + "step": 746 + }, + { + "epoch": 1.6614699331848553, + "loss": 0.78886479139328, + "loss_ce": 0.0005347341066226363, + "loss_iou": 0.33203125, + "loss_num": 0.02490234375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 41736156, + "step": 746 + }, + { + "epoch": 1.6636971046770601, + "grad_norm": 18.029945373535156, + "learning_rate": 1e-06, + "loss": 1.2296, + "num_input_tokens_seen": 41791268, + "step": 747 + }, + { + "epoch": 1.6636971046770601, + "loss": 1.125550389289856, + "loss_ce": 0.0007945015095174313, + "loss_iou": 0.4609375, + "loss_num": 0.041015625, + "loss_xval": 1.125, + "num_input_tokens_seen": 41791268, + "step": 747 + }, + { + "epoch": 1.6659242761692652, + "grad_norm": 19.177194595336914, + "learning_rate": 1e-06, + "loss": 1.143, + "num_input_tokens_seen": 41845948, + "step": 748 + }, + { + "epoch": 1.6659242761692652, + "loss": 1.0622222423553467, + "loss_ce": 0.0006987220258451998, + "loss_iou": 0.451171875, + "loss_num": 0.0322265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 41845948, + "step": 748 + }, + { + "epoch": 1.66815144766147, + "grad_norm": 18.510656356811523, + "learning_rate": 1e-06, + "loss": 0.9679, + "num_input_tokens_seen": 41901836, + "step": 749 + }, + { + "epoch": 1.66815144766147, + "loss": 1.2074395418167114, + "loss_ce": 0.0008966219611465931, + "loss_iou": 0.490234375, + "loss_num": 0.04541015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 41901836, + "step": 749 + }, + { + "epoch": 1.670378619153675, + "grad_norm": 18.269397735595703, + "learning_rate": 1e-06, + "loss": 0.8143, + "num_input_tokens_seen": 41960764, + "step": 750 + }, + { + "epoch": 1.670378619153675, + "eval_seeclick_web_CIoU": 0.5411946177482605, + "eval_seeclick_web_GIoU": 0.5299433916807175, + "eval_seeclick_web_IoU": 0.560671478509903, + "eval_seeclick_web_MAE_all": 0.01778533821925521, + "eval_seeclick_web_MAE_h": 0.011779951397329569, + "eval_seeclick_web_MAE_w": 0.017373694106936455, + "eval_seeclick_web_MAE_x_boxes": 0.01111887488514185, + "eval_seeclick_web_MAE_y_boxes": 0.02299651806242764, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 1.0158196687698364, + "eval_seeclick_web_loss_ce": 0.00048441681428812444, + "eval_seeclick_web_loss_iou": 0.462646484375, + "eval_seeclick_web_loss_num": 0.014404296875, + "eval_seeclick_web_loss_xval": 0.9970703125, + "eval_seeclick_web_runtime": 17.3473, + "eval_seeclick_web_samples_per_second": 2.882, + "eval_seeclick_web_steps_per_second": 0.115, + "num_input_tokens_seen": 41960764, + "step": 750 + }, + { + "epoch": 1.670378619153675, + "eval_icons_CIoU": 0.3312116116285324, + "eval_icons_GIoU": 0.36676979064941406, + "eval_icons_IoU": 0.40202146768569946, + "eval_icons_MAE_all": 0.06723485328257084, + "eval_icons_MAE_h": 0.03537856135517359, + "eval_icons_MAE_w": 0.07818298228085041, + "eval_icons_MAE_x_boxes": 0.058233313262462616, + "eval_icons_MAE_y_boxes": 0.03735906444489956, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.6458972692489624, + "eval_icons_loss_ce": 0.0018369617173448205, + "eval_icons_loss_iou": 0.6297607421875, + "eval_icons_loss_num": 0.06496238708496094, + "eval_icons_loss_xval": 1.583984375, + "eval_icons_runtime": 17.556, + "eval_icons_samples_per_second": 2.848, + "eval_icons_steps_per_second": 0.114, + "num_input_tokens_seen": 41960764, + "step": 750 + }, + { + "epoch": 1.670378619153675, + "eval_screenspot_CIoU": 0.2800278961658478, + "eval_screenspot_GIoU": 0.3007667362689972, + "eval_screenspot_IoU": 0.36394914984703064, + "eval_screenspot_MAE_all": 0.08516304691632588, + "eval_screenspot_MAE_h": 0.04900899901986122, + "eval_screenspot_MAE_w": 0.08881760636965434, + "eval_screenspot_MAE_x_boxes": 0.11530703057845433, + "eval_screenspot_MAE_y_boxes": 0.05522619063655535, + "eval_screenspot_inside_bbox": 0.5808333357175192, + "eval_screenspot_loss": 1.8781194686889648, + "eval_screenspot_loss_ce": 0.00343730168727537, + "eval_screenspot_loss_iou": 0.73583984375, + "eval_screenspot_loss_num": 0.09470876057942708, + "eval_screenspot_loss_xval": 1.9451497395833333, + "eval_screenspot_runtime": 27.4038, + "eval_screenspot_samples_per_second": 3.248, + "eval_screenspot_steps_per_second": 0.109, + "num_input_tokens_seen": 41960764, + "step": 750 + }, + { + "epoch": 1.670378619153675, + "eval_compot_CIoU": 0.3159261643886566, + "eval_compot_GIoU": 0.3490176349878311, + "eval_compot_IoU": 0.380667582154274, + "eval_compot_MAE_all": 0.03044109046459198, + "eval_compot_MAE_h": 0.012972671538591385, + "eval_compot_MAE_w": 0.040612708777189255, + "eval_compot_MAE_x_boxes": 0.03857684042304754, + "eval_compot_MAE_y_boxes": 0.007390682585537434, + "eval_compot_inside_bbox": 0.5868055522441864, + "eval_compot_loss": 1.4768078327178955, + "eval_compot_loss_ce": 0.0005472496850416064, + "eval_compot_loss_iou": 0.6441650390625, + "eval_compot_loss_num": 0.027063369750976562, + "eval_compot_loss_xval": 1.423095703125, + "eval_compot_runtime": 17.7125, + "eval_compot_samples_per_second": 2.823, + "eval_compot_steps_per_second": 0.113, + "num_input_tokens_seen": 41960764, + "step": 750 + }, + { + "epoch": 1.670378619153675, + "eval_custom_ui_val_CIoU": 0.3929840202132861, + "eval_custom_ui_val_GIoU": 0.4221853729751375, + "eval_custom_ui_val_IoU": 0.45630496740341187, + "eval_custom_ui_val_MAE_all": 0.043078110449843936, + "eval_custom_ui_val_MAE_h": 0.025929110849069223, + "eval_custom_ui_val_MAE_w": 0.048152227161659136, + "eval_custom_ui_val_MAE_x_boxes": 0.04743420394758383, + "eval_custom_ui_val_MAE_y_boxes": 0.02334741482304202, + "eval_custom_ui_val_inside_bbox": 0.6608796318372091, + "eval_custom_ui_val_loss": 1.3928289413452148, + "eval_custom_ui_val_loss_ce": 0.0016487750755105582, + "eval_custom_ui_val_loss_iou": 0.5744357638888888, + "eval_custom_ui_val_loss_num": 0.0423272450764974, + "eval_custom_ui_val_loss_xval": 1.3607584635416667, + "eval_custom_ui_val_runtime": 56.9195, + "eval_custom_ui_val_samples_per_second": 4.656, + "eval_custom_ui_val_steps_per_second": 0.158, + "num_input_tokens_seen": 41960764, + "step": 750 + }, + { + "epoch": 1.670378619153675, + "loss": 1.1135873794555664, + "loss_ce": 0.0010384945198893547, + "loss_iou": 0.466796875, + "loss_num": 0.03515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 41960764, + "step": 750 + }, + { + "epoch": 1.6726057906458798, + "grad_norm": 22.51813507080078, + "learning_rate": 1e-06, + "loss": 1.0466, + "num_input_tokens_seen": 42015972, + "step": 751 + }, + { + "epoch": 1.6726057906458798, + "loss": 1.0174639225006104, + "loss_ce": 0.0008623974863439798, + "loss_iou": 0.419921875, + "loss_num": 0.034912109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 42015972, + "step": 751 + }, + { + "epoch": 1.6748329621380846, + "grad_norm": 22.246463775634766, + "learning_rate": 1e-06, + "loss": 1.4559, + "num_input_tokens_seen": 42071584, + "step": 752 + }, + { + "epoch": 1.6748329621380846, + "loss": 1.3008112907409668, + "loss_ce": 0.001006676466204226, + "loss_iou": 0.4921875, + "loss_num": 0.06298828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 42071584, + "step": 752 + }, + { + "epoch": 1.6770601336302895, + "grad_norm": 25.3890380859375, + "learning_rate": 1e-06, + "loss": 1.0625, + "num_input_tokens_seen": 42129356, + "step": 753 + }, + { + "epoch": 1.6770601336302895, + "loss": 0.8968948721885681, + "loss_ce": 0.0004105077823624015, + "loss_iou": 0.37109375, + "loss_num": 0.0311279296875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 42129356, + "step": 753 + }, + { + "epoch": 1.6792873051224944, + "grad_norm": 19.59492301940918, + "learning_rate": 1e-06, + "loss": 0.9136, + "num_input_tokens_seen": 42186100, + "step": 754 + }, + { + "epoch": 1.6792873051224944, + "loss": 0.7982625961303711, + "loss_ce": 0.0004110607551410794, + "loss_iou": 0.33203125, + "loss_num": 0.0267333984375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 42186100, + "step": 754 + }, + { + "epoch": 1.6815144766146992, + "grad_norm": 24.47226905822754, + "learning_rate": 1e-06, + "loss": 1.1686, + "num_input_tokens_seen": 42240244, + "step": 755 + }, + { + "epoch": 1.6815144766146992, + "loss": 1.1603378057479858, + "loss_ce": 0.0004257457912899554, + "loss_iou": 0.470703125, + "loss_num": 0.043701171875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 42240244, + "step": 755 + }, + { + "epoch": 1.683741648106904, + "grad_norm": 23.5756893157959, + "learning_rate": 1e-06, + "loss": 1.1523, + "num_input_tokens_seen": 42299200, + "step": 756 + }, + { + "epoch": 1.683741648106904, + "loss": 0.9354584217071533, + "loss_ce": 0.00039978878339752555, + "loss_iou": 0.388671875, + "loss_num": 0.03125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 42299200, + "step": 756 + }, + { + "epoch": 1.6859688195991092, + "grad_norm": 29.683124542236328, + "learning_rate": 1e-06, + "loss": 1.2437, + "num_input_tokens_seen": 42355712, + "step": 757 + }, + { + "epoch": 1.6859688195991092, + "loss": 1.3413095474243164, + "loss_ce": 0.0007333762478083372, + "loss_iou": 0.55078125, + "loss_num": 0.046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 42355712, + "step": 757 + }, + { + "epoch": 1.688195991091314, + "grad_norm": 15.45508861541748, + "learning_rate": 1e-06, + "loss": 0.7795, + "num_input_tokens_seen": 42413232, + "step": 758 + }, + { + "epoch": 1.688195991091314, + "loss": 0.9429343938827515, + "loss_ce": 0.0005515510565601289, + "loss_iou": 0.396484375, + "loss_num": 0.0299072265625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 42413232, + "step": 758 + }, + { + "epoch": 1.6904231625835189, + "grad_norm": 24.212417602539062, + "learning_rate": 1e-06, + "loss": 0.9237, + "num_input_tokens_seen": 42470224, + "step": 759 + }, + { + "epoch": 1.6904231625835189, + "loss": 1.025328516960144, + "loss_ce": 0.005308972671627998, + "loss_iou": 0.419921875, + "loss_num": 0.035888671875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 42470224, + "step": 759 + }, + { + "epoch": 1.692650334075724, + "grad_norm": 18.754596710205078, + "learning_rate": 1e-06, + "loss": 1.0195, + "num_input_tokens_seen": 42525456, + "step": 760 + }, + { + "epoch": 1.692650334075724, + "loss": 1.1706150770187378, + "loss_ce": 0.0036228555254638195, + "loss_iou": 0.4921875, + "loss_num": 0.036376953125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 42525456, + "step": 760 + }, + { + "epoch": 1.6948775055679288, + "grad_norm": 19.588882446289062, + "learning_rate": 1e-06, + "loss": 0.695, + "num_input_tokens_seen": 42582136, + "step": 761 + }, + { + "epoch": 1.6948775055679288, + "loss": 0.852080225944519, + "loss_ce": 0.0005177696002647281, + "loss_iou": 0.376953125, + "loss_num": 0.019287109375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 42582136, + "step": 761 + }, + { + "epoch": 1.6971046770601337, + "grad_norm": 32.498355865478516, + "learning_rate": 1e-06, + "loss": 1.1963, + "num_input_tokens_seen": 42637868, + "step": 762 + }, + { + "epoch": 1.6971046770601337, + "loss": 1.1873914003372192, + "loss_ce": 0.0008679982274770737, + "loss_iou": 0.484375, + "loss_num": 0.04345703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 42637868, + "step": 762 + }, + { + "epoch": 1.6993318485523385, + "grad_norm": 24.11650848388672, + "learning_rate": 1e-06, + "loss": 1.1282, + "num_input_tokens_seen": 42689848, + "step": 763 + }, + { + "epoch": 1.6993318485523385, + "loss": 1.3757023811340332, + "loss_ce": 0.0007023118087090552, + "loss_iou": 0.578125, + "loss_num": 0.04443359375, + "loss_xval": 1.375, + "num_input_tokens_seen": 42689848, + "step": 763 + }, + { + "epoch": 1.7015590200445434, + "grad_norm": 17.393308639526367, + "learning_rate": 1e-06, + "loss": 1.0435, + "num_input_tokens_seen": 42745660, + "step": 764 + }, + { + "epoch": 1.7015590200445434, + "loss": 0.8240088224411011, + "loss_ce": 0.0014990322524681687, + "loss_iou": 0.34375, + "loss_num": 0.02685546875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 42745660, + "step": 764 + }, + { + "epoch": 1.7037861915367483, + "grad_norm": 26.520469665527344, + "learning_rate": 1e-06, + "loss": 1.0743, + "num_input_tokens_seen": 42802272, + "step": 765 + }, + { + "epoch": 1.7037861915367483, + "loss": 1.2456159591674805, + "loss_ce": 0.0019635630305856466, + "loss_iou": 0.5, + "loss_num": 0.048095703125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 42802272, + "step": 765 + }, + { + "epoch": 1.7060133630289531, + "grad_norm": 34.5315055847168, + "learning_rate": 1e-06, + "loss": 1.1088, + "num_input_tokens_seen": 42859164, + "step": 766 + }, + { + "epoch": 1.7060133630289531, + "loss": 1.120169758796692, + "loss_ce": 0.0015174560248851776, + "loss_iou": 0.47265625, + "loss_num": 0.03466796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 42859164, + "step": 766 + }, + { + "epoch": 1.708240534521158, + "grad_norm": 28.206703186035156, + "learning_rate": 1e-06, + "loss": 1.1139, + "num_input_tokens_seen": 42911928, + "step": 767 + }, + { + "epoch": 1.708240534521158, + "loss": 1.1372349262237549, + "loss_ce": 0.0010045571252703667, + "loss_iou": 0.4765625, + "loss_num": 0.03662109375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 42911928, + "step": 767 + }, + { + "epoch": 1.710467706013363, + "grad_norm": 44.07477951049805, + "learning_rate": 1e-06, + "loss": 0.6435, + "num_input_tokens_seen": 42970112, + "step": 768 + }, + { + "epoch": 1.710467706013363, + "loss": 0.7950579524040222, + "loss_ce": 0.0003801731509156525, + "loss_iou": 0.318359375, + "loss_num": 0.031494140625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 42970112, + "step": 768 + }, + { + "epoch": 1.712694877505568, + "grad_norm": 39.26905059814453, + "learning_rate": 1e-06, + "loss": 1.1469, + "num_input_tokens_seen": 43025648, + "step": 769 + }, + { + "epoch": 1.712694877505568, + "loss": 1.2926554679870605, + "loss_ce": 0.0006633971352130175, + "loss_iou": 0.50390625, + "loss_num": 0.05712890625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 43025648, + "step": 769 + }, + { + "epoch": 1.7149220489977728, + "grad_norm": 39.54010009765625, + "learning_rate": 1e-06, + "loss": 1.1173, + "num_input_tokens_seen": 43083808, + "step": 770 + }, + { + "epoch": 1.7149220489977728, + "loss": 1.2310657501220703, + "loss_ce": 0.0010852674022316933, + "loss_iou": 0.482421875, + "loss_num": 0.052978515625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 43083808, + "step": 770 + }, + { + "epoch": 1.7171492204899779, + "grad_norm": 19.132938385009766, + "learning_rate": 1e-06, + "loss": 1.06, + "num_input_tokens_seen": 43142508, + "step": 771 + }, + { + "epoch": 1.7171492204899779, + "loss": 0.9760845303535461, + "loss_ce": 0.0004985497798770666, + "loss_iou": 0.412109375, + "loss_num": 0.0308837890625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 43142508, + "step": 771 + }, + { + "epoch": 1.7193763919821827, + "grad_norm": 23.097333908081055, + "learning_rate": 1e-06, + "loss": 1.0792, + "num_input_tokens_seen": 43196132, + "step": 772 + }, + { + "epoch": 1.7193763919821827, + "loss": 0.7085492014884949, + "loss_ce": 0.0005413593607954681, + "loss_iou": 0.28515625, + "loss_num": 0.027099609375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 43196132, + "step": 772 + }, + { + "epoch": 1.7216035634743876, + "grad_norm": 30.94361686706543, + "learning_rate": 1e-06, + "loss": 1.0867, + "num_input_tokens_seen": 43253588, + "step": 773 + }, + { + "epoch": 1.7216035634743876, + "loss": 0.9410488605499268, + "loss_ce": 0.0011074627982452512, + "loss_iou": 0.3828125, + "loss_num": 0.034912109375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 43253588, + "step": 773 + }, + { + "epoch": 1.7238307349665924, + "grad_norm": 23.98365592956543, + "learning_rate": 1e-06, + "loss": 1.3351, + "num_input_tokens_seen": 43308080, + "step": 774 + }, + { + "epoch": 1.7238307349665924, + "loss": 1.3561934232711792, + "loss_ce": 0.0007246616296470165, + "loss_iou": 0.515625, + "loss_num": 0.0654296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 43308080, + "step": 774 + }, + { + "epoch": 1.7260579064587973, + "grad_norm": 21.451553344726562, + "learning_rate": 1e-06, + "loss": 0.835, + "num_input_tokens_seen": 43365784, + "step": 775 + }, + { + "epoch": 1.7260579064587973, + "loss": 0.7267694473266602, + "loss_ce": 0.0019159411313012242, + "loss_iou": 0.306640625, + "loss_num": 0.0224609375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 43365784, + "step": 775 + }, + { + "epoch": 1.7282850779510022, + "grad_norm": 32.42961502075195, + "learning_rate": 1e-06, + "loss": 0.7633, + "num_input_tokens_seen": 43420724, + "step": 776 + }, + { + "epoch": 1.7282850779510022, + "loss": 0.8495993614196777, + "loss_ce": 0.0009665663237683475, + "loss_iou": 0.345703125, + "loss_num": 0.031494140625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 43420724, + "step": 776 + }, + { + "epoch": 1.730512249443207, + "grad_norm": 12.944496154785156, + "learning_rate": 1e-06, + "loss": 1.1678, + "num_input_tokens_seen": 43478128, + "step": 777 + }, + { + "epoch": 1.730512249443207, + "loss": 1.2062785625457764, + "loss_ce": 0.0004680473357439041, + "loss_iou": 0.5, + "loss_num": 0.041259765625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 43478128, + "step": 777 + }, + { + "epoch": 1.732739420935412, + "grad_norm": 24.311969757080078, + "learning_rate": 1e-06, + "loss": 1.3848, + "num_input_tokens_seen": 43535104, + "step": 778 + }, + { + "epoch": 1.732739420935412, + "loss": 1.19753098487854, + "loss_ce": 0.0007536107441410422, + "loss_iou": 0.486328125, + "loss_num": 0.04443359375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 43535104, + "step": 778 + }, + { + "epoch": 1.734966592427617, + "grad_norm": 24.397178649902344, + "learning_rate": 1e-06, + "loss": 1.0169, + "num_input_tokens_seen": 43592232, + "step": 779 + }, + { + "epoch": 1.734966592427617, + "loss": 1.0153961181640625, + "loss_ce": 0.003433296922594309, + "loss_iou": 0.40625, + "loss_num": 0.040283203125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 43592232, + "step": 779 + }, + { + "epoch": 1.7371937639198218, + "grad_norm": 18.340551376342773, + "learning_rate": 1e-06, + "loss": 1.0615, + "num_input_tokens_seen": 43645688, + "step": 780 + }, + { + "epoch": 1.7371937639198218, + "loss": 1.023805856704712, + "loss_ce": 0.0007346307393163443, + "loss_iou": 0.404296875, + "loss_num": 0.04248046875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 43645688, + "step": 780 + }, + { + "epoch": 1.7394209354120267, + "grad_norm": 16.935611724853516, + "learning_rate": 1e-06, + "loss": 1.0056, + "num_input_tokens_seen": 43702564, + "step": 781 + }, + { + "epoch": 1.7394209354120267, + "loss": 0.7179310321807861, + "loss_ce": 0.002354835858568549, + "loss_iou": 0.291015625, + "loss_num": 0.026611328125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 43702564, + "step": 781 + }, + { + "epoch": 1.7416481069042318, + "grad_norm": 22.93832015991211, + "learning_rate": 1e-06, + "loss": 0.9551, + "num_input_tokens_seen": 43760636, + "step": 782 + }, + { + "epoch": 1.7416481069042318, + "loss": 1.01814603805542, + "loss_ce": 0.0010561385424807668, + "loss_iou": 0.423828125, + "loss_num": 0.033935546875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 43760636, + "step": 782 + }, + { + "epoch": 1.7438752783964366, + "grad_norm": 26.112279891967773, + "learning_rate": 1e-06, + "loss": 1.104, + "num_input_tokens_seen": 43818316, + "step": 783 + }, + { + "epoch": 1.7438752783964366, + "loss": 0.8660245537757874, + "loss_ce": 0.0020108623430132866, + "loss_iou": 0.37109375, + "loss_num": 0.0244140625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 43818316, + "step": 783 + }, + { + "epoch": 1.7461024498886415, + "grad_norm": 20.74223518371582, + "learning_rate": 1e-06, + "loss": 0.9333, + "num_input_tokens_seen": 43874416, + "step": 784 + }, + { + "epoch": 1.7461024498886415, + "loss": 1.0379929542541504, + "loss_ce": 0.0003951968683395535, + "loss_iou": 0.42578125, + "loss_num": 0.037109375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 43874416, + "step": 784 + }, + { + "epoch": 1.7483296213808464, + "grad_norm": 16.81831169128418, + "learning_rate": 1e-06, + "loss": 0.8192, + "num_input_tokens_seen": 43929028, + "step": 785 + }, + { + "epoch": 1.7483296213808464, + "loss": 0.7447642087936401, + "loss_ce": 0.0003794525982812047, + "loss_iou": 0.322265625, + "loss_num": 0.020263671875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 43929028, + "step": 785 + }, + { + "epoch": 1.7505567928730512, + "grad_norm": 15.588628768920898, + "learning_rate": 1e-06, + "loss": 1.0575, + "num_input_tokens_seen": 43985228, + "step": 786 + }, + { + "epoch": 1.7505567928730512, + "loss": 0.9384199380874634, + "loss_ce": 0.0004316343110986054, + "loss_iou": 0.376953125, + "loss_num": 0.03662109375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 43985228, + "step": 786 + }, + { + "epoch": 1.752783964365256, + "grad_norm": 47.489871978759766, + "learning_rate": 1e-06, + "loss": 0.8388, + "num_input_tokens_seen": 44042064, + "step": 787 + }, + { + "epoch": 1.752783964365256, + "loss": 0.9427950382232666, + "loss_ce": 0.0006563607603311539, + "loss_iou": 0.3828125, + "loss_num": 0.034912109375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 44042064, + "step": 787 + }, + { + "epoch": 1.755011135857461, + "grad_norm": 23.84717559814453, + "learning_rate": 1e-06, + "loss": 1.1627, + "num_input_tokens_seen": 44099536, + "step": 788 + }, + { + "epoch": 1.755011135857461, + "loss": 1.0481215715408325, + "loss_ce": 0.0007583041442558169, + "loss_iou": 0.42578125, + "loss_num": 0.038818359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 44099536, + "step": 788 + }, + { + "epoch": 1.7572383073496658, + "grad_norm": 15.595168113708496, + "learning_rate": 1e-06, + "loss": 0.9104, + "num_input_tokens_seen": 44156140, + "step": 789 + }, + { + "epoch": 1.7572383073496658, + "loss": 0.9101336002349854, + "loss_ce": 0.0004656048258766532, + "loss_iou": 0.369140625, + "loss_num": 0.034912109375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 44156140, + "step": 789 + }, + { + "epoch": 1.7594654788418709, + "grad_norm": 19.3514404296875, + "learning_rate": 1e-06, + "loss": 1.3284, + "num_input_tokens_seen": 44212504, + "step": 790 + }, + { + "epoch": 1.7594654788418709, + "loss": 1.7789078950881958, + "loss_ce": 0.0010758922435343266, + "loss_iou": 0.6953125, + "loss_num": 0.07861328125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 44212504, + "step": 790 + }, + { + "epoch": 1.7616926503340757, + "grad_norm": 20.171802520751953, + "learning_rate": 1e-06, + "loss": 0.8222, + "num_input_tokens_seen": 44268412, + "step": 791 + }, + { + "epoch": 1.7616926503340757, + "loss": 0.7054498195648193, + "loss_ce": 0.00037169185816310346, + "loss_iou": 0.283203125, + "loss_num": 0.02783203125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 44268412, + "step": 791 + }, + { + "epoch": 1.7639198218262806, + "grad_norm": 26.683834075927734, + "learning_rate": 1e-06, + "loss": 0.9773, + "num_input_tokens_seen": 44326872, + "step": 792 + }, + { + "epoch": 1.7639198218262806, + "loss": 1.1897196769714355, + "loss_ce": 0.0007548188441433012, + "loss_iou": 0.47265625, + "loss_num": 0.048583984375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 44326872, + "step": 792 + }, + { + "epoch": 1.7661469933184857, + "grad_norm": 32.8150749206543, + "learning_rate": 1e-06, + "loss": 0.9811, + "num_input_tokens_seen": 44380344, + "step": 793 + }, + { + "epoch": 1.7661469933184857, + "loss": 1.1400426626205444, + "loss_ce": 0.0006384018342941999, + "loss_iou": 0.462890625, + "loss_num": 0.042724609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 44380344, + "step": 793 + }, + { + "epoch": 1.7683741648106905, + "grad_norm": 117.90059661865234, + "learning_rate": 1e-06, + "loss": 1.0223, + "num_input_tokens_seen": 44435364, + "step": 794 + }, + { + "epoch": 1.7683741648106905, + "loss": 0.6915375590324402, + "loss_ce": 0.0008637503487989306, + "loss_iou": 0.279296875, + "loss_num": 0.02685546875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 44435364, + "step": 794 + }, + { + "epoch": 1.7706013363028954, + "grad_norm": 20.04206085205078, + "learning_rate": 1e-06, + "loss": 0.6872, + "num_input_tokens_seen": 44492192, + "step": 795 + }, + { + "epoch": 1.7706013363028954, + "loss": 0.6449465751647949, + "loss_ce": 0.00041528072324581444, + "loss_iou": 0.267578125, + "loss_num": 0.022216796875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 44492192, + "step": 795 + }, + { + "epoch": 1.7728285077951003, + "grad_norm": 17.540992736816406, + "learning_rate": 1e-06, + "loss": 0.8579, + "num_input_tokens_seen": 44548236, + "step": 796 + }, + { + "epoch": 1.7728285077951003, + "loss": 0.8379471302032471, + "loss_ce": 0.0005447610164992511, + "loss_iou": 0.33984375, + "loss_num": 0.031494140625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 44548236, + "step": 796 + }, + { + "epoch": 1.7750556792873051, + "grad_norm": 21.848678588867188, + "learning_rate": 1e-06, + "loss": 0.9379, + "num_input_tokens_seen": 44603416, + "step": 797 + }, + { + "epoch": 1.7750556792873051, + "loss": 1.0255460739135742, + "loss_ce": 0.0005215964047238231, + "loss_iou": 0.416015625, + "loss_num": 0.038818359375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 44603416, + "step": 797 + }, + { + "epoch": 1.77728285077951, + "grad_norm": 17.641189575195312, + "learning_rate": 1e-06, + "loss": 1.009, + "num_input_tokens_seen": 44659364, + "step": 798 + }, + { + "epoch": 1.77728285077951, + "loss": 1.0162487030029297, + "loss_ce": 0.0006236857152543962, + "loss_iou": 0.431640625, + "loss_num": 0.0301513671875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 44659364, + "step": 798 + }, + { + "epoch": 1.7795100222717148, + "grad_norm": 32.69601058959961, + "learning_rate": 1e-06, + "loss": 1.0617, + "num_input_tokens_seen": 44716108, + "step": 799 + }, + { + "epoch": 1.7795100222717148, + "loss": 1.2807564735412598, + "loss_ce": 0.0009712378960102797, + "loss_iou": 0.484375, + "loss_num": 0.062255859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 44716108, + "step": 799 + }, + { + "epoch": 1.7817371937639197, + "grad_norm": 18.248733520507812, + "learning_rate": 1e-06, + "loss": 0.9217, + "num_input_tokens_seen": 44775240, + "step": 800 + }, + { + "epoch": 1.7817371937639197, + "loss": 0.9542760252952576, + "loss_ce": 0.0026158532127738, + "loss_iou": 0.392578125, + "loss_num": 0.03369140625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 44775240, + "step": 800 + }, + { + "epoch": 1.7839643652561246, + "grad_norm": 31.044193267822266, + "learning_rate": 1e-06, + "loss": 1.0257, + "num_input_tokens_seen": 44830904, + "step": 801 + }, + { + "epoch": 1.7839643652561246, + "loss": 0.9924131035804749, + "loss_ce": 0.000713902700226754, + "loss_iou": 0.365234375, + "loss_num": 0.0517578125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 44830904, + "step": 801 + }, + { + "epoch": 1.7861915367483296, + "grad_norm": 16.06730842590332, + "learning_rate": 1e-06, + "loss": 1.0, + "num_input_tokens_seen": 44885496, + "step": 802 + }, + { + "epoch": 1.7861915367483296, + "loss": 1.0914322137832642, + "loss_ce": 0.0006118253222666681, + "loss_iou": 0.4453125, + "loss_num": 0.0400390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 44885496, + "step": 802 + }, + { + "epoch": 1.7884187082405345, + "grad_norm": 18.782161712646484, + "learning_rate": 1e-06, + "loss": 1.0227, + "num_input_tokens_seen": 44939520, + "step": 803 + }, + { + "epoch": 1.7884187082405345, + "loss": 1.2903541326522827, + "loss_ce": 0.0008032987243495882, + "loss_iou": 0.53125, + "loss_num": 0.045166015625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 44939520, + "step": 803 + }, + { + "epoch": 1.7906458797327396, + "grad_norm": 14.930334091186523, + "learning_rate": 1e-06, + "loss": 0.9842, + "num_input_tokens_seen": 44994976, + "step": 804 + }, + { + "epoch": 1.7906458797327396, + "loss": 0.6712737679481506, + "loss_ce": 0.0028167327400296926, + "loss_iou": 0.27734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 44994976, + "step": 804 + }, + { + "epoch": 1.7928730512249444, + "grad_norm": 21.465778350830078, + "learning_rate": 1e-06, + "loss": 1.161, + "num_input_tokens_seen": 45053556, + "step": 805 + }, + { + "epoch": 1.7928730512249444, + "loss": 1.2845568656921387, + "loss_ce": 0.0006212838925421238, + "loss_iou": 0.486328125, + "loss_num": 0.062255859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 45053556, + "step": 805 + }, + { + "epoch": 1.7951002227171493, + "grad_norm": 20.920679092407227, + "learning_rate": 1e-06, + "loss": 1.1158, + "num_input_tokens_seen": 45110760, + "step": 806 + }, + { + "epoch": 1.7951002227171493, + "loss": 1.0931113958358765, + "loss_ce": 0.000582136504817754, + "loss_iou": 0.44140625, + "loss_num": 0.04150390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 45110760, + "step": 806 + }, + { + "epoch": 1.7973273942093542, + "grad_norm": 29.755828857421875, + "learning_rate": 1e-06, + "loss": 1.2162, + "num_input_tokens_seen": 45166492, + "step": 807 + }, + { + "epoch": 1.7973273942093542, + "loss": 1.4112908840179443, + "loss_ce": 0.0006463178433477879, + "loss_iou": 0.5703125, + "loss_num": 0.0537109375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 45166492, + "step": 807 + }, + { + "epoch": 1.799554565701559, + "grad_norm": 25.334617614746094, + "learning_rate": 1e-06, + "loss": 1.0345, + "num_input_tokens_seen": 45222840, + "step": 808 + }, + { + "epoch": 1.799554565701559, + "loss": 0.8354628086090088, + "loss_ce": 0.0005018864176236093, + "loss_iou": 0.32421875, + "loss_num": 0.03759765625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 45222840, + "step": 808 + }, + { + "epoch": 1.8017817371937639, + "grad_norm": 12.594086647033691, + "learning_rate": 1e-06, + "loss": 1.0409, + "num_input_tokens_seen": 45278916, + "step": 809 + }, + { + "epoch": 1.8017817371937639, + "loss": 0.8014848232269287, + "loss_ce": 0.0004594190395437181, + "loss_iou": 0.326171875, + "loss_num": 0.029541015625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 45278916, + "step": 809 + }, + { + "epoch": 1.8040089086859687, + "grad_norm": 19.810697555541992, + "learning_rate": 1e-06, + "loss": 0.8261, + "num_input_tokens_seen": 45331148, + "step": 810 + }, + { + "epoch": 1.8040089086859687, + "loss": 0.7200347185134888, + "loss_ce": 0.0005523251602426171, + "loss_iou": 0.279296875, + "loss_num": 0.031982421875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 45331148, + "step": 810 + }, + { + "epoch": 1.8062360801781736, + "grad_norm": 15.187623023986816, + "learning_rate": 1e-06, + "loss": 0.9049, + "num_input_tokens_seen": 45385580, + "step": 811 + }, + { + "epoch": 1.8062360801781736, + "loss": 0.8386489748954773, + "loss_ce": 0.001979056978598237, + "loss_iou": 0.322265625, + "loss_num": 0.03857421875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 45385580, + "step": 811 + }, + { + "epoch": 1.8084632516703785, + "grad_norm": 30.983192443847656, + "learning_rate": 1e-06, + "loss": 1.2259, + "num_input_tokens_seen": 45440584, + "step": 812 + }, + { + "epoch": 1.8084632516703785, + "loss": 0.9607589244842529, + "loss_ce": 0.001286255195736885, + "loss_iou": 0.41015625, + "loss_num": 0.0281982421875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 45440584, + "step": 812 + }, + { + "epoch": 1.8106904231625836, + "grad_norm": 16.581043243408203, + "learning_rate": 1e-06, + "loss": 0.9445, + "num_input_tokens_seen": 45493384, + "step": 813 + }, + { + "epoch": 1.8106904231625836, + "loss": 0.8210749626159668, + "loss_ce": 0.0007623857818543911, + "loss_iou": 0.34765625, + "loss_num": 0.0252685546875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 45493384, + "step": 813 + }, + { + "epoch": 1.8129175946547884, + "grad_norm": 20.215229034423828, + "learning_rate": 1e-06, + "loss": 1.0194, + "num_input_tokens_seen": 45548060, + "step": 814 + }, + { + "epoch": 1.8129175946547884, + "loss": 1.2104692459106445, + "loss_ce": 0.0029496951028704643, + "loss_iou": 0.46484375, + "loss_num": 0.055908203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 45548060, + "step": 814 + }, + { + "epoch": 1.8151447661469933, + "grad_norm": 17.977659225463867, + "learning_rate": 1e-06, + "loss": 0.9403, + "num_input_tokens_seen": 45605948, + "step": 815 + }, + { + "epoch": 1.8151447661469933, + "loss": 0.7051939964294434, + "loss_ce": 0.0003600172349251807, + "loss_iou": 0.302734375, + "loss_num": 0.019775390625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 45605948, + "step": 815 + }, + { + "epoch": 1.8173719376391984, + "grad_norm": 21.293136596679688, + "learning_rate": 1e-06, + "loss": 0.9601, + "num_input_tokens_seen": 45660788, + "step": 816 + }, + { + "epoch": 1.8173719376391984, + "loss": 0.7573980093002319, + "loss_ce": 0.0005621028249152005, + "loss_iou": 0.326171875, + "loss_num": 0.020751953125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 45660788, + "step": 816 + }, + { + "epoch": 1.8195991091314032, + "grad_norm": 31.912277221679688, + "learning_rate": 1e-06, + "loss": 1.2548, + "num_input_tokens_seen": 45713720, + "step": 817 + }, + { + "epoch": 1.8195991091314032, + "loss": 1.1719120740890503, + "loss_ce": 0.012366149574518204, + "loss_iou": 0.490234375, + "loss_num": 0.03564453125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 45713720, + "step": 817 + }, + { + "epoch": 1.821826280623608, + "grad_norm": 43.602935791015625, + "learning_rate": 1e-06, + "loss": 1.0844, + "num_input_tokens_seen": 45769628, + "step": 818 + }, + { + "epoch": 1.821826280623608, + "loss": 1.0855807065963745, + "loss_ce": 0.0006197973270900548, + "loss_iou": 0.451171875, + "loss_num": 0.036865234375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 45769628, + "step": 818 + }, + { + "epoch": 1.824053452115813, + "grad_norm": 23.616613388061523, + "learning_rate": 1e-06, + "loss": 1.1116, + "num_input_tokens_seen": 45827072, + "step": 819 + }, + { + "epoch": 1.824053452115813, + "loss": 1.052821159362793, + "loss_ce": 0.0005750858690589666, + "loss_iou": 0.4375, + "loss_num": 0.03515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 45827072, + "step": 819 + }, + { + "epoch": 1.8262806236080178, + "grad_norm": 24.936237335205078, + "learning_rate": 1e-06, + "loss": 1.1643, + "num_input_tokens_seen": 45882452, + "step": 820 + }, + { + "epoch": 1.8262806236080178, + "loss": 1.1679127216339111, + "loss_ce": 0.00043235800694674253, + "loss_iou": 0.466796875, + "loss_num": 0.046630859375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 45882452, + "step": 820 + }, + { + "epoch": 1.8285077951002227, + "grad_norm": 27.94110107421875, + "learning_rate": 1e-06, + "loss": 1.0027, + "num_input_tokens_seen": 45937344, + "step": 821 + }, + { + "epoch": 1.8285077951002227, + "loss": 1.220947265625, + "loss_ce": 0.0007323599420487881, + "loss_iou": 0.46875, + "loss_num": 0.056396484375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 45937344, + "step": 821 + }, + { + "epoch": 1.8307349665924275, + "grad_norm": 19.486064910888672, + "learning_rate": 1e-06, + "loss": 0.9696, + "num_input_tokens_seen": 45994040, + "step": 822 + }, + { + "epoch": 1.8307349665924275, + "loss": 0.7867767214775085, + "loss_ce": 0.0003997594176325947, + "loss_iou": 0.32421875, + "loss_num": 0.028076171875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 45994040, + "step": 822 + }, + { + "epoch": 1.8329621380846324, + "grad_norm": 33.13214111328125, + "learning_rate": 1e-06, + "loss": 1.2236, + "num_input_tokens_seen": 46049792, + "step": 823 + }, + { + "epoch": 1.8329621380846324, + "loss": 1.2174769639968872, + "loss_ce": 0.0011683637276291847, + "loss_iou": 0.486328125, + "loss_num": 0.049072265625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 46049792, + "step": 823 + }, + { + "epoch": 1.8351893095768375, + "grad_norm": 22.1630802154541, + "learning_rate": 1e-06, + "loss": 1.0282, + "num_input_tokens_seen": 46106908, + "step": 824 + }, + { + "epoch": 1.8351893095768375, + "loss": 1.025941252708435, + "loss_ce": 0.0005506377201527357, + "loss_iou": 0.4453125, + "loss_num": 0.02685546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 46106908, + "step": 824 + }, + { + "epoch": 1.8374164810690423, + "grad_norm": 18.732593536376953, + "learning_rate": 1e-06, + "loss": 1.1442, + "num_input_tokens_seen": 46161732, + "step": 825 + }, + { + "epoch": 1.8374164810690423, + "loss": 1.1100322008132935, + "loss_ce": 0.0004130484303459525, + "loss_iou": 0.4453125, + "loss_num": 0.044189453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 46161732, + "step": 825 + }, + { + "epoch": 1.8396436525612472, + "grad_norm": 16.942766189575195, + "learning_rate": 1e-06, + "loss": 1.1521, + "num_input_tokens_seen": 46217944, + "step": 826 + }, + { + "epoch": 1.8396436525612472, + "loss": 0.9271703362464905, + "loss_ce": 0.0018773877527564764, + "loss_iou": 0.3828125, + "loss_num": 0.031982421875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 46217944, + "step": 826 + }, + { + "epoch": 1.8418708240534523, + "grad_norm": 19.92278289794922, + "learning_rate": 1e-06, + "loss": 1.1781, + "num_input_tokens_seen": 46274360, + "step": 827 + }, + { + "epoch": 1.8418708240534523, + "loss": 1.132466197013855, + "loss_ce": 0.0008743547950871289, + "loss_iou": 0.443359375, + "loss_num": 0.04931640625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 46274360, + "step": 827 + }, + { + "epoch": 1.8440979955456571, + "grad_norm": 27.525070190429688, + "learning_rate": 1e-06, + "loss": 0.9065, + "num_input_tokens_seen": 46331236, + "step": 828 + }, + { + "epoch": 1.8440979955456571, + "loss": 0.9962807893753052, + "loss_ce": 0.000431208114605397, + "loss_iou": 0.412109375, + "loss_num": 0.034423828125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 46331236, + "step": 828 + }, + { + "epoch": 1.846325167037862, + "grad_norm": 19.961835861206055, + "learning_rate": 1e-06, + "loss": 1.0033, + "num_input_tokens_seen": 46387876, + "step": 829 + }, + { + "epoch": 1.846325167037862, + "loss": 0.8641173243522644, + "loss_ce": 0.0008360765059478581, + "loss_iou": 0.376953125, + "loss_num": 0.021728515625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 46387876, + "step": 829 + }, + { + "epoch": 1.8485523385300668, + "grad_norm": 23.06169891357422, + "learning_rate": 1e-06, + "loss": 1.2062, + "num_input_tokens_seen": 46441284, + "step": 830 + }, + { + "epoch": 1.8485523385300668, + "loss": 1.086785078048706, + "loss_ce": 0.0003593094297684729, + "loss_iou": 0.447265625, + "loss_num": 0.037841796875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 46441284, + "step": 830 + }, + { + "epoch": 1.8507795100222717, + "grad_norm": 23.462730407714844, + "learning_rate": 1e-06, + "loss": 0.932, + "num_input_tokens_seen": 46498148, + "step": 831 + }, + { + "epoch": 1.8507795100222717, + "loss": 1.0123460292816162, + "loss_ce": 0.005265878979116678, + "loss_iou": 0.41015625, + "loss_num": 0.037841796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 46498148, + "step": 831 + }, + { + "epoch": 1.8530066815144766, + "grad_norm": 18.19162368774414, + "learning_rate": 1e-06, + "loss": 1.1176, + "num_input_tokens_seen": 46552656, + "step": 832 + }, + { + "epoch": 1.8530066815144766, + "loss": 1.0369131565093994, + "loss_ce": 0.0007803026819601655, + "loss_iou": 0.4296875, + "loss_num": 0.03466796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 46552656, + "step": 832 + }, + { + "epoch": 1.8552338530066814, + "grad_norm": 27.384538650512695, + "learning_rate": 1e-06, + "loss": 1.327, + "num_input_tokens_seen": 46609180, + "step": 833 + }, + { + "epoch": 1.8552338530066814, + "loss": 1.4005694389343262, + "loss_ce": 0.002132008085027337, + "loss_iou": 0.55078125, + "loss_num": 0.05859375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 46609180, + "step": 833 + }, + { + "epoch": 1.8574610244988863, + "grad_norm": 17.754493713378906, + "learning_rate": 1e-06, + "loss": 0.8976, + "num_input_tokens_seen": 46667088, + "step": 834 + }, + { + "epoch": 1.8574610244988863, + "loss": 0.7892530560493469, + "loss_ce": 0.00043467164505273104, + "loss_iou": 0.326171875, + "loss_num": 0.0274658203125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 46667088, + "step": 834 + }, + { + "epoch": 1.8596881959910914, + "grad_norm": 22.643587112426758, + "learning_rate": 1e-06, + "loss": 1.2626, + "num_input_tokens_seen": 46722560, + "step": 835 + }, + { + "epoch": 1.8596881959910914, + "loss": 1.1584439277648926, + "loss_ce": 0.0014614604879170656, + "loss_iou": 0.4453125, + "loss_num": 0.05322265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 46722560, + "step": 835 + }, + { + "epoch": 1.8619153674832962, + "grad_norm": 16.647260665893555, + "learning_rate": 1e-06, + "loss": 0.9988, + "num_input_tokens_seen": 46780772, + "step": 836 + }, + { + "epoch": 1.8619153674832962, + "loss": 0.8128501176834106, + "loss_ce": 0.0010825353674590588, + "loss_iou": 0.30859375, + "loss_num": 0.03857421875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 46780772, + "step": 836 + }, + { + "epoch": 1.864142538975501, + "grad_norm": 17.087894439697266, + "learning_rate": 1e-06, + "loss": 1.3527, + "num_input_tokens_seen": 46837680, + "step": 837 + }, + { + "epoch": 1.864142538975501, + "loss": 1.4123845100402832, + "loss_ce": 0.0007634205976501107, + "loss_iou": 0.56640625, + "loss_num": 0.05517578125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 46837680, + "step": 837 + }, + { + "epoch": 1.8663697104677062, + "grad_norm": 17.794803619384766, + "learning_rate": 1e-06, + "loss": 1.1507, + "num_input_tokens_seen": 46894204, + "step": 838 + }, + { + "epoch": 1.8663697104677062, + "loss": 1.2315852642059326, + "loss_ce": 0.015764975920319557, + "loss_iou": 0.474609375, + "loss_num": 0.053466796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 46894204, + "step": 838 + }, + { + "epoch": 1.868596881959911, + "grad_norm": 20.756214141845703, + "learning_rate": 1e-06, + "loss": 1.0918, + "num_input_tokens_seen": 46953264, + "step": 839 + }, + { + "epoch": 1.868596881959911, + "loss": 1.1783467531204224, + "loss_ce": 0.00036822014953941107, + "loss_iou": 0.478515625, + "loss_num": 0.044189453125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 46953264, + "step": 839 + }, + { + "epoch": 1.8708240534521159, + "grad_norm": 17.135751724243164, + "learning_rate": 1e-06, + "loss": 0.8869, + "num_input_tokens_seen": 47008696, + "step": 840 + }, + { + "epoch": 1.8708240534521159, + "loss": 0.8081262111663818, + "loss_ce": 0.000631108705420047, + "loss_iou": 0.302734375, + "loss_num": 0.040283203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 47008696, + "step": 840 + }, + { + "epoch": 1.8730512249443207, + "grad_norm": 24.36521339416504, + "learning_rate": 1e-06, + "loss": 1.1049, + "num_input_tokens_seen": 47066420, + "step": 841 + }, + { + "epoch": 1.8730512249443207, + "loss": 0.8385207653045654, + "loss_ce": 0.00038598667015321553, + "loss_iou": 0.373046875, + "loss_num": 0.01806640625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 47066420, + "step": 841 + }, + { + "epoch": 1.8752783964365256, + "grad_norm": 29.640390396118164, + "learning_rate": 1e-06, + "loss": 1.0064, + "num_input_tokens_seen": 47123592, + "step": 842 + }, + { + "epoch": 1.8752783964365256, + "loss": 0.8564976453781128, + "loss_ce": 0.0005406375275924802, + "loss_iou": 0.34765625, + "loss_num": 0.0322265625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 47123592, + "step": 842 + }, + { + "epoch": 1.8775055679287305, + "grad_norm": 23.356630325317383, + "learning_rate": 1e-06, + "loss": 1.1621, + "num_input_tokens_seen": 47178084, + "step": 843 + }, + { + "epoch": 1.8775055679287305, + "loss": 1.280181646347046, + "loss_ce": 0.0003964488860219717, + "loss_iou": 0.55859375, + "loss_num": 0.032470703125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 47178084, + "step": 843 + }, + { + "epoch": 1.8797327394209353, + "grad_norm": 22.576688766479492, + "learning_rate": 1e-06, + "loss": 0.9159, + "num_input_tokens_seen": 47235124, + "step": 844 + }, + { + "epoch": 1.8797327394209353, + "loss": 0.9658552408218384, + "loss_ce": 0.0005232463008724153, + "loss_iou": 0.3984375, + "loss_num": 0.033935546875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 47235124, + "step": 844 + }, + { + "epoch": 1.8819599109131402, + "grad_norm": 16.825740814208984, + "learning_rate": 1e-06, + "loss": 1.0477, + "num_input_tokens_seen": 47291416, + "step": 845 + }, + { + "epoch": 1.8819599109131402, + "loss": 1.1294829845428467, + "loss_ce": 0.0013091352302581072, + "loss_iou": 0.453125, + "loss_num": 0.044189453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 47291416, + "step": 845 + }, + { + "epoch": 1.884187082405345, + "grad_norm": 26.097557067871094, + "learning_rate": 1e-06, + "loss": 0.9203, + "num_input_tokens_seen": 47348868, + "step": 846 + }, + { + "epoch": 1.884187082405345, + "loss": 1.0167232751846313, + "loss_ce": 0.0013424212811514735, + "loss_iou": 0.443359375, + "loss_num": 0.025390625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 47348868, + "step": 846 + }, + { + "epoch": 1.8864142538975501, + "grad_norm": 25.306549072265625, + "learning_rate": 1e-06, + "loss": 1.1072, + "num_input_tokens_seen": 47405380, + "step": 847 + }, + { + "epoch": 1.8864142538975501, + "loss": 0.9173081517219543, + "loss_ce": 0.0008042958797886968, + "loss_iou": 0.359375, + "loss_num": 0.039794921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 47405380, + "step": 847 + }, + { + "epoch": 1.888641425389755, + "grad_norm": 14.741415023803711, + "learning_rate": 1e-06, + "loss": 0.7495, + "num_input_tokens_seen": 47460792, + "step": 848 + }, + { + "epoch": 1.888641425389755, + "loss": 0.6206876635551453, + "loss_ce": 0.0003263273974880576, + "loss_iou": 0.265625, + "loss_num": 0.0177001953125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 47460792, + "step": 848 + }, + { + "epoch": 1.89086859688196, + "grad_norm": 16.20488739013672, + "learning_rate": 1e-06, + "loss": 0.9648, + "num_input_tokens_seen": 47518560, + "step": 849 + }, + { + "epoch": 1.89086859688196, + "loss": 0.8242032527923584, + "loss_ce": 0.010482522659003735, + "loss_iou": 0.287109375, + "loss_num": 0.0478515625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 47518560, + "step": 849 + }, + { + "epoch": 1.893095768374165, + "grad_norm": 19.45951271057129, + "learning_rate": 1e-06, + "loss": 1.2282, + "num_input_tokens_seen": 47574244, + "step": 850 + }, + { + "epoch": 1.893095768374165, + "loss": 1.3258323669433594, + "loss_ce": 0.0013694913359358907, + "loss_iou": 0.515625, + "loss_num": 0.059326171875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 47574244, + "step": 850 + }, + { + "epoch": 1.8953229398663698, + "grad_norm": 18.448118209838867, + "learning_rate": 1e-06, + "loss": 1.1335, + "num_input_tokens_seen": 47627784, + "step": 851 + }, + { + "epoch": 1.8953229398663698, + "loss": 1.3810503482818604, + "loss_ce": 0.001167478272691369, + "loss_iou": 0.5625, + "loss_num": 0.051025390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 47627784, + "step": 851 + }, + { + "epoch": 1.8975501113585747, + "grad_norm": 28.44631004333496, + "learning_rate": 1e-06, + "loss": 0.9616, + "num_input_tokens_seen": 47685004, + "step": 852 + }, + { + "epoch": 1.8975501113585747, + "loss": 0.8197891116142273, + "loss_ce": 0.00045313817099668086, + "loss_iou": 0.33984375, + "loss_num": 0.02783203125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 47685004, + "step": 852 + }, + { + "epoch": 1.8997772828507795, + "grad_norm": 15.586737632751465, + "learning_rate": 1e-06, + "loss": 1.1863, + "num_input_tokens_seen": 47740520, + "step": 853 + }, + { + "epoch": 1.8997772828507795, + "loss": 1.022131323814392, + "loss_ce": 0.00040277119842357934, + "loss_iou": 0.431640625, + "loss_num": 0.031982421875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 47740520, + "step": 853 + }, + { + "epoch": 1.9020044543429844, + "grad_norm": 35.565025329589844, + "learning_rate": 1e-06, + "loss": 1.2452, + "num_input_tokens_seen": 47793144, + "step": 854 + }, + { + "epoch": 1.9020044543429844, + "loss": 1.1260898113250732, + "loss_ce": 0.000601442065089941, + "loss_iou": 0.453125, + "loss_num": 0.0439453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 47793144, + "step": 854 + }, + { + "epoch": 1.9042316258351892, + "grad_norm": 21.033187866210938, + "learning_rate": 1e-06, + "loss": 1.1192, + "num_input_tokens_seen": 47848176, + "step": 855 + }, + { + "epoch": 1.9042316258351892, + "loss": 1.057677984237671, + "loss_ce": 0.00042704385123215616, + "loss_iou": 0.43359375, + "loss_num": 0.037841796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 47848176, + "step": 855 + }, + { + "epoch": 1.906458797327394, + "grad_norm": 16.94105339050293, + "learning_rate": 1e-06, + "loss": 1.0397, + "num_input_tokens_seen": 47901820, + "step": 856 + }, + { + "epoch": 1.906458797327394, + "loss": 0.8765525817871094, + "loss_ce": 0.0005760163185186684, + "loss_iou": 0.37890625, + "loss_num": 0.02392578125, + "loss_xval": 0.875, + "num_input_tokens_seen": 47901820, + "step": 856 + }, + { + "epoch": 1.908685968819599, + "grad_norm": 22.052213668823242, + "learning_rate": 1e-06, + "loss": 1.058, + "num_input_tokens_seen": 47958540, + "step": 857 + }, + { + "epoch": 1.908685968819599, + "loss": 0.9184067845344543, + "loss_ce": 0.0004380404716357589, + "loss_iou": 0.376953125, + "loss_num": 0.033203125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 47958540, + "step": 857 + }, + { + "epoch": 1.910913140311804, + "grad_norm": 18.550006866455078, + "learning_rate": 1e-06, + "loss": 0.8463, + "num_input_tokens_seen": 48015240, + "step": 858 + }, + { + "epoch": 1.910913140311804, + "loss": 0.8902982473373413, + "loss_ce": 0.0004057343176100403, + "loss_iou": 0.330078125, + "loss_num": 0.045654296875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 48015240, + "step": 858 + }, + { + "epoch": 1.913140311804009, + "grad_norm": 19.243886947631836, + "learning_rate": 1e-06, + "loss": 0.8471, + "num_input_tokens_seen": 48071956, + "step": 859 + }, + { + "epoch": 1.913140311804009, + "loss": 0.8775541186332703, + "loss_ce": 0.0010892475256696343, + "loss_iou": 0.36328125, + "loss_num": 0.02978515625, + "loss_xval": 0.875, + "num_input_tokens_seen": 48071956, + "step": 859 + }, + { + "epoch": 1.9153674832962138, + "grad_norm": 43.45414733886719, + "learning_rate": 1e-06, + "loss": 0.8823, + "num_input_tokens_seen": 48125932, + "step": 860 + }, + { + "epoch": 1.9153674832962138, + "loss": 0.9675887823104858, + "loss_ce": 0.0005477914237417281, + "loss_iou": 0.421875, + "loss_num": 0.0245361328125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 48125932, + "step": 860 + }, + { + "epoch": 1.9175946547884188, + "grad_norm": 17.62973976135254, + "learning_rate": 1e-06, + "loss": 0.8766, + "num_input_tokens_seen": 48183412, + "step": 861 + }, + { + "epoch": 1.9175946547884188, + "loss": 1.0546504259109497, + "loss_ce": 0.0006953147239983082, + "loss_iou": 0.439453125, + "loss_num": 0.034912109375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 48183412, + "step": 861 + }, + { + "epoch": 1.9198218262806237, + "grad_norm": 15.39229965209961, + "learning_rate": 1e-06, + "loss": 1.0492, + "num_input_tokens_seen": 48237972, + "step": 862 + }, + { + "epoch": 1.9198218262806237, + "loss": 1.0228043794631958, + "loss_ce": 0.0005876163486391306, + "loss_iou": 0.396484375, + "loss_num": 0.046142578125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 48237972, + "step": 862 + }, + { + "epoch": 1.9220489977728286, + "grad_norm": 35.672637939453125, + "learning_rate": 1e-06, + "loss": 1.1325, + "num_input_tokens_seen": 48294312, + "step": 863 + }, + { + "epoch": 1.9220489977728286, + "loss": 1.1503536701202393, + "loss_ce": 0.00045127171324566007, + "loss_iou": 0.474609375, + "loss_num": 0.039794921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 48294312, + "step": 863 + }, + { + "epoch": 1.9242761692650334, + "grad_norm": 18.644657135009766, + "learning_rate": 1e-06, + "loss": 1.1068, + "num_input_tokens_seen": 48352424, + "step": 864 + }, + { + "epoch": 1.9242761692650334, + "loss": 0.9790323972702026, + "loss_ce": 0.001493359450250864, + "loss_iou": 0.3984375, + "loss_num": 0.0361328125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 48352424, + "step": 864 + }, + { + "epoch": 1.9265033407572383, + "grad_norm": 19.904115676879883, + "learning_rate": 1e-06, + "loss": 1.2304, + "num_input_tokens_seen": 48407952, + "step": 865 + }, + { + "epoch": 1.9265033407572383, + "loss": 1.5032316446304321, + "loss_ce": 0.0037199431098997593, + "loss_iou": 0.5703125, + "loss_num": 0.072265625, + "loss_xval": 1.5, + "num_input_tokens_seen": 48407952, + "step": 865 + }, + { + "epoch": 1.9287305122494431, + "grad_norm": 16.542858123779297, + "learning_rate": 1e-06, + "loss": 0.8461, + "num_input_tokens_seen": 48463412, + "step": 866 + }, + { + "epoch": 1.9287305122494431, + "loss": 0.9278074502944946, + "loss_ce": 0.0025144873652607203, + "loss_iou": 0.36328125, + "loss_num": 0.039306640625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 48463412, + "step": 866 + }, + { + "epoch": 1.930957683741648, + "grad_norm": 28.734394073486328, + "learning_rate": 1e-06, + "loss": 1.206, + "num_input_tokens_seen": 48521584, + "step": 867 + }, + { + "epoch": 1.930957683741648, + "loss": 1.0615025758743286, + "loss_ce": 0.00046744622522965074, + "loss_iou": 0.45703125, + "loss_num": 0.0291748046875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 48521584, + "step": 867 + }, + { + "epoch": 1.9331848552338529, + "grad_norm": 16.990293502807617, + "learning_rate": 1e-06, + "loss": 1.0472, + "num_input_tokens_seen": 48575076, + "step": 868 + }, + { + "epoch": 1.9331848552338529, + "loss": 1.2131456136703491, + "loss_ce": 0.0012315567582845688, + "loss_iou": 0.482421875, + "loss_num": 0.049560546875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 48575076, + "step": 868 + }, + { + "epoch": 1.935412026726058, + "grad_norm": 20.22574234008789, + "learning_rate": 1e-06, + "loss": 1.0024, + "num_input_tokens_seen": 48632592, + "step": 869 + }, + { + "epoch": 1.935412026726058, + "loss": 1.0861210823059082, + "loss_ce": 0.0006718788645230234, + "loss_iou": 0.474609375, + "loss_num": 0.0272216796875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 48632592, + "step": 869 + }, + { + "epoch": 1.9376391982182628, + "grad_norm": 16.520029067993164, + "learning_rate": 1e-06, + "loss": 0.9542, + "num_input_tokens_seen": 48687552, + "step": 870 + }, + { + "epoch": 1.9376391982182628, + "loss": 1.0474047660827637, + "loss_ce": 0.0005297240568324924, + "loss_iou": 0.46484375, + "loss_num": 0.023193359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 48687552, + "step": 870 + }, + { + "epoch": 1.9398663697104677, + "grad_norm": 20.480531692504883, + "learning_rate": 1e-06, + "loss": 0.7512, + "num_input_tokens_seen": 48742604, + "step": 871 + }, + { + "epoch": 1.9398663697104677, + "loss": 0.876989483833313, + "loss_ce": 0.0005246583605185151, + "loss_iou": 0.37890625, + "loss_num": 0.024169921875, + "loss_xval": 0.875, + "num_input_tokens_seen": 48742604, + "step": 871 + }, + { + "epoch": 1.9420935412026727, + "grad_norm": 24.84157371520996, + "learning_rate": 1e-06, + "loss": 0.8841, + "num_input_tokens_seen": 48800660, + "step": 872 + }, + { + "epoch": 1.9420935412026727, + "loss": 0.9676753282546997, + "loss_ce": 0.0008784872479736805, + "loss_iou": 0.40234375, + "loss_num": 0.03271484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 48800660, + "step": 872 + }, + { + "epoch": 1.9443207126948776, + "grad_norm": 22.539447784423828, + "learning_rate": 1e-06, + "loss": 1.1714, + "num_input_tokens_seen": 48855408, + "step": 873 + }, + { + "epoch": 1.9443207126948776, + "loss": 1.1532479524612427, + "loss_ce": 0.0004159705131314695, + "loss_iou": 0.515625, + "loss_num": 0.0247802734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 48855408, + "step": 873 + }, + { + "epoch": 1.9465478841870825, + "grad_norm": 41.367530822753906, + "learning_rate": 1e-06, + "loss": 1.2651, + "num_input_tokens_seen": 48910668, + "step": 874 + }, + { + "epoch": 1.9465478841870825, + "loss": 1.3369231224060059, + "loss_ce": 0.0014739749021828175, + "loss_iou": 0.5625, + "loss_num": 0.041259765625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 48910668, + "step": 874 + }, + { + "epoch": 1.9487750556792873, + "grad_norm": 15.942052841186523, + "learning_rate": 1e-06, + "loss": 0.9964, + "num_input_tokens_seen": 48967356, + "step": 875 + }, + { + "epoch": 1.9487750556792873, + "loss": 1.1262295246124268, + "loss_ce": 0.0007413048297166824, + "loss_iou": 0.427734375, + "loss_num": 0.053955078125, + "loss_xval": 1.125, + "num_input_tokens_seen": 48967356, + "step": 875 + }, + { + "epoch": 1.9510022271714922, + "grad_norm": 45.24485778808594, + "learning_rate": 1e-06, + "loss": 1.0602, + "num_input_tokens_seen": 49023520, + "step": 876 + }, + { + "epoch": 1.9510022271714922, + "loss": 1.150200366973877, + "loss_ce": 0.0007863342761993408, + "loss_iou": 0.5, + "loss_num": 0.029296875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 49023520, + "step": 876 + }, + { + "epoch": 1.953229398663697, + "grad_norm": 24.16380500793457, + "learning_rate": 1e-06, + "loss": 0.9545, + "num_input_tokens_seen": 49079940, + "step": 877 + }, + { + "epoch": 1.953229398663697, + "loss": 0.8877462148666382, + "loss_ce": 0.0005392197053879499, + "loss_iou": 0.37109375, + "loss_num": 0.0289306640625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 49079940, + "step": 877 + }, + { + "epoch": 1.955456570155902, + "grad_norm": 22.12382698059082, + "learning_rate": 1e-06, + "loss": 1.1521, + "num_input_tokens_seen": 49138012, + "step": 878 + }, + { + "epoch": 1.955456570155902, + "loss": 1.5235247611999512, + "loss_ce": 0.0017962402198463678, + "loss_iou": 0.59765625, + "loss_num": 0.06494140625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 49138012, + "step": 878 + }, + { + "epoch": 1.9576837416481068, + "grad_norm": 18.208831787109375, + "learning_rate": 1e-06, + "loss": 0.9084, + "num_input_tokens_seen": 49194864, + "step": 879 + }, + { + "epoch": 1.9576837416481068, + "loss": 0.9498248100280762, + "loss_ce": 0.0003619292110670358, + "loss_iou": 0.3671875, + "loss_num": 0.042724609375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 49194864, + "step": 879 + }, + { + "epoch": 1.9599109131403119, + "grad_norm": 16.236581802368164, + "learning_rate": 1e-06, + "loss": 0.9203, + "num_input_tokens_seen": 49251060, + "step": 880 + }, + { + "epoch": 1.9599109131403119, + "loss": 0.9726754426956177, + "loss_ce": 0.0007516111945733428, + "loss_iou": 0.396484375, + "loss_num": 0.03564453125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 49251060, + "step": 880 + }, + { + "epoch": 1.9621380846325167, + "grad_norm": 23.136600494384766, + "learning_rate": 1e-06, + "loss": 1.1897, + "num_input_tokens_seen": 49309208, + "step": 881 + }, + { + "epoch": 1.9621380846325167, + "loss": 1.0319995880126953, + "loss_ce": 0.002946855966001749, + "loss_iou": 0.43359375, + "loss_num": 0.032470703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 49309208, + "step": 881 + }, + { + "epoch": 1.9643652561247216, + "grad_norm": 54.83725357055664, + "learning_rate": 1e-06, + "loss": 1.0684, + "num_input_tokens_seen": 49365352, + "step": 882 + }, + { + "epoch": 1.9643652561247216, + "loss": 1.0005229711532593, + "loss_ce": 0.0005229845410212874, + "loss_iou": 0.39453125, + "loss_num": 0.041748046875, + "loss_xval": 1.0, + "num_input_tokens_seen": 49365352, + "step": 882 + }, + { + "epoch": 1.9665924276169267, + "grad_norm": 20.81184959411621, + "learning_rate": 1e-06, + "loss": 1.3279, + "num_input_tokens_seen": 49420316, + "step": 883 + }, + { + "epoch": 1.9665924276169267, + "loss": 1.0947761535644531, + "loss_ce": 0.0005379515350796282, + "loss_iou": 0.451171875, + "loss_num": 0.03857421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 49420316, + "step": 883 + }, + { + "epoch": 1.9688195991091315, + "grad_norm": 24.571720123291016, + "learning_rate": 1e-06, + "loss": 0.9046, + "num_input_tokens_seen": 49478252, + "step": 884 + }, + { + "epoch": 1.9688195991091315, + "loss": 1.0495492219924927, + "loss_ce": 0.0007211226620711386, + "loss_iou": 0.41796875, + "loss_num": 0.042236328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 49478252, + "step": 884 + }, + { + "epoch": 1.9710467706013364, + "grad_norm": 17.04405403137207, + "learning_rate": 1e-06, + "loss": 0.9426, + "num_input_tokens_seen": 49533784, + "step": 885 + }, + { + "epoch": 1.9710467706013364, + "loss": 0.8944860696792603, + "loss_ce": 0.0009313831687904894, + "loss_iou": 0.361328125, + "loss_num": 0.033935546875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 49533784, + "step": 885 + }, + { + "epoch": 1.9732739420935412, + "grad_norm": 21.639575958251953, + "learning_rate": 1e-06, + "loss": 1.0049, + "num_input_tokens_seen": 49589404, + "step": 886 + }, + { + "epoch": 1.9732739420935412, + "loss": 0.9363880157470703, + "loss_ce": 0.0008411717135459185, + "loss_iou": 0.369140625, + "loss_num": 0.038818359375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 49589404, + "step": 886 + }, + { + "epoch": 1.975501113585746, + "grad_norm": 15.142632484436035, + "learning_rate": 1e-06, + "loss": 1.0436, + "num_input_tokens_seen": 49646820, + "step": 887 + }, + { + "epoch": 1.975501113585746, + "loss": 0.9690762758255005, + "loss_ce": 0.010580191388726234, + "loss_iou": 0.37890625, + "loss_num": 0.039794921875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 49646820, + "step": 887 + }, + { + "epoch": 1.977728285077951, + "grad_norm": 132.77593994140625, + "learning_rate": 1e-06, + "loss": 1.0238, + "num_input_tokens_seen": 49702832, + "step": 888 + }, + { + "epoch": 1.977728285077951, + "loss": 1.0730714797973633, + "loss_ce": 0.0008057774393819273, + "loss_iou": 0.44140625, + "loss_num": 0.037109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 49702832, + "step": 888 + }, + { + "epoch": 1.9799554565701558, + "grad_norm": 46.88450622558594, + "learning_rate": 1e-06, + "loss": 1.0213, + "num_input_tokens_seen": 49759608, + "step": 889 + }, + { + "epoch": 1.9799554565701558, + "loss": 0.5785812139511108, + "loss_ce": 0.0004562578978948295, + "loss_iou": 0.23828125, + "loss_num": 0.020263671875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 49759608, + "step": 889 + }, + { + "epoch": 1.9821826280623607, + "grad_norm": 13.415266990661621, + "learning_rate": 1e-06, + "loss": 0.6749, + "num_input_tokens_seen": 49816076, + "step": 890 + }, + { + "epoch": 1.9821826280623607, + "loss": 0.6280744075775146, + "loss_ce": 0.00038887892151251435, + "loss_iou": 0.24609375, + "loss_num": 0.0269775390625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 49816076, + "step": 890 + }, + { + "epoch": 1.9844097995545658, + "grad_norm": 15.064915657043457, + "learning_rate": 1e-06, + "loss": 0.9421, + "num_input_tokens_seen": 49873896, + "step": 891 + }, + { + "epoch": 1.9844097995545658, + "loss": 0.9301207065582275, + "loss_ce": 0.00043319491669535637, + "loss_iou": 0.388671875, + "loss_num": 0.0302734375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 49873896, + "step": 891 + }, + { + "epoch": 1.9866369710467706, + "grad_norm": 19.410207748413086, + "learning_rate": 1e-06, + "loss": 0.8304, + "num_input_tokens_seen": 49929944, + "step": 892 + }, + { + "epoch": 1.9866369710467706, + "loss": 0.8065379858016968, + "loss_ce": 0.0003856243856716901, + "loss_iou": 0.345703125, + "loss_num": 0.0230712890625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 49929944, + "step": 892 + }, + { + "epoch": 1.9888641425389755, + "grad_norm": 18.616121292114258, + "learning_rate": 1e-06, + "loss": 0.9924, + "num_input_tokens_seen": 49987336, + "step": 893 + }, + { + "epoch": 1.9888641425389755, + "loss": 1.0283457040786743, + "loss_ce": 0.0005136135150678456, + "loss_iou": 0.392578125, + "loss_num": 0.048583984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 49987336, + "step": 893 + }, + { + "epoch": 1.9910913140311806, + "grad_norm": 20.091445922851562, + "learning_rate": 1e-06, + "loss": 1.0854, + "num_input_tokens_seen": 50045032, + "step": 894 + }, + { + "epoch": 1.9910913140311806, + "loss": 0.9809556007385254, + "loss_ce": 0.000730981701053679, + "loss_iou": 0.396484375, + "loss_num": 0.037353515625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 50045032, + "step": 894 + }, + { + "epoch": 1.9933184855233854, + "grad_norm": 19.74848175048828, + "learning_rate": 1e-06, + "loss": 1.0705, + "num_input_tokens_seen": 50100472, + "step": 895 + }, + { + "epoch": 1.9933184855233854, + "loss": 0.8635627031326294, + "loss_ce": 0.0012580410111695528, + "loss_iou": 0.326171875, + "loss_num": 0.04248046875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 50100472, + "step": 895 + }, + { + "epoch": 1.9955456570155903, + "grad_norm": 17.26140022277832, + "learning_rate": 1e-06, + "loss": 0.9529, + "num_input_tokens_seen": 50154436, + "step": 896 + }, + { + "epoch": 1.9955456570155903, + "loss": 0.9841878414154053, + "loss_ce": 0.0006672587478533387, + "loss_iou": 0.38671875, + "loss_num": 0.041748046875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 50154436, + "step": 896 + }, + { + "epoch": 1.9977728285077951, + "grad_norm": 20.126981735229492, + "learning_rate": 1e-06, + "loss": 1.0437, + "num_input_tokens_seen": 50211096, + "step": 897 + }, + { + "epoch": 1.9977728285077951, + "loss": 1.0043690204620361, + "loss_ce": 0.0004628162132576108, + "loss_iou": 0.408203125, + "loss_num": 0.037841796875, + "loss_xval": 1.0, + "num_input_tokens_seen": 50211096, + "step": 897 + }, + { + "epoch": 2.0, + "grad_norm": 23.061552047729492, + "learning_rate": 1e-06, + "loss": 1.0086, + "num_input_tokens_seen": 50270028, + "step": 898 + }, + { + "epoch": 2.0, + "loss": 0.6891332864761353, + "loss_ce": 0.0004125749983359128, + "loss_iou": 0.294921875, + "loss_num": 0.0198974609375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 50270028, + "step": 898 + }, + { + "epoch": 2.002227171492205, + "grad_norm": 18.92450523376465, + "learning_rate": 1e-06, + "loss": 1.1896, + "num_input_tokens_seen": 50325468, + "step": 899 + }, + { + "epoch": 2.002227171492205, + "loss": 1.5556280612945557, + "loss_ce": 0.0019170554587617517, + "loss_iou": 0.578125, + "loss_num": 0.080078125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 50325468, + "step": 899 + }, + { + "epoch": 2.0044543429844097, + "grad_norm": 20.99635887145996, + "learning_rate": 1e-06, + "loss": 0.8291, + "num_input_tokens_seen": 50384784, + "step": 900 + }, + { + "epoch": 2.0044543429844097, + "loss": 0.8486037254333496, + "loss_ce": 0.0004591494216583669, + "loss_iou": 0.341796875, + "loss_num": 0.033447265625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 50384784, + "step": 900 + }, + { + "epoch": 2.0066815144766146, + "grad_norm": 31.64121437072754, + "learning_rate": 1e-06, + "loss": 0.8485, + "num_input_tokens_seen": 50440300, + "step": 901 + }, + { + "epoch": 2.0066815144766146, + "loss": 0.8760842084884644, + "loss_ce": 0.0008400966180488467, + "loss_iou": 0.35546875, + "loss_num": 0.033447265625, + "loss_xval": 0.875, + "num_input_tokens_seen": 50440300, + "step": 901 + }, + { + "epoch": 2.0089086859688194, + "grad_norm": 17.200613021850586, + "learning_rate": 1e-06, + "loss": 1.2305, + "num_input_tokens_seen": 50497772, + "step": 902 + }, + { + "epoch": 2.0089086859688194, + "loss": 1.1643800735473633, + "loss_ce": 0.0008058917010203004, + "loss_iou": 0.46875, + "loss_num": 0.044677734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 50497772, + "step": 902 + }, + { + "epoch": 2.0111358574610243, + "grad_norm": 16.72941780090332, + "learning_rate": 1e-06, + "loss": 0.7154, + "num_input_tokens_seen": 50555944, + "step": 903 + }, + { + "epoch": 2.0111358574610243, + "loss": 0.8082038164138794, + "loss_ce": 0.0008307406678795815, + "loss_iou": 0.302734375, + "loss_num": 0.040283203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 50555944, + "step": 903 + }, + { + "epoch": 2.0133630289532296, + "grad_norm": 20.396434783935547, + "learning_rate": 1e-06, + "loss": 1.1836, + "num_input_tokens_seen": 50613104, + "step": 904 + }, + { + "epoch": 2.0133630289532296, + "loss": 1.1902202367782593, + "loss_ce": 0.0012554043205454946, + "loss_iou": 0.466796875, + "loss_num": 0.051025390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 50613104, + "step": 904 + }, + { + "epoch": 2.0155902004454345, + "grad_norm": 21.122411727905273, + "learning_rate": 1e-06, + "loss": 0.9534, + "num_input_tokens_seen": 50668660, + "step": 905 + }, + { + "epoch": 2.0155902004454345, + "loss": 0.9574775695800781, + "loss_ce": 0.00044635249651037157, + "loss_iou": 0.390625, + "loss_num": 0.035400390625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 50668660, + "step": 905 + }, + { + "epoch": 2.0178173719376393, + "grad_norm": 12.18447494506836, + "learning_rate": 1e-06, + "loss": 1.2904, + "num_input_tokens_seen": 50721156, + "step": 906 + }, + { + "epoch": 2.0178173719376393, + "loss": 0.806514322757721, + "loss_ce": 0.0004840257461182773, + "loss_iou": 0.333984375, + "loss_num": 0.0277099609375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 50721156, + "step": 906 + }, + { + "epoch": 2.020044543429844, + "grad_norm": 20.636756896972656, + "learning_rate": 1e-06, + "loss": 1.0138, + "num_input_tokens_seen": 50777508, + "step": 907 + }, + { + "epoch": 2.020044543429844, + "loss": 0.9786741733551025, + "loss_ce": 0.0006468579522334039, + "loss_iou": 0.41796875, + "loss_num": 0.02880859375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 50777508, + "step": 907 + }, + { + "epoch": 2.022271714922049, + "grad_norm": 41.294925689697266, + "learning_rate": 1e-06, + "loss": 1.1918, + "num_input_tokens_seen": 50833956, + "step": 908 + }, + { + "epoch": 2.022271714922049, + "loss": 0.883208692073822, + "loss_ce": 0.000396185350837186, + "loss_iou": 0.322265625, + "loss_num": 0.047607421875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 50833956, + "step": 908 + }, + { + "epoch": 2.024498886414254, + "grad_norm": 18.029842376708984, + "learning_rate": 1e-06, + "loss": 1.185, + "num_input_tokens_seen": 50892188, + "step": 909 + }, + { + "epoch": 2.024498886414254, + "loss": 1.0920612812042236, + "loss_ce": 0.0005085701122879982, + "loss_iou": 0.451171875, + "loss_num": 0.038330078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 50892188, + "step": 909 + }, + { + "epoch": 2.0267260579064588, + "grad_norm": 17.75556755065918, + "learning_rate": 1e-06, + "loss": 0.9455, + "num_input_tokens_seen": 50948976, + "step": 910 + }, + { + "epoch": 2.0267260579064588, + "loss": 0.8729127645492554, + "loss_ce": 0.0005983302253298461, + "loss_iou": 0.37109375, + "loss_num": 0.0264892578125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 50948976, + "step": 910 + }, + { + "epoch": 2.0289532293986636, + "grad_norm": 22.336694717407227, + "learning_rate": 1e-06, + "loss": 0.811, + "num_input_tokens_seen": 51005372, + "step": 911 + }, + { + "epoch": 2.0289532293986636, + "loss": 0.8184252977371216, + "loss_ce": 0.0010424721986055374, + "loss_iou": 0.33203125, + "loss_num": 0.0306396484375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 51005372, + "step": 911 + }, + { + "epoch": 2.0311804008908685, + "grad_norm": 24.3977108001709, + "learning_rate": 1e-06, + "loss": 0.7044, + "num_input_tokens_seen": 51060608, + "step": 912 + }, + { + "epoch": 2.0311804008908685, + "loss": 0.8316826820373535, + "loss_ce": 0.00038386776577681303, + "loss_iou": 0.349609375, + "loss_num": 0.0267333984375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 51060608, + "step": 912 + }, + { + "epoch": 2.0334075723830733, + "grad_norm": 21.548065185546875, + "learning_rate": 1e-06, + "loss": 0.9748, + "num_input_tokens_seen": 51119092, + "step": 913 + }, + { + "epoch": 2.0334075723830733, + "loss": 0.853911280632019, + "loss_ce": 0.0003957064473070204, + "loss_iou": 0.34765625, + "loss_num": 0.031494140625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 51119092, + "step": 913 + }, + { + "epoch": 2.035634743875278, + "grad_norm": 17.78352928161621, + "learning_rate": 1e-06, + "loss": 0.8966, + "num_input_tokens_seen": 51174088, + "step": 914 + }, + { + "epoch": 2.035634743875278, + "loss": 0.6430214047431946, + "loss_ce": 0.0004432816640473902, + "loss_iou": 0.275390625, + "loss_num": 0.0184326171875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 51174088, + "step": 914 + }, + { + "epoch": 2.0378619153674835, + "grad_norm": 18.09092903137207, + "learning_rate": 1e-06, + "loss": 1.0737, + "num_input_tokens_seen": 51231032, + "step": 915 + }, + { + "epoch": 2.0378619153674835, + "loss": 1.0431830883026123, + "loss_ce": 0.0007026625098660588, + "loss_iou": 0.412109375, + "loss_num": 0.0439453125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 51231032, + "step": 915 + }, + { + "epoch": 2.0400890868596884, + "grad_norm": 21.492124557495117, + "learning_rate": 1e-06, + "loss": 0.849, + "num_input_tokens_seen": 51288688, + "step": 916 + }, + { + "epoch": 2.0400890868596884, + "loss": 0.8285926580429077, + "loss_ce": 0.0004676440730690956, + "loss_iou": 0.3515625, + "loss_num": 0.02490234375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 51288688, + "step": 916 + }, + { + "epoch": 2.0423162583518932, + "grad_norm": 24.50774574279785, + "learning_rate": 1e-06, + "loss": 1.1289, + "num_input_tokens_seen": 51343280, + "step": 917 + }, + { + "epoch": 2.0423162583518932, + "loss": 1.1621774435043335, + "loss_ce": 0.0005563499871641397, + "loss_iou": 0.482421875, + "loss_num": 0.0390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 51343280, + "step": 917 + }, + { + "epoch": 2.044543429844098, + "grad_norm": 23.351022720336914, + "learning_rate": 1e-06, + "loss": 0.8928, + "num_input_tokens_seen": 51399164, + "step": 918 + }, + { + "epoch": 2.044543429844098, + "loss": 0.76546311378479, + "loss_ce": 0.0005705538205802441, + "loss_iou": 0.31640625, + "loss_num": 0.0263671875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 51399164, + "step": 918 + }, + { + "epoch": 2.046770601336303, + "grad_norm": 23.73122215270996, + "learning_rate": 1e-06, + "loss": 1.2118, + "num_input_tokens_seen": 51454772, + "step": 919 + }, + { + "epoch": 2.046770601336303, + "loss": 1.1581388711929321, + "loss_ce": 0.0006681361701339483, + "loss_iou": 0.5078125, + "loss_num": 0.0281982421875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 51454772, + "step": 919 + }, + { + "epoch": 2.048997772828508, + "grad_norm": 18.12936782836914, + "learning_rate": 1e-06, + "loss": 1.1355, + "num_input_tokens_seen": 51508836, + "step": 920 + }, + { + "epoch": 2.048997772828508, + "loss": 0.9356842041015625, + "loss_ce": 0.0016021885676309466, + "loss_iou": 0.3359375, + "loss_num": 0.05224609375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 51508836, + "step": 920 + }, + { + "epoch": 2.0512249443207127, + "grad_norm": 18.033832550048828, + "learning_rate": 1e-06, + "loss": 0.897, + "num_input_tokens_seen": 51562932, + "step": 921 + }, + { + "epoch": 2.0512249443207127, + "loss": 0.9213240742683411, + "loss_ce": 0.001158121507614851, + "loss_iou": 0.4140625, + "loss_num": 0.0185546875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 51562932, + "step": 921 + }, + { + "epoch": 2.0534521158129175, + "grad_norm": 22.288450241088867, + "learning_rate": 1e-06, + "loss": 1.1296, + "num_input_tokens_seen": 51617208, + "step": 922 + }, + { + "epoch": 2.0534521158129175, + "loss": 1.1455841064453125, + "loss_ce": 0.017166122794151306, + "loss_iou": 0.447265625, + "loss_num": 0.046630859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 51617208, + "step": 922 + }, + { + "epoch": 2.0556792873051224, + "grad_norm": 13.78810977935791, + "learning_rate": 1e-06, + "loss": 1.0396, + "num_input_tokens_seen": 51669524, + "step": 923 + }, + { + "epoch": 2.0556792873051224, + "loss": 1.045444130897522, + "loss_ce": 0.0005222847685217857, + "loss_iou": 0.421875, + "loss_num": 0.04052734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 51669524, + "step": 923 + }, + { + "epoch": 2.0579064587973273, + "grad_norm": 15.79564380645752, + "learning_rate": 1e-06, + "loss": 0.9122, + "num_input_tokens_seen": 51727060, + "step": 924 + }, + { + "epoch": 2.0579064587973273, + "loss": 0.7765226364135742, + "loss_ce": 0.000399627722799778, + "loss_iou": 0.326171875, + "loss_num": 0.0245361328125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 51727060, + "step": 924 + }, + { + "epoch": 2.060133630289532, + "grad_norm": 27.499584197998047, + "learning_rate": 1e-06, + "loss": 1.1524, + "num_input_tokens_seen": 51782548, + "step": 925 + }, + { + "epoch": 2.060133630289532, + "loss": 1.210993766784668, + "loss_ce": 0.00042247679084539413, + "loss_iou": 0.51171875, + "loss_num": 0.037353515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 51782548, + "step": 925 + }, + { + "epoch": 2.062360801781737, + "grad_norm": 21.81817626953125, + "learning_rate": 1e-06, + "loss": 0.7765, + "num_input_tokens_seen": 51839240, + "step": 926 + }, + { + "epoch": 2.062360801781737, + "loss": 0.8635718822479248, + "loss_ce": 0.0005347240949049592, + "loss_iou": 0.365234375, + "loss_num": 0.02685546875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 51839240, + "step": 926 + }, + { + "epoch": 2.0645879732739423, + "grad_norm": 27.51004409790039, + "learning_rate": 1e-06, + "loss": 0.885, + "num_input_tokens_seen": 51896564, + "step": 927 + }, + { + "epoch": 2.0645879732739423, + "loss": 0.7961362600326538, + "loss_ce": 0.0007261328864842653, + "loss_iou": 0.357421875, + "loss_num": 0.016357421875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 51896564, + "step": 927 + }, + { + "epoch": 2.066815144766147, + "grad_norm": 19.509262084960938, + "learning_rate": 1e-06, + "loss": 0.9487, + "num_input_tokens_seen": 51954088, + "step": 928 + }, + { + "epoch": 2.066815144766147, + "loss": 1.0089809894561768, + "loss_ce": 0.0006801420240662992, + "loss_iou": 0.396484375, + "loss_num": 0.04296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 51954088, + "step": 928 + }, + { + "epoch": 2.069042316258352, + "grad_norm": 21.640626907348633, + "learning_rate": 1e-06, + "loss": 0.9389, + "num_input_tokens_seen": 52010852, + "step": 929 + }, + { + "epoch": 2.069042316258352, + "loss": 1.0190068483352661, + "loss_ce": 0.0014287333469837904, + "loss_iou": 0.40625, + "loss_num": 0.04150390625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 52010852, + "step": 929 + }, + { + "epoch": 2.071269487750557, + "grad_norm": 18.838546752929688, + "learning_rate": 1e-06, + "loss": 1.213, + "num_input_tokens_seen": 52069620, + "step": 930 + }, + { + "epoch": 2.071269487750557, + "loss": 1.1467416286468506, + "loss_ce": 0.0009896388510242105, + "loss_iou": 0.498046875, + "loss_num": 0.030029296875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 52069620, + "step": 930 + }, + { + "epoch": 2.0734966592427617, + "grad_norm": 22.098485946655273, + "learning_rate": 1e-06, + "loss": 0.9992, + "num_input_tokens_seen": 52125176, + "step": 931 + }, + { + "epoch": 2.0734966592427617, + "loss": 1.115182638168335, + "loss_ce": 0.0004365970380604267, + "loss_iou": 0.42578125, + "loss_num": 0.05224609375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 52125176, + "step": 931 + }, + { + "epoch": 2.0757238307349666, + "grad_norm": 15.070525169372559, + "learning_rate": 1e-06, + "loss": 0.9082, + "num_input_tokens_seen": 52182404, + "step": 932 + }, + { + "epoch": 2.0757238307349666, + "loss": 0.8831936717033386, + "loss_ce": 0.0005032622721046209, + "loss_iou": 0.3828125, + "loss_num": 0.023681640625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 52182404, + "step": 932 + }, + { + "epoch": 2.0779510022271714, + "grad_norm": 22.24066925048828, + "learning_rate": 1e-06, + "loss": 0.9648, + "num_input_tokens_seen": 52238920, + "step": 933 + }, + { + "epoch": 2.0779510022271714, + "loss": 1.1474614143371582, + "loss_ce": 0.0004888359108008444, + "loss_iou": 0.462890625, + "loss_num": 0.044189453125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 52238920, + "step": 933 + }, + { + "epoch": 2.0801781737193763, + "grad_norm": 17.68231964111328, + "learning_rate": 1e-06, + "loss": 0.9119, + "num_input_tokens_seen": 52293392, + "step": 934 + }, + { + "epoch": 2.0801781737193763, + "loss": 0.9804922342300415, + "loss_ce": 0.0010000698966905475, + "loss_iou": 0.38671875, + "loss_num": 0.041015625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 52293392, + "step": 934 + }, + { + "epoch": 2.082405345211581, + "grad_norm": 16.659244537353516, + "learning_rate": 1e-06, + "loss": 0.9513, + "num_input_tokens_seen": 52345928, + "step": 935 + }, + { + "epoch": 2.082405345211581, + "loss": 0.7912268042564392, + "loss_ce": 0.00045530046918429434, + "loss_iou": 0.337890625, + "loss_num": 0.0228271484375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 52345928, + "step": 935 + }, + { + "epoch": 2.084632516703786, + "grad_norm": 17.690183639526367, + "learning_rate": 1e-06, + "loss": 0.8534, + "num_input_tokens_seen": 52401488, + "step": 936 + }, + { + "epoch": 2.084632516703786, + "loss": 0.9090708494186401, + "loss_ce": 0.00037944965879432857, + "loss_iou": 0.33984375, + "loss_num": 0.04541015625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 52401488, + "step": 936 + }, + { + "epoch": 2.086859688195991, + "grad_norm": 18.25249481201172, + "learning_rate": 1e-06, + "loss": 0.9348, + "num_input_tokens_seen": 52454132, + "step": 937 + }, + { + "epoch": 2.086859688195991, + "loss": 0.9369181990623474, + "loss_ce": 0.00039476132951676846, + "loss_iou": 0.3828125, + "loss_num": 0.0341796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 52454132, + "step": 937 + }, + { + "epoch": 2.089086859688196, + "grad_norm": 23.943071365356445, + "learning_rate": 1e-06, + "loss": 0.9978, + "num_input_tokens_seen": 52508724, + "step": 938 + }, + { + "epoch": 2.089086859688196, + "loss": 0.9771347045898438, + "loss_ce": 0.0025253635831177235, + "loss_iou": 0.40234375, + "loss_num": 0.033935546875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 52508724, + "step": 938 + }, + { + "epoch": 2.091314031180401, + "grad_norm": 19.138086318969727, + "learning_rate": 1e-06, + "loss": 0.9259, + "num_input_tokens_seen": 52567416, + "step": 939 + }, + { + "epoch": 2.091314031180401, + "loss": 0.8586698174476624, + "loss_ce": 0.0019803589675575495, + "loss_iou": 0.3515625, + "loss_num": 0.03125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 52567416, + "step": 939 + }, + { + "epoch": 2.093541202672606, + "grad_norm": 29.820850372314453, + "learning_rate": 1e-06, + "loss": 1.0665, + "num_input_tokens_seen": 52625816, + "step": 940 + }, + { + "epoch": 2.093541202672606, + "loss": 1.4001514911651611, + "loss_ce": 0.0012256972258910537, + "loss_iou": 0.5703125, + "loss_num": 0.051513671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 52625816, + "step": 940 + }, + { + "epoch": 2.0957683741648108, + "grad_norm": 14.36317253112793, + "learning_rate": 1e-06, + "loss": 0.5621, + "num_input_tokens_seen": 52682968, + "step": 941 + }, + { + "epoch": 2.0957683741648108, + "loss": 0.5812158584594727, + "loss_ce": 0.00040527121745981276, + "loss_iou": 0.2197265625, + "loss_num": 0.0284423828125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 52682968, + "step": 941 + }, + { + "epoch": 2.0979955456570156, + "grad_norm": 19.567962646484375, + "learning_rate": 1e-06, + "loss": 0.9139, + "num_input_tokens_seen": 52737516, + "step": 942 + }, + { + "epoch": 2.0979955456570156, + "loss": 1.065826654434204, + "loss_ce": 0.00039695383748039603, + "loss_iou": 0.453125, + "loss_num": 0.031982421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 52737516, + "step": 942 + }, + { + "epoch": 2.1002227171492205, + "grad_norm": 28.01074981689453, + "learning_rate": 1e-06, + "loss": 1.1929, + "num_input_tokens_seen": 52793536, + "step": 943 + }, + { + "epoch": 2.1002227171492205, + "loss": 1.1947014331817627, + "loss_ce": 0.00036561937304213643, + "loss_iou": 0.48046875, + "loss_num": 0.046630859375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 52793536, + "step": 943 + }, + { + "epoch": 2.1024498886414253, + "grad_norm": 20.295827865600586, + "learning_rate": 1e-06, + "loss": 1.2702, + "num_input_tokens_seen": 52847904, + "step": 944 + }, + { + "epoch": 2.1024498886414253, + "loss": 1.352550983428955, + "loss_ce": 0.0005001933313906193, + "loss_iou": 0.578125, + "loss_num": 0.0380859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 52847904, + "step": 944 + }, + { + "epoch": 2.10467706013363, + "grad_norm": 16.32712745666504, + "learning_rate": 1e-06, + "loss": 1.0087, + "num_input_tokens_seen": 52904284, + "step": 945 + }, + { + "epoch": 2.10467706013363, + "loss": 1.0332987308502197, + "loss_ce": 0.0005839248769916594, + "loss_iou": 0.404296875, + "loss_num": 0.044921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 52904284, + "step": 945 + }, + { + "epoch": 2.106904231625835, + "grad_norm": 29.043960571289062, + "learning_rate": 1e-06, + "loss": 1.0885, + "num_input_tokens_seen": 52960784, + "step": 946 + }, + { + "epoch": 2.106904231625835, + "loss": 0.9473793506622314, + "loss_ce": 0.0006019732682034373, + "loss_iou": 0.3671875, + "loss_num": 0.04248046875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 52960784, + "step": 946 + }, + { + "epoch": 2.10913140311804, + "grad_norm": 14.747257232666016, + "learning_rate": 1e-06, + "loss": 0.9877, + "num_input_tokens_seen": 53015812, + "step": 947 + }, + { + "epoch": 2.10913140311804, + "loss": 1.0732710361480713, + "loss_ce": 0.0007612318731844425, + "loss_iou": 0.42578125, + "loss_num": 0.044189453125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 53015812, + "step": 947 + }, + { + "epoch": 2.111358574610245, + "grad_norm": 103.11000061035156, + "learning_rate": 1e-06, + "loss": 0.9914, + "num_input_tokens_seen": 53072308, + "step": 948 + }, + { + "epoch": 2.111358574610245, + "loss": 0.9887726902961731, + "loss_ce": 0.000491408514790237, + "loss_iou": 0.447265625, + "loss_num": 0.0185546875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 53072308, + "step": 948 + }, + { + "epoch": 2.11358574610245, + "grad_norm": 23.691804885864258, + "learning_rate": 1e-06, + "loss": 1.2725, + "num_input_tokens_seen": 53127836, + "step": 949 + }, + { + "epoch": 2.11358574610245, + "loss": 1.3882193565368652, + "loss_ce": 0.001500481041148305, + "loss_iou": 0.51171875, + "loss_num": 0.07373046875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 53127836, + "step": 949 + }, + { + "epoch": 2.115812917594655, + "grad_norm": 13.513452529907227, + "learning_rate": 1e-06, + "loss": 0.9153, + "num_input_tokens_seen": 53184644, + "step": 950 + }, + { + "epoch": 2.115812917594655, + "loss": 1.0870821475982666, + "loss_ce": 0.0004122781683690846, + "loss_iou": 0.435546875, + "loss_num": 0.042724609375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 53184644, + "step": 950 + }, + { + "epoch": 2.11804008908686, + "grad_norm": 14.758941650390625, + "learning_rate": 1e-06, + "loss": 0.8424, + "num_input_tokens_seen": 53242056, + "step": 951 + }, + { + "epoch": 2.11804008908686, + "loss": 0.9238724708557129, + "loss_ce": 0.00041059922659769654, + "loss_iou": 0.384765625, + "loss_num": 0.0311279296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 53242056, + "step": 951 + }, + { + "epoch": 2.1202672605790647, + "grad_norm": 50.94358825683594, + "learning_rate": 1e-06, + "loss": 0.9876, + "num_input_tokens_seen": 53296836, + "step": 952 + }, + { + "epoch": 2.1202672605790647, + "loss": 0.8739633560180664, + "loss_ce": 0.00042815617052838206, + "loss_iou": 0.35546875, + "loss_num": 0.0322265625, + "loss_xval": 0.875, + "num_input_tokens_seen": 53296836, + "step": 952 + }, + { + "epoch": 2.1224944320712695, + "grad_norm": 29.346717834472656, + "learning_rate": 1e-06, + "loss": 0.8476, + "num_input_tokens_seen": 53356080, + "step": 953 + }, + { + "epoch": 2.1224944320712695, + "loss": 0.7091416120529175, + "loss_ce": 0.00040138079202733934, + "loss_iou": 0.298828125, + "loss_num": 0.022705078125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 53356080, + "step": 953 + }, + { + "epoch": 2.1247216035634744, + "grad_norm": 30.826066970825195, + "learning_rate": 1e-06, + "loss": 0.8384, + "num_input_tokens_seen": 53411060, + "step": 954 + }, + { + "epoch": 2.1247216035634744, + "loss": 0.8927929401397705, + "loss_ce": 0.00045894747017882764, + "loss_iou": 0.330078125, + "loss_num": 0.04638671875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 53411060, + "step": 954 + }, + { + "epoch": 2.1269487750556793, + "grad_norm": 26.217329025268555, + "learning_rate": 1e-06, + "loss": 1.2109, + "num_input_tokens_seen": 53467452, + "step": 955 + }, + { + "epoch": 2.1269487750556793, + "loss": 1.171452283859253, + "loss_ce": 0.0005538875702768564, + "loss_iou": 0.50390625, + "loss_num": 0.031982421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 53467452, + "step": 955 + }, + { + "epoch": 2.129175946547884, + "grad_norm": 15.275303840637207, + "learning_rate": 1e-06, + "loss": 1.0093, + "num_input_tokens_seen": 53523000, + "step": 956 + }, + { + "epoch": 2.129175946547884, + "loss": 0.9647861123085022, + "loss_ce": 0.0004306259215809405, + "loss_iou": 0.3984375, + "loss_num": 0.033447265625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 53523000, + "step": 956 + }, + { + "epoch": 2.131403118040089, + "grad_norm": 20.987869262695312, + "learning_rate": 1e-06, + "loss": 0.8899, + "num_input_tokens_seen": 53577268, + "step": 957 + }, + { + "epoch": 2.131403118040089, + "loss": 0.8194369077682495, + "loss_ce": 0.00034510315163061023, + "loss_iou": 0.3359375, + "loss_num": 0.0296630859375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 53577268, + "step": 957 + }, + { + "epoch": 2.133630289532294, + "grad_norm": 17.244304656982422, + "learning_rate": 1e-06, + "loss": 0.9678, + "num_input_tokens_seen": 53632968, + "step": 958 + }, + { + "epoch": 2.133630289532294, + "loss": 0.7954371571540833, + "loss_ce": 0.0010035325540229678, + "loss_iou": 0.32421875, + "loss_num": 0.02880859375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 53632968, + "step": 958 + }, + { + "epoch": 2.1358574610244987, + "grad_norm": 27.137948989868164, + "learning_rate": 1e-06, + "loss": 0.8922, + "num_input_tokens_seen": 53686276, + "step": 959 + }, + { + "epoch": 2.1358574610244987, + "loss": 0.8764593005180359, + "loss_ce": 0.007318664342164993, + "loss_iou": 0.345703125, + "loss_num": 0.035888671875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 53686276, + "step": 959 + }, + { + "epoch": 2.138084632516704, + "grad_norm": 13.352965354919434, + "learning_rate": 1e-06, + "loss": 1.2179, + "num_input_tokens_seen": 53738980, + "step": 960 + }, + { + "epoch": 2.138084632516704, + "loss": 1.0099034309387207, + "loss_ce": 0.0003819413250312209, + "loss_iou": 0.4375, + "loss_num": 0.0272216796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 53738980, + "step": 960 + }, + { + "epoch": 2.140311804008909, + "grad_norm": 26.12520980834961, + "learning_rate": 1e-06, + "loss": 1.0853, + "num_input_tokens_seen": 53795408, + "step": 961 + }, + { + "epoch": 2.140311804008909, + "loss": 1.0113427639007568, + "loss_ce": 0.00035642064176499844, + "loss_iou": 0.42578125, + "loss_num": 0.0322265625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 53795408, + "step": 961 + }, + { + "epoch": 2.1425389755011137, + "grad_norm": 25.219985961914062, + "learning_rate": 1e-06, + "loss": 1.0355, + "num_input_tokens_seen": 53849140, + "step": 962 + }, + { + "epoch": 2.1425389755011137, + "loss": 0.8350330591201782, + "loss_ce": 0.0008045152062550187, + "loss_iou": 0.33984375, + "loss_num": 0.030517578125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 53849140, + "step": 962 + }, + { + "epoch": 2.1447661469933186, + "grad_norm": 22.237886428833008, + "learning_rate": 1e-06, + "loss": 1.0859, + "num_input_tokens_seen": 53904680, + "step": 963 + }, + { + "epoch": 2.1447661469933186, + "loss": 0.9965952634811401, + "loss_ce": 0.000501530768815428, + "loss_iou": 0.41015625, + "loss_num": 0.034912109375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 53904680, + "step": 963 + }, + { + "epoch": 2.1469933184855234, + "grad_norm": 27.480417251586914, + "learning_rate": 1e-06, + "loss": 1.0019, + "num_input_tokens_seen": 53962276, + "step": 964 + }, + { + "epoch": 2.1469933184855234, + "loss": 0.9119249582290649, + "loss_ce": 0.0007921320502646267, + "loss_iou": 0.373046875, + "loss_num": 0.033203125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 53962276, + "step": 964 + }, + { + "epoch": 2.1492204899777283, + "grad_norm": 16.98265266418457, + "learning_rate": 1e-06, + "loss": 0.9955, + "num_input_tokens_seen": 54015980, + "step": 965 + }, + { + "epoch": 2.1492204899777283, + "loss": 0.8996407985687256, + "loss_ce": 0.0004708755586761981, + "loss_iou": 0.376953125, + "loss_num": 0.029052734375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 54015980, + "step": 965 + }, + { + "epoch": 2.151447661469933, + "grad_norm": 17.36399269104004, + "learning_rate": 1e-06, + "loss": 0.9589, + "num_input_tokens_seen": 54072540, + "step": 966 + }, + { + "epoch": 2.151447661469933, + "loss": 0.8351460099220276, + "loss_ce": 0.00042922317516058683, + "loss_iou": 0.341796875, + "loss_num": 0.0301513671875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 54072540, + "step": 966 + }, + { + "epoch": 2.153674832962138, + "grad_norm": 25.51936149597168, + "learning_rate": 1e-06, + "loss": 1.0361, + "num_input_tokens_seen": 54128332, + "step": 967 + }, + { + "epoch": 2.153674832962138, + "loss": 0.8526254892349243, + "loss_ce": 0.0005747509421780705, + "loss_iou": 0.365234375, + "loss_num": 0.02392578125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 54128332, + "step": 967 + }, + { + "epoch": 2.155902004454343, + "grad_norm": 25.46952247619629, + "learning_rate": 1e-06, + "loss": 1.2554, + "num_input_tokens_seen": 54184776, + "step": 968 + }, + { + "epoch": 2.155902004454343, + "loss": 1.3332043886184692, + "loss_ce": 0.0011730894912034273, + "loss_iou": 0.52734375, + "loss_num": 0.05615234375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 54184776, + "step": 968 + }, + { + "epoch": 2.1581291759465477, + "grad_norm": 16.16930389404297, + "learning_rate": 1e-06, + "loss": 0.8004, + "num_input_tokens_seen": 54241804, + "step": 969 + }, + { + "epoch": 2.1581291759465477, + "loss": 0.612720787525177, + "loss_ce": 0.006275475956499577, + "loss_iou": 0.2412109375, + "loss_num": 0.0245361328125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 54241804, + "step": 969 + }, + { + "epoch": 2.1603563474387526, + "grad_norm": 12.317280769348145, + "learning_rate": 1e-06, + "loss": 0.7401, + "num_input_tokens_seen": 54297804, + "step": 970 + }, + { + "epoch": 2.1603563474387526, + "loss": 0.8193256855010986, + "loss_ce": 0.00047803280176594853, + "loss_iou": 0.349609375, + "loss_num": 0.0240478515625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 54297804, + "step": 970 + }, + { + "epoch": 2.1625835189309575, + "grad_norm": 25.277143478393555, + "learning_rate": 1e-06, + "loss": 0.9579, + "num_input_tokens_seen": 54352416, + "step": 971 + }, + { + "epoch": 2.1625835189309575, + "loss": 1.0242624282836914, + "loss_ce": 0.0003366165910847485, + "loss_iou": 0.435546875, + "loss_num": 0.030517578125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 54352416, + "step": 971 + }, + { + "epoch": 2.1648106904231628, + "grad_norm": 23.600387573242188, + "learning_rate": 1e-06, + "loss": 1.0226, + "num_input_tokens_seen": 54408836, + "step": 972 + }, + { + "epoch": 2.1648106904231628, + "loss": 0.9383513927459717, + "loss_ce": 0.0003631227882578969, + "loss_iou": 0.392578125, + "loss_num": 0.0303955078125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 54408836, + "step": 972 + }, + { + "epoch": 2.1670378619153676, + "grad_norm": 28.22263526916504, + "learning_rate": 1e-06, + "loss": 1.102, + "num_input_tokens_seen": 54461188, + "step": 973 + }, + { + "epoch": 2.1670378619153676, + "loss": 1.2241106033325195, + "loss_ce": 0.0004167944425716996, + "loss_iou": 0.498046875, + "loss_num": 0.04541015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 54461188, + "step": 973 + }, + { + "epoch": 2.1692650334075725, + "grad_norm": 23.0220947265625, + "learning_rate": 1e-06, + "loss": 1.0596, + "num_input_tokens_seen": 54516224, + "step": 974 + }, + { + "epoch": 2.1692650334075725, + "loss": 1.1534892320632935, + "loss_ce": 0.0006571857957169414, + "loss_iou": 0.47265625, + "loss_num": 0.04150390625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 54516224, + "step": 974 + }, + { + "epoch": 2.1714922048997773, + "grad_norm": 50.8327751159668, + "learning_rate": 1e-06, + "loss": 0.962, + "num_input_tokens_seen": 54571536, + "step": 975 + }, + { + "epoch": 2.1714922048997773, + "loss": 0.8975195288658142, + "loss_ce": 0.0004247867036610842, + "loss_iou": 0.3828125, + "loss_num": 0.0264892578125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 54571536, + "step": 975 + }, + { + "epoch": 2.173719376391982, + "grad_norm": 18.573394775390625, + "learning_rate": 1e-06, + "loss": 1.0714, + "num_input_tokens_seen": 54624220, + "step": 976 + }, + { + "epoch": 2.173719376391982, + "loss": 0.7970374822616577, + "loss_ce": 0.0011390313738957047, + "loss_iou": 0.3046875, + "loss_num": 0.037109375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 54624220, + "step": 976 + }, + { + "epoch": 2.175946547884187, + "grad_norm": 26.248809814453125, + "learning_rate": 1e-06, + "loss": 0.9597, + "num_input_tokens_seen": 54681684, + "step": 977 + }, + { + "epoch": 2.175946547884187, + "loss": 0.7498317956924438, + "loss_ce": 0.0003200596256647259, + "loss_iou": 0.314453125, + "loss_num": 0.02392578125, + "loss_xval": 0.75, + "num_input_tokens_seen": 54681684, + "step": 977 + }, + { + "epoch": 2.178173719376392, + "grad_norm": 17.984722137451172, + "learning_rate": 1e-06, + "loss": 0.8119, + "num_input_tokens_seen": 54739128, + "step": 978 + }, + { + "epoch": 2.178173719376392, + "loss": 0.8240878582000732, + "loss_ce": 0.00035743031185120344, + "loss_iou": 0.345703125, + "loss_num": 0.0263671875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 54739128, + "step": 978 + }, + { + "epoch": 2.180400890868597, + "grad_norm": 16.49453353881836, + "learning_rate": 1e-06, + "loss": 1.1017, + "num_input_tokens_seen": 54795068, + "step": 979 + }, + { + "epoch": 2.180400890868597, + "loss": 1.3098089694976807, + "loss_ce": 0.0004827585944440216, + "loss_iou": 0.546875, + "loss_num": 0.042724609375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 54795068, + "step": 979 + }, + { + "epoch": 2.1826280623608016, + "grad_norm": 25.356178283691406, + "learning_rate": 1e-06, + "loss": 0.9565, + "num_input_tokens_seen": 54851036, + "step": 980 + }, + { + "epoch": 2.1826280623608016, + "loss": 0.794826328754425, + "loss_ce": 0.0003927270008716732, + "loss_iou": 0.3515625, + "loss_num": 0.018310546875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 54851036, + "step": 980 + }, + { + "epoch": 2.1848552338530065, + "grad_norm": 20.565073013305664, + "learning_rate": 1e-06, + "loss": 0.8048, + "num_input_tokens_seen": 54903052, + "step": 981 + }, + { + "epoch": 2.1848552338530065, + "loss": 0.7406924366950989, + "loss_ce": 0.00045808005961589515, + "loss_iou": 0.318359375, + "loss_num": 0.020751953125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 54903052, + "step": 981 + }, + { + "epoch": 2.187082405345212, + "grad_norm": 25.87790870666504, + "learning_rate": 1e-06, + "loss": 0.8178, + "num_input_tokens_seen": 54960984, + "step": 982 + }, + { + "epoch": 2.187082405345212, + "loss": 0.7066792249679565, + "loss_ce": 0.0006245420081540942, + "loss_iou": 0.294921875, + "loss_num": 0.0235595703125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 54960984, + "step": 982 + }, + { + "epoch": 2.1893095768374167, + "grad_norm": 19.50031089782715, + "learning_rate": 1e-06, + "loss": 1.1593, + "num_input_tokens_seen": 55017748, + "step": 983 + }, + { + "epoch": 2.1893095768374167, + "loss": 0.988908588886261, + "loss_ce": 0.0003832111251540482, + "loss_iou": 0.423828125, + "loss_num": 0.028076171875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 55017748, + "step": 983 + }, + { + "epoch": 2.1915367483296215, + "grad_norm": 16.748844146728516, + "learning_rate": 1e-06, + "loss": 0.8003, + "num_input_tokens_seen": 55074808, + "step": 984 + }, + { + "epoch": 2.1915367483296215, + "loss": 0.9688905477523804, + "loss_ce": 0.00038473017048090696, + "loss_iou": 0.419921875, + "loss_num": 0.0262451171875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 55074808, + "step": 984 + }, + { + "epoch": 2.1937639198218264, + "grad_norm": 68.782958984375, + "learning_rate": 1e-06, + "loss": 1.0795, + "num_input_tokens_seen": 55131672, + "step": 985 + }, + { + "epoch": 2.1937639198218264, + "loss": 0.8397893309593201, + "loss_ce": 0.0006780020194128156, + "loss_iou": 0.353515625, + "loss_num": 0.02685546875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 55131672, + "step": 985 + }, + { + "epoch": 2.1959910913140313, + "grad_norm": 21.74385643005371, + "learning_rate": 1e-06, + "loss": 0.966, + "num_input_tokens_seen": 55186240, + "step": 986 + }, + { + "epoch": 2.1959910913140313, + "loss": 1.022130012512207, + "loss_ce": 0.005528404843062162, + "loss_iou": 0.404296875, + "loss_num": 0.04150390625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 55186240, + "step": 986 + }, + { + "epoch": 2.198218262806236, + "grad_norm": 27.58509063720703, + "learning_rate": 1e-06, + "loss": 1.0463, + "num_input_tokens_seen": 55243732, + "step": 987 + }, + { + "epoch": 2.198218262806236, + "loss": 1.0700922012329102, + "loss_ce": 0.0005121089052408934, + "loss_iou": 0.4375, + "loss_num": 0.038330078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 55243732, + "step": 987 + }, + { + "epoch": 2.200445434298441, + "grad_norm": 19.08161735534668, + "learning_rate": 1e-06, + "loss": 1.0416, + "num_input_tokens_seen": 55300384, + "step": 988 + }, + { + "epoch": 2.200445434298441, + "loss": 1.1714062690734863, + "loss_ce": 0.0007519207429140806, + "loss_iou": 0.4375, + "loss_num": 0.0595703125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 55300384, + "step": 988 + }, + { + "epoch": 2.202672605790646, + "grad_norm": 21.197906494140625, + "learning_rate": 1e-06, + "loss": 0.9063, + "num_input_tokens_seen": 55355972, + "step": 989 + }, + { + "epoch": 2.202672605790646, + "loss": 0.8070752024650574, + "loss_ce": 0.0011670144740492105, + "loss_iou": 0.330078125, + "loss_num": 0.02880859375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 55355972, + "step": 989 + }, + { + "epoch": 2.2048997772828507, + "grad_norm": 57.792137145996094, + "learning_rate": 1e-06, + "loss": 0.7107, + "num_input_tokens_seen": 55411904, + "step": 990 + }, + { + "epoch": 2.2048997772828507, + "loss": 0.8724934458732605, + "loss_ce": 0.00042314609163440764, + "loss_iou": 0.357421875, + "loss_num": 0.03125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 55411904, + "step": 990 + }, + { + "epoch": 2.2071269487750556, + "grad_norm": 18.81410026550293, + "learning_rate": 1e-06, + "loss": 0.921, + "num_input_tokens_seen": 55469032, + "step": 991 + }, + { + "epoch": 2.2071269487750556, + "loss": 0.5198028087615967, + "loss_ce": 0.0003936740104109049, + "loss_iou": 0.2275390625, + "loss_num": 0.01287841796875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 55469032, + "step": 991 + }, + { + "epoch": 2.2093541202672604, + "grad_norm": 19.535879135131836, + "learning_rate": 1e-06, + "loss": 0.8962, + "num_input_tokens_seen": 55528000, + "step": 992 + }, + { + "epoch": 2.2093541202672604, + "loss": 1.013750433921814, + "loss_ce": 0.0005668357480317354, + "loss_iou": 0.423828125, + "loss_num": 0.033203125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 55528000, + "step": 992 + }, + { + "epoch": 2.2115812917594653, + "grad_norm": 19.903423309326172, + "learning_rate": 1e-06, + "loss": 1.1321, + "num_input_tokens_seen": 55585248, + "step": 993 + }, + { + "epoch": 2.2115812917594653, + "loss": 1.1713190078735352, + "loss_ce": 0.0009089382365345955, + "loss_iou": 0.486328125, + "loss_num": 0.03955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 55585248, + "step": 993 + }, + { + "epoch": 2.2138084632516706, + "grad_norm": 20.668163299560547, + "learning_rate": 1e-06, + "loss": 1.093, + "num_input_tokens_seen": 55643640, + "step": 994 + }, + { + "epoch": 2.2138084632516706, + "loss": 1.0199007987976074, + "loss_ce": 0.0006136804004199803, + "loss_iou": 0.400390625, + "loss_num": 0.04345703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 55643640, + "step": 994 + }, + { + "epoch": 2.2160356347438754, + "grad_norm": 14.16650676727295, + "learning_rate": 1e-06, + "loss": 0.8074, + "num_input_tokens_seen": 55699880, + "step": 995 + }, + { + "epoch": 2.2160356347438754, + "loss": 0.7972851991653442, + "loss_ce": 0.0004102342645637691, + "loss_iou": 0.330078125, + "loss_num": 0.0274658203125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 55699880, + "step": 995 + }, + { + "epoch": 2.2182628062360803, + "grad_norm": 21.781885147094727, + "learning_rate": 1e-06, + "loss": 1.2838, + "num_input_tokens_seen": 55757468, + "step": 996 + }, + { + "epoch": 2.2182628062360803, + "loss": 1.5526351928710938, + "loss_ce": 0.0008773244917392731, + "loss_iou": 0.625, + "loss_num": 0.060546875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 55757468, + "step": 996 + }, + { + "epoch": 2.220489977728285, + "grad_norm": 13.922673225402832, + "learning_rate": 1e-06, + "loss": 0.8959, + "num_input_tokens_seen": 55811116, + "step": 997 + }, + { + "epoch": 2.220489977728285, + "loss": 0.8420233130455017, + "loss_ce": 0.00047059194184839725, + "loss_iou": 0.3515625, + "loss_num": 0.02783203125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 55811116, + "step": 997 + }, + { + "epoch": 2.22271714922049, + "grad_norm": 31.61424446105957, + "learning_rate": 1e-06, + "loss": 1.0489, + "num_input_tokens_seen": 55867056, + "step": 998 + }, + { + "epoch": 2.22271714922049, + "loss": 0.9875812530517578, + "loss_ce": 0.0016193470219150186, + "loss_iou": 0.4140625, + "loss_num": 0.031982421875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 55867056, + "step": 998 + }, + { + "epoch": 2.224944320712695, + "grad_norm": 18.81981658935547, + "learning_rate": 1e-06, + "loss": 1.1018, + "num_input_tokens_seen": 55922508, + "step": 999 + }, + { + "epoch": 2.224944320712695, + "loss": 1.031872272491455, + "loss_ce": 0.0011104578152298927, + "loss_iou": 0.41796875, + "loss_num": 0.0390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 55922508, + "step": 999 + }, + { + "epoch": 2.2271714922048997, + "grad_norm": 30.99945640563965, + "learning_rate": 1e-06, + "loss": 0.8712, + "num_input_tokens_seen": 55979796, + "step": 1000 + }, + { + "epoch": 2.2271714922048997, + "eval_seeclick_web_CIoU": 0.5659106969833374, + "eval_seeclick_web_GIoU": 0.5565789341926575, + "eval_seeclick_web_IoU": 0.5834561288356781, + "eval_seeclick_web_MAE_all": 0.016565547324717045, + "eval_seeclick_web_MAE_h": 0.010058594401925802, + "eval_seeclick_web_MAE_w": 0.016721216030418873, + "eval_seeclick_web_MAE_x_boxes": 0.009843998588621616, + "eval_seeclick_web_MAE_y_boxes": 0.02248636749573052, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9639145135879517, + "eval_seeclick_web_loss_ce": 0.0004584374401019886, + "eval_seeclick_web_loss_iou": 0.4400634765625, + "eval_seeclick_web_loss_num": 0.013164520263671875, + "eval_seeclick_web_loss_xval": 0.946044921875, + "eval_seeclick_web_runtime": 18.6045, + "eval_seeclick_web_samples_per_second": 2.688, + "eval_seeclick_web_steps_per_second": 0.108, + "num_input_tokens_seen": 55979796, + "step": 1000 + }, + { + "epoch": 2.2271714922048997, + "eval_icons_CIoU": 0.3126373589038849, + "eval_icons_GIoU": 0.3512195348739624, + "eval_icons_IoU": 0.38992173969745636, + "eval_icons_MAE_all": 0.06972917914390564, + "eval_icons_MAE_h": 0.035466980654746294, + "eval_icons_MAE_w": 0.07942605763673782, + "eval_icons_MAE_x_boxes": 0.06474643759429455, + "eval_icons_MAE_y_boxes": 0.03778674267232418, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.6989706754684448, + "eval_icons_loss_ce": 0.0012570468825288117, + "eval_icons_loss_iou": 0.6531982421875, + "eval_icons_loss_num": 0.06581878662109375, + "eval_icons_loss_xval": 1.634765625, + "eval_icons_runtime": 18.1838, + "eval_icons_samples_per_second": 2.75, + "eval_icons_steps_per_second": 0.11, + "num_input_tokens_seen": 55979796, + "step": 1000 + }, + { + "epoch": 2.2271714922048997, + "eval_screenspot_CIoU": 0.30459535121917725, + "eval_screenspot_GIoU": 0.326509823401769, + "eval_screenspot_IoU": 0.38954149683316547, + "eval_screenspot_MAE_all": 0.0834270715713501, + "eval_screenspot_MAE_h": 0.04727879042426745, + "eval_screenspot_MAE_w": 0.07983010758956273, + "eval_screenspot_MAE_x_boxes": 0.12237335244814555, + "eval_screenspot_MAE_y_boxes": 0.0545121505856514, + "eval_screenspot_inside_bbox": 0.6045833428700765, + "eval_screenspot_loss": 1.8311774730682373, + "eval_screenspot_loss_ce": 0.002042266229788462, + "eval_screenspot_loss_iou": 0.721923828125, + "eval_screenspot_loss_num": 0.09384409586588542, + "eval_screenspot_loss_xval": 1.9124348958333333, + "eval_screenspot_runtime": 27.92, + "eval_screenspot_samples_per_second": 3.188, + "eval_screenspot_steps_per_second": 0.107, + "num_input_tokens_seen": 55979796, + "step": 1000 + }, + { + "epoch": 2.2271714922048997, + "eval_compot_CIoU": 0.3376414477825165, + "eval_compot_GIoU": 0.3708181381225586, + "eval_compot_IoU": 0.40139785408973694, + "eval_compot_MAE_all": 0.02684677764773369, + "eval_compot_MAE_h": 0.011378975585103035, + "eval_compot_MAE_w": 0.037089540623128414, + "eval_compot_MAE_x_boxes": 0.034720015712082386, + "eval_compot_MAE_y_boxes": 0.007318113464862108, + "eval_compot_inside_bbox": 0.5868055522441864, + "eval_compot_loss": 1.426712989807129, + "eval_compot_loss_ce": 0.0004611587501130998, + "eval_compot_loss_iou": 0.6314697265625, + "eval_compot_loss_num": 0.024566650390625, + "eval_compot_loss_xval": 1.38525390625, + "eval_compot_runtime": 18.3333, + "eval_compot_samples_per_second": 2.727, + "eval_compot_steps_per_second": 0.109, + "num_input_tokens_seen": 55979796, + "step": 1000 + }, + { + "epoch": 2.2271714922048997, + "eval_custom_ui_val_CIoU": 0.411270619266563, + "eval_custom_ui_val_GIoU": 0.4448644651307, + "eval_custom_ui_val_IoU": 0.4744107557667626, + "eval_custom_ui_val_MAE_all": 0.040074211441808276, + "eval_custom_ui_val_MAE_h": 0.023370659496221278, + "eval_custom_ui_val_MAE_w": 0.04196054426332315, + "eval_custom_ui_val_MAE_x_boxes": 0.04563241203625997, + "eval_custom_ui_val_MAE_y_boxes": 0.02151624123669333, + "eval_custom_ui_val_inside_bbox": 0.6712962985038757, + "eval_custom_ui_val_loss": 1.345447063446045, + "eval_custom_ui_val_loss_ce": 0.0011988434901771445, + "eval_custom_ui_val_loss_iou": 0.5582411024305556, + "eval_custom_ui_val_loss_num": 0.03997251722547743, + "eval_custom_ui_val_loss_xval": 1.3163519965277777, + "eval_custom_ui_val_runtime": 55.6004, + "eval_custom_ui_val_samples_per_second": 4.766, + "eval_custom_ui_val_steps_per_second": 0.162, + "num_input_tokens_seen": 55979796, + "step": 1000 + }, + { + "epoch": 2.2271714922048997, + "loss": 1.0719767808914185, + "loss_ce": 0.000687746680341661, + "loss_iou": 0.45703125, + "loss_num": 0.03125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 55979796, + "step": 1000 + }, + { + "epoch": 2.2293986636971046, + "grad_norm": 23.139463424682617, + "learning_rate": 1e-06, + "loss": 1.2981, + "num_input_tokens_seen": 56034828, + "step": 1001 + }, + { + "epoch": 2.2293986636971046, + "loss": 1.173054575920105, + "loss_ce": 0.0004471880674827844, + "loss_iou": 0.484375, + "loss_num": 0.04052734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 56034828, + "step": 1001 + }, + { + "epoch": 2.2316258351893095, + "grad_norm": 14.833909034729004, + "learning_rate": 1e-06, + "loss": 0.7852, + "num_input_tokens_seen": 56090360, + "step": 1002 + }, + { + "epoch": 2.2316258351893095, + "loss": 0.7579194903373718, + "loss_ce": 0.00035115116043016315, + "loss_iou": 0.32421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 56090360, + "step": 1002 + }, + { + "epoch": 2.2338530066815143, + "grad_norm": 15.422577857971191, + "learning_rate": 1e-06, + "loss": 0.9412, + "num_input_tokens_seen": 56146060, + "step": 1003 + }, + { + "epoch": 2.2338530066815143, + "loss": 1.1035232543945312, + "loss_ce": 0.0006180062773637474, + "loss_iou": 0.474609375, + "loss_num": 0.030517578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 56146060, + "step": 1003 + }, + { + "epoch": 2.236080178173719, + "grad_norm": 18.446163177490234, + "learning_rate": 1e-06, + "loss": 0.7755, + "num_input_tokens_seen": 56202244, + "step": 1004 + }, + { + "epoch": 2.236080178173719, + "loss": 0.8598674535751343, + "loss_ce": 0.000492453167680651, + "loss_iou": 0.349609375, + "loss_num": 0.031982421875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 56202244, + "step": 1004 + }, + { + "epoch": 2.2383073496659245, + "grad_norm": 15.064460754394531, + "learning_rate": 1e-06, + "loss": 0.9762, + "num_input_tokens_seen": 56257360, + "step": 1005 + }, + { + "epoch": 2.2383073496659245, + "loss": 1.0436865091323853, + "loss_ce": 0.0007178318337537348, + "loss_iou": 0.404296875, + "loss_num": 0.047119140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 56257360, + "step": 1005 + }, + { + "epoch": 2.2405345211581293, + "grad_norm": 37.368812561035156, + "learning_rate": 1e-06, + "loss": 1.0937, + "num_input_tokens_seen": 56309892, + "step": 1006 + }, + { + "epoch": 2.2405345211581293, + "loss": 0.8646106123924255, + "loss_ce": 0.00035280632437206805, + "loss_iou": 0.3671875, + "loss_num": 0.0257568359375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 56309892, + "step": 1006 + }, + { + "epoch": 2.242761692650334, + "grad_norm": 20.58150863647461, + "learning_rate": 1e-06, + "loss": 1.0268, + "num_input_tokens_seen": 56365936, + "step": 1007 + }, + { + "epoch": 2.242761692650334, + "loss": 1.112687587738037, + "loss_ce": 0.00038295358535833657, + "loss_iou": 0.443359375, + "loss_num": 0.044921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 56365936, + "step": 1007 + }, + { + "epoch": 2.244988864142539, + "grad_norm": 19.283090591430664, + "learning_rate": 1e-06, + "loss": 0.9734, + "num_input_tokens_seen": 56422576, + "step": 1008 + }, + { + "epoch": 2.244988864142539, + "loss": 1.0260212421417236, + "loss_ce": 0.00038648463669233024, + "loss_iou": 0.4453125, + "loss_num": 0.02685546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 56422576, + "step": 1008 + }, + { + "epoch": 2.247216035634744, + "grad_norm": 15.9085111618042, + "learning_rate": 1e-06, + "loss": 1.1175, + "num_input_tokens_seen": 56481320, + "step": 1009 + }, + { + "epoch": 2.247216035634744, + "loss": 0.8886697888374329, + "loss_ce": 0.0004862197383772582, + "loss_iou": 0.33203125, + "loss_num": 0.04443359375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 56481320, + "step": 1009 + }, + { + "epoch": 2.249443207126949, + "grad_norm": 20.693801879882812, + "learning_rate": 1e-06, + "loss": 0.8815, + "num_input_tokens_seen": 56537736, + "step": 1010 + }, + { + "epoch": 2.249443207126949, + "loss": 0.7114344835281372, + "loss_ce": 0.000619084108620882, + "loss_iou": 0.294921875, + "loss_num": 0.024658203125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 56537736, + "step": 1010 + }, + { + "epoch": 2.2516703786191536, + "grad_norm": 16.531198501586914, + "learning_rate": 1e-06, + "loss": 1.0092, + "num_input_tokens_seen": 56594736, + "step": 1011 + }, + { + "epoch": 2.2516703786191536, + "loss": 0.9720375537872314, + "loss_ce": 0.0003578157047741115, + "loss_iou": 0.419921875, + "loss_num": 0.0264892578125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 56594736, + "step": 1011 + }, + { + "epoch": 2.2538975501113585, + "grad_norm": 36.781532287597656, + "learning_rate": 1e-06, + "loss": 1.0224, + "num_input_tokens_seen": 56651788, + "step": 1012 + }, + { + "epoch": 2.2538975501113585, + "loss": 1.0697126388549805, + "loss_ce": 0.00037672443431802094, + "loss_iou": 0.462890625, + "loss_num": 0.0281982421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 56651788, + "step": 1012 + }, + { + "epoch": 2.2561247216035634, + "grad_norm": 18.51349449157715, + "learning_rate": 1e-06, + "loss": 0.7345, + "num_input_tokens_seen": 56707000, + "step": 1013 + }, + { + "epoch": 2.2561247216035634, + "loss": 0.8490347862243652, + "loss_ce": 0.000401980709284544, + "loss_iou": 0.3828125, + "loss_num": 0.0166015625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 56707000, + "step": 1013 + }, + { + "epoch": 2.2583518930957682, + "grad_norm": 23.15570068359375, + "learning_rate": 1e-06, + "loss": 1.0084, + "num_input_tokens_seen": 56761980, + "step": 1014 + }, + { + "epoch": 2.2583518930957682, + "loss": 0.8266038298606873, + "loss_ce": 0.0005540476413443685, + "loss_iou": 0.34375, + "loss_num": 0.02734375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 56761980, + "step": 1014 + }, + { + "epoch": 2.260579064587973, + "grad_norm": 34.847137451171875, + "learning_rate": 1e-06, + "loss": 1.0461, + "num_input_tokens_seen": 56820908, + "step": 1015 + }, + { + "epoch": 2.260579064587973, + "loss": 0.9271408319473267, + "loss_ce": 0.0006271736929193139, + "loss_iou": 0.36328125, + "loss_num": 0.0400390625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 56820908, + "step": 1015 + }, + { + "epoch": 2.262806236080178, + "grad_norm": 34.261474609375, + "learning_rate": 1e-06, + "loss": 1.0021, + "num_input_tokens_seen": 56877908, + "step": 1016 + }, + { + "epoch": 2.262806236080178, + "loss": 0.912013590335846, + "loss_ce": 0.00039250371628440917, + "loss_iou": 0.384765625, + "loss_num": 0.028076171875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 56877908, + "step": 1016 + }, + { + "epoch": 2.2650334075723833, + "grad_norm": 30.62000846862793, + "learning_rate": 1e-06, + "loss": 1.1779, + "num_input_tokens_seen": 56933112, + "step": 1017 + }, + { + "epoch": 2.2650334075723833, + "loss": 1.2571220397949219, + "loss_ce": 0.0012625895906239748, + "loss_iou": 0.5078125, + "loss_num": 0.04736328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 56933112, + "step": 1017 + }, + { + "epoch": 2.267260579064588, + "grad_norm": 21.855445861816406, + "learning_rate": 1e-06, + "loss": 0.8362, + "num_input_tokens_seen": 56988708, + "step": 1018 + }, + { + "epoch": 2.267260579064588, + "loss": 0.8795033097267151, + "loss_ce": 0.00035289014340378344, + "loss_iou": 0.380859375, + "loss_num": 0.023193359375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 56988708, + "step": 1018 + }, + { + "epoch": 2.269487750556793, + "grad_norm": 26.169057846069336, + "learning_rate": 1e-06, + "loss": 0.6786, + "num_input_tokens_seen": 57046476, + "step": 1019 + }, + { + "epoch": 2.269487750556793, + "loss": 0.7506150007247925, + "loss_ce": 0.00037088984390720725, + "loss_iou": 0.328125, + "loss_num": 0.0191650390625, + "loss_xval": 0.75, + "num_input_tokens_seen": 57046476, + "step": 1019 + }, + { + "epoch": 2.271714922048998, + "grad_norm": 13.190043449401855, + "learning_rate": 1e-06, + "loss": 0.6864, + "num_input_tokens_seen": 57104372, + "step": 1020 + }, + { + "epoch": 2.271714922048998, + "loss": 0.7020950317382812, + "loss_ce": 0.0004348924267105758, + "loss_iou": 0.298828125, + "loss_num": 0.0211181640625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 57104372, + "step": 1020 + }, + { + "epoch": 2.2739420935412027, + "grad_norm": 16.335966110229492, + "learning_rate": 1e-06, + "loss": 0.7953, + "num_input_tokens_seen": 57159292, + "step": 1021 + }, + { + "epoch": 2.2739420935412027, + "loss": 0.8143704533576965, + "loss_ce": 0.0004056024190504104, + "loss_iou": 0.359375, + "loss_num": 0.01904296875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 57159292, + "step": 1021 + }, + { + "epoch": 2.2761692650334076, + "grad_norm": 27.681427001953125, + "learning_rate": 1e-06, + "loss": 0.9381, + "num_input_tokens_seen": 57216432, + "step": 1022 + }, + { + "epoch": 2.2761692650334076, + "loss": 0.8490380048751831, + "loss_ce": 0.0004051811702083796, + "loss_iou": 0.369140625, + "loss_num": 0.0223388671875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 57216432, + "step": 1022 + }, + { + "epoch": 2.2783964365256124, + "grad_norm": 19.292463302612305, + "learning_rate": 1e-06, + "loss": 0.9677, + "num_input_tokens_seen": 57273288, + "step": 1023 + }, + { + "epoch": 2.2783964365256124, + "loss": 0.9046714305877686, + "loss_ce": 0.000374538212781772, + "loss_iou": 0.3515625, + "loss_num": 0.04052734375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 57273288, + "step": 1023 + }, + { + "epoch": 2.2806236080178173, + "grad_norm": 22.14390754699707, + "learning_rate": 1e-06, + "loss": 0.8592, + "num_input_tokens_seen": 57329664, + "step": 1024 + }, + { + "epoch": 2.2806236080178173, + "loss": 1.0152591466903687, + "loss_ce": 0.00036650500260293484, + "loss_iou": 0.427734375, + "loss_num": 0.031982421875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 57329664, + "step": 1024 + }, + { + "epoch": 2.282850779510022, + "grad_norm": 27.401857376098633, + "learning_rate": 1e-06, + "loss": 0.936, + "num_input_tokens_seen": 57384724, + "step": 1025 + }, + { + "epoch": 2.282850779510022, + "loss": 0.8641769886016846, + "loss_ce": 0.00040744812577031553, + "loss_iou": 0.357421875, + "loss_num": 0.0294189453125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 57384724, + "step": 1025 + }, + { + "epoch": 2.285077951002227, + "grad_norm": 13.289790153503418, + "learning_rate": 1e-06, + "loss": 0.8172, + "num_input_tokens_seen": 57441660, + "step": 1026 + }, + { + "epoch": 2.285077951002227, + "loss": 0.7961374521255493, + "loss_ce": 0.0017038530204445124, + "loss_iou": 0.30859375, + "loss_num": 0.03515625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 57441660, + "step": 1026 + }, + { + "epoch": 2.2873051224944323, + "grad_norm": 14.736815452575684, + "learning_rate": 1e-06, + "loss": 0.8734, + "num_input_tokens_seen": 57497360, + "step": 1027 + }, + { + "epoch": 2.2873051224944323, + "loss": 0.8420823216438293, + "loss_ce": 0.0005296375602483749, + "loss_iou": 0.345703125, + "loss_num": 0.0299072265625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 57497360, + "step": 1027 + }, + { + "epoch": 2.289532293986637, + "grad_norm": 18.238637924194336, + "learning_rate": 1e-06, + "loss": 1.3031, + "num_input_tokens_seen": 57556032, + "step": 1028 + }, + { + "epoch": 2.289532293986637, + "loss": 1.1549606323242188, + "loss_ce": 0.0009079101146198809, + "loss_iou": 0.44140625, + "loss_num": 0.0546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 57556032, + "step": 1028 + }, + { + "epoch": 2.291759465478842, + "grad_norm": 20.993566513061523, + "learning_rate": 1e-06, + "loss": 0.9124, + "num_input_tokens_seen": 57610720, + "step": 1029 + }, + { + "epoch": 2.291759465478842, + "loss": 0.8001816272735596, + "loss_ce": 0.00031593156745657325, + "loss_iou": 0.345703125, + "loss_num": 0.0216064453125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 57610720, + "step": 1029 + }, + { + "epoch": 2.293986636971047, + "grad_norm": 31.266921997070312, + "learning_rate": 1e-06, + "loss": 0.9937, + "num_input_tokens_seen": 57669460, + "step": 1030 + }, + { + "epoch": 2.293986636971047, + "loss": 1.138893485069275, + "loss_ce": 0.0021747485734522343, + "loss_iou": 0.45703125, + "loss_num": 0.044921875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 57669460, + "step": 1030 + }, + { + "epoch": 2.2962138084632517, + "grad_norm": 29.804710388183594, + "learning_rate": 1e-06, + "loss": 0.9726, + "num_input_tokens_seen": 57726024, + "step": 1031 + }, + { + "epoch": 2.2962138084632517, + "loss": 1.0021300315856934, + "loss_ce": 0.00042098466656170785, + "loss_iou": 0.388671875, + "loss_num": 0.045654296875, + "loss_xval": 1.0, + "num_input_tokens_seen": 57726024, + "step": 1031 + }, + { + "epoch": 2.2984409799554566, + "grad_norm": 16.084012985229492, + "learning_rate": 1e-06, + "loss": 1.006, + "num_input_tokens_seen": 57783076, + "step": 1032 + }, + { + "epoch": 2.2984409799554566, + "loss": 1.1425867080688477, + "loss_ce": 0.0004969405708834529, + "loss_iou": 0.4609375, + "loss_num": 0.04443359375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 57783076, + "step": 1032 + }, + { + "epoch": 2.3006681514476615, + "grad_norm": 27.19474983215332, + "learning_rate": 1e-06, + "loss": 0.992, + "num_input_tokens_seen": 57837908, + "step": 1033 + }, + { + "epoch": 2.3006681514476615, + "loss": 0.9429985284805298, + "loss_ce": 0.0006157412426546216, + "loss_iou": 0.3828125, + "loss_num": 0.03564453125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 57837908, + "step": 1033 + }, + { + "epoch": 2.3028953229398663, + "grad_norm": 30.541584014892578, + "learning_rate": 1e-06, + "loss": 1.1811, + "num_input_tokens_seen": 57893948, + "step": 1034 + }, + { + "epoch": 2.3028953229398663, + "loss": 1.2314056158065796, + "loss_ce": 0.00044855731539428234, + "loss_iou": 0.490234375, + "loss_num": 0.049560546875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 57893948, + "step": 1034 + }, + { + "epoch": 2.305122494432071, + "grad_norm": 16.411218643188477, + "learning_rate": 1e-06, + "loss": 0.8391, + "num_input_tokens_seen": 57950452, + "step": 1035 + }, + { + "epoch": 2.305122494432071, + "loss": 0.8310631513595581, + "loss_ce": 0.0004967194981873035, + "loss_iou": 0.328125, + "loss_num": 0.034423828125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 57950452, + "step": 1035 + }, + { + "epoch": 2.307349665924276, + "grad_norm": 20.609291076660156, + "learning_rate": 1e-06, + "loss": 0.9038, + "num_input_tokens_seen": 58006408, + "step": 1036 + }, + { + "epoch": 2.307349665924276, + "loss": 0.7673088312149048, + "loss_ce": 0.0004631901392713189, + "loss_iou": 0.326171875, + "loss_num": 0.0230712890625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 58006408, + "step": 1036 + }, + { + "epoch": 2.309576837416481, + "grad_norm": 31.944068908691406, + "learning_rate": 1e-06, + "loss": 1.2189, + "num_input_tokens_seen": 58062244, + "step": 1037 + }, + { + "epoch": 2.309576837416481, + "loss": 0.9620157480239868, + "loss_ce": 0.0003457932034507394, + "loss_iou": 0.396484375, + "loss_num": 0.033447265625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 58062244, + "step": 1037 + }, + { + "epoch": 2.3118040089086858, + "grad_norm": 28.491863250732422, + "learning_rate": 1e-06, + "loss": 1.0784, + "num_input_tokens_seen": 58116528, + "step": 1038 + }, + { + "epoch": 2.3118040089086858, + "loss": 0.8677387237548828, + "loss_ce": 0.0003071002720389515, + "loss_iou": 0.359375, + "loss_num": 0.0299072265625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 58116528, + "step": 1038 + }, + { + "epoch": 2.3140311804008906, + "grad_norm": 16.422298431396484, + "learning_rate": 1e-06, + "loss": 1.1027, + "num_input_tokens_seen": 58172968, + "step": 1039 + }, + { + "epoch": 2.3140311804008906, + "loss": 1.250427484512329, + "loss_ce": 0.0006715654162690043, + "loss_iou": 0.482421875, + "loss_num": 0.05712890625, + "loss_xval": 1.25, + "num_input_tokens_seen": 58172968, + "step": 1039 + }, + { + "epoch": 2.316258351893096, + "grad_norm": 33.38667297363281, + "learning_rate": 1e-06, + "loss": 1.3044, + "num_input_tokens_seen": 58228600, + "step": 1040 + }, + { + "epoch": 2.316258351893096, + "loss": 1.3572330474853516, + "loss_ce": 0.00029941959655843675, + "loss_iou": 0.5703125, + "loss_num": 0.042724609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 58228600, + "step": 1040 + }, + { + "epoch": 2.318485523385301, + "grad_norm": 25.701223373413086, + "learning_rate": 1e-06, + "loss": 0.8746, + "num_input_tokens_seen": 58284388, + "step": 1041 + }, + { + "epoch": 2.318485523385301, + "loss": 0.9398232102394104, + "loss_ce": 0.0006142100319266319, + "loss_iou": 0.412109375, + "loss_num": 0.0233154296875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 58284388, + "step": 1041 + }, + { + "epoch": 2.3207126948775056, + "grad_norm": 30.38446807861328, + "learning_rate": 1e-06, + "loss": 0.9229, + "num_input_tokens_seen": 58342404, + "step": 1042 + }, + { + "epoch": 2.3207126948775056, + "loss": 1.0379189252853394, + "loss_ce": 0.0005654151318594813, + "loss_iou": 0.43359375, + "loss_num": 0.033447265625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 58342404, + "step": 1042 + }, + { + "epoch": 2.3229398663697105, + "grad_norm": 26.981040954589844, + "learning_rate": 1e-06, + "loss": 1.1894, + "num_input_tokens_seen": 58398820, + "step": 1043 + }, + { + "epoch": 2.3229398663697105, + "loss": 1.1476168632507324, + "loss_ce": 0.0004000905028078705, + "loss_iou": 0.494140625, + "loss_num": 0.031494140625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 58398820, + "step": 1043 + }, + { + "epoch": 2.3251670378619154, + "grad_norm": 17.203502655029297, + "learning_rate": 1e-06, + "loss": 1.1498, + "num_input_tokens_seen": 58455324, + "step": 1044 + }, + { + "epoch": 2.3251670378619154, + "loss": 1.3662068843841553, + "loss_ce": 0.00048417344805784523, + "loss_iou": 0.578125, + "loss_num": 0.04248046875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 58455324, + "step": 1044 + }, + { + "epoch": 2.3273942093541202, + "grad_norm": 26.704832077026367, + "learning_rate": 1e-06, + "loss": 1.1157, + "num_input_tokens_seen": 58512188, + "step": 1045 + }, + { + "epoch": 2.3273942093541202, + "loss": 1.0862228870391846, + "loss_ce": 0.000529451877810061, + "loss_iou": 0.451171875, + "loss_num": 0.03662109375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 58512188, + "step": 1045 + }, + { + "epoch": 2.329621380846325, + "grad_norm": 18.440515518188477, + "learning_rate": 1e-06, + "loss": 0.9097, + "num_input_tokens_seen": 58570316, + "step": 1046 + }, + { + "epoch": 2.329621380846325, + "loss": 0.8449282050132751, + "loss_ce": 0.0004457966424524784, + "loss_iou": 0.35546875, + "loss_num": 0.0262451171875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 58570316, + "step": 1046 + }, + { + "epoch": 2.33184855233853, + "grad_norm": 15.037751197814941, + "learning_rate": 1e-06, + "loss": 0.7533, + "num_input_tokens_seen": 58628892, + "step": 1047 + }, + { + "epoch": 2.33184855233853, + "loss": 0.7818921804428101, + "loss_ce": 0.00039804144762456417, + "loss_iou": 0.353515625, + "loss_num": 0.01507568359375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 58628892, + "step": 1047 + }, + { + "epoch": 2.334075723830735, + "grad_norm": 19.363588333129883, + "learning_rate": 1e-06, + "loss": 0.8876, + "num_input_tokens_seen": 58685104, + "step": 1048 + }, + { + "epoch": 2.334075723830735, + "loss": 1.0165672302246094, + "loss_ce": 0.000453831598861143, + "loss_iou": 0.408203125, + "loss_num": 0.039794921875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 58685104, + "step": 1048 + }, + { + "epoch": 2.33630289532294, + "grad_norm": 28.217439651489258, + "learning_rate": 1e-06, + "loss": 1.1681, + "num_input_tokens_seen": 58738936, + "step": 1049 + }, + { + "epoch": 2.33630289532294, + "loss": 1.4371428489685059, + "loss_ce": 0.0006193204899318516, + "loss_iou": 0.55078125, + "loss_num": 0.06787109375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 58738936, + "step": 1049 + }, + { + "epoch": 2.338530066815145, + "grad_norm": 17.534543991088867, + "learning_rate": 1e-06, + "loss": 0.6902, + "num_input_tokens_seen": 58798988, + "step": 1050 + }, + { + "epoch": 2.338530066815145, + "loss": 0.47515368461608887, + "loss_ce": 0.00030017929384484887, + "loss_iou": 0.212890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 58798988, + "step": 1050 + }, + { + "epoch": 2.34075723830735, + "grad_norm": 18.249549865722656, + "learning_rate": 1e-06, + "loss": 0.9676, + "num_input_tokens_seen": 58854660, + "step": 1051 + }, + { + "epoch": 2.34075723830735, + "loss": 0.977979302406311, + "loss_ce": 0.0004402369959279895, + "loss_iou": 0.40234375, + "loss_num": 0.0341796875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 58854660, + "step": 1051 + }, + { + "epoch": 2.3429844097995547, + "grad_norm": 21.75814437866211, + "learning_rate": 1e-06, + "loss": 0.9833, + "num_input_tokens_seen": 58911232, + "step": 1052 + }, + { + "epoch": 2.3429844097995547, + "loss": 1.1573677062988281, + "loss_ce": 0.0005073855281807482, + "loss_iou": 0.484375, + "loss_num": 0.0380859375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 58911232, + "step": 1052 + }, + { + "epoch": 2.3452115812917596, + "grad_norm": 18.00012969970703, + "learning_rate": 1e-06, + "loss": 0.7443, + "num_input_tokens_seen": 58968392, + "step": 1053 + }, + { + "epoch": 2.3452115812917596, + "loss": 0.7719112634658813, + "loss_ce": 0.0004268469929229468, + "loss_iou": 0.314453125, + "loss_num": 0.0286865234375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 58968392, + "step": 1053 + }, + { + "epoch": 2.3474387527839644, + "grad_norm": 17.328489303588867, + "learning_rate": 1e-06, + "loss": 0.9562, + "num_input_tokens_seen": 59026080, + "step": 1054 + }, + { + "epoch": 2.3474387527839644, + "loss": 0.7599896192550659, + "loss_ce": 0.00046816159738227725, + "loss_iou": 0.330078125, + "loss_num": 0.019775390625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 59026080, + "step": 1054 + }, + { + "epoch": 2.3496659242761693, + "grad_norm": 24.498754501342773, + "learning_rate": 1e-06, + "loss": 1.0251, + "num_input_tokens_seen": 59082868, + "step": 1055 + }, + { + "epoch": 2.3496659242761693, + "loss": 1.0223811864852905, + "loss_ce": 0.0024837690871208906, + "loss_iou": 0.443359375, + "loss_num": 0.0267333984375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 59082868, + "step": 1055 + }, + { + "epoch": 2.351893095768374, + "grad_norm": 258.32928466796875, + "learning_rate": 1e-06, + "loss": 1.0218, + "num_input_tokens_seen": 59139812, + "step": 1056 + }, + { + "epoch": 2.351893095768374, + "loss": 0.9473181962966919, + "loss_ce": 0.0005408285651355982, + "loss_iou": 0.3828125, + "loss_num": 0.0361328125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 59139812, + "step": 1056 + }, + { + "epoch": 2.354120267260579, + "grad_norm": 41.9471435546875, + "learning_rate": 1e-06, + "loss": 0.9818, + "num_input_tokens_seen": 59195536, + "step": 1057 + }, + { + "epoch": 2.354120267260579, + "loss": 1.0145529508590698, + "loss_ce": 0.0003928400401491672, + "loss_iou": 0.44140625, + "loss_num": 0.02587890625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 59195536, + "step": 1057 + }, + { + "epoch": 2.356347438752784, + "grad_norm": 20.948766708374023, + "learning_rate": 1e-06, + "loss": 1.0111, + "num_input_tokens_seen": 59252604, + "step": 1058 + }, + { + "epoch": 2.356347438752784, + "loss": 1.2552516460418701, + "loss_ce": 0.0003688871511258185, + "loss_iou": 0.4921875, + "loss_num": 0.053955078125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 59252604, + "step": 1058 + }, + { + "epoch": 2.3585746102449887, + "grad_norm": 33.876991271972656, + "learning_rate": 1e-06, + "loss": 0.8549, + "num_input_tokens_seen": 59308408, + "step": 1059 + }, + { + "epoch": 2.3585746102449887, + "loss": 0.6373202204704285, + "loss_ce": 0.0003573211724869907, + "loss_iou": 0.251953125, + "loss_num": 0.0269775390625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 59308408, + "step": 1059 + }, + { + "epoch": 2.3608017817371936, + "grad_norm": 18.77747917175293, + "learning_rate": 1e-06, + "loss": 0.7226, + "num_input_tokens_seen": 59365048, + "step": 1060 + }, + { + "epoch": 2.3608017817371936, + "loss": 0.7437995076179504, + "loss_ce": 0.0008795711910352111, + "loss_iou": 0.31640625, + "loss_num": 0.0220947265625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 59365048, + "step": 1060 + }, + { + "epoch": 2.3630289532293984, + "grad_norm": 28.6199893951416, + "learning_rate": 1e-06, + "loss": 1.1339, + "num_input_tokens_seen": 59421260, + "step": 1061 + }, + { + "epoch": 2.3630289532293984, + "loss": 0.9527925848960876, + "loss_ce": 0.000644176616333425, + "loss_iou": 0.396484375, + "loss_num": 0.03173828125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 59421260, + "step": 1061 + }, + { + "epoch": 2.3652561247216037, + "grad_norm": 60.87717056274414, + "learning_rate": 1e-06, + "loss": 0.9096, + "num_input_tokens_seen": 59479676, + "step": 1062 + }, + { + "epoch": 2.3652561247216037, + "loss": 0.817761242389679, + "loss_ce": 0.0006225479301065207, + "loss_iou": 0.34765625, + "loss_num": 0.024658203125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 59479676, + "step": 1062 + }, + { + "epoch": 2.3674832962138086, + "grad_norm": 21.296977996826172, + "learning_rate": 1e-06, + "loss": 0.9039, + "num_input_tokens_seen": 59534276, + "step": 1063 + }, + { + "epoch": 2.3674832962138086, + "loss": 0.9175959825515747, + "loss_ce": 0.0003596206079237163, + "loss_iou": 0.35546875, + "loss_num": 0.041015625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 59534276, + "step": 1063 + }, + { + "epoch": 2.3697104677060135, + "grad_norm": 28.44460105895996, + "learning_rate": 1e-06, + "loss": 1.0105, + "num_input_tokens_seen": 59589900, + "step": 1064 + }, + { + "epoch": 2.3697104677060135, + "loss": 1.1641950607299805, + "loss_ce": 0.0006208861595951021, + "loss_iou": 0.482421875, + "loss_num": 0.03955078125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 59589900, + "step": 1064 + }, + { + "epoch": 2.3719376391982183, + "grad_norm": 35.73344421386719, + "learning_rate": 1e-06, + "loss": 0.8737, + "num_input_tokens_seen": 59649136, + "step": 1065 + }, + { + "epoch": 2.3719376391982183, + "loss": 0.921160101890564, + "loss_ce": 0.000505772652104497, + "loss_iou": 0.37109375, + "loss_num": 0.03564453125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 59649136, + "step": 1065 + }, + { + "epoch": 2.374164810690423, + "grad_norm": 22.40777015686035, + "learning_rate": 1e-06, + "loss": 0.9714, + "num_input_tokens_seen": 59701696, + "step": 1066 + }, + { + "epoch": 2.374164810690423, + "loss": 0.939410388469696, + "loss_ce": 0.00044557781075127423, + "loss_iou": 0.390625, + "loss_num": 0.031982421875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 59701696, + "step": 1066 + }, + { + "epoch": 2.376391982182628, + "grad_norm": 19.74895668029785, + "learning_rate": 1e-06, + "loss": 0.7167, + "num_input_tokens_seen": 59759464, + "step": 1067 + }, + { + "epoch": 2.376391982182628, + "loss": 0.7840393781661987, + "loss_ce": 0.00034799351124092937, + "loss_iou": 0.333984375, + "loss_num": 0.0230712890625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 59759464, + "step": 1067 + }, + { + "epoch": 2.378619153674833, + "grad_norm": 45.2192268371582, + "learning_rate": 1e-06, + "loss": 0.9642, + "num_input_tokens_seen": 59813332, + "step": 1068 + }, + { + "epoch": 2.378619153674833, + "loss": 0.961778998374939, + "loss_ce": 0.0003532259142957628, + "loss_iou": 0.404296875, + "loss_num": 0.030517578125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 59813332, + "step": 1068 + }, + { + "epoch": 2.3808463251670378, + "grad_norm": 21.226028442382812, + "learning_rate": 1e-06, + "loss": 0.8399, + "num_input_tokens_seen": 59867084, + "step": 1069 + }, + { + "epoch": 2.3808463251670378, + "loss": 0.6262215375900269, + "loss_ce": 0.00036704522790387273, + "loss_iou": 0.2294921875, + "loss_num": 0.033447265625, + "loss_xval": 0.625, + "num_input_tokens_seen": 59867084, + "step": 1069 + }, + { + "epoch": 2.3830734966592426, + "grad_norm": 20.79296112060547, + "learning_rate": 1e-06, + "loss": 0.949, + "num_input_tokens_seen": 59923596, + "step": 1070 + }, + { + "epoch": 2.3830734966592426, + "loss": 0.9344473481178284, + "loss_ce": 0.0003653277817647904, + "loss_iou": 0.39453125, + "loss_num": 0.029052734375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 59923596, + "step": 1070 + }, + { + "epoch": 2.3853006681514475, + "grad_norm": 23.782529830932617, + "learning_rate": 1e-06, + "loss": 0.9666, + "num_input_tokens_seen": 59978588, + "step": 1071 + }, + { + "epoch": 2.3853006681514475, + "loss": 0.9463720917701721, + "loss_ce": 0.0005713239079341292, + "loss_iou": 0.3828125, + "loss_num": 0.03564453125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 59978588, + "step": 1071 + }, + { + "epoch": 2.387527839643653, + "grad_norm": 25.347000122070312, + "learning_rate": 1e-06, + "loss": 1.0797, + "num_input_tokens_seen": 60035216, + "step": 1072 + }, + { + "epoch": 2.387527839643653, + "loss": 1.156226396560669, + "loss_ce": 0.0004647444002330303, + "loss_iou": 0.482421875, + "loss_num": 0.037841796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 60035216, + "step": 1072 + }, + { + "epoch": 2.3897550111358576, + "grad_norm": 104.3314208984375, + "learning_rate": 1e-06, + "loss": 0.8764, + "num_input_tokens_seen": 60089212, + "step": 1073 + }, + { + "epoch": 2.3897550111358576, + "loss": 0.868368923664093, + "loss_ce": 0.00044899751082994044, + "loss_iou": 0.3671875, + "loss_num": 0.02685546875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 60089212, + "step": 1073 + }, + { + "epoch": 2.3919821826280625, + "grad_norm": 17.936288833618164, + "learning_rate": 1e-06, + "loss": 1.1517, + "num_input_tokens_seen": 60145076, + "step": 1074 + }, + { + "epoch": 2.3919821826280625, + "loss": 1.1004548072814941, + "loss_ce": 0.0006012015510350466, + "loss_iou": 0.431640625, + "loss_num": 0.0478515625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 60145076, + "step": 1074 + }, + { + "epoch": 2.3942093541202674, + "grad_norm": 71.37808227539062, + "learning_rate": 1e-06, + "loss": 0.9387, + "num_input_tokens_seen": 60201880, + "step": 1075 + }, + { + "epoch": 2.3942093541202674, + "loss": 0.8587861061096191, + "loss_ce": 0.00038765568751841784, + "loss_iou": 0.35546875, + "loss_num": 0.0294189453125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 60201880, + "step": 1075 + }, + { + "epoch": 2.3964365256124722, + "grad_norm": 15.348673820495605, + "learning_rate": 1e-06, + "loss": 0.7725, + "num_input_tokens_seen": 60257620, + "step": 1076 + }, + { + "epoch": 2.3964365256124722, + "loss": 0.8453316688537598, + "loss_ce": 0.00036094876122660935, + "loss_iou": 0.373046875, + "loss_num": 0.0196533203125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 60257620, + "step": 1076 + }, + { + "epoch": 2.398663697104677, + "grad_norm": 13.168649673461914, + "learning_rate": 1e-06, + "loss": 0.7955, + "num_input_tokens_seen": 60315036, + "step": 1077 + }, + { + "epoch": 2.398663697104677, + "loss": 1.0594782829284668, + "loss_ce": 0.000884645152837038, + "loss_iou": 0.435546875, + "loss_num": 0.037353515625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 60315036, + "step": 1077 + }, + { + "epoch": 2.400890868596882, + "grad_norm": 22.153902053833008, + "learning_rate": 1e-06, + "loss": 0.9847, + "num_input_tokens_seen": 60371132, + "step": 1078 + }, + { + "epoch": 2.400890868596882, + "loss": 0.6142248511314392, + "loss_ce": 0.0004552791069727391, + "loss_iou": 0.2470703125, + "loss_num": 0.0240478515625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 60371132, + "step": 1078 + }, + { + "epoch": 2.403118040089087, + "grad_norm": 16.075576782226562, + "learning_rate": 1e-06, + "loss": 0.9945, + "num_input_tokens_seen": 60425052, + "step": 1079 + }, + { + "epoch": 2.403118040089087, + "loss": 0.8446251153945923, + "loss_ce": 0.0003868020430672914, + "loss_iou": 0.34765625, + "loss_num": 0.030029296875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 60425052, + "step": 1079 + }, + { + "epoch": 2.4053452115812917, + "grad_norm": 30.00712013244629, + "learning_rate": 1e-06, + "loss": 0.9475, + "num_input_tokens_seen": 60481796, + "step": 1080 + }, + { + "epoch": 2.4053452115812917, + "loss": 1.1728054285049438, + "loss_ce": 0.0004421064513735473, + "loss_iou": 0.46875, + "loss_num": 0.047119140625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 60481796, + "step": 1080 + }, + { + "epoch": 2.4075723830734965, + "grad_norm": 37.473915100097656, + "learning_rate": 1e-06, + "loss": 1.0343, + "num_input_tokens_seen": 60535540, + "step": 1081 + }, + { + "epoch": 2.4075723830734965, + "loss": 0.8917477130889893, + "loss_ce": 0.00039027928141877055, + "loss_iou": 0.369140625, + "loss_num": 0.0308837890625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 60535540, + "step": 1081 + }, + { + "epoch": 2.4097995545657014, + "grad_norm": 26.055566787719727, + "learning_rate": 1e-06, + "loss": 0.9813, + "num_input_tokens_seen": 60591724, + "step": 1082 + }, + { + "epoch": 2.4097995545657014, + "loss": 0.8454374670982361, + "loss_ce": 0.00046675774501636624, + "loss_iou": 0.341796875, + "loss_num": 0.0322265625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 60591724, + "step": 1082 + }, + { + "epoch": 2.4120267260579062, + "grad_norm": 19.031705856323242, + "learning_rate": 1e-06, + "loss": 0.9273, + "num_input_tokens_seen": 60646416, + "step": 1083 + }, + { + "epoch": 2.4120267260579062, + "loss": 1.0545891523361206, + "loss_ce": 0.000389896216802299, + "loss_iou": 0.458984375, + "loss_num": 0.0277099609375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 60646416, + "step": 1083 + }, + { + "epoch": 2.4142538975501115, + "grad_norm": 13.565470695495605, + "learning_rate": 1e-06, + "loss": 0.8106, + "num_input_tokens_seen": 60703608, + "step": 1084 + }, + { + "epoch": 2.4142538975501115, + "loss": 0.821467399597168, + "loss_ce": 0.0004224562435410917, + "loss_iou": 0.3203125, + "loss_num": 0.036376953125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 60703608, + "step": 1084 + }, + { + "epoch": 2.4164810690423164, + "grad_norm": 12.959884643554688, + "learning_rate": 1e-06, + "loss": 0.8867, + "num_input_tokens_seen": 60760392, + "step": 1085 + }, + { + "epoch": 2.4164810690423164, + "loss": 0.7961212396621704, + "loss_ce": 0.00046693626791238785, + "loss_iou": 0.310546875, + "loss_num": 0.034912109375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 60760392, + "step": 1085 + }, + { + "epoch": 2.4187082405345213, + "grad_norm": 15.361568450927734, + "learning_rate": 1e-06, + "loss": 0.8654, + "num_input_tokens_seen": 60817288, + "step": 1086 + }, + { + "epoch": 2.4187082405345213, + "loss": 0.6870740056037903, + "loss_ce": 0.00030639575561508536, + "loss_iou": 0.294921875, + "loss_num": 0.01953125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 60817288, + "step": 1086 + }, + { + "epoch": 2.420935412026726, + "grad_norm": 22.026248931884766, + "learning_rate": 1e-06, + "loss": 1.1047, + "num_input_tokens_seen": 60874444, + "step": 1087 + }, + { + "epoch": 2.420935412026726, + "loss": 0.8688787221908569, + "loss_ce": 0.0007146652205847204, + "loss_iou": 0.3828125, + "loss_num": 0.0208740234375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 60874444, + "step": 1087 + }, + { + "epoch": 2.423162583518931, + "grad_norm": 20.72212028503418, + "learning_rate": 1e-06, + "loss": 1.1223, + "num_input_tokens_seen": 60932720, + "step": 1088 + }, + { + "epoch": 2.423162583518931, + "loss": 1.190258264541626, + "loss_ce": 0.00080519710900262, + "loss_iou": 0.4609375, + "loss_num": 0.0537109375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 60932720, + "step": 1088 + }, + { + "epoch": 2.425389755011136, + "grad_norm": 22.80421257019043, + "learning_rate": 1e-06, + "loss": 1.0687, + "num_input_tokens_seen": 60990336, + "step": 1089 + }, + { + "epoch": 2.425389755011136, + "loss": 1.2014228105545044, + "loss_ce": 0.0007392432307824492, + "loss_iou": 0.4921875, + "loss_num": 0.04345703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 60990336, + "step": 1089 + }, + { + "epoch": 2.4276169265033407, + "grad_norm": 23.451295852661133, + "learning_rate": 1e-06, + "loss": 1.0575, + "num_input_tokens_seen": 61045524, + "step": 1090 + }, + { + "epoch": 2.4276169265033407, + "loss": 1.208905816078186, + "loss_ce": 0.000409751373808831, + "loss_iou": 0.49609375, + "loss_num": 0.043212890625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 61045524, + "step": 1090 + }, + { + "epoch": 2.4298440979955456, + "grad_norm": 17.383296966552734, + "learning_rate": 1e-06, + "loss": 0.7853, + "num_input_tokens_seen": 61104956, + "step": 1091 + }, + { + "epoch": 2.4298440979955456, + "loss": 0.7447309494018555, + "loss_ce": 0.000834420439787209, + "loss_iou": 0.32421875, + "loss_num": 0.0189208984375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 61104956, + "step": 1091 + }, + { + "epoch": 2.4320712694877504, + "grad_norm": 18.50430679321289, + "learning_rate": 1e-06, + "loss": 0.9737, + "num_input_tokens_seen": 61159236, + "step": 1092 + }, + { + "epoch": 2.4320712694877504, + "loss": 0.9158762693405151, + "loss_ce": 0.00034890550887212157, + "loss_iou": 0.38671875, + "loss_num": 0.028076171875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 61159236, + "step": 1092 + }, + { + "epoch": 2.4342984409799553, + "grad_norm": 18.052396774291992, + "learning_rate": 1e-06, + "loss": 1.0082, + "num_input_tokens_seen": 61217816, + "step": 1093 + }, + { + "epoch": 2.4342984409799553, + "loss": 0.8641993999481201, + "loss_ce": 0.00042982713785022497, + "loss_iou": 0.36328125, + "loss_num": 0.027587890625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 61217816, + "step": 1093 + }, + { + "epoch": 2.4365256124721606, + "grad_norm": 20.244873046875, + "learning_rate": 1e-06, + "loss": 0.5503, + "num_input_tokens_seen": 61274332, + "step": 1094 + }, + { + "epoch": 2.4365256124721606, + "loss": 0.40073591470718384, + "loss_ce": 0.0003452802775427699, + "loss_iou": 0.1748046875, + "loss_num": 0.010009765625, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 61274332, + "step": 1094 + }, + { + "epoch": 2.4387527839643655, + "grad_norm": 18.268579483032227, + "learning_rate": 1e-06, + "loss": 0.9795, + "num_input_tokens_seen": 61327972, + "step": 1095 + }, + { + "epoch": 2.4387527839643655, + "loss": 0.696860671043396, + "loss_ce": 0.00044950933079235256, + "loss_iou": 0.291015625, + "loss_num": 0.02294921875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 61327972, + "step": 1095 + }, + { + "epoch": 2.4409799554565703, + "grad_norm": 13.835185050964355, + "learning_rate": 1e-06, + "loss": 0.8244, + "num_input_tokens_seen": 61384388, + "step": 1096 + }, + { + "epoch": 2.4409799554565703, + "loss": 0.6497031450271606, + "loss_ce": 0.00028911407571285963, + "loss_iou": 0.2421875, + "loss_num": 0.03271484375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 61384388, + "step": 1096 + }, + { + "epoch": 2.443207126948775, + "grad_norm": 17.01195526123047, + "learning_rate": 1e-06, + "loss": 0.9698, + "num_input_tokens_seen": 61439968, + "step": 1097 + }, + { + "epoch": 2.443207126948775, + "loss": 1.043675184249878, + "loss_ce": 0.0007063635857775807, + "loss_iou": 0.45703125, + "loss_num": 0.0260009765625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 61439968, + "step": 1097 + }, + { + "epoch": 2.44543429844098, + "grad_norm": 12.948113441467285, + "learning_rate": 1e-06, + "loss": 0.8937, + "num_input_tokens_seen": 61496432, + "step": 1098 + }, + { + "epoch": 2.44543429844098, + "loss": 0.6754014492034912, + "loss_ce": 0.00035263324389234185, + "loss_iou": 0.291015625, + "loss_num": 0.0184326171875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 61496432, + "step": 1098 + }, + { + "epoch": 2.447661469933185, + "grad_norm": 15.429597854614258, + "learning_rate": 1e-06, + "loss": 1.3256, + "num_input_tokens_seen": 61555176, + "step": 1099 + }, + { + "epoch": 2.447661469933185, + "loss": 1.1492905616760254, + "loss_ce": 0.0003646724799182266, + "loss_iou": 0.4609375, + "loss_num": 0.045654296875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 61555176, + "step": 1099 + }, + { + "epoch": 2.4498886414253898, + "grad_norm": 13.81130313873291, + "learning_rate": 1e-06, + "loss": 0.9434, + "num_input_tokens_seen": 61614452, + "step": 1100 + }, + { + "epoch": 2.4498886414253898, + "loss": 1.213216781616211, + "loss_ce": 0.00032615740201435983, + "loss_iou": 0.5234375, + "loss_num": 0.03271484375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 61614452, + "step": 1100 + }, + { + "epoch": 2.4521158129175946, + "grad_norm": 36.95311737060547, + "learning_rate": 1e-06, + "loss": 0.6854, + "num_input_tokens_seen": 61671728, + "step": 1101 + }, + { + "epoch": 2.4521158129175946, + "loss": 0.6432477235794067, + "loss_ce": 0.0006696175551041961, + "loss_iou": 0.24609375, + "loss_num": 0.0302734375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 61671728, + "step": 1101 + }, + { + "epoch": 2.4543429844097995, + "grad_norm": 18.458709716796875, + "learning_rate": 1e-06, + "loss": 0.8445, + "num_input_tokens_seen": 61723072, + "step": 1102 + }, + { + "epoch": 2.4543429844097995, + "loss": 1.0072041749954224, + "loss_ce": 0.00036824517883360386, + "loss_iou": 0.412109375, + "loss_num": 0.036865234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 61723072, + "step": 1102 + }, + { + "epoch": 2.4565701559020043, + "grad_norm": 15.849419593811035, + "learning_rate": 1e-06, + "loss": 1.1075, + "num_input_tokens_seen": 61780896, + "step": 1103 + }, + { + "epoch": 2.4565701559020043, + "loss": 0.9099684953689575, + "loss_ce": 0.006648160517215729, + "loss_iou": 0.3984375, + "loss_num": 0.021240234375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 61780896, + "step": 1103 + }, + { + "epoch": 2.458797327394209, + "grad_norm": 19.382238388061523, + "learning_rate": 1e-06, + "loss": 1.0016, + "num_input_tokens_seen": 61836688, + "step": 1104 + }, + { + "epoch": 2.458797327394209, + "loss": 0.6892969608306885, + "loss_ce": 0.0003321617841720581, + "loss_iou": 0.28125, + "loss_num": 0.025146484375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 61836688, + "step": 1104 + }, + { + "epoch": 2.461024498886414, + "grad_norm": 19.609500885009766, + "learning_rate": 1e-06, + "loss": 1.1058, + "num_input_tokens_seen": 61892164, + "step": 1105 + }, + { + "epoch": 2.461024498886414, + "loss": 1.3998944759368896, + "loss_ce": 0.00048047915333881974, + "loss_iou": 0.546875, + "loss_num": 0.06103515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 61892164, + "step": 1105 + }, + { + "epoch": 2.463251670378619, + "grad_norm": 19.078765869140625, + "learning_rate": 1e-06, + "loss": 0.8841, + "num_input_tokens_seen": 61948728, + "step": 1106 + }, + { + "epoch": 2.463251670378619, + "loss": 0.673430323600769, + "loss_ce": 0.0003346248995512724, + "loss_iou": 0.251953125, + "loss_num": 0.033935546875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 61948728, + "step": 1106 + }, + { + "epoch": 2.4654788418708242, + "grad_norm": 28.457544326782227, + "learning_rate": 1e-06, + "loss": 0.7968, + "num_input_tokens_seen": 62002888, + "step": 1107 + }, + { + "epoch": 2.4654788418708242, + "loss": 0.6821703314781189, + "loss_ce": 0.00028555351309478283, + "loss_iou": 0.251953125, + "loss_num": 0.035888671875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 62002888, + "step": 1107 + }, + { + "epoch": 2.467706013363029, + "grad_norm": 15.16339111328125, + "learning_rate": 1e-06, + "loss": 0.8623, + "num_input_tokens_seen": 62058460, + "step": 1108 + }, + { + "epoch": 2.467706013363029, + "loss": 0.8493123650550842, + "loss_ce": 0.00043543853098526597, + "loss_iou": 0.3515625, + "loss_num": 0.029296875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 62058460, + "step": 1108 + }, + { + "epoch": 2.469933184855234, + "grad_norm": 21.689632415771484, + "learning_rate": 1e-06, + "loss": 0.8395, + "num_input_tokens_seen": 62116612, + "step": 1109 + }, + { + "epoch": 2.469933184855234, + "loss": 0.6302697658538818, + "loss_ce": 0.00038696054252795875, + "loss_iou": 0.267578125, + "loss_num": 0.0194091796875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 62116612, + "step": 1109 + }, + { + "epoch": 2.472160356347439, + "grad_norm": 32.711299896240234, + "learning_rate": 1e-06, + "loss": 0.9001, + "num_input_tokens_seen": 62173120, + "step": 1110 + }, + { + "epoch": 2.472160356347439, + "loss": 1.1314702033996582, + "loss_ce": 0.0003667679848149419, + "loss_iou": 0.5078125, + "loss_num": 0.0223388671875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 62173120, + "step": 1110 + }, + { + "epoch": 2.4743875278396437, + "grad_norm": 63.607330322265625, + "learning_rate": 1e-06, + "loss": 0.928, + "num_input_tokens_seen": 62231148, + "step": 1111 + }, + { + "epoch": 2.4743875278396437, + "loss": 0.8059262037277222, + "loss_ce": 0.00038422548095695674, + "loss_iou": 0.3359375, + "loss_num": 0.026611328125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 62231148, + "step": 1111 + }, + { + "epoch": 2.4766146993318485, + "grad_norm": 17.097545623779297, + "learning_rate": 1e-06, + "loss": 0.9165, + "num_input_tokens_seen": 62288036, + "step": 1112 + }, + { + "epoch": 2.4766146993318485, + "loss": 0.9176112413406372, + "loss_ce": 0.0003749003808479756, + "loss_iou": 0.400390625, + "loss_num": 0.02294921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 62288036, + "step": 1112 + }, + { + "epoch": 2.4788418708240534, + "grad_norm": 19.068836212158203, + "learning_rate": 1e-06, + "loss": 1.0021, + "num_input_tokens_seen": 62343840, + "step": 1113 + }, + { + "epoch": 2.4788418708240534, + "loss": 0.8066169619560242, + "loss_ce": 0.0007087617414072156, + "loss_iou": 0.326171875, + "loss_num": 0.0311279296875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 62343840, + "step": 1113 + }, + { + "epoch": 2.4810690423162582, + "grad_norm": 18.9963321685791, + "learning_rate": 1e-06, + "loss": 0.8238, + "num_input_tokens_seen": 62399636, + "step": 1114 + }, + { + "epoch": 2.4810690423162582, + "loss": 0.6373310089111328, + "loss_ce": 0.00036811293102800846, + "loss_iou": 0.27734375, + "loss_num": 0.01611328125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 62399636, + "step": 1114 + }, + { + "epoch": 2.483296213808463, + "grad_norm": 18.128734588623047, + "learning_rate": 1e-06, + "loss": 1.0068, + "num_input_tokens_seen": 62457020, + "step": 1115 + }, + { + "epoch": 2.483296213808463, + "loss": 0.7235212326049805, + "loss_ce": 0.0003766651498153806, + "loss_iou": 0.30859375, + "loss_num": 0.021240234375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 62457020, + "step": 1115 + }, + { + "epoch": 2.485523385300668, + "grad_norm": 53.74859619140625, + "learning_rate": 1e-06, + "loss": 0.8546, + "num_input_tokens_seen": 62515924, + "step": 1116 + }, + { + "epoch": 2.485523385300668, + "loss": 1.067058801651001, + "loss_ce": 0.0006525892531499267, + "loss_iou": 0.4375, + "loss_num": 0.0380859375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 62515924, + "step": 1116 + }, + { + "epoch": 2.4877505567928733, + "grad_norm": 16.628570556640625, + "learning_rate": 1e-06, + "loss": 0.9406, + "num_input_tokens_seen": 62575028, + "step": 1117 + }, + { + "epoch": 2.4877505567928733, + "loss": 1.1619257926940918, + "loss_ce": 0.0005488308379426599, + "loss_iou": 0.4765625, + "loss_num": 0.041748046875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 62575028, + "step": 1117 + }, + { + "epoch": 2.489977728285078, + "grad_norm": 18.023460388183594, + "learning_rate": 1e-06, + "loss": 0.9142, + "num_input_tokens_seen": 62630184, + "step": 1118 + }, + { + "epoch": 2.489977728285078, + "loss": 1.1649341583251953, + "loss_ce": 0.00038348851376213133, + "loss_iou": 0.458984375, + "loss_num": 0.049560546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 62630184, + "step": 1118 + }, + { + "epoch": 2.492204899777283, + "grad_norm": 28.666088104248047, + "learning_rate": 1e-06, + "loss": 1.1036, + "num_input_tokens_seen": 62686600, + "step": 1119 + }, + { + "epoch": 2.492204899777283, + "loss": 1.091188907623291, + "loss_ce": 0.0008568049524910748, + "loss_iou": 0.443359375, + "loss_num": 0.040771484375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 62686600, + "step": 1119 + }, + { + "epoch": 2.494432071269488, + "grad_norm": 34.05298614501953, + "learning_rate": 1e-06, + "loss": 1.3067, + "num_input_tokens_seen": 62740120, + "step": 1120 + }, + { + "epoch": 2.494432071269488, + "loss": 1.4040180444717407, + "loss_ce": 0.00094178831204772, + "loss_iou": 0.53125, + "loss_num": 0.0673828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 62740120, + "step": 1120 + }, + { + "epoch": 2.4966592427616927, + "grad_norm": 23.138212203979492, + "learning_rate": 1e-06, + "loss": 0.9225, + "num_input_tokens_seen": 62796964, + "step": 1121 + }, + { + "epoch": 2.4966592427616927, + "loss": 1.013495922088623, + "loss_ce": 0.0003123595961369574, + "loss_iou": 0.404296875, + "loss_num": 0.04052734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 62796964, + "step": 1121 + }, + { + "epoch": 2.4988864142538976, + "grad_norm": 17.127120971679688, + "learning_rate": 1e-06, + "loss": 1.2497, + "num_input_tokens_seen": 62853264, + "step": 1122 + }, + { + "epoch": 2.4988864142538976, + "loss": 1.4592686891555786, + "loss_ce": 0.0007725717732682824, + "loss_iou": 0.546875, + "loss_num": 0.07373046875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 62853264, + "step": 1122 + }, + { + "epoch": 2.5011135857461024, + "grad_norm": 14.743764877319336, + "learning_rate": 1e-06, + "loss": 0.8212, + "num_input_tokens_seen": 62909164, + "step": 1123 + }, + { + "epoch": 2.5011135857461024, + "loss": 0.598504900932312, + "loss_ce": 0.0003604079829528928, + "loss_iou": 0.26171875, + "loss_num": 0.01519775390625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 62909164, + "step": 1123 + }, + { + "epoch": 2.5033407572383073, + "grad_norm": 13.89709758758545, + "learning_rate": 1e-06, + "loss": 0.9177, + "num_input_tokens_seen": 62965280, + "step": 1124 + }, + { + "epoch": 2.5033407572383073, + "loss": 0.7613284587860107, + "loss_ce": 0.0003421393339522183, + "loss_iou": 0.3046875, + "loss_num": 0.0301513671875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 62965280, + "step": 1124 + }, + { + "epoch": 2.505567928730512, + "grad_norm": 27.25965690612793, + "learning_rate": 1e-06, + "loss": 0.8435, + "num_input_tokens_seen": 63019164, + "step": 1125 + }, + { + "epoch": 2.505567928730512, + "loss": 0.9681985974311829, + "loss_ce": 0.0004251442151144147, + "loss_iou": 0.390625, + "loss_num": 0.037109375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 63019164, + "step": 1125 + }, + { + "epoch": 2.507795100222717, + "grad_norm": 19.890287399291992, + "learning_rate": 1e-06, + "loss": 0.8424, + "num_input_tokens_seen": 63073888, + "step": 1126 + }, + { + "epoch": 2.507795100222717, + "loss": 0.7713284492492676, + "loss_ce": 0.00033233320573344827, + "loss_iou": 0.32421875, + "loss_num": 0.0242919921875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 63073888, + "step": 1126 + }, + { + "epoch": 2.510022271714922, + "grad_norm": 19.229721069335938, + "learning_rate": 1e-06, + "loss": 0.799, + "num_input_tokens_seen": 63131120, + "step": 1127 + }, + { + "epoch": 2.510022271714922, + "loss": 0.7538760900497437, + "loss_ce": 0.0004581384710036218, + "loss_iou": 0.306640625, + "loss_num": 0.0279541015625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 63131120, + "step": 1127 + }, + { + "epoch": 2.5122494432071267, + "grad_norm": 18.243764877319336, + "learning_rate": 1e-06, + "loss": 1.0801, + "num_input_tokens_seen": 63184712, + "step": 1128 + }, + { + "epoch": 2.5122494432071267, + "loss": 1.014033555984497, + "loss_ce": 0.00036173040280118585, + "loss_iou": 0.431640625, + "loss_num": 0.0302734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 63184712, + "step": 1128 + }, + { + "epoch": 2.5144766146993316, + "grad_norm": 27.520559310913086, + "learning_rate": 1e-06, + "loss": 1.0515, + "num_input_tokens_seen": 63239260, + "step": 1129 + }, + { + "epoch": 2.5144766146993316, + "loss": 0.9654926061630249, + "loss_ce": 0.0005268162931315601, + "loss_iou": 0.369140625, + "loss_num": 0.0458984375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 63239260, + "step": 1129 + }, + { + "epoch": 2.516703786191537, + "grad_norm": 23.411195755004883, + "learning_rate": 1e-06, + "loss": 0.5888, + "num_input_tokens_seen": 63294036, + "step": 1130 + }, + { + "epoch": 2.516703786191537, + "loss": 0.5716757774353027, + "loss_ce": 0.00038673574454151094, + "loss_iou": 0.248046875, + "loss_num": 0.0150146484375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 63294036, + "step": 1130 + }, + { + "epoch": 2.5189309576837418, + "grad_norm": 27.774524688720703, + "learning_rate": 1e-06, + "loss": 0.9061, + "num_input_tokens_seen": 63350920, + "step": 1131 + }, + { + "epoch": 2.5189309576837418, + "loss": 0.7308274507522583, + "loss_ce": 0.00035869883140549064, + "loss_iou": 0.298828125, + "loss_num": 0.026123046875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 63350920, + "step": 1131 + }, + { + "epoch": 2.5211581291759466, + "grad_norm": 42.71458053588867, + "learning_rate": 1e-06, + "loss": 1.0052, + "num_input_tokens_seen": 63408328, + "step": 1132 + }, + { + "epoch": 2.5211581291759466, + "loss": 0.9227426052093506, + "loss_ce": 0.00037933725980110466, + "loss_iou": 0.41015625, + "loss_num": 0.0203857421875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 63408328, + "step": 1132 + }, + { + "epoch": 2.5233853006681515, + "grad_norm": 21.282625198364258, + "learning_rate": 1e-06, + "loss": 1.0933, + "num_input_tokens_seen": 63463376, + "step": 1133 + }, + { + "epoch": 2.5233853006681515, + "loss": 0.8489203453063965, + "loss_ce": 0.0005317054456099868, + "loss_iou": 0.380859375, + "loss_num": 0.017333984375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 63463376, + "step": 1133 + }, + { + "epoch": 2.5256124721603563, + "grad_norm": 13.566144943237305, + "learning_rate": 1e-06, + "loss": 0.9126, + "num_input_tokens_seen": 63521748, + "step": 1134 + }, + { + "epoch": 2.5256124721603563, + "loss": 1.027003288269043, + "loss_ce": 0.0006361395353451371, + "loss_iou": 0.42578125, + "loss_num": 0.03466796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 63521748, + "step": 1134 + }, + { + "epoch": 2.527839643652561, + "grad_norm": 22.058757781982422, + "learning_rate": 1e-06, + "loss": 0.8642, + "num_input_tokens_seen": 63575808, + "step": 1135 + }, + { + "epoch": 2.527839643652561, + "loss": 0.5723444223403931, + "loss_ce": 0.00032291823299601674, + "loss_iou": 0.224609375, + "loss_num": 0.024658203125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 63575808, + "step": 1135 + }, + { + "epoch": 2.530066815144766, + "grad_norm": 17.351253509521484, + "learning_rate": 1e-06, + "loss": 0.9297, + "num_input_tokens_seen": 63629380, + "step": 1136 + }, + { + "epoch": 2.530066815144766, + "loss": 1.1235007047653198, + "loss_ce": 0.0006980298785492778, + "loss_iou": 0.466796875, + "loss_num": 0.037841796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 63629380, + "step": 1136 + }, + { + "epoch": 2.532293986636971, + "grad_norm": 25.26835060119629, + "learning_rate": 1e-06, + "loss": 0.8625, + "num_input_tokens_seen": 63685404, + "step": 1137 + }, + { + "epoch": 2.532293986636971, + "loss": 0.7635213136672974, + "loss_ce": 0.0003376836539246142, + "loss_iou": 0.314453125, + "loss_num": 0.026611328125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 63685404, + "step": 1137 + }, + { + "epoch": 2.534521158129176, + "grad_norm": 23.412189483642578, + "learning_rate": 1e-06, + "loss": 1.0618, + "num_input_tokens_seen": 63742344, + "step": 1138 + }, + { + "epoch": 2.534521158129176, + "loss": 1.1275867223739624, + "loss_ce": 0.0006335656507872045, + "loss_iou": 0.470703125, + "loss_num": 0.03759765625, + "loss_xval": 1.125, + "num_input_tokens_seen": 63742344, + "step": 1138 + }, + { + "epoch": 2.536748329621381, + "grad_norm": 59.32328414916992, + "learning_rate": 1e-06, + "loss": 0.9894, + "num_input_tokens_seen": 63798548, + "step": 1139 + }, + { + "epoch": 2.536748329621381, + "loss": 0.9086170196533203, + "loss_ce": 0.0006579948822036386, + "loss_iou": 0.37890625, + "loss_num": 0.030029296875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 63798548, + "step": 1139 + }, + { + "epoch": 2.538975501113586, + "grad_norm": 34.01222610473633, + "learning_rate": 1e-06, + "loss": 1.2089, + "num_input_tokens_seen": 63851244, + "step": 1140 + }, + { + "epoch": 2.538975501113586, + "loss": 1.4944491386413574, + "loss_ce": 0.0003085851203650236, + "loss_iou": 0.66796875, + "loss_num": 0.03173828125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 63851244, + "step": 1140 + }, + { + "epoch": 2.541202672605791, + "grad_norm": 23.09809112548828, + "learning_rate": 1e-06, + "loss": 1.0927, + "num_input_tokens_seen": 63908384, + "step": 1141 + }, + { + "epoch": 2.541202672605791, + "loss": 0.905631959438324, + "loss_ce": 0.00035852432483807206, + "loss_iou": 0.375, + "loss_num": 0.0311279296875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 63908384, + "step": 1141 + }, + { + "epoch": 2.5434298440979957, + "grad_norm": 12.117959022521973, + "learning_rate": 1e-06, + "loss": 1.2241, + "num_input_tokens_seen": 63965248, + "step": 1142 + }, + { + "epoch": 2.5434298440979957, + "loss": 1.4422664642333984, + "loss_ce": 0.0006161456694826484, + "loss_iou": 0.515625, + "loss_num": 0.0830078125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 63965248, + "step": 1142 + }, + { + "epoch": 2.5456570155902005, + "grad_norm": 19.716156005859375, + "learning_rate": 1e-06, + "loss": 0.9176, + "num_input_tokens_seen": 64018792, + "step": 1143 + }, + { + "epoch": 2.5456570155902005, + "loss": 0.611274778842926, + "loss_ce": 0.000434944755397737, + "loss_iou": 0.248046875, + "loss_num": 0.023193359375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 64018792, + "step": 1143 + }, + { + "epoch": 2.5478841870824054, + "grad_norm": 16.11542320251465, + "learning_rate": 1e-06, + "loss": 0.9638, + "num_input_tokens_seen": 64073780, + "step": 1144 + }, + { + "epoch": 2.5478841870824054, + "loss": 1.0830767154693604, + "loss_ce": 0.0004351097741164267, + "loss_iou": 0.3984375, + "loss_num": 0.05712890625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 64073780, + "step": 1144 + }, + { + "epoch": 2.5501113585746102, + "grad_norm": 27.197479248046875, + "learning_rate": 1e-06, + "loss": 0.8059, + "num_input_tokens_seen": 64130436, + "step": 1145 + }, + { + "epoch": 2.5501113585746102, + "loss": 1.001615285873413, + "loss_ce": 0.0008829243597574532, + "loss_iou": 0.39453125, + "loss_num": 0.042236328125, + "loss_xval": 1.0, + "num_input_tokens_seen": 64130436, + "step": 1145 + }, + { + "epoch": 2.552338530066815, + "grad_norm": 17.01374053955078, + "learning_rate": 1e-06, + "loss": 0.9329, + "num_input_tokens_seen": 64183692, + "step": 1146 + }, + { + "epoch": 2.552338530066815, + "loss": 0.9256563186645508, + "loss_ce": 0.00036334185278974473, + "loss_iou": 0.375, + "loss_num": 0.034912109375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 64183692, + "step": 1146 + }, + { + "epoch": 2.55456570155902, + "grad_norm": 18.95051383972168, + "learning_rate": 1e-06, + "loss": 0.9019, + "num_input_tokens_seen": 64239220, + "step": 1147 + }, + { + "epoch": 2.55456570155902, + "loss": 1.1268051862716675, + "loss_ce": 0.0003403578884899616, + "loss_iou": 0.462890625, + "loss_num": 0.040283203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 64239220, + "step": 1147 + }, + { + "epoch": 2.556792873051225, + "grad_norm": 26.1760196685791, + "learning_rate": 1e-06, + "loss": 0.8004, + "num_input_tokens_seen": 64292876, + "step": 1148 + }, + { + "epoch": 2.556792873051225, + "loss": 0.802330493927002, + "loss_ce": 0.0004505745891947299, + "loss_iou": 0.3125, + "loss_num": 0.03564453125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 64292876, + "step": 1148 + }, + { + "epoch": 2.5590200445434297, + "grad_norm": 18.058563232421875, + "learning_rate": 1e-06, + "loss": 0.9304, + "num_input_tokens_seen": 64350980, + "step": 1149 + }, + { + "epoch": 2.5590200445434297, + "loss": 1.0028624534606934, + "loss_ce": 0.00066518341191113, + "loss_iou": 0.41796875, + "loss_num": 0.032958984375, + "loss_xval": 1.0, + "num_input_tokens_seen": 64350980, + "step": 1149 + }, + { + "epoch": 2.5612472160356345, + "grad_norm": 15.238943099975586, + "learning_rate": 1e-06, + "loss": 0.8567, + "num_input_tokens_seen": 64407372, + "step": 1150 + }, + { + "epoch": 2.5612472160356345, + "loss": 0.6080912351608276, + "loss_ce": 0.00042517349356785417, + "loss_iou": 0.255859375, + "loss_num": 0.0194091796875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 64407372, + "step": 1150 + }, + { + "epoch": 2.5634743875278394, + "grad_norm": 24.09518814086914, + "learning_rate": 1e-06, + "loss": 0.9458, + "num_input_tokens_seen": 64459936, + "step": 1151 + }, + { + "epoch": 2.5634743875278394, + "loss": 0.8809654712677002, + "loss_ce": 0.0003502329345792532, + "loss_iou": 0.40234375, + "loss_num": 0.01507568359375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 64459936, + "step": 1151 + }, + { + "epoch": 2.5657015590200447, + "grad_norm": 18.098283767700195, + "learning_rate": 1e-06, + "loss": 0.9909, + "num_input_tokens_seen": 64516664, + "step": 1152 + }, + { + "epoch": 2.5657015590200447, + "loss": 0.8294593095779419, + "loss_ce": 0.00035776515142060816, + "loss_iou": 0.337890625, + "loss_num": 0.03076171875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 64516664, + "step": 1152 + }, + { + "epoch": 2.5679287305122496, + "grad_norm": 23.110363006591797, + "learning_rate": 1e-06, + "loss": 0.8105, + "num_input_tokens_seen": 64573704, + "step": 1153 + }, + { + "epoch": 2.5679287305122496, + "loss": 0.7701290845870972, + "loss_ce": 0.0003536652075126767, + "loss_iou": 0.326171875, + "loss_num": 0.0234375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 64573704, + "step": 1153 + }, + { + "epoch": 2.5701559020044544, + "grad_norm": 18.98008155822754, + "learning_rate": 1e-06, + "loss": 0.8578, + "num_input_tokens_seen": 64632268, + "step": 1154 + }, + { + "epoch": 2.5701559020044544, + "loss": 0.7266561985015869, + "loss_ce": 0.00033786421408876777, + "loss_iou": 0.32421875, + "loss_num": 0.015625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 64632268, + "step": 1154 + }, + { + "epoch": 2.5723830734966593, + "grad_norm": 17.440505981445312, + "learning_rate": 1e-06, + "loss": 1.0514, + "num_input_tokens_seen": 64687228, + "step": 1155 + }, + { + "epoch": 2.5723830734966593, + "loss": 1.1310064792633057, + "loss_ce": 0.0003913280088454485, + "loss_iou": 0.470703125, + "loss_num": 0.03759765625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 64687228, + "step": 1155 + }, + { + "epoch": 2.574610244988864, + "grad_norm": 17.068138122558594, + "learning_rate": 1e-06, + "loss": 0.7717, + "num_input_tokens_seen": 64744716, + "step": 1156 + }, + { + "epoch": 2.574610244988864, + "loss": 0.6678210496902466, + "loss_ce": 0.00034053760464303195, + "loss_iou": 0.291015625, + "loss_num": 0.01708984375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 64744716, + "step": 1156 + }, + { + "epoch": 2.576837416481069, + "grad_norm": 16.1224308013916, + "learning_rate": 1e-06, + "loss": 0.894, + "num_input_tokens_seen": 64803444, + "step": 1157 + }, + { + "epoch": 2.576837416481069, + "loss": 0.783063530921936, + "loss_ce": 0.0003486335917841643, + "loss_iou": 0.34375, + "loss_num": 0.0194091796875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 64803444, + "step": 1157 + }, + { + "epoch": 2.579064587973274, + "grad_norm": 31.78223991394043, + "learning_rate": 1e-06, + "loss": 0.8672, + "num_input_tokens_seen": 64859080, + "step": 1158 + }, + { + "epoch": 2.579064587973274, + "loss": 0.8566364645957947, + "loss_ce": 0.0004353098920546472, + "loss_iou": 0.33984375, + "loss_num": 0.03515625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 64859080, + "step": 1158 + }, + { + "epoch": 2.5812917594654787, + "grad_norm": 22.2586612701416, + "learning_rate": 1e-06, + "loss": 1.2238, + "num_input_tokens_seen": 64916436, + "step": 1159 + }, + { + "epoch": 2.5812917594654787, + "loss": 1.3102490901947021, + "loss_ce": 0.0009229998104274273, + "loss_iou": 0.5078125, + "loss_num": 0.059326171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 64916436, + "step": 1159 + }, + { + "epoch": 2.5835189309576836, + "grad_norm": 47.00844955444336, + "learning_rate": 1e-06, + "loss": 1.2237, + "num_input_tokens_seen": 64972836, + "step": 1160 + }, + { + "epoch": 2.5835189309576836, + "loss": 1.0932590961456299, + "loss_ce": 0.0004857148160226643, + "loss_iou": 0.4375, + "loss_num": 0.04345703125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 64972836, + "step": 1160 + }, + { + "epoch": 2.585746102449889, + "grad_norm": 20.3150634765625, + "learning_rate": 1e-06, + "loss": 1.236, + "num_input_tokens_seen": 65027704, + "step": 1161 + }, + { + "epoch": 2.585746102449889, + "loss": 1.0992555618286133, + "loss_ce": 0.001111081801354885, + "loss_iou": 0.474609375, + "loss_num": 0.0296630859375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 65027704, + "step": 1161 + }, + { + "epoch": 2.5879732739420938, + "grad_norm": 18.011165618896484, + "learning_rate": 1e-06, + "loss": 0.932, + "num_input_tokens_seen": 65083788, + "step": 1162 + }, + { + "epoch": 2.5879732739420938, + "loss": 1.0766229629516602, + "loss_ce": 0.00045114755630493164, + "loss_iou": 0.470703125, + "loss_num": 0.0267333984375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 65083788, + "step": 1162 + }, + { + "epoch": 2.5902004454342986, + "grad_norm": 82.84613037109375, + "learning_rate": 1e-06, + "loss": 1.2092, + "num_input_tokens_seen": 65139956, + "step": 1163 + }, + { + "epoch": 2.5902004454342986, + "loss": 1.3553651571273804, + "loss_ce": 0.00038471657899208367, + "loss_iou": 0.546875, + "loss_num": 0.051513671875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 65139956, + "step": 1163 + }, + { + "epoch": 2.5924276169265035, + "grad_norm": 17.478727340698242, + "learning_rate": 1e-06, + "loss": 0.9212, + "num_input_tokens_seen": 65192748, + "step": 1164 + }, + { + "epoch": 2.5924276169265035, + "loss": 0.8342557549476624, + "loss_ce": 0.0005154828540980816, + "loss_iou": 0.337890625, + "loss_num": 0.03173828125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 65192748, + "step": 1164 + }, + { + "epoch": 2.5946547884187083, + "grad_norm": 16.476207733154297, + "learning_rate": 1e-06, + "loss": 0.9718, + "num_input_tokens_seen": 65247340, + "step": 1165 + }, + { + "epoch": 2.5946547884187083, + "loss": 0.9732043743133545, + "loss_ce": 0.002012967597693205, + "loss_iou": 0.4140625, + "loss_num": 0.029052734375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 65247340, + "step": 1165 + }, + { + "epoch": 2.596881959910913, + "grad_norm": 22.207536697387695, + "learning_rate": 1e-06, + "loss": 1.1978, + "num_input_tokens_seen": 65304100, + "step": 1166 + }, + { + "epoch": 2.596881959910913, + "loss": 1.1994106769561768, + "loss_ce": 0.00043600943172350526, + "loss_iou": 0.49609375, + "loss_num": 0.04150390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 65304100, + "step": 1166 + }, + { + "epoch": 2.599109131403118, + "grad_norm": 28.934432983398438, + "learning_rate": 1e-06, + "loss": 0.7938, + "num_input_tokens_seen": 65362144, + "step": 1167 + }, + { + "epoch": 2.599109131403118, + "loss": 0.9534372091293335, + "loss_ce": 0.0051950025372207165, + "loss_iou": 0.400390625, + "loss_num": 0.0294189453125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 65362144, + "step": 1167 + }, + { + "epoch": 2.601336302895323, + "grad_norm": 18.83047103881836, + "learning_rate": 1e-06, + "loss": 0.8324, + "num_input_tokens_seen": 65417596, + "step": 1168 + }, + { + "epoch": 2.601336302895323, + "loss": 1.0699725151062012, + "loss_ce": 0.0008807817357592285, + "loss_iou": 0.421875, + "loss_num": 0.044677734375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 65417596, + "step": 1168 + }, + { + "epoch": 2.6035634743875278, + "grad_norm": 24.358144760131836, + "learning_rate": 1e-06, + "loss": 1.0814, + "num_input_tokens_seen": 65470364, + "step": 1169 + }, + { + "epoch": 2.6035634743875278, + "loss": 1.0574841499328613, + "loss_ce": 0.0003552237758412957, + "loss_iou": 0.421875, + "loss_num": 0.04296875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 65470364, + "step": 1169 + }, + { + "epoch": 2.6057906458797326, + "grad_norm": 93.62918090820312, + "learning_rate": 1e-06, + "loss": 1.0645, + "num_input_tokens_seen": 65526196, + "step": 1170 + }, + { + "epoch": 2.6057906458797326, + "loss": 0.9251736998558044, + "loss_ce": 0.0003689858131110668, + "loss_iou": 0.40625, + "loss_num": 0.0223388671875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 65526196, + "step": 1170 + }, + { + "epoch": 2.6080178173719375, + "grad_norm": 18.0775146484375, + "learning_rate": 1e-06, + "loss": 0.9244, + "num_input_tokens_seen": 65580796, + "step": 1171 + }, + { + "epoch": 2.6080178173719375, + "loss": 0.8135091662406921, + "loss_ce": 0.0005208852817304432, + "loss_iou": 0.330078125, + "loss_num": 0.03076171875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 65580796, + "step": 1171 + }, + { + "epoch": 2.6102449888641424, + "grad_norm": 20.29970932006836, + "learning_rate": 1e-06, + "loss": 0.9885, + "num_input_tokens_seen": 65633880, + "step": 1172 + }, + { + "epoch": 2.6102449888641424, + "loss": 0.7842642068862915, + "loss_ce": 0.00032866618130356073, + "loss_iou": 0.318359375, + "loss_num": 0.0294189453125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 65633880, + "step": 1172 + }, + { + "epoch": 2.612472160356347, + "grad_norm": 16.221195220947266, + "learning_rate": 1e-06, + "loss": 0.7178, + "num_input_tokens_seen": 65689656, + "step": 1173 + }, + { + "epoch": 2.612472160356347, + "loss": 0.6070936322212219, + "loss_ce": 0.00040419274591840804, + "loss_iou": 0.2578125, + "loss_num": 0.0185546875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 65689656, + "step": 1173 + }, + { + "epoch": 2.614699331848552, + "grad_norm": 21.689422607421875, + "learning_rate": 1e-06, + "loss": 0.9887, + "num_input_tokens_seen": 65748144, + "step": 1174 + }, + { + "epoch": 2.614699331848552, + "loss": 0.829987108707428, + "loss_ce": 0.00039724778616800904, + "loss_iou": 0.369140625, + "loss_num": 0.0179443359375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 65748144, + "step": 1174 + }, + { + "epoch": 2.6169265033407574, + "grad_norm": 26.423198699951172, + "learning_rate": 1e-06, + "loss": 1.0744, + "num_input_tokens_seen": 65804144, + "step": 1175 + }, + { + "epoch": 2.6169265033407574, + "loss": 1.1194689273834229, + "loss_ce": 0.00032840511994436383, + "loss_iou": 0.4609375, + "loss_num": 0.03955078125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 65804144, + "step": 1175 + }, + { + "epoch": 2.6191536748329622, + "grad_norm": 24.602550506591797, + "learning_rate": 1e-06, + "loss": 1.1684, + "num_input_tokens_seen": 65858964, + "step": 1176 + }, + { + "epoch": 2.6191536748329622, + "loss": 0.9794027805328369, + "loss_ce": 0.00039887180901132524, + "loss_iou": 0.40625, + "loss_num": 0.033203125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 65858964, + "step": 1176 + }, + { + "epoch": 2.621380846325167, + "grad_norm": 20.516910552978516, + "learning_rate": 1e-06, + "loss": 0.8049, + "num_input_tokens_seen": 65914988, + "step": 1177 + }, + { + "epoch": 2.621380846325167, + "loss": 0.8118576407432556, + "loss_ce": 0.00033423834247514606, + "loss_iou": 0.3515625, + "loss_num": 0.0216064453125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 65914988, + "step": 1177 + }, + { + "epoch": 2.623608017817372, + "grad_norm": 20.05683135986328, + "learning_rate": 1e-06, + "loss": 1.0382, + "num_input_tokens_seen": 65971964, + "step": 1178 + }, + { + "epoch": 2.623608017817372, + "loss": 1.1247992515563965, + "loss_ce": 0.0005316782626323402, + "loss_iou": 0.451171875, + "loss_num": 0.044189453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 65971964, + "step": 1178 + }, + { + "epoch": 2.625835189309577, + "grad_norm": 33.51689910888672, + "learning_rate": 1e-06, + "loss": 0.919, + "num_input_tokens_seen": 66030480, + "step": 1179 + }, + { + "epoch": 2.625835189309577, + "loss": 1.0413250923156738, + "loss_ce": 0.00030942526063881814, + "loss_iou": 0.435546875, + "loss_num": 0.03369140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 66030480, + "step": 1179 + }, + { + "epoch": 2.6280623608017817, + "grad_norm": 21.715831756591797, + "learning_rate": 1e-06, + "loss": 1.1688, + "num_input_tokens_seen": 66084552, + "step": 1180 + }, + { + "epoch": 2.6280623608017817, + "loss": 1.3229314088821411, + "loss_ce": 0.0006658075144514441, + "loss_iou": 0.52734375, + "loss_num": 0.052978515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 66084552, + "step": 1180 + }, + { + "epoch": 2.6302895322939865, + "grad_norm": 16.5938777923584, + "learning_rate": 1e-06, + "loss": 0.8147, + "num_input_tokens_seen": 66140764, + "step": 1181 + }, + { + "epoch": 2.6302895322939865, + "loss": 0.7720546722412109, + "loss_ce": 0.0005702448543161154, + "loss_iou": 0.306640625, + "loss_num": 0.031494140625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 66140764, + "step": 1181 + }, + { + "epoch": 2.6325167037861914, + "grad_norm": 20.419330596923828, + "learning_rate": 1e-06, + "loss": 0.9905, + "num_input_tokens_seen": 66196076, + "step": 1182 + }, + { + "epoch": 2.6325167037861914, + "loss": 1.1305346488952637, + "loss_ce": 0.0006517736474052072, + "loss_iou": 0.50390625, + "loss_num": 0.0245361328125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 66196076, + "step": 1182 + }, + { + "epoch": 2.6347438752783967, + "grad_norm": 27.849132537841797, + "learning_rate": 1e-06, + "loss": 0.7988, + "num_input_tokens_seen": 66247728, + "step": 1183 + }, + { + "epoch": 2.6347438752783967, + "loss": 0.901054859161377, + "loss_ce": 0.000420065043726936, + "loss_iou": 0.375, + "loss_num": 0.0296630859375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 66247728, + "step": 1183 + }, + { + "epoch": 2.6369710467706016, + "grad_norm": 20.54474449157715, + "learning_rate": 1e-06, + "loss": 1.0772, + "num_input_tokens_seen": 66301456, + "step": 1184 + }, + { + "epoch": 2.6369710467706016, + "loss": 1.1666040420532227, + "loss_ce": 0.000588378170505166, + "loss_iou": 0.43359375, + "loss_num": 0.060302734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 66301456, + "step": 1184 + }, + { + "epoch": 2.6391982182628064, + "grad_norm": 21.712364196777344, + "learning_rate": 1e-06, + "loss": 0.8525, + "num_input_tokens_seen": 66358172, + "step": 1185 + }, + { + "epoch": 2.6391982182628064, + "loss": 0.7973392009735107, + "loss_ce": 0.0003421990550123155, + "loss_iou": 0.35546875, + "loss_num": 0.017333984375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 66358172, + "step": 1185 + }, + { + "epoch": 2.6414253897550113, + "grad_norm": 46.89886474609375, + "learning_rate": 1e-06, + "loss": 1.2667, + "num_input_tokens_seen": 66415012, + "step": 1186 + }, + { + "epoch": 2.6414253897550113, + "loss": 1.1698817014694214, + "loss_ce": 0.0004481581272557378, + "loss_iou": 0.47265625, + "loss_num": 0.045166015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 66415012, + "step": 1186 + }, + { + "epoch": 2.643652561247216, + "grad_norm": 18.3448429107666, + "learning_rate": 1e-06, + "loss": 1.0143, + "num_input_tokens_seen": 66468628, + "step": 1187 + }, + { + "epoch": 2.643652561247216, + "loss": 1.03020179271698, + "loss_ce": 0.0004165967693552375, + "loss_iou": 0.439453125, + "loss_num": 0.030029296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 66468628, + "step": 1187 + }, + { + "epoch": 2.645879732739421, + "grad_norm": 36.016719818115234, + "learning_rate": 1e-06, + "loss": 0.8041, + "num_input_tokens_seen": 66524504, + "step": 1188 + }, + { + "epoch": 2.645879732739421, + "loss": 0.7011793851852417, + "loss_ce": 0.0004957778146490455, + "loss_iou": 0.275390625, + "loss_num": 0.0301513671875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 66524504, + "step": 1188 + }, + { + "epoch": 2.648106904231626, + "grad_norm": 31.499238967895508, + "learning_rate": 1e-06, + "loss": 0.923, + "num_input_tokens_seen": 66580192, + "step": 1189 + }, + { + "epoch": 2.648106904231626, + "loss": 0.7815631031990051, + "loss_ce": 0.00031307380413636565, + "loss_iou": 0.287109375, + "loss_num": 0.0419921875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 66580192, + "step": 1189 + }, + { + "epoch": 2.6503340757238307, + "grad_norm": 17.818838119506836, + "learning_rate": 1e-06, + "loss": 1.0473, + "num_input_tokens_seen": 66637228, + "step": 1190 + }, + { + "epoch": 2.6503340757238307, + "loss": 0.8321281671524048, + "loss_ce": 0.0003410530334804207, + "loss_iou": 0.3515625, + "loss_num": 0.0260009765625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 66637228, + "step": 1190 + }, + { + "epoch": 2.6525612472160356, + "grad_norm": 21.78716278076172, + "learning_rate": 1e-06, + "loss": 0.8932, + "num_input_tokens_seen": 66695384, + "step": 1191 + }, + { + "epoch": 2.6525612472160356, + "loss": 0.9930351376533508, + "loss_ce": 0.0003593713045120239, + "loss_iou": 0.41796875, + "loss_num": 0.03125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 66695384, + "step": 1191 + }, + { + "epoch": 2.6547884187082404, + "grad_norm": 19.491619110107422, + "learning_rate": 1e-06, + "loss": 0.8489, + "num_input_tokens_seen": 66749192, + "step": 1192 + }, + { + "epoch": 2.6547884187082404, + "loss": 0.7878109216690063, + "loss_ce": 0.0011898394441232085, + "loss_iou": 0.3359375, + "loss_num": 0.023193359375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 66749192, + "step": 1192 + }, + { + "epoch": 2.6570155902004453, + "grad_norm": 26.39838981628418, + "learning_rate": 1e-06, + "loss": 1.1329, + "num_input_tokens_seen": 66803576, + "step": 1193 + }, + { + "epoch": 2.6570155902004453, + "loss": 1.0078489780426025, + "loss_ce": 0.0005247670924291015, + "loss_iou": 0.421875, + "loss_num": 0.03271484375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 66803576, + "step": 1193 + }, + { + "epoch": 2.65924276169265, + "grad_norm": 23.46156883239746, + "learning_rate": 1e-06, + "loss": 0.8324, + "num_input_tokens_seen": 66859052, + "step": 1194 + }, + { + "epoch": 2.65924276169265, + "loss": 0.7302192449569702, + "loss_ce": 0.000482962466776371, + "loss_iou": 0.296875, + "loss_num": 0.0274658203125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 66859052, + "step": 1194 + }, + { + "epoch": 2.661469933184855, + "grad_norm": 16.806400299072266, + "learning_rate": 1e-06, + "loss": 1.0672, + "num_input_tokens_seen": 66914164, + "step": 1195 + }, + { + "epoch": 2.661469933184855, + "loss": 1.1170461177825928, + "loss_ce": 0.000591110554523766, + "loss_iou": 0.427734375, + "loss_num": 0.052001953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 66914164, + "step": 1195 + }, + { + "epoch": 2.66369710467706, + "grad_norm": 18.408832550048828, + "learning_rate": 1e-06, + "loss": 1.0065, + "num_input_tokens_seen": 66969884, + "step": 1196 + }, + { + "epoch": 2.66369710467706, + "loss": 0.8114081621170044, + "loss_ce": 0.00037299515679478645, + "loss_iou": 0.341796875, + "loss_num": 0.02587890625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 66969884, + "step": 1196 + }, + { + "epoch": 2.665924276169265, + "grad_norm": 23.257755279541016, + "learning_rate": 1e-06, + "loss": 1.0099, + "num_input_tokens_seen": 67028592, + "step": 1197 + }, + { + "epoch": 2.665924276169265, + "loss": 1.1483575105667114, + "loss_ce": 0.0004082891682628542, + "loss_iou": 0.451171875, + "loss_num": 0.04931640625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 67028592, + "step": 1197 + }, + { + "epoch": 2.66815144766147, + "grad_norm": 14.403682708740234, + "learning_rate": 1e-06, + "loss": 0.917, + "num_input_tokens_seen": 67085904, + "step": 1198 + }, + { + "epoch": 2.66815144766147, + "loss": 0.916401207447052, + "loss_ce": 0.000385561550501734, + "loss_iou": 0.3984375, + "loss_num": 0.0234375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 67085904, + "step": 1198 + }, + { + "epoch": 2.670378619153675, + "grad_norm": 16.39507484436035, + "learning_rate": 1e-06, + "loss": 0.7198, + "num_input_tokens_seen": 67143124, + "step": 1199 + }, + { + "epoch": 2.670378619153675, + "loss": 0.6678426861763, + "loss_ce": 0.0003622480435296893, + "loss_iou": 0.27734375, + "loss_num": 0.0224609375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 67143124, + "step": 1199 + }, + { + "epoch": 2.6726057906458798, + "grad_norm": 12.098701477050781, + "learning_rate": 1e-06, + "loss": 0.867, + "num_input_tokens_seen": 67196720, + "step": 1200 + }, + { + "epoch": 2.6726057906458798, + "loss": 1.0529296398162842, + "loss_ce": 0.0005615358240902424, + "loss_iou": 0.40625, + "loss_num": 0.0478515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 67196720, + "step": 1200 + }, + { + "epoch": 2.6748329621380846, + "grad_norm": 23.761484146118164, + "learning_rate": 1e-06, + "loss": 0.9273, + "num_input_tokens_seen": 67251244, + "step": 1201 + }, + { + "epoch": 2.6748329621380846, + "loss": 0.9254390001296997, + "loss_ce": 0.0003901528543792665, + "loss_iou": 0.3359375, + "loss_num": 0.051025390625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 67251244, + "step": 1201 + }, + { + "epoch": 2.6770601336302895, + "grad_norm": 13.946866989135742, + "learning_rate": 1e-06, + "loss": 0.9489, + "num_input_tokens_seen": 67305792, + "step": 1202 + }, + { + "epoch": 2.6770601336302895, + "loss": 1.0536152124404907, + "loss_ce": 0.0003925645723938942, + "loss_iou": 0.439453125, + "loss_num": 0.03466796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 67305792, + "step": 1202 + }, + { + "epoch": 2.6792873051224944, + "grad_norm": 14.783041954040527, + "learning_rate": 1e-06, + "loss": 0.7948, + "num_input_tokens_seen": 67363808, + "step": 1203 + }, + { + "epoch": 2.6792873051224944, + "loss": 0.7142338156700134, + "loss_ce": 0.0010990574955940247, + "loss_iou": 0.30859375, + "loss_num": 0.0191650390625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 67363808, + "step": 1203 + }, + { + "epoch": 2.681514476614699, + "grad_norm": 85.64546966552734, + "learning_rate": 1e-06, + "loss": 0.7912, + "num_input_tokens_seen": 67419668, + "step": 1204 + }, + { + "epoch": 2.681514476614699, + "loss": 0.7728584408760071, + "loss_ce": 0.0003975207218900323, + "loss_iou": 0.296875, + "loss_num": 0.036376953125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 67419668, + "step": 1204 + }, + { + "epoch": 2.683741648106904, + "grad_norm": 26.301057815551758, + "learning_rate": 1e-06, + "loss": 0.7419, + "num_input_tokens_seen": 67477960, + "step": 1205 + }, + { + "epoch": 2.683741648106904, + "loss": 0.5447392463684082, + "loss_ce": 0.0003056719433516264, + "loss_iou": 0.2138671875, + "loss_num": 0.0235595703125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 67477960, + "step": 1205 + }, + { + "epoch": 2.6859688195991094, + "grad_norm": 20.807857513427734, + "learning_rate": 1e-06, + "loss": 1.0459, + "num_input_tokens_seen": 67530876, + "step": 1206 + }, + { + "epoch": 2.6859688195991094, + "loss": 1.1529654264450073, + "loss_ce": 0.0003775313380174339, + "loss_iou": 0.50390625, + "loss_num": 0.029052734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 67530876, + "step": 1206 + }, + { + "epoch": 2.6881959910913142, + "grad_norm": 23.6107234954834, + "learning_rate": 1e-06, + "loss": 0.826, + "num_input_tokens_seen": 67586056, + "step": 1207 + }, + { + "epoch": 2.6881959910913142, + "loss": 0.9727563858032227, + "loss_ce": 0.0005884337006136775, + "loss_iou": 0.392578125, + "loss_num": 0.037353515625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 67586056, + "step": 1207 + }, + { + "epoch": 2.690423162583519, + "grad_norm": 17.839683532714844, + "learning_rate": 1e-06, + "loss": 0.9884, + "num_input_tokens_seen": 67639676, + "step": 1208 + }, + { + "epoch": 2.690423162583519, + "loss": 0.9732263088226318, + "loss_ce": 0.0003259408404119313, + "loss_iou": 0.40625, + "loss_num": 0.0322265625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 67639676, + "step": 1208 + }, + { + "epoch": 2.692650334075724, + "grad_norm": 71.87188720703125, + "learning_rate": 1e-06, + "loss": 1.0553, + "num_input_tokens_seen": 67695312, + "step": 1209 + }, + { + "epoch": 2.692650334075724, + "loss": 1.0907628536224365, + "loss_ce": 0.0004308174247853458, + "loss_iou": 0.4296875, + "loss_num": 0.046142578125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 67695312, + "step": 1209 + }, + { + "epoch": 2.694877505567929, + "grad_norm": 21.322093963623047, + "learning_rate": 1e-06, + "loss": 0.7315, + "num_input_tokens_seen": 67754484, + "step": 1210 + }, + { + "epoch": 2.694877505567929, + "loss": 0.6453206539154053, + "loss_ce": 0.00030111317755654454, + "loss_iou": 0.279296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 67754484, + "step": 1210 + }, + { + "epoch": 2.6971046770601337, + "grad_norm": 17.93378448486328, + "learning_rate": 1e-06, + "loss": 0.834, + "num_input_tokens_seen": 67806972, + "step": 1211 + }, + { + "epoch": 2.6971046770601337, + "loss": 0.879258930683136, + "loss_ce": 0.00035265146289020777, + "loss_iou": 0.375, + "loss_num": 0.0257568359375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 67806972, + "step": 1211 + }, + { + "epoch": 2.6993318485523385, + "grad_norm": 17.423023223876953, + "learning_rate": 1e-06, + "loss": 0.9924, + "num_input_tokens_seen": 67865540, + "step": 1212 + }, + { + "epoch": 2.6993318485523385, + "loss": 1.1357271671295166, + "loss_ce": 0.0004733309615403414, + "loss_iou": 0.46875, + "loss_num": 0.0390625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 67865540, + "step": 1212 + }, + { + "epoch": 2.7015590200445434, + "grad_norm": 16.1553955078125, + "learning_rate": 1e-06, + "loss": 0.8851, + "num_input_tokens_seen": 67923660, + "step": 1213 + }, + { + "epoch": 2.7015590200445434, + "loss": 0.8159765005111694, + "loss_ce": 0.0004247480828780681, + "loss_iou": 0.357421875, + "loss_num": 0.0201416015625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 67923660, + "step": 1213 + }, + { + "epoch": 2.7037861915367483, + "grad_norm": 27.426773071289062, + "learning_rate": 1e-06, + "loss": 1.0762, + "num_input_tokens_seen": 67979792, + "step": 1214 + }, + { + "epoch": 2.7037861915367483, + "loss": 0.9264326095581055, + "loss_ce": 0.00040718415402807295, + "loss_iou": 0.361328125, + "loss_num": 0.04052734375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 67979792, + "step": 1214 + }, + { + "epoch": 2.706013363028953, + "grad_norm": 143.9352569580078, + "learning_rate": 1e-06, + "loss": 1.2101, + "num_input_tokens_seen": 68036028, + "step": 1215 + }, + { + "epoch": 2.706013363028953, + "loss": 1.3111499547958374, + "loss_ce": 0.0006030529621057212, + "loss_iou": 0.53125, + "loss_num": 0.0498046875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 68036028, + "step": 1215 + }, + { + "epoch": 2.708240534521158, + "grad_norm": 15.904016494750977, + "learning_rate": 1e-06, + "loss": 0.8784, + "num_input_tokens_seen": 68090580, + "step": 1216 + }, + { + "epoch": 2.708240534521158, + "loss": 0.8531776070594788, + "loss_ce": 0.00039442608249373734, + "loss_iou": 0.384765625, + "loss_num": 0.016845703125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 68090580, + "step": 1216 + }, + { + "epoch": 2.710467706013363, + "grad_norm": 16.765676498413086, + "learning_rate": 1e-06, + "loss": 0.7818, + "num_input_tokens_seen": 68146632, + "step": 1217 + }, + { + "epoch": 2.710467706013363, + "loss": 0.8600921630859375, + "loss_ce": 0.00047299021389335394, + "loss_iou": 0.3671875, + "loss_num": 0.02490234375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 68146632, + "step": 1217 + }, + { + "epoch": 2.7126948775055677, + "grad_norm": 16.13719367980957, + "learning_rate": 1e-06, + "loss": 0.8074, + "num_input_tokens_seen": 68202376, + "step": 1218 + }, + { + "epoch": 2.7126948775055677, + "loss": 0.9842748045921326, + "loss_ce": 0.00038809922989457846, + "loss_iou": 0.40625, + "loss_num": 0.0341796875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 68202376, + "step": 1218 + }, + { + "epoch": 2.7149220489977726, + "grad_norm": 16.392423629760742, + "learning_rate": 1e-06, + "loss": 0.832, + "num_input_tokens_seen": 68257756, + "step": 1219 + }, + { + "epoch": 2.7149220489977726, + "loss": 0.6619054079055786, + "loss_ce": 0.00028435979038476944, + "loss_iou": 0.275390625, + "loss_num": 0.0224609375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 68257756, + "step": 1219 + }, + { + "epoch": 2.717149220489978, + "grad_norm": 39.93339920043945, + "learning_rate": 1e-06, + "loss": 0.8111, + "num_input_tokens_seen": 68317524, + "step": 1220 + }, + { + "epoch": 2.717149220489978, + "loss": 0.8016022443771362, + "loss_ce": 0.0003327628946863115, + "loss_iou": 0.3203125, + "loss_num": 0.0322265625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 68317524, + "step": 1220 + }, + { + "epoch": 2.7193763919821827, + "grad_norm": 32.62158203125, + "learning_rate": 1e-06, + "loss": 0.8964, + "num_input_tokens_seen": 68370704, + "step": 1221 + }, + { + "epoch": 2.7193763919821827, + "loss": 0.9595242738723755, + "loss_ce": 0.0005399275105446577, + "loss_iou": 0.404296875, + "loss_num": 0.02978515625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 68370704, + "step": 1221 + }, + { + "epoch": 2.7216035634743876, + "grad_norm": 26.76487159729004, + "learning_rate": 1e-06, + "loss": 1.0057, + "num_input_tokens_seen": 68426772, + "step": 1222 + }, + { + "epoch": 2.7216035634743876, + "loss": 0.91068434715271, + "loss_ce": 0.00028396639390848577, + "loss_iou": 0.37890625, + "loss_num": 0.0303955078125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 68426772, + "step": 1222 + }, + { + "epoch": 2.7238307349665924, + "grad_norm": 18.720125198364258, + "learning_rate": 1e-06, + "loss": 0.8459, + "num_input_tokens_seen": 68483180, + "step": 1223 + }, + { + "epoch": 2.7238307349665924, + "loss": 0.9395471215248108, + "loss_ce": 0.0003381132846698165, + "loss_iou": 0.380859375, + "loss_num": 0.03564453125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 68483180, + "step": 1223 + }, + { + "epoch": 2.7260579064587973, + "grad_norm": 15.632884979248047, + "learning_rate": 1e-06, + "loss": 1.093, + "num_input_tokens_seen": 68538720, + "step": 1224 + }, + { + "epoch": 2.7260579064587973, + "loss": 1.2165559530258179, + "loss_ce": 0.0004914908786304295, + "loss_iou": 0.484375, + "loss_num": 0.0498046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 68538720, + "step": 1224 + }, + { + "epoch": 2.728285077951002, + "grad_norm": 16.02979850769043, + "learning_rate": 1e-06, + "loss": 0.8688, + "num_input_tokens_seen": 68596884, + "step": 1225 + }, + { + "epoch": 2.728285077951002, + "loss": 1.0046154260635376, + "loss_ce": 0.00046502824989147484, + "loss_iou": 0.427734375, + "loss_num": 0.0299072265625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 68596884, + "step": 1225 + }, + { + "epoch": 2.730512249443207, + "grad_norm": 19.00936508178711, + "learning_rate": 1e-06, + "loss": 0.8638, + "num_input_tokens_seen": 68654332, + "step": 1226 + }, + { + "epoch": 2.730512249443207, + "loss": 0.9798787832260132, + "loss_ce": 0.00038662960287183523, + "loss_iou": 0.373046875, + "loss_num": 0.046875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 68654332, + "step": 1226 + }, + { + "epoch": 2.732739420935412, + "grad_norm": 18.884357452392578, + "learning_rate": 1e-06, + "loss": 0.7695, + "num_input_tokens_seen": 68713232, + "step": 1227 + }, + { + "epoch": 2.732739420935412, + "loss": 0.8073458671569824, + "loss_ce": 0.00046107626985758543, + "loss_iou": 0.349609375, + "loss_num": 0.0213623046875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 68713232, + "step": 1227 + }, + { + "epoch": 2.734966592427617, + "grad_norm": 14.613882064819336, + "learning_rate": 1e-06, + "loss": 0.8354, + "num_input_tokens_seen": 68768872, + "step": 1228 + }, + { + "epoch": 2.734966592427617, + "loss": 0.870397686958313, + "loss_ce": 0.0005245659267529845, + "loss_iou": 0.353515625, + "loss_num": 0.032470703125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 68768872, + "step": 1228 + }, + { + "epoch": 2.737193763919822, + "grad_norm": 16.955501556396484, + "learning_rate": 1e-06, + "loss": 1.1796, + "num_input_tokens_seen": 68822780, + "step": 1229 + }, + { + "epoch": 2.737193763919822, + "loss": 1.0951062440872192, + "loss_ce": 0.0003797074896283448, + "loss_iou": 0.439453125, + "loss_num": 0.043701171875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 68822780, + "step": 1229 + }, + { + "epoch": 2.739420935412027, + "grad_norm": 20.716533660888672, + "learning_rate": 1e-06, + "loss": 1.0516, + "num_input_tokens_seen": 68877892, + "step": 1230 + }, + { + "epoch": 2.739420935412027, + "loss": 1.0252530574798584, + "loss_ce": 0.0008389821741729975, + "loss_iou": 0.412109375, + "loss_num": 0.0400390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 68877892, + "step": 1230 + }, + { + "epoch": 2.7416481069042318, + "grad_norm": 40.377017974853516, + "learning_rate": 1e-06, + "loss": 0.7578, + "num_input_tokens_seen": 68935384, + "step": 1231 + }, + { + "epoch": 2.7416481069042318, + "loss": 0.8238745331764221, + "loss_ce": 0.0003882101736962795, + "loss_iou": 0.34375, + "loss_num": 0.0274658203125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 68935384, + "step": 1231 + }, + { + "epoch": 2.7438752783964366, + "grad_norm": 20.555801391601562, + "learning_rate": 1e-06, + "loss": 0.9902, + "num_input_tokens_seen": 68991268, + "step": 1232 + }, + { + "epoch": 2.7438752783964366, + "loss": 1.0188990831375122, + "loss_ce": 0.0003444222966209054, + "loss_iou": 0.453125, + "loss_num": 0.0225830078125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 68991268, + "step": 1232 + }, + { + "epoch": 2.7461024498886415, + "grad_norm": 17.75653648376465, + "learning_rate": 1e-06, + "loss": 0.9883, + "num_input_tokens_seen": 69048292, + "step": 1233 + }, + { + "epoch": 2.7461024498886415, + "loss": 1.4018105268478394, + "loss_ce": 0.0009315803181380033, + "loss_iou": 0.5390625, + "loss_num": 0.06494140625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 69048292, + "step": 1233 + }, + { + "epoch": 2.7483296213808464, + "grad_norm": 25.2138614654541, + "learning_rate": 1e-06, + "loss": 0.9916, + "num_input_tokens_seen": 69102780, + "step": 1234 + }, + { + "epoch": 2.7483296213808464, + "loss": 1.0661447048187256, + "loss_ce": 0.00047087084385566413, + "loss_iou": 0.421875, + "loss_num": 0.044677734375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 69102780, + "step": 1234 + }, + { + "epoch": 2.750556792873051, + "grad_norm": 41.74817657470703, + "learning_rate": 1e-06, + "loss": 0.7839, + "num_input_tokens_seen": 69157904, + "step": 1235 + }, + { + "epoch": 2.750556792873051, + "loss": 0.9228337407112122, + "loss_ce": 0.00047044423990882933, + "loss_iou": 0.3828125, + "loss_num": 0.03173828125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 69157904, + "step": 1235 + }, + { + "epoch": 2.752783964365256, + "grad_norm": 17.140514373779297, + "learning_rate": 1e-06, + "loss": 0.7179, + "num_input_tokens_seen": 69215832, + "step": 1236 + }, + { + "epoch": 2.752783964365256, + "loss": 0.8632500171661377, + "loss_ce": 0.00045710656559094787, + "loss_iou": 0.330078125, + "loss_num": 0.040283203125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 69215832, + "step": 1236 + }, + { + "epoch": 2.755011135857461, + "grad_norm": 43.60929870605469, + "learning_rate": 1e-06, + "loss": 0.9365, + "num_input_tokens_seen": 69271512, + "step": 1237 + }, + { + "epoch": 2.755011135857461, + "loss": 1.1083848476409912, + "loss_ce": 0.0004747234925162047, + "loss_iou": 0.486328125, + "loss_num": 0.0272216796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 69271512, + "step": 1237 + }, + { + "epoch": 2.757238307349666, + "grad_norm": 82.30256652832031, + "learning_rate": 1e-06, + "loss": 0.9312, + "num_input_tokens_seen": 69328124, + "step": 1238 + }, + { + "epoch": 2.757238307349666, + "loss": 1.1663775444030762, + "loss_ce": 0.000606063287705183, + "loss_iou": 0.466796875, + "loss_num": 0.046142578125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 69328124, + "step": 1238 + }, + { + "epoch": 2.7594654788418707, + "grad_norm": 18.209518432617188, + "learning_rate": 1e-06, + "loss": 0.9739, + "num_input_tokens_seen": 69385368, + "step": 1239 + }, + { + "epoch": 2.7594654788418707, + "loss": 1.1721243858337402, + "loss_ce": 0.0022025578655302525, + "loss_iou": 0.466796875, + "loss_num": 0.047119140625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 69385368, + "step": 1239 + }, + { + "epoch": 2.7616926503340755, + "grad_norm": 27.468372344970703, + "learning_rate": 1e-06, + "loss": 0.7438, + "num_input_tokens_seen": 69440864, + "step": 1240 + }, + { + "epoch": 2.7616926503340755, + "loss": 0.7869477868080139, + "loss_ce": 0.00032670435030013323, + "loss_iou": 0.333984375, + "loss_num": 0.0233154296875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 69440864, + "step": 1240 + }, + { + "epoch": 2.7639198218262804, + "grad_norm": 29.165250778198242, + "learning_rate": 1e-06, + "loss": 0.9046, + "num_input_tokens_seen": 69497304, + "step": 1241 + }, + { + "epoch": 2.7639198218262804, + "loss": 0.9364851117134094, + "loss_ce": 0.0004500235663726926, + "loss_iou": 0.34375, + "loss_num": 0.0498046875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 69497304, + "step": 1241 + }, + { + "epoch": 2.7661469933184857, + "grad_norm": 17.164466857910156, + "learning_rate": 1e-06, + "loss": 0.8518, + "num_input_tokens_seen": 69554524, + "step": 1242 + }, + { + "epoch": 2.7661469933184857, + "loss": 0.9595236778259277, + "loss_ce": 0.0005393511964939535, + "loss_iou": 0.404296875, + "loss_num": 0.0301513671875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 69554524, + "step": 1242 + }, + { + "epoch": 2.7683741648106905, + "grad_norm": 19.568635940551758, + "learning_rate": 1e-06, + "loss": 0.8746, + "num_input_tokens_seen": 69611204, + "step": 1243 + }, + { + "epoch": 2.7683741648106905, + "loss": 1.1134837865829468, + "loss_ce": 0.004352906718850136, + "loss_iou": 0.47265625, + "loss_num": 0.033203125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 69611204, + "step": 1243 + }, + { + "epoch": 2.7706013363028954, + "grad_norm": 16.17546844482422, + "learning_rate": 1e-06, + "loss": 0.7277, + "num_input_tokens_seen": 69668100, + "step": 1244 + }, + { + "epoch": 2.7706013363028954, + "loss": 0.7887986898422241, + "loss_ce": 0.00046862097224220634, + "loss_iou": 0.318359375, + "loss_num": 0.0301513671875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 69668100, + "step": 1244 + }, + { + "epoch": 2.7728285077951003, + "grad_norm": 22.0662784576416, + "learning_rate": 1e-06, + "loss": 0.9972, + "num_input_tokens_seen": 69723688, + "step": 1245 + }, + { + "epoch": 2.7728285077951003, + "loss": 1.2928619384765625, + "loss_ce": 0.00038146309088915586, + "loss_iou": 0.5234375, + "loss_num": 0.04931640625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 69723688, + "step": 1245 + }, + { + "epoch": 2.775055679287305, + "grad_norm": 19.460460662841797, + "learning_rate": 1e-06, + "loss": 0.601, + "num_input_tokens_seen": 69778896, + "step": 1246 + }, + { + "epoch": 2.775055679287305, + "loss": 0.5189080238342285, + "loss_ce": 0.000353323295712471, + "loss_iou": 0.2197265625, + "loss_num": 0.015869140625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 69778896, + "step": 1246 + }, + { + "epoch": 2.77728285077951, + "grad_norm": 20.321802139282227, + "learning_rate": 1e-06, + "loss": 1.1584, + "num_input_tokens_seen": 69836796, + "step": 1247 + }, + { + "epoch": 2.77728285077951, + "loss": 1.031623125076294, + "loss_ce": 0.00037302449345588684, + "loss_iou": 0.4296875, + "loss_num": 0.034912109375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 69836796, + "step": 1247 + }, + { + "epoch": 2.779510022271715, + "grad_norm": 15.81376838684082, + "learning_rate": 1e-06, + "loss": 0.8184, + "num_input_tokens_seen": 69893548, + "step": 1248 + }, + { + "epoch": 2.779510022271715, + "loss": 0.9024643898010254, + "loss_ce": 0.00036481593269854784, + "loss_iou": 0.365234375, + "loss_num": 0.03466796875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 69893548, + "step": 1248 + }, + { + "epoch": 2.7817371937639197, + "grad_norm": 131.67137145996094, + "learning_rate": 1e-06, + "loss": 1.0096, + "num_input_tokens_seen": 69944348, + "step": 1249 + }, + { + "epoch": 2.7817371937639197, + "loss": 0.9452086687088013, + "loss_ce": 0.00038447632687166333, + "loss_iou": 0.40625, + "loss_num": 0.0264892578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 69944348, + "step": 1249 + }, + { + "epoch": 2.7839643652561246, + "grad_norm": 13.716216087341309, + "learning_rate": 1e-06, + "loss": 1.1417, + "num_input_tokens_seen": 70001912, + "step": 1250 + }, + { + "epoch": 2.7839643652561246, + "eval_seeclick_web_CIoU": 0.5651521682739258, + "eval_seeclick_web_GIoU": 0.5592525601387024, + "eval_seeclick_web_IoU": 0.5811098515987396, + "eval_seeclick_web_MAE_all": 0.017407238017767668, + "eval_seeclick_web_MAE_h": 0.01108331186696887, + "eval_seeclick_web_MAE_w": 0.01834576530382037, + "eval_seeclick_web_MAE_x_boxes": 0.008968821726739407, + "eval_seeclick_web_MAE_y_boxes": 0.02286715735681355, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9634284973144531, + "eval_seeclick_web_loss_ce": 0.00043979176552966237, + "eval_seeclick_web_loss_iou": 0.439697265625, + "eval_seeclick_web_loss_num": 0.013711929321289062, + "eval_seeclick_web_loss_xval": 0.9478759765625, + "eval_seeclick_web_runtime": 32.3264, + "eval_seeclick_web_samples_per_second": 1.547, + "eval_seeclick_web_steps_per_second": 0.062, + "num_input_tokens_seen": 70001912, + "step": 1250 + }, + { + "epoch": 2.7839643652561246, + "eval_icons_CIoU": 0.3112401217222214, + "eval_icons_GIoU": 0.33725370466709137, + "eval_icons_IoU": 0.38187770545482635, + "eval_icons_MAE_all": 0.06834794208407402, + "eval_icons_MAE_h": 0.03893335722386837, + "eval_icons_MAE_w": 0.08004930429160595, + "eval_icons_MAE_x_boxes": 0.05875684879720211, + "eval_icons_MAE_y_boxes": 0.039277092553675175, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.721555471420288, + "eval_icons_loss_ce": 0.0008201654127333313, + "eval_icons_loss_iou": 0.66845703125, + "eval_icons_loss_num": 0.06448173522949219, + "eval_icons_loss_xval": 1.65869140625, + "eval_icons_runtime": 30.6205, + "eval_icons_samples_per_second": 1.633, + "eval_icons_steps_per_second": 0.065, + "num_input_tokens_seen": 70001912, + "step": 1250 + }, + { + "epoch": 2.7839643652561246, + "eval_screenspot_CIoU": 0.31326837340990704, + "eval_screenspot_GIoU": 0.3382245600223541, + "eval_screenspot_IoU": 0.3957456350326538, + "eval_screenspot_MAE_all": 0.08052034179369609, + "eval_screenspot_MAE_h": 0.04156584106385708, + "eval_screenspot_MAE_w": 0.0848269909620285, + "eval_screenspot_MAE_x_boxes": 0.11255322148402531, + "eval_screenspot_MAE_y_boxes": 0.04932925725976626, + "eval_screenspot_inside_bbox": 0.6045833428700765, + "eval_screenspot_loss": 1.7774311304092407, + "eval_screenspot_loss_ce": 0.0008034493657760322, + "eval_screenspot_loss_iou": 0.7025553385416666, + "eval_screenspot_loss_num": 0.09097544352213542, + "eval_screenspot_loss_xval": 1.8603515625, + "eval_screenspot_runtime": 54.3229, + "eval_screenspot_samples_per_second": 1.638, + "eval_screenspot_steps_per_second": 0.055, + "num_input_tokens_seen": 70001912, + "step": 1250 + }, + { + "epoch": 2.7839643652561246, + "eval_compot_CIoU": 0.3437846302986145, + "eval_compot_GIoU": 0.3697910010814667, + "eval_compot_IoU": 0.4004169702529907, + "eval_compot_MAE_all": 0.021469497121870518, + "eval_compot_MAE_h": 0.011028026696294546, + "eval_compot_MAE_w": 0.02703993208706379, + "eval_compot_MAE_x_boxes": 0.030306325759738684, + "eval_compot_MAE_y_boxes": 0.007093302207067609, + "eval_compot_inside_bbox": 0.6145833432674408, + "eval_compot_loss": 1.3977539539337158, + "eval_compot_loss_ce": 0.0004101828089915216, + "eval_compot_loss_iou": 0.63037109375, + "eval_compot_loss_num": 0.020694732666015625, + "eval_compot_loss_xval": 1.36376953125, + "eval_compot_runtime": 32.0716, + "eval_compot_samples_per_second": 1.559, + "eval_compot_steps_per_second": 0.062, + "num_input_tokens_seen": 70001912, + "step": 1250 + }, + { + "epoch": 2.7839643652561246, + "eval_custom_ui_val_CIoU": 0.41552355140447617, + "eval_custom_ui_val_GIoU": 0.44775263799561393, + "eval_custom_ui_val_IoU": 0.47426238159338635, + "eval_custom_ui_val_MAE_all": 0.03807516168389055, + "eval_custom_ui_val_MAE_h": 0.020889455763002236, + "eval_custom_ui_val_MAE_w": 0.04122109152376652, + "eval_custom_ui_val_MAE_x_boxes": 0.04385989842315515, + "eval_custom_ui_val_MAE_y_boxes": 0.020979333875907794, + "eval_custom_ui_val_inside_bbox": 0.6678240762816535, + "eval_custom_ui_val_loss": 1.312791347503662, + "eval_custom_ui_val_loss_ce": 0.0007190946586585293, + "eval_custom_ui_val_loss_iou": 0.5456407335069444, + "eval_custom_ui_val_loss_num": 0.03783988952636719, + "eval_custom_ui_val_loss_xval": 1.2806260850694444, + "eval_custom_ui_val_runtime": 95.8302, + "eval_custom_ui_val_samples_per_second": 2.765, + "eval_custom_ui_val_steps_per_second": 0.094, + "num_input_tokens_seen": 70001912, + "step": 1250 + }, + { + "epoch": 2.7839643652561246, + "loss": 1.004927635192871, + "loss_ce": 0.0005332073196768761, + "loss_iou": 0.4296875, + "loss_num": 0.0286865234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 70001912, + "step": 1250 + }, + { + "epoch": 2.78619153674833, + "grad_norm": 19.672489166259766, + "learning_rate": 1e-06, + "loss": 0.8286, + "num_input_tokens_seen": 70058016, + "step": 1251 + }, + { + "epoch": 2.78619153674833, + "loss": 0.6981078386306763, + "loss_ce": 0.00035395551822148263, + "loss_iou": 0.294921875, + "loss_num": 0.0213623046875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 70058016, + "step": 1251 + }, + { + "epoch": 2.7884187082405347, + "grad_norm": 17.608375549316406, + "learning_rate": 1e-06, + "loss": 0.9146, + "num_input_tokens_seen": 70112088, + "step": 1252 + }, + { + "epoch": 2.7884187082405347, + "loss": 0.9438179135322571, + "loss_ce": 0.0003364117001183331, + "loss_iou": 0.38671875, + "loss_num": 0.033935546875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 70112088, + "step": 1252 + }, + { + "epoch": 2.7906458797327396, + "grad_norm": 14.264594078063965, + "learning_rate": 1e-06, + "loss": 0.8837, + "num_input_tokens_seen": 70171184, + "step": 1253 + }, + { + "epoch": 2.7906458797327396, + "loss": 0.9383166432380676, + "loss_ce": 0.00032835284946486354, + "loss_iou": 0.400390625, + "loss_num": 0.0272216796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 70171184, + "step": 1253 + }, + { + "epoch": 2.7928730512249444, + "grad_norm": 21.34050750732422, + "learning_rate": 1e-06, + "loss": 1.0205, + "num_input_tokens_seen": 70227628, + "step": 1254 + }, + { + "epoch": 2.7928730512249444, + "loss": 1.0269427299499512, + "loss_ce": 0.0004534609033726156, + "loss_iou": 0.4296875, + "loss_num": 0.03369140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 70227628, + "step": 1254 + }, + { + "epoch": 2.7951002227171493, + "grad_norm": 14.754793167114258, + "learning_rate": 1e-06, + "loss": 0.9247, + "num_input_tokens_seen": 70283180, + "step": 1255 + }, + { + "epoch": 2.7951002227171493, + "loss": 0.9754384160041809, + "loss_ce": 0.00034071545815095305, + "loss_iou": 0.392578125, + "loss_num": 0.037841796875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 70283180, + "step": 1255 + }, + { + "epoch": 2.797327394209354, + "grad_norm": 21.97988510131836, + "learning_rate": 1e-06, + "loss": 1.0107, + "num_input_tokens_seen": 70335640, + "step": 1256 + }, + { + "epoch": 2.797327394209354, + "loss": 1.1570931673049927, + "loss_ce": 0.0003548713284544647, + "loss_iou": 0.50390625, + "loss_num": 0.029541015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 70335640, + "step": 1256 + }, + { + "epoch": 2.799554565701559, + "grad_norm": 38.17330551147461, + "learning_rate": 1e-06, + "loss": 0.7453, + "num_input_tokens_seen": 70390748, + "step": 1257 + }, + { + "epoch": 2.799554565701559, + "loss": 0.8263010382652283, + "loss_ce": 0.00037330458872020245, + "loss_iou": 0.37109375, + "loss_num": 0.01611328125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 70390748, + "step": 1257 + }, + { + "epoch": 2.801781737193764, + "grad_norm": 22.141918182373047, + "learning_rate": 1e-06, + "loss": 1.0046, + "num_input_tokens_seen": 70446116, + "step": 1258 + }, + { + "epoch": 2.801781737193764, + "loss": 1.124394178390503, + "loss_ce": 0.0003707860014401376, + "loss_iou": 0.4609375, + "loss_num": 0.040283203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 70446116, + "step": 1258 + }, + { + "epoch": 2.8040089086859687, + "grad_norm": 115.46956634521484, + "learning_rate": 1e-06, + "loss": 0.9891, + "num_input_tokens_seen": 70503256, + "step": 1259 + }, + { + "epoch": 2.8040089086859687, + "loss": 0.8390759229660034, + "loss_ce": 0.0004528118879534304, + "loss_iou": 0.3828125, + "loss_num": 0.0147705078125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 70503256, + "step": 1259 + }, + { + "epoch": 2.8062360801781736, + "grad_norm": 18.191608428955078, + "learning_rate": 1e-06, + "loss": 0.6494, + "num_input_tokens_seen": 70559428, + "step": 1260 + }, + { + "epoch": 2.8062360801781736, + "loss": 0.4827492833137512, + "loss_ce": 0.0003274133778177202, + "loss_iou": 0.2041015625, + "loss_num": 0.014892578125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 70559428, + "step": 1260 + }, + { + "epoch": 2.8084632516703785, + "grad_norm": 47.72921371459961, + "learning_rate": 1e-06, + "loss": 0.937, + "num_input_tokens_seen": 70614552, + "step": 1261 + }, + { + "epoch": 2.8084632516703785, + "loss": 0.9824411869049072, + "loss_ce": 0.0036814198829233646, + "loss_iou": 0.41015625, + "loss_num": 0.031494140625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 70614552, + "step": 1261 + }, + { + "epoch": 2.8106904231625833, + "grad_norm": 15.726812362670898, + "learning_rate": 1e-06, + "loss": 0.8683, + "num_input_tokens_seen": 70669544, + "step": 1262 + }, + { + "epoch": 2.8106904231625833, + "loss": 1.0436663627624512, + "loss_ce": 0.0004534028994385153, + "loss_iou": 0.42578125, + "loss_num": 0.0380859375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 70669544, + "step": 1262 + }, + { + "epoch": 2.812917594654788, + "grad_norm": 42.43077087402344, + "learning_rate": 1e-06, + "loss": 0.8308, + "num_input_tokens_seen": 70725468, + "step": 1263 + }, + { + "epoch": 2.812917594654788, + "loss": 1.070866346359253, + "loss_ce": 0.0005538529367186129, + "loss_iou": 0.400390625, + "loss_num": 0.0537109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 70725468, + "step": 1263 + }, + { + "epoch": 2.815144766146993, + "grad_norm": 15.935296058654785, + "learning_rate": 1e-06, + "loss": 0.8946, + "num_input_tokens_seen": 70780344, + "step": 1264 + }, + { + "epoch": 2.815144766146993, + "loss": 0.5159022212028503, + "loss_ce": 0.00027721430524252355, + "loss_iou": 0.2197265625, + "loss_num": 0.015380859375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 70780344, + "step": 1264 + }, + { + "epoch": 2.8173719376391984, + "grad_norm": 14.334254264831543, + "learning_rate": 1e-06, + "loss": 0.7528, + "num_input_tokens_seen": 70836060, + "step": 1265 + }, + { + "epoch": 2.8173719376391984, + "loss": 0.7817806601524353, + "loss_ce": 0.0002865034039132297, + "loss_iou": 0.33203125, + "loss_num": 0.0233154296875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 70836060, + "step": 1265 + }, + { + "epoch": 2.819599109131403, + "grad_norm": 19.307138442993164, + "learning_rate": 1e-06, + "loss": 0.8431, + "num_input_tokens_seen": 70893088, + "step": 1266 + }, + { + "epoch": 2.819599109131403, + "loss": 0.9788370132446289, + "loss_ce": 0.00032134755747392774, + "loss_iou": 0.412109375, + "loss_num": 0.030517578125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 70893088, + "step": 1266 + }, + { + "epoch": 2.821826280623608, + "grad_norm": 25.13027000427246, + "learning_rate": 1e-06, + "loss": 0.9889, + "num_input_tokens_seen": 70949580, + "step": 1267 + }, + { + "epoch": 2.821826280623608, + "loss": 1.048335313796997, + "loss_ce": 0.00048371872981078923, + "loss_iou": 0.421875, + "loss_num": 0.040771484375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 70949580, + "step": 1267 + }, + { + "epoch": 2.824053452115813, + "grad_norm": 23.69377899169922, + "learning_rate": 1e-06, + "loss": 0.7746, + "num_input_tokens_seen": 71005544, + "step": 1268 + }, + { + "epoch": 2.824053452115813, + "loss": 0.871030330657959, + "loss_ce": 0.00042489959741942585, + "loss_iou": 0.3203125, + "loss_num": 0.046142578125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 71005544, + "step": 1268 + }, + { + "epoch": 2.826280623608018, + "grad_norm": 17.32742691040039, + "learning_rate": 1e-06, + "loss": 0.6273, + "num_input_tokens_seen": 71061960, + "step": 1269 + }, + { + "epoch": 2.826280623608018, + "loss": 0.5913735628128052, + "loss_ce": 0.0003091061080340296, + "loss_iou": 0.25, + "loss_num": 0.0184326171875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 71061960, + "step": 1269 + }, + { + "epoch": 2.8285077951002227, + "grad_norm": 14.68806266784668, + "learning_rate": 1e-06, + "loss": 0.9952, + "num_input_tokens_seen": 71118024, + "step": 1270 + }, + { + "epoch": 2.8285077951002227, + "loss": 0.9375334978103638, + "loss_ce": 0.00027759268414229155, + "loss_iou": 0.400390625, + "loss_num": 0.0274658203125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 71118024, + "step": 1270 + }, + { + "epoch": 2.8307349665924275, + "grad_norm": 11.978171348571777, + "learning_rate": 1e-06, + "loss": 0.933, + "num_input_tokens_seen": 71170604, + "step": 1271 + }, + { + "epoch": 2.8307349665924275, + "loss": 1.082184076309204, + "loss_ce": 0.0006410967325791717, + "loss_iou": 0.455078125, + "loss_num": 0.033935546875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 71170604, + "step": 1271 + }, + { + "epoch": 2.8329621380846324, + "grad_norm": 16.340866088867188, + "learning_rate": 1e-06, + "loss": 0.7549, + "num_input_tokens_seen": 71227240, + "step": 1272 + }, + { + "epoch": 2.8329621380846324, + "loss": 0.6341564059257507, + "loss_ce": 0.000367343716789037, + "loss_iou": 0.27734375, + "loss_num": 0.01556396484375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 71227240, + "step": 1272 + }, + { + "epoch": 2.8351893095768377, + "grad_norm": 21.42453384399414, + "learning_rate": 1e-06, + "loss": 1.3223, + "num_input_tokens_seen": 71281800, + "step": 1273 + }, + { + "epoch": 2.8351893095768377, + "loss": 1.39503812789917, + "loss_ce": 0.0005069249891676009, + "loss_iou": 0.609375, + "loss_num": 0.034912109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 71281800, + "step": 1273 + }, + { + "epoch": 2.8374164810690425, + "grad_norm": 14.59418773651123, + "learning_rate": 1e-06, + "loss": 0.858, + "num_input_tokens_seen": 71338428, + "step": 1274 + }, + { + "epoch": 2.8374164810690425, + "loss": 0.6660067439079285, + "loss_ce": 0.00047936852206476033, + "loss_iou": 0.302734375, + "loss_num": 0.011962890625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 71338428, + "step": 1274 + }, + { + "epoch": 2.8396436525612474, + "grad_norm": 23.267770767211914, + "learning_rate": 1e-06, + "loss": 1.0233, + "num_input_tokens_seen": 71393332, + "step": 1275 + }, + { + "epoch": 2.8396436525612474, + "loss": 1.2549712657928467, + "loss_ce": 0.0005767274415120482, + "loss_iou": 0.4765625, + "loss_num": 0.06103515625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 71393332, + "step": 1275 + }, + { + "epoch": 2.8418708240534523, + "grad_norm": 19.297468185424805, + "learning_rate": 1e-06, + "loss": 0.863, + "num_input_tokens_seen": 71450628, + "step": 1276 + }, + { + "epoch": 2.8418708240534523, + "loss": 0.8579794764518738, + "loss_ce": 0.00031341775320470333, + "loss_iou": 0.349609375, + "loss_num": 0.03125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 71450628, + "step": 1276 + }, + { + "epoch": 2.844097995545657, + "grad_norm": 20.925310134887695, + "learning_rate": 1e-06, + "loss": 0.785, + "num_input_tokens_seen": 71509096, + "step": 1277 + }, + { + "epoch": 2.844097995545657, + "loss": 0.7992924451828003, + "loss_ce": 0.0003422028967179358, + "loss_iou": 0.310546875, + "loss_num": 0.03564453125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 71509096, + "step": 1277 + }, + { + "epoch": 2.846325167037862, + "grad_norm": 21.453073501586914, + "learning_rate": 1e-06, + "loss": 0.7625, + "num_input_tokens_seen": 71567296, + "step": 1278 + }, + { + "epoch": 2.846325167037862, + "loss": 0.7048360705375671, + "loss_ce": 0.0004903356893919408, + "loss_iou": 0.3046875, + "loss_num": 0.01904296875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 71567296, + "step": 1278 + }, + { + "epoch": 2.848552338530067, + "grad_norm": 33.181209564208984, + "learning_rate": 1e-06, + "loss": 0.891, + "num_input_tokens_seen": 71623012, + "step": 1279 + }, + { + "epoch": 2.848552338530067, + "loss": 0.5747618675231934, + "loss_ce": 0.00029898268985562027, + "loss_iou": 0.23828125, + "loss_num": 0.0194091796875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 71623012, + "step": 1279 + }, + { + "epoch": 2.8507795100222717, + "grad_norm": 17.72443199157715, + "learning_rate": 1e-06, + "loss": 0.8168, + "num_input_tokens_seen": 71680860, + "step": 1280 + }, + { + "epoch": 2.8507795100222717, + "loss": 0.709847629070282, + "loss_ce": 0.00037496426375582814, + "loss_iou": 0.3046875, + "loss_num": 0.0205078125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 71680860, + "step": 1280 + }, + { + "epoch": 2.8530066815144766, + "grad_norm": 24.76962661743164, + "learning_rate": 1e-06, + "loss": 0.9463, + "num_input_tokens_seen": 71734508, + "step": 1281 + }, + { + "epoch": 2.8530066815144766, + "loss": 0.9339801669120789, + "loss_ce": 0.0003863995661959052, + "loss_iou": 0.3828125, + "loss_num": 0.03369140625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 71734508, + "step": 1281 + }, + { + "epoch": 2.8552338530066814, + "grad_norm": 22.486467361450195, + "learning_rate": 1e-06, + "loss": 0.7574, + "num_input_tokens_seen": 71790040, + "step": 1282 + }, + { + "epoch": 2.8552338530066814, + "loss": 0.8373603820800781, + "loss_ce": 0.000446313846623525, + "loss_iou": 0.3359375, + "loss_num": 0.032470703125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 71790040, + "step": 1282 + }, + { + "epoch": 2.8574610244988863, + "grad_norm": 22.6174373626709, + "learning_rate": 1e-06, + "loss": 0.9784, + "num_input_tokens_seen": 71843828, + "step": 1283 + }, + { + "epoch": 2.8574610244988863, + "loss": 1.0318081378936768, + "loss_ce": 0.0005582119338214397, + "loss_iou": 0.40625, + "loss_num": 0.043212890625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 71843828, + "step": 1283 + }, + { + "epoch": 2.859688195991091, + "grad_norm": 18.821727752685547, + "learning_rate": 1e-06, + "loss": 0.939, + "num_input_tokens_seen": 71900448, + "step": 1284 + }, + { + "epoch": 2.859688195991091, + "loss": 0.8391966819763184, + "loss_ce": 0.0006956788711249828, + "loss_iou": 0.32421875, + "loss_num": 0.037841796875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 71900448, + "step": 1284 + }, + { + "epoch": 2.861915367483296, + "grad_norm": 13.810822486877441, + "learning_rate": 1e-06, + "loss": 0.9658, + "num_input_tokens_seen": 71958344, + "step": 1285 + }, + { + "epoch": 2.861915367483296, + "loss": 0.921851396560669, + "loss_ce": 0.0004646632296498865, + "loss_iou": 0.380859375, + "loss_num": 0.031494140625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 71958344, + "step": 1285 + }, + { + "epoch": 2.864142538975501, + "grad_norm": 19.463180541992188, + "learning_rate": 1e-06, + "loss": 0.9619, + "num_input_tokens_seen": 72014732, + "step": 1286 + }, + { + "epoch": 2.864142538975501, + "loss": 1.0936338901519775, + "loss_ce": 0.0003722285036928952, + "loss_iou": 0.44140625, + "loss_num": 0.041748046875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 72014732, + "step": 1286 + }, + { + "epoch": 2.866369710467706, + "grad_norm": 20.0224666595459, + "learning_rate": 1e-06, + "loss": 0.8855, + "num_input_tokens_seen": 72071312, + "step": 1287 + }, + { + "epoch": 2.866369710467706, + "loss": 0.9935523271560669, + "loss_ce": 0.0011207055067643523, + "loss_iou": 0.36328125, + "loss_num": 0.053466796875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 72071312, + "step": 1287 + }, + { + "epoch": 2.868596881959911, + "grad_norm": 18.678556442260742, + "learning_rate": 1e-06, + "loss": 0.8911, + "num_input_tokens_seen": 72126324, + "step": 1288 + }, + { + "epoch": 2.868596881959911, + "loss": 0.7727195024490356, + "loss_ce": 0.00025854576961137354, + "loss_iou": 0.3203125, + "loss_num": 0.0260009765625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 72126324, + "step": 1288 + }, + { + "epoch": 2.870824053452116, + "grad_norm": 13.80396556854248, + "learning_rate": 1e-06, + "loss": 0.9222, + "num_input_tokens_seen": 72181044, + "step": 1289 + }, + { + "epoch": 2.870824053452116, + "loss": 1.003150224685669, + "loss_ce": 0.0004646561574190855, + "loss_iou": 0.3984375, + "loss_num": 0.041259765625, + "loss_xval": 1.0, + "num_input_tokens_seen": 72181044, + "step": 1289 + }, + { + "epoch": 2.8730512249443207, + "grad_norm": 30.7203369140625, + "learning_rate": 1e-06, + "loss": 1.0782, + "num_input_tokens_seen": 72238480, + "step": 1290 + }, + { + "epoch": 2.8730512249443207, + "loss": 1.1529991626739502, + "loss_ce": 0.0004112512688152492, + "loss_iou": 0.51171875, + "loss_num": 0.0262451171875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 72238480, + "step": 1290 + }, + { + "epoch": 2.8752783964365256, + "grad_norm": 25.847972869873047, + "learning_rate": 1e-06, + "loss": 0.8204, + "num_input_tokens_seen": 72293396, + "step": 1291 + }, + { + "epoch": 2.8752783964365256, + "loss": 1.011704444885254, + "loss_ce": 0.0009622069192118943, + "loss_iou": 0.380859375, + "loss_num": 0.0498046875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 72293396, + "step": 1291 + }, + { + "epoch": 2.8775055679287305, + "grad_norm": 100.3687744140625, + "learning_rate": 1e-06, + "loss": 0.8099, + "num_input_tokens_seen": 72349376, + "step": 1292 + }, + { + "epoch": 2.8775055679287305, + "loss": 0.7852460741996765, + "loss_ce": 0.0003339627292007208, + "loss_iou": 0.287109375, + "loss_num": 0.0419921875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 72349376, + "step": 1292 + }, + { + "epoch": 2.8797327394209353, + "grad_norm": 31.29021644592285, + "learning_rate": 1e-06, + "loss": 0.8552, + "num_input_tokens_seen": 72405340, + "step": 1293 + }, + { + "epoch": 2.8797327394209353, + "loss": 0.8164255619049072, + "loss_ce": 0.0005075936205685139, + "loss_iou": 0.333984375, + "loss_num": 0.0296630859375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 72405340, + "step": 1293 + }, + { + "epoch": 2.88195991091314, + "grad_norm": 18.81956672668457, + "learning_rate": 1e-06, + "loss": 1.1343, + "num_input_tokens_seen": 72462008, + "step": 1294 + }, + { + "epoch": 2.88195991091314, + "loss": 0.831671416759491, + "loss_ce": 0.0003726041177287698, + "loss_iou": 0.3359375, + "loss_num": 0.031982421875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 72462008, + "step": 1294 + }, + { + "epoch": 2.884187082405345, + "grad_norm": 28.06816291809082, + "learning_rate": 1e-06, + "loss": 0.7352, + "num_input_tokens_seen": 72520860, + "step": 1295 + }, + { + "epoch": 2.884187082405345, + "loss": 0.8308737277984619, + "loss_ce": 0.0005514706717804074, + "loss_iou": 0.3671875, + "loss_num": 0.0191650390625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 72520860, + "step": 1295 + }, + { + "epoch": 2.8864142538975504, + "grad_norm": 119.70653533935547, + "learning_rate": 1e-06, + "loss": 0.9987, + "num_input_tokens_seen": 72578972, + "step": 1296 + }, + { + "epoch": 2.8864142538975504, + "loss": 1.2035748958587646, + "loss_ce": 0.00044986687134951353, + "loss_iou": 0.482421875, + "loss_num": 0.04736328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 72578972, + "step": 1296 + }, + { + "epoch": 2.888641425389755, + "grad_norm": 23.493093490600586, + "learning_rate": 1e-06, + "loss": 0.9778, + "num_input_tokens_seen": 72634868, + "step": 1297 + }, + { + "epoch": 2.888641425389755, + "loss": 0.8210999965667725, + "loss_ce": 0.0002992242225445807, + "loss_iou": 0.353515625, + "loss_num": 0.0224609375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 72634868, + "step": 1297 + }, + { + "epoch": 2.89086859688196, + "grad_norm": 112.7999038696289, + "learning_rate": 1e-06, + "loss": 1.1468, + "num_input_tokens_seen": 72686616, + "step": 1298 + }, + { + "epoch": 2.89086859688196, + "loss": 1.0459107160568237, + "loss_ce": 0.0007446431554853916, + "loss_iou": 0.4140625, + "loss_num": 0.04345703125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 72686616, + "step": 1298 + }, + { + "epoch": 2.893095768374165, + "grad_norm": 17.558181762695312, + "learning_rate": 1e-06, + "loss": 0.9396, + "num_input_tokens_seen": 72740388, + "step": 1299 + }, + { + "epoch": 2.893095768374165, + "loss": 0.843862771987915, + "loss_ce": 0.0008451773319393396, + "loss_iou": 0.359375, + "loss_num": 0.0250244140625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 72740388, + "step": 1299 + }, + { + "epoch": 2.89532293986637, + "grad_norm": 20.879600524902344, + "learning_rate": 1e-06, + "loss": 0.9484, + "num_input_tokens_seen": 72796572, + "step": 1300 + }, + { + "epoch": 2.89532293986637, + "loss": 1.0628635883331299, + "loss_ce": 0.00036358111537992954, + "loss_iou": 0.4296875, + "loss_num": 0.041015625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 72796572, + "step": 1300 + }, + { + "epoch": 2.8975501113585747, + "grad_norm": 24.84024429321289, + "learning_rate": 1e-06, + "loss": 0.8657, + "num_input_tokens_seen": 72854948, + "step": 1301 + }, + { + "epoch": 2.8975501113585747, + "loss": 0.7214281558990479, + "loss_ce": 0.00035886449040845037, + "loss_iou": 0.306640625, + "loss_num": 0.021728515625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 72854948, + "step": 1301 + }, + { + "epoch": 2.8997772828507795, + "grad_norm": 22.32291030883789, + "learning_rate": 1e-06, + "loss": 1.0913, + "num_input_tokens_seen": 72907880, + "step": 1302 + }, + { + "epoch": 2.8997772828507795, + "loss": 1.042357325553894, + "loss_ce": 0.00036513677332550287, + "loss_iou": 0.458984375, + "loss_num": 0.0245361328125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 72907880, + "step": 1302 + }, + { + "epoch": 2.9020044543429844, + "grad_norm": 94.72842407226562, + "learning_rate": 1e-06, + "loss": 0.8327, + "num_input_tokens_seen": 72965036, + "step": 1303 + }, + { + "epoch": 2.9020044543429844, + "loss": 0.7842406034469604, + "loss_ce": 0.00030507519841194153, + "loss_iou": 0.3203125, + "loss_num": 0.028564453125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 72965036, + "step": 1303 + }, + { + "epoch": 2.9042316258351892, + "grad_norm": 64.44385528564453, + "learning_rate": 1e-06, + "loss": 1.0017, + "num_input_tokens_seen": 73019812, + "step": 1304 + }, + { + "epoch": 2.9042316258351892, + "loss": 1.2140803337097168, + "loss_ce": 0.0009456113912165165, + "loss_iou": 0.46875, + "loss_num": 0.055419921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 73019812, + "step": 1304 + }, + { + "epoch": 2.906458797327394, + "grad_norm": 63.43607711791992, + "learning_rate": 1e-06, + "loss": 0.6413, + "num_input_tokens_seen": 73077144, + "step": 1305 + }, + { + "epoch": 2.906458797327394, + "loss": 0.6670837998390198, + "loss_ce": 0.0003357888199388981, + "loss_iou": 0.302734375, + "loss_num": 0.01202392578125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 73077144, + "step": 1305 + }, + { + "epoch": 2.908685968819599, + "grad_norm": 19.118846893310547, + "learning_rate": 1e-06, + "loss": 0.8712, + "num_input_tokens_seen": 73134624, + "step": 1306 + }, + { + "epoch": 2.908685968819599, + "loss": 0.8654381036758423, + "loss_ce": 0.00044786007492803037, + "loss_iou": 0.37890625, + "loss_num": 0.0208740234375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 73134624, + "step": 1306 + }, + { + "epoch": 2.910913140311804, + "grad_norm": 15.382525444030762, + "learning_rate": 1e-06, + "loss": 0.9095, + "num_input_tokens_seen": 73189224, + "step": 1307 + }, + { + "epoch": 2.910913140311804, + "loss": 0.9012154340744019, + "loss_ce": 0.0003364897274877876, + "loss_iou": 0.384765625, + "loss_num": 0.0260009765625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 73189224, + "step": 1307 + }, + { + "epoch": 2.9131403118040087, + "grad_norm": 19.829851150512695, + "learning_rate": 1e-06, + "loss": 0.6906, + "num_input_tokens_seen": 73245992, + "step": 1308 + }, + { + "epoch": 2.9131403118040087, + "loss": 0.6446969509124756, + "loss_ce": 0.00040984569932334125, + "loss_iou": 0.267578125, + "loss_num": 0.02197265625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 73245992, + "step": 1308 + }, + { + "epoch": 2.9153674832962135, + "grad_norm": 29.750280380249023, + "learning_rate": 1e-06, + "loss": 0.9183, + "num_input_tokens_seen": 73299640, + "step": 1309 + }, + { + "epoch": 2.9153674832962135, + "loss": 0.7756353616714478, + "loss_ce": 0.0004888884141109884, + "loss_iou": 0.33203125, + "loss_num": 0.0224609375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 73299640, + "step": 1309 + }, + { + "epoch": 2.917594654788419, + "grad_norm": 14.211082458496094, + "learning_rate": 1e-06, + "loss": 0.8288, + "num_input_tokens_seen": 73354816, + "step": 1310 + }, + { + "epoch": 2.917594654788419, + "loss": 0.7674408555030823, + "loss_ce": 0.0003510417591314763, + "loss_iou": 0.328125, + "loss_num": 0.0220947265625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 73354816, + "step": 1310 + }, + { + "epoch": 2.9198218262806237, + "grad_norm": 23.373992919921875, + "learning_rate": 1e-06, + "loss": 0.789, + "num_input_tokens_seen": 73412668, + "step": 1311 + }, + { + "epoch": 2.9198218262806237, + "loss": 0.7085351943969727, + "loss_ce": 0.0005274016875773668, + "loss_iou": 0.3125, + "loss_num": 0.016845703125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 73412668, + "step": 1311 + }, + { + "epoch": 2.9220489977728286, + "grad_norm": 16.870405197143555, + "learning_rate": 1e-06, + "loss": 1.0438, + "num_input_tokens_seen": 73468408, + "step": 1312 + }, + { + "epoch": 2.9220489977728286, + "loss": 0.9538618922233582, + "loss_ce": 0.002201691037043929, + "loss_iou": 0.421875, + "loss_num": 0.0218505859375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 73468408, + "step": 1312 + }, + { + "epoch": 2.9242761692650334, + "grad_norm": 17.644731521606445, + "learning_rate": 1e-06, + "loss": 0.9705, + "num_input_tokens_seen": 73525120, + "step": 1313 + }, + { + "epoch": 2.9242761692650334, + "loss": 1.1021685600280762, + "loss_ce": 0.0006060994928702712, + "loss_iou": 0.4296875, + "loss_num": 0.048828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 73525120, + "step": 1313 + }, + { + "epoch": 2.9265033407572383, + "grad_norm": 12.963172912597656, + "learning_rate": 1e-06, + "loss": 0.6575, + "num_input_tokens_seen": 73581328, + "step": 1314 + }, + { + "epoch": 2.9265033407572383, + "loss": 0.7298256754875183, + "loss_ce": 0.00033350015291944146, + "loss_iou": 0.30859375, + "loss_num": 0.022216796875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 73581328, + "step": 1314 + }, + { + "epoch": 2.928730512249443, + "grad_norm": 15.870047569274902, + "learning_rate": 1e-06, + "loss": 0.9587, + "num_input_tokens_seen": 73636896, + "step": 1315 + }, + { + "epoch": 2.928730512249443, + "loss": 0.8428421020507812, + "loss_ce": 0.0003127763920929283, + "loss_iou": 0.376953125, + "loss_num": 0.0179443359375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 73636896, + "step": 1315 + }, + { + "epoch": 2.930957683741648, + "grad_norm": 14.364209175109863, + "learning_rate": 1e-06, + "loss": 0.8652, + "num_input_tokens_seen": 73693568, + "step": 1316 + }, + { + "epoch": 2.930957683741648, + "loss": 0.7832823395729065, + "loss_ce": 0.0003234214964322746, + "loss_iou": 0.337890625, + "loss_num": 0.021240234375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 73693568, + "step": 1316 + }, + { + "epoch": 2.933184855233853, + "grad_norm": 32.526336669921875, + "learning_rate": 1e-06, + "loss": 0.9016, + "num_input_tokens_seen": 73752056, + "step": 1317 + }, + { + "epoch": 2.933184855233853, + "loss": 0.7434287071228027, + "loss_ce": 0.0003867132472805679, + "loss_iou": 0.33203125, + "loss_num": 0.015869140625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 73752056, + "step": 1317 + }, + { + "epoch": 2.935412026726058, + "grad_norm": 16.808130264282227, + "learning_rate": 1e-06, + "loss": 0.8978, + "num_input_tokens_seen": 73809184, + "step": 1318 + }, + { + "epoch": 2.935412026726058, + "loss": 0.7791517972946167, + "loss_ce": 0.0003431940567679703, + "loss_iou": 0.326171875, + "loss_num": 0.0255126953125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 73809184, + "step": 1318 + }, + { + "epoch": 2.937639198218263, + "grad_norm": 25.0362548828125, + "learning_rate": 1e-06, + "loss": 0.9062, + "num_input_tokens_seen": 73863224, + "step": 1319 + }, + { + "epoch": 2.937639198218263, + "loss": 0.8815177083015442, + "loss_ce": 0.00041416779276914895, + "loss_iou": 0.375, + "loss_num": 0.026123046875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 73863224, + "step": 1319 + }, + { + "epoch": 2.939866369710468, + "grad_norm": 23.442148208618164, + "learning_rate": 1e-06, + "loss": 1.0457, + "num_input_tokens_seen": 73919812, + "step": 1320 + }, + { + "epoch": 2.939866369710468, + "loss": 0.9350742101669312, + "loss_ce": 0.0002597759012132883, + "loss_iou": 0.388671875, + "loss_num": 0.03125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 73919812, + "step": 1320 + }, + { + "epoch": 2.9420935412026727, + "grad_norm": 39.34425735473633, + "learning_rate": 1e-06, + "loss": 0.7751, + "num_input_tokens_seen": 73974388, + "step": 1321 + }, + { + "epoch": 2.9420935412026727, + "loss": 0.6952590346336365, + "loss_ce": 0.00031273809145204723, + "loss_iou": 0.302734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 73974388, + "step": 1321 + }, + { + "epoch": 2.9443207126948776, + "grad_norm": 15.462008476257324, + "learning_rate": 1e-06, + "loss": 0.9421, + "num_input_tokens_seen": 74032576, + "step": 1322 + }, + { + "epoch": 2.9443207126948776, + "loss": 0.9447818994522095, + "loss_ce": 0.0009342257399111986, + "loss_iou": 0.33984375, + "loss_num": 0.052978515625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 74032576, + "step": 1322 + }, + { + "epoch": 2.9465478841870825, + "grad_norm": 14.060518264770508, + "learning_rate": 1e-06, + "loss": 0.7786, + "num_input_tokens_seen": 74087264, + "step": 1323 + }, + { + "epoch": 2.9465478841870825, + "loss": 0.6352978944778442, + "loss_ce": 0.0005322670331224799, + "loss_iou": 0.28125, + "loss_num": 0.01483154296875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 74087264, + "step": 1323 + }, + { + "epoch": 2.9487750556792873, + "grad_norm": 22.72332763671875, + "learning_rate": 1e-06, + "loss": 1.0088, + "num_input_tokens_seen": 74142868, + "step": 1324 + }, + { + "epoch": 2.9487750556792873, + "loss": 1.0528690814971924, + "loss_ce": 0.0006230201106518507, + "loss_iou": 0.455078125, + "loss_num": 0.028564453125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 74142868, + "step": 1324 + }, + { + "epoch": 2.951002227171492, + "grad_norm": 12.463337898254395, + "learning_rate": 1e-06, + "loss": 1.0772, + "num_input_tokens_seen": 74195720, + "step": 1325 + }, + { + "epoch": 2.951002227171492, + "loss": 1.2419848442077637, + "loss_ce": 0.0002855784259736538, + "loss_iou": 0.53125, + "loss_num": 0.036376953125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 74195720, + "step": 1325 + }, + { + "epoch": 2.953229398663697, + "grad_norm": 21.965835571289062, + "learning_rate": 1e-06, + "loss": 1.1917, + "num_input_tokens_seen": 74249536, + "step": 1326 + }, + { + "epoch": 2.953229398663697, + "loss": 0.998505711555481, + "loss_ce": 0.00045889458851888776, + "loss_iou": 0.4140625, + "loss_num": 0.033935546875, + "loss_xval": 1.0, + "num_input_tokens_seen": 74249536, + "step": 1326 + }, + { + "epoch": 2.955456570155902, + "grad_norm": 19.335357666015625, + "learning_rate": 1e-06, + "loss": 0.79, + "num_input_tokens_seen": 74306892, + "step": 1327 + }, + { + "epoch": 2.955456570155902, + "loss": 0.7617478370666504, + "loss_ce": 0.0003953152918256819, + "loss_iou": 0.314453125, + "loss_num": 0.026611328125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 74306892, + "step": 1327 + }, + { + "epoch": 2.9576837416481068, + "grad_norm": 17.01285743713379, + "learning_rate": 1e-06, + "loss": 1.0855, + "num_input_tokens_seen": 74363500, + "step": 1328 + }, + { + "epoch": 2.9576837416481068, + "loss": 1.1959009170532227, + "loss_ce": 0.0015648790867999196, + "loss_iou": 0.46484375, + "loss_num": 0.05224609375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 74363500, + "step": 1328 + }, + { + "epoch": 2.9599109131403116, + "grad_norm": 15.02244758605957, + "learning_rate": 1e-06, + "loss": 0.8086, + "num_input_tokens_seen": 74420444, + "step": 1329 + }, + { + "epoch": 2.9599109131403116, + "loss": 0.6169714331626892, + "loss_ce": 0.00027223769575357437, + "loss_iou": 0.265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 74420444, + "step": 1329 + }, + { + "epoch": 2.9621380846325165, + "grad_norm": 28.502256393432617, + "learning_rate": 1e-06, + "loss": 0.9974, + "num_input_tokens_seen": 74475096, + "step": 1330 + }, + { + "epoch": 2.9621380846325165, + "loss": 1.2459745407104492, + "loss_ce": 0.00036911843926645815, + "loss_iou": 0.53515625, + "loss_num": 0.03564453125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 74475096, + "step": 1330 + }, + { + "epoch": 2.9643652561247213, + "grad_norm": 78.01403045654297, + "learning_rate": 1e-06, + "loss": 0.695, + "num_input_tokens_seen": 74532852, + "step": 1331 + }, + { + "epoch": 2.9643652561247213, + "loss": 0.6845568418502808, + "loss_ce": 0.000352742470568046, + "loss_iou": 0.306640625, + "loss_num": 0.014404296875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 74532852, + "step": 1331 + }, + { + "epoch": 2.9665924276169267, + "grad_norm": 15.447741508483887, + "learning_rate": 1e-06, + "loss": 0.778, + "num_input_tokens_seen": 74588216, + "step": 1332 + }, + { + "epoch": 2.9665924276169267, + "loss": 0.9468116760253906, + "loss_ce": 0.00027843567659147084, + "loss_iou": 0.388671875, + "loss_num": 0.0341796875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 74588216, + "step": 1332 + }, + { + "epoch": 2.9688195991091315, + "grad_norm": 20.878141403198242, + "learning_rate": 1e-06, + "loss": 0.8829, + "num_input_tokens_seen": 74645576, + "step": 1333 + }, + { + "epoch": 2.9688195991091315, + "loss": 1.0510952472686768, + "loss_ce": 0.0003140345506835729, + "loss_iou": 0.443359375, + "loss_num": 0.033447265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 74645576, + "step": 1333 + }, + { + "epoch": 2.9710467706013364, + "grad_norm": 15.299370765686035, + "learning_rate": 1e-06, + "loss": 1.2055, + "num_input_tokens_seen": 74704232, + "step": 1334 + }, + { + "epoch": 2.9710467706013364, + "loss": 1.2119064331054688, + "loss_ce": 0.0012130287941545248, + "loss_iou": 0.51171875, + "loss_num": 0.037353515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 74704232, + "step": 1334 + }, + { + "epoch": 2.9732739420935412, + "grad_norm": 19.733963012695312, + "learning_rate": 1e-06, + "loss": 0.8997, + "num_input_tokens_seen": 74757716, + "step": 1335 + }, + { + "epoch": 2.9732739420935412, + "loss": 0.8220304250717163, + "loss_ce": 0.0004972000606358051, + "loss_iou": 0.345703125, + "loss_num": 0.0257568359375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 74757716, + "step": 1335 + }, + { + "epoch": 2.975501113585746, + "grad_norm": 23.857521057128906, + "learning_rate": 1e-06, + "loss": 0.9514, + "num_input_tokens_seen": 74811252, + "step": 1336 + }, + { + "epoch": 2.975501113585746, + "loss": 1.0858557224273682, + "loss_ce": 0.0004064875829499215, + "loss_iou": 0.455078125, + "loss_num": 0.035400390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 74811252, + "step": 1336 + }, + { + "epoch": 2.977728285077951, + "grad_norm": 17.06792449951172, + "learning_rate": 1e-06, + "loss": 0.8545, + "num_input_tokens_seen": 74867424, + "step": 1337 + }, + { + "epoch": 2.977728285077951, + "loss": 0.698322057723999, + "loss_ce": 0.0003239895449951291, + "loss_iou": 0.32421875, + "loss_num": 0.009765625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 74867424, + "step": 1337 + }, + { + "epoch": 2.979955456570156, + "grad_norm": 21.56948471069336, + "learning_rate": 1e-06, + "loss": 0.8931, + "num_input_tokens_seen": 74923808, + "step": 1338 + }, + { + "epoch": 2.979955456570156, + "loss": 1.2361738681793213, + "loss_ce": 0.0003339699178468436, + "loss_iou": 0.494140625, + "loss_num": 0.0498046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 74923808, + "step": 1338 + }, + { + "epoch": 2.9821826280623607, + "grad_norm": 18.15468978881836, + "learning_rate": 1e-06, + "loss": 0.7672, + "num_input_tokens_seen": 74978584, + "step": 1339 + }, + { + "epoch": 2.9821826280623607, + "loss": 0.5524653792381287, + "loss_ce": 0.0003413406084291637, + "loss_iou": 0.232421875, + "loss_num": 0.0174560546875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 74978584, + "step": 1339 + }, + { + "epoch": 2.984409799554566, + "grad_norm": 19.710803985595703, + "learning_rate": 1e-06, + "loss": 0.9056, + "num_input_tokens_seen": 75033200, + "step": 1340 + }, + { + "epoch": 2.984409799554566, + "loss": 0.7972690463066101, + "loss_ce": 0.00039405166171491146, + "loss_iou": 0.328125, + "loss_num": 0.02783203125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 75033200, + "step": 1340 + }, + { + "epoch": 2.986636971046771, + "grad_norm": 17.216930389404297, + "learning_rate": 1e-06, + "loss": 0.7153, + "num_input_tokens_seen": 75088396, + "step": 1341 + }, + { + "epoch": 2.986636971046771, + "loss": 0.7058947682380676, + "loss_ce": 0.00032835971796885133, + "loss_iou": 0.291015625, + "loss_num": 0.0247802734375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 75088396, + "step": 1341 + }, + { + "epoch": 2.9888641425389757, + "grad_norm": 40.40977478027344, + "learning_rate": 1e-06, + "loss": 1.0308, + "num_input_tokens_seen": 75146988, + "step": 1342 + }, + { + "epoch": 2.9888641425389757, + "loss": 0.981564462184906, + "loss_ce": 0.000363294588169083, + "loss_iou": 0.42578125, + "loss_num": 0.0255126953125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 75146988, + "step": 1342 + }, + { + "epoch": 2.9910913140311806, + "grad_norm": 409.5078430175781, + "learning_rate": 1e-06, + "loss": 1.0702, + "num_input_tokens_seen": 75204000, + "step": 1343 + }, + { + "epoch": 2.9910913140311806, + "loss": 0.9847346544265747, + "loss_ce": 0.0003596529713831842, + "loss_iou": 0.3984375, + "loss_num": 0.037841796875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 75204000, + "step": 1343 + }, + { + "epoch": 2.9933184855233854, + "grad_norm": 14.489014625549316, + "learning_rate": 1e-06, + "loss": 0.9728, + "num_input_tokens_seen": 75260556, + "step": 1344 + }, + { + "epoch": 2.9933184855233854, + "loss": 1.0310882329940796, + "loss_ce": 0.0003265210543759167, + "loss_iou": 0.390625, + "loss_num": 0.050048828125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 75260556, + "step": 1344 + }, + { + "epoch": 2.9955456570155903, + "grad_norm": 21.714834213256836, + "learning_rate": 1e-06, + "loss": 0.8475, + "num_input_tokens_seen": 75318464, + "step": 1345 + }, + { + "epoch": 2.9955456570155903, + "loss": 0.8694126605987549, + "loss_ce": 0.0005162069573998451, + "loss_iou": 0.384765625, + "loss_num": 0.0198974609375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 75318464, + "step": 1345 + }, + { + "epoch": 2.997772828507795, + "grad_norm": 18.179824829101562, + "learning_rate": 1e-06, + "loss": 0.8263, + "num_input_tokens_seen": 75375036, + "step": 1346 + }, + { + "epoch": 2.997772828507795, + "loss": 0.9615136384963989, + "loss_ce": 0.0003319892566651106, + "loss_iou": 0.376953125, + "loss_num": 0.04150390625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 75375036, + "step": 1346 + }, + { + "epoch": 3.0, + "grad_norm": 17.222766876220703, + "learning_rate": 1e-06, + "loss": 0.969, + "num_input_tokens_seen": 75431008, + "step": 1347 + }, + { + "epoch": 3.0, + "loss": 0.4970862567424774, + "loss_ce": 0.00038217363180592656, + "loss_iou": 0.2158203125, + "loss_num": 0.01287841796875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 75431008, + "step": 1347 + }, + { + "epoch": 3.002227171492205, + "grad_norm": 15.791449546813965, + "learning_rate": 1e-06, + "loss": 1.1072, + "num_input_tokens_seen": 75486424, + "step": 1348 + }, + { + "epoch": 3.002227171492205, + "loss": 1.2740671634674072, + "loss_ce": 0.0006296620704233646, + "loss_iou": 0.515625, + "loss_num": 0.04833984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 75486424, + "step": 1348 + }, + { + "epoch": 3.0044543429844097, + "grad_norm": 18.678091049194336, + "learning_rate": 1e-06, + "loss": 0.8753, + "num_input_tokens_seen": 75542876, + "step": 1349 + }, + { + "epoch": 3.0044543429844097, + "loss": 0.7326757907867432, + "loss_ce": 0.00025389608344994485, + "loss_iou": 0.328125, + "loss_num": 0.01513671875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 75542876, + "step": 1349 + }, + { + "epoch": 3.0066815144766146, + "grad_norm": 33.28176498413086, + "learning_rate": 1e-06, + "loss": 0.8996, + "num_input_tokens_seen": 75599980, + "step": 1350 + }, + { + "epoch": 3.0066815144766146, + "loss": 0.8423166275024414, + "loss_ce": 0.00051976612303406, + "loss_iou": 0.34375, + "loss_num": 0.0308837890625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 75599980, + "step": 1350 + }, + { + "epoch": 3.0089086859688194, + "grad_norm": 16.385852813720703, + "learning_rate": 1e-06, + "loss": 0.9101, + "num_input_tokens_seen": 75656456, + "step": 1351 + }, + { + "epoch": 3.0089086859688194, + "loss": 1.0261783599853516, + "loss_ce": 0.0005435821949504316, + "loss_iou": 0.44140625, + "loss_num": 0.0283203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 75656456, + "step": 1351 + }, + { + "epoch": 3.0111358574610243, + "grad_norm": 23.76874351501465, + "learning_rate": 1e-06, + "loss": 0.9519, + "num_input_tokens_seen": 75715892, + "step": 1352 + }, + { + "epoch": 3.0111358574610243, + "loss": 0.8311777114868164, + "loss_ce": 0.0003672138263937086, + "loss_iou": 0.353515625, + "loss_num": 0.0244140625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 75715892, + "step": 1352 + }, + { + "epoch": 3.0133630289532296, + "grad_norm": 20.76018714904785, + "learning_rate": 1e-06, + "loss": 0.8318, + "num_input_tokens_seen": 75774108, + "step": 1353 + }, + { + "epoch": 3.0133630289532296, + "loss": 0.8268052935600281, + "loss_ce": 0.0006334498757496476, + "loss_iou": 0.328125, + "loss_num": 0.034423828125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 75774108, + "step": 1353 + }, + { + "epoch": 3.0155902004454345, + "grad_norm": 13.831488609313965, + "learning_rate": 1e-06, + "loss": 0.8734, + "num_input_tokens_seen": 75829580, + "step": 1354 + }, + { + "epoch": 3.0155902004454345, + "loss": 0.7394604682922363, + "loss_ce": 0.00032472447492182255, + "loss_iou": 0.30859375, + "loss_num": 0.0242919921875, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 75829580, + "step": 1354 + }, + { + "epoch": 3.0178173719376393, + "grad_norm": 32.255157470703125, + "learning_rate": 1e-06, + "loss": 0.7957, + "num_input_tokens_seen": 75888104, + "step": 1355 + }, + { + "epoch": 3.0178173719376393, + "loss": 0.7028149366378784, + "loss_ce": 0.0003003134625032544, + "loss_iou": 0.29296875, + "loss_num": 0.023193359375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 75888104, + "step": 1355 + }, + { + "epoch": 3.020044543429844, + "grad_norm": 38.09650802612305, + "learning_rate": 1e-06, + "loss": 0.8811, + "num_input_tokens_seen": 75946020, + "step": 1356 + }, + { + "epoch": 3.020044543429844, + "loss": 0.7325311899185181, + "loss_ce": 0.00035350650432519615, + "loss_iou": 0.306640625, + "loss_num": 0.0240478515625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 75946020, + "step": 1356 + }, + { + "epoch": 3.022271714922049, + "grad_norm": 16.011274337768555, + "learning_rate": 1e-06, + "loss": 0.7803, + "num_input_tokens_seen": 76003528, + "step": 1357 + }, + { + "epoch": 3.022271714922049, + "loss": 0.8663822412490845, + "loss_ce": 0.0004154411144554615, + "loss_iou": 0.380859375, + "loss_num": 0.0211181640625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 76003528, + "step": 1357 + }, + { + "epoch": 3.024498886414254, + "grad_norm": 33.57292556762695, + "learning_rate": 1e-06, + "loss": 0.9545, + "num_input_tokens_seen": 76059112, + "step": 1358 + }, + { + "epoch": 3.024498886414254, + "loss": 0.9820139408111572, + "loss_ce": 0.00032445252873003483, + "loss_iou": 0.37890625, + "loss_num": 0.044921875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 76059112, + "step": 1358 + }, + { + "epoch": 3.0267260579064588, + "grad_norm": 18.756921768188477, + "learning_rate": 1e-06, + "loss": 0.8035, + "num_input_tokens_seen": 76115888, + "step": 1359 + }, + { + "epoch": 3.0267260579064588, + "loss": 0.6882917284965515, + "loss_ce": 0.00030343266553245485, + "loss_iou": 0.3046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 76115888, + "step": 1359 + }, + { + "epoch": 3.0289532293986636, + "grad_norm": 46.1540641784668, + "learning_rate": 1e-06, + "loss": 0.9699, + "num_input_tokens_seen": 76175268, + "step": 1360 + }, + { + "epoch": 3.0289532293986636, + "loss": 0.7430423498153687, + "loss_ce": 0.00036659964825958014, + "loss_iou": 0.291015625, + "loss_num": 0.032470703125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 76175268, + "step": 1360 + }, + { + "epoch": 3.0311804008908685, + "grad_norm": 18.095458984375, + "learning_rate": 1e-06, + "loss": 1.0666, + "num_input_tokens_seen": 76232400, + "step": 1361 + }, + { + "epoch": 3.0311804008908685, + "loss": 0.8669412732124329, + "loss_ce": 0.00048618431901559234, + "loss_iou": 0.34765625, + "loss_num": 0.0341796875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 76232400, + "step": 1361 + }, + { + "epoch": 3.0334075723830733, + "grad_norm": 16.167116165161133, + "learning_rate": 1e-06, + "loss": 0.7831, + "num_input_tokens_seen": 76287924, + "step": 1362 + }, + { + "epoch": 3.0334075723830733, + "loss": 0.9040330648422241, + "loss_ce": 0.00046859911526553333, + "loss_iou": 0.3515625, + "loss_num": 0.0400390625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 76287924, + "step": 1362 + }, + { + "epoch": 3.035634743875278, + "grad_norm": 25.74004364013672, + "learning_rate": 1e-06, + "loss": 1.0122, + "num_input_tokens_seen": 76344832, + "step": 1363 + }, + { + "epoch": 3.035634743875278, + "loss": 1.1795121431350708, + "loss_ce": 0.0005570236244238913, + "loss_iou": 0.447265625, + "loss_num": 0.05712890625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 76344832, + "step": 1363 + }, + { + "epoch": 3.0378619153674835, + "grad_norm": 25.54564094543457, + "learning_rate": 1e-06, + "loss": 0.9171, + "num_input_tokens_seen": 76399332, + "step": 1364 + }, + { + "epoch": 3.0378619153674835, + "loss": 0.7992980480194092, + "loss_ce": 0.0004698993288911879, + "loss_iou": 0.359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 76399332, + "step": 1364 + }, + { + "epoch": 3.0400890868596884, + "grad_norm": 14.032575607299805, + "learning_rate": 1e-06, + "loss": 1.0658, + "num_input_tokens_seen": 76452824, + "step": 1365 + }, + { + "epoch": 3.0400890868596884, + "loss": 1.1477856636047363, + "loss_ce": 0.000324697612086311, + "loss_iou": 0.486328125, + "loss_num": 0.034912109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 76452824, + "step": 1365 + }, + { + "epoch": 3.0423162583518932, + "grad_norm": 27.30165672302246, + "learning_rate": 1e-06, + "loss": 0.838, + "num_input_tokens_seen": 76507928, + "step": 1366 + }, + { + "epoch": 3.0423162583518932, + "loss": 1.014754056930542, + "loss_ce": 0.00034971325658261776, + "loss_iou": 0.41796875, + "loss_num": 0.03515625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 76507928, + "step": 1366 + }, + { + "epoch": 3.044543429844098, + "grad_norm": 30.125919342041016, + "learning_rate": 1e-06, + "loss": 0.9891, + "num_input_tokens_seen": 76561868, + "step": 1367 + }, + { + "epoch": 3.044543429844098, + "loss": 1.0369231700897217, + "loss_ce": 0.0007904000231064856, + "loss_iou": 0.421875, + "loss_num": 0.038330078125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 76561868, + "step": 1367 + }, + { + "epoch": 3.046770601336303, + "grad_norm": 15.93181324005127, + "learning_rate": 1e-06, + "loss": 1.4316, + "num_input_tokens_seen": 76615632, + "step": 1368 + }, + { + "epoch": 3.046770601336303, + "loss": 1.6312259435653687, + "loss_ce": 0.00036655933945439756, + "loss_iou": 0.68359375, + "loss_num": 0.052734375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 76615632, + "step": 1368 + }, + { + "epoch": 3.048997772828508, + "grad_norm": 12.806370735168457, + "learning_rate": 1e-06, + "loss": 0.6985, + "num_input_tokens_seen": 76672624, + "step": 1369 + }, + { + "epoch": 3.048997772828508, + "loss": 0.7415366172790527, + "loss_ce": 0.0003256290510762483, + "loss_iou": 0.32421875, + "loss_num": 0.018310546875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 76672624, + "step": 1369 + }, + { + "epoch": 3.0512249443207127, + "grad_norm": 19.221446990966797, + "learning_rate": 1e-06, + "loss": 0.8891, + "num_input_tokens_seen": 76729096, + "step": 1370 + }, + { + "epoch": 3.0512249443207127, + "loss": 0.729907751083374, + "loss_ce": 0.0004155657079536468, + "loss_iou": 0.3125, + "loss_num": 0.0208740234375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 76729096, + "step": 1370 + }, + { + "epoch": 3.0534521158129175, + "grad_norm": 41.225067138671875, + "learning_rate": 1e-06, + "loss": 1.0878, + "num_input_tokens_seen": 76783684, + "step": 1371 + }, + { + "epoch": 3.0534521158129175, + "loss": 1.0934221744537354, + "loss_ce": 0.0004046167596243322, + "loss_iou": 0.44921875, + "loss_num": 0.03955078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 76783684, + "step": 1371 + }, + { + "epoch": 3.0556792873051224, + "grad_norm": 75.78993225097656, + "learning_rate": 1e-06, + "loss": 0.736, + "num_input_tokens_seen": 76841952, + "step": 1372 + }, + { + "epoch": 3.0556792873051224, + "loss": 0.587233304977417, + "loss_ce": 0.00031919381581246853, + "loss_iou": 0.251953125, + "loss_num": 0.0162353515625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 76841952, + "step": 1372 + }, + { + "epoch": 3.0579064587973273, + "grad_norm": 21.043071746826172, + "learning_rate": 1e-06, + "loss": 0.796, + "num_input_tokens_seen": 76897836, + "step": 1373 + }, + { + "epoch": 3.0579064587973273, + "loss": 0.8417444229125977, + "loss_ce": 0.0004358206642791629, + "loss_iou": 0.31640625, + "loss_num": 0.0419921875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 76897836, + "step": 1373 + }, + { + "epoch": 3.060133630289532, + "grad_norm": 15.805767059326172, + "learning_rate": 1e-06, + "loss": 0.9074, + "num_input_tokens_seen": 76955312, + "step": 1374 + }, + { + "epoch": 3.060133630289532, + "loss": 0.8958030343055725, + "loss_ce": 0.0002952259674202651, + "loss_iou": 0.369140625, + "loss_num": 0.03173828125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 76955312, + "step": 1374 + }, + { + "epoch": 3.062360801781737, + "grad_norm": 17.199810028076172, + "learning_rate": 1e-06, + "loss": 0.9986, + "num_input_tokens_seen": 77010728, + "step": 1375 + }, + { + "epoch": 3.062360801781737, + "loss": 0.8748060464859009, + "loss_ce": 0.00029435683973133564, + "loss_iou": 0.390625, + "loss_num": 0.018310546875, + "loss_xval": 0.875, + "num_input_tokens_seen": 77010728, + "step": 1375 + }, + { + "epoch": 3.0645879732739423, + "grad_norm": 25.19390869140625, + "learning_rate": 1e-06, + "loss": 0.8589, + "num_input_tokens_seen": 77067240, + "step": 1376 + }, + { + "epoch": 3.0645879732739423, + "loss": 0.8482306003570557, + "loss_ce": 0.00033024855656549335, + "loss_iou": 0.349609375, + "loss_num": 0.0301513671875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 77067240, + "step": 1376 + }, + { + "epoch": 3.066815144766147, + "grad_norm": 32.438499450683594, + "learning_rate": 1e-06, + "loss": 0.7083, + "num_input_tokens_seen": 77124068, + "step": 1377 + }, + { + "epoch": 3.066815144766147, + "loss": 0.721910297870636, + "loss_ce": 0.0035265071783214808, + "loss_iou": 0.310546875, + "loss_num": 0.019287109375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 77124068, + "step": 1377 + }, + { + "epoch": 3.069042316258352, + "grad_norm": 17.08912467956543, + "learning_rate": 1e-06, + "loss": 0.9961, + "num_input_tokens_seen": 77183416, + "step": 1378 + }, + { + "epoch": 3.069042316258352, + "loss": 1.1389042139053345, + "loss_ce": 0.0004764240875374526, + "loss_iou": 0.48046875, + "loss_num": 0.035400390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 77183416, + "step": 1378 + }, + { + "epoch": 3.071269487750557, + "grad_norm": 23.56962776184082, + "learning_rate": 1e-06, + "loss": 0.7918, + "num_input_tokens_seen": 77241704, + "step": 1379 + }, + { + "epoch": 3.071269487750557, + "loss": 0.9982140064239502, + "loss_ce": 0.000411315995734185, + "loss_iou": 0.375, + "loss_num": 0.050048828125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 77241704, + "step": 1379 + }, + { + "epoch": 3.0734966592427617, + "grad_norm": 55.8472785949707, + "learning_rate": 1e-06, + "loss": 0.9511, + "num_input_tokens_seen": 77297284, + "step": 1380 + }, + { + "epoch": 3.0734966592427617, + "loss": 0.7722369432449341, + "loss_ce": 0.0005083876312710345, + "loss_iou": 0.33984375, + "loss_num": 0.01806640625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 77297284, + "step": 1380 + }, + { + "epoch": 3.0757238307349666, + "grad_norm": 19.836666107177734, + "learning_rate": 1e-06, + "loss": 0.9226, + "num_input_tokens_seen": 77351728, + "step": 1381 + }, + { + "epoch": 3.0757238307349666, + "loss": 0.816044807434082, + "loss_ce": 0.0003710235469043255, + "loss_iou": 0.333984375, + "loss_num": 0.029296875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 77351728, + "step": 1381 + }, + { + "epoch": 3.0779510022271714, + "grad_norm": 32.599266052246094, + "learning_rate": 1e-06, + "loss": 1.1038, + "num_input_tokens_seen": 77408188, + "step": 1382 + }, + { + "epoch": 3.0779510022271714, + "loss": 1.1422669887542725, + "loss_ce": 0.0006654143799096346, + "loss_iou": 0.474609375, + "loss_num": 0.038818359375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 77408188, + "step": 1382 + }, + { + "epoch": 3.0801781737193763, + "grad_norm": 24.89826774597168, + "learning_rate": 1e-06, + "loss": 0.9169, + "num_input_tokens_seen": 77463240, + "step": 1383 + }, + { + "epoch": 3.0801781737193763, + "loss": 0.7601173520088196, + "loss_ce": 0.000351747585227713, + "loss_iou": 0.283203125, + "loss_num": 0.0390625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 77463240, + "step": 1383 + }, + { + "epoch": 3.082405345211581, + "grad_norm": 11.264007568359375, + "learning_rate": 1e-06, + "loss": 0.5929, + "num_input_tokens_seen": 77520856, + "step": 1384 + }, + { + "epoch": 3.082405345211581, + "loss": 0.5832231044769287, + "loss_ce": 0.0009477226412855089, + "loss_iou": 0.2177734375, + "loss_num": 0.029052734375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 77520856, + "step": 1384 + }, + { + "epoch": 3.084632516703786, + "grad_norm": 20.889442443847656, + "learning_rate": 1e-06, + "loss": 0.9322, + "num_input_tokens_seen": 77575336, + "step": 1385 + }, + { + "epoch": 3.084632516703786, + "loss": 0.9303987622261047, + "loss_ce": 0.000467123172711581, + "loss_iou": 0.39453125, + "loss_num": 0.0283203125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 77575336, + "step": 1385 + }, + { + "epoch": 3.086859688195991, + "grad_norm": 21.084487915039062, + "learning_rate": 1e-06, + "loss": 0.7207, + "num_input_tokens_seen": 77631024, + "step": 1386 + }, + { + "epoch": 3.086859688195991, + "loss": 0.8573043346405029, + "loss_ce": 0.0003706898423843086, + "loss_iou": 0.359375, + "loss_num": 0.0274658203125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 77631024, + "step": 1386 + }, + { + "epoch": 3.089086859688196, + "grad_norm": 27.087024688720703, + "learning_rate": 1e-06, + "loss": 0.8915, + "num_input_tokens_seen": 77689520, + "step": 1387 + }, + { + "epoch": 3.089086859688196, + "loss": 1.0073137283325195, + "loss_ce": 0.00047768885269761086, + "loss_iou": 0.416015625, + "loss_num": 0.03515625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 77689520, + "step": 1387 + }, + { + "epoch": 3.091314031180401, + "grad_norm": 13.960137367248535, + "learning_rate": 1e-06, + "loss": 0.8432, + "num_input_tokens_seen": 77744956, + "step": 1388 + }, + { + "epoch": 3.091314031180401, + "loss": 0.9717408418655396, + "loss_ce": 0.0003052502288483083, + "loss_iou": 0.4140625, + "loss_num": 0.029052734375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 77744956, + "step": 1388 + }, + { + "epoch": 3.093541202672606, + "grad_norm": 59.64553451538086, + "learning_rate": 1e-06, + "loss": 0.8163, + "num_input_tokens_seen": 77800484, + "step": 1389 + }, + { + "epoch": 3.093541202672606, + "loss": 0.7509998679161072, + "loss_ce": 0.00026743774651549757, + "loss_iou": 0.30859375, + "loss_num": 0.02685546875, + "loss_xval": 0.75, + "num_input_tokens_seen": 77800484, + "step": 1389 + }, + { + "epoch": 3.0957683741648108, + "grad_norm": 29.264339447021484, + "learning_rate": 1e-06, + "loss": 1.0133, + "num_input_tokens_seen": 77857748, + "step": 1390 + }, + { + "epoch": 3.0957683741648108, + "loss": 1.0564740896224976, + "loss_ce": 0.0003217510529793799, + "loss_iou": 0.43359375, + "loss_num": 0.0380859375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 77857748, + "step": 1390 + }, + { + "epoch": 3.0979955456570156, + "grad_norm": 16.573060989379883, + "learning_rate": 1e-06, + "loss": 0.8231, + "num_input_tokens_seen": 77912436, + "step": 1391 + }, + { + "epoch": 3.0979955456570156, + "loss": 0.7225488424301147, + "loss_ce": 0.0006250177975744009, + "loss_iou": 0.265625, + "loss_num": 0.03857421875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 77912436, + "step": 1391 + }, + { + "epoch": 3.1002227171492205, + "grad_norm": 23.289533615112305, + "learning_rate": 1e-06, + "loss": 0.6688, + "num_input_tokens_seen": 77971256, + "step": 1392 + }, + { + "epoch": 3.1002227171492205, + "loss": 0.6361066102981567, + "loss_ce": 0.0015851398929953575, + "loss_iou": 0.26171875, + "loss_num": 0.02197265625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 77971256, + "step": 1392 + }, + { + "epoch": 3.1024498886414253, + "grad_norm": 37.72678756713867, + "learning_rate": 1e-06, + "loss": 1.0144, + "num_input_tokens_seen": 78026280, + "step": 1393 + }, + { + "epoch": 3.1024498886414253, + "loss": 0.9221377968788147, + "loss_ce": 0.0007510941359214485, + "loss_iou": 0.392578125, + "loss_num": 0.027587890625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 78026280, + "step": 1393 + }, + { + "epoch": 3.10467706013363, + "grad_norm": 62.46157455444336, + "learning_rate": 1e-06, + "loss": 0.8603, + "num_input_tokens_seen": 78083452, + "step": 1394 + }, + { + "epoch": 3.10467706013363, + "loss": 0.6961342096328735, + "loss_ce": 0.000333470175974071, + "loss_iou": 0.3125, + "loss_num": 0.01434326171875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 78083452, + "step": 1394 + }, + { + "epoch": 3.106904231625835, + "grad_norm": 29.57560920715332, + "learning_rate": 1e-06, + "loss": 0.8214, + "num_input_tokens_seen": 78138744, + "step": 1395 + }, + { + "epoch": 3.106904231625835, + "loss": 0.8320956230163574, + "loss_ce": 0.00030853800126351416, + "loss_iou": 0.36328125, + "loss_num": 0.0211181640625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 78138744, + "step": 1395 + }, + { + "epoch": 3.10913140311804, + "grad_norm": 23.34572982788086, + "learning_rate": 1e-06, + "loss": 0.9635, + "num_input_tokens_seen": 78194916, + "step": 1396 + }, + { + "epoch": 3.10913140311804, + "loss": 1.1087517738342285, + "loss_ce": 0.0003533945418894291, + "loss_iou": 0.458984375, + "loss_num": 0.037841796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 78194916, + "step": 1396 + }, + { + "epoch": 3.111358574610245, + "grad_norm": 19.57944679260254, + "learning_rate": 1e-06, + "loss": 0.7877, + "num_input_tokens_seen": 78251972, + "step": 1397 + }, + { + "epoch": 3.111358574610245, + "loss": 0.6399654150009155, + "loss_ce": 0.0003169975243508816, + "loss_iou": 0.28515625, + "loss_num": 0.01422119140625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 78251972, + "step": 1397 + }, + { + "epoch": 3.11358574610245, + "grad_norm": 15.57333755493164, + "learning_rate": 1e-06, + "loss": 0.7701, + "num_input_tokens_seen": 78304736, + "step": 1398 + }, + { + "epoch": 3.11358574610245, + "loss": 0.7310658693313599, + "loss_ce": 0.00035301962634548545, + "loss_iou": 0.28125, + "loss_num": 0.03369140625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 78304736, + "step": 1398 + }, + { + "epoch": 3.115812917594655, + "grad_norm": 20.79068946838379, + "learning_rate": 1e-06, + "loss": 0.9479, + "num_input_tokens_seen": 78360580, + "step": 1399 + }, + { + "epoch": 3.115812917594655, + "loss": 0.7796562910079956, + "loss_ce": 0.0006035708356648684, + "loss_iou": 0.3125, + "loss_num": 0.03076171875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 78360580, + "step": 1399 + }, + { + "epoch": 3.11804008908686, + "grad_norm": 21.23663330078125, + "learning_rate": 1e-06, + "loss": 0.8893, + "num_input_tokens_seen": 78417556, + "step": 1400 + }, + { + "epoch": 3.11804008908686, + "loss": 0.6944925785064697, + "loss_ce": 0.0004008029936812818, + "loss_iou": 0.30078125, + "loss_num": 0.018310546875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 78417556, + "step": 1400 + }, + { + "epoch": 3.1202672605790647, + "grad_norm": 20.82147789001465, + "learning_rate": 1e-06, + "loss": 0.7757, + "num_input_tokens_seen": 78472356, + "step": 1401 + }, + { + "epoch": 3.1202672605790647, + "loss": 0.6926823258399963, + "loss_ce": 0.00029954389901831746, + "loss_iou": 0.294921875, + "loss_num": 0.0203857421875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 78472356, + "step": 1401 + }, + { + "epoch": 3.1224944320712695, + "grad_norm": 24.593259811401367, + "learning_rate": 1e-06, + "loss": 1.1074, + "num_input_tokens_seen": 78529324, + "step": 1402 + }, + { + "epoch": 3.1224944320712695, + "loss": 1.332648515701294, + "loss_ce": 0.0006172613939270377, + "loss_iou": 0.52734375, + "loss_num": 0.0556640625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 78529324, + "step": 1402 + }, + { + "epoch": 3.1247216035634744, + "grad_norm": 16.219890594482422, + "learning_rate": 1e-06, + "loss": 0.8194, + "num_input_tokens_seen": 78582848, + "step": 1403 + }, + { + "epoch": 3.1247216035634744, + "loss": 0.8170123100280762, + "loss_ce": 0.00036187097430229187, + "loss_iou": 0.369140625, + "loss_num": 0.01556396484375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 78582848, + "step": 1403 + }, + { + "epoch": 3.1269487750556793, + "grad_norm": 24.382253646850586, + "learning_rate": 1e-06, + "loss": 0.9872, + "num_input_tokens_seen": 78637908, + "step": 1404 + }, + { + "epoch": 3.1269487750556793, + "loss": 0.9994413256645203, + "loss_ce": 0.0004178856033831835, + "loss_iou": 0.412109375, + "loss_num": 0.03515625, + "loss_xval": 1.0, + "num_input_tokens_seen": 78637908, + "step": 1404 + }, + { + "epoch": 3.129175946547884, + "grad_norm": 16.891210556030273, + "learning_rate": 1e-06, + "loss": 0.7293, + "num_input_tokens_seen": 78692304, + "step": 1405 + }, + { + "epoch": 3.129175946547884, + "loss": 0.6750854253768921, + "loss_ce": 0.0002806965203490108, + "loss_iou": 0.27734375, + "loss_num": 0.0240478515625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 78692304, + "step": 1405 + }, + { + "epoch": 3.131403118040089, + "grad_norm": 22.945178985595703, + "learning_rate": 1e-06, + "loss": 0.944, + "num_input_tokens_seen": 78746900, + "step": 1406 + }, + { + "epoch": 3.131403118040089, + "loss": 1.02711820602417, + "loss_ce": 0.000506936979945749, + "loss_iou": 0.431640625, + "loss_num": 0.03271484375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 78746900, + "step": 1406 + }, + { + "epoch": 3.133630289532294, + "grad_norm": 17.015195846557617, + "learning_rate": 1e-06, + "loss": 0.8693, + "num_input_tokens_seen": 78804192, + "step": 1407 + }, + { + "epoch": 3.133630289532294, + "loss": 0.7983866930007935, + "loss_ce": 0.0002909940667450428, + "loss_iou": 0.36328125, + "loss_num": 0.01446533203125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 78804192, + "step": 1407 + }, + { + "epoch": 3.1358574610244987, + "grad_norm": 38.74119567871094, + "learning_rate": 1e-06, + "loss": 0.9071, + "num_input_tokens_seen": 78858704, + "step": 1408 + }, + { + "epoch": 3.1358574610244987, + "loss": 0.9215487241744995, + "loss_ce": 0.00028408068465068936, + "loss_iou": 0.40625, + "loss_num": 0.021484375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 78858704, + "step": 1408 + }, + { + "epoch": 3.138084632516704, + "grad_norm": 16.403173446655273, + "learning_rate": 1e-06, + "loss": 0.8264, + "num_input_tokens_seen": 78913596, + "step": 1409 + }, + { + "epoch": 3.138084632516704, + "loss": 0.7442107796669006, + "loss_ce": 0.0003143020730931312, + "loss_iou": 0.28515625, + "loss_num": 0.03466796875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 78913596, + "step": 1409 + }, + { + "epoch": 3.140311804008909, + "grad_norm": 125.99543762207031, + "learning_rate": 1e-06, + "loss": 0.7983, + "num_input_tokens_seen": 78971496, + "step": 1410 + }, + { + "epoch": 3.140311804008909, + "loss": 0.7936596870422363, + "loss_ce": 0.0006909238873049617, + "loss_iou": 0.34765625, + "loss_num": 0.01953125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 78971496, + "step": 1410 + }, + { + "epoch": 3.1425389755011137, + "grad_norm": 29.1822509765625, + "learning_rate": 1e-06, + "loss": 0.7606, + "num_input_tokens_seen": 79027964, + "step": 1411 + }, + { + "epoch": 3.1425389755011137, + "loss": 0.5414670705795288, + "loss_ce": 0.00032938801450654864, + "loss_iou": 0.22265625, + "loss_num": 0.0191650390625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 79027964, + "step": 1411 + }, + { + "epoch": 3.1447661469933186, + "grad_norm": 62.62162399291992, + "learning_rate": 1e-06, + "loss": 1.0511, + "num_input_tokens_seen": 79085588, + "step": 1412 + }, + { + "epoch": 3.1447661469933186, + "loss": 0.763961911201477, + "loss_ce": 0.00029001818620599806, + "loss_iou": 0.3359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 79085588, + "step": 1412 + }, + { + "epoch": 3.1469933184855234, + "grad_norm": 20.36224937438965, + "learning_rate": 1e-06, + "loss": 0.9567, + "num_input_tokens_seen": 79143720, + "step": 1413 + }, + { + "epoch": 3.1469933184855234, + "loss": 0.7871879935264587, + "loss_ce": 0.00032272058888338506, + "loss_iou": 0.333984375, + "loss_num": 0.0234375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 79143720, + "step": 1413 + }, + { + "epoch": 3.1492204899777283, + "grad_norm": 16.99781608581543, + "learning_rate": 1e-06, + "loss": 0.9043, + "num_input_tokens_seen": 79200260, + "step": 1414 + }, + { + "epoch": 3.1492204899777283, + "loss": 1.0250790119171143, + "loss_ce": 0.00042086507892236114, + "loss_iou": 0.421875, + "loss_num": 0.035888671875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 79200260, + "step": 1414 + }, + { + "epoch": 3.151447661469933, + "grad_norm": 16.20554542541504, + "learning_rate": 1e-06, + "loss": 1.0504, + "num_input_tokens_seen": 79255792, + "step": 1415 + }, + { + "epoch": 3.151447661469933, + "loss": 1.2998144626617432, + "loss_ce": 0.0004980739904567599, + "loss_iou": 0.53515625, + "loss_num": 0.0458984375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 79255792, + "step": 1415 + }, + { + "epoch": 3.153674832962138, + "grad_norm": 25.340084075927734, + "learning_rate": 1e-06, + "loss": 1.1359, + "num_input_tokens_seen": 79312352, + "step": 1416 + }, + { + "epoch": 3.153674832962138, + "loss": 0.8494139313697815, + "loss_ce": 0.0007811552495695651, + "loss_iou": 0.353515625, + "loss_num": 0.0279541015625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 79312352, + "step": 1416 + }, + { + "epoch": 3.155902004454343, + "grad_norm": 11.992765426635742, + "learning_rate": 1e-06, + "loss": 0.8142, + "num_input_tokens_seen": 79369780, + "step": 1417 + }, + { + "epoch": 3.155902004454343, + "loss": 0.6079621911048889, + "loss_ce": 0.00029617524705827236, + "loss_iou": 0.2734375, + "loss_num": 0.01220703125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 79369780, + "step": 1417 + }, + { + "epoch": 3.1581291759465477, + "grad_norm": 20.040773391723633, + "learning_rate": 1e-06, + "loss": 0.9713, + "num_input_tokens_seen": 79424212, + "step": 1418 + }, + { + "epoch": 3.1581291759465477, + "loss": 0.855168342590332, + "loss_ce": 0.00030997302383184433, + "loss_iou": 0.37109375, + "loss_num": 0.022705078125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 79424212, + "step": 1418 + }, + { + "epoch": 3.1603563474387526, + "grad_norm": 24.867382049560547, + "learning_rate": 1e-06, + "loss": 1.0417, + "num_input_tokens_seen": 79481792, + "step": 1419 + }, + { + "epoch": 3.1603563474387526, + "loss": 1.1068477630615234, + "loss_ce": 0.00040248059667646885, + "loss_iou": 0.4296875, + "loss_num": 0.04931640625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 79481792, + "step": 1419 + }, + { + "epoch": 3.1625835189309575, + "grad_norm": 26.066076278686523, + "learning_rate": 1e-06, + "loss": 1.0346, + "num_input_tokens_seen": 79538200, + "step": 1420 + }, + { + "epoch": 3.1625835189309575, + "loss": 0.9904955625534058, + "loss_ce": 0.0005052955239079893, + "loss_iou": 0.392578125, + "loss_num": 0.041015625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 79538200, + "step": 1420 + }, + { + "epoch": 3.1648106904231628, + "grad_norm": 32.84996032714844, + "learning_rate": 1e-06, + "loss": 0.7832, + "num_input_tokens_seen": 79594752, + "step": 1421 + }, + { + "epoch": 3.1648106904231628, + "loss": 0.6035779714584351, + "loss_ce": 0.0003064596385229379, + "loss_iou": 0.244140625, + "loss_num": 0.0230712890625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 79594752, + "step": 1421 + }, + { + "epoch": 3.1670378619153676, + "grad_norm": 12.997925758361816, + "learning_rate": 1e-06, + "loss": 0.7086, + "num_input_tokens_seen": 79649304, + "step": 1422 + }, + { + "epoch": 3.1670378619153676, + "loss": 0.6079559326171875, + "loss_ce": 0.0002899472019635141, + "loss_iou": 0.2578125, + "loss_num": 0.0186767578125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 79649304, + "step": 1422 + }, + { + "epoch": 3.1692650334075725, + "grad_norm": 14.003304481506348, + "learning_rate": 1e-06, + "loss": 0.9371, + "num_input_tokens_seen": 79705016, + "step": 1423 + }, + { + "epoch": 3.1692650334075725, + "loss": 0.9424291253089905, + "loss_ce": 0.00029046309646219015, + "loss_iou": 0.408203125, + "loss_num": 0.0255126953125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 79705016, + "step": 1423 + }, + { + "epoch": 3.1714922048997773, + "grad_norm": 15.485200881958008, + "learning_rate": 1e-06, + "loss": 0.7606, + "num_input_tokens_seen": 79760764, + "step": 1424 + }, + { + "epoch": 3.1714922048997773, + "loss": 0.867828369140625, + "loss_ce": 0.0003967147204093635, + "loss_iou": 0.375, + "loss_num": 0.02294921875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 79760764, + "step": 1424 + }, + { + "epoch": 3.173719376391982, + "grad_norm": 19.317201614379883, + "learning_rate": 1e-06, + "loss": 0.7716, + "num_input_tokens_seen": 79816396, + "step": 1425 + }, + { + "epoch": 3.173719376391982, + "loss": 0.697672963142395, + "loss_ce": 0.0004073456802871078, + "loss_iou": 0.291015625, + "loss_num": 0.0234375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 79816396, + "step": 1425 + }, + { + "epoch": 3.175946547884187, + "grad_norm": 16.760774612426758, + "learning_rate": 1e-06, + "loss": 0.8792, + "num_input_tokens_seen": 79871868, + "step": 1426 + }, + { + "epoch": 3.175946547884187, + "loss": 0.954727292060852, + "loss_ce": 0.00038158154347911477, + "loss_iou": 0.3828125, + "loss_num": 0.0380859375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 79871868, + "step": 1426 + }, + { + "epoch": 3.178173719376392, + "grad_norm": 18.781694412231445, + "learning_rate": 1e-06, + "loss": 0.6773, + "num_input_tokens_seen": 79928048, + "step": 1427 + }, + { + "epoch": 3.178173719376392, + "loss": 0.6321383714675903, + "loss_ce": 0.0003024227044079453, + "loss_iou": 0.2734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 79928048, + "step": 1427 + }, + { + "epoch": 3.180400890868597, + "grad_norm": 14.42310905456543, + "learning_rate": 1e-06, + "loss": 0.7926, + "num_input_tokens_seen": 79983508, + "step": 1428 + }, + { + "epoch": 3.180400890868597, + "loss": 0.8372380137443542, + "loss_ce": 0.0003239199868403375, + "loss_iou": 0.353515625, + "loss_num": 0.02587890625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 79983508, + "step": 1428 + }, + { + "epoch": 3.1826280623608016, + "grad_norm": 14.923147201538086, + "learning_rate": 1e-06, + "loss": 0.9376, + "num_input_tokens_seen": 80042264, + "step": 1429 + }, + { + "epoch": 3.1826280623608016, + "loss": 0.7183116674423218, + "loss_ce": 0.00029408183763734996, + "loss_iou": 0.294921875, + "loss_num": 0.025146484375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 80042264, + "step": 1429 + }, + { + "epoch": 3.1848552338530065, + "grad_norm": 14.917871475219727, + "learning_rate": 1e-06, + "loss": 0.9401, + "num_input_tokens_seen": 80098272, + "step": 1430 + }, + { + "epoch": 3.1848552338530065, + "loss": 1.095562219619751, + "loss_ce": 0.00034750672057271004, + "loss_iou": 0.451171875, + "loss_num": 0.038330078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 80098272, + "step": 1430 + }, + { + "epoch": 3.187082405345212, + "grad_norm": 21.82032585144043, + "learning_rate": 1e-06, + "loss": 0.813, + "num_input_tokens_seen": 80153352, + "step": 1431 + }, + { + "epoch": 3.187082405345212, + "loss": 0.6784963607788086, + "loss_ce": 0.00027368031442165375, + "loss_iou": 0.2890625, + "loss_num": 0.0196533203125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 80153352, + "step": 1431 + }, + { + "epoch": 3.1893095768374167, + "grad_norm": 20.35378074645996, + "learning_rate": 1e-06, + "loss": 0.8377, + "num_input_tokens_seen": 80209672, + "step": 1432 + }, + { + "epoch": 3.1893095768374167, + "loss": 0.9991272687911987, + "loss_ce": 0.0003479632141534239, + "loss_iou": 0.419921875, + "loss_num": 0.03173828125, + "loss_xval": 1.0, + "num_input_tokens_seen": 80209672, + "step": 1432 + }, + { + "epoch": 3.1915367483296215, + "grad_norm": 18.134105682373047, + "learning_rate": 1e-06, + "loss": 0.7804, + "num_input_tokens_seen": 80264404, + "step": 1433 + }, + { + "epoch": 3.1915367483296215, + "loss": 0.8298586010932922, + "loss_ce": 0.000268744770437479, + "loss_iou": 0.349609375, + "loss_num": 0.0257568359375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 80264404, + "step": 1433 + }, + { + "epoch": 3.1937639198218264, + "grad_norm": 25.565568923950195, + "learning_rate": 1e-06, + "loss": 0.7445, + "num_input_tokens_seen": 80322156, + "step": 1434 + }, + { + "epoch": 3.1937639198218264, + "loss": 0.8270922303199768, + "loss_ce": 0.00043206167174503207, + "loss_iou": 0.345703125, + "loss_num": 0.0269775390625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 80322156, + "step": 1434 + }, + { + "epoch": 3.1959910913140313, + "grad_norm": 19.158845901489258, + "learning_rate": 1e-06, + "loss": 0.7464, + "num_input_tokens_seen": 80378616, + "step": 1435 + }, + { + "epoch": 3.1959910913140313, + "loss": 0.6876095533370972, + "loss_ce": 0.0005977747496217489, + "loss_iou": 0.26171875, + "loss_num": 0.032470703125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 80378616, + "step": 1435 + }, + { + "epoch": 3.198218262806236, + "grad_norm": 12.419325828552246, + "learning_rate": 1e-06, + "loss": 0.5989, + "num_input_tokens_seen": 80437060, + "step": 1436 + }, + { + "epoch": 3.198218262806236, + "loss": 0.6812313199043274, + "loss_ce": 0.00032312102848663926, + "loss_iou": 0.287109375, + "loss_num": 0.0213623046875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 80437060, + "step": 1436 + }, + { + "epoch": 3.200445434298441, + "grad_norm": 27.797161102294922, + "learning_rate": 1e-06, + "loss": 0.8508, + "num_input_tokens_seen": 80492916, + "step": 1437 + }, + { + "epoch": 3.200445434298441, + "loss": 0.8674960732460022, + "loss_ce": 0.00030855537625029683, + "loss_iou": 0.34765625, + "loss_num": 0.0341796875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 80492916, + "step": 1437 + }, + { + "epoch": 3.202672605790646, + "grad_norm": 26.252103805541992, + "learning_rate": 1e-06, + "loss": 0.8936, + "num_input_tokens_seen": 80550292, + "step": 1438 + }, + { + "epoch": 3.202672605790646, + "loss": 0.885981559753418, + "loss_ce": 0.00048353534657508135, + "loss_iou": 0.384765625, + "loss_num": 0.022705078125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 80550292, + "step": 1438 + }, + { + "epoch": 3.2048997772828507, + "grad_norm": 18.954618453979492, + "learning_rate": 1e-06, + "loss": 1.1562, + "num_input_tokens_seen": 80606748, + "step": 1439 + }, + { + "epoch": 3.2048997772828507, + "loss": 0.9013663530349731, + "loss_ce": 0.00048747207620181143, + "loss_iou": 0.35546875, + "loss_num": 0.038330078125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 80606748, + "step": 1439 + }, + { + "epoch": 3.2071269487750556, + "grad_norm": 17.839231491088867, + "learning_rate": 1e-06, + "loss": 0.7546, + "num_input_tokens_seen": 80665732, + "step": 1440 + }, + { + "epoch": 3.2071269487750556, + "loss": 0.7411551475524902, + "loss_ce": 0.0004324775072745979, + "loss_iou": 0.3203125, + "loss_num": 0.0198974609375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 80665732, + "step": 1440 + }, + { + "epoch": 3.2093541202672604, + "grad_norm": 16.883333206176758, + "learning_rate": 1e-06, + "loss": 0.8151, + "num_input_tokens_seen": 80720260, + "step": 1441 + }, + { + "epoch": 3.2093541202672604, + "loss": 1.001312255859375, + "loss_ce": 0.00033568451181054115, + "loss_iou": 0.40234375, + "loss_num": 0.039794921875, + "loss_xval": 1.0, + "num_input_tokens_seen": 80720260, + "step": 1441 + }, + { + "epoch": 3.2115812917594653, + "grad_norm": 19.242998123168945, + "learning_rate": 1e-06, + "loss": 0.8271, + "num_input_tokens_seen": 80771032, + "step": 1442 + }, + { + "epoch": 3.2115812917594653, + "loss": 0.8663139343261719, + "loss_ce": 0.0005913155619055033, + "loss_iou": 0.345703125, + "loss_num": 0.034912109375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 80771032, + "step": 1442 + }, + { + "epoch": 3.2138084632516706, + "grad_norm": 18.97730827331543, + "learning_rate": 1e-06, + "loss": 0.7771, + "num_input_tokens_seen": 80823748, + "step": 1443 + }, + { + "epoch": 3.2138084632516706, + "loss": 0.7416298389434814, + "loss_ce": 0.0002968141343444586, + "loss_iou": 0.314453125, + "loss_num": 0.0224609375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 80823748, + "step": 1443 + }, + { + "epoch": 3.2160356347438754, + "grad_norm": 71.22569274902344, + "learning_rate": 1e-06, + "loss": 0.966, + "num_input_tokens_seen": 80879232, + "step": 1444 + }, + { + "epoch": 3.2160356347438754, + "loss": 0.8303946852684021, + "loss_ce": 0.000316560675855726, + "loss_iou": 0.345703125, + "loss_num": 0.02783203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 80879232, + "step": 1444 + }, + { + "epoch": 3.2182628062360803, + "grad_norm": 16.554031372070312, + "learning_rate": 1e-06, + "loss": 0.772, + "num_input_tokens_seen": 80936056, + "step": 1445 + }, + { + "epoch": 3.2182628062360803, + "loss": 0.7341041564941406, + "loss_ce": 0.0004616554651875049, + "loss_iou": 0.294921875, + "loss_num": 0.0286865234375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 80936056, + "step": 1445 + }, + { + "epoch": 3.220489977728285, + "grad_norm": 12.350296020507812, + "learning_rate": 1e-06, + "loss": 0.983, + "num_input_tokens_seen": 80992044, + "step": 1446 + }, + { + "epoch": 3.220489977728285, + "loss": 1.119089126586914, + "loss_ce": 0.00043676019413396716, + "loss_iou": 0.4375, + "loss_num": 0.048583984375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 80992044, + "step": 1446 + }, + { + "epoch": 3.22271714922049, + "grad_norm": 35.66438293457031, + "learning_rate": 1e-06, + "loss": 0.9671, + "num_input_tokens_seen": 81047568, + "step": 1447 + }, + { + "epoch": 3.22271714922049, + "loss": 1.1087019443511963, + "loss_ce": 0.000303465174511075, + "loss_iou": 0.46875, + "loss_num": 0.0341796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 81047568, + "step": 1447 + }, + { + "epoch": 3.224944320712695, + "grad_norm": 23.309444427490234, + "learning_rate": 1e-06, + "loss": 1.0038, + "num_input_tokens_seen": 81105220, + "step": 1448 + }, + { + "epoch": 3.224944320712695, + "loss": 0.9000189304351807, + "loss_ce": 0.0008490003529004753, + "loss_iou": 0.37109375, + "loss_num": 0.031494140625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 81105220, + "step": 1448 + }, + { + "epoch": 3.2271714922048997, + "grad_norm": 20.23189926147461, + "learning_rate": 1e-06, + "loss": 0.7571, + "num_input_tokens_seen": 81157732, + "step": 1449 + }, + { + "epoch": 3.2271714922048997, + "loss": 0.5965100526809692, + "loss_ce": 0.00031864526681602, + "loss_iou": 0.2412109375, + "loss_num": 0.022705078125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 81157732, + "step": 1449 + }, + { + "epoch": 3.2293986636971046, + "grad_norm": 20.063142776489258, + "learning_rate": 1e-06, + "loss": 0.7535, + "num_input_tokens_seen": 81215292, + "step": 1450 + }, + { + "epoch": 3.2293986636971046, + "loss": 0.9407631158828735, + "loss_ce": 0.00033346362761221826, + "loss_iou": 0.40625, + "loss_num": 0.0255126953125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 81215292, + "step": 1450 + }, + { + "epoch": 3.2316258351893095, + "grad_norm": 19.31182289123535, + "learning_rate": 1e-06, + "loss": 0.817, + "num_input_tokens_seen": 81272552, + "step": 1451 + }, + { + "epoch": 3.2316258351893095, + "loss": 1.0058592557907104, + "loss_ce": 0.0004881612549070269, + "loss_iou": 0.376953125, + "loss_num": 0.050048828125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 81272552, + "step": 1451 + }, + { + "epoch": 3.2338530066815143, + "grad_norm": 34.98418426513672, + "learning_rate": 1e-06, + "loss": 0.9567, + "num_input_tokens_seen": 81322452, + "step": 1452 + }, + { + "epoch": 3.2338530066815143, + "loss": 0.8940277099609375, + "loss_ce": 0.000717109243851155, + "loss_iou": 0.33203125, + "loss_num": 0.04541015625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 81322452, + "step": 1452 + }, + { + "epoch": 3.236080178173719, + "grad_norm": 22.681028366088867, + "learning_rate": 1e-06, + "loss": 1.0263, + "num_input_tokens_seen": 81378452, + "step": 1453 + }, + { + "epoch": 3.236080178173719, + "loss": 1.1674110889434814, + "loss_ce": 0.0006629737326875329, + "loss_iou": 0.46484375, + "loss_num": 0.047607421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 81378452, + "step": 1453 + }, + { + "epoch": 3.2383073496659245, + "grad_norm": 15.543540954589844, + "learning_rate": 1e-06, + "loss": 0.7653, + "num_input_tokens_seen": 81432916, + "step": 1454 + }, + { + "epoch": 3.2383073496659245, + "loss": 0.5583458542823792, + "loss_ce": 0.0002403960534138605, + "loss_iou": 0.2392578125, + "loss_num": 0.015869140625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 81432916, + "step": 1454 + }, + { + "epoch": 3.2405345211581293, + "grad_norm": 35.686893463134766, + "learning_rate": 1e-06, + "loss": 1.0985, + "num_input_tokens_seen": 81488412, + "step": 1455 + }, + { + "epoch": 3.2405345211581293, + "loss": 1.1553540229797363, + "loss_ce": 0.0003246957203373313, + "loss_iou": 0.490234375, + "loss_num": 0.034423828125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 81488412, + "step": 1455 + }, + { + "epoch": 3.242761692650334, + "grad_norm": 16.909992218017578, + "learning_rate": 1e-06, + "loss": 0.962, + "num_input_tokens_seen": 81543048, + "step": 1456 + }, + { + "epoch": 3.242761692650334, + "loss": 1.0606622695922852, + "loss_ce": 0.00035947078140452504, + "loss_iou": 0.41796875, + "loss_num": 0.045654296875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 81543048, + "step": 1456 + }, + { + "epoch": 3.244988864142539, + "grad_norm": 17.404476165771484, + "learning_rate": 1e-06, + "loss": 0.5719, + "num_input_tokens_seen": 81599572, + "step": 1457 + }, + { + "epoch": 3.244988864142539, + "loss": 0.43446803092956543, + "loss_ce": 0.00026391935534775257, + "loss_iou": 0.1845703125, + "loss_num": 0.01312255859375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 81599572, + "step": 1457 + }, + { + "epoch": 3.247216035634744, + "grad_norm": 15.075811386108398, + "learning_rate": 1e-06, + "loss": 0.8541, + "num_input_tokens_seen": 81658816, + "step": 1458 + }, + { + "epoch": 3.247216035634744, + "loss": 0.969578206539154, + "loss_ce": 0.0003399454872123897, + "loss_iou": 0.400390625, + "loss_num": 0.0341796875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 81658816, + "step": 1458 + }, + { + "epoch": 3.249443207126949, + "grad_norm": 189.14511108398438, + "learning_rate": 1e-06, + "loss": 0.767, + "num_input_tokens_seen": 81714620, + "step": 1459 + }, + { + "epoch": 3.249443207126949, + "loss": 0.8307961225509644, + "loss_ce": 0.00022976743639446795, + "loss_iou": 0.361328125, + "loss_num": 0.0213623046875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 81714620, + "step": 1459 + }, + { + "epoch": 3.2516703786191536, + "grad_norm": 16.0401611328125, + "learning_rate": 1e-06, + "loss": 0.6961, + "num_input_tokens_seen": 81768192, + "step": 1460 + }, + { + "epoch": 3.2516703786191536, + "loss": 0.4611513614654541, + "loss_ce": 0.00027491431683301926, + "loss_iou": 0.1875, + "loss_num": 0.0172119140625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 81768192, + "step": 1460 + }, + { + "epoch": 3.2538975501113585, + "grad_norm": 24.038867950439453, + "learning_rate": 1e-06, + "loss": 0.945, + "num_input_tokens_seen": 81823076, + "step": 1461 + }, + { + "epoch": 3.2538975501113585, + "loss": 1.0662281513214111, + "loss_ce": 0.00043232861207798123, + "loss_iou": 0.453125, + "loss_num": 0.031494140625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 81823076, + "step": 1461 + }, + { + "epoch": 3.2561247216035634, + "grad_norm": 27.399744033813477, + "learning_rate": 1e-06, + "loss": 0.8288, + "num_input_tokens_seen": 81878532, + "step": 1462 + }, + { + "epoch": 3.2561247216035634, + "loss": 1.0180044174194336, + "loss_ce": 0.00042639480670914054, + "loss_iou": 0.4453125, + "loss_num": 0.0252685546875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 81878532, + "step": 1462 + }, + { + "epoch": 3.2583518930957682, + "grad_norm": 30.450820922851562, + "learning_rate": 1e-06, + "loss": 0.9573, + "num_input_tokens_seen": 81934472, + "step": 1463 + }, + { + "epoch": 3.2583518930957682, + "loss": 0.7863764762878418, + "loss_ce": 0.00024365229182876647, + "loss_iou": 0.310546875, + "loss_num": 0.03271484375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 81934472, + "step": 1463 + }, + { + "epoch": 3.260579064587973, + "grad_norm": 73.2469482421875, + "learning_rate": 1e-06, + "loss": 0.8654, + "num_input_tokens_seen": 81992352, + "step": 1464 + }, + { + "epoch": 3.260579064587973, + "loss": 0.8775367736816406, + "loss_ce": 0.0003394762461539358, + "loss_iou": 0.373046875, + "loss_num": 0.0264892578125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 81992352, + "step": 1464 + }, + { + "epoch": 3.262806236080178, + "grad_norm": 17.928956985473633, + "learning_rate": 1e-06, + "loss": 0.9042, + "num_input_tokens_seen": 82047880, + "step": 1465 + }, + { + "epoch": 3.262806236080178, + "loss": 0.8798074722290039, + "loss_ce": 0.009446126408874989, + "loss_iou": 0.37109375, + "loss_num": 0.025390625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 82047880, + "step": 1465 + }, + { + "epoch": 3.2650334075723833, + "grad_norm": 13.953981399536133, + "learning_rate": 1e-06, + "loss": 0.7979, + "num_input_tokens_seen": 82105036, + "step": 1466 + }, + { + "epoch": 3.2650334075723833, + "loss": 0.9124147891998291, + "loss_ce": 0.0003053955442737788, + "loss_iou": 0.408203125, + "loss_num": 0.01953125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 82105036, + "step": 1466 + }, + { + "epoch": 3.267260579064588, + "grad_norm": 19.903533935546875, + "learning_rate": 1e-06, + "loss": 0.7328, + "num_input_tokens_seen": 82161524, + "step": 1467 + }, + { + "epoch": 3.267260579064588, + "loss": 0.7695513963699341, + "loss_ce": 0.0002642880426719785, + "loss_iou": 0.328125, + "loss_num": 0.0225830078125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 82161524, + "step": 1467 + }, + { + "epoch": 3.269487750556793, + "grad_norm": 16.431427001953125, + "learning_rate": 1e-06, + "loss": 0.7701, + "num_input_tokens_seen": 82216912, + "step": 1468 + }, + { + "epoch": 3.269487750556793, + "loss": 0.7241719365119934, + "loss_ce": 0.00029497878858819604, + "loss_iou": 0.3125, + "loss_num": 0.0194091796875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 82216912, + "step": 1468 + }, + { + "epoch": 3.271714922048998, + "grad_norm": 21.81812286376953, + "learning_rate": 1e-06, + "loss": 0.7118, + "num_input_tokens_seen": 82273656, + "step": 1469 + }, + { + "epoch": 3.271714922048998, + "loss": 0.6574879884719849, + "loss_ce": 0.0002613977121654898, + "loss_iou": 0.28125, + "loss_num": 0.0191650390625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 82273656, + "step": 1469 + }, + { + "epoch": 3.2739420935412027, + "grad_norm": 16.43152618408203, + "learning_rate": 1e-06, + "loss": 0.8491, + "num_input_tokens_seen": 82329848, + "step": 1470 + }, + { + "epoch": 3.2739420935412027, + "loss": 0.6564966440200806, + "loss_ce": 0.0002466381702106446, + "loss_iou": 0.26171875, + "loss_num": 0.0263671875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 82329848, + "step": 1470 + }, + { + "epoch": 3.2761692650334076, + "grad_norm": 26.318092346191406, + "learning_rate": 1e-06, + "loss": 0.7104, + "num_input_tokens_seen": 82388348, + "step": 1471 + }, + { + "epoch": 3.2761692650334076, + "loss": 0.7334426045417786, + "loss_ce": 0.00028831767849624157, + "loss_iou": 0.283203125, + "loss_num": 0.033447265625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 82388348, + "step": 1471 + }, + { + "epoch": 3.2783964365256124, + "grad_norm": 15.957210540771484, + "learning_rate": 1e-06, + "loss": 0.5911, + "num_input_tokens_seen": 82447224, + "step": 1472 + }, + { + "epoch": 3.2783964365256124, + "loss": 0.5700989961624146, + "loss_ce": 0.0002747434191405773, + "loss_iou": 0.2451171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 82447224, + "step": 1472 + }, + { + "epoch": 3.2806236080178173, + "grad_norm": 21.27912712097168, + "learning_rate": 1e-06, + "loss": 0.9417, + "num_input_tokens_seen": 82503100, + "step": 1473 + }, + { + "epoch": 3.2806236080178173, + "loss": 1.111638069152832, + "loss_ce": 0.0003100019530393183, + "loss_iou": 0.490234375, + "loss_num": 0.0257568359375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 82503100, + "step": 1473 + }, + { + "epoch": 3.282850779510022, + "grad_norm": 36.66477584838867, + "learning_rate": 1e-06, + "loss": 0.8626, + "num_input_tokens_seen": 82560744, + "step": 1474 + }, + { + "epoch": 3.282850779510022, + "loss": 0.839104413986206, + "loss_ce": 0.00048139755381271243, + "loss_iou": 0.359375, + "loss_num": 0.0244140625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 82560744, + "step": 1474 + }, + { + "epoch": 3.285077951002227, + "grad_norm": 20.039661407470703, + "learning_rate": 1e-06, + "loss": 0.8975, + "num_input_tokens_seen": 82617300, + "step": 1475 + }, + { + "epoch": 3.285077951002227, + "loss": 1.0146886110305786, + "loss_ce": 0.0002843354013748467, + "loss_iou": 0.439453125, + "loss_num": 0.0274658203125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 82617300, + "step": 1475 + }, + { + "epoch": 3.2873051224944323, + "grad_norm": 15.592321395874023, + "learning_rate": 1e-06, + "loss": 0.93, + "num_input_tokens_seen": 82670780, + "step": 1476 + }, + { + "epoch": 3.2873051224944323, + "loss": 0.9146976470947266, + "loss_ce": 0.0008792462758719921, + "loss_iou": 0.373046875, + "loss_num": 0.03369140625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 82670780, + "step": 1476 + }, + { + "epoch": 3.289532293986637, + "grad_norm": 15.493447303771973, + "learning_rate": 1e-06, + "loss": 0.7573, + "num_input_tokens_seen": 82729732, + "step": 1477 + }, + { + "epoch": 3.289532293986637, + "loss": 0.6953732371330261, + "loss_ce": 0.0003048654180020094, + "loss_iou": 0.291015625, + "loss_num": 0.022705078125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 82729732, + "step": 1477 + }, + { + "epoch": 3.291759465478842, + "grad_norm": 26.018573760986328, + "learning_rate": 1e-06, + "loss": 0.8349, + "num_input_tokens_seen": 82787736, + "step": 1478 + }, + { + "epoch": 3.291759465478842, + "loss": 0.6441195607185364, + "loss_ce": 0.0003207349800504744, + "loss_iou": 0.28515625, + "loss_num": 0.01446533203125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 82787736, + "step": 1478 + }, + { + "epoch": 3.293986636971047, + "grad_norm": 18.60807991027832, + "learning_rate": 1e-06, + "loss": 0.9953, + "num_input_tokens_seen": 82843952, + "step": 1479 + }, + { + "epoch": 3.293986636971047, + "loss": 0.9405885934829712, + "loss_ce": 0.0004030146519653499, + "loss_iou": 0.375, + "loss_num": 0.038330078125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 82843952, + "step": 1479 + }, + { + "epoch": 3.2962138084632517, + "grad_norm": 71.17398834228516, + "learning_rate": 1e-06, + "loss": 0.9742, + "num_input_tokens_seen": 82901868, + "step": 1480 + }, + { + "epoch": 3.2962138084632517, + "loss": 1.149244785308838, + "loss_ce": 0.00031894395942799747, + "loss_iou": 0.50390625, + "loss_num": 0.0283203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 82901868, + "step": 1480 + }, + { + "epoch": 3.2984409799554566, + "grad_norm": 53.66946792602539, + "learning_rate": 1e-06, + "loss": 0.941, + "num_input_tokens_seen": 82955420, + "step": 1481 + }, + { + "epoch": 3.2984409799554566, + "loss": 1.0035812854766846, + "loss_ce": 0.004924064036458731, + "loss_iou": 0.39453125, + "loss_num": 0.042236328125, + "loss_xval": 1.0, + "num_input_tokens_seen": 82955420, + "step": 1481 + }, + { + "epoch": 3.3006681514476615, + "grad_norm": 26.24509048461914, + "learning_rate": 1e-06, + "loss": 0.9412, + "num_input_tokens_seen": 83009936, + "step": 1482 + }, + { + "epoch": 3.3006681514476615, + "loss": 1.0405406951904297, + "loss_ce": 0.0006236102781258523, + "loss_iou": 0.4296875, + "loss_num": 0.036376953125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 83009936, + "step": 1482 + }, + { + "epoch": 3.3028953229398663, + "grad_norm": 14.86413860321045, + "learning_rate": 1e-06, + "loss": 0.8586, + "num_input_tokens_seen": 83067024, + "step": 1483 + }, + { + "epoch": 3.3028953229398663, + "loss": 0.9363436102867126, + "loss_ce": 0.00030847761081531644, + "loss_iou": 0.404296875, + "loss_num": 0.0255126953125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 83067024, + "step": 1483 + }, + { + "epoch": 3.305122494432071, + "grad_norm": 16.618406295776367, + "learning_rate": 1e-06, + "loss": 0.8834, + "num_input_tokens_seen": 83122648, + "step": 1484 + }, + { + "epoch": 3.305122494432071, + "loss": 1.071757197380066, + "loss_ce": 0.0004681579302996397, + "loss_iou": 0.435546875, + "loss_num": 0.039794921875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 83122648, + "step": 1484 + }, + { + "epoch": 3.307349665924276, + "grad_norm": 23.63321876525879, + "learning_rate": 1e-06, + "loss": 1.0184, + "num_input_tokens_seen": 83176484, + "step": 1485 + }, + { + "epoch": 3.307349665924276, + "loss": 0.7948580980300903, + "loss_ce": 0.000424501282395795, + "loss_iou": 0.328125, + "loss_num": 0.0277099609375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 83176484, + "step": 1485 + }, + { + "epoch": 3.309576837416481, + "grad_norm": 18.189393997192383, + "learning_rate": 1e-06, + "loss": 0.8297, + "num_input_tokens_seen": 83233140, + "step": 1486 + }, + { + "epoch": 3.309576837416481, + "loss": 0.7806228399276733, + "loss_ce": 0.0003493690746836364, + "loss_iou": 0.31640625, + "loss_num": 0.02978515625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 83233140, + "step": 1486 + }, + { + "epoch": 3.3118040089086858, + "grad_norm": 21.735517501831055, + "learning_rate": 1e-06, + "loss": 0.8333, + "num_input_tokens_seen": 83288588, + "step": 1487 + }, + { + "epoch": 3.3118040089086858, + "loss": 0.6785666942596436, + "loss_ce": 0.0005881287506781518, + "loss_iou": 0.287109375, + "loss_num": 0.0203857421875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 83288588, + "step": 1487 + }, + { + "epoch": 3.3140311804008906, + "grad_norm": 239.3049774169922, + "learning_rate": 1e-06, + "loss": 0.896, + "num_input_tokens_seen": 83343812, + "step": 1488 + }, + { + "epoch": 3.3140311804008906, + "loss": 0.8566187024116516, + "loss_ce": 0.00041753414552658796, + "loss_iou": 0.349609375, + "loss_num": 0.031494140625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 83343812, + "step": 1488 + }, + { + "epoch": 3.316258351893096, + "grad_norm": 29.564838409423828, + "learning_rate": 1e-06, + "loss": 0.9014, + "num_input_tokens_seen": 83399608, + "step": 1489 + }, + { + "epoch": 3.316258351893096, + "loss": 0.8447275161743164, + "loss_ce": 0.0004892901633866131, + "loss_iou": 0.294921875, + "loss_num": 0.05126953125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 83399608, + "step": 1489 + }, + { + "epoch": 3.318485523385301, + "grad_norm": 27.595678329467773, + "learning_rate": 1e-06, + "loss": 0.918, + "num_input_tokens_seen": 83455260, + "step": 1490 + }, + { + "epoch": 3.318485523385301, + "loss": 0.9408430457115173, + "loss_ce": 0.00041330509702675045, + "loss_iou": 0.37890625, + "loss_num": 0.036865234375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 83455260, + "step": 1490 + }, + { + "epoch": 3.3207126948775056, + "grad_norm": 12.939767837524414, + "learning_rate": 1e-06, + "loss": 0.7262, + "num_input_tokens_seen": 83512012, + "step": 1491 + }, + { + "epoch": 3.3207126948775056, + "loss": 0.773056149482727, + "loss_ce": 0.00035112208570353687, + "loss_iou": 0.298828125, + "loss_num": 0.034912109375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 83512012, + "step": 1491 + }, + { + "epoch": 3.3229398663697105, + "grad_norm": 17.990427017211914, + "learning_rate": 1e-06, + "loss": 0.9087, + "num_input_tokens_seen": 83566724, + "step": 1492 + }, + { + "epoch": 3.3229398663697105, + "loss": 0.7485735416412354, + "loss_ce": 0.00028249542810954154, + "loss_iou": 0.28515625, + "loss_num": 0.035400390625, + "loss_xval": 0.75, + "num_input_tokens_seen": 83566724, + "step": 1492 + }, + { + "epoch": 3.3251670378619154, + "grad_norm": 22.296096801757812, + "learning_rate": 1e-06, + "loss": 0.8386, + "num_input_tokens_seen": 83623084, + "step": 1493 + }, + { + "epoch": 3.3251670378619154, + "loss": 0.6770757436752319, + "loss_ce": 0.0003179589402861893, + "loss_iou": 0.283203125, + "loss_num": 0.0223388671875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 83623084, + "step": 1493 + }, + { + "epoch": 3.3273942093541202, + "grad_norm": 23.940916061401367, + "learning_rate": 1e-06, + "loss": 0.8714, + "num_input_tokens_seen": 83679600, + "step": 1494 + }, + { + "epoch": 3.3273942093541202, + "loss": 0.8882294297218323, + "loss_ce": 0.0002899998507928103, + "loss_iou": 0.396484375, + "loss_num": 0.018798828125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 83679600, + "step": 1494 + }, + { + "epoch": 3.329621380846325, + "grad_norm": 27.720958709716797, + "learning_rate": 1e-06, + "loss": 0.7311, + "num_input_tokens_seen": 83736204, + "step": 1495 + }, + { + "epoch": 3.329621380846325, + "loss": 0.7086691856384277, + "loss_ce": 0.00041724531911313534, + "loss_iou": 0.302734375, + "loss_num": 0.0206298828125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 83736204, + "step": 1495 + }, + { + "epoch": 3.33184855233853, + "grad_norm": 23.420194625854492, + "learning_rate": 1e-06, + "loss": 0.8413, + "num_input_tokens_seen": 83792900, + "step": 1496 + }, + { + "epoch": 3.33184855233853, + "loss": 0.7663967609405518, + "loss_ce": 0.00028348196065053344, + "loss_iou": 0.328125, + "loss_num": 0.021728515625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 83792900, + "step": 1496 + }, + { + "epoch": 3.334075723830735, + "grad_norm": 35.49727249145508, + "learning_rate": 1e-06, + "loss": 0.8009, + "num_input_tokens_seen": 83847828, + "step": 1497 + }, + { + "epoch": 3.334075723830735, + "loss": 0.8171854615211487, + "loss_ce": 0.0002908838796429336, + "loss_iou": 0.34765625, + "loss_num": 0.024169921875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 83847828, + "step": 1497 + }, + { + "epoch": 3.33630289532294, + "grad_norm": 20.75282859802246, + "learning_rate": 1e-06, + "loss": 0.9144, + "num_input_tokens_seen": 83903316, + "step": 1498 + }, + { + "epoch": 3.33630289532294, + "loss": 1.1358669996261597, + "loss_ce": 0.00036891031777486205, + "loss_iou": 0.431640625, + "loss_num": 0.05517578125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 83903316, + "step": 1498 + }, + { + "epoch": 3.338530066815145, + "grad_norm": 28.5906925201416, + "learning_rate": 1e-06, + "loss": 0.8748, + "num_input_tokens_seen": 83958736, + "step": 1499 + }, + { + "epoch": 3.338530066815145, + "loss": 1.1274135112762451, + "loss_ce": 0.00046035420382395387, + "loss_iou": 0.4921875, + "loss_num": 0.028564453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 83958736, + "step": 1499 + }, + { + "epoch": 3.34075723830735, + "grad_norm": 23.704599380493164, + "learning_rate": 1e-06, + "loss": 0.8677, + "num_input_tokens_seen": 84012288, + "step": 1500 + }, + { + "epoch": 3.34075723830735, + "eval_seeclick_web_CIoU": 0.5594667792320251, + "eval_seeclick_web_GIoU": 0.5541926324367523, + "eval_seeclick_web_IoU": 0.5765488147735596, + "eval_seeclick_web_MAE_all": 0.017576972022652626, + "eval_seeclick_web_MAE_h": 0.010464820079505444, + "eval_seeclick_web_MAE_w": 0.018719857558608055, + "eval_seeclick_web_MAE_x_boxes": 0.00861422996968031, + "eval_seeclick_web_MAE_y_boxes": 0.022560626734048128, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9647981524467468, + "eval_seeclick_web_loss_ce": 0.0004042425425723195, + "eval_seeclick_web_loss_iou": 0.4376220703125, + "eval_seeclick_web_loss_num": 0.013916015625, + "eval_seeclick_web_loss_xval": 0.94482421875, + "eval_seeclick_web_runtime": 33.8762, + "eval_seeclick_web_samples_per_second": 1.476, + "eval_seeclick_web_steps_per_second": 0.059, + "num_input_tokens_seen": 84012288, + "step": 1500 + }, + { + "epoch": 3.34075723830735, + "eval_icons_CIoU": 0.3017665892839432, + "eval_icons_GIoU": 0.3255244195461273, + "eval_icons_IoU": 0.37552230060100555, + "eval_icons_MAE_all": 0.06774608045816422, + "eval_icons_MAE_h": 0.03980293497443199, + "eval_icons_MAE_w": 0.08278198912739754, + "eval_icons_MAE_x_boxes": 0.05543145537376404, + "eval_icons_MAE_y_boxes": 0.038841537199914455, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.737123966217041, + "eval_icons_loss_ce": 0.0005422734539024532, + "eval_icons_loss_iou": 0.670654296875, + "eval_icons_loss_num": 0.05875968933105469, + "eval_icons_loss_xval": 1.63427734375, + "eval_icons_runtime": 35.2512, + "eval_icons_samples_per_second": 1.418, + "eval_icons_steps_per_second": 0.057, + "num_input_tokens_seen": 84012288, + "step": 1500 + }, + { + "epoch": 3.34075723830735, + "eval_screenspot_CIoU": 0.31236544251441956, + "eval_screenspot_GIoU": 0.3288701077302297, + "eval_screenspot_IoU": 0.3995972275733948, + "eval_screenspot_MAE_all": 0.0782718521853288, + "eval_screenspot_MAE_h": 0.04242397534350554, + "eval_screenspot_MAE_w": 0.088284266491731, + "eval_screenspot_MAE_x_boxes": 0.10724017272392909, + "eval_screenspot_MAE_y_boxes": 0.047414361188809075, + "eval_screenspot_inside_bbox": 0.6150000095367432, + "eval_screenspot_loss": 1.7993109226226807, + "eval_screenspot_loss_ce": 0.0005223362823016942, + "eval_screenspot_loss_iou": 0.7171223958333334, + "eval_screenspot_loss_num": 0.08854929606119792, + "eval_screenspot_loss_xval": 1.8779296875, + "eval_screenspot_runtime": 49.5236, + "eval_screenspot_samples_per_second": 1.797, + "eval_screenspot_steps_per_second": 0.061, + "num_input_tokens_seen": 84012288, + "step": 1500 + }, + { + "epoch": 3.34075723830735, + "eval_compot_CIoU": 0.34500832855701447, + "eval_compot_GIoU": 0.37410005927085876, + "eval_compot_IoU": 0.40422166883945465, + "eval_compot_MAE_all": 0.020579061470925808, + "eval_compot_MAE_h": 0.009703563060611486, + "eval_compot_MAE_w": 0.026576916687190533, + "eval_compot_MAE_x_boxes": 0.03108334168791771, + "eval_compot_MAE_y_boxes": 0.006412426475435495, + "eval_compot_inside_bbox": 0.6145833432674408, + "eval_compot_loss": 1.364414930343628, + "eval_compot_loss_ce": 0.0003521185863064602, + "eval_compot_loss_iou": 0.6151123046875, + "eval_compot_loss_num": 0.019611358642578125, + "eval_compot_loss_xval": 1.329833984375, + "eval_compot_runtime": 23.3555, + "eval_compot_samples_per_second": 2.141, + "eval_compot_steps_per_second": 0.086, + "num_input_tokens_seen": 84012288, + "step": 1500 + }, + { + "epoch": 3.34075723830735, + "eval_custom_ui_val_CIoU": 0.4252634909417894, + "eval_custom_ui_val_GIoU": 0.4529141320122613, + "eval_custom_ui_val_IoU": 0.48057351344161564, + "eval_custom_ui_val_MAE_all": 0.03534992608345217, + "eval_custom_ui_val_MAE_h": 0.020278693590727117, + "eval_custom_ui_val_MAE_w": 0.039640864771273404, + "eval_custom_ui_val_MAE_x_boxes": 0.03883321676403284, + "eval_custom_ui_val_MAE_y_boxes": 0.020278344189541206, + "eval_custom_ui_val_inside_bbox": 0.6658950646718343, + "eval_custom_ui_val_loss": 1.297371745109558, + "eval_custom_ui_val_loss_ce": 0.0004658611885841108, + "eval_custom_ui_val_loss_iou": 0.5472547743055556, + "eval_custom_ui_val_loss_num": 0.03508186340332031, + "eval_custom_ui_val_loss_xval": 1.2700737847222223, + "eval_custom_ui_val_runtime": 65.9243, + "eval_custom_ui_val_samples_per_second": 4.02, + "eval_custom_ui_val_steps_per_second": 0.137, + "num_input_tokens_seen": 84012288, + "step": 1500 + }, + { + "epoch": 3.34075723830735, + "loss": 1.047553539276123, + "loss_ce": 0.00043441198067739606, + "loss_iou": 0.453125, + "loss_num": 0.0279541015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 84012288, + "step": 1500 + }, + { + "epoch": 3.3429844097995547, + "grad_norm": 17.67667007446289, + "learning_rate": 1e-06, + "loss": 0.8425, + "num_input_tokens_seen": 84069404, + "step": 1501 + }, + { + "epoch": 3.3429844097995547, + "loss": 0.8640942573547363, + "loss_ce": 0.00032469953293912113, + "loss_iou": 0.33203125, + "loss_num": 0.039794921875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 84069404, + "step": 1501 + }, + { + "epoch": 3.3452115812917596, + "grad_norm": 17.004056930541992, + "learning_rate": 1e-06, + "loss": 0.9548, + "num_input_tokens_seen": 84124248, + "step": 1502 + }, + { + "epoch": 3.3452115812917596, + "loss": 1.012810468673706, + "loss_ce": 0.0003593353903852403, + "loss_iou": 0.4296875, + "loss_num": 0.0306396484375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 84124248, + "step": 1502 + }, + { + "epoch": 3.3474387527839644, + "grad_norm": 17.8640193939209, + "learning_rate": 1e-06, + "loss": 0.8929, + "num_input_tokens_seen": 84182520, + "step": 1503 + }, + { + "epoch": 3.3474387527839644, + "loss": 1.041400671005249, + "loss_ce": 0.0003850944631267339, + "loss_iou": 0.453125, + "loss_num": 0.02685546875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 84182520, + "step": 1503 + }, + { + "epoch": 3.3496659242761693, + "grad_norm": 31.79290199279785, + "learning_rate": 1e-06, + "loss": 0.6244, + "num_input_tokens_seen": 84241328, + "step": 1504 + }, + { + "epoch": 3.3496659242761693, + "loss": 0.537376880645752, + "loss_ce": 0.0002675021532922983, + "loss_iou": 0.2177734375, + "loss_num": 0.0201416015625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 84241328, + "step": 1504 + }, + { + "epoch": 3.351893095768374, + "grad_norm": 19.212589263916016, + "learning_rate": 1e-06, + "loss": 0.8643, + "num_input_tokens_seen": 84296620, + "step": 1505 + }, + { + "epoch": 3.351893095768374, + "loss": 0.6515594124794006, + "loss_ce": 0.0004363722400739789, + "loss_iou": 0.236328125, + "loss_num": 0.035400390625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 84296620, + "step": 1505 + }, + { + "epoch": 3.354120267260579, + "grad_norm": 20.98340606689453, + "learning_rate": 1e-06, + "loss": 0.6714, + "num_input_tokens_seen": 84350084, + "step": 1506 + }, + { + "epoch": 3.354120267260579, + "loss": 0.7122939825057983, + "loss_ce": 0.0003799225087277591, + "loss_iou": 0.296875, + "loss_num": 0.023681640625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 84350084, + "step": 1506 + }, + { + "epoch": 3.356347438752784, + "grad_norm": 16.530118942260742, + "learning_rate": 1e-06, + "loss": 0.8061, + "num_input_tokens_seen": 84402640, + "step": 1507 + }, + { + "epoch": 3.356347438752784, + "loss": 0.8870806694030762, + "loss_ce": 0.0003618546761572361, + "loss_iou": 0.375, + "loss_num": 0.027587890625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 84402640, + "step": 1507 + }, + { + "epoch": 3.3585746102449887, + "grad_norm": 22.20184326171875, + "learning_rate": 1e-06, + "loss": 0.818, + "num_input_tokens_seen": 84458176, + "step": 1508 + }, + { + "epoch": 3.3585746102449887, + "loss": 0.7754631638526917, + "loss_ce": 0.0003166621900163591, + "loss_iou": 0.341796875, + "loss_num": 0.0181884765625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 84458176, + "step": 1508 + }, + { + "epoch": 3.3608017817371936, + "grad_norm": 17.622278213500977, + "learning_rate": 1e-06, + "loss": 0.6034, + "num_input_tokens_seen": 84514432, + "step": 1509 + }, + { + "epoch": 3.3608017817371936, + "loss": 0.4961128532886505, + "loss_ce": 0.0002632543910294771, + "loss_iou": 0.2109375, + "loss_num": 0.01507568359375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 84514432, + "step": 1509 + }, + { + "epoch": 3.3630289532293984, + "grad_norm": 19.41647720336914, + "learning_rate": 1e-06, + "loss": 0.9464, + "num_input_tokens_seen": 84572280, + "step": 1510 + }, + { + "epoch": 3.3630289532293984, + "loss": 0.9257339239120483, + "loss_ce": 0.0004409342654980719, + "loss_iou": 0.3984375, + "loss_num": 0.0257568359375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 84572280, + "step": 1510 + }, + { + "epoch": 3.3652561247216037, + "grad_norm": 30.21278190612793, + "learning_rate": 1e-06, + "loss": 0.8469, + "num_input_tokens_seen": 84629116, + "step": 1511 + }, + { + "epoch": 3.3652561247216037, + "loss": 0.769070029258728, + "loss_ce": 0.0002712323039304465, + "loss_iou": 0.333984375, + "loss_num": 0.0203857421875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 84629116, + "step": 1511 + }, + { + "epoch": 3.3674832962138086, + "grad_norm": 27.09176254272461, + "learning_rate": 1e-06, + "loss": 1.0643, + "num_input_tokens_seen": 84685628, + "step": 1512 + }, + { + "epoch": 3.3674832962138086, + "loss": 1.0953569412231445, + "loss_ce": 0.0003862777375616133, + "loss_iou": 0.421875, + "loss_num": 0.050048828125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 84685628, + "step": 1512 + }, + { + "epoch": 3.3697104677060135, + "grad_norm": 29.004039764404297, + "learning_rate": 1e-06, + "loss": 0.8344, + "num_input_tokens_seen": 84743456, + "step": 1513 + }, + { + "epoch": 3.3697104677060135, + "loss": 1.0513304471969604, + "loss_ce": 0.0003050968807656318, + "loss_iou": 0.40625, + "loss_num": 0.048095703125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 84743456, + "step": 1513 + }, + { + "epoch": 3.3719376391982183, + "grad_norm": 14.021499633789062, + "learning_rate": 1e-06, + "loss": 0.7724, + "num_input_tokens_seen": 84800152, + "step": 1514 + }, + { + "epoch": 3.3719376391982183, + "loss": 0.7707792520523071, + "loss_ce": 0.004665941931307316, + "loss_iou": 0.296875, + "loss_num": 0.0341796875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 84800152, + "step": 1514 + }, + { + "epoch": 3.374164810690423, + "grad_norm": 21.85736656188965, + "learning_rate": 1e-06, + "loss": 0.9805, + "num_input_tokens_seen": 84856756, + "step": 1515 + }, + { + "epoch": 3.374164810690423, + "loss": 1.0318880081176758, + "loss_ce": 0.0016146359266713262, + "loss_iou": 0.404296875, + "loss_num": 0.044189453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 84856756, + "step": 1515 + }, + { + "epoch": 3.376391982182628, + "grad_norm": 17.330366134643555, + "learning_rate": 1e-06, + "loss": 0.8845, + "num_input_tokens_seen": 84913964, + "step": 1516 + }, + { + "epoch": 3.376391982182628, + "loss": 0.9630565643310547, + "loss_ce": 0.00041007917025126517, + "loss_iou": 0.4140625, + "loss_num": 0.0264892578125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 84913964, + "step": 1516 + }, + { + "epoch": 3.378619153674833, + "grad_norm": 28.434722900390625, + "learning_rate": 1e-06, + "loss": 0.7971, + "num_input_tokens_seen": 84973372, + "step": 1517 + }, + { + "epoch": 3.378619153674833, + "loss": 0.6269816160202026, + "loss_ce": 0.0002726696548052132, + "loss_iou": 0.255859375, + "loss_num": 0.023193359375, + "loss_xval": 0.625, + "num_input_tokens_seen": 84973372, + "step": 1517 + }, + { + "epoch": 3.3808463251670378, + "grad_norm": 22.015655517578125, + "learning_rate": 1e-06, + "loss": 0.7052, + "num_input_tokens_seen": 85029976, + "step": 1518 + }, + { + "epoch": 3.3808463251670378, + "loss": 0.7049768567085266, + "loss_ce": 0.00038701502489857376, + "loss_iou": 0.314453125, + "loss_num": 0.01513671875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 85029976, + "step": 1518 + }, + { + "epoch": 3.3830734966592426, + "grad_norm": 18.153146743774414, + "learning_rate": 1e-06, + "loss": 0.7883, + "num_input_tokens_seen": 85085816, + "step": 1519 + }, + { + "epoch": 3.3830734966592426, + "loss": 0.9505753517150879, + "loss_ce": 0.00025792684755288064, + "loss_iou": 0.390625, + "loss_num": 0.0341796875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 85085816, + "step": 1519 + }, + { + "epoch": 3.3853006681514475, + "grad_norm": 21.065345764160156, + "learning_rate": 1e-06, + "loss": 0.9315, + "num_input_tokens_seen": 85143668, + "step": 1520 + }, + { + "epoch": 3.3853006681514475, + "loss": 0.7659118175506592, + "loss_ce": 0.00028681475669145584, + "loss_iou": 0.3359375, + "loss_num": 0.0185546875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 85143668, + "step": 1520 + }, + { + "epoch": 3.387527839643653, + "grad_norm": 25.462947845458984, + "learning_rate": 1e-06, + "loss": 0.9036, + "num_input_tokens_seen": 85199320, + "step": 1521 + }, + { + "epoch": 3.387527839643653, + "loss": 0.8738066554069519, + "loss_ce": 0.00027149327797815204, + "loss_iou": 0.380859375, + "loss_num": 0.0224609375, + "loss_xval": 0.875, + "num_input_tokens_seen": 85199320, + "step": 1521 + }, + { + "epoch": 3.3897550111358576, + "grad_norm": 22.21038055419922, + "learning_rate": 1e-06, + "loss": 0.8248, + "num_input_tokens_seen": 85256340, + "step": 1522 + }, + { + "epoch": 3.3897550111358576, + "loss": 0.8254960179328918, + "loss_ce": 0.0003007104678545147, + "loss_iou": 0.359375, + "loss_num": 0.0208740234375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 85256340, + "step": 1522 + }, + { + "epoch": 3.3919821826280625, + "grad_norm": 17.12187385559082, + "learning_rate": 1e-06, + "loss": 0.826, + "num_input_tokens_seen": 85312876, + "step": 1523 + }, + { + "epoch": 3.3919821826280625, + "loss": 0.9279178380966187, + "loss_ce": 0.0004276382096577436, + "loss_iou": 0.37890625, + "loss_num": 0.033935546875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 85312876, + "step": 1523 + }, + { + "epoch": 3.3942093541202674, + "grad_norm": 20.59052085876465, + "learning_rate": 1e-06, + "loss": 0.8034, + "num_input_tokens_seen": 85369068, + "step": 1524 + }, + { + "epoch": 3.3942093541202674, + "loss": 0.9275302886962891, + "loss_ce": 0.00028415530687198043, + "loss_iou": 0.36328125, + "loss_num": 0.0400390625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 85369068, + "step": 1524 + }, + { + "epoch": 3.3964365256124722, + "grad_norm": 23.074607849121094, + "learning_rate": 1e-06, + "loss": 1.0088, + "num_input_tokens_seen": 85424964, + "step": 1525 + }, + { + "epoch": 3.3964365256124722, + "loss": 1.4848394393920898, + "loss_ce": 0.00046440563164651394, + "loss_iou": 0.5859375, + "loss_num": 0.0634765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 85424964, + "step": 1525 + }, + { + "epoch": 3.398663697104677, + "grad_norm": 103.47501373291016, + "learning_rate": 1e-06, + "loss": 0.7614, + "num_input_tokens_seen": 85481460, + "step": 1526 + }, + { + "epoch": 3.398663697104677, + "loss": 0.9770767688751221, + "loss_ce": 0.00027014350052922964, + "loss_iou": 0.400390625, + "loss_num": 0.03564453125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 85481460, + "step": 1526 + }, + { + "epoch": 3.400890868596882, + "grad_norm": 16.694969177246094, + "learning_rate": 1e-06, + "loss": 0.7637, + "num_input_tokens_seen": 85538076, + "step": 1527 + }, + { + "epoch": 3.400890868596882, + "loss": 0.7700480818748474, + "loss_ce": 0.00027271179715171456, + "loss_iou": 0.341796875, + "loss_num": 0.01708984375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 85538076, + "step": 1527 + }, + { + "epoch": 3.403118040089087, + "grad_norm": 15.084866523742676, + "learning_rate": 1e-06, + "loss": 0.8101, + "num_input_tokens_seen": 85593752, + "step": 1528 + }, + { + "epoch": 3.403118040089087, + "loss": 0.7312592267990112, + "loss_ce": 0.0003021882730536163, + "loss_iou": 0.30859375, + "loss_num": 0.0225830078125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 85593752, + "step": 1528 + }, + { + "epoch": 3.4053452115812917, + "grad_norm": 137.56297302246094, + "learning_rate": 1e-06, + "loss": 1.0569, + "num_input_tokens_seen": 85649224, + "step": 1529 + }, + { + "epoch": 3.4053452115812917, + "loss": 0.6406855583190918, + "loss_ce": 0.00030470843194052577, + "loss_iou": 0.2890625, + "loss_num": 0.0125732421875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 85649224, + "step": 1529 + }, + { + "epoch": 3.4075723830734965, + "grad_norm": 17.245445251464844, + "learning_rate": 1e-06, + "loss": 0.8775, + "num_input_tokens_seen": 85704284, + "step": 1530 + }, + { + "epoch": 3.4075723830734965, + "loss": 0.7688406109809875, + "loss_ce": 0.000285913614789024, + "loss_iou": 0.326171875, + "loss_num": 0.0233154296875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 85704284, + "step": 1530 + }, + { + "epoch": 3.4097995545657014, + "grad_norm": 28.163143157958984, + "learning_rate": 1e-06, + "loss": 0.6158, + "num_input_tokens_seen": 85758312, + "step": 1531 + }, + { + "epoch": 3.4097995545657014, + "loss": 0.6164954900741577, + "loss_ce": 0.000772814848460257, + "loss_iou": 0.28125, + "loss_num": 0.01080322265625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 85758312, + "step": 1531 + }, + { + "epoch": 3.4120267260579062, + "grad_norm": 20.04214096069336, + "learning_rate": 1e-06, + "loss": 0.8842, + "num_input_tokens_seen": 85814628, + "step": 1532 + }, + { + "epoch": 3.4120267260579062, + "loss": 1.005671739578247, + "loss_ce": 0.00030057106050662696, + "loss_iou": 0.4296875, + "loss_num": 0.029052734375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 85814628, + "step": 1532 + }, + { + "epoch": 3.4142538975501115, + "grad_norm": 31.450151443481445, + "learning_rate": 1e-06, + "loss": 0.8459, + "num_input_tokens_seen": 85870152, + "step": 1533 + }, + { + "epoch": 3.4142538975501115, + "loss": 1.0091707706451416, + "loss_ce": 0.0003816866665147245, + "loss_iou": 0.41796875, + "loss_num": 0.0341796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 85870152, + "step": 1533 + }, + { + "epoch": 3.4164810690423164, + "grad_norm": 16.301393508911133, + "learning_rate": 1e-06, + "loss": 1.1544, + "num_input_tokens_seen": 85923552, + "step": 1534 + }, + { + "epoch": 3.4164810690423164, + "loss": 1.0733964443206787, + "loss_ce": 0.0003984392969869077, + "loss_iou": 0.412109375, + "loss_num": 0.04931640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 85923552, + "step": 1534 + }, + { + "epoch": 3.4187082405345213, + "grad_norm": 22.28922462463379, + "learning_rate": 1e-06, + "loss": 1.2395, + "num_input_tokens_seen": 85978024, + "step": 1535 + }, + { + "epoch": 3.4187082405345213, + "loss": 1.1963934898376465, + "loss_ce": 0.0005926065496169031, + "loss_iou": 0.4609375, + "loss_num": 0.0546875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 85978024, + "step": 1535 + }, + { + "epoch": 3.420935412026726, + "grad_norm": 17.531831741333008, + "learning_rate": 1e-06, + "loss": 0.857, + "num_input_tokens_seen": 86032808, + "step": 1536 + }, + { + "epoch": 3.420935412026726, + "loss": 0.6863161325454712, + "loss_ce": 0.00028098217444494367, + "loss_iou": 0.267578125, + "loss_num": 0.0299072265625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 86032808, + "step": 1536 + }, + { + "epoch": 3.423162583518931, + "grad_norm": 40.43157958984375, + "learning_rate": 1e-06, + "loss": 0.9083, + "num_input_tokens_seen": 86084988, + "step": 1537 + }, + { + "epoch": 3.423162583518931, + "loss": 1.0269807577133179, + "loss_ce": 0.00036941259168088436, + "loss_iou": 0.416015625, + "loss_num": 0.038330078125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 86084988, + "step": 1537 + }, + { + "epoch": 3.425389755011136, + "grad_norm": 13.929858207702637, + "learning_rate": 1e-06, + "loss": 0.5897, + "num_input_tokens_seen": 86141804, + "step": 1538 + }, + { + "epoch": 3.425389755011136, + "loss": 0.5110272765159607, + "loss_ce": 0.00028507091337814927, + "loss_iou": 0.228515625, + "loss_num": 0.0106201171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 86141804, + "step": 1538 + }, + { + "epoch": 3.4276169265033407, + "grad_norm": 22.480680465698242, + "learning_rate": 1e-06, + "loss": 1.0234, + "num_input_tokens_seen": 86196392, + "step": 1539 + }, + { + "epoch": 3.4276169265033407, + "loss": 1.0525072813034058, + "loss_ce": 0.0002611761447042227, + "loss_iou": 0.3828125, + "loss_num": 0.05712890625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 86196392, + "step": 1539 + }, + { + "epoch": 3.4298440979955456, + "grad_norm": 20.415674209594727, + "learning_rate": 1e-06, + "loss": 0.7179, + "num_input_tokens_seen": 86254652, + "step": 1540 + }, + { + "epoch": 3.4298440979955456, + "loss": 0.7557092905044556, + "loss_ce": 0.0003381772548891604, + "loss_iou": 0.3125, + "loss_num": 0.025390625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 86254652, + "step": 1540 + }, + { + "epoch": 3.4320712694877504, + "grad_norm": 14.296008110046387, + "learning_rate": 1e-06, + "loss": 0.9989, + "num_input_tokens_seen": 86310292, + "step": 1541 + }, + { + "epoch": 3.4320712694877504, + "loss": 1.1000525951385498, + "loss_ce": 0.00044316527782939374, + "loss_iou": 0.44921875, + "loss_num": 0.0400390625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 86310292, + "step": 1541 + }, + { + "epoch": 3.4342984409799553, + "grad_norm": 15.6137056350708, + "learning_rate": 1e-06, + "loss": 0.825, + "num_input_tokens_seen": 86365832, + "step": 1542 + }, + { + "epoch": 3.4342984409799553, + "loss": 0.7097923755645752, + "loss_ce": 0.00031970589770935476, + "loss_iou": 0.32421875, + "loss_num": 0.01190185546875, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 86365832, + "step": 1542 + }, + { + "epoch": 3.4365256124721606, + "grad_norm": 405.90081787109375, + "learning_rate": 1e-06, + "loss": 0.8, + "num_input_tokens_seen": 86423384, + "step": 1543 + }, + { + "epoch": 3.4365256124721606, + "loss": 1.0093533992767334, + "loss_ce": 0.0008084863657131791, + "loss_iou": 0.40625, + "loss_num": 0.0390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 86423384, + "step": 1543 + }, + { + "epoch": 3.4387527839643655, + "grad_norm": 23.693382263183594, + "learning_rate": 1e-06, + "loss": 0.9363, + "num_input_tokens_seen": 86479144, + "step": 1544 + }, + { + "epoch": 3.4387527839643655, + "loss": 0.854164719581604, + "loss_ce": 0.0004049596609547734, + "loss_iou": 0.37109375, + "loss_num": 0.022216796875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 86479144, + "step": 1544 + }, + { + "epoch": 3.4409799554565703, + "grad_norm": 16.630285263061523, + "learning_rate": 1e-06, + "loss": 1.0941, + "num_input_tokens_seen": 86535676, + "step": 1545 + }, + { + "epoch": 3.4409799554565703, + "loss": 1.3825310468673706, + "loss_ce": 0.00032891728915274143, + "loss_iou": 0.5546875, + "loss_num": 0.053955078125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 86535676, + "step": 1545 + }, + { + "epoch": 3.443207126948775, + "grad_norm": 15.714624404907227, + "learning_rate": 1e-06, + "loss": 0.9159, + "num_input_tokens_seen": 86593604, + "step": 1546 + }, + { + "epoch": 3.443207126948775, + "loss": 0.6130160093307495, + "loss_ce": 0.00028408144135028124, + "loss_iou": 0.259765625, + "loss_num": 0.018310546875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 86593604, + "step": 1546 + }, + { + "epoch": 3.44543429844098, + "grad_norm": 13.893007278442383, + "learning_rate": 1e-06, + "loss": 0.7666, + "num_input_tokens_seen": 86649364, + "step": 1547 + }, + { + "epoch": 3.44543429844098, + "loss": 0.6706589460372925, + "loss_ce": 0.0002487250021658838, + "loss_iou": 0.271484375, + "loss_num": 0.025390625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 86649364, + "step": 1547 + }, + { + "epoch": 3.447661469933185, + "grad_norm": 19.675296783447266, + "learning_rate": 1e-06, + "loss": 0.8681, + "num_input_tokens_seen": 86703940, + "step": 1548 + }, + { + "epoch": 3.447661469933185, + "loss": 0.9216609001159668, + "loss_ce": 0.0002741321222856641, + "loss_iou": 0.412109375, + "loss_num": 0.0198974609375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 86703940, + "step": 1548 + }, + { + "epoch": 3.4498886414253898, + "grad_norm": 19.515134811401367, + "learning_rate": 1e-06, + "loss": 1.096, + "num_input_tokens_seen": 86760584, + "step": 1549 + }, + { + "epoch": 3.4498886414253898, + "loss": 1.3511126041412354, + "loss_ce": 0.00028249574825167656, + "loss_iou": 0.51953125, + "loss_num": 0.06201171875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 86760584, + "step": 1549 + }, + { + "epoch": 3.4521158129175946, + "grad_norm": 17.653553009033203, + "learning_rate": 1e-06, + "loss": 0.7243, + "num_input_tokens_seen": 86815208, + "step": 1550 + }, + { + "epoch": 3.4521158129175946, + "loss": 0.7034174203872681, + "loss_ce": 0.0002923937572631985, + "loss_iou": 0.318359375, + "loss_num": 0.01348876953125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 86815208, + "step": 1550 + }, + { + "epoch": 3.4543429844097995, + "grad_norm": 13.833768844604492, + "learning_rate": 1e-06, + "loss": 0.9415, + "num_input_tokens_seen": 86869804, + "step": 1551 + }, + { + "epoch": 3.4543429844097995, + "loss": 0.8110692501068115, + "loss_ce": 0.00027830369072034955, + "loss_iou": 0.3515625, + "loss_num": 0.0218505859375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 86869804, + "step": 1551 + }, + { + "epoch": 3.4565701559020043, + "grad_norm": 21.14474868774414, + "learning_rate": 1e-06, + "loss": 0.8695, + "num_input_tokens_seen": 86925632, + "step": 1552 + }, + { + "epoch": 3.4565701559020043, + "loss": 0.7101556062698364, + "loss_ce": 0.0004388463275972754, + "loss_iou": 0.26953125, + "loss_num": 0.0341796875, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 86925632, + "step": 1552 + }, + { + "epoch": 3.458797327394209, + "grad_norm": 23.79280662536621, + "learning_rate": 1e-06, + "loss": 1.0303, + "num_input_tokens_seen": 86983484, + "step": 1553 + }, + { + "epoch": 3.458797327394209, + "loss": 1.0085933208465576, + "loss_ce": 0.0002924925647675991, + "loss_iou": 0.443359375, + "loss_num": 0.0244140625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 86983484, + "step": 1553 + }, + { + "epoch": 3.461024498886414, + "grad_norm": 21.37234115600586, + "learning_rate": 1e-06, + "loss": 0.6182, + "num_input_tokens_seen": 87040560, + "step": 1554 + }, + { + "epoch": 3.461024498886414, + "loss": 0.8163036108016968, + "loss_ce": 0.0003856316034216434, + "loss_iou": 0.33203125, + "loss_num": 0.0306396484375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 87040560, + "step": 1554 + }, + { + "epoch": 3.463251670378619, + "grad_norm": 22.6509952545166, + "learning_rate": 1e-06, + "loss": 0.7905, + "num_input_tokens_seen": 87096772, + "step": 1555 + }, + { + "epoch": 3.463251670378619, + "loss": 0.7041604518890381, + "loss_ce": 0.00030306505504995584, + "loss_iou": 0.28515625, + "loss_num": 0.0267333984375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 87096772, + "step": 1555 + }, + { + "epoch": 3.4654788418708242, + "grad_norm": 16.446748733520508, + "learning_rate": 1e-06, + "loss": 0.9045, + "num_input_tokens_seen": 87153824, + "step": 1556 + }, + { + "epoch": 3.4654788418708242, + "loss": 0.9084920287132263, + "loss_ce": 0.0002888740273192525, + "loss_iou": 0.392578125, + "loss_num": 0.0244140625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 87153824, + "step": 1556 + }, + { + "epoch": 3.467706013363029, + "grad_norm": 13.511149406433105, + "learning_rate": 1e-06, + "loss": 1.0573, + "num_input_tokens_seen": 87209524, + "step": 1557 + }, + { + "epoch": 3.467706013363029, + "loss": 1.2329661846160889, + "loss_ce": 0.0003001574077643454, + "loss_iou": 0.5, + "loss_num": 0.045654296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 87209524, + "step": 1557 + }, + { + "epoch": 3.469933184855234, + "grad_norm": 57.4884147644043, + "learning_rate": 1e-06, + "loss": 0.8035, + "num_input_tokens_seen": 87264100, + "step": 1558 + }, + { + "epoch": 3.469933184855234, + "loss": 0.8297716379165649, + "loss_ce": 0.00030388019513338804, + "loss_iou": 0.359375, + "loss_num": 0.0224609375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 87264100, + "step": 1558 + }, + { + "epoch": 3.472160356347439, + "grad_norm": 15.150367736816406, + "learning_rate": 1e-06, + "loss": 0.9682, + "num_input_tokens_seen": 87321728, + "step": 1559 + }, + { + "epoch": 3.472160356347439, + "loss": 1.013202428817749, + "loss_ce": 0.0002630281960591674, + "loss_iou": 0.435546875, + "loss_num": 0.0286865234375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 87321728, + "step": 1559 + }, + { + "epoch": 3.4743875278396437, + "grad_norm": 26.730871200561523, + "learning_rate": 1e-06, + "loss": 0.8033, + "num_input_tokens_seen": 87378128, + "step": 1560 + }, + { + "epoch": 3.4743875278396437, + "loss": 0.7272953391075134, + "loss_ce": 0.0004887018585577607, + "loss_iou": 0.326171875, + "loss_num": 0.014892578125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 87378128, + "step": 1560 + }, + { + "epoch": 3.4766146993318485, + "grad_norm": 63.709983825683594, + "learning_rate": 1e-06, + "loss": 0.9815, + "num_input_tokens_seen": 87432972, + "step": 1561 + }, + { + "epoch": 3.4766146993318485, + "loss": 0.9485345482826233, + "loss_ce": 0.0002923659631051123, + "loss_iou": 0.3828125, + "loss_num": 0.03662109375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 87432972, + "step": 1561 + }, + { + "epoch": 3.4788418708240534, + "grad_norm": 19.27259063720703, + "learning_rate": 1e-06, + "loss": 0.7692, + "num_input_tokens_seen": 87486680, + "step": 1562 + }, + { + "epoch": 3.4788418708240534, + "loss": 0.7349532842636108, + "loss_ce": 0.00033419817918911576, + "loss_iou": 0.33203125, + "loss_num": 0.01409912109375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 87486680, + "step": 1562 + }, + { + "epoch": 3.4810690423162582, + "grad_norm": 17.00948143005371, + "learning_rate": 1e-06, + "loss": 0.8309, + "num_input_tokens_seen": 87543352, + "step": 1563 + }, + { + "epoch": 3.4810690423162582, + "loss": 0.9362764954566956, + "loss_ce": 0.00024133155238814652, + "loss_iou": 0.412109375, + "loss_num": 0.022216796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 87543352, + "step": 1563 + }, + { + "epoch": 3.483296213808463, + "grad_norm": 24.209060668945312, + "learning_rate": 1e-06, + "loss": 0.9046, + "num_input_tokens_seen": 87600896, + "step": 1564 + }, + { + "epoch": 3.483296213808463, + "loss": 1.042458176612854, + "loss_ce": 0.00046601821668446064, + "loss_iou": 0.43359375, + "loss_num": 0.03515625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 87600896, + "step": 1564 + }, + { + "epoch": 3.485523385300668, + "grad_norm": 14.757556915283203, + "learning_rate": 1e-06, + "loss": 0.9321, + "num_input_tokens_seen": 87658544, + "step": 1565 + }, + { + "epoch": 3.485523385300668, + "loss": 0.7475967407226562, + "loss_ce": 0.000282272812910378, + "loss_iou": 0.3046875, + "loss_num": 0.02783203125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 87658544, + "step": 1565 + }, + { + "epoch": 3.4877505567928733, + "grad_norm": 29.213010787963867, + "learning_rate": 1e-06, + "loss": 0.9172, + "num_input_tokens_seen": 87715960, + "step": 1566 + }, + { + "epoch": 3.4877505567928733, + "loss": 0.8977096676826477, + "loss_ce": 0.00049288832815364, + "loss_iou": 0.3515625, + "loss_num": 0.038818359375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 87715960, + "step": 1566 + }, + { + "epoch": 3.489977728285078, + "grad_norm": 13.978538513183594, + "learning_rate": 1e-06, + "loss": 0.8842, + "num_input_tokens_seen": 87769772, + "step": 1567 + }, + { + "epoch": 3.489977728285078, + "loss": 0.8801678419113159, + "loss_ce": 0.0002850402379408479, + "loss_iou": 0.37109375, + "loss_num": 0.0272216796875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 87769772, + "step": 1567 + }, + { + "epoch": 3.492204899777283, + "grad_norm": 28.59236717224121, + "learning_rate": 1e-06, + "loss": 0.646, + "num_input_tokens_seen": 87825804, + "step": 1568 + }, + { + "epoch": 3.492204899777283, + "loss": 0.6284915208816528, + "loss_ce": 0.00031768600456416607, + "loss_iou": 0.28125, + "loss_num": 0.0130615234375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 87825804, + "step": 1568 + }, + { + "epoch": 3.494432071269488, + "grad_norm": 27.629867553710938, + "learning_rate": 1e-06, + "loss": 0.8455, + "num_input_tokens_seen": 87881064, + "step": 1569 + }, + { + "epoch": 3.494432071269488, + "loss": 0.7841451168060303, + "loss_ce": 0.00027057109400629997, + "loss_iou": 0.34375, + "loss_num": 0.0191650390625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 87881064, + "step": 1569 + }, + { + "epoch": 3.4966592427616927, + "grad_norm": 28.423208236694336, + "learning_rate": 1e-06, + "loss": 1.0515, + "num_input_tokens_seen": 87938884, + "step": 1570 + }, + { + "epoch": 3.4966592427616927, + "loss": 1.0150330066680908, + "loss_ce": 0.00038457888877019286, + "loss_iou": 0.4375, + "loss_num": 0.0279541015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 87938884, + "step": 1570 + }, + { + "epoch": 3.4988864142538976, + "grad_norm": 80.1189956665039, + "learning_rate": 1e-06, + "loss": 0.8306, + "num_input_tokens_seen": 87991052, + "step": 1571 + }, + { + "epoch": 3.4988864142538976, + "loss": 0.9712834358215332, + "loss_ce": 0.0005803282838314772, + "loss_iou": 0.412109375, + "loss_num": 0.0294189453125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 87991052, + "step": 1571 + }, + { + "epoch": 3.5011135857461024, + "grad_norm": 33.21288299560547, + "learning_rate": 1e-06, + "loss": 0.999, + "num_input_tokens_seen": 88047996, + "step": 1572 + }, + { + "epoch": 3.5011135857461024, + "loss": 0.9822986721992493, + "loss_ce": 0.00036505749449133873, + "loss_iou": 0.412109375, + "loss_num": 0.031982421875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 88047996, + "step": 1572 + }, + { + "epoch": 3.5033407572383073, + "grad_norm": 27.994529724121094, + "learning_rate": 1e-06, + "loss": 0.8276, + "num_input_tokens_seen": 88102512, + "step": 1573 + }, + { + "epoch": 3.5033407572383073, + "loss": 0.9002130031585693, + "loss_ce": 0.0003106549265794456, + "loss_iou": 0.392578125, + "loss_num": 0.0233154296875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 88102512, + "step": 1573 + }, + { + "epoch": 3.505567928730512, + "grad_norm": 29.116310119628906, + "learning_rate": 1e-06, + "loss": 1.0666, + "num_input_tokens_seen": 88156400, + "step": 1574 + }, + { + "epoch": 3.505567928730512, + "loss": 1.0999196767807007, + "loss_ce": 0.0003103648195974529, + "loss_iou": 0.47265625, + "loss_num": 0.031005859375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 88156400, + "step": 1574 + }, + { + "epoch": 3.507795100222717, + "grad_norm": 18.990516662597656, + "learning_rate": 1e-06, + "loss": 0.7131, + "num_input_tokens_seen": 88211188, + "step": 1575 + }, + { + "epoch": 3.507795100222717, + "loss": 0.8792534470558167, + "loss_ce": 0.000347210094332695, + "loss_iou": 0.388671875, + "loss_num": 0.02001953125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 88211188, + "step": 1575 + }, + { + "epoch": 3.510022271714922, + "grad_norm": 21.322250366210938, + "learning_rate": 1e-06, + "loss": 0.8679, + "num_input_tokens_seen": 88267384, + "step": 1576 + }, + { + "epoch": 3.510022271714922, + "loss": 1.0999305248260498, + "loss_ce": 0.0003211447037756443, + "loss_iou": 0.482421875, + "loss_num": 0.0272216796875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 88267384, + "step": 1576 + }, + { + "epoch": 3.5122494432071267, + "grad_norm": 26.06310272216797, + "learning_rate": 1e-06, + "loss": 0.9818, + "num_input_tokens_seen": 88323956, + "step": 1577 + }, + { + "epoch": 3.5122494432071267, + "loss": 1.0118680000305176, + "loss_ce": 0.0003933944390155375, + "loss_iou": 0.40625, + "loss_num": 0.03955078125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 88323956, + "step": 1577 + }, + { + "epoch": 3.5144766146993316, + "grad_norm": 17.4585018157959, + "learning_rate": 1e-06, + "loss": 0.9207, + "num_input_tokens_seen": 88377432, + "step": 1578 + }, + { + "epoch": 3.5144766146993316, + "loss": 0.7224873304367065, + "loss_ce": 0.00031937778112478554, + "loss_iou": 0.314453125, + "loss_num": 0.01904296875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 88377432, + "step": 1578 + }, + { + "epoch": 3.516703786191537, + "grad_norm": 24.705142974853516, + "learning_rate": 1e-06, + "loss": 0.9077, + "num_input_tokens_seen": 88433012, + "step": 1579 + }, + { + "epoch": 3.516703786191537, + "loss": 1.0539125204086304, + "loss_ce": 0.00044574099592864513, + "loss_iou": 0.400390625, + "loss_num": 0.05078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 88433012, + "step": 1579 + }, + { + "epoch": 3.5189309576837418, + "grad_norm": 30.141464233398438, + "learning_rate": 1e-06, + "loss": 0.7654, + "num_input_tokens_seen": 88490756, + "step": 1580 + }, + { + "epoch": 3.5189309576837418, + "loss": 0.8266535997390747, + "loss_ce": 0.00048178687575273216, + "loss_iou": 0.306640625, + "loss_num": 0.042236328125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 88490756, + "step": 1580 + }, + { + "epoch": 3.5211581291759466, + "grad_norm": 21.32522964477539, + "learning_rate": 1e-06, + "loss": 0.8354, + "num_input_tokens_seen": 88548592, + "step": 1581 + }, + { + "epoch": 3.5211581291759466, + "loss": 0.9959642291069031, + "loss_ce": 0.0006028971401974559, + "loss_iou": 0.443359375, + "loss_num": 0.021484375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 88548592, + "step": 1581 + }, + { + "epoch": 3.5233853006681515, + "grad_norm": 26.187313079833984, + "learning_rate": 1e-06, + "loss": 0.7145, + "num_input_tokens_seen": 88605516, + "step": 1582 + }, + { + "epoch": 3.5233853006681515, + "loss": 0.4769740104675293, + "loss_ce": 0.0002894522622227669, + "loss_iou": 0.212890625, + "loss_num": 0.01025390625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 88605516, + "step": 1582 + }, + { + "epoch": 3.5256124721603563, + "grad_norm": 27.296201705932617, + "learning_rate": 1e-06, + "loss": 0.8584, + "num_input_tokens_seen": 88660980, + "step": 1583 + }, + { + "epoch": 3.5256124721603563, + "loss": 0.7954574823379517, + "loss_ce": 0.0002915282384492457, + "loss_iou": 0.33203125, + "loss_num": 0.026123046875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 88660980, + "step": 1583 + }, + { + "epoch": 3.527839643652561, + "grad_norm": 18.88905143737793, + "learning_rate": 1e-06, + "loss": 0.898, + "num_input_tokens_seen": 88715208, + "step": 1584 + }, + { + "epoch": 3.527839643652561, + "loss": 0.7566571235656738, + "loss_ce": 0.0003094491839874536, + "loss_iou": 0.328125, + "loss_num": 0.02001953125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 88715208, + "step": 1584 + }, + { + "epoch": 3.530066815144766, + "grad_norm": 17.91330337524414, + "learning_rate": 1e-06, + "loss": 0.7627, + "num_input_tokens_seen": 88773392, + "step": 1585 + }, + { + "epoch": 3.530066815144766, + "loss": 0.8976699113845825, + "loss_ce": 0.0004530604637693614, + "loss_iou": 0.380859375, + "loss_num": 0.02685546875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 88773392, + "step": 1585 + }, + { + "epoch": 3.532293986636971, + "grad_norm": 15.070040702819824, + "learning_rate": 1e-06, + "loss": 1.0047, + "num_input_tokens_seen": 88830772, + "step": 1586 + }, + { + "epoch": 3.532293986636971, + "loss": 0.675557017326355, + "loss_ce": 0.0002640365855768323, + "loss_iou": 0.30078125, + "loss_num": 0.0145263671875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 88830772, + "step": 1586 + }, + { + "epoch": 3.534521158129176, + "grad_norm": 15.70621109008789, + "learning_rate": 1e-06, + "loss": 0.9061, + "num_input_tokens_seen": 88887288, + "step": 1587 + }, + { + "epoch": 3.534521158129176, + "loss": 1.0725452899932861, + "loss_ce": 0.000279579107882455, + "loss_iou": 0.431640625, + "loss_num": 0.041259765625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 88887288, + "step": 1587 + }, + { + "epoch": 3.536748329621381, + "grad_norm": 16.636899948120117, + "learning_rate": 1e-06, + "loss": 0.7762, + "num_input_tokens_seen": 88943304, + "step": 1588 + }, + { + "epoch": 3.536748329621381, + "loss": 0.7620242834091187, + "loss_ce": 0.00030552022508345544, + "loss_iou": 0.333984375, + "loss_num": 0.0189208984375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 88943304, + "step": 1588 + }, + { + "epoch": 3.538975501113586, + "grad_norm": 19.53682518005371, + "learning_rate": 1e-06, + "loss": 0.7223, + "num_input_tokens_seen": 89000188, + "step": 1589 + }, + { + "epoch": 3.538975501113586, + "loss": 0.6042957305908203, + "loss_ce": 0.00029182256548665464, + "loss_iou": 0.24609375, + "loss_num": 0.0224609375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 89000188, + "step": 1589 + }, + { + "epoch": 3.541202672605791, + "grad_norm": 16.928892135620117, + "learning_rate": 1e-06, + "loss": 1.0069, + "num_input_tokens_seen": 89055864, + "step": 1590 + }, + { + "epoch": 3.541202672605791, + "loss": 1.0560284852981567, + "loss_ce": 0.0003644293174147606, + "loss_iou": 0.439453125, + "loss_num": 0.035400390625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 89055864, + "step": 1590 + }, + { + "epoch": 3.5434298440979957, + "grad_norm": 21.396242141723633, + "learning_rate": 1e-06, + "loss": 0.7472, + "num_input_tokens_seen": 89114124, + "step": 1591 + }, + { + "epoch": 3.5434298440979957, + "loss": 0.6990212202072144, + "loss_ce": 0.00029076545615680516, + "loss_iou": 0.314453125, + "loss_num": 0.013916015625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 89114124, + "step": 1591 + }, + { + "epoch": 3.5456570155902005, + "grad_norm": 14.654166221618652, + "learning_rate": 1e-06, + "loss": 1.0152, + "num_input_tokens_seen": 89170248, + "step": 1592 + }, + { + "epoch": 3.5456570155902005, + "loss": 0.6828731894493103, + "loss_ce": 0.0002559710410423577, + "loss_iou": 0.263671875, + "loss_num": 0.031494140625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 89170248, + "step": 1592 + }, + { + "epoch": 3.5478841870824054, + "grad_norm": 15.999966621398926, + "learning_rate": 1e-06, + "loss": 0.7778, + "num_input_tokens_seen": 89226156, + "step": 1593 + }, + { + "epoch": 3.5478841870824054, + "loss": 0.9013996124267578, + "loss_ce": 0.0002766078105196357, + "loss_iou": 0.3984375, + "loss_num": 0.0203857421875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 89226156, + "step": 1593 + }, + { + "epoch": 3.5501113585746102, + "grad_norm": 39.26863479614258, + "learning_rate": 1e-06, + "loss": 1.2987, + "num_input_tokens_seen": 89281032, + "step": 1594 + }, + { + "epoch": 3.5501113585746102, + "loss": 1.2463330030441284, + "loss_ce": 0.00048335548490285873, + "loss_iou": 0.466796875, + "loss_num": 0.06298828125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 89281032, + "step": 1594 + }, + { + "epoch": 3.552338530066815, + "grad_norm": 18.40961456298828, + "learning_rate": 1e-06, + "loss": 1.1405, + "num_input_tokens_seen": 89338460, + "step": 1595 + }, + { + "epoch": 3.552338530066815, + "loss": 1.0916733741760254, + "loss_ce": 0.000364768726285547, + "loss_iou": 0.466796875, + "loss_num": 0.031494140625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 89338460, + "step": 1595 + }, + { + "epoch": 3.55456570155902, + "grad_norm": 21.472373962402344, + "learning_rate": 1e-06, + "loss": 0.9278, + "num_input_tokens_seen": 89396420, + "step": 1596 + }, + { + "epoch": 3.55456570155902, + "loss": 1.1413955688476562, + "loss_ce": 0.0002824011608026922, + "loss_iou": 0.474609375, + "loss_num": 0.03857421875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 89396420, + "step": 1596 + }, + { + "epoch": 3.556792873051225, + "grad_norm": 14.41667366027832, + "learning_rate": 1e-06, + "loss": 0.8159, + "num_input_tokens_seen": 89450448, + "step": 1597 + }, + { + "epoch": 3.556792873051225, + "loss": 1.1159937381744385, + "loss_ce": 0.00027105180197395384, + "loss_iou": 0.447265625, + "loss_num": 0.044677734375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 89450448, + "step": 1597 + }, + { + "epoch": 3.5590200445434297, + "grad_norm": 20.48485565185547, + "learning_rate": 1e-06, + "loss": 1.0086, + "num_input_tokens_seen": 89508572, + "step": 1598 + }, + { + "epoch": 3.5590200445434297, + "loss": 0.9943912029266357, + "loss_ce": 0.00025058950996026397, + "loss_iou": 0.41796875, + "loss_num": 0.031982421875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 89508572, + "step": 1598 + }, + { + "epoch": 3.5612472160356345, + "grad_norm": 33.4490852355957, + "learning_rate": 1e-06, + "loss": 1.206, + "num_input_tokens_seen": 89561964, + "step": 1599 + }, + { + "epoch": 3.5612472160356345, + "loss": 1.290135383605957, + "loss_ce": 0.0003403525915928185, + "loss_iou": 0.50390625, + "loss_num": 0.055908203125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 89561964, + "step": 1599 + }, + { + "epoch": 3.5634743875278394, + "grad_norm": 14.543143272399902, + "learning_rate": 1e-06, + "loss": 0.748, + "num_input_tokens_seen": 89616600, + "step": 1600 + }, + { + "epoch": 3.5634743875278394, + "loss": 0.8427092432975769, + "loss_ce": 0.0009123453055508435, + "loss_iou": 0.328125, + "loss_num": 0.036865234375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 89616600, + "step": 1600 + }, + { + "epoch": 3.5657015590200447, + "grad_norm": 16.943578720092773, + "learning_rate": 1e-06, + "loss": 0.8685, + "num_input_tokens_seen": 89672620, + "step": 1601 + }, + { + "epoch": 3.5657015590200447, + "loss": 0.8163316249847412, + "loss_ce": 0.00041370143298991024, + "loss_iou": 0.337890625, + "loss_num": 0.028076171875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 89672620, + "step": 1601 + }, + { + "epoch": 3.5679287305122496, + "grad_norm": 16.56560707092285, + "learning_rate": 1e-06, + "loss": 0.8267, + "num_input_tokens_seen": 89727488, + "step": 1602 + }, + { + "epoch": 3.5679287305122496, + "loss": 0.9919605255126953, + "loss_ce": 0.000261296343524009, + "loss_iou": 0.423828125, + "loss_num": 0.0291748046875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 89727488, + "step": 1602 + }, + { + "epoch": 3.5701559020044544, + "grad_norm": 17.976058959960938, + "learning_rate": 1e-06, + "loss": 0.8624, + "num_input_tokens_seen": 89781992, + "step": 1603 + }, + { + "epoch": 3.5701559020044544, + "loss": 0.9566484093666077, + "loss_ce": 0.0003496097633615136, + "loss_iou": 0.40234375, + "loss_num": 0.030029296875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 89781992, + "step": 1603 + }, + { + "epoch": 3.5723830734966593, + "grad_norm": 21.834300994873047, + "learning_rate": 1e-06, + "loss": 1.0587, + "num_input_tokens_seen": 89837948, + "step": 1604 + }, + { + "epoch": 3.5723830734966593, + "loss": 1.1624748706817627, + "loss_ce": 0.00036550615914165974, + "loss_iou": 0.50390625, + "loss_num": 0.0302734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 89837948, + "step": 1604 + }, + { + "epoch": 3.574610244988864, + "grad_norm": 33.87417221069336, + "learning_rate": 1e-06, + "loss": 0.9384, + "num_input_tokens_seen": 89892432, + "step": 1605 + }, + { + "epoch": 3.574610244988864, + "loss": 1.0320757627487183, + "loss_ce": 0.0003374941006768495, + "loss_iou": 0.4453125, + "loss_num": 0.0284423828125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 89892432, + "step": 1605 + }, + { + "epoch": 3.576837416481069, + "grad_norm": 31.87815284729004, + "learning_rate": 1e-06, + "loss": 1.0271, + "num_input_tokens_seen": 89949104, + "step": 1606 + }, + { + "epoch": 3.576837416481069, + "loss": 0.6741030216217041, + "loss_ce": 0.0002749357954598963, + "loss_iou": 0.302734375, + "loss_num": 0.01397705078125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 89949104, + "step": 1606 + }, + { + "epoch": 3.579064587973274, + "grad_norm": 19.64597511291504, + "learning_rate": 1e-06, + "loss": 0.688, + "num_input_tokens_seen": 90007552, + "step": 1607 + }, + { + "epoch": 3.579064587973274, + "loss": 0.7588503956794739, + "loss_ce": 0.0003054735716432333, + "loss_iou": 0.345703125, + "loss_num": 0.0135498046875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 90007552, + "step": 1607 + }, + { + "epoch": 3.5812917594654787, + "grad_norm": 16.6953067779541, + "learning_rate": 1e-06, + "loss": 0.841, + "num_input_tokens_seen": 90065300, + "step": 1608 + }, + { + "epoch": 3.5812917594654787, + "loss": 1.0172404050827026, + "loss_ce": 0.00039471167838200927, + "loss_iou": 0.42578125, + "loss_num": 0.03271484375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 90065300, + "step": 1608 + }, + { + "epoch": 3.5835189309576836, + "grad_norm": 18.6095027923584, + "learning_rate": 1e-06, + "loss": 0.8472, + "num_input_tokens_seen": 90117120, + "step": 1609 + }, + { + "epoch": 3.5835189309576836, + "loss": 0.7410043478012085, + "loss_ce": 0.00028169897268526256, + "loss_iou": 0.310546875, + "loss_num": 0.023681640625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 90117120, + "step": 1609 + }, + { + "epoch": 3.585746102449889, + "grad_norm": 19.11724090576172, + "learning_rate": 1e-06, + "loss": 0.9566, + "num_input_tokens_seen": 90171580, + "step": 1610 + }, + { + "epoch": 3.585746102449889, + "loss": 0.7324100136756897, + "loss_ce": 0.0004764144541695714, + "loss_iou": 0.30859375, + "loss_num": 0.0233154296875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 90171580, + "step": 1610 + }, + { + "epoch": 3.5879732739420938, + "grad_norm": 47.91438674926758, + "learning_rate": 1e-06, + "loss": 0.9755, + "num_input_tokens_seen": 90227144, + "step": 1611 + }, + { + "epoch": 3.5879732739420938, + "loss": 1.2943049669265747, + "loss_ce": 0.000359668250894174, + "loss_iou": 0.54296875, + "loss_num": 0.04248046875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 90227144, + "step": 1611 + }, + { + "epoch": 3.5902004454342986, + "grad_norm": 14.350851058959961, + "learning_rate": 1e-06, + "loss": 0.7686, + "num_input_tokens_seen": 90285220, + "step": 1612 + }, + { + "epoch": 3.5902004454342986, + "loss": 0.6935353875160217, + "loss_ce": 0.0004201638512313366, + "loss_iou": 0.3046875, + "loss_num": 0.0166015625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 90285220, + "step": 1612 + }, + { + "epoch": 3.5924276169265035, + "grad_norm": 87.72432708740234, + "learning_rate": 1e-06, + "loss": 0.9391, + "num_input_tokens_seen": 90341444, + "step": 1613 + }, + { + "epoch": 3.5924276169265035, + "loss": 0.8298938274383545, + "loss_ce": 0.00030400152900256217, + "loss_iou": 0.345703125, + "loss_num": 0.0274658203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 90341444, + "step": 1613 + }, + { + "epoch": 3.5946547884187083, + "grad_norm": 18.14371681213379, + "learning_rate": 1e-06, + "loss": 0.8685, + "num_input_tokens_seen": 90398880, + "step": 1614 + }, + { + "epoch": 3.5946547884187083, + "loss": 0.9817934632301331, + "loss_ce": 0.0003481835883576423, + "loss_iou": 0.400390625, + "loss_num": 0.0361328125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 90398880, + "step": 1614 + }, + { + "epoch": 3.596881959910913, + "grad_norm": 18.2519588470459, + "learning_rate": 1e-06, + "loss": 0.8095, + "num_input_tokens_seen": 90453864, + "step": 1615 + }, + { + "epoch": 3.596881959910913, + "loss": 0.7134523391723633, + "loss_ce": 0.00031755882082507014, + "loss_iou": 0.31640625, + "loss_num": 0.016357421875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 90453864, + "step": 1615 + }, + { + "epoch": 3.599109131403118, + "grad_norm": 11.235553741455078, + "learning_rate": 1e-06, + "loss": 0.9908, + "num_input_tokens_seen": 90508336, + "step": 1616 + }, + { + "epoch": 3.599109131403118, + "loss": 1.2815724611282349, + "loss_ce": 0.0003224927349947393, + "loss_iou": 0.5390625, + "loss_num": 0.041015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 90508336, + "step": 1616 + }, + { + "epoch": 3.601336302895323, + "grad_norm": 19.534269332885742, + "learning_rate": 1e-06, + "loss": 0.9323, + "num_input_tokens_seen": 90565836, + "step": 1617 + }, + { + "epoch": 3.601336302895323, + "loss": 1.1978850364685059, + "loss_ce": 0.0006193737499415874, + "loss_iou": 0.435546875, + "loss_num": 0.06494140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 90565836, + "step": 1617 + }, + { + "epoch": 3.6035634743875278, + "grad_norm": 16.99414825439453, + "learning_rate": 1e-06, + "loss": 0.791, + "num_input_tokens_seen": 90621756, + "step": 1618 + }, + { + "epoch": 3.6035634743875278, + "loss": 0.6094855070114136, + "loss_ce": 0.0003546725492924452, + "loss_iou": 0.265625, + "loss_num": 0.015869140625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 90621756, + "step": 1618 + }, + { + "epoch": 3.6057906458797326, + "grad_norm": 12.391351699829102, + "learning_rate": 1e-06, + "loss": 0.736, + "num_input_tokens_seen": 90679300, + "step": 1619 + }, + { + "epoch": 3.6057906458797326, + "loss": 0.6363743543624878, + "loss_ce": 0.0002659702149685472, + "loss_iou": 0.26953125, + "loss_num": 0.01904296875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 90679300, + "step": 1619 + }, + { + "epoch": 3.6080178173719375, + "grad_norm": 19.21337127685547, + "learning_rate": 1e-06, + "loss": 0.9495, + "num_input_tokens_seen": 90738552, + "step": 1620 + }, + { + "epoch": 3.6080178173719375, + "loss": 0.9752398729324341, + "loss_ce": 0.00038639939157292247, + "loss_iou": 0.41796875, + "loss_num": 0.0279541015625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 90738552, + "step": 1620 + }, + { + "epoch": 3.6102449888641424, + "grad_norm": 14.483777046203613, + "learning_rate": 1e-06, + "loss": 0.7827, + "num_input_tokens_seen": 90796600, + "step": 1621 + }, + { + "epoch": 3.6102449888641424, + "loss": 0.7163407802581787, + "loss_ce": 0.00027631394914351404, + "loss_iou": 0.298828125, + "loss_num": 0.0235595703125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 90796600, + "step": 1621 + }, + { + "epoch": 3.612472160356347, + "grad_norm": 18.561866760253906, + "learning_rate": 1e-06, + "loss": 0.9083, + "num_input_tokens_seen": 90852108, + "step": 1622 + }, + { + "epoch": 3.612472160356347, + "loss": 0.7986100912094116, + "loss_ce": 0.00027019885601475835, + "loss_iou": 0.345703125, + "loss_num": 0.021240234375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 90852108, + "step": 1622 + }, + { + "epoch": 3.614699331848552, + "grad_norm": 13.72581672668457, + "learning_rate": 1e-06, + "loss": 0.9149, + "num_input_tokens_seen": 90909780, + "step": 1623 + }, + { + "epoch": 3.614699331848552, + "loss": 0.8790796995162964, + "loss_ce": 0.0002955020754598081, + "loss_iou": 0.3515625, + "loss_num": 0.035400390625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 90909780, + "step": 1623 + }, + { + "epoch": 3.6169265033407574, + "grad_norm": 19.92896842956543, + "learning_rate": 1e-06, + "loss": 0.8137, + "num_input_tokens_seen": 90964636, + "step": 1624 + }, + { + "epoch": 3.6169265033407574, + "loss": 0.4973217248916626, + "loss_ce": 0.00025139018543995917, + "loss_iou": 0.2001953125, + "loss_num": 0.0194091796875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 90964636, + "step": 1624 + }, + { + "epoch": 3.6191536748329622, + "grad_norm": 24.191030502319336, + "learning_rate": 1e-06, + "loss": 0.8298, + "num_input_tokens_seen": 91020328, + "step": 1625 + }, + { + "epoch": 3.6191536748329622, + "loss": 0.8796758651733398, + "loss_ce": 0.0002813548780977726, + "loss_iou": 0.34765625, + "loss_num": 0.036865234375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 91020328, + "step": 1625 + }, + { + "epoch": 3.621380846325167, + "grad_norm": 18.71809959411621, + "learning_rate": 1e-06, + "loss": 0.7477, + "num_input_tokens_seen": 91076856, + "step": 1626 + }, + { + "epoch": 3.621380846325167, + "loss": 0.6939845085144043, + "loss_ce": 0.0003809723712038249, + "loss_iou": 0.27734375, + "loss_num": 0.027587890625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 91076856, + "step": 1626 + }, + { + "epoch": 3.623608017817372, + "grad_norm": 15.862205505371094, + "learning_rate": 1e-06, + "loss": 0.8313, + "num_input_tokens_seen": 91136100, + "step": 1627 + }, + { + "epoch": 3.623608017817372, + "loss": 0.8305182456970215, + "loss_ce": 0.0007453373400494456, + "loss_iou": 0.3359375, + "loss_num": 0.0311279296875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 91136100, + "step": 1627 + }, + { + "epoch": 3.625835189309577, + "grad_norm": 23.61324691772461, + "learning_rate": 1e-06, + "loss": 0.8207, + "num_input_tokens_seen": 91191620, + "step": 1628 + }, + { + "epoch": 3.625835189309577, + "loss": 0.6336687207221985, + "loss_ce": 0.0002458438102621585, + "loss_iou": 0.27734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 91191620, + "step": 1628 + }, + { + "epoch": 3.6280623608017817, + "grad_norm": 18.18329620361328, + "learning_rate": 1e-06, + "loss": 0.9281, + "num_input_tokens_seen": 91250588, + "step": 1629 + }, + { + "epoch": 3.6280623608017817, + "loss": 1.0516316890716553, + "loss_ce": 0.0003620931529439986, + "loss_iou": 0.421875, + "loss_num": 0.04150390625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 91250588, + "step": 1629 + }, + { + "epoch": 3.6302895322939865, + "grad_norm": 18.133012771606445, + "learning_rate": 1e-06, + "loss": 0.5779, + "num_input_tokens_seen": 91308444, + "step": 1630 + }, + { + "epoch": 3.6302895322939865, + "loss": 0.7188634872436523, + "loss_ce": 0.0002355621982133016, + "loss_iou": 0.29296875, + "loss_num": 0.026611328125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 91308444, + "step": 1630 + }, + { + "epoch": 3.6325167037861914, + "grad_norm": 21.32465934753418, + "learning_rate": 1e-06, + "loss": 0.7386, + "num_input_tokens_seen": 91363304, + "step": 1631 + }, + { + "epoch": 3.6325167037861914, + "loss": 0.7637060284614563, + "loss_ce": 0.0002782873052638024, + "loss_iou": 0.337890625, + "loss_num": 0.0172119140625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 91363304, + "step": 1631 + }, + { + "epoch": 3.6347438752783967, + "grad_norm": 15.17698860168457, + "learning_rate": 1e-06, + "loss": 1.0197, + "num_input_tokens_seen": 91417932, + "step": 1632 + }, + { + "epoch": 3.6347438752783967, + "loss": 0.6608976125717163, + "loss_ce": 0.0002531085046939552, + "loss_iou": 0.244140625, + "loss_num": 0.034423828125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 91417932, + "step": 1632 + }, + { + "epoch": 3.6369710467706016, + "grad_norm": 19.845325469970703, + "learning_rate": 1e-06, + "loss": 0.737, + "num_input_tokens_seen": 91474376, + "step": 1633 + }, + { + "epoch": 3.6369710467706016, + "loss": 0.8098554611206055, + "loss_ce": 0.0002851108438335359, + "loss_iou": 0.3359375, + "loss_num": 0.02783203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 91474376, + "step": 1633 + }, + { + "epoch": 3.6391982182628064, + "grad_norm": 18.74712371826172, + "learning_rate": 1e-06, + "loss": 0.8073, + "num_input_tokens_seen": 91529408, + "step": 1634 + }, + { + "epoch": 3.6391982182628064, + "loss": 0.799332857131958, + "loss_ce": 0.0002606004709377885, + "loss_iou": 0.33984375, + "loss_num": 0.024169921875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 91529408, + "step": 1634 + }, + { + "epoch": 3.6414253897550113, + "grad_norm": 16.073997497558594, + "learning_rate": 1e-06, + "loss": 0.6983, + "num_input_tokens_seen": 91584964, + "step": 1635 + }, + { + "epoch": 3.6414253897550113, + "loss": 0.5875762104988098, + "loss_ce": 0.000418026524130255, + "loss_iou": 0.271484375, + "loss_num": 0.0091552734375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 91584964, + "step": 1635 + }, + { + "epoch": 3.643652561247216, + "grad_norm": 26.88153076171875, + "learning_rate": 1e-06, + "loss": 0.737, + "num_input_tokens_seen": 91642164, + "step": 1636 + }, + { + "epoch": 3.643652561247216, + "loss": 0.8176705837249756, + "loss_ce": 0.00028783181915059686, + "loss_iou": 0.33203125, + "loss_num": 0.03076171875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 91642164, + "step": 1636 + }, + { + "epoch": 3.645879732739421, + "grad_norm": 29.69915771484375, + "learning_rate": 1e-06, + "loss": 1.0198, + "num_input_tokens_seen": 91697608, + "step": 1637 + }, + { + "epoch": 3.645879732739421, + "loss": 0.971588134765625, + "loss_ce": 0.0003966961521655321, + "loss_iou": 0.39453125, + "loss_num": 0.0361328125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 91697608, + "step": 1637 + }, + { + "epoch": 3.648106904231626, + "grad_norm": 14.602875709533691, + "learning_rate": 1e-06, + "loss": 0.7628, + "num_input_tokens_seen": 91755904, + "step": 1638 + }, + { + "epoch": 3.648106904231626, + "loss": 0.7990860342979431, + "loss_ce": 0.0002579537685960531, + "loss_iou": 0.34765625, + "loss_num": 0.0208740234375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 91755904, + "step": 1638 + }, + { + "epoch": 3.6503340757238307, + "grad_norm": 19.639419555664062, + "learning_rate": 1e-06, + "loss": 0.9729, + "num_input_tokens_seen": 91813308, + "step": 1639 + }, + { + "epoch": 3.6503340757238307, + "loss": 0.7160975933074951, + "loss_ce": 0.0002772239677142352, + "loss_iou": 0.30078125, + "loss_num": 0.0225830078125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 91813308, + "step": 1639 + }, + { + "epoch": 3.6525612472160356, + "grad_norm": 26.63214683532715, + "learning_rate": 1e-06, + "loss": 0.7171, + "num_input_tokens_seen": 91870888, + "step": 1640 + }, + { + "epoch": 3.6525612472160356, + "loss": 0.6993926763534546, + "loss_ce": 0.0004180770483799279, + "loss_iou": 0.30859375, + "loss_num": 0.0166015625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 91870888, + "step": 1640 + }, + { + "epoch": 3.6547884187082404, + "grad_norm": 24.945594787597656, + "learning_rate": 1e-06, + "loss": 0.7824, + "num_input_tokens_seen": 91928644, + "step": 1641 + }, + { + "epoch": 3.6547884187082404, + "loss": 0.8066414594650269, + "loss_ce": 0.00036706856917589903, + "loss_iou": 0.322265625, + "loss_num": 0.0322265625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 91928644, + "step": 1641 + }, + { + "epoch": 3.6570155902004453, + "grad_norm": 28.210344314575195, + "learning_rate": 1e-06, + "loss": 0.7365, + "num_input_tokens_seen": 91982148, + "step": 1642 + }, + { + "epoch": 3.6570155902004453, + "loss": 0.6948254704475403, + "loss_ce": 0.0014660632004961371, + "loss_iou": 0.3046875, + "loss_num": 0.016357421875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 91982148, + "step": 1642 + }, + { + "epoch": 3.65924276169265, + "grad_norm": 83.1476821899414, + "learning_rate": 1e-06, + "loss": 0.8233, + "num_input_tokens_seen": 92038604, + "step": 1643 + }, + { + "epoch": 3.65924276169265, + "loss": 1.0671532154083252, + "loss_ce": 0.000502894283272326, + "loss_iou": 0.453125, + "loss_num": 0.031982421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 92038604, + "step": 1643 + }, + { + "epoch": 3.661469933184855, + "grad_norm": 19.53736114501953, + "learning_rate": 1e-06, + "loss": 0.6483, + "num_input_tokens_seen": 92094664, + "step": 1644 + }, + { + "epoch": 3.661469933184855, + "loss": 0.4442579448223114, + "loss_ce": 0.0002882296103052795, + "loss_iou": 0.1650390625, + "loss_num": 0.022705078125, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 92094664, + "step": 1644 + }, + { + "epoch": 3.66369710467706, + "grad_norm": 30.144088745117188, + "learning_rate": 1e-06, + "loss": 0.8099, + "num_input_tokens_seen": 92149264, + "step": 1645 + }, + { + "epoch": 3.66369710467706, + "loss": 0.6731590032577515, + "loss_ce": 0.0003074193373322487, + "loss_iou": 0.275390625, + "loss_num": 0.0242919921875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 92149264, + "step": 1645 + }, + { + "epoch": 3.665924276169265, + "grad_norm": 24.664552688598633, + "learning_rate": 1e-06, + "loss": 0.9853, + "num_input_tokens_seen": 92207820, + "step": 1646 + }, + { + "epoch": 3.665924276169265, + "loss": 0.8198744654655457, + "loss_ce": 0.000294416124233976, + "loss_iou": 0.3515625, + "loss_num": 0.0235595703125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 92207820, + "step": 1646 + }, + { + "epoch": 3.66815144766147, + "grad_norm": 17.04414939880371, + "learning_rate": 1e-06, + "loss": 0.6132, + "num_input_tokens_seen": 92263900, + "step": 1647 + }, + { + "epoch": 3.66815144766147, + "loss": 0.5955994725227356, + "loss_ce": 0.0003235829062759876, + "loss_iou": 0.2451171875, + "loss_num": 0.0211181640625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 92263900, + "step": 1647 + }, + { + "epoch": 3.670378619153675, + "grad_norm": 26.547033309936523, + "learning_rate": 1e-06, + "loss": 0.7967, + "num_input_tokens_seen": 92319188, + "step": 1648 + }, + { + "epoch": 3.670378619153675, + "loss": 0.6533565521240234, + "loss_ce": 0.00028039264725521207, + "loss_iou": 0.291015625, + "loss_num": 0.01446533203125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 92319188, + "step": 1648 + }, + { + "epoch": 3.6726057906458798, + "grad_norm": 20.542510986328125, + "learning_rate": 1e-06, + "loss": 0.804, + "num_input_tokens_seen": 92375980, + "step": 1649 + }, + { + "epoch": 3.6726057906458798, + "loss": 0.8124038577079773, + "loss_ce": 0.00039214000571519136, + "loss_iou": 0.30859375, + "loss_num": 0.0390625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 92375980, + "step": 1649 + }, + { + "epoch": 3.6748329621380846, + "grad_norm": 77.64947509765625, + "learning_rate": 1e-06, + "loss": 1.049, + "num_input_tokens_seen": 92427792, + "step": 1650 + }, + { + "epoch": 3.6748329621380846, + "loss": 0.7283252477645874, + "loss_ce": 0.00029791187262162566, + "loss_iou": 0.31640625, + "loss_num": 0.019287109375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 92427792, + "step": 1650 + }, + { + "epoch": 3.6770601336302895, + "grad_norm": 20.271570205688477, + "learning_rate": 1e-06, + "loss": 0.7777, + "num_input_tokens_seen": 92484212, + "step": 1651 + }, + { + "epoch": 3.6770601336302895, + "loss": 0.6192773580551147, + "loss_ce": 0.00025881448527798057, + "loss_iou": 0.267578125, + "loss_num": 0.0172119140625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 92484212, + "step": 1651 + }, + { + "epoch": 3.6792873051224944, + "grad_norm": 15.280570030212402, + "learning_rate": 1e-06, + "loss": 0.7805, + "num_input_tokens_seen": 92538032, + "step": 1652 + }, + { + "epoch": 3.6792873051224944, + "loss": 0.6725317239761353, + "loss_ce": 0.00029047008138149977, + "loss_iou": 0.283203125, + "loss_num": 0.0211181640625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 92538032, + "step": 1652 + }, + { + "epoch": 3.681514476614699, + "grad_norm": 17.816753387451172, + "learning_rate": 1e-06, + "loss": 1.012, + "num_input_tokens_seen": 92592792, + "step": 1653 + }, + { + "epoch": 3.681514476614699, + "loss": 0.8765089511871338, + "loss_ce": 0.0002882035914808512, + "loss_iou": 0.384765625, + "loss_num": 0.0213623046875, + "loss_xval": 0.875, + "num_input_tokens_seen": 92592792, + "step": 1653 + }, + { + "epoch": 3.683741648106904, + "grad_norm": 34.85762405395508, + "learning_rate": 1e-06, + "loss": 0.9599, + "num_input_tokens_seen": 92649552, + "step": 1654 + }, + { + "epoch": 3.683741648106904, + "loss": 1.0684025287628174, + "loss_ce": 0.00028726100572384894, + "loss_iou": 0.453125, + "loss_num": 0.0322265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 92649552, + "step": 1654 + }, + { + "epoch": 3.6859688195991094, + "grad_norm": 16.367671966552734, + "learning_rate": 1e-06, + "loss": 0.7476, + "num_input_tokens_seen": 92705912, + "step": 1655 + }, + { + "epoch": 3.6859688195991094, + "loss": 0.467061311006546, + "loss_ce": 0.0002644392370712012, + "loss_iou": 0.2080078125, + "loss_num": 0.01025390625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 92705912, + "step": 1655 + }, + { + "epoch": 3.6881959910913142, + "grad_norm": 28.718902587890625, + "learning_rate": 1e-06, + "loss": 0.8724, + "num_input_tokens_seen": 92764576, + "step": 1656 + }, + { + "epoch": 3.6881959910913142, + "loss": 0.7795916795730591, + "loss_ce": 0.0002947830653283745, + "loss_iou": 0.328125, + "loss_num": 0.0244140625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 92764576, + "step": 1656 + }, + { + "epoch": 3.690423162583519, + "grad_norm": 25.93492317199707, + "learning_rate": 1e-06, + "loss": 0.9498, + "num_input_tokens_seen": 92818528, + "step": 1657 + }, + { + "epoch": 3.690423162583519, + "loss": 1.08326256275177, + "loss_ce": 0.00025476596783846617, + "loss_iou": 0.474609375, + "loss_num": 0.026611328125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 92818528, + "step": 1657 + }, + { + "epoch": 3.692650334075724, + "grad_norm": 16.00531768798828, + "learning_rate": 1e-06, + "loss": 1.0849, + "num_input_tokens_seen": 92874168, + "step": 1658 + }, + { + "epoch": 3.692650334075724, + "loss": 0.8632932901382446, + "loss_ce": 0.0002562026202213019, + "loss_iou": 0.384765625, + "loss_num": 0.018798828125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 92874168, + "step": 1658 + }, + { + "epoch": 3.694877505567929, + "grad_norm": 48.529300689697266, + "learning_rate": 1e-06, + "loss": 0.9435, + "num_input_tokens_seen": 92926172, + "step": 1659 + }, + { + "epoch": 3.694877505567929, + "loss": 0.9439380168914795, + "loss_ce": 0.000822762493044138, + "loss_iou": 0.40234375, + "loss_num": 0.02783203125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 92926172, + "step": 1659 + }, + { + "epoch": 3.6971046770601337, + "grad_norm": 16.016910552978516, + "learning_rate": 1e-06, + "loss": 0.7698, + "num_input_tokens_seen": 92981500, + "step": 1660 + }, + { + "epoch": 3.6971046770601337, + "loss": 0.7871308326721191, + "loss_ce": 0.0002656061842571944, + "loss_iou": 0.341796875, + "loss_num": 0.0201416015625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 92981500, + "step": 1660 + }, + { + "epoch": 3.6993318485523385, + "grad_norm": 18.289608001708984, + "learning_rate": 1e-06, + "loss": 1.0147, + "num_input_tokens_seen": 93036804, + "step": 1661 + }, + { + "epoch": 3.6993318485523385, + "loss": 1.0226683616638184, + "loss_ce": 0.00032947887666523457, + "loss_iou": 0.4296875, + "loss_num": 0.031982421875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 93036804, + "step": 1661 + }, + { + "epoch": 3.7015590200445434, + "grad_norm": 15.377629280090332, + "learning_rate": 1e-06, + "loss": 0.7615, + "num_input_tokens_seen": 93093888, + "step": 1662 + }, + { + "epoch": 3.7015590200445434, + "loss": 0.8022950887680054, + "loss_ce": 0.00029318686574697495, + "loss_iou": 0.328125, + "loss_num": 0.0291748046875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 93093888, + "step": 1662 + }, + { + "epoch": 3.7037861915367483, + "grad_norm": 16.651514053344727, + "learning_rate": 1e-06, + "loss": 0.8527, + "num_input_tokens_seen": 93151972, + "step": 1663 + }, + { + "epoch": 3.7037861915367483, + "loss": 0.8089163303375244, + "loss_ce": 0.0003225764958187938, + "loss_iou": 0.353515625, + "loss_num": 0.0203857421875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 93151972, + "step": 1663 + }, + { + "epoch": 3.706013363028953, + "grad_norm": 87.34024047851562, + "learning_rate": 1e-06, + "loss": 0.9063, + "num_input_tokens_seen": 93208668, + "step": 1664 + }, + { + "epoch": 3.706013363028953, + "loss": 0.6086684465408325, + "loss_ce": 0.00026995883672498167, + "loss_iou": 0.25390625, + "loss_num": 0.019775390625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 93208668, + "step": 1664 + }, + { + "epoch": 3.708240534521158, + "grad_norm": 22.259021759033203, + "learning_rate": 1e-06, + "loss": 0.9715, + "num_input_tokens_seen": 93266120, + "step": 1665 + }, + { + "epoch": 3.708240534521158, + "loss": 1.0723727941513062, + "loss_ce": 0.0003513463889248669, + "loss_iou": 0.4453125, + "loss_num": 0.03662109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 93266120, + "step": 1665 + }, + { + "epoch": 3.710467706013363, + "grad_norm": 13.237698554992676, + "learning_rate": 1e-06, + "loss": 0.7824, + "num_input_tokens_seen": 93322796, + "step": 1666 + }, + { + "epoch": 3.710467706013363, + "loss": 0.803238034248352, + "loss_ce": 0.0002595084370113909, + "loss_iou": 0.33984375, + "loss_num": 0.0245361328125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 93322796, + "step": 1666 + }, + { + "epoch": 3.7126948775055677, + "grad_norm": 30.929845809936523, + "learning_rate": 1e-06, + "loss": 0.9966, + "num_input_tokens_seen": 93378408, + "step": 1667 + }, + { + "epoch": 3.7126948775055677, + "loss": 0.9339191913604736, + "loss_ce": 0.0003254116454627365, + "loss_iou": 0.423828125, + "loss_num": 0.0172119140625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 93378408, + "step": 1667 + }, + { + "epoch": 3.7149220489977726, + "grad_norm": 17.648576736450195, + "learning_rate": 1e-06, + "loss": 0.9418, + "num_input_tokens_seen": 93435448, + "step": 1668 + }, + { + "epoch": 3.7149220489977726, + "loss": 0.8993349075317383, + "loss_ce": 0.0013856550212949514, + "loss_iou": 0.373046875, + "loss_num": 0.0306396484375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 93435448, + "step": 1668 + }, + { + "epoch": 3.717149220489978, + "grad_norm": 18.18744659423828, + "learning_rate": 1e-06, + "loss": 0.5543, + "num_input_tokens_seen": 93490744, + "step": 1669 + }, + { + "epoch": 3.717149220489978, + "loss": 0.5734395980834961, + "loss_ce": 0.00044156977673992515, + "loss_iou": 0.2470703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 93490744, + "step": 1669 + }, + { + "epoch": 3.7193763919821827, + "grad_norm": 26.080154418945312, + "learning_rate": 1e-06, + "loss": 0.8468, + "num_input_tokens_seen": 93546192, + "step": 1670 + }, + { + "epoch": 3.7193763919821827, + "loss": 1.0555299520492554, + "loss_ce": 0.00035410295822657645, + "loss_iou": 0.455078125, + "loss_num": 0.0286865234375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 93546192, + "step": 1670 + }, + { + "epoch": 3.7216035634743876, + "grad_norm": 19.251693725585938, + "learning_rate": 1e-06, + "loss": 0.9076, + "num_input_tokens_seen": 93604348, + "step": 1671 + }, + { + "epoch": 3.7216035634743876, + "loss": 0.8526378273963928, + "loss_ce": 0.00034295275690965354, + "loss_iou": 0.361328125, + "loss_num": 0.0263671875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 93604348, + "step": 1671 + }, + { + "epoch": 3.7238307349665924, + "grad_norm": 15.625646591186523, + "learning_rate": 1e-06, + "loss": 0.881, + "num_input_tokens_seen": 93659012, + "step": 1672 + }, + { + "epoch": 3.7238307349665924, + "loss": 0.9302690625190735, + "loss_ce": 0.00033744628308340907, + "loss_iou": 0.3984375, + "loss_num": 0.0262451171875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 93659012, + "step": 1672 + }, + { + "epoch": 3.7260579064587973, + "grad_norm": 47.977176666259766, + "learning_rate": 1e-06, + "loss": 1.0519, + "num_input_tokens_seen": 93710680, + "step": 1673 + }, + { + "epoch": 3.7260579064587973, + "loss": 0.8930314779281616, + "loss_ce": 0.0006975086871534586, + "loss_iou": 0.375, + "loss_num": 0.028564453125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 93710680, + "step": 1673 + }, + { + "epoch": 3.728285077951002, + "grad_norm": 27.666378021240234, + "learning_rate": 1e-06, + "loss": 0.8055, + "num_input_tokens_seen": 93766956, + "step": 1674 + }, + { + "epoch": 3.728285077951002, + "loss": 0.6055097579956055, + "loss_ce": 0.000285172660369426, + "loss_iou": 0.2490234375, + "loss_num": 0.0213623046875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 93766956, + "step": 1674 + }, + { + "epoch": 3.730512249443207, + "grad_norm": 28.261253356933594, + "learning_rate": 1e-06, + "loss": 0.6353, + "num_input_tokens_seen": 93824432, + "step": 1675 + }, + { + "epoch": 3.730512249443207, + "loss": 0.7097867727279663, + "loss_ce": 0.00031414441764354706, + "loss_iou": 0.30078125, + "loss_num": 0.021240234375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 93824432, + "step": 1675 + }, + { + "epoch": 3.732739420935412, + "grad_norm": 17.860763549804688, + "learning_rate": 1e-06, + "loss": 0.7996, + "num_input_tokens_seen": 93882600, + "step": 1676 + }, + { + "epoch": 3.732739420935412, + "loss": 0.6038820743560791, + "loss_ce": 0.00021385436411947012, + "loss_iou": 0.24609375, + "loss_num": 0.022216796875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 93882600, + "step": 1676 + }, + { + "epoch": 3.734966592427617, + "grad_norm": 22.68053436279297, + "learning_rate": 1e-06, + "loss": 1.0956, + "num_input_tokens_seen": 93938548, + "step": 1677 + }, + { + "epoch": 3.734966592427617, + "loss": 0.991020917892456, + "loss_ce": 0.00029826798709109426, + "loss_iou": 0.44140625, + "loss_num": 0.0216064453125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 93938548, + "step": 1677 + }, + { + "epoch": 3.737193763919822, + "grad_norm": 22.026281356811523, + "learning_rate": 1e-06, + "loss": 0.7681, + "num_input_tokens_seen": 93993212, + "step": 1678 + }, + { + "epoch": 3.737193763919822, + "loss": 0.945850670337677, + "loss_ce": 0.000294047174975276, + "loss_iou": 0.365234375, + "loss_num": 0.04296875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 93993212, + "step": 1678 + }, + { + "epoch": 3.739420935412027, + "grad_norm": 25.18975067138672, + "learning_rate": 1e-06, + "loss": 0.8155, + "num_input_tokens_seen": 94051056, + "step": 1679 + }, + { + "epoch": 3.739420935412027, + "loss": 0.9602963328361511, + "loss_ce": 0.00106780044734478, + "loss_iou": 0.384765625, + "loss_num": 0.038330078125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 94051056, + "step": 1679 + }, + { + "epoch": 3.7416481069042318, + "grad_norm": 22.7869873046875, + "learning_rate": 1e-06, + "loss": 0.7987, + "num_input_tokens_seen": 94110436, + "step": 1680 + }, + { + "epoch": 3.7416481069042318, + "loss": 0.8000843524932861, + "loss_ce": 0.0002796592016238719, + "loss_iou": 0.337890625, + "loss_num": 0.024658203125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 94110436, + "step": 1680 + }, + { + "epoch": 3.7438752783964366, + "grad_norm": 24.386417388916016, + "learning_rate": 1e-06, + "loss": 1.0109, + "num_input_tokens_seen": 94167244, + "step": 1681 + }, + { + "epoch": 3.7438752783964366, + "loss": 1.1265201568603516, + "loss_ce": 0.0005436294013634324, + "loss_iou": 0.4453125, + "loss_num": 0.047119140625, + "loss_xval": 1.125, + "num_input_tokens_seen": 94167244, + "step": 1681 + }, + { + "epoch": 3.7461024498886415, + "grad_norm": 19.943164825439453, + "learning_rate": 1e-06, + "loss": 1.1193, + "num_input_tokens_seen": 94223920, + "step": 1682 + }, + { + "epoch": 3.7461024498886415, + "loss": 0.9446117877960205, + "loss_ce": 0.00027577788569033146, + "loss_iou": 0.41796875, + "loss_num": 0.021728515625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 94223920, + "step": 1682 + }, + { + "epoch": 3.7483296213808464, + "grad_norm": 20.862585067749023, + "learning_rate": 1e-06, + "loss": 0.6461, + "num_input_tokens_seen": 94279276, + "step": 1683 + }, + { + "epoch": 3.7483296213808464, + "loss": 0.621061384677887, + "loss_ce": 0.0002728351391851902, + "loss_iou": 0.267578125, + "loss_num": 0.01708984375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 94279276, + "step": 1683 + }, + { + "epoch": 3.750556792873051, + "grad_norm": 19.76258087158203, + "learning_rate": 1e-06, + "loss": 0.8812, + "num_input_tokens_seen": 94333312, + "step": 1684 + }, + { + "epoch": 3.750556792873051, + "loss": 1.0115703344345093, + "loss_ce": 0.00033991390955634415, + "loss_iou": 0.4375, + "loss_num": 0.0277099609375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 94333312, + "step": 1684 + }, + { + "epoch": 3.752783964365256, + "grad_norm": 13.520242691040039, + "learning_rate": 1e-06, + "loss": 0.9022, + "num_input_tokens_seen": 94389944, + "step": 1685 + }, + { + "epoch": 3.752783964365256, + "loss": 1.0272419452667236, + "loss_ce": 0.00038654671516269445, + "loss_iou": 0.439453125, + "loss_num": 0.0296630859375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 94389944, + "step": 1685 + }, + { + "epoch": 3.755011135857461, + "grad_norm": 13.537060737609863, + "learning_rate": 1e-06, + "loss": 0.7416, + "num_input_tokens_seen": 94445592, + "step": 1686 + }, + { + "epoch": 3.755011135857461, + "loss": 0.7466691136360168, + "loss_ce": 0.00033120866282843053, + "loss_iou": 0.33203125, + "loss_num": 0.016357421875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 94445592, + "step": 1686 + }, + { + "epoch": 3.757238307349666, + "grad_norm": 17.105846405029297, + "learning_rate": 1e-06, + "loss": 0.8223, + "num_input_tokens_seen": 94504800, + "step": 1687 + }, + { + "epoch": 3.757238307349666, + "loss": 0.8430700302124023, + "loss_ce": 0.0002966249012388289, + "loss_iou": 0.337890625, + "loss_num": 0.033447265625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 94504800, + "step": 1687 + }, + { + "epoch": 3.7594654788418707, + "grad_norm": 18.66309356689453, + "learning_rate": 1e-06, + "loss": 1.0448, + "num_input_tokens_seen": 94560456, + "step": 1688 + }, + { + "epoch": 3.7594654788418707, + "loss": 0.9472213983535767, + "loss_ce": 0.00044402258936315775, + "loss_iou": 0.412109375, + "loss_num": 0.0245361328125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 94560456, + "step": 1688 + }, + { + "epoch": 3.7616926503340755, + "grad_norm": 16.696016311645508, + "learning_rate": 1e-06, + "loss": 0.7974, + "num_input_tokens_seen": 94617952, + "step": 1689 + }, + { + "epoch": 3.7616926503340755, + "loss": 0.7895834445953369, + "loss_ce": 0.0003989200631622225, + "loss_iou": 0.322265625, + "loss_num": 0.029052734375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 94617952, + "step": 1689 + }, + { + "epoch": 3.7639198218262804, + "grad_norm": 27.074752807617188, + "learning_rate": 1e-06, + "loss": 0.7939, + "num_input_tokens_seen": 94673300, + "step": 1690 + }, + { + "epoch": 3.7639198218262804, + "loss": 0.47075653076171875, + "loss_ce": 0.00029751902911812067, + "loss_iou": 0.2041015625, + "loss_num": 0.012451171875, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 94673300, + "step": 1690 + }, + { + "epoch": 3.7661469933184857, + "grad_norm": 23.333097457885742, + "learning_rate": 1e-06, + "loss": 0.901, + "num_input_tokens_seen": 94727112, + "step": 1691 + }, + { + "epoch": 3.7661469933184857, + "loss": 0.8041844964027405, + "loss_ce": 0.0004735640832222998, + "loss_iou": 0.349609375, + "loss_num": 0.0213623046875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 94727112, + "step": 1691 + }, + { + "epoch": 3.7683741648106905, + "grad_norm": 14.201781272888184, + "learning_rate": 1e-06, + "loss": 0.8346, + "num_input_tokens_seen": 94784200, + "step": 1692 + }, + { + "epoch": 3.7683741648106905, + "loss": 0.82025545835495, + "loss_ce": 0.0003091579128522426, + "loss_iou": 0.322265625, + "loss_num": 0.034912109375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 94784200, + "step": 1692 + }, + { + "epoch": 3.7706013363028954, + "grad_norm": 17.050172805786133, + "learning_rate": 1e-06, + "loss": 0.9037, + "num_input_tokens_seen": 94842204, + "step": 1693 + }, + { + "epoch": 3.7706013363028954, + "loss": 0.7915303707122803, + "loss_ce": 0.0002705698716454208, + "loss_iou": 0.3359375, + "loss_num": 0.02392578125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 94842204, + "step": 1693 + }, + { + "epoch": 3.7728285077951003, + "grad_norm": 20.24613380432129, + "learning_rate": 1e-06, + "loss": 0.6938, + "num_input_tokens_seen": 94897320, + "step": 1694 + }, + { + "epoch": 3.7728285077951003, + "loss": 0.6334647536277771, + "loss_ce": 0.000285994668956846, + "loss_iou": 0.275390625, + "loss_num": 0.0164794921875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 94897320, + "step": 1694 + }, + { + "epoch": 3.775055679287305, + "grad_norm": 23.602615356445312, + "learning_rate": 1e-06, + "loss": 0.8466, + "num_input_tokens_seen": 94956064, + "step": 1695 + }, + { + "epoch": 3.775055679287305, + "loss": 0.8750972747802734, + "loss_ce": 0.00034144739038310945, + "loss_iou": 0.380859375, + "loss_num": 0.0230712890625, + "loss_xval": 0.875, + "num_input_tokens_seen": 94956064, + "step": 1695 + }, + { + "epoch": 3.77728285077951, + "grad_norm": 19.40206527709961, + "learning_rate": 1e-06, + "loss": 0.7123, + "num_input_tokens_seen": 95011940, + "step": 1696 + }, + { + "epoch": 3.77728285077951, + "loss": 0.9245999455451965, + "loss_ce": 0.0002835210179910064, + "loss_iou": 0.375, + "loss_num": 0.03466796875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 95011940, + "step": 1696 + }, + { + "epoch": 3.779510022271715, + "grad_norm": 22.941545486450195, + "learning_rate": 1e-06, + "loss": 0.7155, + "num_input_tokens_seen": 95065572, + "step": 1697 + }, + { + "epoch": 3.779510022271715, + "loss": 0.5341686606407166, + "loss_ce": 0.00023310747928917408, + "loss_iou": 0.21484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 95065572, + "step": 1697 + }, + { + "epoch": 3.7817371937639197, + "grad_norm": 19.74363899230957, + "learning_rate": 1e-06, + "loss": 0.7572, + "num_input_tokens_seen": 95120584, + "step": 1698 + }, + { + "epoch": 3.7817371937639197, + "loss": 0.7215688228607178, + "loss_ce": 0.00037740974221378565, + "loss_iou": 0.294921875, + "loss_num": 0.0263671875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 95120584, + "step": 1698 + }, + { + "epoch": 3.7839643652561246, + "grad_norm": 34.252037048339844, + "learning_rate": 1e-06, + "loss": 0.8692, + "num_input_tokens_seen": 95176160, + "step": 1699 + }, + { + "epoch": 3.7839643652561246, + "loss": 0.9719327688217163, + "loss_ce": 0.0002530916826799512, + "loss_iou": 0.421875, + "loss_num": 0.025634765625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 95176160, + "step": 1699 + }, + { + "epoch": 3.78619153674833, + "grad_norm": 17.124095916748047, + "learning_rate": 1e-06, + "loss": 0.7826, + "num_input_tokens_seen": 95230472, + "step": 1700 + }, + { + "epoch": 3.78619153674833, + "loss": 0.7893081903457642, + "loss_ce": 0.0007340057054534554, + "loss_iou": 0.330078125, + "loss_num": 0.0255126953125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 95230472, + "step": 1700 + }, + { + "epoch": 3.7884187082405347, + "grad_norm": 18.04917335510254, + "learning_rate": 1e-06, + "loss": 0.7732, + "num_input_tokens_seen": 95286580, + "step": 1701 + }, + { + "epoch": 3.7884187082405347, + "loss": 0.8537644743919373, + "loss_ce": 0.0004929610877297819, + "loss_iou": 0.361328125, + "loss_num": 0.0257568359375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 95286580, + "step": 1701 + }, + { + "epoch": 3.7906458797327396, + "grad_norm": 19.060016632080078, + "learning_rate": 1e-06, + "loss": 0.7842, + "num_input_tokens_seen": 95341332, + "step": 1702 + }, + { + "epoch": 3.7906458797327396, + "loss": 0.5456702709197998, + "loss_ce": 0.00026007683482021093, + "loss_iou": 0.2236328125, + "loss_num": 0.01953125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 95341332, + "step": 1702 + }, + { + "epoch": 3.7928730512249444, + "grad_norm": 14.60232925415039, + "learning_rate": 1e-06, + "loss": 0.6544, + "num_input_tokens_seen": 95397800, + "step": 1703 + }, + { + "epoch": 3.7928730512249444, + "loss": 0.5783922076225281, + "loss_ce": 0.00026723096380010247, + "loss_iou": 0.248046875, + "loss_num": 0.016357421875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 95397800, + "step": 1703 + }, + { + "epoch": 3.7951002227171493, + "grad_norm": 237.25967407226562, + "learning_rate": 1e-06, + "loss": 0.8609, + "num_input_tokens_seen": 95453292, + "step": 1704 + }, + { + "epoch": 3.7951002227171493, + "loss": 0.7244041562080383, + "loss_ce": 0.0002830714511219412, + "loss_iou": 0.314453125, + "loss_num": 0.018798828125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 95453292, + "step": 1704 + }, + { + "epoch": 3.797327394209354, + "grad_norm": 23.924535751342773, + "learning_rate": 1e-06, + "loss": 0.8929, + "num_input_tokens_seen": 95509636, + "step": 1705 + }, + { + "epoch": 3.797327394209354, + "loss": 0.9049976468086243, + "loss_ce": 0.00045664224307984114, + "loss_iou": 0.376953125, + "loss_num": 0.0299072265625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 95509636, + "step": 1705 + }, + { + "epoch": 3.799554565701559, + "grad_norm": 13.076998710632324, + "learning_rate": 1e-06, + "loss": 0.7502, + "num_input_tokens_seen": 95567220, + "step": 1706 + }, + { + "epoch": 3.799554565701559, + "loss": 0.8397418260574341, + "loss_ce": 0.0006305105634965003, + "loss_iou": 0.3359375, + "loss_num": 0.03369140625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 95567220, + "step": 1706 + }, + { + "epoch": 3.801781737193764, + "grad_norm": 16.36588478088379, + "learning_rate": 1e-06, + "loss": 0.8995, + "num_input_tokens_seen": 95621892, + "step": 1707 + }, + { + "epoch": 3.801781737193764, + "loss": 0.8961795568466187, + "loss_ce": 0.00042754405876621604, + "loss_iou": 0.36328125, + "loss_num": 0.034423828125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 95621892, + "step": 1707 + }, + { + "epoch": 3.8040089086859687, + "grad_norm": 31.586313247680664, + "learning_rate": 1e-06, + "loss": 0.8272, + "num_input_tokens_seen": 95678952, + "step": 1708 + }, + { + "epoch": 3.8040089086859687, + "loss": 0.7548704147338867, + "loss_ce": 0.0002317242615390569, + "loss_iou": 0.333984375, + "loss_num": 0.0169677734375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 95678952, + "step": 1708 + }, + { + "epoch": 3.8062360801781736, + "grad_norm": 21.018543243408203, + "learning_rate": 1e-06, + "loss": 0.8788, + "num_input_tokens_seen": 95734860, + "step": 1709 + }, + { + "epoch": 3.8062360801781736, + "loss": 0.8822214603424072, + "loss_ce": 0.0011179400607943535, + "loss_iou": 0.37109375, + "loss_num": 0.0279541015625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 95734860, + "step": 1709 + }, + { + "epoch": 3.8084632516703785, + "grad_norm": 46.00482177734375, + "learning_rate": 1e-06, + "loss": 0.6713, + "num_input_tokens_seen": 95791268, + "step": 1710 + }, + { + "epoch": 3.8084632516703785, + "loss": 0.6450284123420715, + "loss_ce": 0.00025300466222688556, + "loss_iou": 0.296875, + "loss_num": 0.01031494140625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 95791268, + "step": 1710 + }, + { + "epoch": 3.8106904231625833, + "grad_norm": 19.820987701416016, + "learning_rate": 1e-06, + "loss": 0.9956, + "num_input_tokens_seen": 95847960, + "step": 1711 + }, + { + "epoch": 3.8106904231625833, + "loss": 0.8101856708526611, + "loss_ce": 0.00037119118496775627, + "loss_iou": 0.34765625, + "loss_num": 0.0230712890625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 95847960, + "step": 1711 + }, + { + "epoch": 3.812917594654788, + "grad_norm": 16.08196449279785, + "learning_rate": 1e-06, + "loss": 1.0883, + "num_input_tokens_seen": 95904500, + "step": 1712 + }, + { + "epoch": 3.812917594654788, + "loss": 0.8286195993423462, + "loss_ce": 0.0004946578992530704, + "loss_iou": 0.33984375, + "loss_num": 0.029541015625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 95904500, + "step": 1712 + }, + { + "epoch": 3.815144766146993, + "grad_norm": 25.859769821166992, + "learning_rate": 1e-06, + "loss": 0.747, + "num_input_tokens_seen": 95959000, + "step": 1713 + }, + { + "epoch": 3.815144766146993, + "loss": 0.749795138835907, + "loss_ce": 0.00028341758297756314, + "loss_iou": 0.31640625, + "loss_num": 0.0235595703125, + "loss_xval": 0.75, + "num_input_tokens_seen": 95959000, + "step": 1713 + }, + { + "epoch": 3.8173719376391984, + "grad_norm": 16.63954734802246, + "learning_rate": 1e-06, + "loss": 0.7589, + "num_input_tokens_seen": 96015556, + "step": 1714 + }, + { + "epoch": 3.8173719376391984, + "loss": 0.7214926481246948, + "loss_ce": 0.00030121111194603145, + "loss_iou": 0.32421875, + "loss_num": 0.01458740234375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 96015556, + "step": 1714 + }, + { + "epoch": 3.819599109131403, + "grad_norm": 21.721033096313477, + "learning_rate": 1e-06, + "loss": 0.7904, + "num_input_tokens_seen": 96072212, + "step": 1715 + }, + { + "epoch": 3.819599109131403, + "loss": 0.7724882364273071, + "loss_ce": 0.00027147267246618867, + "loss_iou": 0.333984375, + "loss_num": 0.0206298828125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 96072212, + "step": 1715 + }, + { + "epoch": 3.821826280623608, + "grad_norm": 13.081016540527344, + "learning_rate": 1e-06, + "loss": 0.7134, + "num_input_tokens_seen": 96129256, + "step": 1716 + }, + { + "epoch": 3.821826280623608, + "loss": 0.8351633548736572, + "loss_ce": 0.00026344467187300324, + "loss_iou": 0.375, + "loss_num": 0.0167236328125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 96129256, + "step": 1716 + }, + { + "epoch": 3.824053452115813, + "grad_norm": 23.20547866821289, + "learning_rate": 1e-06, + "loss": 0.9529, + "num_input_tokens_seen": 96186100, + "step": 1717 + }, + { + "epoch": 3.824053452115813, + "loss": 1.0769813060760498, + "loss_ce": 0.00032106111757457256, + "loss_iou": 0.42578125, + "loss_num": 0.044677734375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 96186100, + "step": 1717 + }, + { + "epoch": 3.826280623608018, + "grad_norm": 19.76165008544922, + "learning_rate": 1e-06, + "loss": 0.9206, + "num_input_tokens_seen": 96242056, + "step": 1718 + }, + { + "epoch": 3.826280623608018, + "loss": 0.7002924680709839, + "loss_ce": 0.0003413489321246743, + "loss_iou": 0.310546875, + "loss_num": 0.0155029296875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 96242056, + "step": 1718 + }, + { + "epoch": 3.8285077951002227, + "grad_norm": 19.560970306396484, + "learning_rate": 1e-06, + "loss": 0.9795, + "num_input_tokens_seen": 96296820, + "step": 1719 + }, + { + "epoch": 3.8285077951002227, + "loss": 1.1053590774536133, + "loss_ce": 0.00037851842353120446, + "loss_iou": 0.453125, + "loss_num": 0.039306640625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 96296820, + "step": 1719 + }, + { + "epoch": 3.8307349665924275, + "grad_norm": 13.943714141845703, + "learning_rate": 1e-06, + "loss": 0.7494, + "num_input_tokens_seen": 96352772, + "step": 1720 + }, + { + "epoch": 3.8307349665924275, + "loss": 0.9070804715156555, + "loss_ce": 0.00034218025393784046, + "loss_iou": 0.369140625, + "loss_num": 0.033935546875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 96352772, + "step": 1720 + }, + { + "epoch": 3.8329621380846324, + "grad_norm": 24.83785629272461, + "learning_rate": 1e-06, + "loss": 0.8379, + "num_input_tokens_seen": 96409184, + "step": 1721 + }, + { + "epoch": 3.8329621380846324, + "loss": 0.9676505327224731, + "loss_ce": 0.0003653773164842278, + "loss_iou": 0.4140625, + "loss_num": 0.027587890625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 96409184, + "step": 1721 + }, + { + "epoch": 3.8351893095768377, + "grad_norm": 18.294523239135742, + "learning_rate": 1e-06, + "loss": 0.7214, + "num_input_tokens_seen": 96465040, + "step": 1722 + }, + { + "epoch": 3.8351893095768377, + "loss": 0.5652261972427368, + "loss_ce": 0.00028477917658165097, + "loss_iou": 0.240234375, + "loss_num": 0.0169677734375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 96465040, + "step": 1722 + }, + { + "epoch": 3.8374164810690425, + "grad_norm": 14.921290397644043, + "learning_rate": 1e-06, + "loss": 0.7268, + "num_input_tokens_seen": 96523672, + "step": 1723 + }, + { + "epoch": 3.8374164810690425, + "loss": 0.7978357076644897, + "loss_ce": 0.00022831102251075208, + "loss_iou": 0.33203125, + "loss_num": 0.0269775390625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 96523672, + "step": 1723 + }, + { + "epoch": 3.8396436525612474, + "grad_norm": 25.57061767578125, + "learning_rate": 1e-06, + "loss": 0.7983, + "num_input_tokens_seen": 96579016, + "step": 1724 + }, + { + "epoch": 3.8396436525612474, + "loss": 0.9255920648574829, + "loss_ce": 0.0002990873181261122, + "loss_iou": 0.404296875, + "loss_num": 0.0235595703125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 96579016, + "step": 1724 + }, + { + "epoch": 3.8418708240534523, + "grad_norm": 25.671611785888672, + "learning_rate": 1e-06, + "loss": 0.7852, + "num_input_tokens_seen": 96633780, + "step": 1725 + }, + { + "epoch": 3.8418708240534523, + "loss": 0.7190070152282715, + "loss_ce": 0.0002570503856986761, + "loss_iou": 0.3203125, + "loss_num": 0.0157470703125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 96633780, + "step": 1725 + }, + { + "epoch": 3.844097995545657, + "grad_norm": 21.268661499023438, + "learning_rate": 1e-06, + "loss": 0.5661, + "num_input_tokens_seen": 96689492, + "step": 1726 + }, + { + "epoch": 3.844097995545657, + "loss": 0.563983678817749, + "loss_ce": 0.0002629789523780346, + "loss_iou": 0.2294921875, + "loss_num": 0.02099609375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 96689492, + "step": 1726 + }, + { + "epoch": 3.846325167037862, + "grad_norm": 26.356407165527344, + "learning_rate": 1e-06, + "loss": 0.7125, + "num_input_tokens_seen": 96746656, + "step": 1727 + }, + { + "epoch": 3.846325167037862, + "loss": 0.8865300416946411, + "loss_ce": 0.0004216782108414918, + "loss_iou": 0.375, + "loss_num": 0.027587890625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 96746656, + "step": 1727 + }, + { + "epoch": 3.848552338530067, + "grad_norm": 16.691015243530273, + "learning_rate": 1e-06, + "loss": 0.9871, + "num_input_tokens_seen": 96803292, + "step": 1728 + }, + { + "epoch": 3.848552338530067, + "loss": 0.9029386043548584, + "loss_ce": 0.0003506758948788047, + "loss_iou": 0.380859375, + "loss_num": 0.0279541015625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 96803292, + "step": 1728 + }, + { + "epoch": 3.8507795100222717, + "grad_norm": 25.19491195678711, + "learning_rate": 1e-06, + "loss": 0.723, + "num_input_tokens_seen": 96859896, + "step": 1729 + }, + { + "epoch": 3.8507795100222717, + "loss": 0.7351480722427368, + "loss_ce": 0.00028477725572884083, + "loss_iou": 0.2890625, + "loss_num": 0.0311279296875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 96859896, + "step": 1729 + }, + { + "epoch": 3.8530066815144766, + "grad_norm": 16.253820419311523, + "learning_rate": 1e-06, + "loss": 0.6062, + "num_input_tokens_seen": 96917348, + "step": 1730 + }, + { + "epoch": 3.8530066815144766, + "loss": 0.6677459478378296, + "loss_ce": 0.0002654629643075168, + "loss_iou": 0.28515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 96917348, + "step": 1730 + }, + { + "epoch": 3.8552338530066814, + "grad_norm": 12.308496475219727, + "learning_rate": 1e-06, + "loss": 0.7134, + "num_input_tokens_seen": 96975804, + "step": 1731 + }, + { + "epoch": 3.8552338530066814, + "loss": 0.6045119166374207, + "loss_ce": 0.0003859363787341863, + "loss_iou": 0.248046875, + "loss_num": 0.021484375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 96975804, + "step": 1731 + }, + { + "epoch": 3.8574610244988863, + "grad_norm": 26.0831298828125, + "learning_rate": 1e-06, + "loss": 1.1291, + "num_input_tokens_seen": 97033224, + "step": 1732 + }, + { + "epoch": 3.8574610244988863, + "loss": 1.0747888088226318, + "loss_ce": 0.00032590571208857, + "loss_iou": 0.435546875, + "loss_num": 0.04052734375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 97033224, + "step": 1732 + }, + { + "epoch": 3.859688195991091, + "grad_norm": 22.14068031311035, + "learning_rate": 1e-06, + "loss": 0.9682, + "num_input_tokens_seen": 97092012, + "step": 1733 + }, + { + "epoch": 3.859688195991091, + "loss": 1.2439754009246826, + "loss_ce": 0.00032304698834195733, + "loss_iou": 0.515625, + "loss_num": 0.042724609375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 97092012, + "step": 1733 + }, + { + "epoch": 3.861915367483296, + "grad_norm": 17.057979583740234, + "learning_rate": 1e-06, + "loss": 0.893, + "num_input_tokens_seen": 97145840, + "step": 1734 + }, + { + "epoch": 3.861915367483296, + "loss": 1.0669821500778198, + "loss_ce": 0.0003318190574645996, + "loss_iou": 0.39453125, + "loss_num": 0.05517578125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 97145840, + "step": 1734 + }, + { + "epoch": 3.864142538975501, + "grad_norm": 14.770302772521973, + "learning_rate": 1e-06, + "loss": 0.8305, + "num_input_tokens_seen": 97201400, + "step": 1735 + }, + { + "epoch": 3.864142538975501, + "loss": 0.9496909379959106, + "loss_ce": 0.00047216590610332787, + "loss_iou": 0.39453125, + "loss_num": 0.031982421875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 97201400, + "step": 1735 + }, + { + "epoch": 3.866369710467706, + "grad_norm": 19.580652236938477, + "learning_rate": 1e-06, + "loss": 0.8005, + "num_input_tokens_seen": 97258904, + "step": 1736 + }, + { + "epoch": 3.866369710467706, + "loss": 0.6937247514724731, + "loss_ce": 0.0003653843014035374, + "loss_iou": 0.298828125, + "loss_num": 0.0194091796875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 97258904, + "step": 1736 + }, + { + "epoch": 3.868596881959911, + "grad_norm": 29.482715606689453, + "learning_rate": 1e-06, + "loss": 0.8335, + "num_input_tokens_seen": 97317044, + "step": 1737 + }, + { + "epoch": 3.868596881959911, + "loss": 0.874514102935791, + "loss_ce": 0.00024650723207741976, + "loss_iou": 0.39453125, + "loss_num": 0.017333984375, + "loss_xval": 0.875, + "num_input_tokens_seen": 97317044, + "step": 1737 + }, + { + "epoch": 3.870824053452116, + "grad_norm": 17.633045196533203, + "learning_rate": 1e-06, + "loss": 0.965, + "num_input_tokens_seen": 97372048, + "step": 1738 + }, + { + "epoch": 3.870824053452116, + "loss": 1.020100474357605, + "loss_ce": 0.0005692073609679937, + "loss_iou": 0.40234375, + "loss_num": 0.04248046875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 97372048, + "step": 1738 + }, + { + "epoch": 3.8730512249443207, + "grad_norm": 18.022729873657227, + "learning_rate": 1e-06, + "loss": 0.7467, + "num_input_tokens_seen": 97427968, + "step": 1739 + }, + { + "epoch": 3.8730512249443207, + "loss": 0.7238982915878296, + "loss_ce": 0.0002655313292052597, + "loss_iou": 0.294921875, + "loss_num": 0.0267333984375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 97427968, + "step": 1739 + }, + { + "epoch": 3.8752783964365256, + "grad_norm": 20.802419662475586, + "learning_rate": 1e-06, + "loss": 0.8692, + "num_input_tokens_seen": 97483504, + "step": 1740 + }, + { + "epoch": 3.8752783964365256, + "loss": 0.9595529437065125, + "loss_ce": 0.00032444443786516786, + "loss_iou": 0.419921875, + "loss_num": 0.0240478515625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 97483504, + "step": 1740 + }, + { + "epoch": 3.8775055679287305, + "grad_norm": 25.31464385986328, + "learning_rate": 1e-06, + "loss": 0.9623, + "num_input_tokens_seen": 97539960, + "step": 1741 + }, + { + "epoch": 3.8775055679287305, + "loss": 0.9504677057266235, + "loss_ce": 0.0002723511715885252, + "loss_iou": 0.3828125, + "loss_num": 0.037109375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 97539960, + "step": 1741 + }, + { + "epoch": 3.8797327394209353, + "grad_norm": 18.857027053833008, + "learning_rate": 1e-06, + "loss": 0.6928, + "num_input_tokens_seen": 97597248, + "step": 1742 + }, + { + "epoch": 3.8797327394209353, + "loss": 0.7760539650917053, + "loss_ce": 0.00023611923097632825, + "loss_iou": 0.33984375, + "loss_num": 0.0196533203125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 97597248, + "step": 1742 + }, + { + "epoch": 3.88195991091314, + "grad_norm": 24.135141372680664, + "learning_rate": 1e-06, + "loss": 0.7342, + "num_input_tokens_seen": 97652256, + "step": 1743 + }, + { + "epoch": 3.88195991091314, + "loss": 0.636237621307373, + "loss_ce": 0.00025132749578915536, + "loss_iou": 0.26171875, + "loss_num": 0.02197265625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 97652256, + "step": 1743 + }, + { + "epoch": 3.884187082405345, + "grad_norm": 27.896411895751953, + "learning_rate": 1e-06, + "loss": 0.7845, + "num_input_tokens_seen": 97706708, + "step": 1744 + }, + { + "epoch": 3.884187082405345, + "loss": 0.7319508790969849, + "loss_ce": 0.00026141630951315165, + "loss_iou": 0.328125, + "loss_num": 0.01544189453125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 97706708, + "step": 1744 + }, + { + "epoch": 3.8864142538975504, + "grad_norm": 16.71023178100586, + "learning_rate": 1e-06, + "loss": 0.7643, + "num_input_tokens_seen": 97765116, + "step": 1745 + }, + { + "epoch": 3.8864142538975504, + "loss": 0.8230664730072021, + "loss_ce": 0.00031261841650120914, + "loss_iou": 0.328125, + "loss_num": 0.033447265625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 97765116, + "step": 1745 + }, + { + "epoch": 3.888641425389755, + "grad_norm": 17.422271728515625, + "learning_rate": 1e-06, + "loss": 0.7545, + "num_input_tokens_seen": 97822628, + "step": 1746 + }, + { + "epoch": 3.888641425389755, + "loss": 0.6438745260238647, + "loss_ce": 0.0003198395133949816, + "loss_iou": 0.2734375, + "loss_num": 0.0191650390625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 97822628, + "step": 1746 + }, + { + "epoch": 3.89086859688196, + "grad_norm": 14.700139999389648, + "learning_rate": 1e-06, + "loss": 0.8227, + "num_input_tokens_seen": 97878372, + "step": 1747 + }, + { + "epoch": 3.89086859688196, + "loss": 0.8928521275520325, + "loss_ce": 0.00027394542121328413, + "loss_iou": 0.380859375, + "loss_num": 0.0260009765625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 97878372, + "step": 1747 + }, + { + "epoch": 3.893095768374165, + "grad_norm": 25.434099197387695, + "learning_rate": 1e-06, + "loss": 0.6795, + "num_input_tokens_seen": 97932476, + "step": 1748 + }, + { + "epoch": 3.893095768374165, + "loss": 0.6719607710838318, + "loss_ce": 0.0013064806116744876, + "loss_iou": 0.2578125, + "loss_num": 0.03076171875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 97932476, + "step": 1748 + }, + { + "epoch": 3.89532293986637, + "grad_norm": 12.71288776397705, + "learning_rate": 1e-06, + "loss": 0.9539, + "num_input_tokens_seen": 97990880, + "step": 1749 + }, + { + "epoch": 3.89532293986637, + "loss": 0.8964526653289795, + "loss_ce": 0.000578734208829701, + "loss_iou": 0.3828125, + "loss_num": 0.02587890625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 97990880, + "step": 1749 + }, + { + "epoch": 3.8975501113585747, + "grad_norm": 20.1357421875, + "learning_rate": 1e-06, + "loss": 0.8062, + "num_input_tokens_seen": 98046088, + "step": 1750 + }, + { + "epoch": 3.8975501113585747, + "eval_seeclick_web_CIoU": 0.5617943108081818, + "eval_seeclick_web_GIoU": 0.558159202337265, + "eval_seeclick_web_IoU": 0.5777357220649719, + "eval_seeclick_web_MAE_all": 0.017627435736358166, + "eval_seeclick_web_MAE_h": 0.010515023721382022, + "eval_seeclick_web_MAE_w": 0.018624153919517994, + "eval_seeclick_web_MAE_x_boxes": 0.00829980755224824, + "eval_seeclick_web_MAE_y_boxes": 0.02257556258700788, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9613375663757324, + "eval_seeclick_web_loss_ce": 0.0003832996590062976, + "eval_seeclick_web_loss_iou": 0.43701171875, + "eval_seeclick_web_loss_num": 0.013935089111328125, + "eval_seeclick_web_loss_xval": 0.9443359375, + "eval_seeclick_web_runtime": 27.2225, + "eval_seeclick_web_samples_per_second": 1.837, + "eval_seeclick_web_steps_per_second": 0.073, + "num_input_tokens_seen": 98046088, + "step": 1750 + }, + { + "epoch": 3.8975501113585747, + "eval_icons_CIoU": 0.3064361959695816, + "eval_icons_GIoU": 0.32649458944797516, + "eval_icons_IoU": 0.38111720979213715, + "eval_icons_MAE_all": 0.06658957526087761, + "eval_icons_MAE_h": 0.040302242152392864, + "eval_icons_MAE_w": 0.07293413020670414, + "eval_icons_MAE_x_boxes": 0.06224925257265568, + "eval_icons_MAE_y_boxes": 0.03964635543525219, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.7142175436019897, + "eval_icons_loss_ce": 0.0004412867419887334, + "eval_icons_loss_iou": 0.6680908203125, + "eval_icons_loss_num": 0.06289100646972656, + "eval_icons_loss_xval": 1.64990234375, + "eval_icons_runtime": 24.1165, + "eval_icons_samples_per_second": 2.073, + "eval_icons_steps_per_second": 0.083, + "num_input_tokens_seen": 98046088, + "step": 1750 + }, + { + "epoch": 3.8975501113585747, + "eval_screenspot_CIoU": 0.31633803248405457, + "eval_screenspot_GIoU": 0.3353947401046753, + "eval_screenspot_IoU": 0.3984935482343038, + "eval_screenspot_MAE_all": 0.06795181334018707, + "eval_screenspot_MAE_h": 0.03754324403901895, + "eval_screenspot_MAE_w": 0.07950195794304211, + "eval_screenspot_MAE_x_boxes": 0.08205063392718633, + "eval_screenspot_MAE_y_boxes": 0.044876331463456154, + "eval_screenspot_inside_bbox": 0.6358333428700765, + "eval_screenspot_loss": 1.7439180612564087, + "eval_screenspot_loss_ce": 0.0004279320516313116, + "eval_screenspot_loss_iou": 0.712890625, + "eval_screenspot_loss_num": 0.07890574137369792, + "eval_screenspot_loss_xval": 1.8204752604166667, + "eval_screenspot_runtime": 34.5027, + "eval_screenspot_samples_per_second": 2.58, + "eval_screenspot_steps_per_second": 0.087, + "num_input_tokens_seen": 98046088, + "step": 1750 + }, + { + "epoch": 3.8975501113585747, + "eval_compot_CIoU": 0.3428885042667389, + "eval_compot_GIoU": 0.3697793632745743, + "eval_compot_IoU": 0.40237635374069214, + "eval_compot_MAE_all": 0.019974621944129467, + "eval_compot_MAE_h": 0.009356681257486343, + "eval_compot_MAE_w": 0.02669452875852585, + "eval_compot_MAE_x_boxes": 0.0306751299649477, + "eval_compot_MAE_y_boxes": 0.00634155492298305, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3990836143493652, + "eval_compot_loss_ce": 0.000333975360263139, + "eval_compot_loss_iou": 0.637939453125, + "eval_compot_loss_num": 0.018901824951171875, + "eval_compot_loss_xval": 1.37109375, + "eval_compot_runtime": 19.3662, + "eval_compot_samples_per_second": 2.582, + "eval_compot_steps_per_second": 0.103, + "num_input_tokens_seen": 98046088, + "step": 1750 + }, + { + "epoch": 3.8975501113585747, + "eval_custom_ui_val_CIoU": 0.4409982098473443, + "eval_custom_ui_val_GIoU": 0.46577325297726524, + "eval_custom_ui_val_IoU": 0.49479154580169254, + "eval_custom_ui_val_MAE_all": 0.03411407251324919, + "eval_custom_ui_val_MAE_h": 0.0205486583419972, + "eval_custom_ui_val_MAE_w": 0.03924656597276529, + "eval_custom_ui_val_MAE_x_boxes": 0.03744201848490371, + "eval_custom_ui_val_MAE_y_boxes": 0.019695010320800874, + "eval_custom_ui_val_inside_bbox": 0.6867283980051676, + "eval_custom_ui_val_loss": 1.2667858600616455, + "eval_custom_ui_val_loss_ce": 0.0004089692018977884, + "eval_custom_ui_val_loss_iou": 0.5344373914930556, + "eval_custom_ui_val_loss_num": 0.03371832105848524, + "eval_custom_ui_val_loss_xval": 1.2374403211805556, + "eval_custom_ui_val_runtime": 56.4523, + "eval_custom_ui_val_samples_per_second": 4.694, + "eval_custom_ui_val_steps_per_second": 0.159, + "num_input_tokens_seen": 98046088, + "step": 1750 + }, + { + "epoch": 3.8975501113585747, + "loss": 0.9943009614944458, + "loss_ce": 0.00040448884828947484, + "loss_iou": 0.42578125, + "loss_num": 0.0283203125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 98046088, + "step": 1750 + }, + { + "epoch": 3.8997772828507795, + "grad_norm": 17.32667350769043, + "learning_rate": 1e-06, + "loss": 0.9158, + "num_input_tokens_seen": 98101832, + "step": 1751 + }, + { + "epoch": 3.8997772828507795, + "loss": 0.8142361640930176, + "loss_ce": 0.00027136015705764294, + "loss_iou": 0.365234375, + "loss_num": 0.0167236328125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 98101832, + "step": 1751 + }, + { + "epoch": 3.9020044543429844, + "grad_norm": 14.840397834777832, + "learning_rate": 1e-06, + "loss": 0.7732, + "num_input_tokens_seen": 98158656, + "step": 1752 + }, + { + "epoch": 3.9020044543429844, + "loss": 0.9086735248565674, + "loss_ce": 0.0004703607992269099, + "loss_iou": 0.341796875, + "loss_num": 0.044677734375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 98158656, + "step": 1752 + }, + { + "epoch": 3.9042316258351892, + "grad_norm": 16.115385055541992, + "learning_rate": 1e-06, + "loss": 0.7102, + "num_input_tokens_seen": 98216120, + "step": 1753 + }, + { + "epoch": 3.9042316258351892, + "loss": 0.6780398488044739, + "loss_ce": 0.0003054844564758241, + "loss_iou": 0.2890625, + "loss_num": 0.020263671875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 98216120, + "step": 1753 + }, + { + "epoch": 3.906458797327394, + "grad_norm": 22.08925437927246, + "learning_rate": 1e-06, + "loss": 0.8575, + "num_input_tokens_seen": 98273000, + "step": 1754 + }, + { + "epoch": 3.906458797327394, + "loss": 0.6279443502426147, + "loss_ce": 0.0002587907947599888, + "loss_iou": 0.291015625, + "loss_num": 0.00958251953125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 98273000, + "step": 1754 + }, + { + "epoch": 3.908685968819599, + "grad_norm": 15.868555068969727, + "learning_rate": 1e-06, + "loss": 0.8731, + "num_input_tokens_seen": 98330372, + "step": 1755 + }, + { + "epoch": 3.908685968819599, + "loss": 1.0122852325439453, + "loss_ce": 0.00032239314168691635, + "loss_iou": 0.443359375, + "loss_num": 0.0250244140625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 98330372, + "step": 1755 + }, + { + "epoch": 3.910913140311804, + "grad_norm": 20.903287887573242, + "learning_rate": 1e-06, + "loss": 1.0159, + "num_input_tokens_seen": 98387768, + "step": 1756 + }, + { + "epoch": 3.910913140311804, + "loss": 0.9633511900901794, + "loss_ce": 0.00046056750579737127, + "loss_iou": 0.384765625, + "loss_num": 0.038818359375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 98387768, + "step": 1756 + }, + { + "epoch": 3.9131403118040087, + "grad_norm": 14.697265625, + "learning_rate": 1e-06, + "loss": 1.0258, + "num_input_tokens_seen": 98443212, + "step": 1757 + }, + { + "epoch": 3.9131403118040087, + "loss": 1.067274808883667, + "loss_ce": 0.00038028520066291094, + "loss_iou": 0.435546875, + "loss_num": 0.03857421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 98443212, + "step": 1757 + }, + { + "epoch": 3.9153674832962135, + "grad_norm": 18.442344665527344, + "learning_rate": 1e-06, + "loss": 0.8647, + "num_input_tokens_seen": 98498060, + "step": 1758 + }, + { + "epoch": 3.9153674832962135, + "loss": 0.6922543048858643, + "loss_ce": 0.0002377206110395491, + "loss_iou": 0.27734375, + "loss_num": 0.027587890625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 98498060, + "step": 1758 + }, + { + "epoch": 3.917594654788419, + "grad_norm": 22.285242080688477, + "learning_rate": 1e-06, + "loss": 0.716, + "num_input_tokens_seen": 98553040, + "step": 1759 + }, + { + "epoch": 3.917594654788419, + "loss": 0.8192723393440247, + "loss_ce": 0.0006687932182103395, + "loss_iou": 0.36328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 98553040, + "step": 1759 + }, + { + "epoch": 3.9198218262806237, + "grad_norm": 19.2437801361084, + "learning_rate": 1e-06, + "loss": 0.8274, + "num_input_tokens_seen": 98607992, + "step": 1760 + }, + { + "epoch": 3.9198218262806237, + "loss": 1.0185580253601074, + "loss_ce": 0.00024743605172261596, + "loss_iou": 0.451171875, + "loss_num": 0.0234375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 98607992, + "step": 1760 + }, + { + "epoch": 3.9220489977728286, + "grad_norm": 28.81540298461914, + "learning_rate": 1e-06, + "loss": 0.8937, + "num_input_tokens_seen": 98663688, + "step": 1761 + }, + { + "epoch": 3.9220489977728286, + "loss": 0.7943629026412964, + "loss_ce": 0.0004176311194896698, + "loss_iou": 0.32421875, + "loss_num": 0.0291748046875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 98663688, + "step": 1761 + }, + { + "epoch": 3.9242761692650334, + "grad_norm": 13.910033226013184, + "learning_rate": 1e-06, + "loss": 0.8205, + "num_input_tokens_seen": 98720668, + "step": 1762 + }, + { + "epoch": 3.9242761692650334, + "loss": 0.7007368803024292, + "loss_ce": 0.0023726352956146, + "loss_iou": 0.298828125, + "loss_num": 0.0205078125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 98720668, + "step": 1762 + }, + { + "epoch": 3.9265033407572383, + "grad_norm": 19.399320602416992, + "learning_rate": 1e-06, + "loss": 0.6872, + "num_input_tokens_seen": 98777756, + "step": 1763 + }, + { + "epoch": 3.9265033407572383, + "loss": 0.4689710736274719, + "loss_ce": 0.00022106988762971014, + "loss_iou": 0.203125, + "loss_num": 0.0123291015625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 98777756, + "step": 1763 + }, + { + "epoch": 3.928730512249443, + "grad_norm": 18.18318748474121, + "learning_rate": 1e-06, + "loss": 0.7284, + "num_input_tokens_seen": 98835320, + "step": 1764 + }, + { + "epoch": 3.928730512249443, + "loss": 0.7946980595588684, + "loss_ce": 0.0002644615597091615, + "loss_iou": 0.359375, + "loss_num": 0.01495361328125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 98835320, + "step": 1764 + }, + { + "epoch": 3.930957683741648, + "grad_norm": 17.789981842041016, + "learning_rate": 1e-06, + "loss": 0.846, + "num_input_tokens_seen": 98891900, + "step": 1765 + }, + { + "epoch": 3.930957683741648, + "loss": 0.5826168060302734, + "loss_ce": 0.0005855775089003146, + "loss_iou": 0.255859375, + "loss_num": 0.01446533203125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 98891900, + "step": 1765 + }, + { + "epoch": 3.933184855233853, + "grad_norm": 14.104592323303223, + "learning_rate": 1e-06, + "loss": 0.8345, + "num_input_tokens_seen": 98948704, + "step": 1766 + }, + { + "epoch": 3.933184855233853, + "loss": 1.0315115451812744, + "loss_ce": 0.0002615266712382436, + "loss_iou": 0.43359375, + "loss_num": 0.033203125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 98948704, + "step": 1766 + }, + { + "epoch": 3.935412026726058, + "grad_norm": 17.459924697875977, + "learning_rate": 1e-06, + "loss": 0.8304, + "num_input_tokens_seen": 99005660, + "step": 1767 + }, + { + "epoch": 3.935412026726058, + "loss": 0.9141278266906738, + "loss_ce": 0.00030949406209401786, + "loss_iou": 0.404296875, + "loss_num": 0.0208740234375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 99005660, + "step": 1767 + }, + { + "epoch": 3.937639198218263, + "grad_norm": 18.547863006591797, + "learning_rate": 1e-06, + "loss": 1.1333, + "num_input_tokens_seen": 99061216, + "step": 1768 + }, + { + "epoch": 3.937639198218263, + "loss": 0.909751296043396, + "loss_ce": 0.00032748148078098893, + "loss_iou": 0.380859375, + "loss_num": 0.029541015625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 99061216, + "step": 1768 + }, + { + "epoch": 3.939866369710468, + "grad_norm": 17.022930145263672, + "learning_rate": 1e-06, + "loss": 0.7887, + "num_input_tokens_seen": 99120308, + "step": 1769 + }, + { + "epoch": 3.939866369710468, + "loss": 0.8347407579421997, + "loss_ce": 0.0002681155747268349, + "loss_iou": 0.36328125, + "loss_num": 0.0216064453125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 99120308, + "step": 1769 + }, + { + "epoch": 3.9420935412026727, + "grad_norm": 18.062122344970703, + "learning_rate": 1e-06, + "loss": 0.846, + "num_input_tokens_seen": 99176104, + "step": 1770 + }, + { + "epoch": 3.9420935412026727, + "loss": 0.7344276905059814, + "loss_ce": 0.00029679658473469317, + "loss_iou": 0.29296875, + "loss_num": 0.030029296875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 99176104, + "step": 1770 + }, + { + "epoch": 3.9443207126948776, + "grad_norm": 24.9909610748291, + "learning_rate": 1e-06, + "loss": 0.9439, + "num_input_tokens_seen": 99230876, + "step": 1771 + }, + { + "epoch": 3.9443207126948776, + "loss": 0.9261212944984436, + "loss_ce": 0.0003400646965019405, + "loss_iou": 0.41015625, + "loss_num": 0.0211181640625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 99230876, + "step": 1771 + }, + { + "epoch": 3.9465478841870825, + "grad_norm": 14.309945106506348, + "learning_rate": 1e-06, + "loss": 0.7448, + "num_input_tokens_seen": 99286860, + "step": 1772 + }, + { + "epoch": 3.9465478841870825, + "loss": 0.7100058794021606, + "loss_ce": 0.000289075484033674, + "loss_iou": 0.310546875, + "loss_num": 0.017578125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 99286860, + "step": 1772 + }, + { + "epoch": 3.9487750556792873, + "grad_norm": 12.438628196716309, + "learning_rate": 1e-06, + "loss": 1.0524, + "num_input_tokens_seen": 99344372, + "step": 1773 + }, + { + "epoch": 3.9487750556792873, + "loss": 1.3829472064971924, + "loss_ce": 0.00037877040449529886, + "loss_iou": 0.59375, + "loss_num": 0.038330078125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 99344372, + "step": 1773 + }, + { + "epoch": 3.951002227171492, + "grad_norm": 19.387874603271484, + "learning_rate": 1e-06, + "loss": 0.8822, + "num_input_tokens_seen": 99401420, + "step": 1774 + }, + { + "epoch": 3.951002227171492, + "loss": 1.1286416053771973, + "loss_ce": 0.0004677603137679398, + "loss_iou": 0.455078125, + "loss_num": 0.043701171875, + "loss_xval": 1.125, + "num_input_tokens_seen": 99401420, + "step": 1774 + }, + { + "epoch": 3.953229398663697, + "grad_norm": 16.427181243896484, + "learning_rate": 1e-06, + "loss": 0.9915, + "num_input_tokens_seen": 99456744, + "step": 1775 + }, + { + "epoch": 3.953229398663697, + "loss": 0.9648089408874512, + "loss_ce": 0.00045345089165493846, + "loss_iou": 0.40234375, + "loss_num": 0.03173828125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 99456744, + "step": 1775 + }, + { + "epoch": 3.955456570155902, + "grad_norm": 16.38910675048828, + "learning_rate": 1e-06, + "loss": 0.8111, + "num_input_tokens_seen": 99511464, + "step": 1776 + }, + { + "epoch": 3.955456570155902, + "loss": 1.0158613920211792, + "loss_ce": 0.0002364249958191067, + "loss_iou": 0.42578125, + "loss_num": 0.03271484375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 99511464, + "step": 1776 + }, + { + "epoch": 3.9576837416481068, + "grad_norm": 28.928443908691406, + "learning_rate": 1e-06, + "loss": 0.8633, + "num_input_tokens_seen": 99567452, + "step": 1777 + }, + { + "epoch": 3.9576837416481068, + "loss": 1.1553375720977783, + "loss_ce": 0.0003082446346525103, + "loss_iou": 0.49609375, + "loss_num": 0.03271484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 99567452, + "step": 1777 + }, + { + "epoch": 3.9599109131403116, + "grad_norm": 21.195846557617188, + "learning_rate": 1e-06, + "loss": 0.846, + "num_input_tokens_seen": 99624932, + "step": 1778 + }, + { + "epoch": 3.9599109131403116, + "loss": 0.6980462074279785, + "loss_ce": 0.0002923067077063024, + "loss_iou": 0.2734375, + "loss_num": 0.0299072265625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 99624932, + "step": 1778 + }, + { + "epoch": 3.9621380846325165, + "grad_norm": 16.269763946533203, + "learning_rate": 1e-06, + "loss": 0.6815, + "num_input_tokens_seen": 99681052, + "step": 1779 + }, + { + "epoch": 3.9621380846325165, + "loss": 0.7171025276184082, + "loss_ce": 0.00030569382943212986, + "loss_iou": 0.291015625, + "loss_num": 0.0267333984375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 99681052, + "step": 1779 + }, + { + "epoch": 3.9643652561247213, + "grad_norm": 17.27662467956543, + "learning_rate": 1e-06, + "loss": 0.7603, + "num_input_tokens_seen": 99738304, + "step": 1780 + }, + { + "epoch": 3.9643652561247213, + "loss": 0.9583814740180969, + "loss_ce": 0.0002515443484298885, + "loss_iou": 0.431640625, + "loss_num": 0.01904296875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 99738304, + "step": 1780 + }, + { + "epoch": 3.9665924276169267, + "grad_norm": 16.905935287475586, + "learning_rate": 1e-06, + "loss": 0.701, + "num_input_tokens_seen": 99793864, + "step": 1781 + }, + { + "epoch": 3.9665924276169267, + "loss": 0.8442895412445068, + "loss_ce": 0.0002953645307570696, + "loss_iou": 0.359375, + "loss_num": 0.025146484375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 99793864, + "step": 1781 + }, + { + "epoch": 3.9688195991091315, + "grad_norm": 17.269075393676758, + "learning_rate": 1e-06, + "loss": 0.6253, + "num_input_tokens_seen": 99849596, + "step": 1782 + }, + { + "epoch": 3.9688195991091315, + "loss": 0.6899313926696777, + "loss_ce": 0.0004782522446475923, + "loss_iou": 0.279296875, + "loss_num": 0.0262451171875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 99849596, + "step": 1782 + }, + { + "epoch": 3.9710467706013364, + "grad_norm": 12.376049995422363, + "learning_rate": 1e-06, + "loss": 0.7759, + "num_input_tokens_seen": 99905032, + "step": 1783 + }, + { + "epoch": 3.9710467706013364, + "loss": 0.8750180602073669, + "loss_ce": 0.0002622200991027057, + "loss_iou": 0.361328125, + "loss_num": 0.0302734375, + "loss_xval": 0.875, + "num_input_tokens_seen": 99905032, + "step": 1783 + }, + { + "epoch": 3.9732739420935412, + "grad_norm": 15.985518455505371, + "learning_rate": 1e-06, + "loss": 0.7939, + "num_input_tokens_seen": 99961684, + "step": 1784 + }, + { + "epoch": 3.9732739420935412, + "loss": 0.7649087905883789, + "loss_ce": 0.00026036915369331837, + "loss_iou": 0.349609375, + "loss_num": 0.0128173828125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 99961684, + "step": 1784 + }, + { + "epoch": 3.975501113585746, + "grad_norm": 17.415231704711914, + "learning_rate": 1e-06, + "loss": 0.9524, + "num_input_tokens_seen": 100018708, + "step": 1785 + }, + { + "epoch": 3.975501113585746, + "loss": 1.1517889499664307, + "loss_ce": 0.00042183659388683736, + "loss_iou": 0.48828125, + "loss_num": 0.034912109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 100018708, + "step": 1785 + }, + { + "epoch": 3.977728285077951, + "grad_norm": 21.656478881835938, + "learning_rate": 1e-06, + "loss": 0.7554, + "num_input_tokens_seen": 100073244, + "step": 1786 + }, + { + "epoch": 3.977728285077951, + "loss": 0.634032130241394, + "loss_ce": 0.0002430875028949231, + "loss_iou": 0.287109375, + "loss_num": 0.01214599609375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 100073244, + "step": 1786 + }, + { + "epoch": 3.979955456570156, + "grad_norm": 55.99577713012695, + "learning_rate": 1e-06, + "loss": 1.0249, + "num_input_tokens_seen": 100127112, + "step": 1787 + }, + { + "epoch": 3.979955456570156, + "loss": 0.8672703504562378, + "loss_ce": 0.00032695726258680224, + "loss_iou": 0.328125, + "loss_num": 0.04248046875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 100127112, + "step": 1787 + }, + { + "epoch": 3.9821826280623607, + "grad_norm": 20.532896041870117, + "learning_rate": 1e-06, + "loss": 0.8107, + "num_input_tokens_seen": 100181880, + "step": 1788 + }, + { + "epoch": 3.9821826280623607, + "loss": 0.7985996603965759, + "loss_ce": 0.000259819149505347, + "loss_iou": 0.349609375, + "loss_num": 0.02001953125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 100181880, + "step": 1788 + }, + { + "epoch": 3.984409799554566, + "grad_norm": 22.649169921875, + "learning_rate": 1e-06, + "loss": 0.7598, + "num_input_tokens_seen": 100238956, + "step": 1789 + }, + { + "epoch": 3.984409799554566, + "loss": 0.7128958702087402, + "loss_ce": 0.00024937212583608925, + "loss_iou": 0.30859375, + "loss_num": 0.019287109375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 100238956, + "step": 1789 + }, + { + "epoch": 3.986636971046771, + "grad_norm": 37.473751068115234, + "learning_rate": 1e-06, + "loss": 0.9002, + "num_input_tokens_seen": 100293580, + "step": 1790 + }, + { + "epoch": 3.986636971046771, + "loss": 0.7505241632461548, + "loss_ce": 0.0005241355975158513, + "loss_iou": 0.330078125, + "loss_num": 0.017822265625, + "loss_xval": 0.75, + "num_input_tokens_seen": 100293580, + "step": 1790 + }, + { + "epoch": 3.9888641425389757, + "grad_norm": 17.84164810180664, + "learning_rate": 1e-06, + "loss": 0.8214, + "num_input_tokens_seen": 100347096, + "step": 1791 + }, + { + "epoch": 3.9888641425389757, + "loss": 0.83009272813797, + "loss_ce": 0.000258743908489123, + "loss_iou": 0.32421875, + "loss_num": 0.035888671875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 100347096, + "step": 1791 + }, + { + "epoch": 3.9910913140311806, + "grad_norm": 15.902947425842285, + "learning_rate": 1e-06, + "loss": 0.8695, + "num_input_tokens_seen": 100403084, + "step": 1792 + }, + { + "epoch": 3.9910913140311806, + "loss": 0.8725968599319458, + "loss_ce": 0.0002824169350787997, + "loss_iou": 0.376953125, + "loss_num": 0.0238037109375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 100403084, + "step": 1792 + }, + { + "epoch": 3.9933184855233854, + "grad_norm": 32.91238784790039, + "learning_rate": 1e-06, + "loss": 0.6368, + "num_input_tokens_seen": 100460528, + "step": 1793 + }, + { + "epoch": 3.9933184855233854, + "loss": 0.6155407428741455, + "loss_ce": 0.00030632468406111, + "loss_iou": 0.2578125, + "loss_num": 0.020263671875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 100460528, + "step": 1793 + }, + { + "epoch": 3.9955456570155903, + "grad_norm": 17.373069763183594, + "learning_rate": 1e-06, + "loss": 0.6237, + "num_input_tokens_seen": 100518164, + "step": 1794 + }, + { + "epoch": 3.9955456570155903, + "loss": 0.748991847038269, + "loss_ce": 0.00045668811071664095, + "loss_iou": 0.3125, + "loss_num": 0.024658203125, + "loss_xval": 0.75, + "num_input_tokens_seen": 100518164, + "step": 1794 + }, + { + "epoch": 3.997772828507795, + "grad_norm": 18.933780670166016, + "learning_rate": 1e-06, + "loss": 0.7479, + "num_input_tokens_seen": 100576824, + "step": 1795 + }, + { + "epoch": 3.997772828507795, + "loss": 0.8931920528411865, + "loss_ce": 0.0003698251966852695, + "loss_iou": 0.390625, + "loss_num": 0.0220947265625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 100576824, + "step": 1795 + }, + { + "epoch": 4.0, + "grad_norm": 18.814178466796875, + "learning_rate": 1e-06, + "loss": 0.8102, + "num_input_tokens_seen": 100631512, + "step": 1796 + }, + { + "epoch": 4.0, + "loss": 0.976672887802124, + "loss_ce": 0.0003544443752616644, + "loss_iou": 0.419921875, + "loss_num": 0.0274658203125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 100631512, + "step": 1796 + }, + { + "epoch": 4.002227171492205, + "grad_norm": 18.784257888793945, + "learning_rate": 1e-06, + "loss": 1.0043, + "num_input_tokens_seen": 100688068, + "step": 1797 + }, + { + "epoch": 4.002227171492205, + "loss": 1.069665551185608, + "loss_ce": 0.00032957567600533366, + "loss_iou": 0.431640625, + "loss_num": 0.041015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 100688068, + "step": 1797 + }, + { + "epoch": 4.00445434298441, + "grad_norm": 15.86024284362793, + "learning_rate": 1e-06, + "loss": 0.9139, + "num_input_tokens_seen": 100740940, + "step": 1798 + }, + { + "epoch": 4.00445434298441, + "loss": 0.6887216567993164, + "loss_ce": 0.00024506475892849267, + "loss_iou": 0.302734375, + "loss_num": 0.0166015625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 100740940, + "step": 1798 + }, + { + "epoch": 4.006681514476615, + "grad_norm": 41.829795837402344, + "learning_rate": 1e-06, + "loss": 0.7138, + "num_input_tokens_seen": 100797540, + "step": 1799 + }, + { + "epoch": 4.006681514476615, + "loss": 0.5916675329208374, + "loss_ce": 0.00035895261680707335, + "loss_iou": 0.248046875, + "loss_num": 0.0191650390625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 100797540, + "step": 1799 + }, + { + "epoch": 4.008908685968819, + "grad_norm": 15.94815731048584, + "learning_rate": 1e-06, + "loss": 0.8192, + "num_input_tokens_seen": 100852428, + "step": 1800 + }, + { + "epoch": 4.008908685968819, + "loss": 0.628261148929596, + "loss_ce": 0.000331463961629197, + "loss_iou": 0.259765625, + "loss_num": 0.0218505859375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 100852428, + "step": 1800 + }, + { + "epoch": 4.011135857461024, + "grad_norm": 17.08213233947754, + "learning_rate": 1e-06, + "loss": 0.6745, + "num_input_tokens_seen": 100909988, + "step": 1801 + }, + { + "epoch": 4.011135857461024, + "loss": 0.49927961826324463, + "loss_ce": 0.00025619484949856997, + "loss_iou": 0.2138671875, + "loss_num": 0.0142822265625, + "loss_xval": 0.5, + "num_input_tokens_seen": 100909988, + "step": 1801 + }, + { + "epoch": 4.013363028953229, + "grad_norm": 15.652112007141113, + "learning_rate": 1e-06, + "loss": 0.7803, + "num_input_tokens_seen": 100967540, + "step": 1802 + }, + { + "epoch": 4.013363028953229, + "loss": 0.6135426759719849, + "loss_ce": 0.0002614644472487271, + "loss_iou": 0.26171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 100967540, + "step": 1802 + }, + { + "epoch": 4.015590200445434, + "grad_norm": 18.24171257019043, + "learning_rate": 1e-06, + "loss": 0.9651, + "num_input_tokens_seen": 101023900, + "step": 1803 + }, + { + "epoch": 4.015590200445434, + "loss": 0.9055318832397461, + "loss_ce": 0.00025842676404863596, + "loss_iou": 0.376953125, + "loss_num": 0.030029296875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 101023900, + "step": 1803 + }, + { + "epoch": 4.017817371937639, + "grad_norm": 26.98381996154785, + "learning_rate": 1e-06, + "loss": 0.9335, + "num_input_tokens_seen": 101081336, + "step": 1804 + }, + { + "epoch": 4.017817371937639, + "loss": 0.942715048789978, + "loss_ce": 0.00033223340869881213, + "loss_iou": 0.3828125, + "loss_num": 0.03564453125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 101081336, + "step": 1804 + }, + { + "epoch": 4.020044543429844, + "grad_norm": 14.921833038330078, + "learning_rate": 1e-06, + "loss": 0.8849, + "num_input_tokens_seen": 101139376, + "step": 1805 + }, + { + "epoch": 4.020044543429844, + "loss": 1.0176324844360352, + "loss_ce": 0.0007868300890550017, + "loss_iou": 0.44921875, + "loss_num": 0.024169921875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 101139376, + "step": 1805 + }, + { + "epoch": 4.022271714922049, + "grad_norm": 53.253265380859375, + "learning_rate": 1e-06, + "loss": 0.6965, + "num_input_tokens_seen": 101197336, + "step": 1806 + }, + { + "epoch": 4.022271714922049, + "loss": 0.5695779323577881, + "loss_ce": 0.00024200681946240366, + "loss_iou": 0.24609375, + "loss_num": 0.0155029296875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 101197336, + "step": 1806 + }, + { + "epoch": 4.0244988864142535, + "grad_norm": 12.943678855895996, + "learning_rate": 1e-06, + "loss": 0.6534, + "num_input_tokens_seen": 101253816, + "step": 1807 + }, + { + "epoch": 4.0244988864142535, + "loss": 0.6627930402755737, + "loss_ce": 0.00043953658314421773, + "loss_iou": 0.26953125, + "loss_num": 0.0250244140625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 101253816, + "step": 1807 + }, + { + "epoch": 4.026726057906459, + "grad_norm": 45.154788970947266, + "learning_rate": 1e-06, + "loss": 0.9206, + "num_input_tokens_seen": 101307840, + "step": 1808 + }, + { + "epoch": 4.026726057906459, + "loss": 0.8648847341537476, + "loss_ce": 0.0002606944181025028, + "loss_iou": 0.34375, + "loss_num": 0.03564453125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 101307840, + "step": 1808 + }, + { + "epoch": 4.028953229398664, + "grad_norm": 18.014551162719727, + "learning_rate": 1e-06, + "loss": 0.7334, + "num_input_tokens_seen": 101363116, + "step": 1809 + }, + { + "epoch": 4.028953229398664, + "loss": 0.5737777352333069, + "loss_ce": 0.0002914007636718452, + "loss_iou": 0.255859375, + "loss_num": 0.0126953125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 101363116, + "step": 1809 + }, + { + "epoch": 4.031180400890869, + "grad_norm": 18.99349021911621, + "learning_rate": 1e-06, + "loss": 0.8291, + "num_input_tokens_seen": 101419912, + "step": 1810 + }, + { + "epoch": 4.031180400890869, + "loss": 0.7891653180122375, + "loss_ce": 0.0003469590446911752, + "loss_iou": 0.30859375, + "loss_num": 0.034423828125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 101419912, + "step": 1810 + }, + { + "epoch": 4.033407572383074, + "grad_norm": 19.479049682617188, + "learning_rate": 1e-06, + "loss": 0.6363, + "num_input_tokens_seen": 101478732, + "step": 1811 + }, + { + "epoch": 4.033407572383074, + "loss": 0.5214917659759521, + "loss_ce": 0.0002515409141778946, + "loss_iou": 0.2236328125, + "loss_num": 0.0146484375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 101478732, + "step": 1811 + }, + { + "epoch": 4.035634743875279, + "grad_norm": 21.70351791381836, + "learning_rate": 1e-06, + "loss": 0.8533, + "num_input_tokens_seen": 101536500, + "step": 1812 + }, + { + "epoch": 4.035634743875279, + "loss": 0.8468989133834839, + "loss_ce": 0.0003413098747842014, + "loss_iou": 0.365234375, + "loss_num": 0.0234375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 101536500, + "step": 1812 + }, + { + "epoch": 4.0378619153674835, + "grad_norm": 15.182201385498047, + "learning_rate": 1e-06, + "loss": 0.9404, + "num_input_tokens_seen": 101592900, + "step": 1813 + }, + { + "epoch": 4.0378619153674835, + "loss": 1.334275484085083, + "loss_ce": 0.00029111921321600676, + "loss_iou": 0.59375, + "loss_num": 0.030029296875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 101592900, + "step": 1813 + }, + { + "epoch": 4.040089086859688, + "grad_norm": 18.792926788330078, + "learning_rate": 1e-06, + "loss": 0.7653, + "num_input_tokens_seen": 101648332, + "step": 1814 + }, + { + "epoch": 4.040089086859688, + "loss": 0.7761302590370178, + "loss_ce": 0.0002513654180802405, + "loss_iou": 0.328125, + "loss_num": 0.0245361328125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 101648332, + "step": 1814 + }, + { + "epoch": 4.042316258351893, + "grad_norm": 22.391029357910156, + "learning_rate": 1e-06, + "loss": 0.7972, + "num_input_tokens_seen": 101702948, + "step": 1815 + }, + { + "epoch": 4.042316258351893, + "loss": 0.9708037376403809, + "loss_ce": 0.00034468824742361903, + "loss_iou": 0.427734375, + "loss_num": 0.0230712890625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 101702948, + "step": 1815 + }, + { + "epoch": 4.044543429844098, + "grad_norm": 19.350894927978516, + "learning_rate": 1e-06, + "loss": 0.8455, + "num_input_tokens_seen": 101756712, + "step": 1816 + }, + { + "epoch": 4.044543429844098, + "loss": 0.6643034219741821, + "loss_ce": 0.00024090104852803051, + "loss_iou": 0.275390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 101756712, + "step": 1816 + }, + { + "epoch": 4.046770601336303, + "grad_norm": 18.292842864990234, + "learning_rate": 1e-06, + "loss": 0.8264, + "num_input_tokens_seen": 101812980, + "step": 1817 + }, + { + "epoch": 4.046770601336303, + "loss": 0.9234464168548584, + "loss_ce": 0.00035073645994998515, + "loss_iou": 0.392578125, + "loss_num": 0.027587890625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 101812980, + "step": 1817 + }, + { + "epoch": 4.048997772828508, + "grad_norm": 19.12327003479004, + "learning_rate": 1e-06, + "loss": 0.7468, + "num_input_tokens_seen": 101868528, + "step": 1818 + }, + { + "epoch": 4.048997772828508, + "loss": 0.8360873460769653, + "loss_ce": 0.0003940344322472811, + "loss_iou": 0.349609375, + "loss_num": 0.02734375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 101868528, + "step": 1818 + }, + { + "epoch": 4.051224944320713, + "grad_norm": 15.702198028564453, + "learning_rate": 1e-06, + "loss": 0.6871, + "num_input_tokens_seen": 101923832, + "step": 1819 + }, + { + "epoch": 4.051224944320713, + "loss": 0.7370009422302246, + "loss_ce": 0.0014051980106160045, + "loss_iou": 0.30859375, + "loss_num": 0.0234375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 101923832, + "step": 1819 + }, + { + "epoch": 4.0534521158129175, + "grad_norm": 21.061260223388672, + "learning_rate": 1e-06, + "loss": 0.7905, + "num_input_tokens_seen": 101978472, + "step": 1820 + }, + { + "epoch": 4.0534521158129175, + "loss": 0.8799552917480469, + "loss_ce": 0.0006828849436715245, + "loss_iou": 0.353515625, + "loss_num": 0.034423828125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 101978472, + "step": 1820 + }, + { + "epoch": 4.055679287305122, + "grad_norm": 23.500896453857422, + "learning_rate": 1e-06, + "loss": 0.9895, + "num_input_tokens_seen": 102035200, + "step": 1821 + }, + { + "epoch": 4.055679287305122, + "loss": 0.9768585562705994, + "loss_ce": 0.0002960490819532424, + "loss_iou": 0.40234375, + "loss_num": 0.034423828125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 102035200, + "step": 1821 + }, + { + "epoch": 4.057906458797327, + "grad_norm": 16.685222625732422, + "learning_rate": 1e-06, + "loss": 0.7959, + "num_input_tokens_seen": 102088752, + "step": 1822 + }, + { + "epoch": 4.057906458797327, + "loss": 0.8308522701263428, + "loss_ce": 0.0002858432417269796, + "loss_iou": 0.34375, + "loss_num": 0.0283203125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 102088752, + "step": 1822 + }, + { + "epoch": 4.060133630289532, + "grad_norm": 36.30082702636719, + "learning_rate": 1e-06, + "loss": 0.7506, + "num_input_tokens_seen": 102139624, + "step": 1823 + }, + { + "epoch": 4.060133630289532, + "loss": 0.8599013090133667, + "loss_ce": 0.0002821852976921946, + "loss_iou": 0.375, + "loss_num": 0.02197265625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 102139624, + "step": 1823 + }, + { + "epoch": 4.062360801781737, + "grad_norm": 17.49234962463379, + "learning_rate": 1e-06, + "loss": 0.7182, + "num_input_tokens_seen": 102196280, + "step": 1824 + }, + { + "epoch": 4.062360801781737, + "loss": 0.7073689103126526, + "loss_ce": 0.0003376654349267483, + "loss_iou": 0.3125, + "loss_num": 0.016845703125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 102196280, + "step": 1824 + }, + { + "epoch": 4.064587973273942, + "grad_norm": 23.85994529724121, + "learning_rate": 1e-06, + "loss": 0.9767, + "num_input_tokens_seen": 102252520, + "step": 1825 + }, + { + "epoch": 4.064587973273942, + "loss": 1.1830354928970337, + "loss_ce": 0.0002961636637337506, + "loss_iou": 0.482421875, + "loss_num": 0.043212890625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 102252520, + "step": 1825 + }, + { + "epoch": 4.066815144766147, + "grad_norm": 15.231406211853027, + "learning_rate": 1e-06, + "loss": 0.7989, + "num_input_tokens_seen": 102308852, + "step": 1826 + }, + { + "epoch": 4.066815144766147, + "loss": 0.7088180780410767, + "loss_ce": 0.0003219395875930786, + "loss_iou": 0.291015625, + "loss_num": 0.02490234375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 102308852, + "step": 1826 + }, + { + "epoch": 4.0690423162583516, + "grad_norm": 20.627899169921875, + "learning_rate": 1e-06, + "loss": 0.8451, + "num_input_tokens_seen": 102363256, + "step": 1827 + }, + { + "epoch": 4.0690423162583516, + "loss": 0.43590307235717773, + "loss_ce": 0.0003562027995940298, + "loss_iou": 0.177734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 102363256, + "step": 1827 + }, + { + "epoch": 4.071269487750556, + "grad_norm": 15.15736198425293, + "learning_rate": 1e-06, + "loss": 0.8284, + "num_input_tokens_seen": 102417712, + "step": 1828 + }, + { + "epoch": 4.071269487750556, + "loss": 0.876508355140686, + "loss_ce": 0.0002876708167605102, + "loss_iou": 0.390625, + "loss_num": 0.01904296875, + "loss_xval": 0.875, + "num_input_tokens_seen": 102417712, + "step": 1828 + }, + { + "epoch": 4.073496659242761, + "grad_norm": 16.91136360168457, + "learning_rate": 1e-06, + "loss": 0.7477, + "num_input_tokens_seen": 102476200, + "step": 1829 + }, + { + "epoch": 4.073496659242761, + "loss": 0.862647294998169, + "loss_ce": 0.0003425973991397768, + "loss_iou": 0.3828125, + "loss_num": 0.0196533203125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 102476200, + "step": 1829 + }, + { + "epoch": 4.075723830734967, + "grad_norm": 21.360063552856445, + "learning_rate": 1e-06, + "loss": 0.6998, + "num_input_tokens_seen": 102533876, + "step": 1830 + }, + { + "epoch": 4.075723830734967, + "loss": 0.8524512052536011, + "loss_ce": 0.0004004453949164599, + "loss_iou": 0.35546875, + "loss_num": 0.0284423828125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 102533876, + "step": 1830 + }, + { + "epoch": 4.077951002227172, + "grad_norm": 18.112483978271484, + "learning_rate": 1e-06, + "loss": 0.6848, + "num_input_tokens_seen": 102590972, + "step": 1831 + }, + { + "epoch": 4.077951002227172, + "loss": 0.6477828621864319, + "loss_ce": 0.0003219213103875518, + "loss_iou": 0.279296875, + "loss_num": 0.0177001953125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 102590972, + "step": 1831 + }, + { + "epoch": 4.080178173719377, + "grad_norm": 20.459156036376953, + "learning_rate": 1e-06, + "loss": 0.8724, + "num_input_tokens_seen": 102648508, + "step": 1832 + }, + { + "epoch": 4.080178173719377, + "loss": 0.8328180909156799, + "loss_ce": 0.00029852997977286577, + "loss_iou": 0.345703125, + "loss_num": 0.0281982421875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 102648508, + "step": 1832 + }, + { + "epoch": 4.082405345211582, + "grad_norm": 21.345972061157227, + "learning_rate": 1e-06, + "loss": 0.7649, + "num_input_tokens_seen": 102705228, + "step": 1833 + }, + { + "epoch": 4.082405345211582, + "loss": 0.7076017260551453, + "loss_ce": 0.0003263298131059855, + "loss_iou": 0.29296875, + "loss_num": 0.0238037109375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 102705228, + "step": 1833 + }, + { + "epoch": 4.0846325167037865, + "grad_norm": 17.683818817138672, + "learning_rate": 1e-06, + "loss": 0.8536, + "num_input_tokens_seen": 102761536, + "step": 1834 + }, + { + "epoch": 4.0846325167037865, + "loss": 0.8223095536231995, + "loss_ce": 0.00028804835164919496, + "loss_iou": 0.341796875, + "loss_num": 0.027099609375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 102761536, + "step": 1834 + }, + { + "epoch": 4.086859688195991, + "grad_norm": 20.6676082611084, + "learning_rate": 1e-06, + "loss": 0.9164, + "num_input_tokens_seen": 102818308, + "step": 1835 + }, + { + "epoch": 4.086859688195991, + "loss": 0.900152325630188, + "loss_ce": 0.0002499477122910321, + "loss_iou": 0.384765625, + "loss_num": 0.0263671875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 102818308, + "step": 1835 + }, + { + "epoch": 4.089086859688196, + "grad_norm": 16.989944458007812, + "learning_rate": 1e-06, + "loss": 0.7689, + "num_input_tokens_seen": 102870992, + "step": 1836 + }, + { + "epoch": 4.089086859688196, + "loss": 0.6965099573135376, + "loss_ce": 0.00022090264246799052, + "loss_iou": 0.2890625, + "loss_num": 0.023681640625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 102870992, + "step": 1836 + }, + { + "epoch": 4.091314031180401, + "grad_norm": 18.3848934173584, + "learning_rate": 1e-06, + "loss": 0.9445, + "num_input_tokens_seen": 102925516, + "step": 1837 + }, + { + "epoch": 4.091314031180401, + "loss": 0.8509833812713623, + "loss_ce": 0.00027541263261809945, + "loss_iou": 0.345703125, + "loss_num": 0.03173828125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 102925516, + "step": 1837 + }, + { + "epoch": 4.093541202672606, + "grad_norm": 19.348962783813477, + "learning_rate": 1e-06, + "loss": 1.0582, + "num_input_tokens_seen": 102980780, + "step": 1838 + }, + { + "epoch": 4.093541202672606, + "loss": 0.901675820350647, + "loss_ce": 0.00030863762367516756, + "loss_iou": 0.39453125, + "loss_num": 0.022705078125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 102980780, + "step": 1838 + }, + { + "epoch": 4.095768374164811, + "grad_norm": 18.351146697998047, + "learning_rate": 1e-06, + "loss": 0.6857, + "num_input_tokens_seen": 103038208, + "step": 1839 + }, + { + "epoch": 4.095768374164811, + "loss": 0.7659989595413208, + "loss_ce": 0.00025187875144183636, + "loss_iou": 0.310546875, + "loss_num": 0.02880859375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 103038208, + "step": 1839 + }, + { + "epoch": 4.097995545657016, + "grad_norm": 15.127579689025879, + "learning_rate": 1e-06, + "loss": 0.8049, + "num_input_tokens_seen": 103090740, + "step": 1840 + }, + { + "epoch": 4.097995545657016, + "loss": 0.9890508055686951, + "loss_ce": 0.0002812229795381427, + "loss_iou": 0.439453125, + "loss_num": 0.022216796875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 103090740, + "step": 1840 + }, + { + "epoch": 4.1002227171492205, + "grad_norm": 25.431516647338867, + "learning_rate": 1e-06, + "loss": 0.7564, + "num_input_tokens_seen": 103147724, + "step": 1841 + }, + { + "epoch": 4.1002227171492205, + "loss": 0.5847120881080627, + "loss_ce": 0.00023942730331327766, + "loss_iou": 0.25390625, + "loss_num": 0.0155029296875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 103147724, + "step": 1841 + }, + { + "epoch": 4.102449888641425, + "grad_norm": 14.01602554321289, + "learning_rate": 1e-06, + "loss": 0.6801, + "num_input_tokens_seen": 103206520, + "step": 1842 + }, + { + "epoch": 4.102449888641425, + "loss": 0.661648690700531, + "loss_ce": 0.004055918660014868, + "loss_iou": 0.27734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 103206520, + "step": 1842 + }, + { + "epoch": 4.10467706013363, + "grad_norm": 30.822895050048828, + "learning_rate": 1e-06, + "loss": 0.8477, + "num_input_tokens_seen": 103260592, + "step": 1843 + }, + { + "epoch": 4.10467706013363, + "loss": 1.0794177055358887, + "loss_ce": 0.0003160441410727799, + "loss_iou": 0.48046875, + "loss_num": 0.023681640625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 103260592, + "step": 1843 + }, + { + "epoch": 4.106904231625835, + "grad_norm": 19.740049362182617, + "learning_rate": 1e-06, + "loss": 0.8343, + "num_input_tokens_seen": 103318044, + "step": 1844 + }, + { + "epoch": 4.106904231625835, + "loss": 0.7112168669700623, + "loss_ce": 0.00027938373386859894, + "loss_iou": 0.3125, + "loss_num": 0.01708984375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 103318044, + "step": 1844 + }, + { + "epoch": 4.10913140311804, + "grad_norm": 23.76945686340332, + "learning_rate": 1e-06, + "loss": 0.5955, + "num_input_tokens_seen": 103371804, + "step": 1845 + }, + { + "epoch": 4.10913140311804, + "loss": 0.5928046107292175, + "loss_ce": 0.00027534179389476776, + "loss_iou": 0.240234375, + "loss_num": 0.0224609375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 103371804, + "step": 1845 + }, + { + "epoch": 4.111358574610245, + "grad_norm": 19.958959579467773, + "learning_rate": 1e-06, + "loss": 0.7486, + "num_input_tokens_seen": 103428372, + "step": 1846 + }, + { + "epoch": 4.111358574610245, + "loss": 0.8037309646606445, + "loss_ce": 0.00026413900195620954, + "loss_iou": 0.375, + "loss_num": 0.0108642578125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 103428372, + "step": 1846 + }, + { + "epoch": 4.11358574610245, + "grad_norm": 15.336278915405273, + "learning_rate": 1e-06, + "loss": 0.6876, + "num_input_tokens_seen": 103484976, + "step": 1847 + }, + { + "epoch": 4.11358574610245, + "loss": 0.7304455637931824, + "loss_ce": 0.0002209786616731435, + "loss_iou": 0.302734375, + "loss_num": 0.02490234375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 103484976, + "step": 1847 + }, + { + "epoch": 4.1158129175946545, + "grad_norm": 13.736832618713379, + "learning_rate": 1e-06, + "loss": 0.8494, + "num_input_tokens_seen": 103542500, + "step": 1848 + }, + { + "epoch": 4.1158129175946545, + "loss": 0.8857908248901367, + "loss_ce": 0.00041487094131298363, + "loss_iou": 0.35546875, + "loss_num": 0.034912109375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 103542500, + "step": 1848 + }, + { + "epoch": 4.118040089086859, + "grad_norm": 18.131500244140625, + "learning_rate": 1e-06, + "loss": 0.9464, + "num_input_tokens_seen": 103597080, + "step": 1849 + }, + { + "epoch": 4.118040089086859, + "loss": 0.9518570899963379, + "loss_ce": 0.0003189902927260846, + "loss_iou": 0.38671875, + "loss_num": 0.035888671875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 103597080, + "step": 1849 + }, + { + "epoch": 4.120267260579064, + "grad_norm": 18.16317367553711, + "learning_rate": 1e-06, + "loss": 1.0965, + "num_input_tokens_seen": 103651008, + "step": 1850 + }, + { + "epoch": 4.120267260579064, + "loss": 1.1296725273132324, + "loss_ce": 0.00027801876422017813, + "loss_iou": 0.458984375, + "loss_num": 0.042236328125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 103651008, + "step": 1850 + }, + { + "epoch": 4.122494432071269, + "grad_norm": 29.750585556030273, + "learning_rate": 1e-06, + "loss": 0.8961, + "num_input_tokens_seen": 103709900, + "step": 1851 + }, + { + "epoch": 4.122494432071269, + "loss": 0.8951970934867859, + "loss_ce": 0.00042168618529103696, + "loss_iou": 0.373046875, + "loss_num": 0.0296630859375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 103709900, + "step": 1851 + }, + { + "epoch": 4.124721603563474, + "grad_norm": 27.113773345947266, + "learning_rate": 1e-06, + "loss": 0.6256, + "num_input_tokens_seen": 103766964, + "step": 1852 + }, + { + "epoch": 4.124721603563474, + "loss": 0.6550110578536987, + "loss_ce": 0.00022586948762182146, + "loss_iou": 0.298828125, + "loss_num": 0.01153564453125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 103766964, + "step": 1852 + }, + { + "epoch": 4.12694877505568, + "grad_norm": 15.007980346679688, + "learning_rate": 1e-06, + "loss": 1.0075, + "num_input_tokens_seen": 103821128, + "step": 1853 + }, + { + "epoch": 4.12694877505568, + "loss": 0.9304366707801819, + "loss_ce": 0.00026090393657796085, + "loss_iou": 0.3984375, + "loss_num": 0.0267333984375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 103821128, + "step": 1853 + }, + { + "epoch": 4.129175946547885, + "grad_norm": 16.54915428161621, + "learning_rate": 1e-06, + "loss": 0.8283, + "num_input_tokens_seen": 103874556, + "step": 1854 + }, + { + "epoch": 4.129175946547885, + "loss": 0.6098688840866089, + "loss_ce": 0.00024977908469736576, + "loss_iou": 0.25390625, + "loss_num": 0.0201416015625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 103874556, + "step": 1854 + }, + { + "epoch": 4.131403118040089, + "grad_norm": 18.67920684814453, + "learning_rate": 1e-06, + "loss": 0.8078, + "num_input_tokens_seen": 103929176, + "step": 1855 + }, + { + "epoch": 4.131403118040089, + "loss": 0.6376897692680359, + "loss_ce": 0.00023860471264924854, + "loss_iou": 0.28515625, + "loss_num": 0.01318359375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 103929176, + "step": 1855 + }, + { + "epoch": 4.133630289532294, + "grad_norm": 14.864916801452637, + "learning_rate": 1e-06, + "loss": 0.7531, + "num_input_tokens_seen": 103985216, + "step": 1856 + }, + { + "epoch": 4.133630289532294, + "loss": 0.606116533279419, + "loss_ce": 0.0002816014748532325, + "loss_iou": 0.244140625, + "loss_num": 0.023681640625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 103985216, + "step": 1856 + }, + { + "epoch": 4.135857461024499, + "grad_norm": 15.250370025634766, + "learning_rate": 1e-06, + "loss": 0.7684, + "num_input_tokens_seen": 104043664, + "step": 1857 + }, + { + "epoch": 4.135857461024499, + "loss": 0.810298502445221, + "loss_ce": 0.00023990226327441633, + "loss_iou": 0.337890625, + "loss_num": 0.02734375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 104043664, + "step": 1857 + }, + { + "epoch": 4.138084632516704, + "grad_norm": 33.826820373535156, + "learning_rate": 1e-06, + "loss": 0.6527, + "num_input_tokens_seen": 104101224, + "step": 1858 + }, + { + "epoch": 4.138084632516704, + "loss": 0.5385033488273621, + "loss_ce": 0.00029532582266256213, + "loss_iou": 0.23046875, + "loss_num": 0.01519775390625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 104101224, + "step": 1858 + }, + { + "epoch": 4.140311804008909, + "grad_norm": 26.74302101135254, + "learning_rate": 1e-06, + "loss": 0.7609, + "num_input_tokens_seen": 104159992, + "step": 1859 + }, + { + "epoch": 4.140311804008909, + "loss": 0.7529861927032471, + "loss_ce": 0.00030061625875532627, + "loss_iou": 0.32421875, + "loss_num": 0.0208740234375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 104159992, + "step": 1859 + }, + { + "epoch": 4.142538975501114, + "grad_norm": 16.13300895690918, + "learning_rate": 1e-06, + "loss": 0.8149, + "num_input_tokens_seen": 104217472, + "step": 1860 + }, + { + "epoch": 4.142538975501114, + "loss": 0.7612708806991577, + "loss_ce": 0.00028455047868192196, + "loss_iou": 0.328125, + "loss_num": 0.02099609375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 104217472, + "step": 1860 + }, + { + "epoch": 4.144766146993319, + "grad_norm": 14.439784049987793, + "learning_rate": 1e-06, + "loss": 0.7813, + "num_input_tokens_seen": 104274900, + "step": 1861 + }, + { + "epoch": 4.144766146993319, + "loss": 0.7901536226272583, + "loss_ce": 0.0003587682731449604, + "loss_iou": 0.302734375, + "loss_num": 0.037109375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 104274900, + "step": 1861 + }, + { + "epoch": 4.146993318485523, + "grad_norm": 15.760991096496582, + "learning_rate": 1e-06, + "loss": 0.9137, + "num_input_tokens_seen": 104330380, + "step": 1862 + }, + { + "epoch": 4.146993318485523, + "loss": 0.831478476524353, + "loss_ce": 0.0004237755201756954, + "loss_iou": 0.33203125, + "loss_num": 0.033203125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 104330380, + "step": 1862 + }, + { + "epoch": 4.149220489977728, + "grad_norm": 16.417325973510742, + "learning_rate": 1e-06, + "loss": 0.796, + "num_input_tokens_seen": 104386476, + "step": 1863 + }, + { + "epoch": 4.149220489977728, + "loss": 0.8149293065071106, + "loss_ce": 0.0002320479543413967, + "loss_iou": 0.365234375, + "loss_num": 0.0169677734375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 104386476, + "step": 1863 + }, + { + "epoch": 4.151447661469933, + "grad_norm": 26.965940475463867, + "learning_rate": 1e-06, + "loss": 0.6169, + "num_input_tokens_seen": 104442536, + "step": 1864 + }, + { + "epoch": 4.151447661469933, + "loss": 0.6312210559844971, + "loss_ce": 0.00023957040684763342, + "loss_iou": 0.275390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 104442536, + "step": 1864 + }, + { + "epoch": 4.153674832962138, + "grad_norm": 26.452974319458008, + "learning_rate": 1e-06, + "loss": 0.8482, + "num_input_tokens_seen": 104497928, + "step": 1865 + }, + { + "epoch": 4.153674832962138, + "loss": 0.9214756488800049, + "loss_ce": 0.00033305271062999964, + "loss_iou": 0.388671875, + "loss_num": 0.0289306640625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 104497928, + "step": 1865 + }, + { + "epoch": 4.155902004454343, + "grad_norm": 22.56464385986328, + "learning_rate": 1e-06, + "loss": 0.807, + "num_input_tokens_seen": 104553604, + "step": 1866 + }, + { + "epoch": 4.155902004454343, + "loss": 0.9364668130874634, + "loss_ce": 0.00030959240393713117, + "loss_iou": 0.36328125, + "loss_num": 0.042236328125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 104553604, + "step": 1866 + }, + { + "epoch": 4.158129175946548, + "grad_norm": 16.754383087158203, + "learning_rate": 1e-06, + "loss": 0.7331, + "num_input_tokens_seen": 104609096, + "step": 1867 + }, + { + "epoch": 4.158129175946548, + "loss": 0.7379820942878723, + "loss_ce": 0.0003112004487775266, + "loss_iou": 0.30859375, + "loss_num": 0.0244140625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 104609096, + "step": 1867 + }, + { + "epoch": 4.160356347438753, + "grad_norm": 21.52208709716797, + "learning_rate": 1e-06, + "loss": 0.9237, + "num_input_tokens_seen": 104662120, + "step": 1868 + }, + { + "epoch": 4.160356347438753, + "loss": 0.9280723333358765, + "loss_ce": 0.00033793269540183246, + "loss_iou": 0.37109375, + "loss_num": 0.036865234375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 104662120, + "step": 1868 + }, + { + "epoch": 4.1625835189309575, + "grad_norm": 21.401424407958984, + "learning_rate": 1e-06, + "loss": 0.6609, + "num_input_tokens_seen": 104720356, + "step": 1869 + }, + { + "epoch": 4.1625835189309575, + "loss": 0.6265453100204468, + "loss_ce": 0.00032457264023832977, + "loss_iou": 0.27734375, + "loss_num": 0.01416015625, + "loss_xval": 0.625, + "num_input_tokens_seen": 104720356, + "step": 1869 + }, + { + "epoch": 4.164810690423162, + "grad_norm": 20.714725494384766, + "learning_rate": 1e-06, + "loss": 0.8871, + "num_input_tokens_seen": 104777020, + "step": 1870 + }, + { + "epoch": 4.164810690423162, + "loss": 0.9072417616844177, + "loss_ce": 0.00038138843956403434, + "loss_iou": 0.3984375, + "loss_num": 0.0220947265625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 104777020, + "step": 1870 + }, + { + "epoch": 4.167037861915367, + "grad_norm": 19.34101104736328, + "learning_rate": 1e-06, + "loss": 0.9844, + "num_input_tokens_seen": 104832044, + "step": 1871 + }, + { + "epoch": 4.167037861915367, + "loss": 1.2446290254592896, + "loss_ce": 0.0004884034278802574, + "loss_iou": 0.5234375, + "loss_num": 0.038818359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 104832044, + "step": 1871 + }, + { + "epoch": 4.169265033407572, + "grad_norm": 22.785999298095703, + "learning_rate": 1e-06, + "loss": 0.7377, + "num_input_tokens_seen": 104889672, + "step": 1872 + }, + { + "epoch": 4.169265033407572, + "loss": 0.5561378002166748, + "loss_ce": 0.0002295885351486504, + "loss_iou": 0.224609375, + "loss_num": 0.021484375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 104889672, + "step": 1872 + }, + { + "epoch": 4.171492204899777, + "grad_norm": 24.524354934692383, + "learning_rate": 1e-06, + "loss": 0.7842, + "num_input_tokens_seen": 104946972, + "step": 1873 + }, + { + "epoch": 4.171492204899777, + "loss": 0.7845892906188965, + "loss_ce": 0.0010199534008279443, + "loss_iou": 0.314453125, + "loss_num": 0.0311279296875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 104946972, + "step": 1873 + }, + { + "epoch": 4.173719376391982, + "grad_norm": 21.119625091552734, + "learning_rate": 1e-06, + "loss": 0.7332, + "num_input_tokens_seen": 105001336, + "step": 1874 + }, + { + "epoch": 4.173719376391982, + "loss": 0.803102970123291, + "loss_ce": 0.00036857047234661877, + "loss_iou": 0.33984375, + "loss_num": 0.024658203125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 105001336, + "step": 1874 + }, + { + "epoch": 4.1759465478841875, + "grad_norm": 21.471126556396484, + "learning_rate": 1e-06, + "loss": 0.9933, + "num_input_tokens_seen": 105056692, + "step": 1875 + }, + { + "epoch": 4.1759465478841875, + "loss": 0.8526608943939209, + "loss_ce": 0.0003660406800918281, + "loss_iou": 0.369140625, + "loss_num": 0.0228271484375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 105056692, + "step": 1875 + }, + { + "epoch": 4.178173719376392, + "grad_norm": 16.687490463256836, + "learning_rate": 1e-06, + "loss": 0.8024, + "num_input_tokens_seen": 105113712, + "step": 1876 + }, + { + "epoch": 4.178173719376392, + "loss": 0.668755054473877, + "loss_ce": 0.0002979860291816294, + "loss_iou": 0.28515625, + "loss_num": 0.01953125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 105113712, + "step": 1876 + }, + { + "epoch": 4.180400890868597, + "grad_norm": 13.53281021118164, + "learning_rate": 1e-06, + "loss": 0.9623, + "num_input_tokens_seen": 105169732, + "step": 1877 + }, + { + "epoch": 4.180400890868597, + "loss": 0.9789065718650818, + "loss_ce": 0.00026888877619057894, + "loss_iou": 0.3828125, + "loss_num": 0.04248046875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 105169732, + "step": 1877 + }, + { + "epoch": 4.182628062360802, + "grad_norm": 21.907808303833008, + "learning_rate": 1e-06, + "loss": 0.8223, + "num_input_tokens_seen": 105221624, + "step": 1878 + }, + { + "epoch": 4.182628062360802, + "loss": 0.7880837917327881, + "loss_ce": 0.00024200028565246612, + "loss_iou": 0.33984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 105221624, + "step": 1878 + }, + { + "epoch": 4.184855233853007, + "grad_norm": 28.09380340576172, + "learning_rate": 1e-06, + "loss": 1.0485, + "num_input_tokens_seen": 105278504, + "step": 1879 + }, + { + "epoch": 4.184855233853007, + "loss": 1.0873863697052002, + "loss_ce": 0.00035019166534766555, + "loss_iou": 0.458984375, + "loss_num": 0.03369140625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 105278504, + "step": 1879 + }, + { + "epoch": 4.187082405345212, + "grad_norm": 28.955883026123047, + "learning_rate": 1e-06, + "loss": 0.7196, + "num_input_tokens_seen": 105335440, + "step": 1880 + }, + { + "epoch": 4.187082405345212, + "loss": 0.7075688242912292, + "loss_ce": 0.0002934132644440979, + "loss_iou": 0.31640625, + "loss_num": 0.01556396484375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 105335440, + "step": 1880 + }, + { + "epoch": 4.189309576837417, + "grad_norm": 16.112701416015625, + "learning_rate": 1e-06, + "loss": 0.9407, + "num_input_tokens_seen": 105391556, + "step": 1881 + }, + { + "epoch": 4.189309576837417, + "loss": 1.010768175125122, + "loss_ce": 0.00027016678359359503, + "loss_iou": 0.365234375, + "loss_num": 0.056396484375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 105391556, + "step": 1881 + }, + { + "epoch": 4.1915367483296215, + "grad_norm": 12.533539772033691, + "learning_rate": 1e-06, + "loss": 0.8191, + "num_input_tokens_seen": 105444616, + "step": 1882 + }, + { + "epoch": 4.1915367483296215, + "loss": 0.994451642036438, + "loss_ce": 0.00031105236848816276, + "loss_iou": 0.431640625, + "loss_num": 0.026123046875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 105444616, + "step": 1882 + }, + { + "epoch": 4.193763919821826, + "grad_norm": 18.672292709350586, + "learning_rate": 1e-06, + "loss": 0.7595, + "num_input_tokens_seen": 105502508, + "step": 1883 + }, + { + "epoch": 4.193763919821826, + "loss": 0.787238359451294, + "loss_ce": 0.00025101928622461855, + "loss_iou": 0.326171875, + "loss_num": 0.026611328125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 105502508, + "step": 1883 + }, + { + "epoch": 4.195991091314031, + "grad_norm": 18.28311538696289, + "learning_rate": 1e-06, + "loss": 0.7337, + "num_input_tokens_seen": 105559380, + "step": 1884 + }, + { + "epoch": 4.195991091314031, + "loss": 0.6529708504676819, + "loss_ce": 0.000993293710052967, + "loss_iou": 0.283203125, + "loss_num": 0.0169677734375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 105559380, + "step": 1884 + }, + { + "epoch": 4.198218262806236, + "grad_norm": 23.031770706176758, + "learning_rate": 1e-06, + "loss": 0.7621, + "num_input_tokens_seen": 105614328, + "step": 1885 + }, + { + "epoch": 4.198218262806236, + "loss": 0.542103111743927, + "loss_ce": 0.00023299809254240245, + "loss_iou": 0.220703125, + "loss_num": 0.0198974609375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 105614328, + "step": 1885 + }, + { + "epoch": 4.200445434298441, + "grad_norm": 23.01361656188965, + "learning_rate": 1e-06, + "loss": 0.7555, + "num_input_tokens_seen": 105670768, + "step": 1886 + }, + { + "epoch": 4.200445434298441, + "loss": 0.7036082744598389, + "loss_ce": 0.00023906549904495478, + "loss_iou": 0.28125, + "loss_num": 0.0281982421875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 105670768, + "step": 1886 + }, + { + "epoch": 4.202672605790646, + "grad_norm": 12.780369758605957, + "learning_rate": 1e-06, + "loss": 0.7383, + "num_input_tokens_seen": 105727652, + "step": 1887 + }, + { + "epoch": 4.202672605790646, + "loss": 0.7529648542404175, + "loss_ce": 0.00027933655655942857, + "loss_iou": 0.330078125, + "loss_num": 0.018310546875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 105727652, + "step": 1887 + }, + { + "epoch": 4.204899777282851, + "grad_norm": 17.18299674987793, + "learning_rate": 1e-06, + "loss": 0.775, + "num_input_tokens_seen": 105784604, + "step": 1888 + }, + { + "epoch": 4.204899777282851, + "loss": 0.6293338537216187, + "loss_ce": 0.0003055296838283539, + "loss_iou": 0.2353515625, + "loss_num": 0.031494140625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 105784604, + "step": 1888 + }, + { + "epoch": 4.2071269487750556, + "grad_norm": 48.9861946105957, + "learning_rate": 1e-06, + "loss": 0.9114, + "num_input_tokens_seen": 105840236, + "step": 1889 + }, + { + "epoch": 4.2071269487750556, + "loss": 1.198516607284546, + "loss_ce": 0.0002743962104432285, + "loss_iou": 0.48828125, + "loss_num": 0.043701171875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 105840236, + "step": 1889 + }, + { + "epoch": 4.20935412026726, + "grad_norm": 18.402477264404297, + "learning_rate": 1e-06, + "loss": 0.6669, + "num_input_tokens_seen": 105895768, + "step": 1890 + }, + { + "epoch": 4.20935412026726, + "loss": 0.6518987417221069, + "loss_ce": 0.0002874084748327732, + "loss_iou": 0.283203125, + "loss_num": 0.017333984375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 105895768, + "step": 1890 + }, + { + "epoch": 4.211581291759465, + "grad_norm": 19.16950035095215, + "learning_rate": 1e-06, + "loss": 0.9874, + "num_input_tokens_seen": 105950604, + "step": 1891 + }, + { + "epoch": 4.211581291759465, + "loss": 1.1375155448913574, + "loss_ce": 0.00030853564385324717, + "loss_iou": 0.462890625, + "loss_num": 0.042236328125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 105950604, + "step": 1891 + }, + { + "epoch": 4.21380846325167, + "grad_norm": 12.357964515686035, + "learning_rate": 1e-06, + "loss": 0.6145, + "num_input_tokens_seen": 106007040, + "step": 1892 + }, + { + "epoch": 4.21380846325167, + "loss": 0.6721287965774536, + "loss_ce": 0.000253824342507869, + "loss_iou": 0.291015625, + "loss_num": 0.0177001953125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 106007040, + "step": 1892 + }, + { + "epoch": 4.216035634743875, + "grad_norm": 16.27143669128418, + "learning_rate": 1e-06, + "loss": 0.9689, + "num_input_tokens_seen": 106063332, + "step": 1893 + }, + { + "epoch": 4.216035634743875, + "loss": 0.9934124946594238, + "loss_ce": 0.00024840355035848916, + "loss_iou": 0.43359375, + "loss_num": 0.0255126953125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 106063332, + "step": 1893 + }, + { + "epoch": 4.21826280623608, + "grad_norm": 14.465191841125488, + "learning_rate": 1e-06, + "loss": 0.8634, + "num_input_tokens_seen": 106119888, + "step": 1894 + }, + { + "epoch": 4.21826280623608, + "loss": 0.752957820892334, + "loss_ce": 0.0002722431090660393, + "loss_iou": 0.3046875, + "loss_num": 0.02880859375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 106119888, + "step": 1894 + }, + { + "epoch": 4.220489977728285, + "grad_norm": 13.513664245605469, + "learning_rate": 1e-06, + "loss": 0.7989, + "num_input_tokens_seen": 106177424, + "step": 1895 + }, + { + "epoch": 4.220489977728285, + "loss": 0.869196891784668, + "loss_ce": 0.00030032347422093153, + "loss_iou": 0.376953125, + "loss_num": 0.022705078125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 106177424, + "step": 1895 + }, + { + "epoch": 4.22271714922049, + "grad_norm": 18.78656005859375, + "learning_rate": 1e-06, + "loss": 0.7666, + "num_input_tokens_seen": 106233892, + "step": 1896 + }, + { + "epoch": 4.22271714922049, + "loss": 0.7184998989105225, + "loss_ce": 0.00023819129273761064, + "loss_iou": 0.32421875, + "loss_num": 0.0142822265625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 106233892, + "step": 1896 + }, + { + "epoch": 4.224944320712694, + "grad_norm": 41.630496978759766, + "learning_rate": 1e-06, + "loss": 0.8828, + "num_input_tokens_seen": 106289772, + "step": 1897 + }, + { + "epoch": 4.224944320712694, + "loss": 0.8243512511253357, + "loss_ce": 0.00037670054007321596, + "loss_iou": 0.35546875, + "loss_num": 0.0230712890625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 106289772, + "step": 1897 + }, + { + "epoch": 4.2271714922049, + "grad_norm": 14.743143081665039, + "learning_rate": 1e-06, + "loss": 0.9489, + "num_input_tokens_seen": 106344316, + "step": 1898 + }, + { + "epoch": 4.2271714922049, + "loss": 0.888018012046814, + "loss_ce": 0.0003226427361369133, + "loss_iou": 0.41015625, + "loss_num": 0.0130615234375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 106344316, + "step": 1898 + }, + { + "epoch": 4.229398663697105, + "grad_norm": 16.215017318725586, + "learning_rate": 1e-06, + "loss": 0.8319, + "num_input_tokens_seen": 106399904, + "step": 1899 + }, + { + "epoch": 4.229398663697105, + "loss": 0.7622650265693665, + "loss_ce": 0.00030216382583603263, + "loss_iou": 0.349609375, + "loss_num": 0.0125732421875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 106399904, + "step": 1899 + }, + { + "epoch": 4.23162583518931, + "grad_norm": 21.640851974487305, + "learning_rate": 1e-06, + "loss": 0.7937, + "num_input_tokens_seen": 106456716, + "step": 1900 + }, + { + "epoch": 4.23162583518931, + "loss": 0.8867244124412537, + "loss_ce": 0.0002498445101082325, + "loss_iou": 0.365234375, + "loss_num": 0.03173828125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 106456716, + "step": 1900 + }, + { + "epoch": 4.233853006681515, + "grad_norm": 45.25251770019531, + "learning_rate": 1e-06, + "loss": 0.9745, + "num_input_tokens_seen": 106514668, + "step": 1901 + }, + { + "epoch": 4.233853006681515, + "loss": 0.8965962529182434, + "loss_ce": 0.0003560082404874265, + "loss_iou": 0.38671875, + "loss_num": 0.0242919921875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 106514668, + "step": 1901 + }, + { + "epoch": 4.23608017817372, + "grad_norm": 23.69890594482422, + "learning_rate": 1e-06, + "loss": 0.6128, + "num_input_tokens_seen": 106570596, + "step": 1902 + }, + { + "epoch": 4.23608017817372, + "loss": 0.7368512153625488, + "loss_ce": 0.0002789643476717174, + "loss_iou": 0.318359375, + "loss_num": 0.0201416015625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 106570596, + "step": 1902 + }, + { + "epoch": 4.2383073496659245, + "grad_norm": 14.473897933959961, + "learning_rate": 1e-06, + "loss": 0.8265, + "num_input_tokens_seen": 106625068, + "step": 1903 + }, + { + "epoch": 4.2383073496659245, + "loss": 0.7507649660110474, + "loss_ce": 0.00027672675787471235, + "loss_iou": 0.310546875, + "loss_num": 0.025634765625, + "loss_xval": 0.75, + "num_input_tokens_seen": 106625068, + "step": 1903 + }, + { + "epoch": 4.240534521158129, + "grad_norm": 15.77379322052002, + "learning_rate": 1e-06, + "loss": 0.651, + "num_input_tokens_seen": 106683920, + "step": 1904 + }, + { + "epoch": 4.240534521158129, + "loss": 0.5201445817947388, + "loss_ce": 0.0002471142797730863, + "loss_iou": 0.2265625, + "loss_num": 0.0133056640625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 106683920, + "step": 1904 + }, + { + "epoch": 4.242761692650334, + "grad_norm": 16.723033905029297, + "learning_rate": 1e-06, + "loss": 0.6274, + "num_input_tokens_seen": 106740932, + "step": 1905 + }, + { + "epoch": 4.242761692650334, + "loss": 0.5735186338424683, + "loss_ce": 0.00027646025409922004, + "loss_iou": 0.2578125, + "loss_num": 0.0115966796875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 106740932, + "step": 1905 + }, + { + "epoch": 4.244988864142539, + "grad_norm": 23.549467086791992, + "learning_rate": 1e-06, + "loss": 0.9234, + "num_input_tokens_seen": 106798536, + "step": 1906 + }, + { + "epoch": 4.244988864142539, + "loss": 0.8834173679351807, + "loss_ce": 0.0006048293435014784, + "loss_iou": 0.33984375, + "loss_num": 0.040771484375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 106798536, + "step": 1906 + }, + { + "epoch": 4.247216035634744, + "grad_norm": 20.045562744140625, + "learning_rate": 1e-06, + "loss": 0.9631, + "num_input_tokens_seen": 106855984, + "step": 1907 + }, + { + "epoch": 4.247216035634744, + "loss": 1.116750955581665, + "loss_ce": 0.0002958837430924177, + "loss_iou": 0.423828125, + "loss_num": 0.053466796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 106855984, + "step": 1907 + }, + { + "epoch": 4.249443207126949, + "grad_norm": 20.793867111206055, + "learning_rate": 1e-06, + "loss": 0.7098, + "num_input_tokens_seen": 106910552, + "step": 1908 + }, + { + "epoch": 4.249443207126949, + "loss": 0.691659152507782, + "loss_ce": 0.0002528924378566444, + "loss_iou": 0.318359375, + "loss_num": 0.01092529296875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 106910552, + "step": 1908 + }, + { + "epoch": 4.251670378619154, + "grad_norm": 31.35016632080078, + "learning_rate": 1e-06, + "loss": 0.8538, + "num_input_tokens_seen": 106966684, + "step": 1909 + }, + { + "epoch": 4.251670378619154, + "loss": 0.7839844226837158, + "loss_ce": 0.000293065735604614, + "loss_iou": 0.34765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 106966684, + "step": 1909 + }, + { + "epoch": 4.2538975501113585, + "grad_norm": 34.71610641479492, + "learning_rate": 1e-06, + "loss": 0.6133, + "num_input_tokens_seen": 107025092, + "step": 1910 + }, + { + "epoch": 4.2538975501113585, + "loss": 0.6420915722846985, + "loss_ce": 0.0002459048992022872, + "loss_iou": 0.27734375, + "loss_num": 0.0174560546875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 107025092, + "step": 1910 + }, + { + "epoch": 4.256124721603563, + "grad_norm": 14.96351146697998, + "learning_rate": 1e-06, + "loss": 0.6374, + "num_input_tokens_seen": 107081992, + "step": 1911 + }, + { + "epoch": 4.256124721603563, + "loss": 0.4527207911014557, + "loss_ce": 0.00032822368666529655, + "loss_iou": 0.1943359375, + "loss_num": 0.0125732421875, + "loss_xval": 0.453125, + "num_input_tokens_seen": 107081992, + "step": 1911 + }, + { + "epoch": 4.258351893095768, + "grad_norm": 14.8378267288208, + "learning_rate": 1e-06, + "loss": 0.7082, + "num_input_tokens_seen": 107135848, + "step": 1912 + }, + { + "epoch": 4.258351893095768, + "loss": 0.5522253513336182, + "loss_ce": 0.00022335897665470839, + "loss_iou": 0.21875, + "loss_num": 0.0230712890625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 107135848, + "step": 1912 + }, + { + "epoch": 4.260579064587973, + "grad_norm": 21.59320068359375, + "learning_rate": 1e-06, + "loss": 0.8173, + "num_input_tokens_seen": 107191440, + "step": 1913 + }, + { + "epoch": 4.260579064587973, + "loss": 1.090116024017334, + "loss_ce": 0.0002723358920775354, + "loss_iou": 0.486328125, + "loss_num": 0.0235595703125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 107191440, + "step": 1913 + }, + { + "epoch": 4.262806236080178, + "grad_norm": 16.01348876953125, + "learning_rate": 1e-06, + "loss": 0.8553, + "num_input_tokens_seen": 107247028, + "step": 1914 + }, + { + "epoch": 4.262806236080178, + "loss": 0.773563027381897, + "loss_ce": 0.0002476052031852305, + "loss_iou": 0.296875, + "loss_num": 0.0361328125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 107247028, + "step": 1914 + }, + { + "epoch": 4.265033407572383, + "grad_norm": 16.764503479003906, + "learning_rate": 1e-06, + "loss": 1.0386, + "num_input_tokens_seen": 107301488, + "step": 1915 + }, + { + "epoch": 4.265033407572383, + "loss": 1.2570042610168457, + "loss_ce": 0.0004123870749026537, + "loss_iou": 0.515625, + "loss_num": 0.04443359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 107301488, + "step": 1915 + }, + { + "epoch": 4.267260579064588, + "grad_norm": 17.2139949798584, + "learning_rate": 1e-06, + "loss": 0.7674, + "num_input_tokens_seen": 107357008, + "step": 1916 + }, + { + "epoch": 4.267260579064588, + "loss": 0.8225438594818115, + "loss_ce": 0.0002781503717415035, + "loss_iou": 0.34765625, + "loss_num": 0.0252685546875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 107357008, + "step": 1916 + }, + { + "epoch": 4.2694877505567925, + "grad_norm": 18.4807186126709, + "learning_rate": 1e-06, + "loss": 0.4431, + "num_input_tokens_seen": 107411784, + "step": 1917 + }, + { + "epoch": 4.2694877505567925, + "loss": 0.4435253143310547, + "loss_ce": 0.00041007628897204995, + "loss_iou": 0.1630859375, + "loss_num": 0.0234375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 107411784, + "step": 1917 + }, + { + "epoch": 4.271714922048997, + "grad_norm": 16.725303649902344, + "learning_rate": 1e-06, + "loss": 0.7385, + "num_input_tokens_seen": 107463544, + "step": 1918 + }, + { + "epoch": 4.271714922048997, + "loss": 0.7992796897888184, + "loss_ce": 0.00032942870166152716, + "loss_iou": 0.3515625, + "loss_num": 0.01904296875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 107463544, + "step": 1918 + }, + { + "epoch": 4.273942093541203, + "grad_norm": 17.902894973754883, + "learning_rate": 1e-06, + "loss": 0.6089, + "num_input_tokens_seen": 107518944, + "step": 1919 + }, + { + "epoch": 4.273942093541203, + "loss": 0.7945454120635986, + "loss_ce": 0.0002338896010769531, + "loss_iou": 0.328125, + "loss_num": 0.027587890625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 107518944, + "step": 1919 + }, + { + "epoch": 4.276169265033408, + "grad_norm": 21.469985961914062, + "learning_rate": 1e-06, + "loss": 0.7481, + "num_input_tokens_seen": 107574840, + "step": 1920 + }, + { + "epoch": 4.276169265033408, + "loss": 0.7355300188064575, + "loss_ce": 0.0003004681202583015, + "loss_iou": 0.310546875, + "loss_num": 0.0228271484375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 107574840, + "step": 1920 + }, + { + "epoch": 4.278396436525613, + "grad_norm": 18.499727249145508, + "learning_rate": 1e-06, + "loss": 0.8113, + "num_input_tokens_seen": 107629476, + "step": 1921 + }, + { + "epoch": 4.278396436525613, + "loss": 0.7343944311141968, + "loss_ce": 0.00026357907336205244, + "loss_iou": 0.30859375, + "loss_num": 0.02294921875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 107629476, + "step": 1921 + }, + { + "epoch": 4.280623608017818, + "grad_norm": 25.940811157226562, + "learning_rate": 1e-06, + "loss": 0.7132, + "num_input_tokens_seen": 107686892, + "step": 1922 + }, + { + "epoch": 4.280623608017818, + "loss": 0.6143178343772888, + "loss_ce": 0.0003041609888896346, + "loss_iou": 0.2578125, + "loss_num": 0.0194091796875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 107686892, + "step": 1922 + }, + { + "epoch": 4.282850779510023, + "grad_norm": 21.650733947753906, + "learning_rate": 1e-06, + "loss": 0.726, + "num_input_tokens_seen": 107743916, + "step": 1923 + }, + { + "epoch": 4.282850779510023, + "loss": 0.6625887751579285, + "loss_ce": 0.00023527286248281598, + "loss_iou": 0.27734375, + "loss_num": 0.02197265625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 107743916, + "step": 1923 + }, + { + "epoch": 4.285077951002227, + "grad_norm": 15.383871078491211, + "learning_rate": 1e-06, + "loss": 0.7414, + "num_input_tokens_seen": 107800000, + "step": 1924 + }, + { + "epoch": 4.285077951002227, + "loss": 0.7688758373260498, + "loss_ce": 0.0003211286966688931, + "loss_iou": 0.33203125, + "loss_num": 0.02099609375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 107800000, + "step": 1924 + }, + { + "epoch": 4.287305122494432, + "grad_norm": 18.70506477355957, + "learning_rate": 1e-06, + "loss": 0.6572, + "num_input_tokens_seen": 107856952, + "step": 1925 + }, + { + "epoch": 4.287305122494432, + "loss": 0.5217354893684387, + "loss_ce": 0.0002510941121727228, + "loss_iou": 0.2314453125, + "loss_num": 0.01177978515625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 107856952, + "step": 1925 + }, + { + "epoch": 4.289532293986637, + "grad_norm": 15.325700759887695, + "learning_rate": 1e-06, + "loss": 0.8531, + "num_input_tokens_seen": 107914656, + "step": 1926 + }, + { + "epoch": 4.289532293986637, + "loss": 0.7412204146385193, + "loss_ce": 0.0002536483807489276, + "loss_iou": 0.328125, + "loss_num": 0.0169677734375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 107914656, + "step": 1926 + }, + { + "epoch": 4.291759465478842, + "grad_norm": 36.90719985961914, + "learning_rate": 1e-06, + "loss": 1.0012, + "num_input_tokens_seen": 107971084, + "step": 1927 + }, + { + "epoch": 4.291759465478842, + "loss": 0.8567878007888794, + "loss_ce": 0.00034247490111738443, + "loss_iou": 0.376953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 107971084, + "step": 1927 + }, + { + "epoch": 4.293986636971047, + "grad_norm": 21.110570907592773, + "learning_rate": 1e-06, + "loss": 0.5608, + "num_input_tokens_seen": 108027536, + "step": 1928 + }, + { + "epoch": 4.293986636971047, + "loss": 0.49611085653305054, + "loss_ce": 0.0002612844982650131, + "loss_iou": 0.2041015625, + "loss_num": 0.017578125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 108027536, + "step": 1928 + }, + { + "epoch": 4.296213808463252, + "grad_norm": 19.765779495239258, + "learning_rate": 1e-06, + "loss": 0.6742, + "num_input_tokens_seen": 108086688, + "step": 1929 + }, + { + "epoch": 4.296213808463252, + "loss": 0.7515699863433838, + "loss_ce": 0.0002272275451105088, + "loss_iou": 0.32421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.75, + "num_input_tokens_seen": 108086688, + "step": 1929 + }, + { + "epoch": 4.298440979955457, + "grad_norm": 27.658580780029297, + "learning_rate": 1e-06, + "loss": 0.8779, + "num_input_tokens_seen": 108138612, + "step": 1930 + }, + { + "epoch": 4.298440979955457, + "loss": 0.9459295272827148, + "loss_ce": 0.0002508389297872782, + "loss_iou": 0.40625, + "loss_num": 0.0267333984375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 108138612, + "step": 1930 + }, + { + "epoch": 4.3006681514476615, + "grad_norm": 18.092632293701172, + "learning_rate": 1e-06, + "loss": 0.9169, + "num_input_tokens_seen": 108197300, + "step": 1931 + }, + { + "epoch": 4.3006681514476615, + "loss": 1.1985375881195068, + "loss_ce": 0.00029541528783738613, + "loss_iou": 0.46875, + "loss_num": 0.0517578125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 108197300, + "step": 1931 + }, + { + "epoch": 4.302895322939866, + "grad_norm": 22.87906265258789, + "learning_rate": 1e-06, + "loss": 0.7375, + "num_input_tokens_seen": 108253652, + "step": 1932 + }, + { + "epoch": 4.302895322939866, + "loss": 0.6818982362747192, + "loss_ce": 0.0002576185797806829, + "loss_iou": 0.30078125, + "loss_num": 0.015625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 108253652, + "step": 1932 + }, + { + "epoch": 4.305122494432071, + "grad_norm": 19.443403244018555, + "learning_rate": 1e-06, + "loss": 0.7365, + "num_input_tokens_seen": 108306668, + "step": 1933 + }, + { + "epoch": 4.305122494432071, + "loss": 0.8623777627944946, + "loss_ce": 0.0003171888238284737, + "loss_iou": 0.376953125, + "loss_num": 0.0216064453125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 108306668, + "step": 1933 + }, + { + "epoch": 4.307349665924276, + "grad_norm": 19.513713836669922, + "learning_rate": 1e-06, + "loss": 0.8245, + "num_input_tokens_seen": 108364872, + "step": 1934 + }, + { + "epoch": 4.307349665924276, + "loss": 0.8435297608375549, + "loss_ce": 0.00026804511435329914, + "loss_iou": 0.3671875, + "loss_num": 0.0220947265625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 108364872, + "step": 1934 + }, + { + "epoch": 4.309576837416481, + "grad_norm": 17.910724639892578, + "learning_rate": 1e-06, + "loss": 0.8137, + "num_input_tokens_seen": 108421172, + "step": 1935 + }, + { + "epoch": 4.309576837416481, + "loss": 1.0195529460906982, + "loss_ce": 0.000510023906826973, + "loss_iou": 0.41796875, + "loss_num": 0.03662109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 108421172, + "step": 1935 + }, + { + "epoch": 4.311804008908686, + "grad_norm": 20.821613311767578, + "learning_rate": 1e-06, + "loss": 0.7728, + "num_input_tokens_seen": 108476148, + "step": 1936 + }, + { + "epoch": 4.311804008908686, + "loss": 0.7290493249893188, + "loss_ce": 0.00028955648303963244, + "loss_iou": 0.2890625, + "loss_num": 0.0301513671875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 108476148, + "step": 1936 + }, + { + "epoch": 4.314031180400891, + "grad_norm": 14.239187240600586, + "learning_rate": 1e-06, + "loss": 0.7459, + "num_input_tokens_seen": 108530832, + "step": 1937 + }, + { + "epoch": 4.314031180400891, + "loss": 0.7683401107788086, + "loss_ce": 0.0002737450413405895, + "loss_iou": 0.34375, + "loss_num": 0.0164794921875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 108530832, + "step": 1937 + }, + { + "epoch": 4.3162583518930955, + "grad_norm": 22.064966201782227, + "learning_rate": 1e-06, + "loss": 0.827, + "num_input_tokens_seen": 108586484, + "step": 1938 + }, + { + "epoch": 4.3162583518930955, + "loss": 0.7754453420639038, + "loss_ce": 0.00029886612901464105, + "loss_iou": 0.349609375, + "loss_num": 0.0150146484375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 108586484, + "step": 1938 + }, + { + "epoch": 4.3184855233853, + "grad_norm": 22.323854446411133, + "learning_rate": 1e-06, + "loss": 0.6395, + "num_input_tokens_seen": 108643224, + "step": 1939 + }, + { + "epoch": 4.3184855233853, + "loss": 0.7868392467498779, + "loss_ce": 0.000950582732912153, + "loss_iou": 0.34765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 108643224, + "step": 1939 + }, + { + "epoch": 4.320712694877505, + "grad_norm": 18.545169830322266, + "learning_rate": 1e-06, + "loss": 0.6774, + "num_input_tokens_seen": 108699852, + "step": 1940 + }, + { + "epoch": 4.320712694877505, + "loss": 0.8520459532737732, + "loss_ce": 0.0003613817389123142, + "loss_iou": 0.337890625, + "loss_num": 0.035400390625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 108699852, + "step": 1940 + }, + { + "epoch": 4.32293986636971, + "grad_norm": 20.88629150390625, + "learning_rate": 1e-06, + "loss": 0.8882, + "num_input_tokens_seen": 108755364, + "step": 1941 + }, + { + "epoch": 4.32293986636971, + "loss": 0.8283944129943848, + "loss_ce": 0.000269408046733588, + "loss_iou": 0.322265625, + "loss_num": 0.037109375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 108755364, + "step": 1941 + }, + { + "epoch": 4.325167037861915, + "grad_norm": 26.91864585876465, + "learning_rate": 1e-06, + "loss": 0.573, + "num_input_tokens_seen": 108814096, + "step": 1942 + }, + { + "epoch": 4.325167037861915, + "loss": 0.6568059921264648, + "loss_ce": 0.00031184524414129555, + "loss_iou": 0.298828125, + "loss_num": 0.0120849609375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 108814096, + "step": 1942 + }, + { + "epoch": 4.327394209354121, + "grad_norm": 14.774848937988281, + "learning_rate": 1e-06, + "loss": 0.9021, + "num_input_tokens_seen": 108870164, + "step": 1943 + }, + { + "epoch": 4.327394209354121, + "loss": 0.8290045261383057, + "loss_ce": 0.001856090733781457, + "loss_iou": 0.33984375, + "loss_num": 0.0294189453125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 108870164, + "step": 1943 + }, + { + "epoch": 4.3296213808463255, + "grad_norm": 18.11646270751953, + "learning_rate": 1e-06, + "loss": 0.8127, + "num_input_tokens_seen": 108927584, + "step": 1944 + }, + { + "epoch": 4.3296213808463255, + "loss": 0.6893248558044434, + "loss_ce": 0.0002379524812567979, + "loss_iou": 0.29296875, + "loss_num": 0.0201416015625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 108927584, + "step": 1944 + }, + { + "epoch": 4.33184855233853, + "grad_norm": 14.257638931274414, + "learning_rate": 1e-06, + "loss": 0.8333, + "num_input_tokens_seen": 108983712, + "step": 1945 + }, + { + "epoch": 4.33184855233853, + "loss": 0.9684985876083374, + "loss_ce": 0.0003589991247281432, + "loss_iou": 0.400390625, + "loss_num": 0.03369140625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 108983712, + "step": 1945 + }, + { + "epoch": 4.334075723830735, + "grad_norm": 24.499502182006836, + "learning_rate": 1e-06, + "loss": 0.5697, + "num_input_tokens_seen": 109040176, + "step": 1946 + }, + { + "epoch": 4.334075723830735, + "loss": 0.4902326464653015, + "loss_ce": 0.00024243266670964658, + "loss_iou": 0.2158203125, + "loss_num": 0.01153564453125, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 109040176, + "step": 1946 + }, + { + "epoch": 4.33630289532294, + "grad_norm": 28.52815055847168, + "learning_rate": 1e-06, + "loss": 0.8584, + "num_input_tokens_seen": 109096776, + "step": 1947 + }, + { + "epoch": 4.33630289532294, + "loss": 0.8685885667800903, + "loss_ce": 0.0003024664765689522, + "loss_iou": 0.345703125, + "loss_num": 0.035400390625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 109096776, + "step": 1947 + }, + { + "epoch": 4.338530066815145, + "grad_norm": 24.575611114501953, + "learning_rate": 1e-06, + "loss": 1.0167, + "num_input_tokens_seen": 109153828, + "step": 1948 + }, + { + "epoch": 4.338530066815145, + "loss": 0.993535041809082, + "loss_ce": 0.0002489146136213094, + "loss_iou": 0.43359375, + "loss_num": 0.025634765625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 109153828, + "step": 1948 + }, + { + "epoch": 4.34075723830735, + "grad_norm": 15.715718269348145, + "learning_rate": 1e-06, + "loss": 0.8722, + "num_input_tokens_seen": 109212620, + "step": 1949 + }, + { + "epoch": 4.34075723830735, + "loss": 0.8171886205673218, + "loss_ce": 0.00029411769355647266, + "loss_iou": 0.34765625, + "loss_num": 0.0240478515625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 109212620, + "step": 1949 + }, + { + "epoch": 4.342984409799555, + "grad_norm": 23.913692474365234, + "learning_rate": 1e-06, + "loss": 0.6369, + "num_input_tokens_seen": 109270456, + "step": 1950 + }, + { + "epoch": 4.342984409799555, + "loss": 0.641880989074707, + "loss_ce": 0.0002794649626594037, + "loss_iou": 0.28125, + "loss_num": 0.015625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 109270456, + "step": 1950 + }, + { + "epoch": 4.3452115812917596, + "grad_norm": 18.232446670532227, + "learning_rate": 1e-06, + "loss": 0.768, + "num_input_tokens_seen": 109325660, + "step": 1951 + }, + { + "epoch": 4.3452115812917596, + "loss": 0.9707180857658386, + "loss_ce": 0.0002590723452158272, + "loss_iou": 0.416015625, + "loss_num": 0.0277099609375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 109325660, + "step": 1951 + }, + { + "epoch": 4.347438752783964, + "grad_norm": 16.996084213256836, + "learning_rate": 1e-06, + "loss": 0.9126, + "num_input_tokens_seen": 109381844, + "step": 1952 + }, + { + "epoch": 4.347438752783964, + "loss": 0.7782900333404541, + "loss_ce": 0.00027485546888783574, + "loss_iou": 0.3515625, + "loss_num": 0.01483154296875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 109381844, + "step": 1952 + }, + { + "epoch": 4.349665924276169, + "grad_norm": 17.12710189819336, + "learning_rate": 1e-06, + "loss": 0.826, + "num_input_tokens_seen": 109440512, + "step": 1953 + }, + { + "epoch": 4.349665924276169, + "loss": 1.0132062435150146, + "loss_ce": 0.00026673171669244766, + "loss_iou": 0.427734375, + "loss_num": 0.03173828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 109440512, + "step": 1953 + }, + { + "epoch": 4.351893095768374, + "grad_norm": 14.912611961364746, + "learning_rate": 1e-06, + "loss": 0.8326, + "num_input_tokens_seen": 109497420, + "step": 1954 + }, + { + "epoch": 4.351893095768374, + "loss": 0.42840301990509033, + "loss_ce": 0.00030242273351177573, + "loss_iou": 0.17578125, + "loss_num": 0.01519775390625, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 109497420, + "step": 1954 + }, + { + "epoch": 4.354120267260579, + "grad_norm": 17.690128326416016, + "learning_rate": 1e-06, + "loss": 1.0669, + "num_input_tokens_seen": 109554340, + "step": 1955 + }, + { + "epoch": 4.354120267260579, + "loss": 1.157071590423584, + "loss_ce": 0.00033330710721202195, + "loss_iou": 0.466796875, + "loss_num": 0.044677734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 109554340, + "step": 1955 + }, + { + "epoch": 4.356347438752784, + "grad_norm": 279.8905029296875, + "learning_rate": 1e-06, + "loss": 0.7712, + "num_input_tokens_seen": 109611648, + "step": 1956 + }, + { + "epoch": 4.356347438752784, + "loss": 0.8818769454956055, + "loss_ce": 0.0010175781790167093, + "loss_iou": 0.384765625, + "loss_num": 0.0224609375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 109611648, + "step": 1956 + }, + { + "epoch": 4.358574610244989, + "grad_norm": 21.142581939697266, + "learning_rate": 1e-06, + "loss": 0.8436, + "num_input_tokens_seen": 109670164, + "step": 1957 + }, + { + "epoch": 4.358574610244989, + "loss": 0.8566855192184448, + "loss_ce": 0.00024018189287744462, + "loss_iou": 0.353515625, + "loss_num": 0.02978515625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 109670164, + "step": 1957 + }, + { + "epoch": 4.360801781737194, + "grad_norm": 23.72075080871582, + "learning_rate": 1e-06, + "loss": 0.5878, + "num_input_tokens_seen": 109727780, + "step": 1958 + }, + { + "epoch": 4.360801781737194, + "loss": 0.7790469527244568, + "loss_ce": 0.00023835319734644145, + "loss_iou": 0.318359375, + "loss_num": 0.0289306640625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 109727780, + "step": 1958 + }, + { + "epoch": 4.363028953229398, + "grad_norm": 22.97592544555664, + "learning_rate": 1e-06, + "loss": 0.9028, + "num_input_tokens_seen": 109786720, + "step": 1959 + }, + { + "epoch": 4.363028953229398, + "loss": 1.0544261932373047, + "loss_ce": 0.00022698812244925648, + "loss_iou": 0.4453125, + "loss_num": 0.032470703125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 109786720, + "step": 1959 + }, + { + "epoch": 4.365256124721603, + "grad_norm": 35.19451904296875, + "learning_rate": 1e-06, + "loss": 1.0087, + "num_input_tokens_seen": 109840344, + "step": 1960 + }, + { + "epoch": 4.365256124721603, + "loss": 0.9346234202384949, + "loss_ce": 0.00029728777008131146, + "loss_iou": 0.412109375, + "loss_num": 0.0218505859375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 109840344, + "step": 1960 + }, + { + "epoch": 4.367483296213808, + "grad_norm": 21.005878448486328, + "learning_rate": 1e-06, + "loss": 0.8335, + "num_input_tokens_seen": 109898760, + "step": 1961 + }, + { + "epoch": 4.367483296213808, + "loss": 0.7780290842056274, + "loss_ce": 0.0004412271664477885, + "loss_iou": 0.306640625, + "loss_num": 0.033203125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 109898760, + "step": 1961 + }, + { + "epoch": 4.369710467706013, + "grad_norm": 20.942707061767578, + "learning_rate": 1e-06, + "loss": 0.7858, + "num_input_tokens_seen": 109956856, + "step": 1962 + }, + { + "epoch": 4.369710467706013, + "loss": 0.6604050397872925, + "loss_ce": 0.0002488172031007707, + "loss_iou": 0.296875, + "loss_num": 0.0133056640625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 109956856, + "step": 1962 + }, + { + "epoch": 4.371937639198218, + "grad_norm": 14.403862953186035, + "learning_rate": 1e-06, + "loss": 0.5435, + "num_input_tokens_seen": 110010668, + "step": 1963 + }, + { + "epoch": 4.371937639198218, + "loss": 0.32872194051742554, + "loss_ce": 0.00023072000476531684, + "loss_iou": 0.1455078125, + "loss_num": 0.007476806640625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 110010668, + "step": 1963 + }, + { + "epoch": 4.374164810690424, + "grad_norm": 27.356098175048828, + "learning_rate": 1e-06, + "loss": 0.5899, + "num_input_tokens_seen": 110067848, + "step": 1964 + }, + { + "epoch": 4.374164810690424, + "loss": 0.6242412328720093, + "loss_ce": 0.00021780356473755091, + "loss_iou": 0.25390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.625, + "num_input_tokens_seen": 110067848, + "step": 1964 + }, + { + "epoch": 4.3763919821826285, + "grad_norm": 24.027332305908203, + "learning_rate": 1e-06, + "loss": 0.6839, + "num_input_tokens_seen": 110122296, + "step": 1965 + }, + { + "epoch": 4.3763919821826285, + "loss": 0.6491550207138062, + "loss_ce": 0.00022922824427951127, + "loss_iou": 0.28125, + "loss_num": 0.0169677734375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 110122296, + "step": 1965 + }, + { + "epoch": 4.378619153674833, + "grad_norm": 18.06757354736328, + "learning_rate": 1e-06, + "loss": 0.8689, + "num_input_tokens_seen": 110177296, + "step": 1966 + }, + { + "epoch": 4.378619153674833, + "loss": 0.7419674396514893, + "loss_ce": 0.0002682540216483176, + "loss_iou": 0.31640625, + "loss_num": 0.021484375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 110177296, + "step": 1966 + }, + { + "epoch": 4.380846325167038, + "grad_norm": 22.51265525817871, + "learning_rate": 1e-06, + "loss": 0.8991, + "num_input_tokens_seen": 110233976, + "step": 1967 + }, + { + "epoch": 4.380846325167038, + "loss": 0.7884268760681152, + "loss_ce": 0.000340936123393476, + "loss_iou": 0.318359375, + "loss_num": 0.030029296875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 110233976, + "step": 1967 + }, + { + "epoch": 4.383073496659243, + "grad_norm": 34.43399429321289, + "learning_rate": 1e-06, + "loss": 1.0184, + "num_input_tokens_seen": 110289432, + "step": 1968 + }, + { + "epoch": 4.383073496659243, + "loss": 1.0707261562347412, + "loss_ce": 0.00041369907557964325, + "loss_iou": 0.4609375, + "loss_num": 0.0296630859375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 110289432, + "step": 1968 + }, + { + "epoch": 4.385300668151448, + "grad_norm": 21.057336807250977, + "learning_rate": 1e-06, + "loss": 0.8572, + "num_input_tokens_seen": 110345920, + "step": 1969 + }, + { + "epoch": 4.385300668151448, + "loss": 0.8917399644851685, + "loss_ce": 0.0003824798041023314, + "loss_iou": 0.369140625, + "loss_num": 0.0303955078125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 110345920, + "step": 1969 + }, + { + "epoch": 4.387527839643653, + "grad_norm": 17.0823917388916, + "learning_rate": 1e-06, + "loss": 0.8178, + "num_input_tokens_seen": 110398524, + "step": 1970 + }, + { + "epoch": 4.387527839643653, + "loss": 0.9053047895431519, + "loss_ce": 0.0002754859742708504, + "loss_iou": 0.349609375, + "loss_num": 0.041015625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 110398524, + "step": 1970 + }, + { + "epoch": 4.389755011135858, + "grad_norm": 19.87203598022461, + "learning_rate": 1e-06, + "loss": 0.89, + "num_input_tokens_seen": 110454132, + "step": 1971 + }, + { + "epoch": 4.389755011135858, + "loss": 0.851262092590332, + "loss_ce": 0.00030996621353551745, + "loss_iou": 0.3515625, + "loss_num": 0.02978515625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 110454132, + "step": 1971 + }, + { + "epoch": 4.3919821826280625, + "grad_norm": 25.85304069519043, + "learning_rate": 1e-06, + "loss": 0.668, + "num_input_tokens_seen": 110510644, + "step": 1972 + }, + { + "epoch": 4.3919821826280625, + "loss": 0.6331183314323425, + "loss_ce": 0.0003058624570257962, + "loss_iou": 0.265625, + "loss_num": 0.0201416015625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 110510644, + "step": 1972 + }, + { + "epoch": 4.394209354120267, + "grad_norm": 20.608179092407227, + "learning_rate": 1e-06, + "loss": 0.4726, + "num_input_tokens_seen": 110568336, + "step": 1973 + }, + { + "epoch": 4.394209354120267, + "loss": 0.5503981113433838, + "loss_ce": 0.00022721837740391493, + "loss_iou": 0.25, + "loss_num": 0.00994873046875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 110568336, + "step": 1973 + }, + { + "epoch": 4.396436525612472, + "grad_norm": 27.215190887451172, + "learning_rate": 1e-06, + "loss": 0.7294, + "num_input_tokens_seen": 110624496, + "step": 1974 + }, + { + "epoch": 4.396436525612472, + "loss": 0.8059403896331787, + "loss_ce": 0.00027631851844489574, + "loss_iou": 0.341796875, + "loss_num": 0.02392578125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 110624496, + "step": 1974 + }, + { + "epoch": 4.398663697104677, + "grad_norm": 17.576311111450195, + "learning_rate": 1e-06, + "loss": 0.5477, + "num_input_tokens_seen": 110679224, + "step": 1975 + }, + { + "epoch": 4.398663697104677, + "loss": 0.3737618923187256, + "loss_ce": 0.0002267223026137799, + "loss_iou": 0.1455078125, + "loss_num": 0.0167236328125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 110679224, + "step": 1975 + }, + { + "epoch": 4.400890868596882, + "grad_norm": 22.326269149780273, + "learning_rate": 1e-06, + "loss": 0.7698, + "num_input_tokens_seen": 110735720, + "step": 1976 + }, + { + "epoch": 4.400890868596882, + "loss": 0.7260935306549072, + "loss_ce": 0.0002634518896229565, + "loss_iou": 0.33203125, + "loss_num": 0.0120849609375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 110735720, + "step": 1976 + }, + { + "epoch": 4.403118040089087, + "grad_norm": 31.376184463500977, + "learning_rate": 1e-06, + "loss": 0.7239, + "num_input_tokens_seen": 110790824, + "step": 1977 + }, + { + "epoch": 4.403118040089087, + "loss": 0.8379002213478088, + "loss_ce": 0.0002537188120186329, + "loss_iou": 0.359375, + "loss_num": 0.024169921875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 110790824, + "step": 1977 + }, + { + "epoch": 4.405345211581292, + "grad_norm": 22.093713760375977, + "learning_rate": 1e-06, + "loss": 1.2021, + "num_input_tokens_seen": 110845432, + "step": 1978 + }, + { + "epoch": 4.405345211581292, + "loss": 1.1360301971435547, + "loss_ce": 0.00028802291490137577, + "loss_iou": 0.482421875, + "loss_num": 0.033935546875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 110845432, + "step": 1978 + }, + { + "epoch": 4.4075723830734965, + "grad_norm": 21.3291072845459, + "learning_rate": 1e-06, + "loss": 0.7553, + "num_input_tokens_seen": 110901496, + "step": 1979 + }, + { + "epoch": 4.4075723830734965, + "loss": 0.6370877623558044, + "loss_ce": 0.0003689899167511612, + "loss_iou": 0.279296875, + "loss_num": 0.015625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 110901496, + "step": 1979 + }, + { + "epoch": 4.409799554565701, + "grad_norm": 26.429105758666992, + "learning_rate": 1e-06, + "loss": 0.8782, + "num_input_tokens_seen": 110960112, + "step": 1980 + }, + { + "epoch": 4.409799554565701, + "loss": 0.8666300773620605, + "loss_ce": 0.00029704906046390533, + "loss_iou": 0.365234375, + "loss_num": 0.02734375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 110960112, + "step": 1980 + }, + { + "epoch": 4.412026726057906, + "grad_norm": 22.83293342590332, + "learning_rate": 1e-06, + "loss": 0.6349, + "num_input_tokens_seen": 111016200, + "step": 1981 + }, + { + "epoch": 4.412026726057906, + "loss": 0.6940905451774597, + "loss_ce": 0.0002428823063382879, + "loss_iou": 0.30078125, + "loss_num": 0.0184326171875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 111016200, + "step": 1981 + }, + { + "epoch": 4.414253897550111, + "grad_norm": 31.386951446533203, + "learning_rate": 1e-06, + "loss": 0.9319, + "num_input_tokens_seen": 111070980, + "step": 1982 + }, + { + "epoch": 4.414253897550111, + "loss": 0.7642084956169128, + "loss_ce": 0.00029245836776681244, + "loss_iou": 0.330078125, + "loss_num": 0.0206298828125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 111070980, + "step": 1982 + }, + { + "epoch": 4.416481069042316, + "grad_norm": 14.445569038391113, + "learning_rate": 1e-06, + "loss": 0.7298, + "num_input_tokens_seen": 111127432, + "step": 1983 + }, + { + "epoch": 4.416481069042316, + "loss": 0.7684084177017212, + "loss_ce": 0.00028093665605410933, + "loss_iou": 0.31640625, + "loss_num": 0.02734375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 111127432, + "step": 1983 + }, + { + "epoch": 4.418708240534521, + "grad_norm": 27.805484771728516, + "learning_rate": 1e-06, + "loss": 0.8484, + "num_input_tokens_seen": 111182920, + "step": 1984 + }, + { + "epoch": 4.418708240534521, + "loss": 1.1093249320983887, + "loss_ce": 0.0006822688737884164, + "loss_iou": 0.498046875, + "loss_num": 0.02294921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 111182920, + "step": 1984 + }, + { + "epoch": 4.420935412026726, + "grad_norm": 18.803524017333984, + "learning_rate": 1e-06, + "loss": 0.8397, + "num_input_tokens_seen": 111236748, + "step": 1985 + }, + { + "epoch": 4.420935412026726, + "loss": 0.7825398445129395, + "loss_ce": 0.0003133030259050429, + "loss_iou": 0.3125, + "loss_num": 0.03125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 111236748, + "step": 1985 + }, + { + "epoch": 4.4231625835189305, + "grad_norm": 15.333048820495605, + "learning_rate": 1e-06, + "loss": 0.7332, + "num_input_tokens_seen": 111292696, + "step": 1986 + }, + { + "epoch": 4.4231625835189305, + "loss": 0.8349952697753906, + "loss_ce": 0.00027839711401611567, + "loss_iou": 0.3359375, + "loss_num": 0.03271484375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 111292696, + "step": 1986 + }, + { + "epoch": 4.425389755011135, + "grad_norm": 19.713153839111328, + "learning_rate": 1e-06, + "loss": 0.8684, + "num_input_tokens_seen": 111348792, + "step": 1987 + }, + { + "epoch": 4.425389755011135, + "loss": 0.7836449146270752, + "loss_ce": 0.0009300732635892928, + "loss_iou": 0.310546875, + "loss_num": 0.0322265625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 111348792, + "step": 1987 + }, + { + "epoch": 4.427616926503341, + "grad_norm": 16.41890525817871, + "learning_rate": 1e-06, + "loss": 0.9639, + "num_input_tokens_seen": 111405636, + "step": 1988 + }, + { + "epoch": 4.427616926503341, + "loss": 0.8102682828903198, + "loss_ce": 0.0003318004310131073, + "loss_iou": 0.345703125, + "loss_num": 0.023193359375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 111405636, + "step": 1988 + }, + { + "epoch": 4.429844097995546, + "grad_norm": 17.086889266967773, + "learning_rate": 1e-06, + "loss": 0.9369, + "num_input_tokens_seen": 111463272, + "step": 1989 + }, + { + "epoch": 4.429844097995546, + "loss": 0.8751001358032227, + "loss_ce": 0.0003442912711761892, + "loss_iou": 0.36328125, + "loss_num": 0.0299072265625, + "loss_xval": 0.875, + "num_input_tokens_seen": 111463272, + "step": 1989 + }, + { + "epoch": 4.432071269487751, + "grad_norm": 25.840059280395508, + "learning_rate": 1e-06, + "loss": 0.8088, + "num_input_tokens_seen": 111518036, + "step": 1990 + }, + { + "epoch": 4.432071269487751, + "loss": 0.8438689708709717, + "loss_ce": 0.0003630804130807519, + "loss_iou": 0.37109375, + "loss_num": 0.020751953125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 111518036, + "step": 1990 + }, + { + "epoch": 4.434298440979956, + "grad_norm": 59.44672775268555, + "learning_rate": 1e-06, + "loss": 0.739, + "num_input_tokens_seen": 111575688, + "step": 1991 + }, + { + "epoch": 4.434298440979956, + "loss": 0.8342432975769043, + "loss_ce": 0.00025894519058056176, + "loss_iou": 0.3671875, + "loss_num": 0.019775390625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 111575688, + "step": 1991 + }, + { + "epoch": 4.436525612472161, + "grad_norm": 18.12961769104004, + "learning_rate": 1e-06, + "loss": 0.7432, + "num_input_tokens_seen": 111628236, + "step": 1992 + }, + { + "epoch": 4.436525612472161, + "loss": 0.42511090636253357, + "loss_ce": 0.00030621426412835717, + "loss_iou": 0.1845703125, + "loss_num": 0.01092529296875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 111628236, + "step": 1992 + }, + { + "epoch": 4.4387527839643655, + "grad_norm": 34.39860916137695, + "learning_rate": 1e-06, + "loss": 0.6518, + "num_input_tokens_seen": 111685388, + "step": 1993 + }, + { + "epoch": 4.4387527839643655, + "loss": 0.8032548427581787, + "loss_ce": 0.0002763355150818825, + "loss_iou": 0.326171875, + "loss_num": 0.0299072265625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 111685388, + "step": 1993 + }, + { + "epoch": 4.44097995545657, + "grad_norm": 18.156936645507812, + "learning_rate": 1e-06, + "loss": 0.6957, + "num_input_tokens_seen": 111740404, + "step": 1994 + }, + { + "epoch": 4.44097995545657, + "loss": 0.7895779013633728, + "loss_ce": 0.0002712547720875591, + "loss_iou": 0.318359375, + "loss_num": 0.0303955078125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 111740404, + "step": 1994 + }, + { + "epoch": 4.443207126948775, + "grad_norm": 16.068462371826172, + "learning_rate": 1e-06, + "loss": 0.8716, + "num_input_tokens_seen": 111795960, + "step": 1995 + }, + { + "epoch": 4.443207126948775, + "loss": 1.020226001739502, + "loss_ce": 0.0004505877732299268, + "loss_iou": 0.40625, + "loss_num": 0.041259765625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 111795960, + "step": 1995 + }, + { + "epoch": 4.44543429844098, + "grad_norm": 17.97376251220703, + "learning_rate": 1e-06, + "loss": 0.5944, + "num_input_tokens_seen": 111854164, + "step": 1996 + }, + { + "epoch": 4.44543429844098, + "loss": 0.6652653813362122, + "loss_ce": 0.00034838789724744856, + "loss_iou": 0.279296875, + "loss_num": 0.021484375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 111854164, + "step": 1996 + }, + { + "epoch": 4.447661469933185, + "grad_norm": 17.494909286499023, + "learning_rate": 1e-06, + "loss": 0.7858, + "num_input_tokens_seen": 111908960, + "step": 1997 + }, + { + "epoch": 4.447661469933185, + "loss": 0.6890178918838501, + "loss_ce": 0.0002971593930851668, + "loss_iou": 0.30078125, + "loss_num": 0.0172119140625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 111908960, + "step": 1997 + }, + { + "epoch": 4.44988864142539, + "grad_norm": 41.67517852783203, + "learning_rate": 1e-06, + "loss": 0.8325, + "num_input_tokens_seen": 111961680, + "step": 1998 + }, + { + "epoch": 4.44988864142539, + "loss": 0.6864081621170044, + "loss_ce": 0.0011054326314479113, + "loss_iou": 0.2890625, + "loss_num": 0.021728515625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 111961680, + "step": 1998 + }, + { + "epoch": 4.452115812917595, + "grad_norm": 31.639928817749023, + "learning_rate": 1e-06, + "loss": 0.6754, + "num_input_tokens_seen": 112020664, + "step": 1999 + }, + { + "epoch": 4.452115812917595, + "loss": 0.7533044219017029, + "loss_ce": 0.00025264680152758956, + "loss_iou": 0.326171875, + "loss_num": 0.02001953125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 112020664, + "step": 1999 + }, + { + "epoch": 4.4543429844097995, + "grad_norm": 17.85369300842285, + "learning_rate": 1e-06, + "loss": 0.6786, + "num_input_tokens_seen": 112077028, + "step": 2000 + }, + { + "epoch": 4.4543429844097995, + "eval_seeclick_web_CIoU": 0.565800279378891, + "eval_seeclick_web_GIoU": 0.5634768307209015, + "eval_seeclick_web_IoU": 0.581417441368103, + "eval_seeclick_web_MAE_all": 0.017455607652664185, + "eval_seeclick_web_MAE_h": 0.010133389849215746, + "eval_seeclick_web_MAE_w": 0.01889999955892563, + "eval_seeclick_web_MAE_x_boxes": 0.00820656050927937, + "eval_seeclick_web_MAE_y_boxes": 0.02206642786040902, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9660096764564514, + "eval_seeclick_web_loss_ce": 0.0003684775874717161, + "eval_seeclick_web_loss_iou": 0.4342041015625, + "eval_seeclick_web_loss_num": 0.013675689697265625, + "eval_seeclick_web_loss_xval": 0.9365234375, + "eval_seeclick_web_runtime": 30.3044, + "eval_seeclick_web_samples_per_second": 1.65, + "eval_seeclick_web_steps_per_second": 0.066, + "num_input_tokens_seen": 112077028, + "step": 2000 + }, + { + "epoch": 4.4543429844097995, + "eval_icons_CIoU": 0.30845747888088226, + "eval_icons_GIoU": 0.33300746977329254, + "eval_icons_IoU": 0.38466861844062805, + "eval_icons_MAE_all": 0.06535855308175087, + "eval_icons_MAE_h": 0.03866162151098251, + "eval_icons_MAE_w": 0.0722741037607193, + "eval_icons_MAE_x_boxes": 0.06343554332852364, + "eval_icons_MAE_y_boxes": 0.03758078906685114, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.709810495376587, + "eval_icons_loss_ce": 0.00041042393422685564, + "eval_icons_loss_iou": 0.6688232421875, + "eval_icons_loss_num": 0.06202888488769531, + "eval_icons_loss_xval": 1.6484375, + "eval_icons_runtime": 23.8114, + "eval_icons_samples_per_second": 2.1, + "eval_icons_steps_per_second": 0.084, + "num_input_tokens_seen": 112077028, + "step": 2000 + }, + { + "epoch": 4.4543429844097995, + "eval_screenspot_CIoU": 0.3293018539746602, + "eval_screenspot_GIoU": 0.34788986047108966, + "eval_screenspot_IoU": 0.4084410071372986, + "eval_screenspot_MAE_all": 0.06550942113002141, + "eval_screenspot_MAE_h": 0.036151558781663574, + "eval_screenspot_MAE_w": 0.07488848641514778, + "eval_screenspot_MAE_x_boxes": 0.07839654758572578, + "eval_screenspot_MAE_y_boxes": 0.04824645258486271, + "eval_screenspot_inside_bbox": 0.6462500095367432, + "eval_screenspot_loss": 1.6914161443710327, + "eval_screenspot_loss_ce": 0.00041740476929893094, + "eval_screenspot_loss_iou": 0.695556640625, + "eval_screenspot_loss_num": 0.07661692301432292, + "eval_screenspot_loss_xval": 1.7740885416666667, + "eval_screenspot_runtime": 39.3803, + "eval_screenspot_samples_per_second": 2.26, + "eval_screenspot_steps_per_second": 0.076, + "num_input_tokens_seen": 112077028, + "step": 2000 + }, + { + "epoch": 4.4543429844097995, + "eval_compot_CIoU": 0.35170771181583405, + "eval_compot_GIoU": 0.37089845538139343, + "eval_compot_IoU": 0.4089939594268799, + "eval_compot_MAE_all": 0.018402607180178165, + "eval_compot_MAE_h": 0.008481500204652548, + "eval_compot_MAE_w": 0.02456105500459671, + "eval_compot_MAE_x_boxes": 0.029113260563462973, + "eval_compot_MAE_y_boxes": 0.0059411004185676575, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3837639093399048, + "eval_compot_loss_ce": 0.0003268021682742983, + "eval_compot_loss_iou": 0.6363525390625, + "eval_compot_loss_num": 0.017194747924804688, + "eval_compot_loss_xval": 1.357666015625, + "eval_compot_runtime": 23.2732, + "eval_compot_samples_per_second": 2.148, + "eval_compot_steps_per_second": 0.086, + "num_input_tokens_seen": 112077028, + "step": 2000 + }, + { + "epoch": 4.4543429844097995, + "eval_custom_ui_val_CIoU": 0.4485127362940047, + "eval_custom_ui_val_GIoU": 0.4697931508223216, + "eval_custom_ui_val_IoU": 0.5003485398160087, + "eval_custom_ui_val_MAE_all": 0.03264946728530857, + "eval_custom_ui_val_MAE_h": 0.01890565103126897, + "eval_custom_ui_val_MAE_w": 0.03845942123896546, + "eval_custom_ui_val_MAE_x_boxes": 0.03562343731108639, + "eval_custom_ui_val_MAE_y_boxes": 0.017556848521861766, + "eval_custom_ui_val_inside_bbox": 0.7040895091162788, + "eval_custom_ui_val_loss": 1.2602308988571167, + "eval_custom_ui_val_loss_ce": 0.000424561229819018, + "eval_custom_ui_val_loss_iou": 0.533935546875, + "eval_custom_ui_val_loss_num": 0.03228992886013455, + "eval_custom_ui_val_loss_xval": 1.2295193142361112, + "eval_custom_ui_val_runtime": 61.5934, + "eval_custom_ui_val_samples_per_second": 4.302, + "eval_custom_ui_val_steps_per_second": 0.146, + "num_input_tokens_seen": 112077028, + "step": 2000 + }, + { + "epoch": 4.4543429844097995, + "loss": 0.9637588858604431, + "loss_ce": 0.0003799735859502107, + "loss_iou": 0.41796875, + "loss_num": 0.025146484375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 112077028, + "step": 2000 + }, + { + "epoch": 4.456570155902004, + "grad_norm": 21.966798782348633, + "learning_rate": 1e-06, + "loss": 0.8968, + "num_input_tokens_seen": 112130504, + "step": 2001 + }, + { + "epoch": 4.456570155902004, + "loss": 0.7630659341812134, + "loss_ce": 0.0003706031129695475, + "loss_iou": 0.31640625, + "loss_num": 0.025634765625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 112130504, + "step": 2001 + }, + { + "epoch": 4.458797327394209, + "grad_norm": 16.523204803466797, + "learning_rate": 1e-06, + "loss": 0.7768, + "num_input_tokens_seen": 112188064, + "step": 2002 + }, + { + "epoch": 4.458797327394209, + "loss": 0.6959242820739746, + "loss_ce": 0.00024555198615416884, + "loss_iou": 0.28515625, + "loss_num": 0.025390625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 112188064, + "step": 2002 + }, + { + "epoch": 4.461024498886414, + "grad_norm": 14.921442985534668, + "learning_rate": 1e-06, + "loss": 0.8676, + "num_input_tokens_seen": 112245504, + "step": 2003 + }, + { + "epoch": 4.461024498886414, + "loss": 0.7521045207977295, + "loss_ce": 0.00027342155226506293, + "loss_iou": 0.322265625, + "loss_num": 0.021728515625, + "loss_xval": 0.75, + "num_input_tokens_seen": 112245504, + "step": 2003 + }, + { + "epoch": 4.463251670378619, + "grad_norm": 28.89473533630371, + "learning_rate": 1e-06, + "loss": 0.8408, + "num_input_tokens_seen": 112300972, + "step": 2004 + }, + { + "epoch": 4.463251670378619, + "loss": 0.6384103298187256, + "loss_ce": 0.00022677732340525836, + "loss_iou": 0.265625, + "loss_num": 0.021484375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 112300972, + "step": 2004 + }, + { + "epoch": 4.465478841870824, + "grad_norm": 21.826824188232422, + "learning_rate": 1e-06, + "loss": 0.9587, + "num_input_tokens_seen": 112353748, + "step": 2005 + }, + { + "epoch": 4.465478841870824, + "loss": 0.8835301399230957, + "loss_ce": 0.00022938736947253346, + "loss_iou": 0.380859375, + "loss_num": 0.0244140625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 112353748, + "step": 2005 + }, + { + "epoch": 4.467706013363029, + "grad_norm": 18.621387481689453, + "learning_rate": 1e-06, + "loss": 0.857, + "num_input_tokens_seen": 112408688, + "step": 2006 + }, + { + "epoch": 4.467706013363029, + "loss": 0.7712559700012207, + "loss_ce": 0.0002599011640995741, + "loss_iou": 0.35546875, + "loss_num": 0.0123291015625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 112408688, + "step": 2006 + }, + { + "epoch": 4.4699331848552335, + "grad_norm": 20.207508087158203, + "learning_rate": 1e-06, + "loss": 0.7325, + "num_input_tokens_seen": 112465128, + "step": 2007 + }, + { + "epoch": 4.4699331848552335, + "loss": 0.6955528855323792, + "loss_ce": 0.00024040245625656098, + "loss_iou": 0.294921875, + "loss_num": 0.0211181640625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 112465128, + "step": 2007 + }, + { + "epoch": 4.472160356347438, + "grad_norm": 15.423189163208008, + "learning_rate": 1e-06, + "loss": 0.7108, + "num_input_tokens_seen": 112520084, + "step": 2008 + }, + { + "epoch": 4.472160356347438, + "loss": 0.7231366038322449, + "loss_ce": 0.0002362322702538222, + "loss_iou": 0.28125, + "loss_num": 0.03173828125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 112520084, + "step": 2008 + }, + { + "epoch": 4.474387527839644, + "grad_norm": 23.1674861907959, + "learning_rate": 1e-06, + "loss": 0.7111, + "num_input_tokens_seen": 112577288, + "step": 2009 + }, + { + "epoch": 4.474387527839644, + "loss": 0.6543446779251099, + "loss_ce": 0.0002919051912613213, + "loss_iou": 0.279296875, + "loss_num": 0.0194091796875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 112577288, + "step": 2009 + }, + { + "epoch": 4.476614699331849, + "grad_norm": 21.063098907470703, + "learning_rate": 1e-06, + "loss": 0.8211, + "num_input_tokens_seen": 112632456, + "step": 2010 + }, + { + "epoch": 4.476614699331849, + "loss": 0.9507274627685547, + "loss_ce": 0.000532110221683979, + "loss_iou": 0.421875, + "loss_num": 0.0208740234375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 112632456, + "step": 2010 + }, + { + "epoch": 4.478841870824054, + "grad_norm": 14.694646835327148, + "learning_rate": 1e-06, + "loss": 0.7868, + "num_input_tokens_seen": 112685992, + "step": 2011 + }, + { + "epoch": 4.478841870824054, + "loss": 0.8718289732933044, + "loss_ce": 0.0002469439641572535, + "loss_iou": 0.3359375, + "loss_num": 0.03955078125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 112685992, + "step": 2011 + }, + { + "epoch": 4.481069042316259, + "grad_norm": 15.614250183105469, + "learning_rate": 1e-06, + "loss": 0.6904, + "num_input_tokens_seen": 112736456, + "step": 2012 + }, + { + "epoch": 4.481069042316259, + "loss": 0.7561224699020386, + "loss_ce": 0.0002630477538332343, + "loss_iou": 0.30078125, + "loss_num": 0.031005859375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 112736456, + "step": 2012 + }, + { + "epoch": 4.4832962138084635, + "grad_norm": 19.059675216674805, + "learning_rate": 1e-06, + "loss": 0.7422, + "num_input_tokens_seen": 112791472, + "step": 2013 + }, + { + "epoch": 4.4832962138084635, + "loss": 0.5962854623794556, + "loss_ce": 0.00021605889196507633, + "loss_iou": 0.267578125, + "loss_num": 0.0120849609375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 112791472, + "step": 2013 + }, + { + "epoch": 4.485523385300668, + "grad_norm": 16.322175979614258, + "learning_rate": 1e-06, + "loss": 0.6247, + "num_input_tokens_seen": 112847224, + "step": 2014 + }, + { + "epoch": 4.485523385300668, + "loss": 0.6646714806556702, + "loss_ce": 0.00024277158081531525, + "loss_iou": 0.267578125, + "loss_num": 0.025390625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 112847224, + "step": 2014 + }, + { + "epoch": 4.487750556792873, + "grad_norm": 35.126644134521484, + "learning_rate": 1e-06, + "loss": 0.6962, + "num_input_tokens_seen": 112905124, + "step": 2015 + }, + { + "epoch": 4.487750556792873, + "loss": 0.7029978632926941, + "loss_ce": 0.00023909028095658869, + "loss_iou": 0.291015625, + "loss_num": 0.0242919921875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 112905124, + "step": 2015 + }, + { + "epoch": 4.489977728285078, + "grad_norm": 17.807477951049805, + "learning_rate": 1e-06, + "loss": 0.7735, + "num_input_tokens_seen": 112963088, + "step": 2016 + }, + { + "epoch": 4.489977728285078, + "loss": 0.7800430655479431, + "loss_ce": 0.0002578938438091427, + "loss_iou": 0.3515625, + "loss_num": 0.01495361328125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 112963088, + "step": 2016 + }, + { + "epoch": 4.492204899777283, + "grad_norm": 25.92805290222168, + "learning_rate": 1e-06, + "loss": 0.8668, + "num_input_tokens_seen": 113012908, + "step": 2017 + }, + { + "epoch": 4.492204899777283, + "loss": 0.7654862403869629, + "loss_ce": 0.0003495719865895808, + "loss_iou": 0.3359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 113012908, + "step": 2017 + }, + { + "epoch": 4.494432071269488, + "grad_norm": 26.185386657714844, + "learning_rate": 1e-06, + "loss": 0.666, + "num_input_tokens_seen": 113069828, + "step": 2018 + }, + { + "epoch": 4.494432071269488, + "loss": 0.5187711715698242, + "loss_ce": 0.0003385008021723479, + "loss_iou": 0.2294921875, + "loss_num": 0.0118408203125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 113069828, + "step": 2018 + }, + { + "epoch": 4.496659242761693, + "grad_norm": 16.547481536865234, + "learning_rate": 1e-06, + "loss": 0.7828, + "num_input_tokens_seen": 113126292, + "step": 2019 + }, + { + "epoch": 4.496659242761693, + "loss": 0.7228785753250122, + "loss_ce": 0.0002223363844677806, + "loss_iou": 0.302734375, + "loss_num": 0.02294921875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 113126292, + "step": 2019 + }, + { + "epoch": 4.498886414253898, + "grad_norm": 16.149658203125, + "learning_rate": 1e-06, + "loss": 0.7314, + "num_input_tokens_seen": 113180940, + "step": 2020 + }, + { + "epoch": 4.498886414253898, + "loss": 0.8957573771476746, + "loss_ce": 0.0002495555963832885, + "loss_iou": 0.392578125, + "loss_num": 0.021728515625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 113180940, + "step": 2020 + }, + { + "epoch": 4.501113585746102, + "grad_norm": 19.080398559570312, + "learning_rate": 1e-06, + "loss": 0.6275, + "num_input_tokens_seen": 113237920, + "step": 2021 + }, + { + "epoch": 4.501113585746102, + "loss": 0.8777415752410889, + "loss_ce": 0.0004222409042995423, + "loss_iou": 0.396484375, + "loss_num": 0.0166015625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 113237920, + "step": 2021 + }, + { + "epoch": 4.503340757238307, + "grad_norm": 18.565778732299805, + "learning_rate": 1e-06, + "loss": 0.8399, + "num_input_tokens_seen": 113292456, + "step": 2022 + }, + { + "epoch": 4.503340757238307, + "loss": 0.8745211362838745, + "loss_ce": 0.0002535720122978091, + "loss_iou": 0.373046875, + "loss_num": 0.025390625, + "loss_xval": 0.875, + "num_input_tokens_seen": 113292456, + "step": 2022 + }, + { + "epoch": 4.505567928730512, + "grad_norm": 16.41952133178711, + "learning_rate": 1e-06, + "loss": 0.6277, + "num_input_tokens_seen": 113348476, + "step": 2023 + }, + { + "epoch": 4.505567928730512, + "loss": 0.6538951992988586, + "loss_ce": 0.0005749051342718303, + "loss_iou": 0.302734375, + "loss_num": 0.00946044921875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 113348476, + "step": 2023 + }, + { + "epoch": 4.507795100222717, + "grad_norm": 21.629709243774414, + "learning_rate": 1e-06, + "loss": 0.6398, + "num_input_tokens_seen": 113406428, + "step": 2024 + }, + { + "epoch": 4.507795100222717, + "loss": 0.7275168299674988, + "loss_ce": 0.00022192415781319141, + "loss_iou": 0.298828125, + "loss_num": 0.0260009765625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 113406428, + "step": 2024 + }, + { + "epoch": 4.510022271714922, + "grad_norm": 20.57599449157715, + "learning_rate": 1e-06, + "loss": 0.7676, + "num_input_tokens_seen": 113458468, + "step": 2025 + }, + { + "epoch": 4.510022271714922, + "loss": 0.8076622486114502, + "loss_ce": 0.0002891695185098797, + "loss_iou": 0.349609375, + "loss_num": 0.0218505859375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 113458468, + "step": 2025 + }, + { + "epoch": 4.512249443207127, + "grad_norm": 17.691875457763672, + "learning_rate": 1e-06, + "loss": 0.7612, + "num_input_tokens_seen": 113514212, + "step": 2026 + }, + { + "epoch": 4.512249443207127, + "loss": 0.46555206179618835, + "loss_ce": 0.00046417216071859, + "loss_iou": 0.2060546875, + "loss_num": 0.01043701171875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 113514212, + "step": 2026 + }, + { + "epoch": 4.514476614699332, + "grad_norm": 21.262399673461914, + "learning_rate": 1e-06, + "loss": 0.7058, + "num_input_tokens_seen": 113568052, + "step": 2027 + }, + { + "epoch": 4.514476614699332, + "loss": 0.7981184720993042, + "loss_ce": 0.00026689909282140434, + "loss_iou": 0.328125, + "loss_num": 0.0281982421875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 113568052, + "step": 2027 + }, + { + "epoch": 4.5167037861915365, + "grad_norm": 19.464096069335938, + "learning_rate": 1e-06, + "loss": 0.7808, + "num_input_tokens_seen": 113625848, + "step": 2028 + }, + { + "epoch": 4.5167037861915365, + "loss": 0.9021308422088623, + "loss_ce": 0.0016181376995518804, + "loss_iou": 0.400390625, + "loss_num": 0.019775390625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 113625848, + "step": 2028 + }, + { + "epoch": 4.518930957683741, + "grad_norm": 18.429672241210938, + "learning_rate": 1e-06, + "loss": 0.723, + "num_input_tokens_seen": 113680916, + "step": 2029 + }, + { + "epoch": 4.518930957683741, + "loss": 0.9203833341598511, + "loss_ce": 0.00021731224842369556, + "loss_iou": 0.390625, + "loss_num": 0.027587890625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 113680916, + "step": 2029 + }, + { + "epoch": 4.521158129175946, + "grad_norm": 15.999838829040527, + "learning_rate": 1e-06, + "loss": 0.8875, + "num_input_tokens_seen": 113736420, + "step": 2030 + }, + { + "epoch": 4.521158129175946, + "loss": 0.799384593963623, + "loss_ce": 0.0003123145434074104, + "loss_iou": 0.33203125, + "loss_num": 0.0267333984375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 113736420, + "step": 2030 + }, + { + "epoch": 4.523385300668151, + "grad_norm": 15.953963279724121, + "learning_rate": 1e-06, + "loss": 0.7948, + "num_input_tokens_seen": 113792556, + "step": 2031 + }, + { + "epoch": 4.523385300668151, + "loss": 0.6049848794937134, + "loss_ce": 0.0002485427539795637, + "loss_iou": 0.26171875, + "loss_num": 0.0162353515625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 113792556, + "step": 2031 + }, + { + "epoch": 4.525612472160356, + "grad_norm": 24.632400512695312, + "learning_rate": 1e-06, + "loss": 0.707, + "num_input_tokens_seen": 113845716, + "step": 2032 + }, + { + "epoch": 4.525612472160356, + "loss": 0.9207833409309387, + "loss_ce": 0.00025110485148616135, + "loss_iou": 0.3984375, + "loss_num": 0.0244140625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 113845716, + "step": 2032 + }, + { + "epoch": 4.527839643652561, + "grad_norm": 17.756380081176758, + "learning_rate": 1e-06, + "loss": 1.0081, + "num_input_tokens_seen": 113902444, + "step": 2033 + }, + { + "epoch": 4.527839643652561, + "loss": 1.0257360935211182, + "loss_ce": 0.00034554791636765003, + "loss_iou": 0.384765625, + "loss_num": 0.051025390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 113902444, + "step": 2033 + }, + { + "epoch": 4.5300668151447665, + "grad_norm": 18.746212005615234, + "learning_rate": 1e-06, + "loss": 0.731, + "num_input_tokens_seen": 113957736, + "step": 2034 + }, + { + "epoch": 4.5300668151447665, + "loss": 0.6118181943893433, + "loss_ce": 0.00024596002185717225, + "loss_iou": 0.265625, + "loss_num": 0.015869140625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 113957736, + "step": 2034 + }, + { + "epoch": 4.532293986636971, + "grad_norm": 18.45545196533203, + "learning_rate": 1e-06, + "loss": 0.7164, + "num_input_tokens_seen": 114014944, + "step": 2035 + }, + { + "epoch": 4.532293986636971, + "loss": 0.799105167388916, + "loss_ce": 0.00027704003150574863, + "loss_iou": 0.353515625, + "loss_num": 0.0185546875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 114014944, + "step": 2035 + }, + { + "epoch": 4.534521158129176, + "grad_norm": 16.944326400756836, + "learning_rate": 1e-06, + "loss": 0.7828, + "num_input_tokens_seen": 114073000, + "step": 2036 + }, + { + "epoch": 4.534521158129176, + "loss": 0.7347084283828735, + "loss_ce": 0.00033347506541758776, + "loss_iou": 0.3203125, + "loss_num": 0.018798828125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 114073000, + "step": 2036 + }, + { + "epoch": 4.536748329621381, + "grad_norm": 42.41679000854492, + "learning_rate": 1e-06, + "loss": 0.6489, + "num_input_tokens_seen": 114129412, + "step": 2037 + }, + { + "epoch": 4.536748329621381, + "loss": 0.591286838054657, + "loss_ce": 0.0004665802407544106, + "loss_iou": 0.2392578125, + "loss_num": 0.0223388671875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 114129412, + "step": 2037 + }, + { + "epoch": 4.538975501113586, + "grad_norm": 20.462520599365234, + "learning_rate": 1e-06, + "loss": 0.8208, + "num_input_tokens_seen": 114182588, + "step": 2038 + }, + { + "epoch": 4.538975501113586, + "loss": 0.692272424697876, + "loss_ce": 0.00037790805799886584, + "loss_iou": 0.28125, + "loss_num": 0.0257568359375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 114182588, + "step": 2038 + }, + { + "epoch": 4.541202672605791, + "grad_norm": 23.208797454833984, + "learning_rate": 1e-06, + "loss": 1.2438, + "num_input_tokens_seen": 114236432, + "step": 2039 + }, + { + "epoch": 4.541202672605791, + "loss": 1.3942797183990479, + "loss_ce": 0.000725053483620286, + "loss_iou": 0.55078125, + "loss_num": 0.0576171875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 114236432, + "step": 2039 + }, + { + "epoch": 4.543429844097996, + "grad_norm": 20.509309768676758, + "learning_rate": 1e-06, + "loss": 0.894, + "num_input_tokens_seen": 114291080, + "step": 2040 + }, + { + "epoch": 4.543429844097996, + "loss": 0.9274067878723145, + "loss_ce": 0.0002827763673849404, + "loss_iou": 0.3828125, + "loss_num": 0.03271484375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 114291080, + "step": 2040 + }, + { + "epoch": 4.5456570155902005, + "grad_norm": 36.92770767211914, + "learning_rate": 1e-06, + "loss": 0.7234, + "num_input_tokens_seen": 114345108, + "step": 2041 + }, + { + "epoch": 4.5456570155902005, + "loss": 0.7667504549026489, + "loss_ce": 0.00027098384452983737, + "loss_iou": 0.3046875, + "loss_num": 0.03125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 114345108, + "step": 2041 + }, + { + "epoch": 4.547884187082405, + "grad_norm": 21.100217819213867, + "learning_rate": 1e-06, + "loss": 0.7731, + "num_input_tokens_seen": 114399096, + "step": 2042 + }, + { + "epoch": 4.547884187082405, + "loss": 0.7943999767303467, + "loss_ce": 0.00045465261791832745, + "loss_iou": 0.349609375, + "loss_num": 0.018798828125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 114399096, + "step": 2042 + }, + { + "epoch": 4.55011135857461, + "grad_norm": 20.20854949951172, + "learning_rate": 1e-06, + "loss": 0.6397, + "num_input_tokens_seen": 114454576, + "step": 2043 + }, + { + "epoch": 4.55011135857461, + "loss": 0.620108962059021, + "loss_ce": 0.00023591173521708697, + "loss_iou": 0.2431640625, + "loss_num": 0.026611328125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 114454576, + "step": 2043 + }, + { + "epoch": 4.552338530066815, + "grad_norm": 14.868428230285645, + "learning_rate": 1e-06, + "loss": 0.7342, + "num_input_tokens_seen": 114508528, + "step": 2044 + }, + { + "epoch": 4.552338530066815, + "loss": 0.7006996870040894, + "loss_ce": 0.0002602189779281616, + "loss_iou": 0.30078125, + "loss_num": 0.01953125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 114508528, + "step": 2044 + }, + { + "epoch": 4.55456570155902, + "grad_norm": 14.888712882995605, + "learning_rate": 1e-06, + "loss": 0.8115, + "num_input_tokens_seen": 114565380, + "step": 2045 + }, + { + "epoch": 4.55456570155902, + "loss": 0.7298033237457275, + "loss_ce": 0.00031118281185626984, + "loss_iou": 0.3203125, + "loss_num": 0.017578125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 114565380, + "step": 2045 + }, + { + "epoch": 4.556792873051225, + "grad_norm": 18.843162536621094, + "learning_rate": 1e-06, + "loss": 0.8365, + "num_input_tokens_seen": 114621044, + "step": 2046 + }, + { + "epoch": 4.556792873051225, + "loss": 0.9204500913619995, + "loss_ce": 0.0002840836241375655, + "loss_iou": 0.41796875, + "loss_num": 0.0167236328125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 114621044, + "step": 2046 + }, + { + "epoch": 4.55902004454343, + "grad_norm": 22.14583969116211, + "learning_rate": 1e-06, + "loss": 0.7857, + "num_input_tokens_seen": 114678684, + "step": 2047 + }, + { + "epoch": 4.55902004454343, + "loss": 0.9415132403373718, + "loss_ce": 0.0003511565155349672, + "loss_iou": 0.37890625, + "loss_num": 0.036865234375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 114678684, + "step": 2047 + }, + { + "epoch": 4.5612472160356345, + "grad_norm": 17.67383575439453, + "learning_rate": 1e-06, + "loss": 0.8186, + "num_input_tokens_seen": 114736248, + "step": 2048 + }, + { + "epoch": 4.5612472160356345, + "loss": 0.8276271820068359, + "loss_ce": 0.0004787354846484959, + "loss_iou": 0.35546875, + "loss_num": 0.023193359375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 114736248, + "step": 2048 + }, + { + "epoch": 4.563474387527839, + "grad_norm": 20.229520797729492, + "learning_rate": 1e-06, + "loss": 0.7565, + "num_input_tokens_seen": 114793940, + "step": 2049 + }, + { + "epoch": 4.563474387527839, + "loss": 0.8892278671264648, + "loss_ce": 0.00031182251404970884, + "loss_iou": 0.369140625, + "loss_num": 0.02978515625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 114793940, + "step": 2049 + }, + { + "epoch": 4.565701559020044, + "grad_norm": 18.42917823791504, + "learning_rate": 1e-06, + "loss": 0.611, + "num_input_tokens_seen": 114851312, + "step": 2050 + }, + { + "epoch": 4.565701559020044, + "loss": 0.5913413763046265, + "loss_ce": 0.0002769285929389298, + "loss_iou": 0.267578125, + "loss_num": 0.01153564453125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 114851312, + "step": 2050 + }, + { + "epoch": 4.567928730512249, + "grad_norm": 15.126654624938965, + "learning_rate": 1e-06, + "loss": 0.7946, + "num_input_tokens_seen": 114904084, + "step": 2051 + }, + { + "epoch": 4.567928730512249, + "loss": 0.9705009460449219, + "loss_ce": 0.0002861037792172283, + "loss_iou": 0.404296875, + "loss_num": 0.031982421875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 114904084, + "step": 2051 + }, + { + "epoch": 4.570155902004454, + "grad_norm": 101.9856948852539, + "learning_rate": 1e-06, + "loss": 0.7333, + "num_input_tokens_seen": 114958232, + "step": 2052 + }, + { + "epoch": 4.570155902004454, + "loss": 0.759264349937439, + "loss_ce": 0.00023114567738957703, + "loss_iou": 0.337890625, + "loss_num": 0.016845703125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 114958232, + "step": 2052 + }, + { + "epoch": 4.57238307349666, + "grad_norm": 16.39866065979004, + "learning_rate": 1e-06, + "loss": 0.6814, + "num_input_tokens_seen": 115015688, + "step": 2053 + }, + { + "epoch": 4.57238307349666, + "loss": 0.8523510694503784, + "loss_ce": 0.00030033523216843605, + "loss_iou": 0.361328125, + "loss_num": 0.025634765625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 115015688, + "step": 2053 + }, + { + "epoch": 4.574610244988865, + "grad_norm": 24.29990005493164, + "learning_rate": 1e-06, + "loss": 0.8799, + "num_input_tokens_seen": 115072520, + "step": 2054 + }, + { + "epoch": 4.574610244988865, + "loss": 0.7028564214706421, + "loss_ce": 0.00046386715257540345, + "loss_iou": 0.30859375, + "loss_num": 0.0172119140625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 115072520, + "step": 2054 + }, + { + "epoch": 4.5768374164810695, + "grad_norm": 42.747581481933594, + "learning_rate": 1e-06, + "loss": 0.7572, + "num_input_tokens_seen": 115128176, + "step": 2055 + }, + { + "epoch": 4.5768374164810695, + "loss": 0.7119477987289429, + "loss_ce": 0.00027786268037743866, + "loss_iou": 0.291015625, + "loss_num": 0.02587890625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 115128176, + "step": 2055 + }, + { + "epoch": 4.579064587973274, + "grad_norm": 28.751867294311523, + "learning_rate": 1e-06, + "loss": 0.7886, + "num_input_tokens_seen": 115185668, + "step": 2056 + }, + { + "epoch": 4.579064587973274, + "loss": 0.7783513069152832, + "loss_ce": 0.0002751080028247088, + "loss_iou": 0.330078125, + "loss_num": 0.0238037109375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 115185668, + "step": 2056 + }, + { + "epoch": 4.581291759465479, + "grad_norm": 20.60097885131836, + "learning_rate": 1e-06, + "loss": 0.8628, + "num_input_tokens_seen": 115242368, + "step": 2057 + }, + { + "epoch": 4.581291759465479, + "loss": 0.954852283000946, + "loss_ce": 0.00026240124134346843, + "loss_iou": 0.41796875, + "loss_num": 0.023681640625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 115242368, + "step": 2057 + }, + { + "epoch": 4.583518930957684, + "grad_norm": 31.440357208251953, + "learning_rate": 1e-06, + "loss": 1.0689, + "num_input_tokens_seen": 115297348, + "step": 2058 + }, + { + "epoch": 4.583518930957684, + "loss": 1.095000982284546, + "loss_ce": 0.000274458434432745, + "loss_iou": 0.46484375, + "loss_num": 0.033203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 115297348, + "step": 2058 + }, + { + "epoch": 4.585746102449889, + "grad_norm": 22.026607513427734, + "learning_rate": 1e-06, + "loss": 0.9932, + "num_input_tokens_seen": 115351760, + "step": 2059 + }, + { + "epoch": 4.585746102449889, + "loss": 0.8425930738449097, + "loss_ce": 0.0003078907902818173, + "loss_iou": 0.341796875, + "loss_num": 0.03173828125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 115351760, + "step": 2059 + }, + { + "epoch": 4.587973273942094, + "grad_norm": 21.124006271362305, + "learning_rate": 1e-06, + "loss": 0.7942, + "num_input_tokens_seen": 115407436, + "step": 2060 + }, + { + "epoch": 4.587973273942094, + "loss": 0.8991901874542236, + "loss_ce": 0.0002643706393428147, + "loss_iou": 0.369140625, + "loss_num": 0.032470703125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 115407436, + "step": 2060 + }, + { + "epoch": 4.590200445434299, + "grad_norm": 13.804505348205566, + "learning_rate": 1e-06, + "loss": 0.8642, + "num_input_tokens_seen": 115464140, + "step": 2061 + }, + { + "epoch": 4.590200445434299, + "loss": 0.6700685620307922, + "loss_ce": 0.0002687171217985451, + "loss_iou": 0.2578125, + "loss_num": 0.0308837890625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 115464140, + "step": 2061 + }, + { + "epoch": 4.5924276169265035, + "grad_norm": 30.673608779907227, + "learning_rate": 1e-06, + "loss": 0.8756, + "num_input_tokens_seen": 115517956, + "step": 2062 + }, + { + "epoch": 4.5924276169265035, + "loss": 0.6949182748794556, + "loss_ce": 0.00033817399526014924, + "loss_iou": 0.287109375, + "loss_num": 0.024169921875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 115517956, + "step": 2062 + }, + { + "epoch": 4.594654788418708, + "grad_norm": 16.69051170349121, + "learning_rate": 1e-06, + "loss": 0.7354, + "num_input_tokens_seen": 115575676, + "step": 2063 + }, + { + "epoch": 4.594654788418708, + "loss": 0.8584408760070801, + "loss_ce": 0.00028658530209213495, + "loss_iou": 0.330078125, + "loss_num": 0.039306640625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 115575676, + "step": 2063 + }, + { + "epoch": 4.596881959910913, + "grad_norm": 16.270042419433594, + "learning_rate": 1e-06, + "loss": 0.7017, + "num_input_tokens_seen": 115631496, + "step": 2064 + }, + { + "epoch": 4.596881959910913, + "loss": 0.5482097268104553, + "loss_ce": 0.0006023210007697344, + "loss_iou": 0.212890625, + "loss_num": 0.024169921875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 115631496, + "step": 2064 + }, + { + "epoch": 4.599109131403118, + "grad_norm": 18.328039169311523, + "learning_rate": 1e-06, + "loss": 0.9602, + "num_input_tokens_seen": 115685200, + "step": 2065 + }, + { + "epoch": 4.599109131403118, + "loss": 0.9518949389457703, + "loss_ce": 0.00023480196250602603, + "loss_iou": 0.41796875, + "loss_num": 0.022705078125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 115685200, + "step": 2065 + }, + { + "epoch": 4.601336302895323, + "grad_norm": 18.395797729492188, + "learning_rate": 1e-06, + "loss": 0.9051, + "num_input_tokens_seen": 115740640, + "step": 2066 + }, + { + "epoch": 4.601336302895323, + "loss": 0.9198845624923706, + "loss_ce": 0.00032886205008253455, + "loss_iou": 0.369140625, + "loss_num": 0.035888671875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 115740640, + "step": 2066 + }, + { + "epoch": 4.603563474387528, + "grad_norm": 78.51664733886719, + "learning_rate": 1e-06, + "loss": 0.7135, + "num_input_tokens_seen": 115794096, + "step": 2067 + }, + { + "epoch": 4.603563474387528, + "loss": 0.912857174873352, + "loss_ce": 0.00025952988653443754, + "loss_iou": 0.392578125, + "loss_num": 0.0260009765625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 115794096, + "step": 2067 + }, + { + "epoch": 4.605790645879733, + "grad_norm": 13.404295921325684, + "learning_rate": 1e-06, + "loss": 0.6822, + "num_input_tokens_seen": 115852012, + "step": 2068 + }, + { + "epoch": 4.605790645879733, + "loss": 0.6375604867935181, + "loss_ce": 0.00023134646471589804, + "loss_iou": 0.271484375, + "loss_num": 0.018798828125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 115852012, + "step": 2068 + }, + { + "epoch": 4.6080178173719375, + "grad_norm": 35.90913772583008, + "learning_rate": 1e-06, + "loss": 0.9463, + "num_input_tokens_seen": 115910272, + "step": 2069 + }, + { + "epoch": 4.6080178173719375, + "loss": 0.7615780830383301, + "loss_ce": 0.0003476125421002507, + "loss_iou": 0.33984375, + "loss_num": 0.0166015625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 115910272, + "step": 2069 + }, + { + "epoch": 4.610244988864142, + "grad_norm": 48.58989715576172, + "learning_rate": 1e-06, + "loss": 0.8322, + "num_input_tokens_seen": 115966060, + "step": 2070 + }, + { + "epoch": 4.610244988864142, + "loss": 0.9044246077537537, + "loss_ce": 0.00024984654737636447, + "loss_iou": 0.3828125, + "loss_num": 0.027587890625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 115966060, + "step": 2070 + }, + { + "epoch": 4.612472160356347, + "grad_norm": 24.12740135192871, + "learning_rate": 1e-06, + "loss": 0.8331, + "num_input_tokens_seen": 116023692, + "step": 2071 + }, + { + "epoch": 4.612472160356347, + "loss": 0.7075637578964233, + "loss_ce": 0.0002883510896936059, + "loss_iou": 0.296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 116023692, + "step": 2071 + }, + { + "epoch": 4.614699331848552, + "grad_norm": 28.676658630371094, + "learning_rate": 1e-06, + "loss": 0.8544, + "num_input_tokens_seen": 116078540, + "step": 2072 + }, + { + "epoch": 4.614699331848552, + "loss": 0.9685863852500916, + "loss_ce": 0.00032463445677421987, + "loss_iou": 0.408203125, + "loss_num": 0.0303955078125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 116078540, + "step": 2072 + }, + { + "epoch": 4.616926503340757, + "grad_norm": 19.603900909423828, + "learning_rate": 1e-06, + "loss": 0.7623, + "num_input_tokens_seen": 116133732, + "step": 2073 + }, + { + "epoch": 4.616926503340757, + "loss": 0.7463170886039734, + "loss_ce": 0.0002233087579952553, + "loss_iou": 0.27734375, + "loss_num": 0.0380859375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 116133732, + "step": 2073 + }, + { + "epoch": 4.619153674832962, + "grad_norm": 15.20378589630127, + "learning_rate": 1e-06, + "loss": 0.7644, + "num_input_tokens_seen": 116188860, + "step": 2074 + }, + { + "epoch": 4.619153674832962, + "loss": 0.748953104019165, + "loss_ce": 0.0004179720126558095, + "loss_iou": 0.287109375, + "loss_num": 0.034423828125, + "loss_xval": 0.75, + "num_input_tokens_seen": 116188860, + "step": 2074 + }, + { + "epoch": 4.621380846325167, + "grad_norm": 15.175333976745605, + "learning_rate": 1e-06, + "loss": 0.8667, + "num_input_tokens_seen": 116243408, + "step": 2075 + }, + { + "epoch": 4.621380846325167, + "loss": 0.890459418296814, + "loss_ce": 0.00032265347545035183, + "loss_iou": 0.384765625, + "loss_num": 0.024169921875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 116243408, + "step": 2075 + }, + { + "epoch": 4.6236080178173715, + "grad_norm": 13.573973655700684, + "learning_rate": 1e-06, + "loss": 1.0168, + "num_input_tokens_seen": 116300164, + "step": 2076 + }, + { + "epoch": 4.6236080178173715, + "loss": 1.1477470397949219, + "loss_ce": 0.0002860655076801777, + "loss_iou": 0.486328125, + "loss_num": 0.035400390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 116300164, + "step": 2076 + }, + { + "epoch": 4.625835189309576, + "grad_norm": 19.694929122924805, + "learning_rate": 1e-06, + "loss": 0.8291, + "num_input_tokens_seen": 116357772, + "step": 2077 + }, + { + "epoch": 4.625835189309576, + "loss": 0.9049163460731506, + "loss_ce": 0.00037533161230385303, + "loss_iou": 0.35546875, + "loss_num": 0.038330078125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 116357772, + "step": 2077 + }, + { + "epoch": 4.628062360801781, + "grad_norm": 14.48517894744873, + "learning_rate": 1e-06, + "loss": 0.8624, + "num_input_tokens_seen": 116413892, + "step": 2078 + }, + { + "epoch": 4.628062360801781, + "loss": 0.7932718992233276, + "loss_ce": 0.0005472734337672591, + "loss_iou": 0.330078125, + "loss_num": 0.02685546875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 116413892, + "step": 2078 + }, + { + "epoch": 4.630289532293987, + "grad_norm": 26.142356872558594, + "learning_rate": 1e-06, + "loss": 0.9727, + "num_input_tokens_seen": 116471888, + "step": 2079 + }, + { + "epoch": 4.630289532293987, + "loss": 1.051328420639038, + "loss_ce": 0.0003031002124771476, + "loss_iou": 0.458984375, + "loss_num": 0.026611328125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 116471888, + "step": 2079 + }, + { + "epoch": 4.632516703786192, + "grad_norm": 38.62444305419922, + "learning_rate": 1e-06, + "loss": 0.7796, + "num_input_tokens_seen": 116527800, + "step": 2080 + }, + { + "epoch": 4.632516703786192, + "loss": 0.8833059668540955, + "loss_ce": 0.00024934165412560105, + "loss_iou": 0.404296875, + "loss_num": 0.01519775390625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 116527800, + "step": 2080 + }, + { + "epoch": 4.634743875278397, + "grad_norm": 26.539600372314453, + "learning_rate": 1e-06, + "loss": 0.9048, + "num_input_tokens_seen": 116583864, + "step": 2081 + }, + { + "epoch": 4.634743875278397, + "loss": 0.9949434995651245, + "loss_ce": 0.0005587629275396466, + "loss_iou": 0.4453125, + "loss_num": 0.0211181640625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 116583864, + "step": 2081 + }, + { + "epoch": 4.636971046770602, + "grad_norm": 19.73139190673828, + "learning_rate": 1e-06, + "loss": 0.6691, + "num_input_tokens_seen": 116635540, + "step": 2082 + }, + { + "epoch": 4.636971046770602, + "loss": 0.6694454550743103, + "loss_ce": 0.0009884194005280733, + "loss_iou": 0.2578125, + "loss_num": 0.0302734375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 116635540, + "step": 2082 + }, + { + "epoch": 4.639198218262806, + "grad_norm": 22.97101593017578, + "learning_rate": 1e-06, + "loss": 0.6584, + "num_input_tokens_seen": 116691464, + "step": 2083 + }, + { + "epoch": 4.639198218262806, + "loss": 0.5923573970794678, + "loss_ce": 0.0003163928631693125, + "loss_iou": 0.265625, + "loss_num": 0.012451171875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 116691464, + "step": 2083 + }, + { + "epoch": 4.641425389755011, + "grad_norm": 21.242584228515625, + "learning_rate": 1e-06, + "loss": 0.7134, + "num_input_tokens_seen": 116743160, + "step": 2084 + }, + { + "epoch": 4.641425389755011, + "loss": 0.5542248487472534, + "loss_ce": 0.0002697905874811113, + "loss_iou": 0.2177734375, + "loss_num": 0.02392578125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 116743160, + "step": 2084 + }, + { + "epoch": 4.643652561247216, + "grad_norm": 25.813098907470703, + "learning_rate": 1e-06, + "loss": 0.7164, + "num_input_tokens_seen": 116797988, + "step": 2085 + }, + { + "epoch": 4.643652561247216, + "loss": 0.5944602489471436, + "loss_ce": 0.00022195720521267503, + "loss_iou": 0.2578125, + "loss_num": 0.0159912109375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 116797988, + "step": 2085 + }, + { + "epoch": 4.645879732739421, + "grad_norm": 35.45105743408203, + "learning_rate": 1e-06, + "loss": 0.9267, + "num_input_tokens_seen": 116853140, + "step": 2086 + }, + { + "epoch": 4.645879732739421, + "loss": 0.6704750657081604, + "loss_ce": 0.0005531828501261771, + "loss_iou": 0.296875, + "loss_num": 0.0155029296875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 116853140, + "step": 2086 + }, + { + "epoch": 4.648106904231626, + "grad_norm": 19.315475463867188, + "learning_rate": 1e-06, + "loss": 0.7122, + "num_input_tokens_seen": 116909096, + "step": 2087 + }, + { + "epoch": 4.648106904231626, + "loss": 0.5913118720054626, + "loss_ce": 0.0002473921631462872, + "loss_iou": 0.251953125, + "loss_num": 0.0177001953125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 116909096, + "step": 2087 + }, + { + "epoch": 4.650334075723831, + "grad_norm": 27.988327026367188, + "learning_rate": 1e-06, + "loss": 0.8224, + "num_input_tokens_seen": 116966212, + "step": 2088 + }, + { + "epoch": 4.650334075723831, + "loss": 0.9297181367874146, + "loss_ce": 0.00027486798353493214, + "loss_iou": 0.376953125, + "loss_num": 0.03515625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 116966212, + "step": 2088 + }, + { + "epoch": 4.652561247216036, + "grad_norm": 16.152359008789062, + "learning_rate": 1e-06, + "loss": 0.5982, + "num_input_tokens_seen": 117022116, + "step": 2089 + }, + { + "epoch": 4.652561247216036, + "loss": 0.6331831216812134, + "loss_ce": 0.0002486219455022365, + "loss_iou": 0.265625, + "loss_num": 0.0201416015625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 117022116, + "step": 2089 + }, + { + "epoch": 4.6547884187082404, + "grad_norm": 14.966622352600098, + "learning_rate": 1e-06, + "loss": 0.8524, + "num_input_tokens_seen": 117077144, + "step": 2090 + }, + { + "epoch": 4.6547884187082404, + "loss": 0.8334739208221436, + "loss_ce": 0.0007102236268110573, + "loss_iou": 0.341796875, + "loss_num": 0.0294189453125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 117077144, + "step": 2090 + }, + { + "epoch": 4.657015590200445, + "grad_norm": 18.069534301757812, + "learning_rate": 1e-06, + "loss": 0.6994, + "num_input_tokens_seen": 117132324, + "step": 2091 + }, + { + "epoch": 4.657015590200445, + "loss": 0.6397680044174194, + "loss_ce": 0.00024168557138182223, + "loss_iou": 0.283203125, + "loss_num": 0.014404296875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 117132324, + "step": 2091 + }, + { + "epoch": 4.65924276169265, + "grad_norm": 16.776460647583008, + "learning_rate": 1e-06, + "loss": 0.6939, + "num_input_tokens_seen": 117192008, + "step": 2092 + }, + { + "epoch": 4.65924276169265, + "loss": 0.8577221632003784, + "loss_ce": 0.0003002659068442881, + "loss_iou": 0.3671875, + "loss_num": 0.0250244140625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 117192008, + "step": 2092 + }, + { + "epoch": 4.661469933184855, + "grad_norm": 12.129549980163574, + "learning_rate": 1e-06, + "loss": 0.7361, + "num_input_tokens_seen": 117248744, + "step": 2093 + }, + { + "epoch": 4.661469933184855, + "loss": 0.8417858481407166, + "loss_ce": 0.00035519120865501463, + "loss_iou": 0.3671875, + "loss_num": 0.0216064453125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 117248744, + "step": 2093 + }, + { + "epoch": 4.66369710467706, + "grad_norm": 47.98014450073242, + "learning_rate": 1e-06, + "loss": 1.0221, + "num_input_tokens_seen": 117303972, + "step": 2094 + }, + { + "epoch": 4.66369710467706, + "loss": 0.8582909107208252, + "loss_ce": 0.0002586581977084279, + "loss_iou": 0.365234375, + "loss_num": 0.0255126953125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 117303972, + "step": 2094 + }, + { + "epoch": 4.665924276169265, + "grad_norm": 17.894067764282227, + "learning_rate": 1e-06, + "loss": 0.7274, + "num_input_tokens_seen": 117360916, + "step": 2095 + }, + { + "epoch": 4.665924276169265, + "loss": 0.9711999297142029, + "loss_ce": 0.0002527014003135264, + "loss_iou": 0.39453125, + "loss_num": 0.036376953125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 117360916, + "step": 2095 + }, + { + "epoch": 4.66815144766147, + "grad_norm": 22.916641235351562, + "learning_rate": 1e-06, + "loss": 0.7064, + "num_input_tokens_seen": 117416304, + "step": 2096 + }, + { + "epoch": 4.66815144766147, + "loss": 0.7692503929138184, + "loss_ce": 0.00045159138971939683, + "loss_iou": 0.3125, + "loss_num": 0.0289306640625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 117416304, + "step": 2096 + }, + { + "epoch": 4.6703786191536745, + "grad_norm": 17.155508041381836, + "learning_rate": 1e-06, + "loss": 0.9104, + "num_input_tokens_seen": 117473004, + "step": 2097 + }, + { + "epoch": 4.6703786191536745, + "loss": 1.1037812232971191, + "loss_ce": 0.0002655387215781957, + "loss_iou": 0.453125, + "loss_num": 0.03955078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 117473004, + "step": 2097 + }, + { + "epoch": 4.67260579064588, + "grad_norm": 17.130844116210938, + "learning_rate": 1e-06, + "loss": 0.7475, + "num_input_tokens_seen": 117528424, + "step": 2098 + }, + { + "epoch": 4.67260579064588, + "loss": 0.8190168738365173, + "loss_ce": 0.0004133854527026415, + "loss_iou": 0.33984375, + "loss_num": 0.02783203125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 117528424, + "step": 2098 + }, + { + "epoch": 4.674832962138085, + "grad_norm": 16.85631561279297, + "learning_rate": 1e-06, + "loss": 0.5101, + "num_input_tokens_seen": 117581876, + "step": 2099 + }, + { + "epoch": 4.674832962138085, + "loss": 0.469228595495224, + "loss_ce": 0.00023444523685611784, + "loss_iou": 0.19921875, + "loss_num": 0.01416015625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 117581876, + "step": 2099 + }, + { + "epoch": 4.67706013363029, + "grad_norm": 17.609556198120117, + "learning_rate": 1e-06, + "loss": 1.0525, + "num_input_tokens_seen": 117638468, + "step": 2100 + }, + { + "epoch": 4.67706013363029, + "loss": 0.9739843606948853, + "loss_ce": 0.0003515969729050994, + "loss_iou": 0.43359375, + "loss_num": 0.0213623046875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 117638468, + "step": 2100 + }, + { + "epoch": 4.679287305122495, + "grad_norm": 20.728130340576172, + "learning_rate": 1e-06, + "loss": 0.7308, + "num_input_tokens_seen": 117694144, + "step": 2101 + }, + { + "epoch": 4.679287305122495, + "loss": 0.7154548168182373, + "loss_ce": 0.0003668900462798774, + "loss_iou": 0.298828125, + "loss_num": 0.0238037109375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 117694144, + "step": 2101 + }, + { + "epoch": 4.6815144766147, + "grad_norm": 18.357759475708008, + "learning_rate": 1e-06, + "loss": 0.9824, + "num_input_tokens_seen": 117747644, + "step": 2102 + }, + { + "epoch": 4.6815144766147, + "loss": 0.903360903263092, + "loss_ce": 0.0002847156720235944, + "loss_iou": 0.3515625, + "loss_num": 0.039794921875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 117747644, + "step": 2102 + }, + { + "epoch": 4.6837416481069045, + "grad_norm": 18.632221221923828, + "learning_rate": 1e-06, + "loss": 0.7674, + "num_input_tokens_seen": 117802256, + "step": 2103 + }, + { + "epoch": 4.6837416481069045, + "loss": 0.8225224018096924, + "loss_ce": 0.00025684869615361094, + "loss_iou": 0.337890625, + "loss_num": 0.029541015625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 117802256, + "step": 2103 + }, + { + "epoch": 4.685968819599109, + "grad_norm": 22.77741813659668, + "learning_rate": 1e-06, + "loss": 0.6407, + "num_input_tokens_seen": 117857308, + "step": 2104 + }, + { + "epoch": 4.685968819599109, + "loss": 0.6071910858154297, + "loss_ce": 0.0002575043763499707, + "loss_iou": 0.2216796875, + "loss_num": 0.03271484375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 117857308, + "step": 2104 + }, + { + "epoch": 4.688195991091314, + "grad_norm": 12.895187377929688, + "learning_rate": 1e-06, + "loss": 0.5916, + "num_input_tokens_seen": 117913116, + "step": 2105 + }, + { + "epoch": 4.688195991091314, + "loss": 0.7664899230003357, + "loss_ce": 0.0002545661700423807, + "loss_iou": 0.330078125, + "loss_num": 0.020751953125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 117913116, + "step": 2105 + }, + { + "epoch": 4.690423162583519, + "grad_norm": 36.03581237792969, + "learning_rate": 1e-06, + "loss": 0.7424, + "num_input_tokens_seen": 117970296, + "step": 2106 + }, + { + "epoch": 4.690423162583519, + "loss": 0.785170316696167, + "loss_ce": 0.00025823366013355553, + "loss_iou": 0.341796875, + "loss_num": 0.0205078125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 117970296, + "step": 2106 + }, + { + "epoch": 4.692650334075724, + "grad_norm": 24.407381057739258, + "learning_rate": 1e-06, + "loss": 0.7918, + "num_input_tokens_seen": 118024948, + "step": 2107 + }, + { + "epoch": 4.692650334075724, + "loss": 0.6611515283584595, + "loss_ce": 0.00026292851543985307, + "loss_iou": 0.29296875, + "loss_num": 0.01519775390625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 118024948, + "step": 2107 + }, + { + "epoch": 4.694877505567929, + "grad_norm": 30.365440368652344, + "learning_rate": 1e-06, + "loss": 0.8189, + "num_input_tokens_seen": 118081340, + "step": 2108 + }, + { + "epoch": 4.694877505567929, + "loss": 0.9890696406364441, + "loss_ce": 0.0003001365694217384, + "loss_iou": 0.384765625, + "loss_num": 0.043701171875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 118081340, + "step": 2108 + }, + { + "epoch": 4.697104677060134, + "grad_norm": 14.271038055419922, + "learning_rate": 1e-06, + "loss": 0.8856, + "num_input_tokens_seen": 118139104, + "step": 2109 + }, + { + "epoch": 4.697104677060134, + "loss": 1.0308204889297485, + "loss_ce": 0.0003028618230018765, + "loss_iou": 0.4296875, + "loss_num": 0.03466796875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 118139104, + "step": 2109 + }, + { + "epoch": 4.6993318485523385, + "grad_norm": 22.121456146240234, + "learning_rate": 1e-06, + "loss": 0.7422, + "num_input_tokens_seen": 118194740, + "step": 2110 + }, + { + "epoch": 4.6993318485523385, + "loss": 0.7307307124137878, + "loss_ce": 0.00026196378166787326, + "loss_iou": 0.32421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 118194740, + "step": 2110 + }, + { + "epoch": 4.701559020044543, + "grad_norm": 21.364805221557617, + "learning_rate": 1e-06, + "loss": 0.6672, + "num_input_tokens_seen": 118248584, + "step": 2111 + }, + { + "epoch": 4.701559020044543, + "loss": 0.8600109815597534, + "loss_ce": 0.000391850684536621, + "loss_iou": 0.37890625, + "loss_num": 0.02001953125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 118248584, + "step": 2111 + }, + { + "epoch": 4.703786191536748, + "grad_norm": 21.18739128112793, + "learning_rate": 1e-06, + "loss": 0.7989, + "num_input_tokens_seen": 118306024, + "step": 2112 + }, + { + "epoch": 4.703786191536748, + "loss": 0.8781961798667908, + "loss_ce": 0.00026655703550204635, + "loss_iou": 0.39453125, + "loss_num": 0.0177001953125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 118306024, + "step": 2112 + }, + { + "epoch": 4.706013363028953, + "grad_norm": 16.3918399810791, + "learning_rate": 1e-06, + "loss": 0.7998, + "num_input_tokens_seen": 118361808, + "step": 2113 + }, + { + "epoch": 4.706013363028953, + "loss": 0.5792876482009888, + "loss_ce": 0.0003081717586610466, + "loss_iou": 0.25390625, + "loss_num": 0.01397705078125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 118361808, + "step": 2113 + }, + { + "epoch": 4.708240534521158, + "grad_norm": 21.2349910736084, + "learning_rate": 1e-06, + "loss": 0.6949, + "num_input_tokens_seen": 118419444, + "step": 2114 + }, + { + "epoch": 4.708240534521158, + "loss": 0.6933687925338745, + "loss_ce": 0.00025352693046443164, + "loss_iou": 0.296875, + "loss_num": 0.019775390625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 118419444, + "step": 2114 + }, + { + "epoch": 4.710467706013363, + "grad_norm": 20.82132911682129, + "learning_rate": 1e-06, + "loss": 0.6637, + "num_input_tokens_seen": 118476632, + "step": 2115 + }, + { + "epoch": 4.710467706013363, + "loss": 0.6345095038414001, + "loss_ce": 0.0004763126198668033, + "loss_iou": 0.28125, + "loss_num": 0.01422119140625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 118476632, + "step": 2115 + }, + { + "epoch": 4.712694877505568, + "grad_norm": 17.699792861938477, + "learning_rate": 1e-06, + "loss": 0.6965, + "num_input_tokens_seen": 118532612, + "step": 2116 + }, + { + "epoch": 4.712694877505568, + "loss": 0.6084548234939575, + "loss_ce": 0.0002699895412661135, + "loss_iou": 0.265625, + "loss_num": 0.015625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 118532612, + "step": 2116 + }, + { + "epoch": 4.714922048997773, + "grad_norm": 15.86410903930664, + "learning_rate": 1e-06, + "loss": 0.9501, + "num_input_tokens_seen": 118589372, + "step": 2117 + }, + { + "epoch": 4.714922048997773, + "loss": 1.0604746341705322, + "loss_ce": 0.000416038790717721, + "loss_iou": 0.42578125, + "loss_num": 0.041748046875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 118589372, + "step": 2117 + }, + { + "epoch": 4.717149220489977, + "grad_norm": 20.29158592224121, + "learning_rate": 1e-06, + "loss": 0.6542, + "num_input_tokens_seen": 118644080, + "step": 2118 + }, + { + "epoch": 4.717149220489977, + "loss": 0.6954843401908875, + "loss_ce": 0.00041598049574531615, + "loss_iou": 0.29296875, + "loss_num": 0.02197265625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 118644080, + "step": 2118 + }, + { + "epoch": 4.719376391982182, + "grad_norm": 16.680042266845703, + "learning_rate": 1e-06, + "loss": 0.7097, + "num_input_tokens_seen": 118698048, + "step": 2119 + }, + { + "epoch": 4.719376391982182, + "loss": 0.8289152383804321, + "loss_ce": 0.00030192872509360313, + "loss_iou": 0.365234375, + "loss_num": 0.0196533203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 118698048, + "step": 2119 + }, + { + "epoch": 4.721603563474387, + "grad_norm": 20.128711700439453, + "learning_rate": 1e-06, + "loss": 0.7996, + "num_input_tokens_seen": 118754912, + "step": 2120 + }, + { + "epoch": 4.721603563474387, + "loss": 0.7679637670516968, + "loss_ce": 0.0003856367547996342, + "loss_iou": 0.33984375, + "loss_num": 0.017578125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 118754912, + "step": 2120 + }, + { + "epoch": 4.723830734966592, + "grad_norm": 26.825408935546875, + "learning_rate": 1e-06, + "loss": 1.0429, + "num_input_tokens_seen": 118809528, + "step": 2121 + }, + { + "epoch": 4.723830734966592, + "loss": 0.6894991993904114, + "loss_ce": 0.0002902133564930409, + "loss_iou": 0.275390625, + "loss_num": 0.02783203125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 118809528, + "step": 2121 + }, + { + "epoch": 4.726057906458797, + "grad_norm": 35.720130920410156, + "learning_rate": 1e-06, + "loss": 0.7786, + "num_input_tokens_seen": 118866560, + "step": 2122 + }, + { + "epoch": 4.726057906458797, + "loss": 0.766879677772522, + "loss_ce": 0.0002781344810500741, + "loss_iou": 0.298828125, + "loss_num": 0.033935546875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 118866560, + "step": 2122 + }, + { + "epoch": 4.728285077951003, + "grad_norm": 39.63307189941406, + "learning_rate": 1e-06, + "loss": 0.7441, + "num_input_tokens_seen": 118922208, + "step": 2123 + }, + { + "epoch": 4.728285077951003, + "loss": 0.6776679158210754, + "loss_ce": 0.0002997248957399279, + "loss_iou": 0.271484375, + "loss_num": 0.02685546875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 118922208, + "step": 2123 + }, + { + "epoch": 4.7305122494432075, + "grad_norm": 29.788860321044922, + "learning_rate": 1e-06, + "loss": 0.7002, + "num_input_tokens_seen": 118976912, + "step": 2124 + }, + { + "epoch": 4.7305122494432075, + "loss": 0.8606002926826477, + "loss_ce": 0.00024869441404007375, + "loss_iou": 0.3359375, + "loss_num": 0.0380859375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 118976912, + "step": 2124 + }, + { + "epoch": 4.732739420935412, + "grad_norm": 17.970624923706055, + "learning_rate": 1e-06, + "loss": 0.6788, + "num_input_tokens_seen": 119032532, + "step": 2125 + }, + { + "epoch": 4.732739420935412, + "loss": 0.9126179814338684, + "loss_ce": 0.0005085880402475595, + "loss_iou": 0.38671875, + "loss_num": 0.0279541015625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 119032532, + "step": 2125 + }, + { + "epoch": 4.734966592427617, + "grad_norm": 40.0213508605957, + "learning_rate": 1e-06, + "loss": 0.6293, + "num_input_tokens_seen": 119089356, + "step": 2126 + }, + { + "epoch": 4.734966592427617, + "loss": 0.6347508430480957, + "loss_ce": 0.00022931784042157233, + "loss_iou": 0.28515625, + "loss_num": 0.0130615234375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 119089356, + "step": 2126 + }, + { + "epoch": 4.737193763919822, + "grad_norm": 19.045475006103516, + "learning_rate": 1e-06, + "loss": 0.7774, + "num_input_tokens_seen": 119144504, + "step": 2127 + }, + { + "epoch": 4.737193763919822, + "loss": 0.9141049385070801, + "loss_ce": 0.0002866187132894993, + "loss_iou": 0.400390625, + "loss_num": 0.0223388671875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 119144504, + "step": 2127 + }, + { + "epoch": 4.739420935412027, + "grad_norm": 17.02509880065918, + "learning_rate": 1e-06, + "loss": 0.7687, + "num_input_tokens_seen": 119201984, + "step": 2128 + }, + { + "epoch": 4.739420935412027, + "loss": 0.7351812124252319, + "loss_ce": 0.0003179654595442116, + "loss_iou": 0.322265625, + "loss_num": 0.018310546875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 119201984, + "step": 2128 + }, + { + "epoch": 4.741648106904232, + "grad_norm": 42.980289459228516, + "learning_rate": 1e-06, + "loss": 0.6333, + "num_input_tokens_seen": 119259860, + "step": 2129 + }, + { + "epoch": 4.741648106904232, + "loss": 0.538473904132843, + "loss_ce": 0.0002658716693986207, + "loss_iou": 0.24609375, + "loss_num": 0.0091552734375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 119259860, + "step": 2129 + }, + { + "epoch": 4.743875278396437, + "grad_norm": 15.055540084838867, + "learning_rate": 1e-06, + "loss": 0.8069, + "num_input_tokens_seen": 119317616, + "step": 2130 + }, + { + "epoch": 4.743875278396437, + "loss": 0.8891835808753967, + "loss_ce": 0.0002675617579370737, + "loss_iou": 0.3671875, + "loss_num": 0.0306396484375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 119317616, + "step": 2130 + }, + { + "epoch": 4.7461024498886415, + "grad_norm": 20.52086067199707, + "learning_rate": 1e-06, + "loss": 0.8202, + "num_input_tokens_seen": 119373908, + "step": 2131 + }, + { + "epoch": 4.7461024498886415, + "loss": 0.8626622557640076, + "loss_ce": 0.00035758258309215307, + "loss_iou": 0.359375, + "loss_num": 0.0286865234375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 119373908, + "step": 2131 + }, + { + "epoch": 4.748329621380846, + "grad_norm": 24.670711517333984, + "learning_rate": 1e-06, + "loss": 0.8469, + "num_input_tokens_seen": 119429720, + "step": 2132 + }, + { + "epoch": 4.748329621380846, + "loss": 0.7893211841583252, + "loss_ce": 0.00025862548500299454, + "loss_iou": 0.34375, + "loss_num": 0.0201416015625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 119429720, + "step": 2132 + }, + { + "epoch": 4.750556792873051, + "grad_norm": 21.29181671142578, + "learning_rate": 1e-06, + "loss": 0.6766, + "num_input_tokens_seen": 119484036, + "step": 2133 + }, + { + "epoch": 4.750556792873051, + "loss": 0.609359860420227, + "loss_ce": 0.00022898372844792902, + "loss_iou": 0.240234375, + "loss_num": 0.02587890625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 119484036, + "step": 2133 + }, + { + "epoch": 4.752783964365256, + "grad_norm": 54.08604431152344, + "learning_rate": 1e-06, + "loss": 0.6534, + "num_input_tokens_seen": 119537184, + "step": 2134 + }, + { + "epoch": 4.752783964365256, + "loss": 0.6004340648651123, + "loss_ce": 0.0003363891737535596, + "loss_iou": 0.234375, + "loss_num": 0.026123046875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 119537184, + "step": 2134 + }, + { + "epoch": 4.755011135857461, + "grad_norm": 24.42340660095215, + "learning_rate": 1e-06, + "loss": 0.66, + "num_input_tokens_seen": 119591936, + "step": 2135 + }, + { + "epoch": 4.755011135857461, + "loss": 0.762969970703125, + "loss_ce": 0.0002747006365098059, + "loss_iou": 0.322265625, + "loss_num": 0.023681640625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 119591936, + "step": 2135 + }, + { + "epoch": 4.757238307349666, + "grad_norm": 18.103748321533203, + "learning_rate": 1e-06, + "loss": 0.7105, + "num_input_tokens_seen": 119650452, + "step": 2136 + }, + { + "epoch": 4.757238307349666, + "loss": 0.8064308166503906, + "loss_ce": 0.0002785040996968746, + "loss_iou": 0.35546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 119650452, + "step": 2136 + }, + { + "epoch": 4.759465478841871, + "grad_norm": 25.047883987426758, + "learning_rate": 1e-06, + "loss": 0.9671, + "num_input_tokens_seen": 119705364, + "step": 2137 + }, + { + "epoch": 4.759465478841871, + "loss": 0.9405218958854675, + "loss_ce": 0.0003363378345966339, + "loss_iou": 0.390625, + "loss_num": 0.03173828125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 119705364, + "step": 2137 + }, + { + "epoch": 4.7616926503340755, + "grad_norm": 11.171337127685547, + "learning_rate": 1e-06, + "loss": 0.4534, + "num_input_tokens_seen": 119761084, + "step": 2138 + }, + { + "epoch": 4.7616926503340755, + "loss": 0.43443742394447327, + "loss_ce": 0.00023330794647336006, + "loss_iou": 0.1904296875, + "loss_num": 0.01055908203125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 119761084, + "step": 2138 + }, + { + "epoch": 4.76391982182628, + "grad_norm": 14.953879356384277, + "learning_rate": 1e-06, + "loss": 0.8728, + "num_input_tokens_seen": 119816688, + "step": 2139 + }, + { + "epoch": 4.76391982182628, + "loss": 0.6874940991401672, + "loss_ce": 0.00029927384457550943, + "loss_iou": 0.3046875, + "loss_num": 0.0157470703125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 119816688, + "step": 2139 + }, + { + "epoch": 4.766146993318485, + "grad_norm": 25.45313262939453, + "learning_rate": 1e-06, + "loss": 0.8034, + "num_input_tokens_seen": 119874648, + "step": 2140 + }, + { + "epoch": 4.766146993318485, + "loss": 0.8113042116165161, + "loss_ce": 0.00026898583746515214, + "loss_iou": 0.328125, + "loss_num": 0.03125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 119874648, + "step": 2140 + }, + { + "epoch": 4.76837416481069, + "grad_norm": 22.319425582885742, + "learning_rate": 1e-06, + "loss": 0.7391, + "num_input_tokens_seen": 119930376, + "step": 2141 + }, + { + "epoch": 4.76837416481069, + "loss": 0.7610592246055603, + "loss_ce": 0.0005611696396954358, + "loss_iou": 0.314453125, + "loss_num": 0.0267333984375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 119930376, + "step": 2141 + }, + { + "epoch": 4.770601336302895, + "grad_norm": 27.872591018676758, + "learning_rate": 1e-06, + "loss": 0.7955, + "num_input_tokens_seen": 119985268, + "step": 2142 + }, + { + "epoch": 4.770601336302895, + "loss": 0.5949513912200928, + "loss_ce": 0.00022480121697299182, + "loss_iou": 0.2490234375, + "loss_num": 0.019287109375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 119985268, + "step": 2142 + }, + { + "epoch": 4.772828507795101, + "grad_norm": 21.563512802124023, + "learning_rate": 1e-06, + "loss": 0.6652, + "num_input_tokens_seen": 120041572, + "step": 2143 + }, + { + "epoch": 4.772828507795101, + "loss": 0.6074680089950562, + "loss_ce": 0.0002902495616581291, + "loss_iou": 0.26171875, + "loss_num": 0.0166015625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 120041572, + "step": 2143 + }, + { + "epoch": 4.775055679287306, + "grad_norm": 21.816646575927734, + "learning_rate": 1e-06, + "loss": 0.7993, + "num_input_tokens_seen": 120097280, + "step": 2144 + }, + { + "epoch": 4.775055679287306, + "loss": 0.8655096292495728, + "loss_ce": 0.00027528638020157814, + "loss_iou": 0.3828125, + "loss_num": 0.01953125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 120097280, + "step": 2144 + }, + { + "epoch": 4.77728285077951, + "grad_norm": 17.00129508972168, + "learning_rate": 1e-06, + "loss": 0.8654, + "num_input_tokens_seen": 120152928, + "step": 2145 + }, + { + "epoch": 4.77728285077951, + "loss": 1.0312559604644775, + "loss_ce": 0.00025006328360177577, + "loss_iou": 0.427734375, + "loss_num": 0.035400390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 120152928, + "step": 2145 + }, + { + "epoch": 4.779510022271715, + "grad_norm": 23.580554962158203, + "learning_rate": 1e-06, + "loss": 0.912, + "num_input_tokens_seen": 120211296, + "step": 2146 + }, + { + "epoch": 4.779510022271715, + "loss": 0.8288871645927429, + "loss_ce": 0.0005180308944545686, + "loss_iou": 0.34375, + "loss_num": 0.02783203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 120211296, + "step": 2146 + }, + { + "epoch": 4.78173719376392, + "grad_norm": 18.791494369506836, + "learning_rate": 1e-06, + "loss": 0.7229, + "num_input_tokens_seen": 120268204, + "step": 2147 + }, + { + "epoch": 4.78173719376392, + "loss": 0.7734935283660889, + "loss_ce": 0.000300163053907454, + "loss_iou": 0.310546875, + "loss_num": 0.0308837890625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 120268204, + "step": 2147 + }, + { + "epoch": 4.783964365256125, + "grad_norm": 17.211793899536133, + "learning_rate": 1e-06, + "loss": 0.6943, + "num_input_tokens_seen": 120320000, + "step": 2148 + }, + { + "epoch": 4.783964365256125, + "loss": 0.8804046511650085, + "loss_ce": 0.00027765982667915523, + "loss_iou": 0.357421875, + "loss_num": 0.032958984375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 120320000, + "step": 2148 + }, + { + "epoch": 4.78619153674833, + "grad_norm": 16.56837272644043, + "learning_rate": 1e-06, + "loss": 0.555, + "num_input_tokens_seen": 120375736, + "step": 2149 + }, + { + "epoch": 4.78619153674833, + "loss": 0.4850603938102722, + "loss_ce": 0.0003192013828083873, + "loss_iou": 0.2138671875, + "loss_num": 0.01116943359375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 120375736, + "step": 2149 + }, + { + "epoch": 4.788418708240535, + "grad_norm": 60.28794860839844, + "learning_rate": 1e-06, + "loss": 0.8792, + "num_input_tokens_seen": 120429608, + "step": 2150 + }, + { + "epoch": 4.788418708240535, + "loss": 0.890438437461853, + "loss_ce": 0.00030169825186021626, + "loss_iou": 0.38671875, + "loss_num": 0.023193359375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 120429608, + "step": 2150 + }, + { + "epoch": 4.79064587973274, + "grad_norm": 25.588119506835938, + "learning_rate": 1e-06, + "loss": 0.705, + "num_input_tokens_seen": 120484200, + "step": 2151 + }, + { + "epoch": 4.79064587973274, + "loss": 0.8378915786743164, + "loss_ce": 0.00036717430339194834, + "loss_iou": 0.345703125, + "loss_num": 0.0289306640625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 120484200, + "step": 2151 + }, + { + "epoch": 4.7928730512249444, + "grad_norm": 20.40205955505371, + "learning_rate": 1e-06, + "loss": 0.7429, + "num_input_tokens_seen": 120539644, + "step": 2152 + }, + { + "epoch": 4.7928730512249444, + "loss": 0.6924837827682495, + "loss_ce": 0.00034514523576945066, + "loss_iou": 0.302734375, + "loss_num": 0.017578125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 120539644, + "step": 2152 + }, + { + "epoch": 4.795100222717149, + "grad_norm": 24.60614013671875, + "learning_rate": 1e-06, + "loss": 0.7615, + "num_input_tokens_seen": 120594500, + "step": 2153 + }, + { + "epoch": 4.795100222717149, + "loss": 0.7266230583190918, + "loss_ce": 0.000487854442326352, + "loss_iou": 0.302734375, + "loss_num": 0.023681640625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 120594500, + "step": 2153 + }, + { + "epoch": 4.797327394209354, + "grad_norm": 16.96647834777832, + "learning_rate": 1e-06, + "loss": 0.7819, + "num_input_tokens_seen": 120649288, + "step": 2154 + }, + { + "epoch": 4.797327394209354, + "loss": 0.6350046396255493, + "loss_ce": 0.00023901589156594127, + "loss_iou": 0.28125, + "loss_num": 0.0145263671875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 120649288, + "step": 2154 + }, + { + "epoch": 4.799554565701559, + "grad_norm": 18.894594192504883, + "learning_rate": 1e-06, + "loss": 0.9572, + "num_input_tokens_seen": 120704828, + "step": 2155 + }, + { + "epoch": 4.799554565701559, + "loss": 0.9965546131134033, + "loss_ce": 0.000460872077383101, + "loss_iou": 0.41796875, + "loss_num": 0.031982421875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 120704828, + "step": 2155 + }, + { + "epoch": 4.801781737193764, + "grad_norm": 19.677793502807617, + "learning_rate": 1e-06, + "loss": 0.8639, + "num_input_tokens_seen": 120760452, + "step": 2156 + }, + { + "epoch": 4.801781737193764, + "loss": 0.9670339822769165, + "loss_ce": 0.00023707067884970456, + "loss_iou": 0.40625, + "loss_num": 0.0306396484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 120760452, + "step": 2156 + }, + { + "epoch": 4.804008908685969, + "grad_norm": 28.458290100097656, + "learning_rate": 1e-06, + "loss": 0.8885, + "num_input_tokens_seen": 120813932, + "step": 2157 + }, + { + "epoch": 4.804008908685969, + "loss": 0.9386167526245117, + "loss_ce": 0.0004453619185369462, + "loss_iou": 0.37109375, + "loss_num": 0.039794921875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 120813932, + "step": 2157 + }, + { + "epoch": 4.806236080178174, + "grad_norm": 19.169998168945312, + "learning_rate": 1e-06, + "loss": 0.7322, + "num_input_tokens_seen": 120867148, + "step": 2158 + }, + { + "epoch": 4.806236080178174, + "loss": 0.6552798748016357, + "loss_ce": 0.0008609433425590396, + "loss_iou": 0.275390625, + "loss_num": 0.02099609375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 120867148, + "step": 2158 + }, + { + "epoch": 4.8084632516703785, + "grad_norm": 100.8769302368164, + "learning_rate": 1e-06, + "loss": 0.8376, + "num_input_tokens_seen": 120921256, + "step": 2159 + }, + { + "epoch": 4.8084632516703785, + "loss": 0.7510162591934204, + "loss_ce": 0.0002837868523783982, + "loss_iou": 0.330078125, + "loss_num": 0.0184326171875, + "loss_xval": 0.75, + "num_input_tokens_seen": 120921256, + "step": 2159 + }, + { + "epoch": 4.810690423162583, + "grad_norm": 18.4330997467041, + "learning_rate": 1e-06, + "loss": 0.9006, + "num_input_tokens_seen": 120976736, + "step": 2160 + }, + { + "epoch": 4.810690423162583, + "loss": 1.1066399812698364, + "loss_ce": 0.00043876029667444527, + "loss_iou": 0.447265625, + "loss_num": 0.0419921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 120976736, + "step": 2160 + }, + { + "epoch": 4.812917594654788, + "grad_norm": 19.734941482543945, + "learning_rate": 1e-06, + "loss": 0.814, + "num_input_tokens_seen": 121034524, + "step": 2161 + }, + { + "epoch": 4.812917594654788, + "loss": 0.8756116628646851, + "loss_ce": 0.0006117259617894888, + "loss_iou": 0.3671875, + "loss_num": 0.0281982421875, + "loss_xval": 0.875, + "num_input_tokens_seen": 121034524, + "step": 2161 + }, + { + "epoch": 4.815144766146993, + "grad_norm": 18.92926788330078, + "learning_rate": 1e-06, + "loss": 0.6339, + "num_input_tokens_seen": 121090056, + "step": 2162 + }, + { + "epoch": 4.815144766146993, + "loss": 0.6168254017829895, + "loss_ce": 0.0003703091642819345, + "loss_iou": 0.27734375, + "loss_num": 0.01251220703125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 121090056, + "step": 2162 + }, + { + "epoch": 4.817371937639198, + "grad_norm": 30.59477996826172, + "learning_rate": 1e-06, + "loss": 0.8952, + "num_input_tokens_seen": 121148352, + "step": 2163 + }, + { + "epoch": 4.817371937639198, + "loss": 0.9438307881355286, + "loss_ce": 0.0004713647358585149, + "loss_iou": 0.388671875, + "loss_num": 0.03271484375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 121148352, + "step": 2163 + }, + { + "epoch": 4.819599109131403, + "grad_norm": 21.30016326904297, + "learning_rate": 1e-06, + "loss": 0.9444, + "num_input_tokens_seen": 121206584, + "step": 2164 + }, + { + "epoch": 4.819599109131403, + "loss": 0.7458754777908325, + "loss_ce": 0.000392090150853619, + "loss_iou": 0.298828125, + "loss_num": 0.0296630859375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 121206584, + "step": 2164 + }, + { + "epoch": 4.821826280623608, + "grad_norm": 25.39438247680664, + "learning_rate": 1e-06, + "loss": 0.7381, + "num_input_tokens_seen": 121265252, + "step": 2165 + }, + { + "epoch": 4.821826280623608, + "loss": 0.7435488700866699, + "loss_ce": 0.00026276521384716034, + "loss_iou": 0.326171875, + "loss_num": 0.01806640625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 121265252, + "step": 2165 + }, + { + "epoch": 4.8240534521158125, + "grad_norm": 36.96885681152344, + "learning_rate": 1e-06, + "loss": 0.7863, + "num_input_tokens_seen": 121321320, + "step": 2166 + }, + { + "epoch": 4.8240534521158125, + "loss": 0.82174152135849, + "loss_ce": 0.0002082870778394863, + "loss_iou": 0.34375, + "loss_num": 0.0267333984375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 121321320, + "step": 2166 + }, + { + "epoch": 4.826280623608017, + "grad_norm": 18.965267181396484, + "learning_rate": 1e-06, + "loss": 0.7267, + "num_input_tokens_seen": 121375740, + "step": 2167 + }, + { + "epoch": 4.826280623608017, + "loss": 0.8186416625976562, + "loss_ce": 0.0005264327628538013, + "loss_iou": 0.3671875, + "loss_num": 0.016845703125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 121375740, + "step": 2167 + }, + { + "epoch": 4.828507795100223, + "grad_norm": 14.640192031860352, + "learning_rate": 1e-06, + "loss": 0.6366, + "num_input_tokens_seen": 121433144, + "step": 2168 + }, + { + "epoch": 4.828507795100223, + "loss": 0.5576098561286926, + "loss_ce": 0.00029783969512209296, + "loss_iou": 0.2470703125, + "loss_num": 0.012451171875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 121433144, + "step": 2168 + }, + { + "epoch": 4.830734966592428, + "grad_norm": 16.91661834716797, + "learning_rate": 1e-06, + "loss": 0.7211, + "num_input_tokens_seen": 121491312, + "step": 2169 + }, + { + "epoch": 4.830734966592428, + "loss": 0.4785498082637787, + "loss_ce": 0.00027831370243802667, + "loss_iou": 0.18359375, + "loss_num": 0.0220947265625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 121491312, + "step": 2169 + }, + { + "epoch": 4.832962138084633, + "grad_norm": 31.92616844177246, + "learning_rate": 1e-06, + "loss": 0.9147, + "num_input_tokens_seen": 121546584, + "step": 2170 + }, + { + "epoch": 4.832962138084633, + "loss": 0.9893652200698853, + "loss_ce": 0.00035156396916136146, + "loss_iou": 0.384765625, + "loss_num": 0.044189453125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 121546584, + "step": 2170 + }, + { + "epoch": 4.835189309576838, + "grad_norm": 18.34603500366211, + "learning_rate": 1e-06, + "loss": 0.7332, + "num_input_tokens_seen": 121601848, + "step": 2171 + }, + { + "epoch": 4.835189309576838, + "loss": 0.6528722047805786, + "loss_ce": 0.00022326521866489202, + "loss_iou": 0.2578125, + "loss_num": 0.0277099609375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 121601848, + "step": 2171 + }, + { + "epoch": 4.8374164810690425, + "grad_norm": 23.596956253051758, + "learning_rate": 1e-06, + "loss": 0.6937, + "num_input_tokens_seen": 121654980, + "step": 2172 + }, + { + "epoch": 4.8374164810690425, + "loss": 0.7717235088348389, + "loss_ce": 0.00023919279919937253, + "loss_iou": 0.326171875, + "loss_num": 0.0240478515625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 121654980, + "step": 2172 + }, + { + "epoch": 4.839643652561247, + "grad_norm": 17.30280876159668, + "learning_rate": 1e-06, + "loss": 0.8213, + "num_input_tokens_seen": 121710832, + "step": 2173 + }, + { + "epoch": 4.839643652561247, + "loss": 0.890198290348053, + "loss_ce": 0.0003057056455872953, + "loss_iou": 0.388671875, + "loss_num": 0.022705078125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 121710832, + "step": 2173 + }, + { + "epoch": 4.841870824053452, + "grad_norm": 21.682880401611328, + "learning_rate": 1e-06, + "loss": 0.7604, + "num_input_tokens_seen": 121769308, + "step": 2174 + }, + { + "epoch": 4.841870824053452, + "loss": 0.8933024406433105, + "loss_ce": 0.00023604354646522552, + "loss_iou": 0.392578125, + "loss_num": 0.0216064453125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 121769308, + "step": 2174 + }, + { + "epoch": 4.844097995545657, + "grad_norm": 23.18276596069336, + "learning_rate": 1e-06, + "loss": 0.852, + "num_input_tokens_seen": 121825936, + "step": 2175 + }, + { + "epoch": 4.844097995545657, + "loss": 0.8222508430480957, + "loss_ce": 0.0004735908005386591, + "loss_iou": 0.34765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 121825936, + "step": 2175 + }, + { + "epoch": 4.846325167037862, + "grad_norm": 16.410219192504883, + "learning_rate": 1e-06, + "loss": 0.5924, + "num_input_tokens_seen": 121880128, + "step": 2176 + }, + { + "epoch": 4.846325167037862, + "loss": 0.5815201997756958, + "loss_ce": 0.00022139312932267785, + "loss_iou": 0.2451171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 121880128, + "step": 2176 + }, + { + "epoch": 4.848552338530067, + "grad_norm": 16.55893898010254, + "learning_rate": 1e-06, + "loss": 0.5514, + "num_input_tokens_seen": 121937432, + "step": 2177 + }, + { + "epoch": 4.848552338530067, + "loss": 0.5693067312240601, + "loss_ce": 0.00021495725377462804, + "loss_iou": 0.25, + "loss_num": 0.013671875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 121937432, + "step": 2177 + }, + { + "epoch": 4.850779510022272, + "grad_norm": 22.6440372467041, + "learning_rate": 1e-06, + "loss": 0.694, + "num_input_tokens_seen": 121993900, + "step": 2178 + }, + { + "epoch": 4.850779510022272, + "loss": 0.8454983234405518, + "loss_ce": 0.0002834856859408319, + "loss_iou": 0.359375, + "loss_num": 0.0252685546875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 121993900, + "step": 2178 + }, + { + "epoch": 4.853006681514477, + "grad_norm": 47.70549392700195, + "learning_rate": 1e-06, + "loss": 0.8097, + "num_input_tokens_seen": 122050492, + "step": 2179 + }, + { + "epoch": 4.853006681514477, + "loss": 0.8886595964431763, + "loss_ce": 0.00035399917396716774, + "loss_iou": 0.38671875, + "loss_num": 0.0228271484375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 122050492, + "step": 2179 + }, + { + "epoch": 4.855233853006681, + "grad_norm": 15.52509593963623, + "learning_rate": 1e-06, + "loss": 0.6431, + "num_input_tokens_seen": 122105120, + "step": 2180 + }, + { + "epoch": 4.855233853006681, + "loss": 0.5089912414550781, + "loss_ce": 0.00020220015721861273, + "loss_iou": 0.2001953125, + "loss_num": 0.0218505859375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 122105120, + "step": 2180 + }, + { + "epoch": 4.857461024498886, + "grad_norm": 21.397817611694336, + "learning_rate": 1e-06, + "loss": 0.5721, + "num_input_tokens_seen": 122162916, + "step": 2181 + }, + { + "epoch": 4.857461024498886, + "loss": 0.5020558834075928, + "loss_ce": 0.0002248543023597449, + "loss_iou": 0.2236328125, + "loss_num": 0.01080322265625, + "loss_xval": 0.5, + "num_input_tokens_seen": 122162916, + "step": 2181 + }, + { + "epoch": 4.859688195991091, + "grad_norm": 18.195024490356445, + "learning_rate": 1e-06, + "loss": 0.6435, + "num_input_tokens_seen": 122219996, + "step": 2182 + }, + { + "epoch": 4.859688195991091, + "loss": 0.6611765027046204, + "loss_ce": 0.0002878474479075521, + "loss_iou": 0.283203125, + "loss_num": 0.0191650390625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 122219996, + "step": 2182 + }, + { + "epoch": 4.861915367483296, + "grad_norm": 72.6725082397461, + "learning_rate": 1e-06, + "loss": 0.7279, + "num_input_tokens_seen": 122274148, + "step": 2183 + }, + { + "epoch": 4.861915367483296, + "loss": 0.6504148244857788, + "loss_ce": 0.0002683540806174278, + "loss_iou": 0.30078125, + "loss_num": 0.010009765625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 122274148, + "step": 2183 + }, + { + "epoch": 4.864142538975501, + "grad_norm": 31.536542892456055, + "learning_rate": 1e-06, + "loss": 0.8554, + "num_input_tokens_seen": 122329776, + "step": 2184 + }, + { + "epoch": 4.864142538975501, + "loss": 0.7600322365760803, + "loss_ce": 0.00026659879949875176, + "loss_iou": 0.3125, + "loss_num": 0.027099609375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 122329776, + "step": 2184 + }, + { + "epoch": 4.866369710467706, + "grad_norm": 25.865697860717773, + "learning_rate": 1e-06, + "loss": 0.8837, + "num_input_tokens_seen": 122387312, + "step": 2185 + }, + { + "epoch": 4.866369710467706, + "loss": 0.8069164752960205, + "loss_ce": 0.00027582579059526324, + "loss_iou": 0.33203125, + "loss_num": 0.02880859375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 122387312, + "step": 2185 + }, + { + "epoch": 4.868596881959911, + "grad_norm": 20.460817337036133, + "learning_rate": 1e-06, + "loss": 0.7866, + "num_input_tokens_seen": 122444908, + "step": 2186 + }, + { + "epoch": 4.868596881959911, + "loss": 0.9665539264678955, + "loss_ce": 0.00036741438088938594, + "loss_iou": 0.396484375, + "loss_num": 0.03466796875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 122444908, + "step": 2186 + }, + { + "epoch": 4.870824053452115, + "grad_norm": 29.2179012298584, + "learning_rate": 1e-06, + "loss": 0.6808, + "num_input_tokens_seen": 122499836, + "step": 2187 + }, + { + "epoch": 4.870824053452115, + "loss": 0.8551727533340454, + "loss_ce": 0.00043644450488500297, + "loss_iou": 0.337890625, + "loss_num": 0.035400390625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 122499836, + "step": 2187 + }, + { + "epoch": 4.873051224944321, + "grad_norm": 26.670900344848633, + "learning_rate": 1e-06, + "loss": 0.7637, + "num_input_tokens_seen": 122556144, + "step": 2188 + }, + { + "epoch": 4.873051224944321, + "loss": 0.7377973794937134, + "loss_ce": 0.00024859551922418177, + "loss_iou": 0.33203125, + "loss_num": 0.01507568359375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 122556144, + "step": 2188 + }, + { + "epoch": 4.875278396436526, + "grad_norm": 24.723434448242188, + "learning_rate": 1e-06, + "loss": 0.72, + "num_input_tokens_seen": 122612748, + "step": 2189 + }, + { + "epoch": 4.875278396436526, + "loss": 0.6694687008857727, + "loss_ce": 0.0002792471495922655, + "loss_iou": 0.298828125, + "loss_num": 0.01422119140625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 122612748, + "step": 2189 + }, + { + "epoch": 4.877505567928731, + "grad_norm": 15.66889476776123, + "learning_rate": 1e-06, + "loss": 0.6759, + "num_input_tokens_seen": 122668312, + "step": 2190 + }, + { + "epoch": 4.877505567928731, + "loss": 0.7348698973655701, + "loss_ce": 0.00025075351004488766, + "loss_iou": 0.326171875, + "loss_num": 0.0166015625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 122668312, + "step": 2190 + }, + { + "epoch": 4.879732739420936, + "grad_norm": 25.57622718811035, + "learning_rate": 1e-06, + "loss": 0.8345, + "num_input_tokens_seen": 122721432, + "step": 2191 + }, + { + "epoch": 4.879732739420936, + "loss": 0.7397469282150269, + "loss_ce": 0.00024497421691194177, + "loss_iou": 0.31640625, + "loss_num": 0.021240234375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 122721432, + "step": 2191 + }, + { + "epoch": 4.881959910913141, + "grad_norm": 24.592416763305664, + "learning_rate": 1e-06, + "loss": 0.6403, + "num_input_tokens_seen": 122777480, + "step": 2192 + }, + { + "epoch": 4.881959910913141, + "loss": 0.6197606325149536, + "loss_ce": 0.00037585641257464886, + "loss_iou": 0.265625, + "loss_num": 0.0177001953125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 122777480, + "step": 2192 + }, + { + "epoch": 4.8841870824053455, + "grad_norm": 17.536779403686523, + "learning_rate": 1e-06, + "loss": 0.6577, + "num_input_tokens_seen": 122835032, + "step": 2193 + }, + { + "epoch": 4.8841870824053455, + "loss": 0.7718772888183594, + "loss_ce": 0.0002708572428673506, + "loss_iou": 0.337890625, + "loss_num": 0.0185546875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 122835032, + "step": 2193 + }, + { + "epoch": 4.88641425389755, + "grad_norm": 32.589019775390625, + "learning_rate": 1e-06, + "loss": 0.9171, + "num_input_tokens_seen": 122888828, + "step": 2194 + }, + { + "epoch": 4.88641425389755, + "loss": 1.2385810613632202, + "loss_ce": 0.00029984532739035785, + "loss_iou": 0.5234375, + "loss_num": 0.03759765625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 122888828, + "step": 2194 + }, + { + "epoch": 4.888641425389755, + "grad_norm": 22.544490814208984, + "learning_rate": 1e-06, + "loss": 0.7052, + "num_input_tokens_seen": 122946980, + "step": 2195 + }, + { + "epoch": 4.888641425389755, + "loss": 0.7754490971565247, + "loss_ce": 0.00030256161699071527, + "loss_iou": 0.314453125, + "loss_num": 0.0296630859375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 122946980, + "step": 2195 + }, + { + "epoch": 4.89086859688196, + "grad_norm": 16.414432525634766, + "learning_rate": 1e-06, + "loss": 0.6703, + "num_input_tokens_seen": 123006736, + "step": 2196 + }, + { + "epoch": 4.89086859688196, + "loss": 0.6196362972259521, + "loss_ce": 0.00025152770103886724, + "loss_iou": 0.263671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 123006736, + "step": 2196 + }, + { + "epoch": 4.893095768374165, + "grad_norm": 21.864694595336914, + "learning_rate": 1e-06, + "loss": 0.8546, + "num_input_tokens_seen": 123065160, + "step": 2197 + }, + { + "epoch": 4.893095768374165, + "loss": 0.8675484657287598, + "loss_ce": 0.00036098493728786707, + "loss_iou": 0.365234375, + "loss_num": 0.0274658203125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 123065160, + "step": 2197 + }, + { + "epoch": 4.89532293986637, + "grad_norm": 31.04578971862793, + "learning_rate": 1e-06, + "loss": 0.735, + "num_input_tokens_seen": 123117600, + "step": 2198 + }, + { + "epoch": 4.89532293986637, + "loss": 0.6225305795669556, + "loss_ce": 0.00021616063895635307, + "loss_iou": 0.275390625, + "loss_num": 0.01409912109375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 123117600, + "step": 2198 + }, + { + "epoch": 4.897550111358575, + "grad_norm": 13.26624870300293, + "learning_rate": 1e-06, + "loss": 0.6433, + "num_input_tokens_seen": 123171304, + "step": 2199 + }, + { + "epoch": 4.897550111358575, + "loss": 0.6789191961288452, + "loss_ce": 0.00023874480393715203, + "loss_iou": 0.259765625, + "loss_num": 0.03173828125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 123171304, + "step": 2199 + }, + { + "epoch": 4.8997772828507795, + "grad_norm": 19.680660247802734, + "learning_rate": 1e-06, + "loss": 0.8179, + "num_input_tokens_seen": 123227968, + "step": 2200 + }, + { + "epoch": 4.8997772828507795, + "loss": 1.005420446395874, + "loss_ce": 0.0002934019430540502, + "loss_iou": 0.421875, + "loss_num": 0.0322265625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 123227968, + "step": 2200 + }, + { + "epoch": 4.902004454342984, + "grad_norm": 14.69334888458252, + "learning_rate": 1e-06, + "loss": 0.6017, + "num_input_tokens_seen": 123286024, + "step": 2201 + }, + { + "epoch": 4.902004454342984, + "loss": 0.7753942012786865, + "loss_ce": 0.0002477114903740585, + "loss_iou": 0.345703125, + "loss_num": 0.0164794921875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 123286024, + "step": 2201 + }, + { + "epoch": 4.904231625835189, + "grad_norm": 23.2365665435791, + "learning_rate": 1e-06, + "loss": 0.6886, + "num_input_tokens_seen": 123337296, + "step": 2202 + }, + { + "epoch": 4.904231625835189, + "loss": 0.6639537811279297, + "loss_ce": 0.00037955871084704995, + "loss_iou": 0.287109375, + "loss_num": 0.017578125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 123337296, + "step": 2202 + }, + { + "epoch": 4.906458797327394, + "grad_norm": 21.47087287902832, + "learning_rate": 1e-06, + "loss": 0.7853, + "num_input_tokens_seen": 123391896, + "step": 2203 + }, + { + "epoch": 4.906458797327394, + "loss": 0.8630619049072266, + "loss_ce": 0.0002689045504666865, + "loss_iou": 0.34375, + "loss_num": 0.03466796875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 123391896, + "step": 2203 + }, + { + "epoch": 4.908685968819599, + "grad_norm": 15.741186141967773, + "learning_rate": 1e-06, + "loss": 0.644, + "num_input_tokens_seen": 123448656, + "step": 2204 + }, + { + "epoch": 4.908685968819599, + "loss": 0.6564854383468628, + "loss_ce": 0.00023539297399111092, + "loss_iou": 0.2734375, + "loss_num": 0.021728515625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 123448656, + "step": 2204 + }, + { + "epoch": 4.910913140311804, + "grad_norm": 21.5997257232666, + "learning_rate": 1e-06, + "loss": 0.823, + "num_input_tokens_seen": 123503840, + "step": 2205 + }, + { + "epoch": 4.910913140311804, + "loss": 0.8479044437408447, + "loss_ce": 0.00024817389203235507, + "loss_iou": 0.35546875, + "loss_num": 0.0269775390625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 123503840, + "step": 2205 + }, + { + "epoch": 4.913140311804009, + "grad_norm": 16.790218353271484, + "learning_rate": 1e-06, + "loss": 0.5964, + "num_input_tokens_seen": 123558308, + "step": 2206 + }, + { + "epoch": 4.913140311804009, + "loss": 0.6172899007797241, + "loss_ce": 0.00022445424110628664, + "loss_iou": 0.275390625, + "loss_num": 0.01287841796875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 123558308, + "step": 2206 + }, + { + "epoch": 4.9153674832962135, + "grad_norm": 66.33501434326172, + "learning_rate": 1e-06, + "loss": 0.8602, + "num_input_tokens_seen": 123615124, + "step": 2207 + }, + { + "epoch": 4.9153674832962135, + "loss": 0.8113021850585938, + "loss_ce": 0.00026704196352511644, + "loss_iou": 0.359375, + "loss_num": 0.01806640625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 123615124, + "step": 2207 + }, + { + "epoch": 4.917594654788418, + "grad_norm": 16.30253028869629, + "learning_rate": 1e-06, + "loss": 0.4585, + "num_input_tokens_seen": 123671784, + "step": 2208 + }, + { + "epoch": 4.917594654788418, + "loss": 0.4267365336418152, + "loss_ce": 0.00022286101011559367, + "loss_iou": 0.181640625, + "loss_num": 0.01263427734375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 123671784, + "step": 2208 + }, + { + "epoch": 4.919821826280623, + "grad_norm": 16.062482833862305, + "learning_rate": 1e-06, + "loss": 0.7222, + "num_input_tokens_seen": 123729044, + "step": 2209 + }, + { + "epoch": 4.919821826280623, + "loss": 0.7194496393203735, + "loss_ce": 0.0004554773331619799, + "loss_iou": 0.298828125, + "loss_num": 0.02392578125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 123729044, + "step": 2209 + }, + { + "epoch": 4.922048997772828, + "grad_norm": 19.926166534423828, + "learning_rate": 1e-06, + "loss": 0.9957, + "num_input_tokens_seen": 123784456, + "step": 2210 + }, + { + "epoch": 4.922048997772828, + "loss": 0.9393208026885986, + "loss_ce": 0.0003559546312317252, + "loss_iou": 0.376953125, + "loss_num": 0.037109375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 123784456, + "step": 2210 + }, + { + "epoch": 4.924276169265033, + "grad_norm": 26.751087188720703, + "learning_rate": 1e-06, + "loss": 0.8433, + "num_input_tokens_seen": 123839024, + "step": 2211 + }, + { + "epoch": 4.924276169265033, + "loss": 0.7800534963607788, + "loss_ce": 0.0002683330385480076, + "loss_iou": 0.330078125, + "loss_num": 0.023681640625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 123839024, + "step": 2211 + }, + { + "epoch": 4.926503340757238, + "grad_norm": 20.585493087768555, + "learning_rate": 1e-06, + "loss": 0.6179, + "num_input_tokens_seen": 123895700, + "step": 2212 + }, + { + "epoch": 4.926503340757238, + "loss": 0.6982933282852173, + "loss_ce": 0.000295252597425133, + "loss_iou": 0.31640625, + "loss_num": 0.01312255859375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 123895700, + "step": 2212 + }, + { + "epoch": 4.928730512249444, + "grad_norm": 16.665607452392578, + "learning_rate": 1e-06, + "loss": 0.7618, + "num_input_tokens_seen": 123950376, + "step": 2213 + }, + { + "epoch": 4.928730512249444, + "loss": 0.5816483497619629, + "loss_ce": 0.0002274463913636282, + "loss_iou": 0.2490234375, + "loss_num": 0.0167236328125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 123950376, + "step": 2213 + }, + { + "epoch": 4.9309576837416484, + "grad_norm": 21.802547454833984, + "learning_rate": 1e-06, + "loss": 0.6218, + "num_input_tokens_seen": 124006408, + "step": 2214 + }, + { + "epoch": 4.9309576837416484, + "loss": 0.6234120726585388, + "loss_ce": 0.00024313261383213103, + "loss_iou": 0.259765625, + "loss_num": 0.0206298828125, + "loss_xval": 0.625, + "num_input_tokens_seen": 124006408, + "step": 2214 + }, + { + "epoch": 4.933184855233853, + "grad_norm": 50.704349517822266, + "learning_rate": 1e-06, + "loss": 0.7325, + "num_input_tokens_seen": 124064376, + "step": 2215 + }, + { + "epoch": 4.933184855233853, + "loss": 0.8308193683624268, + "loss_ce": 0.0004971576854586601, + "loss_iou": 0.337890625, + "loss_num": 0.0311279296875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 124064376, + "step": 2215 + }, + { + "epoch": 4.935412026726058, + "grad_norm": 18.29207992553711, + "learning_rate": 1e-06, + "loss": 0.8372, + "num_input_tokens_seen": 124120104, + "step": 2216 + }, + { + "epoch": 4.935412026726058, + "loss": 0.6730678081512451, + "loss_ce": 0.0002162476594094187, + "loss_iou": 0.283203125, + "loss_num": 0.0211181640625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 124120104, + "step": 2216 + }, + { + "epoch": 4.937639198218263, + "grad_norm": 18.550386428833008, + "learning_rate": 1e-06, + "loss": 0.7834, + "num_input_tokens_seen": 124175912, + "step": 2217 + }, + { + "epoch": 4.937639198218263, + "loss": 0.635769784450531, + "loss_ce": 0.00027175506693311036, + "loss_iou": 0.27734375, + "loss_num": 0.0164794921875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 124175912, + "step": 2217 + }, + { + "epoch": 4.939866369710468, + "grad_norm": 20.139272689819336, + "learning_rate": 1e-06, + "loss": 0.9253, + "num_input_tokens_seen": 124231184, + "step": 2218 + }, + { + "epoch": 4.939866369710468, + "loss": 0.9445996284484863, + "loss_ce": 0.00038581539411097765, + "loss_iou": 0.365234375, + "loss_num": 0.04248046875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 124231184, + "step": 2218 + }, + { + "epoch": 4.942093541202673, + "grad_norm": 23.283292770385742, + "learning_rate": 1e-06, + "loss": 0.8611, + "num_input_tokens_seen": 124288768, + "step": 2219 + }, + { + "epoch": 4.942093541202673, + "loss": 0.8784899711608887, + "loss_ce": 0.0003162057837471366, + "loss_iou": 0.36328125, + "loss_num": 0.03076171875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 124288768, + "step": 2219 + }, + { + "epoch": 4.944320712694878, + "grad_norm": 15.636147499084473, + "learning_rate": 1e-06, + "loss": 0.5499, + "num_input_tokens_seen": 124344436, + "step": 2220 + }, + { + "epoch": 4.944320712694878, + "loss": 0.592854380607605, + "loss_ce": 0.0003251142334192991, + "loss_iou": 0.267578125, + "loss_num": 0.01129150390625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 124344436, + "step": 2220 + }, + { + "epoch": 4.9465478841870825, + "grad_norm": 14.876840591430664, + "learning_rate": 1e-06, + "loss": 0.6573, + "num_input_tokens_seen": 124400952, + "step": 2221 + }, + { + "epoch": 4.9465478841870825, + "loss": 0.7517122626304626, + "loss_ce": 0.0002474116045050323, + "loss_iou": 0.32421875, + "loss_num": 0.0205078125, + "loss_xval": 0.75, + "num_input_tokens_seen": 124400952, + "step": 2221 + }, + { + "epoch": 4.948775055679287, + "grad_norm": 28.595050811767578, + "learning_rate": 1e-06, + "loss": 0.901, + "num_input_tokens_seen": 124457384, + "step": 2222 + }, + { + "epoch": 4.948775055679287, + "loss": 0.9143351316452026, + "loss_ce": 0.00027260559727437794, + "loss_iou": 0.404296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 124457384, + "step": 2222 + }, + { + "epoch": 4.951002227171492, + "grad_norm": 24.853782653808594, + "learning_rate": 1e-06, + "loss": 0.7986, + "num_input_tokens_seen": 124514252, + "step": 2223 + }, + { + "epoch": 4.951002227171492, + "loss": 0.6873869895935059, + "loss_ce": 0.00043627433478832245, + "loss_iou": 0.302734375, + "loss_num": 0.0164794921875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 124514252, + "step": 2223 + }, + { + "epoch": 4.953229398663697, + "grad_norm": 22.14551544189453, + "learning_rate": 1e-06, + "loss": 0.691, + "num_input_tokens_seen": 124569752, + "step": 2224 + }, + { + "epoch": 4.953229398663697, + "loss": 0.6962725520133972, + "loss_ce": 0.00022760960564482957, + "loss_iou": 0.294921875, + "loss_num": 0.021240234375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 124569752, + "step": 2224 + }, + { + "epoch": 4.955456570155902, + "grad_norm": 20.33577537536621, + "learning_rate": 1e-06, + "loss": 0.6513, + "num_input_tokens_seen": 124626644, + "step": 2225 + }, + { + "epoch": 4.955456570155902, + "loss": 0.7576655149459839, + "loss_ce": 0.0003413379890844226, + "loss_iou": 0.302734375, + "loss_num": 0.0306396484375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 124626644, + "step": 2225 + }, + { + "epoch": 4.957683741648107, + "grad_norm": 19.96550178527832, + "learning_rate": 1e-06, + "loss": 0.4482, + "num_input_tokens_seen": 124684520, + "step": 2226 + }, + { + "epoch": 4.957683741648107, + "loss": 0.46445581316947937, + "loss_ce": 0.0002224127674708143, + "loss_iou": 0.201171875, + "loss_num": 0.01226806640625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 124684520, + "step": 2226 + }, + { + "epoch": 4.959910913140312, + "grad_norm": 16.052396774291992, + "learning_rate": 1e-06, + "loss": 0.6533, + "num_input_tokens_seen": 124739472, + "step": 2227 + }, + { + "epoch": 4.959910913140312, + "loss": 0.8050886988639832, + "loss_ce": 0.0004011690034531057, + "loss_iou": 0.33203125, + "loss_num": 0.0277099609375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 124739472, + "step": 2227 + }, + { + "epoch": 4.9621380846325165, + "grad_norm": 19.418689727783203, + "learning_rate": 1e-06, + "loss": 0.7515, + "num_input_tokens_seen": 124795168, + "step": 2228 + }, + { + "epoch": 4.9621380846325165, + "loss": 0.6984738707542419, + "loss_ce": 0.00023169181076809764, + "loss_iou": 0.287109375, + "loss_num": 0.02490234375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 124795168, + "step": 2228 + }, + { + "epoch": 4.964365256124721, + "grad_norm": 19.798582077026367, + "learning_rate": 1e-06, + "loss": 0.8594, + "num_input_tokens_seen": 124850384, + "step": 2229 + }, + { + "epoch": 4.964365256124721, + "loss": 0.8433953523635864, + "loss_ce": 0.0002557524712756276, + "loss_iou": 0.349609375, + "loss_num": 0.0291748046875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 124850384, + "step": 2229 + }, + { + "epoch": 4.966592427616926, + "grad_norm": 28.708772659301758, + "learning_rate": 1e-06, + "loss": 0.8574, + "num_input_tokens_seen": 124905988, + "step": 2230 + }, + { + "epoch": 4.966592427616926, + "loss": 0.7451651692390442, + "loss_ce": 0.00029212021036073565, + "loss_iou": 0.306640625, + "loss_num": 0.0267333984375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 124905988, + "step": 2230 + }, + { + "epoch": 4.968819599109131, + "grad_norm": 24.822715759277344, + "learning_rate": 1e-06, + "loss": 0.7711, + "num_input_tokens_seen": 124961660, + "step": 2231 + }, + { + "epoch": 4.968819599109131, + "loss": 0.6456351280212402, + "loss_ce": 0.00018830940825864673, + "loss_iou": 0.275390625, + "loss_num": 0.018798828125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 124961660, + "step": 2231 + }, + { + "epoch": 4.971046770601336, + "grad_norm": 18.94268035888672, + "learning_rate": 1e-06, + "loss": 0.9299, + "num_input_tokens_seen": 125015652, + "step": 2232 + }, + { + "epoch": 4.971046770601336, + "loss": 0.9597747325897217, + "loss_ce": 0.00030205969233065844, + "loss_iou": 0.400390625, + "loss_num": 0.031494140625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 125015652, + "step": 2232 + }, + { + "epoch": 4.973273942093542, + "grad_norm": 17.5726318359375, + "learning_rate": 1e-06, + "loss": 0.7039, + "num_input_tokens_seen": 125073992, + "step": 2233 + }, + { + "epoch": 4.973273942093542, + "loss": 0.5961167216300964, + "loss_ce": 0.00023052276810631156, + "loss_iou": 0.26953125, + "loss_num": 0.0115966796875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 125073992, + "step": 2233 + }, + { + "epoch": 4.9755011135857465, + "grad_norm": 33.96272277832031, + "learning_rate": 1e-06, + "loss": 0.8093, + "num_input_tokens_seen": 125130660, + "step": 2234 + }, + { + "epoch": 4.9755011135857465, + "loss": 0.8671445846557617, + "loss_ce": 0.00020118044631090015, + "loss_iou": 0.345703125, + "loss_num": 0.034423828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 125130660, + "step": 2234 + }, + { + "epoch": 4.977728285077951, + "grad_norm": 25.479049682617188, + "learning_rate": 1e-06, + "loss": 0.7355, + "num_input_tokens_seen": 125188292, + "step": 2235 + }, + { + "epoch": 4.977728285077951, + "loss": 0.6173162460327148, + "loss_ce": 0.0002507962053641677, + "loss_iou": 0.26953125, + "loss_num": 0.0157470703125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 125188292, + "step": 2235 + }, + { + "epoch": 4.979955456570156, + "grad_norm": 17.59937286376953, + "learning_rate": 1e-06, + "loss": 0.5309, + "num_input_tokens_seen": 125245472, + "step": 2236 + }, + { + "epoch": 4.979955456570156, + "loss": 0.4158274531364441, + "loss_ce": 0.0002390348818153143, + "loss_iou": 0.17578125, + "loss_num": 0.012939453125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 125245472, + "step": 2236 + }, + { + "epoch": 4.982182628062361, + "grad_norm": 24.452638626098633, + "learning_rate": 1e-06, + "loss": 0.7179, + "num_input_tokens_seen": 125302944, + "step": 2237 + }, + { + "epoch": 4.982182628062361, + "loss": 0.5437171459197998, + "loss_ce": 0.0002601124288048595, + "loss_iou": 0.251953125, + "loss_num": 0.007659912109375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 125302944, + "step": 2237 + }, + { + "epoch": 4.984409799554566, + "grad_norm": 15.169961929321289, + "learning_rate": 1e-06, + "loss": 0.8227, + "num_input_tokens_seen": 125362732, + "step": 2238 + }, + { + "epoch": 4.984409799554566, + "loss": 0.7834569811820984, + "loss_ce": 0.00025388289941474795, + "loss_iou": 0.34375, + "loss_num": 0.01953125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 125362732, + "step": 2238 + }, + { + "epoch": 4.986636971046771, + "grad_norm": 15.975359916687012, + "learning_rate": 1e-06, + "loss": 0.7685, + "num_input_tokens_seen": 125419796, + "step": 2239 + }, + { + "epoch": 4.986636971046771, + "loss": 0.6306621432304382, + "loss_ce": 0.0002910442417487502, + "loss_iou": 0.265625, + "loss_num": 0.019287109375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 125419796, + "step": 2239 + }, + { + "epoch": 4.988864142538976, + "grad_norm": 41.811767578125, + "learning_rate": 1e-06, + "loss": 0.5101, + "num_input_tokens_seen": 125478088, + "step": 2240 + }, + { + "epoch": 4.988864142538976, + "loss": 0.5742073059082031, + "loss_ce": 0.0002326710382476449, + "loss_iou": 0.24609375, + "loss_num": 0.016357421875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 125478088, + "step": 2240 + }, + { + "epoch": 4.991091314031181, + "grad_norm": 21.81138801574707, + "learning_rate": 1e-06, + "loss": 0.8395, + "num_input_tokens_seen": 125533344, + "step": 2241 + }, + { + "epoch": 4.991091314031181, + "loss": 0.7543541789054871, + "loss_ce": 0.001912805950269103, + "loss_iou": 0.287109375, + "loss_num": 0.03564453125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 125533344, + "step": 2241 + }, + { + "epoch": 4.993318485523385, + "grad_norm": 30.80375099182129, + "learning_rate": 1e-06, + "loss": 0.9426, + "num_input_tokens_seen": 125588860, + "step": 2242 + }, + { + "epoch": 4.993318485523385, + "loss": 0.8981271386146545, + "loss_ce": 0.0005441233515739441, + "loss_iou": 0.384765625, + "loss_num": 0.0260009765625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 125588860, + "step": 2242 + }, + { + "epoch": 4.99554565701559, + "grad_norm": 18.436973571777344, + "learning_rate": 1e-06, + "loss": 0.6176, + "num_input_tokens_seen": 125646052, + "step": 2243 + }, + { + "epoch": 4.99554565701559, + "loss": 0.5616306066513062, + "loss_ce": 0.0002292500576004386, + "loss_iou": 0.236328125, + "loss_num": 0.017822265625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 125646052, + "step": 2243 + }, + { + "epoch": 4.997772828507795, + "grad_norm": 20.010271072387695, + "learning_rate": 1e-06, + "loss": 0.9739, + "num_input_tokens_seen": 125701944, + "step": 2244 + }, + { + "epoch": 4.997772828507795, + "loss": 0.7831923961639404, + "loss_ce": 0.0002333829615963623, + "loss_iou": 0.34765625, + "loss_num": 0.017333984375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 125701944, + "step": 2244 + }, + { + "epoch": 5.0, + "grad_norm": 22.875764846801758, + "learning_rate": 1e-06, + "loss": 0.8608, + "num_input_tokens_seen": 125761180, + "step": 2245 + }, + { + "epoch": 5.0, + "loss": 0.8516440391540527, + "loss_ce": 0.0003256534691900015, + "loss_iou": 0.349609375, + "loss_num": 0.03076171875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 125761180, + "step": 2245 + }, + { + "epoch": 5.002227171492205, + "grad_norm": 32.292686462402344, + "learning_rate": 1e-06, + "loss": 0.7312, + "num_input_tokens_seen": 125816488, + "step": 2246 + }, + { + "epoch": 5.002227171492205, + "loss": 0.7869899868965149, + "loss_ce": 0.00036892099888063967, + "loss_iou": 0.341796875, + "loss_num": 0.0205078125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 125816488, + "step": 2246 + }, + { + "epoch": 5.00445434298441, + "grad_norm": 22.563552856445312, + "learning_rate": 1e-06, + "loss": 0.6971, + "num_input_tokens_seen": 125871976, + "step": 2247 + }, + { + "epoch": 5.00445434298441, + "loss": 0.7504916191101074, + "loss_ce": 0.0002475018845871091, + "loss_iou": 0.333984375, + "loss_num": 0.016357421875, + "loss_xval": 0.75, + "num_input_tokens_seen": 125871976, + "step": 2247 + }, + { + "epoch": 5.006681514476615, + "grad_norm": 14.274714469909668, + "learning_rate": 1e-06, + "loss": 0.7045, + "num_input_tokens_seen": 125930388, + "step": 2248 + }, + { + "epoch": 5.006681514476615, + "loss": 0.598191499710083, + "loss_ce": 0.000291119038593024, + "loss_iou": 0.26953125, + "loss_num": 0.01202392578125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 125930388, + "step": 2248 + }, + { + "epoch": 5.008908685968819, + "grad_norm": 16.532169342041016, + "learning_rate": 1e-06, + "loss": 0.7869, + "num_input_tokens_seen": 125986868, + "step": 2249 + }, + { + "epoch": 5.008908685968819, + "loss": 0.7695136666297913, + "loss_ce": 0.0002265319344587624, + "loss_iou": 0.337890625, + "loss_num": 0.01904296875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 125986868, + "step": 2249 + }, + { + "epoch": 5.011135857461024, + "grad_norm": 14.793770790100098, + "learning_rate": 1e-06, + "loss": 0.901, + "num_input_tokens_seen": 126043356, + "step": 2250 + }, + { + "epoch": 5.011135857461024, + "eval_seeclick_web_CIoU": 0.5627106428146362, + "eval_seeclick_web_GIoU": 0.5593923330307007, + "eval_seeclick_web_IoU": 0.5781354308128357, + "eval_seeclick_web_MAE_all": 0.01731129875406623, + "eval_seeclick_web_MAE_h": 0.009559806901961565, + "eval_seeclick_web_MAE_w": 0.01916863350197673, + "eval_seeclick_web_MAE_x_boxes": 0.00826259353198111, + "eval_seeclick_web_MAE_y_boxes": 0.021791240433230996, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9538028240203857, + "eval_seeclick_web_loss_ce": 0.00035019248025491834, + "eval_seeclick_web_loss_iou": 0.4365234375, + "eval_seeclick_web_loss_num": 0.013622283935546875, + "eval_seeclick_web_loss_xval": 0.940673828125, + "eval_seeclick_web_runtime": 29.6362, + "eval_seeclick_web_samples_per_second": 1.687, + "eval_seeclick_web_steps_per_second": 0.067, + "num_input_tokens_seen": 126043356, + "step": 2250 + }, + { + "epoch": 5.011135857461024, + "eval_icons_CIoU": 0.3190227448940277, + "eval_icons_GIoU": 0.3375135064125061, + "eval_icons_IoU": 0.38641299307346344, + "eval_icons_MAE_all": 0.06205383501946926, + "eval_icons_MAE_h": 0.03881475329399109, + "eval_icons_MAE_w": 0.06065435893833637, + "eval_icons_MAE_x_boxes": 0.06400249153375626, + "eval_icons_MAE_y_boxes": 0.037100257351994514, + "eval_icons_inside_bbox": 0.6493055522441864, + "eval_icons_loss": 1.6723029613494873, + "eval_icons_loss_ce": 0.0004043810913572088, + "eval_icons_loss_iou": 0.663818359375, + "eval_icons_loss_num": 0.05780982971191406, + "eval_icons_loss_xval": 1.615234375, + "eval_icons_runtime": 27.5554, + "eval_icons_samples_per_second": 1.815, + "eval_icons_steps_per_second": 0.073, + "num_input_tokens_seen": 126043356, + "step": 2250 + }, + { + "epoch": 5.011135857461024, + "eval_screenspot_CIoU": 0.33845322330792743, + "eval_screenspot_GIoU": 0.3534944951534271, + "eval_screenspot_IoU": 0.41957201560338336, + "eval_screenspot_MAE_all": 0.06564747542142868, + "eval_screenspot_MAE_h": 0.03897890945275625, + "eval_screenspot_MAE_w": 0.07650155077377956, + "eval_screenspot_MAE_x_boxes": 0.07791633903980255, + "eval_screenspot_MAE_y_boxes": 0.04749378779282173, + "eval_screenspot_inside_bbox": 0.6566666762034098, + "eval_screenspot_loss": 1.6820822954177856, + "eval_screenspot_loss_ce": 0.00038540839644459385, + "eval_screenspot_loss_iou": 0.6896158854166666, + "eval_screenspot_loss_num": 0.07676951090494792, + "eval_screenspot_loss_xval": 1.7643229166666667, + "eval_screenspot_runtime": 47.089, + "eval_screenspot_samples_per_second": 1.89, + "eval_screenspot_steps_per_second": 0.064, + "num_input_tokens_seen": 126043356, + "step": 2250 + }, + { + "epoch": 5.011135857461024, + "eval_compot_CIoU": 0.35792700946331024, + "eval_compot_GIoU": 0.37573473155498505, + "eval_compot_IoU": 0.4099069982767105, + "eval_compot_MAE_all": 0.0185648575425148, + "eval_compot_MAE_h": 0.008833811618387699, + "eval_compot_MAE_w": 0.02340342104434967, + "eval_compot_MAE_x_boxes": 0.02984704216942191, + "eval_compot_MAE_y_boxes": 0.006203887518495321, + "eval_compot_inside_bbox": 0.6458333432674408, + "eval_compot_loss": 1.367824673652649, + "eval_compot_loss_ce": 0.00032587358145974576, + "eval_compot_loss_iou": 0.623779296875, + "eval_compot_loss_num": 0.017635345458984375, + "eval_compot_loss_xval": 1.33447265625, + "eval_compot_runtime": 24.6368, + "eval_compot_samples_per_second": 2.029, + "eval_compot_steps_per_second": 0.081, + "num_input_tokens_seen": 126043356, + "step": 2250 + }, + { + "epoch": 5.011135857461024, + "eval_custom_ui_val_CIoU": 0.45421523518032497, + "eval_custom_ui_val_GIoU": 0.47301916943656075, + "eval_custom_ui_val_IoU": 0.5073628789848752, + "eval_custom_ui_val_MAE_all": 0.03390951289070977, + "eval_custom_ui_val_MAE_h": 0.020180532294842932, + "eval_custom_ui_val_MAE_w": 0.03951867276595698, + "eval_custom_ui_val_MAE_x_boxes": 0.039389809800518885, + "eval_custom_ui_val_MAE_y_boxes": 0.016320706371011004, + "eval_custom_ui_val_inside_bbox": 0.7353395091162788, + "eval_custom_ui_val_loss": 1.2387813329696655, + "eval_custom_ui_val_loss_ce": 0.0004057693327518387, + "eval_custom_ui_val_loss_iou": 0.521484375, + "eval_custom_ui_val_loss_num": 0.033151202731662326, + "eval_custom_ui_val_loss_xval": 1.2088758680555556, + "eval_custom_ui_val_runtime": 73.0296, + "eval_custom_ui_val_samples_per_second": 3.629, + "eval_custom_ui_val_steps_per_second": 0.123, + "num_input_tokens_seen": 126043356, + "step": 2250 + }, + { + "epoch": 5.011135857461024, + "loss": 0.9642201066017151, + "loss_ce": 0.00035298120928928256, + "loss_iou": 0.419921875, + "loss_num": 0.024169921875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 126043356, + "step": 2250 + }, + { + "epoch": 5.013363028953229, + "grad_norm": 21.67910385131836, + "learning_rate": 1e-06, + "loss": 0.6784, + "num_input_tokens_seen": 126099280, + "step": 2251 + }, + { + "epoch": 5.013363028953229, + "loss": 0.6512374877929688, + "loss_ce": 0.0003585785743780434, + "loss_iou": 0.28125, + "loss_num": 0.017822265625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 126099280, + "step": 2251 + }, + { + "epoch": 5.015590200445434, + "grad_norm": 29.783891677856445, + "learning_rate": 1e-06, + "loss": 1.0225, + "num_input_tokens_seen": 126154972, + "step": 2252 + }, + { + "epoch": 5.015590200445434, + "loss": 0.9770213961601257, + "loss_ce": 0.0007030742126516998, + "loss_iou": 0.380859375, + "loss_num": 0.04296875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 126154972, + "step": 2252 + }, + { + "epoch": 5.017817371937639, + "grad_norm": 28.99271011352539, + "learning_rate": 1e-06, + "loss": 0.713, + "num_input_tokens_seen": 126208548, + "step": 2253 + }, + { + "epoch": 5.017817371937639, + "loss": 0.7910330295562744, + "loss_ce": 0.0005056494846940041, + "loss_iou": 0.3046875, + "loss_num": 0.0361328125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 126208548, + "step": 2253 + }, + { + "epoch": 5.020044543429844, + "grad_norm": 13.615692138671875, + "learning_rate": 1e-06, + "loss": 0.522, + "num_input_tokens_seen": 126262364, + "step": 2254 + }, + { + "epoch": 5.020044543429844, + "loss": 0.4722355008125305, + "loss_ce": 0.0002506262972019613, + "loss_iou": 0.1796875, + "loss_num": 0.022705078125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 126262364, + "step": 2254 + }, + { + "epoch": 5.022271714922049, + "grad_norm": 13.118451118469238, + "learning_rate": 1e-06, + "loss": 0.7156, + "num_input_tokens_seen": 126319680, + "step": 2255 + }, + { + "epoch": 5.022271714922049, + "loss": 0.7510231733322144, + "loss_ce": 0.00029075576458126307, + "loss_iou": 0.31640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.75, + "num_input_tokens_seen": 126319680, + "step": 2255 + }, + { + "epoch": 5.0244988864142535, + "grad_norm": 51.616050720214844, + "learning_rate": 1e-06, + "loss": 0.9654, + "num_input_tokens_seen": 126375108, + "step": 2256 + }, + { + "epoch": 5.0244988864142535, + "loss": 0.7489110231399536, + "loss_ce": 0.00025383190950378776, + "loss_iou": 0.3125, + "loss_num": 0.025146484375, + "loss_xval": 0.75, + "num_input_tokens_seen": 126375108, + "step": 2256 + }, + { + "epoch": 5.026726057906459, + "grad_norm": 15.720102310180664, + "learning_rate": 1e-06, + "loss": 0.8735, + "num_input_tokens_seen": 126432556, + "step": 2257 + }, + { + "epoch": 5.026726057906459, + "loss": 0.6794352531433105, + "loss_ce": 0.00023599226551596075, + "loss_iou": 0.30078125, + "loss_num": 0.015380859375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 126432556, + "step": 2257 + }, + { + "epoch": 5.028953229398664, + "grad_norm": 35.91153335571289, + "learning_rate": 1e-06, + "loss": 0.6807, + "num_input_tokens_seen": 126489040, + "step": 2258 + }, + { + "epoch": 5.028953229398664, + "loss": 0.6047276258468628, + "loss_ce": 0.00023541940026916564, + "loss_iou": 0.2470703125, + "loss_num": 0.0220947265625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 126489040, + "step": 2258 + }, + { + "epoch": 5.031180400890869, + "grad_norm": 19.358016967773438, + "learning_rate": 1e-06, + "loss": 0.7165, + "num_input_tokens_seen": 126546532, + "step": 2259 + }, + { + "epoch": 5.031180400890869, + "loss": 0.5831573605537415, + "loss_ce": 0.0006378005491569638, + "loss_iou": 0.25390625, + "loss_num": 0.01519775390625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 126546532, + "step": 2259 + }, + { + "epoch": 5.033407572383074, + "grad_norm": 29.59214973449707, + "learning_rate": 1e-06, + "loss": 0.8328, + "num_input_tokens_seen": 126600020, + "step": 2260 + }, + { + "epoch": 5.033407572383074, + "loss": 0.8578157424926758, + "loss_ce": 0.0002718048053793609, + "loss_iou": 0.365234375, + "loss_num": 0.025634765625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 126600020, + "step": 2260 + }, + { + "epoch": 5.035634743875279, + "grad_norm": 24.364622116088867, + "learning_rate": 1e-06, + "loss": 0.6877, + "num_input_tokens_seen": 126658112, + "step": 2261 + }, + { + "epoch": 5.035634743875279, + "loss": 0.7512416839599609, + "loss_ce": 0.00026507957954891026, + "loss_iou": 0.333984375, + "loss_num": 0.0166015625, + "loss_xval": 0.75, + "num_input_tokens_seen": 126658112, + "step": 2261 + }, + { + "epoch": 5.0378619153674835, + "grad_norm": 20.649215698242188, + "learning_rate": 1e-06, + "loss": 0.8893, + "num_input_tokens_seen": 126715528, + "step": 2262 + }, + { + "epoch": 5.0378619153674835, + "loss": 0.9017385244369507, + "loss_ce": 0.0002493016654625535, + "loss_iou": 0.3359375, + "loss_num": 0.04638671875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 126715528, + "step": 2262 + }, + { + "epoch": 5.040089086859688, + "grad_norm": 27.44195556640625, + "learning_rate": 1e-06, + "loss": 0.531, + "num_input_tokens_seen": 126769300, + "step": 2263 + }, + { + "epoch": 5.040089086859688, + "loss": 0.6221789121627808, + "loss_ce": 0.00023064325796440244, + "loss_iou": 0.283203125, + "loss_num": 0.01092529296875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 126769300, + "step": 2263 + }, + { + "epoch": 5.042316258351893, + "grad_norm": 16.656320571899414, + "learning_rate": 1e-06, + "loss": 0.5794, + "num_input_tokens_seen": 126825972, + "step": 2264 + }, + { + "epoch": 5.042316258351893, + "loss": 0.6281655430793762, + "loss_ce": 0.0002358591154916212, + "loss_iou": 0.271484375, + "loss_num": 0.0166015625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 126825972, + "step": 2264 + }, + { + "epoch": 5.044543429844098, + "grad_norm": 29.34036636352539, + "learning_rate": 1e-06, + "loss": 0.7923, + "num_input_tokens_seen": 126883048, + "step": 2265 + }, + { + "epoch": 5.044543429844098, + "loss": 0.6961706876754761, + "loss_ce": 0.000247849035076797, + "loss_iou": 0.3046875, + "loss_num": 0.0169677734375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 126883048, + "step": 2265 + }, + { + "epoch": 5.046770601336303, + "grad_norm": 28.162872314453125, + "learning_rate": 1e-06, + "loss": 1.0247, + "num_input_tokens_seen": 126937628, + "step": 2266 + }, + { + "epoch": 5.046770601336303, + "loss": 0.8716781139373779, + "loss_ce": 0.0003402004367671907, + "loss_iou": 0.392578125, + "loss_num": 0.0172119140625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 126937628, + "step": 2266 + }, + { + "epoch": 5.048997772828508, + "grad_norm": 20.128053665161133, + "learning_rate": 1e-06, + "loss": 1.1153, + "num_input_tokens_seen": 126989292, + "step": 2267 + }, + { + "epoch": 5.048997772828508, + "loss": 1.152024507522583, + "loss_ce": 0.0004131349269300699, + "loss_iou": 0.51171875, + "loss_num": 0.02587890625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 126989292, + "step": 2267 + }, + { + "epoch": 5.051224944320713, + "grad_norm": 17.286222457885742, + "learning_rate": 1e-06, + "loss": 0.5722, + "num_input_tokens_seen": 127046740, + "step": 2268 + }, + { + "epoch": 5.051224944320713, + "loss": 0.39940914511680603, + "loss_ce": 0.00023922644322738051, + "loss_iou": 0.16015625, + "loss_num": 0.015869140625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 127046740, + "step": 2268 + }, + { + "epoch": 5.0534521158129175, + "grad_norm": 17.838388442993164, + "learning_rate": 1e-06, + "loss": 0.6537, + "num_input_tokens_seen": 127103684, + "step": 2269 + }, + { + "epoch": 5.0534521158129175, + "loss": 0.643367350101471, + "loss_ce": 0.0003009595675393939, + "loss_iou": 0.291015625, + "loss_num": 0.011962890625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 127103684, + "step": 2269 + }, + { + "epoch": 5.055679287305122, + "grad_norm": 22.380460739135742, + "learning_rate": 1e-06, + "loss": 0.8209, + "num_input_tokens_seen": 127160244, + "step": 2270 + }, + { + "epoch": 5.055679287305122, + "loss": 0.7935837507247925, + "loss_ce": 0.000370887661119923, + "loss_iou": 0.32421875, + "loss_num": 0.028564453125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 127160244, + "step": 2270 + }, + { + "epoch": 5.057906458797327, + "grad_norm": 19.87263298034668, + "learning_rate": 1e-06, + "loss": 0.7521, + "num_input_tokens_seen": 127215716, + "step": 2271 + }, + { + "epoch": 5.057906458797327, + "loss": 0.8534216284751892, + "loss_ce": 0.0002721815253607929, + "loss_iou": 0.357421875, + "loss_num": 0.027587890625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 127215716, + "step": 2271 + }, + { + "epoch": 5.060133630289532, + "grad_norm": 24.000696182250977, + "learning_rate": 1e-06, + "loss": 0.5931, + "num_input_tokens_seen": 127274152, + "step": 2272 + }, + { + "epoch": 5.060133630289532, + "loss": 0.5939878225326538, + "loss_ce": 0.00023785245139151812, + "loss_iou": 0.2451171875, + "loss_num": 0.0208740234375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 127274152, + "step": 2272 + }, + { + "epoch": 5.062360801781737, + "grad_norm": 17.99005889892578, + "learning_rate": 1e-06, + "loss": 0.6256, + "num_input_tokens_seen": 127330676, + "step": 2273 + }, + { + "epoch": 5.062360801781737, + "loss": 0.5732445120811462, + "loss_ce": 0.00024645167286507785, + "loss_iou": 0.244140625, + "loss_num": 0.016845703125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 127330676, + "step": 2273 + }, + { + "epoch": 5.064587973273942, + "grad_norm": 17.27025032043457, + "learning_rate": 1e-06, + "loss": 0.6703, + "num_input_tokens_seen": 127385060, + "step": 2274 + }, + { + "epoch": 5.064587973273942, + "loss": 0.6232882738113403, + "loss_ce": 0.000241378482314758, + "loss_iou": 0.287109375, + "loss_num": 0.00958251953125, + "loss_xval": 0.625, + "num_input_tokens_seen": 127385060, + "step": 2274 + }, + { + "epoch": 5.066815144766147, + "grad_norm": 18.195852279663086, + "learning_rate": 1e-06, + "loss": 0.7011, + "num_input_tokens_seen": 127441056, + "step": 2275 + }, + { + "epoch": 5.066815144766147, + "loss": 0.6917237043380737, + "loss_ce": 0.00031748326728120446, + "loss_iou": 0.2734375, + "loss_num": 0.0289306640625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 127441056, + "step": 2275 + }, + { + "epoch": 5.0690423162583516, + "grad_norm": 17.40542221069336, + "learning_rate": 1e-06, + "loss": 0.6412, + "num_input_tokens_seen": 127495888, + "step": 2276 + }, + { + "epoch": 5.0690423162583516, + "loss": 0.7902520895004272, + "loss_ce": 0.00021307067072484642, + "loss_iou": 0.322265625, + "loss_num": 0.029052734375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 127495888, + "step": 2276 + }, + { + "epoch": 5.071269487750556, + "grad_norm": 15.051342010498047, + "learning_rate": 1e-06, + "loss": 0.9653, + "num_input_tokens_seen": 127552560, + "step": 2277 + }, + { + "epoch": 5.071269487750556, + "loss": 1.024023175239563, + "loss_ce": 0.0005856686620973051, + "loss_iou": 0.43359375, + "loss_num": 0.03173828125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 127552560, + "step": 2277 + }, + { + "epoch": 5.073496659242761, + "grad_norm": 15.868587493896484, + "learning_rate": 1e-06, + "loss": 0.6495, + "num_input_tokens_seen": 127609712, + "step": 2278 + }, + { + "epoch": 5.073496659242761, + "loss": 0.6852548718452454, + "loss_ce": 0.00019630559836514294, + "loss_iou": 0.29296875, + "loss_num": 0.0198974609375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 127609712, + "step": 2278 + }, + { + "epoch": 5.075723830734967, + "grad_norm": 23.950061798095703, + "learning_rate": 1e-06, + "loss": 0.6507, + "num_input_tokens_seen": 127667116, + "step": 2279 + }, + { + "epoch": 5.075723830734967, + "loss": 0.6977980136871338, + "loss_ce": 0.00028824395849369466, + "loss_iou": 0.31640625, + "loss_num": 0.01287841796875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 127667116, + "step": 2279 + }, + { + "epoch": 5.077951002227172, + "grad_norm": 83.21546936035156, + "learning_rate": 1e-06, + "loss": 0.6931, + "num_input_tokens_seen": 127724152, + "step": 2280 + }, + { + "epoch": 5.077951002227172, + "loss": 0.575419545173645, + "loss_ce": 0.00022418698063120246, + "loss_iou": 0.2578125, + "loss_num": 0.01214599609375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 127724152, + "step": 2280 + }, + { + "epoch": 5.080178173719377, + "grad_norm": 16.499300003051758, + "learning_rate": 1e-06, + "loss": 0.7207, + "num_input_tokens_seen": 127779712, + "step": 2281 + }, + { + "epoch": 5.080178173719377, + "loss": 0.7757115364074707, + "loss_ce": 0.00025984214153140783, + "loss_iou": 0.30078125, + "loss_num": 0.034912109375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 127779712, + "step": 2281 + }, + { + "epoch": 5.082405345211582, + "grad_norm": 21.090255737304688, + "learning_rate": 1e-06, + "loss": 0.8105, + "num_input_tokens_seen": 127835904, + "step": 2282 + }, + { + "epoch": 5.082405345211582, + "loss": 0.7296441793441772, + "loss_ce": 0.00027409923495724797, + "loss_iou": 0.328125, + "loss_num": 0.0142822265625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 127835904, + "step": 2282 + }, + { + "epoch": 5.0846325167037865, + "grad_norm": 21.14578628540039, + "learning_rate": 1e-06, + "loss": 0.8618, + "num_input_tokens_seen": 127892840, + "step": 2283 + }, + { + "epoch": 5.0846325167037865, + "loss": 0.9588485360145569, + "loss_ce": 0.00023040127416606992, + "loss_iou": 0.3984375, + "loss_num": 0.0322265625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 127892840, + "step": 2283 + }, + { + "epoch": 5.086859688195991, + "grad_norm": 48.02599334716797, + "learning_rate": 1e-06, + "loss": 0.5714, + "num_input_tokens_seen": 127951528, + "step": 2284 + }, + { + "epoch": 5.086859688195991, + "loss": 0.5024325847625732, + "loss_ce": 0.00023529936152044684, + "loss_iou": 0.2265625, + "loss_num": 0.00994873046875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 127951528, + "step": 2284 + }, + { + "epoch": 5.089086859688196, + "grad_norm": 15.565524101257324, + "learning_rate": 1e-06, + "loss": 0.8572, + "num_input_tokens_seen": 128008636, + "step": 2285 + }, + { + "epoch": 5.089086859688196, + "loss": 0.9711748361587524, + "loss_ce": 0.0002275363076478243, + "loss_iou": 0.40625, + "loss_num": 0.03125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 128008636, + "step": 2285 + }, + { + "epoch": 5.091314031180401, + "grad_norm": 23.433475494384766, + "learning_rate": 1e-06, + "loss": 0.8038, + "num_input_tokens_seen": 128066100, + "step": 2286 + }, + { + "epoch": 5.091314031180401, + "loss": 0.787333607673645, + "loss_ce": 0.00022421692847274244, + "loss_iou": 0.345703125, + "loss_num": 0.0194091796875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 128066100, + "step": 2286 + }, + { + "epoch": 5.093541202672606, + "grad_norm": 24.027969360351562, + "learning_rate": 1e-06, + "loss": 0.7235, + "num_input_tokens_seen": 128121516, + "step": 2287 + }, + { + "epoch": 5.093541202672606, + "loss": 0.7629618644714355, + "loss_ce": 0.0002665651263669133, + "loss_iou": 0.349609375, + "loss_num": 0.012451171875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 128121516, + "step": 2287 + }, + { + "epoch": 5.095768374164811, + "grad_norm": 19.88593292236328, + "learning_rate": 1e-06, + "loss": 0.9881, + "num_input_tokens_seen": 128174484, + "step": 2288 + }, + { + "epoch": 5.095768374164811, + "loss": 1.1999242305755615, + "loss_ce": 0.00021719752112403512, + "loss_iou": 0.4921875, + "loss_num": 0.042724609375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 128174484, + "step": 2288 + }, + { + "epoch": 5.097995545657016, + "grad_norm": 45.70637130737305, + "learning_rate": 1e-06, + "loss": 0.5952, + "num_input_tokens_seen": 128232784, + "step": 2289 + }, + { + "epoch": 5.097995545657016, + "loss": 0.6135152578353882, + "loss_ce": 0.00023402433726005256, + "loss_iou": 0.267578125, + "loss_num": 0.015869140625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 128232784, + "step": 2289 + }, + { + "epoch": 5.1002227171492205, + "grad_norm": 16.309289932250977, + "learning_rate": 1e-06, + "loss": 0.7085, + "num_input_tokens_seen": 128287280, + "step": 2290 + }, + { + "epoch": 5.1002227171492205, + "loss": 0.734100878238678, + "loss_ce": 0.0002141737350029871, + "loss_iou": 0.30859375, + "loss_num": 0.02294921875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 128287280, + "step": 2290 + }, + { + "epoch": 5.102449888641425, + "grad_norm": 15.12460994720459, + "learning_rate": 1e-06, + "loss": 0.5811, + "num_input_tokens_seen": 128342260, + "step": 2291 + }, + { + "epoch": 5.102449888641425, + "loss": 0.5832387208938599, + "loss_ce": 0.000230930614634417, + "loss_iou": 0.2578125, + "loss_num": 0.013916015625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 128342260, + "step": 2291 + }, + { + "epoch": 5.10467706013363, + "grad_norm": 16.05999183654785, + "learning_rate": 1e-06, + "loss": 0.6387, + "num_input_tokens_seen": 128394812, + "step": 2292 + }, + { + "epoch": 5.10467706013363, + "loss": 0.6987403035163879, + "loss_ce": 0.00025396081036888063, + "loss_iou": 0.298828125, + "loss_num": 0.02001953125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 128394812, + "step": 2292 + }, + { + "epoch": 5.106904231625835, + "grad_norm": 17.23577117919922, + "learning_rate": 1e-06, + "loss": 0.5956, + "num_input_tokens_seen": 128451808, + "step": 2293 + }, + { + "epoch": 5.106904231625835, + "loss": 0.5036342740058899, + "loss_ce": 0.0002162978344131261, + "loss_iou": 0.212890625, + "loss_num": 0.0157470703125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 128451808, + "step": 2293 + }, + { + "epoch": 5.10913140311804, + "grad_norm": 28.054798126220703, + "learning_rate": 1e-06, + "loss": 1.03, + "num_input_tokens_seen": 128505588, + "step": 2294 + }, + { + "epoch": 5.10913140311804, + "loss": 0.8542524576187134, + "loss_ce": 0.0002485929289832711, + "loss_iou": 0.365234375, + "loss_num": 0.02490234375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 128505588, + "step": 2294 + }, + { + "epoch": 5.111358574610245, + "grad_norm": 15.79360580444336, + "learning_rate": 1e-06, + "loss": 0.7536, + "num_input_tokens_seen": 128564004, + "step": 2295 + }, + { + "epoch": 5.111358574610245, + "loss": 0.846659779548645, + "loss_ce": 0.00022423264454118907, + "loss_iou": 0.349609375, + "loss_num": 0.029541015625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 128564004, + "step": 2295 + }, + { + "epoch": 5.11358574610245, + "grad_norm": 35.329349517822266, + "learning_rate": 1e-06, + "loss": 0.8545, + "num_input_tokens_seen": 128622824, + "step": 2296 + }, + { + "epoch": 5.11358574610245, + "loss": 0.8776304721832275, + "loss_ce": 0.00043314468348398805, + "loss_iou": 0.369140625, + "loss_num": 0.02783203125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 128622824, + "step": 2296 + }, + { + "epoch": 5.1158129175946545, + "grad_norm": 17.822633743286133, + "learning_rate": 1e-06, + "loss": 0.7707, + "num_input_tokens_seen": 128679316, + "step": 2297 + }, + { + "epoch": 5.1158129175946545, + "loss": 0.49904176592826843, + "loss_ce": 0.0002624738262966275, + "loss_iou": 0.201171875, + "loss_num": 0.0194091796875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 128679316, + "step": 2297 + }, + { + "epoch": 5.118040089086859, + "grad_norm": 24.6488094329834, + "learning_rate": 1e-06, + "loss": 0.7886, + "num_input_tokens_seen": 128734780, + "step": 2298 + }, + { + "epoch": 5.118040089086859, + "loss": 0.8342887163162231, + "loss_ce": 0.00030440062982961535, + "loss_iou": 0.384765625, + "loss_num": 0.0133056640625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 128734780, + "step": 2298 + }, + { + "epoch": 5.120267260579064, + "grad_norm": 21.603078842163086, + "learning_rate": 1e-06, + "loss": 0.8968, + "num_input_tokens_seen": 128790756, + "step": 2299 + }, + { + "epoch": 5.120267260579064, + "loss": 1.144487738609314, + "loss_ce": 0.0004448448889888823, + "loss_iou": 0.451171875, + "loss_num": 0.048095703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 128790756, + "step": 2299 + }, + { + "epoch": 5.122494432071269, + "grad_norm": 22.150482177734375, + "learning_rate": 1e-06, + "loss": 0.8125, + "num_input_tokens_seen": 128847224, + "step": 2300 + }, + { + "epoch": 5.122494432071269, + "loss": 0.8337835669517517, + "loss_ce": 0.0002874800411518663, + "loss_iou": 0.330078125, + "loss_num": 0.034423828125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 128847224, + "step": 2300 + }, + { + "epoch": 5.124721603563474, + "grad_norm": 17.031408309936523, + "learning_rate": 1e-06, + "loss": 0.5427, + "num_input_tokens_seen": 128904848, + "step": 2301 + }, + { + "epoch": 5.124721603563474, + "loss": 0.5903265476226807, + "loss_ce": 0.00023866641276981682, + "loss_iou": 0.2333984375, + "loss_num": 0.02490234375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 128904848, + "step": 2301 + }, + { + "epoch": 5.12694877505568, + "grad_norm": 17.657089233398438, + "learning_rate": 1e-06, + "loss": 0.5752, + "num_input_tokens_seen": 128960196, + "step": 2302 + }, + { + "epoch": 5.12694877505568, + "loss": 0.4570813775062561, + "loss_ce": 0.00023321554181165993, + "loss_iou": 0.193359375, + "loss_num": 0.01397705078125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 128960196, + "step": 2302 + }, + { + "epoch": 5.129175946547885, + "grad_norm": 16.3359317779541, + "learning_rate": 1e-06, + "loss": 0.6331, + "num_input_tokens_seen": 129017636, + "step": 2303 + }, + { + "epoch": 5.129175946547885, + "loss": 0.6099591255187988, + "loss_ce": 0.0005841399542987347, + "loss_iou": 0.263671875, + "loss_num": 0.0167236328125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 129017636, + "step": 2303 + }, + { + "epoch": 5.131403118040089, + "grad_norm": 21.530765533447266, + "learning_rate": 1e-06, + "loss": 0.9466, + "num_input_tokens_seen": 129073348, + "step": 2304 + }, + { + "epoch": 5.131403118040089, + "loss": 1.1084952354431152, + "loss_ce": 0.00034102535573765635, + "loss_iou": 0.474609375, + "loss_num": 0.03173828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 129073348, + "step": 2304 + }, + { + "epoch": 5.133630289532294, + "grad_norm": 37.55251693725586, + "learning_rate": 1e-06, + "loss": 0.9198, + "num_input_tokens_seen": 129130900, + "step": 2305 + }, + { + "epoch": 5.133630289532294, + "loss": 0.638685941696167, + "loss_ce": 0.0002581804874353111, + "loss_iou": 0.267578125, + "loss_num": 0.020751953125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 129130900, + "step": 2305 + }, + { + "epoch": 5.135857461024499, + "grad_norm": 28.42352867126465, + "learning_rate": 1e-06, + "loss": 0.737, + "num_input_tokens_seen": 129183076, + "step": 2306 + }, + { + "epoch": 5.135857461024499, + "loss": 0.6569973826408386, + "loss_ce": 0.0002591205993667245, + "loss_iou": 0.29296875, + "loss_num": 0.0145263671875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 129183076, + "step": 2306 + }, + { + "epoch": 5.138084632516704, + "grad_norm": 15.96066951751709, + "learning_rate": 1e-06, + "loss": 0.8596, + "num_input_tokens_seen": 129239480, + "step": 2307 + }, + { + "epoch": 5.138084632516704, + "loss": 0.7568236589431763, + "loss_ce": 0.00023182888980954885, + "loss_iou": 0.330078125, + "loss_num": 0.01953125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 129239480, + "step": 2307 + }, + { + "epoch": 5.140311804008909, + "grad_norm": 21.160701751708984, + "learning_rate": 1e-06, + "loss": 0.7788, + "num_input_tokens_seen": 129295984, + "step": 2308 + }, + { + "epoch": 5.140311804008909, + "loss": 0.9611057043075562, + "loss_ce": 0.00041232837247662246, + "loss_iou": 0.36328125, + "loss_num": 0.04638671875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 129295984, + "step": 2308 + }, + { + "epoch": 5.142538975501114, + "grad_norm": 23.850879669189453, + "learning_rate": 1e-06, + "loss": 0.8738, + "num_input_tokens_seen": 129350436, + "step": 2309 + }, + { + "epoch": 5.142538975501114, + "loss": 0.835231602191925, + "loss_ce": 0.0002706579980440438, + "loss_iou": 0.337890625, + "loss_num": 0.031982421875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 129350436, + "step": 2309 + }, + { + "epoch": 5.144766146993319, + "grad_norm": 40.127952575683594, + "learning_rate": 1e-06, + "loss": 0.7161, + "num_input_tokens_seen": 129406620, + "step": 2310 + }, + { + "epoch": 5.144766146993319, + "loss": 0.691180944442749, + "loss_ce": 0.00026295060524716973, + "loss_iou": 0.3125, + "loss_num": 0.01348876953125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 129406620, + "step": 2310 + }, + { + "epoch": 5.146993318485523, + "grad_norm": 21.454145431518555, + "learning_rate": 1e-06, + "loss": 0.5811, + "num_input_tokens_seen": 129462148, + "step": 2311 + }, + { + "epoch": 5.146993318485523, + "loss": 0.5917257070541382, + "loss_ce": 0.0002950755297206342, + "loss_iou": 0.234375, + "loss_num": 0.024658203125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 129462148, + "step": 2311 + }, + { + "epoch": 5.149220489977728, + "grad_norm": 16.070457458496094, + "learning_rate": 1e-06, + "loss": 0.458, + "num_input_tokens_seen": 129518980, + "step": 2312 + }, + { + "epoch": 5.149220489977728, + "loss": 0.5424692630767822, + "loss_ce": 0.0002329152193851769, + "loss_iou": 0.2314453125, + "loss_num": 0.015869140625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 129518980, + "step": 2312 + }, + { + "epoch": 5.151447661469933, + "grad_norm": 27.548030853271484, + "learning_rate": 1e-06, + "loss": 0.7495, + "num_input_tokens_seen": 129574880, + "step": 2313 + }, + { + "epoch": 5.151447661469933, + "loss": 0.7275496125221252, + "loss_ce": 0.00025467833620496094, + "loss_iou": 0.3203125, + "loss_num": 0.0169677734375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 129574880, + "step": 2313 + }, + { + "epoch": 5.153674832962138, + "grad_norm": 17.572242736816406, + "learning_rate": 1e-06, + "loss": 0.6046, + "num_input_tokens_seen": 129631804, + "step": 2314 + }, + { + "epoch": 5.153674832962138, + "loss": 0.7023811340332031, + "loss_ce": 0.0007209961186163127, + "loss_iou": 0.28125, + "loss_num": 0.028076171875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 129631804, + "step": 2314 + }, + { + "epoch": 5.155902004454343, + "grad_norm": 17.386499404907227, + "learning_rate": 1e-06, + "loss": 0.8311, + "num_input_tokens_seen": 129684280, + "step": 2315 + }, + { + "epoch": 5.155902004454343, + "loss": 0.8039573431015015, + "loss_ce": 0.00024638883769512177, + "loss_iou": 0.357421875, + "loss_num": 0.017578125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 129684280, + "step": 2315 + }, + { + "epoch": 5.158129175946548, + "grad_norm": 18.181276321411133, + "learning_rate": 1e-06, + "loss": 0.8983, + "num_input_tokens_seen": 129742428, + "step": 2316 + }, + { + "epoch": 5.158129175946548, + "loss": 0.8774632811546326, + "loss_ce": 0.000266014103544876, + "loss_iou": 0.34375, + "loss_num": 0.037841796875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 129742428, + "step": 2316 + }, + { + "epoch": 5.160356347438753, + "grad_norm": 21.5535888671875, + "learning_rate": 1e-06, + "loss": 0.8057, + "num_input_tokens_seen": 129796972, + "step": 2317 + }, + { + "epoch": 5.160356347438753, + "loss": 0.9526360630989075, + "loss_ce": 0.00024349387967959046, + "loss_iou": 0.38671875, + "loss_num": 0.0361328125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 129796972, + "step": 2317 + }, + { + "epoch": 5.1625835189309575, + "grad_norm": 21.653419494628906, + "learning_rate": 1e-06, + "loss": 0.7653, + "num_input_tokens_seen": 129851752, + "step": 2318 + }, + { + "epoch": 5.1625835189309575, + "loss": 0.8452595472335815, + "loss_ce": 0.0002888377639465034, + "loss_iou": 0.345703125, + "loss_num": 0.030517578125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 129851752, + "step": 2318 + }, + { + "epoch": 5.164810690423162, + "grad_norm": 23.30936622619629, + "learning_rate": 1e-06, + "loss": 0.7064, + "num_input_tokens_seen": 129907124, + "step": 2319 + }, + { + "epoch": 5.164810690423162, + "loss": 0.541006326675415, + "loss_ce": 0.0002348569978494197, + "loss_iou": 0.228515625, + "loss_num": 0.0164794921875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 129907124, + "step": 2319 + }, + { + "epoch": 5.167037861915367, + "grad_norm": 16.756088256835938, + "learning_rate": 1e-06, + "loss": 0.5693, + "num_input_tokens_seen": 129963388, + "step": 2320 + }, + { + "epoch": 5.167037861915367, + "loss": 0.481838583946228, + "loss_ce": 0.00027121190214529634, + "loss_iou": 0.203125, + "loss_num": 0.0150146484375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 129963388, + "step": 2320 + }, + { + "epoch": 5.169265033407572, + "grad_norm": 18.584272384643555, + "learning_rate": 1e-06, + "loss": 0.896, + "num_input_tokens_seen": 130019724, + "step": 2321 + }, + { + "epoch": 5.169265033407572, + "loss": 0.9863563776016235, + "loss_ce": 0.00027232500724494457, + "loss_iou": 0.41796875, + "loss_num": 0.0299072265625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 130019724, + "step": 2321 + }, + { + "epoch": 5.171492204899777, + "grad_norm": 23.582571029663086, + "learning_rate": 1e-06, + "loss": 0.6948, + "num_input_tokens_seen": 130077412, + "step": 2322 + }, + { + "epoch": 5.171492204899777, + "loss": 0.7178100347518921, + "loss_ce": 0.0002807402634061873, + "loss_iou": 0.3125, + "loss_num": 0.018798828125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 130077412, + "step": 2322 + }, + { + "epoch": 5.173719376391982, + "grad_norm": 15.536462783813477, + "learning_rate": 1e-06, + "loss": 0.8434, + "num_input_tokens_seen": 130133952, + "step": 2323 + }, + { + "epoch": 5.173719376391982, + "loss": 0.7571786642074585, + "loss_ce": 0.00076994055416435, + "loss_iou": 0.32421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 130133952, + "step": 2323 + }, + { + "epoch": 5.1759465478841875, + "grad_norm": 21.30929946899414, + "learning_rate": 1e-06, + "loss": 0.832, + "num_input_tokens_seen": 130190140, + "step": 2324 + }, + { + "epoch": 5.1759465478841875, + "loss": 0.9763731956481934, + "loss_ce": 0.0002989959320984781, + "loss_iou": 0.380859375, + "loss_num": 0.042724609375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 130190140, + "step": 2324 + }, + { + "epoch": 5.178173719376392, + "grad_norm": 55.067665100097656, + "learning_rate": 1e-06, + "loss": 0.8182, + "num_input_tokens_seen": 130244552, + "step": 2325 + }, + { + "epoch": 5.178173719376392, + "loss": 1.0063856840133667, + "loss_ce": 0.0002821519155986607, + "loss_iou": 0.41796875, + "loss_num": 0.033935546875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 130244552, + "step": 2325 + }, + { + "epoch": 5.180400890868597, + "grad_norm": 19.690275192260742, + "learning_rate": 1e-06, + "loss": 0.9497, + "num_input_tokens_seen": 130301252, + "step": 2326 + }, + { + "epoch": 5.180400890868597, + "loss": 0.765385627746582, + "loss_ce": 0.00024891988141462207, + "loss_iou": 0.3359375, + "loss_num": 0.018310546875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 130301252, + "step": 2326 + }, + { + "epoch": 5.182628062360802, + "grad_norm": 14.636940002441406, + "learning_rate": 1e-06, + "loss": 0.5755, + "num_input_tokens_seen": 130357596, + "step": 2327 + }, + { + "epoch": 5.182628062360802, + "loss": 0.5990309715270996, + "loss_ce": 0.00027604633942246437, + "loss_iou": 0.26953125, + "loss_num": 0.011474609375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 130357596, + "step": 2327 + }, + { + "epoch": 5.184855233853007, + "grad_norm": 19.635517120361328, + "learning_rate": 1e-06, + "loss": 0.6214, + "num_input_tokens_seen": 130413372, + "step": 2328 + }, + { + "epoch": 5.184855233853007, + "loss": 0.7523684501647949, + "loss_ce": 0.00029323421767912805, + "loss_iou": 0.31640625, + "loss_num": 0.023681640625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 130413372, + "step": 2328 + }, + { + "epoch": 5.187082405345212, + "grad_norm": 15.182498931884766, + "learning_rate": 1e-06, + "loss": 0.628, + "num_input_tokens_seen": 130468056, + "step": 2329 + }, + { + "epoch": 5.187082405345212, + "loss": 0.513440728187561, + "loss_ce": 0.00025709014153108, + "loss_iou": 0.220703125, + "loss_num": 0.01434326171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 130468056, + "step": 2329 + }, + { + "epoch": 5.189309576837417, + "grad_norm": 23.259883880615234, + "learning_rate": 1e-06, + "loss": 0.5599, + "num_input_tokens_seen": 130522728, + "step": 2330 + }, + { + "epoch": 5.189309576837417, + "loss": 0.483401358127594, + "loss_ce": 0.00024706803378649056, + "loss_iou": 0.1953125, + "loss_num": 0.018798828125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 130522728, + "step": 2330 + }, + { + "epoch": 5.1915367483296215, + "grad_norm": 27.320947647094727, + "learning_rate": 1e-06, + "loss": 0.7893, + "num_input_tokens_seen": 130577656, + "step": 2331 + }, + { + "epoch": 5.1915367483296215, + "loss": 0.6123491525650024, + "loss_ce": 0.00028857935103587806, + "loss_iou": 0.271484375, + "loss_num": 0.0135498046875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 130577656, + "step": 2331 + }, + { + "epoch": 5.193763919821826, + "grad_norm": 74.38851928710938, + "learning_rate": 1e-06, + "loss": 0.6629, + "num_input_tokens_seen": 130635684, + "step": 2332 + }, + { + "epoch": 5.193763919821826, + "loss": 0.7268493175506592, + "loss_ce": 0.0002868149313144386, + "loss_iou": 0.326171875, + "loss_num": 0.0146484375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 130635684, + "step": 2332 + }, + { + "epoch": 5.195991091314031, + "grad_norm": 21.843446731567383, + "learning_rate": 1e-06, + "loss": 0.6575, + "num_input_tokens_seen": 130693564, + "step": 2333 + }, + { + "epoch": 5.195991091314031, + "loss": 0.5599232912063599, + "loss_ce": 0.00023093904019333422, + "loss_iou": 0.251953125, + "loss_num": 0.011474609375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 130693564, + "step": 2333 + }, + { + "epoch": 5.198218262806236, + "grad_norm": 27.58201789855957, + "learning_rate": 1e-06, + "loss": 0.6391, + "num_input_tokens_seen": 130751884, + "step": 2334 + }, + { + "epoch": 5.198218262806236, + "loss": 0.6432796716690063, + "loss_ce": 0.00021332071628421545, + "loss_iou": 0.28515625, + "loss_num": 0.01409912109375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 130751884, + "step": 2334 + }, + { + "epoch": 5.200445434298441, + "grad_norm": 68.57735443115234, + "learning_rate": 1e-06, + "loss": 0.6312, + "num_input_tokens_seen": 130808940, + "step": 2335 + }, + { + "epoch": 5.200445434298441, + "loss": 0.45080137252807617, + "loss_ce": 0.00023984728613868356, + "loss_iou": 0.203125, + "loss_num": 0.0087890625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 130808940, + "step": 2335 + }, + { + "epoch": 5.202672605790646, + "grad_norm": 23.095355987548828, + "learning_rate": 1e-06, + "loss": 0.7388, + "num_input_tokens_seen": 130864876, + "step": 2336 + }, + { + "epoch": 5.202672605790646, + "loss": 0.8970399498939514, + "loss_ce": 0.0003114146529696882, + "loss_iou": 0.380859375, + "loss_num": 0.0269775390625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 130864876, + "step": 2336 + }, + { + "epoch": 5.204899777282851, + "grad_norm": 16.609811782836914, + "learning_rate": 1e-06, + "loss": 0.8964, + "num_input_tokens_seen": 130919176, + "step": 2337 + }, + { + "epoch": 5.204899777282851, + "loss": 1.08896803855896, + "loss_ce": 0.0003450649091973901, + "loss_iou": 0.484375, + "loss_num": 0.024169921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 130919176, + "step": 2337 + }, + { + "epoch": 5.2071269487750556, + "grad_norm": 17.34803009033203, + "learning_rate": 1e-06, + "loss": 0.6675, + "num_input_tokens_seen": 130977396, + "step": 2338 + }, + { + "epoch": 5.2071269487750556, + "loss": 0.661956250667572, + "loss_ce": 0.00021310552256181836, + "loss_iou": 0.259765625, + "loss_num": 0.0281982421875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 130977396, + "step": 2338 + }, + { + "epoch": 5.20935412026726, + "grad_norm": 19.648202896118164, + "learning_rate": 1e-06, + "loss": 0.8492, + "num_input_tokens_seen": 131032596, + "step": 2339 + }, + { + "epoch": 5.20935412026726, + "loss": 0.8113721013069153, + "loss_ce": 0.0003369731712155044, + "loss_iou": 0.34765625, + "loss_num": 0.022705078125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 131032596, + "step": 2339 + }, + { + "epoch": 5.211581291759465, + "grad_norm": 28.624279022216797, + "learning_rate": 1e-06, + "loss": 0.5556, + "num_input_tokens_seen": 131089720, + "step": 2340 + }, + { + "epoch": 5.211581291759465, + "loss": 0.615825891494751, + "loss_ce": 0.0003474131808616221, + "loss_iou": 0.25390625, + "loss_num": 0.021728515625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 131089720, + "step": 2340 + }, + { + "epoch": 5.21380846325167, + "grad_norm": 32.367496490478516, + "learning_rate": 1e-06, + "loss": 0.8943, + "num_input_tokens_seen": 131147500, + "step": 2341 + }, + { + "epoch": 5.21380846325167, + "loss": 1.0326426029205322, + "loss_ce": 0.000355017080437392, + "loss_iou": 0.40234375, + "loss_num": 0.045166015625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 131147500, + "step": 2341 + }, + { + "epoch": 5.216035634743875, + "grad_norm": 32.042381286621094, + "learning_rate": 1e-06, + "loss": 0.7757, + "num_input_tokens_seen": 131202024, + "step": 2342 + }, + { + "epoch": 5.216035634743875, + "loss": 0.7637057900428772, + "loss_ce": 0.0002780459471978247, + "loss_iou": 0.31640625, + "loss_num": 0.02587890625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 131202024, + "step": 2342 + }, + { + "epoch": 5.21826280623608, + "grad_norm": 24.583831787109375, + "learning_rate": 1e-06, + "loss": 0.859, + "num_input_tokens_seen": 131258048, + "step": 2343 + }, + { + "epoch": 5.21826280623608, + "loss": 0.815448522567749, + "loss_ce": 0.0002629937371239066, + "loss_iou": 0.31640625, + "loss_num": 0.036865234375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 131258048, + "step": 2343 + }, + { + "epoch": 5.220489977728285, + "grad_norm": 19.044597625732422, + "learning_rate": 1e-06, + "loss": 0.7225, + "num_input_tokens_seen": 131316644, + "step": 2344 + }, + { + "epoch": 5.220489977728285, + "loss": 0.6471991539001465, + "loss_ce": 0.00022645638091489673, + "loss_iou": 0.2451171875, + "loss_num": 0.03125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 131316644, + "step": 2344 + }, + { + "epoch": 5.22271714922049, + "grad_norm": 16.071741104125977, + "learning_rate": 1e-06, + "loss": 0.5489, + "num_input_tokens_seen": 131372184, + "step": 2345 + }, + { + "epoch": 5.22271714922049, + "loss": 0.6769649386405945, + "loss_ce": 0.00020714016864076257, + "loss_iou": 0.296875, + "loss_num": 0.0162353515625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 131372184, + "step": 2345 + }, + { + "epoch": 5.224944320712694, + "grad_norm": 18.349136352539062, + "learning_rate": 1e-06, + "loss": 0.6446, + "num_input_tokens_seen": 131430784, + "step": 2346 + }, + { + "epoch": 5.224944320712694, + "loss": 0.6362409591674805, + "loss_ce": 0.00025463022757321596, + "loss_iou": 0.28515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 131430784, + "step": 2346 + }, + { + "epoch": 5.2271714922049, + "grad_norm": 18.53408432006836, + "learning_rate": 1e-06, + "loss": 0.5966, + "num_input_tokens_seen": 131488492, + "step": 2347 + }, + { + "epoch": 5.2271714922049, + "loss": 0.5330299139022827, + "loss_ce": 0.0003150389820802957, + "loss_iou": 0.220703125, + "loss_num": 0.0184326171875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 131488492, + "step": 2347 + }, + { + "epoch": 5.229398663697105, + "grad_norm": 21.853288650512695, + "learning_rate": 1e-06, + "loss": 0.5483, + "num_input_tokens_seen": 131545288, + "step": 2348 + }, + { + "epoch": 5.229398663697105, + "loss": 0.47664040327072144, + "loss_ce": 0.00019999593496322632, + "loss_iou": 0.2080078125, + "loss_num": 0.01220703125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 131545288, + "step": 2348 + }, + { + "epoch": 5.23162583518931, + "grad_norm": 31.041902542114258, + "learning_rate": 1e-06, + "loss": 0.8796, + "num_input_tokens_seen": 131601784, + "step": 2349 + }, + { + "epoch": 5.23162583518931, + "loss": 0.9917435646057129, + "loss_ce": 0.0002884728601202369, + "loss_iou": 0.390625, + "loss_num": 0.042236328125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 131601784, + "step": 2349 + }, + { + "epoch": 5.233853006681515, + "grad_norm": 56.980098724365234, + "learning_rate": 1e-06, + "loss": 0.7844, + "num_input_tokens_seen": 131658228, + "step": 2350 + }, + { + "epoch": 5.233853006681515, + "loss": 0.7722232341766357, + "loss_ce": 0.0002506303135305643, + "loss_iou": 0.33203125, + "loss_num": 0.021728515625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 131658228, + "step": 2350 + }, + { + "epoch": 5.23608017817372, + "grad_norm": 10.815868377685547, + "learning_rate": 1e-06, + "loss": 0.6814, + "num_input_tokens_seen": 131713244, + "step": 2351 + }, + { + "epoch": 5.23608017817372, + "loss": 0.8227336406707764, + "loss_ce": 0.00034591203439049423, + "loss_iou": 0.333984375, + "loss_num": 0.03125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 131713244, + "step": 2351 + }, + { + "epoch": 5.2383073496659245, + "grad_norm": 34.09395217895508, + "learning_rate": 1e-06, + "loss": 0.5397, + "num_input_tokens_seen": 131771288, + "step": 2352 + }, + { + "epoch": 5.2383073496659245, + "loss": 0.5100328922271729, + "loss_ce": 0.0002672626869753003, + "loss_iou": 0.2109375, + "loss_num": 0.0177001953125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 131771288, + "step": 2352 + }, + { + "epoch": 5.240534521158129, + "grad_norm": 18.458494186401367, + "learning_rate": 1e-06, + "loss": 0.7453, + "num_input_tokens_seen": 131829120, + "step": 2353 + }, + { + "epoch": 5.240534521158129, + "loss": 0.4893495738506317, + "loss_ce": 0.00021382731210906059, + "loss_iou": 0.197265625, + "loss_num": 0.0191650390625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 131829120, + "step": 2353 + }, + { + "epoch": 5.242761692650334, + "grad_norm": 21.484622955322266, + "learning_rate": 1e-06, + "loss": 0.8861, + "num_input_tokens_seen": 131884604, + "step": 2354 + }, + { + "epoch": 5.242761692650334, + "loss": 0.9834548830986023, + "loss_ce": 0.0003006171027664095, + "loss_iou": 0.416015625, + "loss_num": 0.030029296875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 131884604, + "step": 2354 + }, + { + "epoch": 5.244988864142539, + "grad_norm": 24.594648361206055, + "learning_rate": 1e-06, + "loss": 0.7533, + "num_input_tokens_seen": 131942424, + "step": 2355 + }, + { + "epoch": 5.244988864142539, + "loss": 0.7038974761962891, + "loss_ce": 0.00028417640714906156, + "loss_iou": 0.310546875, + "loss_num": 0.0162353515625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 131942424, + "step": 2355 + }, + { + "epoch": 5.247216035634744, + "grad_norm": 46.79767990112305, + "learning_rate": 1e-06, + "loss": 0.6838, + "num_input_tokens_seen": 131998944, + "step": 2356 + }, + { + "epoch": 5.247216035634744, + "loss": 0.5929880738258362, + "loss_ce": 0.00021467695478349924, + "loss_iou": 0.259765625, + "loss_num": 0.01446533203125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 131998944, + "step": 2356 + }, + { + "epoch": 5.249443207126949, + "grad_norm": 18.105363845825195, + "learning_rate": 1e-06, + "loss": 0.6012, + "num_input_tokens_seen": 132055476, + "step": 2357 + }, + { + "epoch": 5.249443207126949, + "loss": 0.45707041025161743, + "loss_ce": 0.00028330169152468443, + "loss_iou": 0.189453125, + "loss_num": 0.015380859375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 132055476, + "step": 2357 + }, + { + "epoch": 5.251670378619154, + "grad_norm": 18.88360595703125, + "learning_rate": 1e-06, + "loss": 0.8412, + "num_input_tokens_seen": 132112904, + "step": 2358 + }, + { + "epoch": 5.251670378619154, + "loss": 0.8789228200912476, + "loss_ce": 0.0002607441274449229, + "loss_iou": 0.37109375, + "loss_num": 0.02685546875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 132112904, + "step": 2358 + }, + { + "epoch": 5.2538975501113585, + "grad_norm": 26.32551383972168, + "learning_rate": 1e-06, + "loss": 0.8518, + "num_input_tokens_seen": 132168780, + "step": 2359 + }, + { + "epoch": 5.2538975501113585, + "loss": 0.9214595556259155, + "loss_ce": 0.0003170058480463922, + "loss_iou": 0.42578125, + "loss_num": 0.01373291015625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 132168780, + "step": 2359 + }, + { + "epoch": 5.256124721603563, + "grad_norm": 39.1221809387207, + "learning_rate": 1e-06, + "loss": 0.5728, + "num_input_tokens_seen": 132223368, + "step": 2360 + }, + { + "epoch": 5.256124721603563, + "loss": 0.574551522731781, + "loss_ce": 0.00021067832130938768, + "loss_iou": 0.2265625, + "loss_num": 0.024169921875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 132223368, + "step": 2360 + }, + { + "epoch": 5.258351893095768, + "grad_norm": 24.009830474853516, + "learning_rate": 1e-06, + "loss": 0.8335, + "num_input_tokens_seen": 132279872, + "step": 2361 + }, + { + "epoch": 5.258351893095768, + "loss": 0.8674226999282837, + "loss_ce": 0.00023520024842582643, + "loss_iou": 0.388671875, + "loss_num": 0.017822265625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 132279872, + "step": 2361 + }, + { + "epoch": 5.260579064587973, + "grad_norm": 16.209796905517578, + "learning_rate": 1e-06, + "loss": 0.8201, + "num_input_tokens_seen": 132337356, + "step": 2362 + }, + { + "epoch": 5.260579064587973, + "loss": 1.021734356880188, + "loss_ce": 0.0004940725630149245, + "loss_iou": 0.435546875, + "loss_num": 0.0302734375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 132337356, + "step": 2362 + }, + { + "epoch": 5.262806236080178, + "grad_norm": 21.380468368530273, + "learning_rate": 1e-06, + "loss": 0.6083, + "num_input_tokens_seen": 132389212, + "step": 2363 + }, + { + "epoch": 5.262806236080178, + "loss": 0.5677247643470764, + "loss_ce": 0.00034196508931927383, + "loss_iou": 0.203125, + "loss_num": 0.0322265625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 132389212, + "step": 2363 + }, + { + "epoch": 5.265033407572383, + "grad_norm": 18.931297302246094, + "learning_rate": 1e-06, + "loss": 1.0995, + "num_input_tokens_seen": 132445236, + "step": 2364 + }, + { + "epoch": 5.265033407572383, + "loss": 0.9282673597335815, + "loss_ce": 0.0002889031311497092, + "loss_iou": 0.3671875, + "loss_num": 0.038818359375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 132445236, + "step": 2364 + }, + { + "epoch": 5.267260579064588, + "grad_norm": 51.42475128173828, + "learning_rate": 1e-06, + "loss": 0.7182, + "num_input_tokens_seen": 132499704, + "step": 2365 + }, + { + "epoch": 5.267260579064588, + "loss": 0.6144786477088928, + "loss_ce": 0.00022083369549363852, + "loss_iou": 0.2578125, + "loss_num": 0.0196533203125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 132499704, + "step": 2365 + }, + { + "epoch": 5.2694877505567925, + "grad_norm": 15.084627151489258, + "learning_rate": 1e-06, + "loss": 0.5506, + "num_input_tokens_seen": 132557716, + "step": 2366 + }, + { + "epoch": 5.2694877505567925, + "loss": 0.5173279047012329, + "loss_ce": 0.00023811672872398049, + "loss_iou": 0.2255859375, + "loss_num": 0.01312255859375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 132557716, + "step": 2366 + }, + { + "epoch": 5.271714922048997, + "grad_norm": 23.721725463867188, + "learning_rate": 1e-06, + "loss": 0.8121, + "num_input_tokens_seen": 132611180, + "step": 2367 + }, + { + "epoch": 5.271714922048997, + "loss": 0.6462873220443726, + "loss_ce": 0.00029126679874025285, + "loss_iou": 0.25390625, + "loss_num": 0.02734375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 132611180, + "step": 2367 + }, + { + "epoch": 5.273942093541203, + "grad_norm": 15.91121768951416, + "learning_rate": 1e-06, + "loss": 0.6395, + "num_input_tokens_seen": 132667040, + "step": 2368 + }, + { + "epoch": 5.273942093541203, + "loss": 0.6589342951774597, + "loss_ce": 0.00024289448629133403, + "loss_iou": 0.279296875, + "loss_num": 0.02001953125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 132667040, + "step": 2368 + }, + { + "epoch": 5.276169265033408, + "grad_norm": 17.777984619140625, + "learning_rate": 1e-06, + "loss": 0.6871, + "num_input_tokens_seen": 132722572, + "step": 2369 + }, + { + "epoch": 5.276169265033408, + "loss": 0.376442015171051, + "loss_ce": 0.00022133463062345982, + "loss_iou": 0.16015625, + "loss_num": 0.0111083984375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 132722572, + "step": 2369 + }, + { + "epoch": 5.278396436525613, + "grad_norm": 25.825546264648438, + "learning_rate": 1e-06, + "loss": 0.9053, + "num_input_tokens_seen": 132780576, + "step": 2370 + }, + { + "epoch": 5.278396436525613, + "loss": 0.6252577304840088, + "loss_ce": 0.0002577258856035769, + "loss_iou": 0.283203125, + "loss_num": 0.01141357421875, + "loss_xval": 0.625, + "num_input_tokens_seen": 132780576, + "step": 2370 + }, + { + "epoch": 5.280623608017818, + "grad_norm": 20.95437240600586, + "learning_rate": 1e-06, + "loss": 0.7428, + "num_input_tokens_seen": 132836940, + "step": 2371 + }, + { + "epoch": 5.280623608017818, + "loss": 0.8691422343254089, + "loss_ce": 0.00024575780844315886, + "loss_iou": 0.36328125, + "loss_num": 0.0284423828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 132836940, + "step": 2371 + }, + { + "epoch": 5.282850779510023, + "grad_norm": 16.127458572387695, + "learning_rate": 1e-06, + "loss": 0.7024, + "num_input_tokens_seen": 132891444, + "step": 2372 + }, + { + "epoch": 5.282850779510023, + "loss": 0.7618474364280701, + "loss_ce": 0.0003727960283868015, + "loss_iou": 0.31640625, + "loss_num": 0.0255126953125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 132891444, + "step": 2372 + }, + { + "epoch": 5.285077951002227, + "grad_norm": 19.69451141357422, + "learning_rate": 1e-06, + "loss": 0.9777, + "num_input_tokens_seen": 132948012, + "step": 2373 + }, + { + "epoch": 5.285077951002227, + "loss": 0.8320986032485962, + "loss_ce": 0.00031142972875386477, + "loss_iou": 0.333984375, + "loss_num": 0.032470703125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 132948012, + "step": 2373 + }, + { + "epoch": 5.287305122494432, + "grad_norm": 24.581279754638672, + "learning_rate": 1e-06, + "loss": 1.0444, + "num_input_tokens_seen": 133005612, + "step": 2374 + }, + { + "epoch": 5.287305122494432, + "loss": 0.9788609743118286, + "loss_ce": 0.00034534052247181535, + "loss_iou": 0.40625, + "loss_num": 0.032958984375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 133005612, + "step": 2374 + }, + { + "epoch": 5.289532293986637, + "grad_norm": 20.55710220336914, + "learning_rate": 1e-06, + "loss": 0.835, + "num_input_tokens_seen": 133062216, + "step": 2375 + }, + { + "epoch": 5.289532293986637, + "loss": 0.9570013880729675, + "loss_ce": 0.00033630896359682083, + "loss_iou": 0.390625, + "loss_num": 0.03466796875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 133062216, + "step": 2375 + }, + { + "epoch": 5.291759465478842, + "grad_norm": 25.395736694335938, + "learning_rate": 1e-06, + "loss": 0.7496, + "num_input_tokens_seen": 133117644, + "step": 2376 + }, + { + "epoch": 5.291759465478842, + "loss": 0.5810257196426392, + "loss_ce": 0.00021513670799322426, + "loss_iou": 0.25, + "loss_num": 0.0164794921875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 133117644, + "step": 2376 + }, + { + "epoch": 5.293986636971047, + "grad_norm": 58.38376235961914, + "learning_rate": 1e-06, + "loss": 0.8065, + "num_input_tokens_seen": 133176104, + "step": 2377 + }, + { + "epoch": 5.293986636971047, + "loss": 0.6448827981948853, + "loss_ce": 0.0002294883888680488, + "loss_iou": 0.28515625, + "loss_num": 0.014404296875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 133176104, + "step": 2377 + }, + { + "epoch": 5.296213808463252, + "grad_norm": 47.06428909301758, + "learning_rate": 1e-06, + "loss": 0.6075, + "num_input_tokens_seen": 133231892, + "step": 2378 + }, + { + "epoch": 5.296213808463252, + "loss": 0.6965197920799255, + "loss_ce": 0.00023071446048561484, + "loss_iou": 0.287109375, + "loss_num": 0.0245361328125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 133231892, + "step": 2378 + }, + { + "epoch": 5.298440979955457, + "grad_norm": 17.276321411132812, + "learning_rate": 1e-06, + "loss": 0.7652, + "num_input_tokens_seen": 133286408, + "step": 2379 + }, + { + "epoch": 5.298440979955457, + "loss": 0.8292036652565002, + "loss_ce": 0.00034621963277459145, + "loss_iou": 0.37109375, + "loss_num": 0.01708984375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 133286408, + "step": 2379 + }, + { + "epoch": 5.3006681514476615, + "grad_norm": 252.65249633789062, + "learning_rate": 1e-06, + "loss": 0.7102, + "num_input_tokens_seen": 133341916, + "step": 2380 + }, + { + "epoch": 5.3006681514476615, + "loss": 0.6878687143325806, + "loss_ce": 0.00024663194199092686, + "loss_iou": 0.30859375, + "loss_num": 0.01397705078125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 133341916, + "step": 2380 + }, + { + "epoch": 5.302895322939866, + "grad_norm": 22.686996459960938, + "learning_rate": 1e-06, + "loss": 0.7836, + "num_input_tokens_seen": 133397788, + "step": 2381 + }, + { + "epoch": 5.302895322939866, + "loss": 0.9040675759315491, + "loss_ce": 0.0002589549985714257, + "loss_iou": 0.41015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 133397788, + "step": 2381 + }, + { + "epoch": 5.305122494432071, + "grad_norm": 18.216825485229492, + "learning_rate": 1e-06, + "loss": 0.7332, + "num_input_tokens_seen": 133455328, + "step": 2382 + }, + { + "epoch": 5.305122494432071, + "loss": 0.8532497882843018, + "loss_ce": 0.000222432630835101, + "loss_iou": 0.36328125, + "loss_num": 0.02490234375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 133455328, + "step": 2382 + }, + { + "epoch": 5.307349665924276, + "grad_norm": 66.18022918701172, + "learning_rate": 1e-06, + "loss": 0.7825, + "num_input_tokens_seen": 133509936, + "step": 2383 + }, + { + "epoch": 5.307349665924276, + "loss": 0.7538729906082153, + "loss_ce": 0.0002108832122758031, + "loss_iou": 0.31640625, + "loss_num": 0.0242919921875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 133509936, + "step": 2383 + }, + { + "epoch": 5.309576837416481, + "grad_norm": 19.18753433227539, + "learning_rate": 1e-06, + "loss": 0.5276, + "num_input_tokens_seen": 133566504, + "step": 2384 + }, + { + "epoch": 5.309576837416481, + "loss": 0.6280398964881897, + "loss_ce": 0.00023228148347698152, + "loss_iou": 0.26953125, + "loss_num": 0.017822265625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 133566504, + "step": 2384 + }, + { + "epoch": 5.311804008908686, + "grad_norm": 21.568161010742188, + "learning_rate": 1e-06, + "loss": 0.6863, + "num_input_tokens_seen": 133624932, + "step": 2385 + }, + { + "epoch": 5.311804008908686, + "loss": 0.569631814956665, + "loss_ce": 0.00029586919117718935, + "loss_iou": 0.251953125, + "loss_num": 0.01275634765625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 133624932, + "step": 2385 + }, + { + "epoch": 5.314031180400891, + "grad_norm": 19.216602325439453, + "learning_rate": 1e-06, + "loss": 0.5267, + "num_input_tokens_seen": 133679920, + "step": 2386 + }, + { + "epoch": 5.314031180400891, + "loss": 0.5746643543243408, + "loss_ce": 0.00020149891497567296, + "loss_iou": 0.2412109375, + "loss_num": 0.0185546875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 133679920, + "step": 2386 + }, + { + "epoch": 5.3162583518930955, + "grad_norm": 25.528783798217773, + "learning_rate": 1e-06, + "loss": 0.8468, + "num_input_tokens_seen": 133735508, + "step": 2387 + }, + { + "epoch": 5.3162583518930955, + "loss": 0.7472808361053467, + "loss_ce": 0.00021054709213785827, + "loss_iou": 0.318359375, + "loss_num": 0.0216064453125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 133735508, + "step": 2387 + }, + { + "epoch": 5.3184855233853, + "grad_norm": 19.144241333007812, + "learning_rate": 1e-06, + "loss": 0.7418, + "num_input_tokens_seen": 133788248, + "step": 2388 + }, + { + "epoch": 5.3184855233853, + "loss": 0.7324228286743164, + "loss_ce": 0.0002451083273626864, + "loss_iou": 0.3203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 133788248, + "step": 2388 + }, + { + "epoch": 5.320712694877505, + "grad_norm": 26.32198143005371, + "learning_rate": 1e-06, + "loss": 0.6614, + "num_input_tokens_seen": 133843512, + "step": 2389 + }, + { + "epoch": 5.320712694877505, + "loss": 0.6170359253883362, + "loss_ce": 0.00021462509175762534, + "loss_iou": 0.26171875, + "loss_num": 0.01904296875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 133843512, + "step": 2389 + }, + { + "epoch": 5.32293986636971, + "grad_norm": 17.643627166748047, + "learning_rate": 1e-06, + "loss": 0.6772, + "num_input_tokens_seen": 133898856, + "step": 2390 + }, + { + "epoch": 5.32293986636971, + "loss": 0.6501243710517883, + "loss_ce": 0.00022201667889021337, + "loss_iou": 0.27734375, + "loss_num": 0.01904296875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 133898856, + "step": 2390 + }, + { + "epoch": 5.325167037861915, + "grad_norm": 31.93773651123047, + "learning_rate": 1e-06, + "loss": 0.7977, + "num_input_tokens_seen": 133956748, + "step": 2391 + }, + { + "epoch": 5.325167037861915, + "loss": 0.6381719708442688, + "loss_ce": 0.00023252921528182924, + "loss_iou": 0.28125, + "loss_num": 0.01531982421875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 133956748, + "step": 2391 + }, + { + "epoch": 5.327394209354121, + "grad_norm": 19.902048110961914, + "learning_rate": 1e-06, + "loss": 0.6616, + "num_input_tokens_seen": 134011496, + "step": 2392 + }, + { + "epoch": 5.327394209354121, + "loss": 0.6387029886245728, + "loss_ce": 0.0007635352667421103, + "loss_iou": 0.2890625, + "loss_num": 0.01226806640625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 134011496, + "step": 2392 + }, + { + "epoch": 5.3296213808463255, + "grad_norm": 16.311479568481445, + "learning_rate": 1e-06, + "loss": 0.4794, + "num_input_tokens_seen": 134069204, + "step": 2393 + }, + { + "epoch": 5.3296213808463255, + "loss": 0.4752798080444336, + "loss_ce": 0.00018213533621747047, + "loss_iou": 0.19921875, + "loss_num": 0.01544189453125, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 134069204, + "step": 2393 + }, + { + "epoch": 5.33184855233853, + "grad_norm": 39.13882827758789, + "learning_rate": 1e-06, + "loss": 0.6946, + "num_input_tokens_seen": 134126304, + "step": 2394 + }, + { + "epoch": 5.33184855233853, + "loss": 0.837151288986206, + "loss_ce": 0.00023718301963526756, + "loss_iou": 0.34375, + "loss_num": 0.02978515625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 134126304, + "step": 2394 + }, + { + "epoch": 5.334075723830735, + "grad_norm": 20.73467445373535, + "learning_rate": 1e-06, + "loss": 0.7741, + "num_input_tokens_seen": 134183476, + "step": 2395 + }, + { + "epoch": 5.334075723830735, + "loss": 0.7032053470611572, + "loss_ce": 0.00032445762190036476, + "loss_iou": 0.29296875, + "loss_num": 0.023681640625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 134183476, + "step": 2395 + }, + { + "epoch": 5.33630289532294, + "grad_norm": 25.634654998779297, + "learning_rate": 1e-06, + "loss": 0.564, + "num_input_tokens_seen": 134240096, + "step": 2396 + }, + { + "epoch": 5.33630289532294, + "loss": 0.699744701385498, + "loss_ce": 0.0002818358479999006, + "loss_iou": 0.31640625, + "loss_num": 0.01373291015625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 134240096, + "step": 2396 + }, + { + "epoch": 5.338530066815145, + "grad_norm": 19.363285064697266, + "learning_rate": 1e-06, + "loss": 0.4604, + "num_input_tokens_seen": 134296184, + "step": 2397 + }, + { + "epoch": 5.338530066815145, + "loss": 0.5098617076873779, + "loss_ce": 0.00034024479100480676, + "loss_iou": 0.224609375, + "loss_num": 0.01226806640625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 134296184, + "step": 2397 + }, + { + "epoch": 5.34075723830735, + "grad_norm": 31.034711837768555, + "learning_rate": 1e-06, + "loss": 0.8775, + "num_input_tokens_seen": 134347520, + "step": 2398 + }, + { + "epoch": 5.34075723830735, + "loss": 0.730765163898468, + "loss_ce": 0.00029640039429068565, + "loss_iou": 0.322265625, + "loss_num": 0.01708984375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 134347520, + "step": 2398 + }, + { + "epoch": 5.342984409799555, + "grad_norm": 21.829448699951172, + "learning_rate": 1e-06, + "loss": 1.003, + "num_input_tokens_seen": 134402984, + "step": 2399 + }, + { + "epoch": 5.342984409799555, + "loss": 1.1158628463745117, + "loss_ce": 0.0006285187555477023, + "loss_iou": 0.462890625, + "loss_num": 0.0380859375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 134402984, + "step": 2399 + }, + { + "epoch": 5.3452115812917596, + "grad_norm": 21.801376342773438, + "learning_rate": 1e-06, + "loss": 0.8284, + "num_input_tokens_seen": 134457812, + "step": 2400 + }, + { + "epoch": 5.3452115812917596, + "loss": 0.8780108094215393, + "loss_ce": 0.00032530241878703237, + "loss_iou": 0.380859375, + "loss_num": 0.02294921875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 134457812, + "step": 2400 + }, + { + "epoch": 5.347438752783964, + "grad_norm": 33.11177444458008, + "learning_rate": 1e-06, + "loss": 0.8474, + "num_input_tokens_seen": 134512096, + "step": 2401 + }, + { + "epoch": 5.347438752783964, + "loss": 0.9323829412460327, + "loss_ce": 0.0002540295827202499, + "loss_iou": 0.41015625, + "loss_num": 0.0223388671875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 134512096, + "step": 2401 + }, + { + "epoch": 5.349665924276169, + "grad_norm": 19.453571319580078, + "learning_rate": 1e-06, + "loss": 0.7402, + "num_input_tokens_seen": 134565996, + "step": 2402 + }, + { + "epoch": 5.349665924276169, + "loss": 0.7348358631134033, + "loss_ce": 0.00021668968838639557, + "loss_iou": 0.2890625, + "loss_num": 0.031494140625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 134565996, + "step": 2402 + }, + { + "epoch": 5.351893095768374, + "grad_norm": 26.344636917114258, + "learning_rate": 1e-06, + "loss": 1.0379, + "num_input_tokens_seen": 134623536, + "step": 2403 + }, + { + "epoch": 5.351893095768374, + "loss": 0.9678047895431519, + "loss_ce": 0.0002755335299298167, + "loss_iou": 0.416015625, + "loss_num": 0.026611328125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 134623536, + "step": 2403 + }, + { + "epoch": 5.354120267260579, + "grad_norm": 24.26504898071289, + "learning_rate": 1e-06, + "loss": 0.7085, + "num_input_tokens_seen": 134676664, + "step": 2404 + }, + { + "epoch": 5.354120267260579, + "loss": 0.7473124265670776, + "loss_ce": 0.00024206144735217094, + "loss_iou": 0.283203125, + "loss_num": 0.0361328125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 134676664, + "step": 2404 + }, + { + "epoch": 5.356347438752784, + "grad_norm": 37.98905563354492, + "learning_rate": 1e-06, + "loss": 0.6818, + "num_input_tokens_seen": 134734292, + "step": 2405 + }, + { + "epoch": 5.356347438752784, + "loss": 0.8488801717758179, + "loss_ce": 0.00024738311185501516, + "loss_iou": 0.34765625, + "loss_num": 0.0302734375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 134734292, + "step": 2405 + }, + { + "epoch": 5.358574610244989, + "grad_norm": 110.85530090332031, + "learning_rate": 1e-06, + "loss": 0.5897, + "num_input_tokens_seen": 134791600, + "step": 2406 + }, + { + "epoch": 5.358574610244989, + "loss": 0.5577570796012878, + "loss_ce": 0.0002619822043925524, + "loss_iou": 0.25390625, + "loss_num": 0.00970458984375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 134791600, + "step": 2406 + }, + { + "epoch": 5.360801781737194, + "grad_norm": 25.807218551635742, + "learning_rate": 1e-06, + "loss": 0.8237, + "num_input_tokens_seen": 134847120, + "step": 2407 + }, + { + "epoch": 5.360801781737194, + "loss": 0.7333089709281921, + "loss_ce": 0.0002767038531601429, + "loss_iou": 0.29296875, + "loss_num": 0.0294189453125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 134847120, + "step": 2407 + }, + { + "epoch": 5.363028953229398, + "grad_norm": 23.73634147644043, + "learning_rate": 1e-06, + "loss": 0.7855, + "num_input_tokens_seen": 134901488, + "step": 2408 + }, + { + "epoch": 5.363028953229398, + "loss": 0.6962625980377197, + "loss_ce": 0.000339773076120764, + "loss_iou": 0.2890625, + "loss_num": 0.0234375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 134901488, + "step": 2408 + }, + { + "epoch": 5.365256124721603, + "grad_norm": 23.408971786499023, + "learning_rate": 1e-06, + "loss": 0.7951, + "num_input_tokens_seen": 134956808, + "step": 2409 + }, + { + "epoch": 5.365256124721603, + "loss": 0.8180850744247437, + "loss_ce": 0.00021401085541583598, + "loss_iou": 0.349609375, + "loss_num": 0.0240478515625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 134956808, + "step": 2409 + }, + { + "epoch": 5.367483296213808, + "grad_norm": 24.673824310302734, + "learning_rate": 1e-06, + "loss": 0.6417, + "num_input_tokens_seen": 135013228, + "step": 2410 + }, + { + "epoch": 5.367483296213808, + "loss": 0.3933447301387787, + "loss_ce": 0.0002172911772504449, + "loss_iou": 0.162109375, + "loss_num": 0.01373291015625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 135013228, + "step": 2410 + }, + { + "epoch": 5.369710467706013, + "grad_norm": 19.17323875427246, + "learning_rate": 1e-06, + "loss": 0.6747, + "num_input_tokens_seen": 135070756, + "step": 2411 + }, + { + "epoch": 5.369710467706013, + "loss": 0.7497762441635132, + "loss_ce": 0.00026456580962985754, + "loss_iou": 0.326171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.75, + "num_input_tokens_seen": 135070756, + "step": 2411 + }, + { + "epoch": 5.371937639198218, + "grad_norm": 21.6716365814209, + "learning_rate": 1e-06, + "loss": 0.7877, + "num_input_tokens_seen": 135123704, + "step": 2412 + }, + { + "epoch": 5.371937639198218, + "loss": 0.9468029737472534, + "loss_ce": 0.0002697420713957399, + "loss_iou": 0.40234375, + "loss_num": 0.028076171875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 135123704, + "step": 2412 + }, + { + "epoch": 5.374164810690424, + "grad_norm": 28.3837833404541, + "learning_rate": 1e-06, + "loss": 0.8981, + "num_input_tokens_seen": 135177764, + "step": 2413 + }, + { + "epoch": 5.374164810690424, + "loss": 1.0344269275665283, + "loss_ce": 0.00024724419927224517, + "loss_iou": 0.421875, + "loss_num": 0.0380859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 135177764, + "step": 2413 + }, + { + "epoch": 5.3763919821826285, + "grad_norm": 38.46763229370117, + "learning_rate": 1e-06, + "loss": 0.7782, + "num_input_tokens_seen": 135233172, + "step": 2414 + }, + { + "epoch": 5.3763919821826285, + "loss": 0.8738818764686584, + "loss_ce": 0.00034671538742259145, + "loss_iou": 0.37890625, + "loss_num": 0.0228271484375, + "loss_xval": 0.875, + "num_input_tokens_seen": 135233172, + "step": 2414 + }, + { + "epoch": 5.378619153674833, + "grad_norm": 20.032617568969727, + "learning_rate": 1e-06, + "loss": 0.661, + "num_input_tokens_seen": 135287744, + "step": 2415 + }, + { + "epoch": 5.378619153674833, + "loss": 0.6660585403442383, + "loss_ce": 0.00040915622957982123, + "loss_iou": 0.2890625, + "loss_num": 0.017822265625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 135287744, + "step": 2415 + }, + { + "epoch": 5.380846325167038, + "grad_norm": 33.80507278442383, + "learning_rate": 1e-06, + "loss": 0.8263, + "num_input_tokens_seen": 135344300, + "step": 2416 + }, + { + "epoch": 5.380846325167038, + "loss": 0.8908694386482239, + "loss_ce": 0.0002443883859086782, + "loss_iou": 0.39453125, + "loss_num": 0.0206298828125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 135344300, + "step": 2416 + }, + { + "epoch": 5.383073496659243, + "grad_norm": 18.646997451782227, + "learning_rate": 1e-06, + "loss": 0.6062, + "num_input_tokens_seen": 135402736, + "step": 2417 + }, + { + "epoch": 5.383073496659243, + "loss": 0.6154400110244751, + "loss_ce": 0.0002056759112747386, + "loss_iou": 0.279296875, + "loss_num": 0.011474609375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 135402736, + "step": 2417 + }, + { + "epoch": 5.385300668151448, + "grad_norm": 27.358457565307617, + "learning_rate": 1e-06, + "loss": 0.7446, + "num_input_tokens_seen": 135457252, + "step": 2418 + }, + { + "epoch": 5.385300668151448, + "loss": 0.8736233711242676, + "loss_ce": 0.0003323230193927884, + "loss_iou": 0.38671875, + "loss_num": 0.0201416015625, + "loss_xval": 0.875, + "num_input_tokens_seen": 135457252, + "step": 2418 + }, + { + "epoch": 5.387527839643653, + "grad_norm": 37.07065200805664, + "learning_rate": 1e-06, + "loss": 0.9104, + "num_input_tokens_seen": 135514764, + "step": 2419 + }, + { + "epoch": 5.387527839643653, + "loss": 0.8482790589332581, + "loss_ce": 0.000378611555788666, + "loss_iou": 0.33984375, + "loss_num": 0.033447265625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 135514764, + "step": 2419 + }, + { + "epoch": 5.389755011135858, + "grad_norm": 15.80694580078125, + "learning_rate": 1e-06, + "loss": 0.7015, + "num_input_tokens_seen": 135571288, + "step": 2420 + }, + { + "epoch": 5.389755011135858, + "loss": 0.6910269856452942, + "loss_ce": 0.00023109573521651328, + "loss_iou": 0.306640625, + "loss_num": 0.015625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 135571288, + "step": 2420 + }, + { + "epoch": 5.3919821826280625, + "grad_norm": 16.644014358520508, + "learning_rate": 1e-06, + "loss": 0.5917, + "num_input_tokens_seen": 135627180, + "step": 2421 + }, + { + "epoch": 5.3919821826280625, + "loss": 0.6370264291763306, + "loss_ce": 0.00030768115539103746, + "loss_iou": 0.244140625, + "loss_num": 0.0299072265625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 135627180, + "step": 2421 + }, + { + "epoch": 5.394209354120267, + "grad_norm": 23.295394897460938, + "learning_rate": 1e-06, + "loss": 0.851, + "num_input_tokens_seen": 135681732, + "step": 2422 + }, + { + "epoch": 5.394209354120267, + "loss": 0.890271782875061, + "loss_ce": 0.0003792433417402208, + "loss_iou": 0.37109375, + "loss_num": 0.0296630859375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 135681732, + "step": 2422 + }, + { + "epoch": 5.396436525612472, + "grad_norm": 17.34937286376953, + "learning_rate": 1e-06, + "loss": 0.7305, + "num_input_tokens_seen": 135739488, + "step": 2423 + }, + { + "epoch": 5.396436525612472, + "loss": 0.7664504051208496, + "loss_ce": 0.00033708903356455266, + "loss_iou": 0.314453125, + "loss_num": 0.0272216796875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 135739488, + "step": 2423 + }, + { + "epoch": 5.398663697104677, + "grad_norm": 17.406497955322266, + "learning_rate": 1e-06, + "loss": 0.5762, + "num_input_tokens_seen": 135793620, + "step": 2424 + }, + { + "epoch": 5.398663697104677, + "loss": 0.5314889550209045, + "loss_ce": 0.00023894087644293904, + "loss_iou": 0.2158203125, + "loss_num": 0.019775390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 135793620, + "step": 2424 + }, + { + "epoch": 5.400890868596882, + "grad_norm": 15.964725494384766, + "learning_rate": 1e-06, + "loss": 0.6212, + "num_input_tokens_seen": 135850120, + "step": 2425 + }, + { + "epoch": 5.400890868596882, + "loss": 0.5310900211334229, + "loss_ce": 0.0002062540443148464, + "loss_iou": 0.2275390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 135850120, + "step": 2425 + }, + { + "epoch": 5.403118040089087, + "grad_norm": 18.38140296936035, + "learning_rate": 1e-06, + "loss": 0.5365, + "num_input_tokens_seen": 135901984, + "step": 2426 + }, + { + "epoch": 5.403118040089087, + "loss": 0.48569074273109436, + "loss_ce": 0.0002171028172597289, + "loss_iou": 0.21484375, + "loss_num": 0.01123046875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 135901984, + "step": 2426 + }, + { + "epoch": 5.405345211581292, + "grad_norm": 17.759723663330078, + "learning_rate": 1e-06, + "loss": 0.6546, + "num_input_tokens_seen": 135956660, + "step": 2427 + }, + { + "epoch": 5.405345211581292, + "loss": 0.6573410034179688, + "loss_ce": 0.00035854033194482327, + "loss_iou": 0.28125, + "loss_num": 0.0186767578125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 135956660, + "step": 2427 + }, + { + "epoch": 5.4075723830734965, + "grad_norm": 18.03878402709961, + "learning_rate": 1e-06, + "loss": 0.5775, + "num_input_tokens_seen": 136013048, + "step": 2428 + }, + { + "epoch": 5.4075723830734965, + "loss": 0.592266857624054, + "loss_ce": 0.00022585978149436414, + "loss_iou": 0.2451171875, + "loss_num": 0.0201416015625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 136013048, + "step": 2428 + }, + { + "epoch": 5.409799554565701, + "grad_norm": 30.479446411132812, + "learning_rate": 1e-06, + "loss": 0.7551, + "num_input_tokens_seen": 136068760, + "step": 2429 + }, + { + "epoch": 5.409799554565701, + "loss": 1.0719916820526123, + "loss_ce": 0.00033644368522800505, + "loss_iou": 0.46875, + "loss_num": 0.0263671875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 136068760, + "step": 2429 + }, + { + "epoch": 5.412026726057906, + "grad_norm": 22.04293441772461, + "learning_rate": 1e-06, + "loss": 0.7282, + "num_input_tokens_seen": 136122268, + "step": 2430 + }, + { + "epoch": 5.412026726057906, + "loss": 0.7684208750724792, + "loss_ce": 0.00023237511049956083, + "loss_iou": 0.306640625, + "loss_num": 0.0306396484375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 136122268, + "step": 2430 + }, + { + "epoch": 5.414253897550111, + "grad_norm": 49.4771614074707, + "learning_rate": 1e-06, + "loss": 0.5288, + "num_input_tokens_seen": 136178720, + "step": 2431 + }, + { + "epoch": 5.414253897550111, + "loss": 0.4244188964366913, + "loss_ce": 0.00022456918668467551, + "loss_iou": 0.1728515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 136178720, + "step": 2431 + }, + { + "epoch": 5.416481069042316, + "grad_norm": 31.579322814941406, + "learning_rate": 1e-06, + "loss": 0.5512, + "num_input_tokens_seen": 136237672, + "step": 2432 + }, + { + "epoch": 5.416481069042316, + "loss": 0.5803066492080688, + "loss_ce": 0.00022852106485515833, + "loss_iou": 0.2431640625, + "loss_num": 0.018798828125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 136237672, + "step": 2432 + }, + { + "epoch": 5.418708240534521, + "grad_norm": 30.84029197692871, + "learning_rate": 1e-06, + "loss": 0.574, + "num_input_tokens_seen": 136293760, + "step": 2433 + }, + { + "epoch": 5.418708240534521, + "loss": 0.6513895988464355, + "loss_ce": 0.00026654996327124536, + "loss_iou": 0.265625, + "loss_num": 0.02392578125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 136293760, + "step": 2433 + }, + { + "epoch": 5.420935412026726, + "grad_norm": 16.020021438598633, + "learning_rate": 1e-06, + "loss": 0.5865, + "num_input_tokens_seen": 136350844, + "step": 2434 + }, + { + "epoch": 5.420935412026726, + "loss": 0.5578700304031372, + "loss_ce": 0.0002528943296056241, + "loss_iou": 0.2353515625, + "loss_num": 0.017333984375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 136350844, + "step": 2434 + }, + { + "epoch": 5.4231625835189305, + "grad_norm": 22.808626174926758, + "learning_rate": 1e-06, + "loss": 0.9401, + "num_input_tokens_seen": 136404744, + "step": 2435 + }, + { + "epoch": 5.4231625835189305, + "loss": 0.8737362027168274, + "loss_ce": 0.00020110802142880857, + "loss_iou": 0.359375, + "loss_num": 0.03076171875, + "loss_xval": 0.875, + "num_input_tokens_seen": 136404744, + "step": 2435 + }, + { + "epoch": 5.425389755011135, + "grad_norm": 25.185213088989258, + "learning_rate": 1e-06, + "loss": 0.5613, + "num_input_tokens_seen": 136462516, + "step": 2436 + }, + { + "epoch": 5.425389755011135, + "loss": 0.5026748776435852, + "loss_ce": 0.00023346173111349344, + "loss_iou": 0.22265625, + "loss_num": 0.0113525390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 136462516, + "step": 2436 + }, + { + "epoch": 5.427616926503341, + "grad_norm": 16.586254119873047, + "learning_rate": 1e-06, + "loss": 0.7677, + "num_input_tokens_seen": 136520164, + "step": 2437 + }, + { + "epoch": 5.427616926503341, + "loss": 0.6672698259353638, + "loss_ce": 0.0002775907050818205, + "loss_iou": 0.275390625, + "loss_num": 0.0230712890625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 136520164, + "step": 2437 + }, + { + "epoch": 5.429844097995546, + "grad_norm": 15.319040298461914, + "learning_rate": 1e-06, + "loss": 0.5602, + "num_input_tokens_seen": 136579040, + "step": 2438 + }, + { + "epoch": 5.429844097995546, + "loss": 0.39814403653144836, + "loss_ce": 0.00019482253992464393, + "loss_iou": 0.16015625, + "loss_num": 0.0155029296875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 136579040, + "step": 2438 + }, + { + "epoch": 5.432071269487751, + "grad_norm": 21.42112159729004, + "learning_rate": 1e-06, + "loss": 0.9151, + "num_input_tokens_seen": 136635020, + "step": 2439 + }, + { + "epoch": 5.432071269487751, + "loss": 0.9438084363937378, + "loss_ce": 0.0003270409069955349, + "loss_iou": 0.392578125, + "loss_num": 0.0322265625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 136635020, + "step": 2439 + }, + { + "epoch": 5.434298440979956, + "grad_norm": 19.967939376831055, + "learning_rate": 1e-06, + "loss": 0.9586, + "num_input_tokens_seen": 136693516, + "step": 2440 + }, + { + "epoch": 5.434298440979956, + "loss": 0.9183446168899536, + "loss_ce": 0.0003758435195777565, + "loss_iou": 0.390625, + "loss_num": 0.027587890625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 136693516, + "step": 2440 + }, + { + "epoch": 5.436525612472161, + "grad_norm": 21.792882919311523, + "learning_rate": 1e-06, + "loss": 0.7106, + "num_input_tokens_seen": 136750260, + "step": 2441 + }, + { + "epoch": 5.436525612472161, + "loss": 0.6200574636459351, + "loss_ce": 0.00018441499560140073, + "loss_iou": 0.2578125, + "loss_num": 0.0206298828125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 136750260, + "step": 2441 + }, + { + "epoch": 5.4387527839643655, + "grad_norm": 18.052919387817383, + "learning_rate": 1e-06, + "loss": 0.5861, + "num_input_tokens_seen": 136804772, + "step": 2442 + }, + { + "epoch": 5.4387527839643655, + "loss": 0.48152902722358704, + "loss_ce": 0.000449933490017429, + "loss_iou": 0.2021484375, + "loss_num": 0.015625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 136804772, + "step": 2442 + }, + { + "epoch": 5.44097995545657, + "grad_norm": 20.881345748901367, + "learning_rate": 1e-06, + "loss": 0.8096, + "num_input_tokens_seen": 136861740, + "step": 2443 + }, + { + "epoch": 5.44097995545657, + "loss": 0.6257365942001343, + "loss_ce": 0.0002483331772964448, + "loss_iou": 0.248046875, + "loss_num": 0.02587890625, + "loss_xval": 0.625, + "num_input_tokens_seen": 136861740, + "step": 2443 + }, + { + "epoch": 5.443207126948775, + "grad_norm": 17.746784210205078, + "learning_rate": 1e-06, + "loss": 0.7268, + "num_input_tokens_seen": 136920452, + "step": 2444 + }, + { + "epoch": 5.443207126948775, + "loss": 0.8379260897636414, + "loss_ce": 0.00027960725128650665, + "loss_iou": 0.349609375, + "loss_num": 0.02783203125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 136920452, + "step": 2444 + }, + { + "epoch": 5.44543429844098, + "grad_norm": 17.058738708496094, + "learning_rate": 1e-06, + "loss": 0.8293, + "num_input_tokens_seen": 136980184, + "step": 2445 + }, + { + "epoch": 5.44543429844098, + "loss": 0.8342658281326294, + "loss_ce": 0.00028141395887359977, + "loss_iou": 0.357421875, + "loss_num": 0.024169921875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 136980184, + "step": 2445 + }, + { + "epoch": 5.447661469933185, + "grad_norm": 14.320577621459961, + "learning_rate": 1e-06, + "loss": 0.6234, + "num_input_tokens_seen": 137037052, + "step": 2446 + }, + { + "epoch": 5.447661469933185, + "loss": 0.6837434768676758, + "loss_ce": 0.00027177410083822906, + "loss_iou": 0.2734375, + "loss_num": 0.027587890625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 137037052, + "step": 2446 + }, + { + "epoch": 5.44988864142539, + "grad_norm": 19.701732635498047, + "learning_rate": 1e-06, + "loss": 0.6273, + "num_input_tokens_seen": 137094316, + "step": 2447 + }, + { + "epoch": 5.44988864142539, + "loss": 0.5671048164367676, + "loss_ce": 0.00021026638569310308, + "loss_iou": 0.2578125, + "loss_num": 0.01031494140625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 137094316, + "step": 2447 + }, + { + "epoch": 5.452115812917595, + "grad_norm": 47.633155822753906, + "learning_rate": 1e-06, + "loss": 0.7027, + "num_input_tokens_seen": 137152472, + "step": 2448 + }, + { + "epoch": 5.452115812917595, + "loss": 0.7645977735519409, + "loss_ce": 0.00019351180526427925, + "loss_iou": 0.33984375, + "loss_num": 0.0172119140625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 137152472, + "step": 2448 + }, + { + "epoch": 5.4543429844097995, + "grad_norm": 18.31052017211914, + "learning_rate": 1e-06, + "loss": 0.8306, + "num_input_tokens_seen": 137208136, + "step": 2449 + }, + { + "epoch": 5.4543429844097995, + "loss": 0.5993661284446716, + "loss_ce": 0.0002450407773721963, + "loss_iou": 0.263671875, + "loss_num": 0.01422119140625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 137208136, + "step": 2449 + }, + { + "epoch": 5.456570155902004, + "grad_norm": 19.636415481567383, + "learning_rate": 1e-06, + "loss": 0.9006, + "num_input_tokens_seen": 137262276, + "step": 2450 + }, + { + "epoch": 5.456570155902004, + "loss": 0.8564736843109131, + "loss_ce": 0.0002724617370404303, + "loss_iou": 0.3671875, + "loss_num": 0.0242919921875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 137262276, + "step": 2450 + }, + { + "epoch": 5.458797327394209, + "grad_norm": 21.784557342529297, + "learning_rate": 1e-06, + "loss": 0.6682, + "num_input_tokens_seen": 137320112, + "step": 2451 + }, + { + "epoch": 5.458797327394209, + "loss": 0.6689052581787109, + "loss_ce": 0.0002041200641542673, + "loss_iou": 0.287109375, + "loss_num": 0.0191650390625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 137320112, + "step": 2451 + }, + { + "epoch": 5.461024498886414, + "grad_norm": 19.112836837768555, + "learning_rate": 1e-06, + "loss": 0.7381, + "num_input_tokens_seen": 137375784, + "step": 2452 + }, + { + "epoch": 5.461024498886414, + "loss": 0.6511021852493286, + "loss_ce": 0.00022325036115944386, + "loss_iou": 0.259765625, + "loss_num": 0.0267333984375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 137375784, + "step": 2452 + }, + { + "epoch": 5.463251670378619, + "grad_norm": 25.689016342163086, + "learning_rate": 1e-06, + "loss": 0.7655, + "num_input_tokens_seen": 137431796, + "step": 2453 + }, + { + "epoch": 5.463251670378619, + "loss": 0.9367631673812866, + "loss_ce": 0.0006059646257199347, + "loss_iou": 0.376953125, + "loss_num": 0.03662109375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 137431796, + "step": 2453 + }, + { + "epoch": 5.465478841870824, + "grad_norm": 24.619190216064453, + "learning_rate": 1e-06, + "loss": 0.8185, + "num_input_tokens_seen": 137487700, + "step": 2454 + }, + { + "epoch": 5.465478841870824, + "loss": 0.683560311794281, + "loss_ce": 0.00021070845832582563, + "loss_iou": 0.27734375, + "loss_num": 0.02587890625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 137487700, + "step": 2454 + }, + { + "epoch": 5.467706013363029, + "grad_norm": 19.307693481445312, + "learning_rate": 1e-06, + "loss": 0.6936, + "num_input_tokens_seen": 137544564, + "step": 2455 + }, + { + "epoch": 5.467706013363029, + "loss": 0.6617897748947144, + "loss_ce": 0.0002907552698161453, + "loss_iou": 0.27734375, + "loss_num": 0.021484375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 137544564, + "step": 2455 + }, + { + "epoch": 5.4699331848552335, + "grad_norm": 20.65030860900879, + "learning_rate": 1e-06, + "loss": 0.8517, + "num_input_tokens_seen": 137602024, + "step": 2456 + }, + { + "epoch": 5.4699331848552335, + "loss": 0.7545545101165771, + "loss_ce": 0.0004040825879201293, + "loss_iou": 0.29296875, + "loss_num": 0.033447265625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 137602024, + "step": 2456 + }, + { + "epoch": 5.472160356347438, + "grad_norm": 19.01409339904785, + "learning_rate": 1e-06, + "loss": 0.593, + "num_input_tokens_seen": 137657656, + "step": 2457 + }, + { + "epoch": 5.472160356347438, + "loss": 0.577867329120636, + "loss_ce": 0.00023062352556735277, + "loss_iou": 0.2431640625, + "loss_num": 0.018310546875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 137657656, + "step": 2457 + }, + { + "epoch": 5.474387527839644, + "grad_norm": 25.600095748901367, + "learning_rate": 1e-06, + "loss": 0.9298, + "num_input_tokens_seen": 137713816, + "step": 2458 + }, + { + "epoch": 5.474387527839644, + "loss": 0.7569461464881897, + "loss_ce": 0.00023227237397804856, + "loss_iou": 0.34375, + "loss_num": 0.01361083984375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 137713816, + "step": 2458 + }, + { + "epoch": 5.476614699331849, + "grad_norm": 16.993986129760742, + "learning_rate": 1e-06, + "loss": 0.695, + "num_input_tokens_seen": 137771028, + "step": 2459 + }, + { + "epoch": 5.476614699331849, + "loss": 0.7477880120277405, + "loss_ce": 0.00022941670613363385, + "loss_iou": 0.302734375, + "loss_num": 0.0279541015625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 137771028, + "step": 2459 + }, + { + "epoch": 5.478841870824054, + "grad_norm": 25.62329864501953, + "learning_rate": 1e-06, + "loss": 0.755, + "num_input_tokens_seen": 137826816, + "step": 2460 + }, + { + "epoch": 5.478841870824054, + "loss": 0.8492061495780945, + "loss_ce": 0.0003292199980933219, + "loss_iou": 0.35546875, + "loss_num": 0.02734375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 137826816, + "step": 2460 + }, + { + "epoch": 5.481069042316259, + "grad_norm": 18.305076599121094, + "learning_rate": 1e-06, + "loss": 0.6416, + "num_input_tokens_seen": 137879896, + "step": 2461 + }, + { + "epoch": 5.481069042316259, + "loss": 0.6674618721008301, + "loss_ce": 0.00022550678113475442, + "loss_iou": 0.30078125, + "loss_num": 0.01275634765625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 137879896, + "step": 2461 + }, + { + "epoch": 5.4832962138084635, + "grad_norm": 14.69758129119873, + "learning_rate": 1e-06, + "loss": 0.5432, + "num_input_tokens_seen": 137937088, + "step": 2462 + }, + { + "epoch": 5.4832962138084635, + "loss": 0.4212685227394104, + "loss_ce": 0.0002480166731402278, + "loss_iou": 0.1875, + "loss_num": 0.009033203125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 137937088, + "step": 2462 + }, + { + "epoch": 5.485523385300668, + "grad_norm": 15.696910858154297, + "learning_rate": 1e-06, + "loss": 0.8152, + "num_input_tokens_seen": 137994708, + "step": 2463 + }, + { + "epoch": 5.485523385300668, + "loss": 0.7479099035263062, + "loss_ce": 0.00022926190285943449, + "loss_iou": 0.328125, + "loss_num": 0.0185546875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 137994708, + "step": 2463 + }, + { + "epoch": 5.487750556792873, + "grad_norm": 14.623006820678711, + "learning_rate": 1e-06, + "loss": 0.5012, + "num_input_tokens_seen": 138052660, + "step": 2464 + }, + { + "epoch": 5.487750556792873, + "loss": 0.48292332887649536, + "loss_ce": 0.00025733254733495414, + "loss_iou": 0.193359375, + "loss_num": 0.019287109375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 138052660, + "step": 2464 + }, + { + "epoch": 5.489977728285078, + "grad_norm": 60.842769622802734, + "learning_rate": 1e-06, + "loss": 0.7927, + "num_input_tokens_seen": 138108180, + "step": 2465 + }, + { + "epoch": 5.489977728285078, + "loss": 0.8183541297912598, + "loss_ce": 0.00023886302369646728, + "loss_iou": 0.3203125, + "loss_num": 0.035888671875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 138108180, + "step": 2465 + }, + { + "epoch": 5.492204899777283, + "grad_norm": 13.957056045532227, + "learning_rate": 1e-06, + "loss": 0.5878, + "num_input_tokens_seen": 138166092, + "step": 2466 + }, + { + "epoch": 5.492204899777283, + "loss": 0.6444277763366699, + "loss_ce": 0.0003848428023047745, + "loss_iou": 0.2734375, + "loss_num": 0.01953125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 138166092, + "step": 2466 + }, + { + "epoch": 5.494432071269488, + "grad_norm": 21.401012420654297, + "learning_rate": 1e-06, + "loss": 0.7396, + "num_input_tokens_seen": 138223400, + "step": 2467 + }, + { + "epoch": 5.494432071269488, + "loss": 0.7348067760467529, + "loss_ce": 0.0003097508451901376, + "loss_iou": 0.3046875, + "loss_num": 0.0247802734375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 138223400, + "step": 2467 + }, + { + "epoch": 5.496659242761693, + "grad_norm": 31.345884323120117, + "learning_rate": 1e-06, + "loss": 0.6474, + "num_input_tokens_seen": 138281468, + "step": 2468 + }, + { + "epoch": 5.496659242761693, + "loss": 0.5778509378433228, + "loss_ce": 0.00021416423260234296, + "loss_iou": 0.255859375, + "loss_num": 0.0133056640625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 138281468, + "step": 2468 + }, + { + "epoch": 5.498886414253898, + "grad_norm": 14.698542594909668, + "learning_rate": 1e-06, + "loss": 0.5857, + "num_input_tokens_seen": 138337216, + "step": 2469 + }, + { + "epoch": 5.498886414253898, + "loss": 0.5800629258155823, + "loss_ce": 0.00022892668494023383, + "loss_iou": 0.2412109375, + "loss_num": 0.01953125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 138337216, + "step": 2469 + }, + { + "epoch": 5.501113585746102, + "grad_norm": 18.81879425048828, + "learning_rate": 1e-06, + "loss": 0.5896, + "num_input_tokens_seen": 138395452, + "step": 2470 + }, + { + "epoch": 5.501113585746102, + "loss": 0.6011573672294617, + "loss_ce": 0.00020525051513686776, + "loss_iou": 0.26171875, + "loss_num": 0.015625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 138395452, + "step": 2470 + }, + { + "epoch": 5.503340757238307, + "grad_norm": 14.421637535095215, + "learning_rate": 1e-06, + "loss": 0.5715, + "num_input_tokens_seen": 138453528, + "step": 2471 + }, + { + "epoch": 5.503340757238307, + "loss": 0.5524652600288391, + "loss_ce": 0.00021915776596870273, + "loss_iou": 0.2216796875, + "loss_num": 0.02197265625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 138453528, + "step": 2471 + }, + { + "epoch": 5.505567928730512, + "grad_norm": 32.5775146484375, + "learning_rate": 1e-06, + "loss": 0.5319, + "num_input_tokens_seen": 138508352, + "step": 2472 + }, + { + "epoch": 5.505567928730512, + "loss": 0.5072603225708008, + "loss_ce": 0.00018024971359409392, + "loss_iou": 0.224609375, + "loss_num": 0.01141357421875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 138508352, + "step": 2472 + }, + { + "epoch": 5.507795100222717, + "grad_norm": 14.467394828796387, + "learning_rate": 1e-06, + "loss": 0.9051, + "num_input_tokens_seen": 138562664, + "step": 2473 + }, + { + "epoch": 5.507795100222717, + "loss": 1.1971768140792847, + "loss_ce": 0.0003995007718913257, + "loss_iou": 0.474609375, + "loss_num": 0.04931640625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 138562664, + "step": 2473 + }, + { + "epoch": 5.510022271714922, + "grad_norm": 15.590438842773438, + "learning_rate": 1e-06, + "loss": 0.7975, + "num_input_tokens_seen": 138614632, + "step": 2474 + }, + { + "epoch": 5.510022271714922, + "loss": 0.7288182973861694, + "loss_ce": 0.00030269764829427004, + "loss_iou": 0.32421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 138614632, + "step": 2474 + }, + { + "epoch": 5.512249443207127, + "grad_norm": 16.527565002441406, + "learning_rate": 1e-06, + "loss": 0.6321, + "num_input_tokens_seen": 138672144, + "step": 2475 + }, + { + "epoch": 5.512249443207127, + "loss": 0.5714784860610962, + "loss_ce": 0.00018944795010611415, + "loss_iou": 0.2255859375, + "loss_num": 0.02392578125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 138672144, + "step": 2475 + }, + { + "epoch": 5.514476614699332, + "grad_norm": 16.16336441040039, + "learning_rate": 1e-06, + "loss": 0.6376, + "num_input_tokens_seen": 138727908, + "step": 2476 + }, + { + "epoch": 5.514476614699332, + "loss": 0.5442032814025879, + "loss_ce": 0.0002579318534117192, + "loss_iou": 0.2421875, + "loss_num": 0.01177978515625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 138727908, + "step": 2476 + }, + { + "epoch": 5.5167037861915365, + "grad_norm": 19.183643341064453, + "learning_rate": 1e-06, + "loss": 0.6079, + "num_input_tokens_seen": 138783588, + "step": 2477 + }, + { + "epoch": 5.5167037861915365, + "loss": 0.5135550498962402, + "loss_ce": 0.00037142602377571166, + "loss_iou": 0.1826171875, + "loss_num": 0.029541015625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 138783588, + "step": 2477 + }, + { + "epoch": 5.518930957683741, + "grad_norm": 34.50285339355469, + "learning_rate": 1e-06, + "loss": 0.588, + "num_input_tokens_seen": 138836984, + "step": 2478 + }, + { + "epoch": 5.518930957683741, + "loss": 0.5181179046630859, + "loss_ce": 0.00029565999284386635, + "loss_iou": 0.2001953125, + "loss_num": 0.0233154296875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 138836984, + "step": 2478 + }, + { + "epoch": 5.521158129175946, + "grad_norm": 16.072481155395508, + "learning_rate": 1e-06, + "loss": 0.5809, + "num_input_tokens_seen": 138895444, + "step": 2479 + }, + { + "epoch": 5.521158129175946, + "loss": 0.4480636417865753, + "loss_ce": 0.00018766321591101587, + "loss_iou": 0.1806640625, + "loss_num": 0.0174560546875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 138895444, + "step": 2479 + }, + { + "epoch": 5.523385300668151, + "grad_norm": 27.131000518798828, + "learning_rate": 1e-06, + "loss": 0.8815, + "num_input_tokens_seen": 138948860, + "step": 2480 + }, + { + "epoch": 5.523385300668151, + "loss": 0.9381879568099976, + "loss_ce": 0.00019970518769696355, + "loss_iou": 0.390625, + "loss_num": 0.03173828125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 138948860, + "step": 2480 + }, + { + "epoch": 5.525612472160356, + "grad_norm": 21.947586059570312, + "learning_rate": 1e-06, + "loss": 0.6556, + "num_input_tokens_seen": 139003808, + "step": 2481 + }, + { + "epoch": 5.525612472160356, + "loss": 0.6769753694534302, + "loss_ce": 0.0004616759833879769, + "loss_iou": 0.287109375, + "loss_num": 0.020263671875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 139003808, + "step": 2481 + }, + { + "epoch": 5.527839643652561, + "grad_norm": 18.34388542175293, + "learning_rate": 1e-06, + "loss": 1.0344, + "num_input_tokens_seen": 139060604, + "step": 2482 + }, + { + "epoch": 5.527839643652561, + "loss": 1.1095937490463257, + "loss_ce": 0.00021878261759411544, + "loss_iou": 0.4296875, + "loss_num": 0.0498046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 139060604, + "step": 2482 + }, + { + "epoch": 5.5300668151447665, + "grad_norm": 28.046768188476562, + "learning_rate": 1e-06, + "loss": 0.9061, + "num_input_tokens_seen": 139115064, + "step": 2483 + }, + { + "epoch": 5.5300668151447665, + "loss": 0.8196831941604614, + "loss_ce": 0.00022515948512591422, + "loss_iou": 0.353515625, + "loss_num": 0.0225830078125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 139115064, + "step": 2483 + }, + { + "epoch": 5.532293986636971, + "grad_norm": 38.94261169433594, + "learning_rate": 1e-06, + "loss": 0.5469, + "num_input_tokens_seen": 139171664, + "step": 2484 + }, + { + "epoch": 5.532293986636971, + "loss": 0.418997585773468, + "loss_ce": 0.0002963862498290837, + "loss_iou": 0.1787109375, + "loss_num": 0.01220703125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 139171664, + "step": 2484 + }, + { + "epoch": 5.534521158129176, + "grad_norm": 21.334020614624023, + "learning_rate": 1e-06, + "loss": 0.7203, + "num_input_tokens_seen": 139228464, + "step": 2485 + }, + { + "epoch": 5.534521158129176, + "loss": 0.8227799534797668, + "loss_ce": 0.0002701581106521189, + "loss_iou": 0.349609375, + "loss_num": 0.0244140625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 139228464, + "step": 2485 + }, + { + "epoch": 5.536748329621381, + "grad_norm": 30.529455184936523, + "learning_rate": 1e-06, + "loss": 0.6742, + "num_input_tokens_seen": 139281716, + "step": 2486 + }, + { + "epoch": 5.536748329621381, + "loss": 0.769308865070343, + "loss_ce": 0.00026593299116939306, + "loss_iou": 0.353515625, + "loss_num": 0.01190185546875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 139281716, + "step": 2486 + }, + { + "epoch": 5.538975501113586, + "grad_norm": 15.466273307800293, + "learning_rate": 1e-06, + "loss": 0.5857, + "num_input_tokens_seen": 139339380, + "step": 2487 + }, + { + "epoch": 5.538975501113586, + "loss": 0.6729258298873901, + "loss_ce": 0.00031836878042668104, + "loss_iou": 0.28515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 139339380, + "step": 2487 + }, + { + "epoch": 5.541202672605791, + "grad_norm": 21.989582061767578, + "learning_rate": 1e-06, + "loss": 0.6376, + "num_input_tokens_seen": 139398944, + "step": 2488 + }, + { + "epoch": 5.541202672605791, + "loss": 0.7792110443115234, + "loss_ce": 0.0004024332156404853, + "loss_iou": 0.31640625, + "loss_num": 0.029541015625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 139398944, + "step": 2488 + }, + { + "epoch": 5.543429844097996, + "grad_norm": 35.42259979248047, + "learning_rate": 1e-06, + "loss": 0.8917, + "num_input_tokens_seen": 139454540, + "step": 2489 + }, + { + "epoch": 5.543429844097996, + "loss": 1.0833284854888916, + "loss_ce": 0.00032068698783405125, + "loss_iou": 0.44140625, + "loss_num": 0.0400390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 139454540, + "step": 2489 + }, + { + "epoch": 5.5456570155902005, + "grad_norm": 20.069564819335938, + "learning_rate": 1e-06, + "loss": 0.7142, + "num_input_tokens_seen": 139510416, + "step": 2490 + }, + { + "epoch": 5.5456570155902005, + "loss": 0.6588754653930664, + "loss_ce": 0.0005502753192558885, + "loss_iou": 0.28515625, + "loss_num": 0.017822265625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 139510416, + "step": 2490 + }, + { + "epoch": 5.547884187082405, + "grad_norm": 28.32633399963379, + "learning_rate": 1e-06, + "loss": 0.5564, + "num_input_tokens_seen": 139567320, + "step": 2491 + }, + { + "epoch": 5.547884187082405, + "loss": 0.4439275860786438, + "loss_ce": 0.00026305546634830534, + "loss_iou": 0.2021484375, + "loss_num": 0.00799560546875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 139567320, + "step": 2491 + }, + { + "epoch": 5.55011135857461, + "grad_norm": 29.56279182434082, + "learning_rate": 1e-06, + "loss": 0.6047, + "num_input_tokens_seen": 139625440, + "step": 2492 + }, + { + "epoch": 5.55011135857461, + "loss": 0.5217064619064331, + "loss_ce": 0.00022206196445040405, + "loss_iou": 0.2431640625, + "loss_num": 0.006988525390625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 139625440, + "step": 2492 + }, + { + "epoch": 5.552338530066815, + "grad_norm": 46.10884475708008, + "learning_rate": 1e-06, + "loss": 0.5618, + "num_input_tokens_seen": 139682200, + "step": 2493 + }, + { + "epoch": 5.552338530066815, + "loss": 0.41375732421875, + "loss_ce": 0.00018312688916921616, + "loss_iou": 0.1875, + "loss_num": 0.00775146484375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 139682200, + "step": 2493 + }, + { + "epoch": 5.55456570155902, + "grad_norm": 18.429887771606445, + "learning_rate": 1e-06, + "loss": 0.5575, + "num_input_tokens_seen": 139738800, + "step": 2494 + }, + { + "epoch": 5.55456570155902, + "loss": 0.6128218770027161, + "loss_ce": 0.00027302149101160467, + "loss_iou": 0.28515625, + "loss_num": 0.008544921875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 139738800, + "step": 2494 + }, + { + "epoch": 5.556792873051225, + "grad_norm": 24.69832420349121, + "learning_rate": 1e-06, + "loss": 0.7079, + "num_input_tokens_seen": 139796508, + "step": 2495 + }, + { + "epoch": 5.556792873051225, + "loss": 0.8797301054000854, + "loss_ce": 0.00027458026306703687, + "loss_iou": 0.3515625, + "loss_num": 0.03466796875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 139796508, + "step": 2495 + }, + { + "epoch": 5.55902004454343, + "grad_norm": 20.031938552856445, + "learning_rate": 1e-06, + "loss": 0.8567, + "num_input_tokens_seen": 139852360, + "step": 2496 + }, + { + "epoch": 5.55902004454343, + "loss": 0.9052836894989014, + "loss_ce": 0.000254325830610469, + "loss_iou": 0.412109375, + "loss_num": 0.0166015625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 139852360, + "step": 2496 + }, + { + "epoch": 5.5612472160356345, + "grad_norm": 24.965023040771484, + "learning_rate": 1e-06, + "loss": 0.7616, + "num_input_tokens_seen": 139907408, + "step": 2497 + }, + { + "epoch": 5.5612472160356345, + "loss": 0.6985030770301819, + "loss_ce": 0.0002608746290206909, + "loss_iou": 0.287109375, + "loss_num": 0.0250244140625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 139907408, + "step": 2497 + }, + { + "epoch": 5.563474387527839, + "grad_norm": 22.246639251708984, + "learning_rate": 1e-06, + "loss": 0.9545, + "num_input_tokens_seen": 139963936, + "step": 2498 + }, + { + "epoch": 5.563474387527839, + "loss": 0.8662214875221252, + "loss_ce": 0.0002546662581153214, + "loss_iou": 0.35546875, + "loss_num": 0.031005859375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 139963936, + "step": 2498 + }, + { + "epoch": 5.565701559020044, + "grad_norm": 16.71168327331543, + "learning_rate": 1e-06, + "loss": 0.5694, + "num_input_tokens_seen": 140018732, + "step": 2499 + }, + { + "epoch": 5.565701559020044, + "loss": 0.6181437373161316, + "loss_ce": 0.00022381696908269078, + "loss_iou": 0.263671875, + "loss_num": 0.0177001953125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 140018732, + "step": 2499 + }, + { + "epoch": 5.567928730512249, + "grad_norm": 17.26616096496582, + "learning_rate": 1e-06, + "loss": 0.6123, + "num_input_tokens_seen": 140075980, + "step": 2500 + }, + { + "epoch": 5.567928730512249, + "eval_seeclick_web_CIoU": 0.5732596218585968, + "eval_seeclick_web_GIoU": 0.5691904425621033, + "eval_seeclick_web_IoU": 0.5895318686962128, + "eval_seeclick_web_MAE_all": 0.016760945785790682, + "eval_seeclick_web_MAE_h": 0.009123492753133178, + "eval_seeclick_web_MAE_w": 0.01760054472833872, + "eval_seeclick_web_MAE_x_boxes": 0.008502837270498276, + "eval_seeclick_web_MAE_y_boxes": 0.02224468719214201, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9365757703781128, + "eval_seeclick_web_loss_ce": 0.00031520209449809045, + "eval_seeclick_web_loss_iou": 0.4267578125, + "eval_seeclick_web_loss_num": 0.013166427612304688, + "eval_seeclick_web_loss_xval": 0.918701171875, + "eval_seeclick_web_runtime": 28.1194, + "eval_seeclick_web_samples_per_second": 1.778, + "eval_seeclick_web_steps_per_second": 0.071, + "num_input_tokens_seen": 140075980, + "step": 2500 + }, + { + "epoch": 5.567928730512249, + "eval_icons_CIoU": 0.32149502635002136, + "eval_icons_GIoU": 0.33890318870544434, + "eval_icons_IoU": 0.3914993554353714, + "eval_icons_MAE_all": 0.05996252968907356, + "eval_icons_MAE_h": 0.03914587013423443, + "eval_icons_MAE_w": 0.05691290646791458, + "eval_icons_MAE_x_boxes": 0.05956794694066048, + "eval_icons_MAE_y_boxes": 0.037835730239748955, + "eval_icons_inside_bbox": 0.6493055522441864, + "eval_icons_loss": 1.6447449922561646, + "eval_icons_loss_ce": 0.0003619373310357332, + "eval_icons_loss_iou": 0.6527099609375, + "eval_icons_loss_num": 0.05636787414550781, + "eval_icons_loss_xval": 1.58837890625, + "eval_icons_runtime": 25.1446, + "eval_icons_samples_per_second": 1.988, + "eval_icons_steps_per_second": 0.08, + "num_input_tokens_seen": 140075980, + "step": 2500 + }, + { + "epoch": 5.567928730512249, + "eval_screenspot_CIoU": 0.33252301812171936, + "eval_screenspot_GIoU": 0.3458707630634308, + "eval_screenspot_IoU": 0.4156401753425598, + "eval_screenspot_MAE_all": 0.066184946646293, + "eval_screenspot_MAE_h": 0.03784913197159767, + "eval_screenspot_MAE_w": 0.08022025724252065, + "eval_screenspot_MAE_x_boxes": 0.08052412172158559, + "eval_screenspot_MAE_y_boxes": 0.0472174392392238, + "eval_screenspot_inside_bbox": 0.6462500095367432, + "eval_screenspot_loss": 1.7031261920928955, + "eval_screenspot_loss_ce": 0.0003634931442017357, + "eval_screenspot_loss_iou": 0.6932779947916666, + "eval_screenspot_loss_num": 0.07747904459635417, + "eval_screenspot_loss_xval": 1.7745768229166667, + "eval_screenspot_runtime": 39.8835, + "eval_screenspot_samples_per_second": 2.231, + "eval_screenspot_steps_per_second": 0.075, + "num_input_tokens_seen": 140075980, + "step": 2500 + }, + { + "epoch": 5.567928730512249, + "eval_compot_CIoU": 0.36012406647205353, + "eval_compot_GIoU": 0.3780812919139862, + "eval_compot_IoU": 0.41496771574020386, + "eval_compot_MAE_all": 0.017852995079010725, + "eval_compot_MAE_h": 0.007951512467116117, + "eval_compot_MAE_w": 0.02203182876110077, + "eval_compot_MAE_x_boxes": 0.02926653064787388, + "eval_compot_MAE_y_boxes": 0.006283238530158997, + "eval_compot_inside_bbox": 0.6458333432674408, + "eval_compot_loss": 1.3696753978729248, + "eval_compot_loss_ce": 0.00030196372244972736, + "eval_compot_loss_iou": 0.6297607421875, + "eval_compot_loss_num": 0.016778945922851562, + "eval_compot_loss_xval": 1.343505859375, + "eval_compot_runtime": 24.2244, + "eval_compot_samples_per_second": 2.064, + "eval_compot_steps_per_second": 0.083, + "num_input_tokens_seen": 140075980, + "step": 2500 + }, + { + "epoch": 5.567928730512249, + "eval_custom_ui_val_CIoU": 0.4640722307893965, + "eval_custom_ui_val_GIoU": 0.48160041703118217, + "eval_custom_ui_val_IoU": 0.518558962477578, + "eval_custom_ui_val_MAE_all": 0.03118372191157606, + "eval_custom_ui_val_MAE_h": 0.018310750601813197, + "eval_custom_ui_val_MAE_w": 0.036142679759197764, + "eval_custom_ui_val_MAE_x_boxes": 0.03573724896543556, + "eval_custom_ui_val_MAE_y_boxes": 0.01598744459139804, + "eval_custom_ui_val_inside_bbox": 0.738811731338501, + "eval_custom_ui_val_loss": 1.2015693187713623, + "eval_custom_ui_val_loss_ce": 0.0003550093032471422, + "eval_custom_ui_val_loss_iou": 0.5086941189236112, + "eval_custom_ui_val_loss_num": 0.029562632242838543, + "eval_custom_ui_val_loss_xval": 1.1652560763888888, + "eval_custom_ui_val_runtime": 74.2518, + "eval_custom_ui_val_samples_per_second": 3.569, + "eval_custom_ui_val_steps_per_second": 0.121, + "num_input_tokens_seen": 140075980, + "step": 2500 + }, + { + "epoch": 5.567928730512249, + "loss": 0.907296895980835, + "loss_ce": 0.0003145075461361557, + "loss_iou": 0.3984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 140075980, + "step": 2500 + }, + { + "epoch": 5.570155902004454, + "grad_norm": 96.08206939697266, + "learning_rate": 1e-06, + "loss": 0.6085, + "num_input_tokens_seen": 140130244, + "step": 2501 + }, + { + "epoch": 5.570155902004454, + "loss": 0.6808421611785889, + "loss_ce": 0.00023916579084470868, + "loss_iou": 0.28125, + "loss_num": 0.0234375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 140130244, + "step": 2501 + }, + { + "epoch": 5.57238307349666, + "grad_norm": 23.38459587097168, + "learning_rate": 1e-06, + "loss": 0.7066, + "num_input_tokens_seen": 140187016, + "step": 2502 + }, + { + "epoch": 5.57238307349666, + "loss": 0.6098702549934387, + "loss_ce": 0.0002510999620426446, + "loss_iou": 0.267578125, + "loss_num": 0.0147705078125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 140187016, + "step": 2502 + }, + { + "epoch": 5.574610244988865, + "grad_norm": 18.638948440551758, + "learning_rate": 1e-06, + "loss": 0.6391, + "num_input_tokens_seen": 140245660, + "step": 2503 + }, + { + "epoch": 5.574610244988865, + "loss": 0.7389647364616394, + "loss_ce": 0.0001952238380908966, + "loss_iou": 0.287109375, + "loss_num": 0.03271484375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 140245660, + "step": 2503 + }, + { + "epoch": 5.5768374164810695, + "grad_norm": 15.6986722946167, + "learning_rate": 1e-06, + "loss": 0.6478, + "num_input_tokens_seen": 140300240, + "step": 2504 + }, + { + "epoch": 5.5768374164810695, + "loss": 0.4837605357170105, + "loss_ce": 0.00024005114391911775, + "loss_iou": 0.2177734375, + "loss_num": 0.00958251953125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 140300240, + "step": 2504 + }, + { + "epoch": 5.579064587973274, + "grad_norm": 15.869756698608398, + "learning_rate": 1e-06, + "loss": 0.6703, + "num_input_tokens_seen": 140357396, + "step": 2505 + }, + { + "epoch": 5.579064587973274, + "loss": 0.8984197378158569, + "loss_ce": 0.00022636953508481383, + "loss_iou": 0.365234375, + "loss_num": 0.03369140625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 140357396, + "step": 2505 + }, + { + "epoch": 5.581291759465479, + "grad_norm": 15.25345516204834, + "learning_rate": 1e-06, + "loss": 0.8536, + "num_input_tokens_seen": 140413868, + "step": 2506 + }, + { + "epoch": 5.581291759465479, + "loss": 0.951309084892273, + "loss_ce": 0.0006254723994061351, + "loss_iou": 0.41015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 140413868, + "step": 2506 + }, + { + "epoch": 5.583518930957684, + "grad_norm": 35.47723388671875, + "learning_rate": 1e-06, + "loss": 0.6912, + "num_input_tokens_seen": 140471300, + "step": 2507 + }, + { + "epoch": 5.583518930957684, + "loss": 0.6750940084457397, + "loss_ce": 0.0002893685596063733, + "loss_iou": 0.306640625, + "loss_num": 0.011962890625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 140471300, + "step": 2507 + }, + { + "epoch": 5.585746102449889, + "grad_norm": 45.554256439208984, + "learning_rate": 1e-06, + "loss": 0.6581, + "num_input_tokens_seen": 140528484, + "step": 2508 + }, + { + "epoch": 5.585746102449889, + "loss": 0.7441803216934204, + "loss_ce": 0.00028382547316141427, + "loss_iou": 0.326171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 140528484, + "step": 2508 + }, + { + "epoch": 5.587973273942094, + "grad_norm": 13.767509460449219, + "learning_rate": 1e-06, + "loss": 0.7014, + "num_input_tokens_seen": 140585164, + "step": 2509 + }, + { + "epoch": 5.587973273942094, + "loss": 0.6096572875976562, + "loss_ce": 0.00028234010096639395, + "loss_iou": 0.25390625, + "loss_num": 0.0201416015625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 140585164, + "step": 2509 + }, + { + "epoch": 5.590200445434299, + "grad_norm": 42.34712219238281, + "learning_rate": 1e-06, + "loss": 0.7912, + "num_input_tokens_seen": 140640568, + "step": 2510 + }, + { + "epoch": 5.590200445434299, + "loss": 0.9159772396087646, + "loss_ce": 0.00020570913329720497, + "loss_iou": 0.3984375, + "loss_num": 0.0234375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 140640568, + "step": 2510 + }, + { + "epoch": 5.5924276169265035, + "grad_norm": 12.886595726013184, + "learning_rate": 1e-06, + "loss": 0.8791, + "num_input_tokens_seen": 140697100, + "step": 2511 + }, + { + "epoch": 5.5924276169265035, + "loss": 0.7073045372962952, + "loss_ce": 0.0002733045257627964, + "loss_iou": 0.306640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 140697100, + "step": 2511 + }, + { + "epoch": 5.594654788418708, + "grad_norm": 20.680938720703125, + "learning_rate": 1e-06, + "loss": 0.8405, + "num_input_tokens_seen": 140754100, + "step": 2512 + }, + { + "epoch": 5.594654788418708, + "loss": 0.854705274105072, + "loss_ce": 0.00021311425371095538, + "loss_iou": 0.341796875, + "loss_num": 0.034423828125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 140754100, + "step": 2512 + }, + { + "epoch": 5.596881959910913, + "grad_norm": 23.940448760986328, + "learning_rate": 1e-06, + "loss": 0.8587, + "num_input_tokens_seen": 140809880, + "step": 2513 + }, + { + "epoch": 5.596881959910913, + "loss": 0.8762653470039368, + "loss_ce": 0.0002887820010073483, + "loss_iou": 0.40625, + "loss_num": 0.012451171875, + "loss_xval": 0.875, + "num_input_tokens_seen": 140809880, + "step": 2513 + }, + { + "epoch": 5.599109131403118, + "grad_norm": 22.817508697509766, + "learning_rate": 1e-06, + "loss": 0.6931, + "num_input_tokens_seen": 140865016, + "step": 2514 + }, + { + "epoch": 5.599109131403118, + "loss": 0.8307744860649109, + "loss_ce": 0.00020807303371839225, + "loss_iou": 0.353515625, + "loss_num": 0.0250244140625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 140865016, + "step": 2514 + }, + { + "epoch": 5.601336302895323, + "grad_norm": 27.795007705688477, + "learning_rate": 1e-06, + "loss": 0.6248, + "num_input_tokens_seen": 140921912, + "step": 2515 + }, + { + "epoch": 5.601336302895323, + "loss": 0.6642851829528809, + "loss_ce": 0.00022267791791819036, + "loss_iou": 0.294921875, + "loss_num": 0.014892578125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 140921912, + "step": 2515 + }, + { + "epoch": 5.603563474387528, + "grad_norm": 17.664352416992188, + "learning_rate": 1e-06, + "loss": 0.5839, + "num_input_tokens_seen": 140976712, + "step": 2516 + }, + { + "epoch": 5.603563474387528, + "loss": 0.4930035173892975, + "loss_ce": 0.00020567109459079802, + "loss_iou": 0.2109375, + "loss_num": 0.013916015625, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 140976712, + "step": 2516 + }, + { + "epoch": 5.605790645879733, + "grad_norm": 16.923248291015625, + "learning_rate": 1e-06, + "loss": 0.6882, + "num_input_tokens_seen": 141030776, + "step": 2517 + }, + { + "epoch": 5.605790645879733, + "loss": 0.6687592267990112, + "loss_ce": 0.0001800994505174458, + "loss_iou": 0.275390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 141030776, + "step": 2517 + }, + { + "epoch": 5.6080178173719375, + "grad_norm": 21.347900390625, + "learning_rate": 1e-06, + "loss": 0.6674, + "num_input_tokens_seen": 141087176, + "step": 2518 + }, + { + "epoch": 5.6080178173719375, + "loss": 0.7416476011276245, + "loss_ce": 0.00019255219376645982, + "loss_iou": 0.333984375, + "loss_num": 0.01458740234375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 141087176, + "step": 2518 + }, + { + "epoch": 5.610244988864142, + "grad_norm": 29.194229125976562, + "learning_rate": 1e-06, + "loss": 0.7402, + "num_input_tokens_seen": 141143912, + "step": 2519 + }, + { + "epoch": 5.610244988864142, + "loss": 0.937484860420227, + "loss_ce": 0.00022902997443452477, + "loss_iou": 0.421875, + "loss_num": 0.0186767578125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 141143912, + "step": 2519 + }, + { + "epoch": 5.612472160356347, + "grad_norm": 25.27153778076172, + "learning_rate": 1e-06, + "loss": 0.6045, + "num_input_tokens_seen": 141199920, + "step": 2520 + }, + { + "epoch": 5.612472160356347, + "loss": 0.7792827486991882, + "loss_ce": 0.00023000439978204668, + "loss_iou": 0.318359375, + "loss_num": 0.028564453125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 141199920, + "step": 2520 + }, + { + "epoch": 5.614699331848552, + "grad_norm": 20.523181915283203, + "learning_rate": 1e-06, + "loss": 0.5798, + "num_input_tokens_seen": 141257784, + "step": 2521 + }, + { + "epoch": 5.614699331848552, + "loss": 0.6049889326095581, + "loss_ce": 0.0002525739837437868, + "loss_iou": 0.271484375, + "loss_num": 0.01226806640625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 141257784, + "step": 2521 + }, + { + "epoch": 5.616926503340757, + "grad_norm": 26.476478576660156, + "learning_rate": 1e-06, + "loss": 0.7458, + "num_input_tokens_seen": 141311380, + "step": 2522 + }, + { + "epoch": 5.616926503340757, + "loss": 0.8652767539024353, + "loss_ce": 0.000286536494968459, + "loss_iou": 0.3984375, + "loss_num": 0.01385498046875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 141311380, + "step": 2522 + }, + { + "epoch": 5.619153674832962, + "grad_norm": 25.442533493041992, + "learning_rate": 1e-06, + "loss": 0.8765, + "num_input_tokens_seen": 141367500, + "step": 2523 + }, + { + "epoch": 5.619153674832962, + "loss": 1.0669339895248413, + "loss_ce": 0.00028354296227917075, + "loss_iou": 0.44140625, + "loss_num": 0.037109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 141367500, + "step": 2523 + }, + { + "epoch": 5.621380846325167, + "grad_norm": 24.357839584350586, + "learning_rate": 1e-06, + "loss": 0.6908, + "num_input_tokens_seen": 141425116, + "step": 2524 + }, + { + "epoch": 5.621380846325167, + "loss": 0.751206636428833, + "loss_ce": 0.0002300894120708108, + "loss_iou": 0.3359375, + "loss_num": 0.0162353515625, + "loss_xval": 0.75, + "num_input_tokens_seen": 141425116, + "step": 2524 + }, + { + "epoch": 5.6236080178173715, + "grad_norm": 17.895536422729492, + "learning_rate": 1e-06, + "loss": 0.7793, + "num_input_tokens_seen": 141481940, + "step": 2525 + }, + { + "epoch": 5.6236080178173715, + "loss": 0.9199173450469971, + "loss_ce": 0.0002396509371465072, + "loss_iou": 0.390625, + "loss_num": 0.027587890625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 141481940, + "step": 2525 + }, + { + "epoch": 5.625835189309576, + "grad_norm": 13.240893363952637, + "learning_rate": 1e-06, + "loss": 0.686, + "num_input_tokens_seen": 141538040, + "step": 2526 + }, + { + "epoch": 5.625835189309576, + "loss": 0.52378910779953, + "loss_ce": 0.00022953613370191306, + "loss_iou": 0.234375, + "loss_num": 0.0107421875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 141538040, + "step": 2526 + }, + { + "epoch": 5.628062360801781, + "grad_norm": 16.763574600219727, + "learning_rate": 1e-06, + "loss": 0.6977, + "num_input_tokens_seen": 141595016, + "step": 2527 + }, + { + "epoch": 5.628062360801781, + "loss": 0.8276025056838989, + "loss_ce": 0.00020992739882785827, + "loss_iou": 0.32421875, + "loss_num": 0.03515625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 141595016, + "step": 2527 + }, + { + "epoch": 5.630289532293987, + "grad_norm": 23.006132125854492, + "learning_rate": 1e-06, + "loss": 0.788, + "num_input_tokens_seen": 141653304, + "step": 2528 + }, + { + "epoch": 5.630289532293987, + "loss": 0.8520069122314453, + "loss_ce": 0.0002002965338760987, + "loss_iou": 0.35546875, + "loss_num": 0.0281982421875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 141653304, + "step": 2528 + }, + { + "epoch": 5.632516703786192, + "grad_norm": 15.624357223510742, + "learning_rate": 1e-06, + "loss": 0.651, + "num_input_tokens_seen": 141710920, + "step": 2529 + }, + { + "epoch": 5.632516703786192, + "loss": 0.6601657867431641, + "loss_ce": 0.0002536678221076727, + "loss_iou": 0.296875, + "loss_num": 0.01312255859375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 141710920, + "step": 2529 + }, + { + "epoch": 5.634743875278397, + "grad_norm": 18.147720336914062, + "learning_rate": 1e-06, + "loss": 0.7309, + "num_input_tokens_seen": 141765836, + "step": 2530 + }, + { + "epoch": 5.634743875278397, + "loss": 0.8750065565109253, + "loss_ce": 0.000250728742685169, + "loss_iou": 0.36328125, + "loss_num": 0.029541015625, + "loss_xval": 0.875, + "num_input_tokens_seen": 141765836, + "step": 2530 + }, + { + "epoch": 5.636971046770602, + "grad_norm": 18.959402084350586, + "learning_rate": 1e-06, + "loss": 0.6962, + "num_input_tokens_seen": 141821112, + "step": 2531 + }, + { + "epoch": 5.636971046770602, + "loss": 0.8564548492431641, + "loss_ce": 0.00025371278752572834, + "loss_iou": 0.357421875, + "loss_num": 0.0281982421875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 141821112, + "step": 2531 + }, + { + "epoch": 5.639198218262806, + "grad_norm": 28.660118103027344, + "learning_rate": 1e-06, + "loss": 0.6739, + "num_input_tokens_seen": 141877664, + "step": 2532 + }, + { + "epoch": 5.639198218262806, + "loss": 0.8321478366851807, + "loss_ce": 0.0004828467790503055, + "loss_iou": 0.30078125, + "loss_num": 0.04541015625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 141877664, + "step": 2532 + }, + { + "epoch": 5.641425389755011, + "grad_norm": 33.3441162109375, + "learning_rate": 1e-06, + "loss": 0.6838, + "num_input_tokens_seen": 141934636, + "step": 2533 + }, + { + "epoch": 5.641425389755011, + "loss": 0.7307108640670776, + "loss_ce": 0.0002421102544758469, + "loss_iou": 0.318359375, + "loss_num": 0.0185546875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 141934636, + "step": 2533 + }, + { + "epoch": 5.643652561247216, + "grad_norm": 22.631315231323242, + "learning_rate": 1e-06, + "loss": 0.8627, + "num_input_tokens_seen": 141991128, + "step": 2534 + }, + { + "epoch": 5.643652561247216, + "loss": 0.9368560910224915, + "loss_ce": 0.0003326181322336197, + "loss_iou": 0.3984375, + "loss_num": 0.0283203125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 141991128, + "step": 2534 + }, + { + "epoch": 5.645879732739421, + "grad_norm": 13.450477600097656, + "learning_rate": 1e-06, + "loss": 0.61, + "num_input_tokens_seen": 142045436, + "step": 2535 + }, + { + "epoch": 5.645879732739421, + "loss": 0.5746713280677795, + "loss_ce": 0.00020842923549935222, + "loss_iou": 0.2470703125, + "loss_num": 0.015869140625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 142045436, + "step": 2535 + }, + { + "epoch": 5.648106904231626, + "grad_norm": 14.741578102111816, + "learning_rate": 1e-06, + "loss": 0.5904, + "num_input_tokens_seen": 142102496, + "step": 2536 + }, + { + "epoch": 5.648106904231626, + "loss": 0.4626786708831787, + "loss_ce": 0.0005204671761021018, + "loss_iou": 0.197265625, + "loss_num": 0.01361083984375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 142102496, + "step": 2536 + }, + { + "epoch": 5.650334075723831, + "grad_norm": 17.967416763305664, + "learning_rate": 1e-06, + "loss": 0.6889, + "num_input_tokens_seen": 142160328, + "step": 2537 + }, + { + "epoch": 5.650334075723831, + "loss": 0.5751903057098389, + "loss_ce": 0.00017807658878155053, + "loss_iou": 0.2265625, + "loss_num": 0.0242919921875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 142160328, + "step": 2537 + }, + { + "epoch": 5.652561247216036, + "grad_norm": 19.637807846069336, + "learning_rate": 1e-06, + "loss": 0.5937, + "num_input_tokens_seen": 142215832, + "step": 2538 + }, + { + "epoch": 5.652561247216036, + "loss": 0.5415201187133789, + "loss_ce": 0.0002603091998025775, + "loss_iou": 0.25, + "loss_num": 0.0081787109375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 142215832, + "step": 2538 + }, + { + "epoch": 5.6547884187082404, + "grad_norm": 27.54079246520996, + "learning_rate": 1e-06, + "loss": 0.8718, + "num_input_tokens_seen": 142272748, + "step": 2539 + }, + { + "epoch": 5.6547884187082404, + "loss": 0.9330868721008301, + "loss_ce": 0.00022556885960511863, + "loss_iou": 0.380859375, + "loss_num": 0.0341796875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 142272748, + "step": 2539 + }, + { + "epoch": 5.657015590200445, + "grad_norm": 23.226085662841797, + "learning_rate": 1e-06, + "loss": 0.6973, + "num_input_tokens_seen": 142329648, + "step": 2540 + }, + { + "epoch": 5.657015590200445, + "loss": 0.5698352456092834, + "loss_ce": 0.00025519938208162785, + "loss_iou": 0.232421875, + "loss_num": 0.0208740234375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 142329648, + "step": 2540 + }, + { + "epoch": 5.65924276169265, + "grad_norm": 28.16909408569336, + "learning_rate": 1e-06, + "loss": 0.6034, + "num_input_tokens_seen": 142385652, + "step": 2541 + }, + { + "epoch": 5.65924276169265, + "loss": 0.7484890222549438, + "loss_ce": 0.00019804939802270383, + "loss_iou": 0.3125, + "loss_num": 0.0247802734375, + "loss_xval": 0.75, + "num_input_tokens_seen": 142385652, + "step": 2541 + }, + { + "epoch": 5.661469933184855, + "grad_norm": 20.69179916381836, + "learning_rate": 1e-06, + "loss": 0.6642, + "num_input_tokens_seen": 142439416, + "step": 2542 + }, + { + "epoch": 5.661469933184855, + "loss": 0.7014844417572021, + "loss_ce": 0.0001904680102597922, + "loss_iou": 0.3046875, + "loss_num": 0.018798828125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 142439416, + "step": 2542 + }, + { + "epoch": 5.66369710467706, + "grad_norm": 22.986743927001953, + "learning_rate": 1e-06, + "loss": 0.6988, + "num_input_tokens_seen": 142494088, + "step": 2543 + }, + { + "epoch": 5.66369710467706, + "loss": 0.6153843402862549, + "loss_ce": 0.0003941247705370188, + "loss_iou": 0.27734375, + "loss_num": 0.01214599609375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 142494088, + "step": 2543 + }, + { + "epoch": 5.665924276169265, + "grad_norm": 15.41435432434082, + "learning_rate": 1e-06, + "loss": 0.584, + "num_input_tokens_seen": 142548856, + "step": 2544 + }, + { + "epoch": 5.665924276169265, + "loss": 0.4866850674152374, + "loss_ce": 0.0002348774141864851, + "loss_iou": 0.21875, + "loss_num": 0.00994873046875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 142548856, + "step": 2544 + }, + { + "epoch": 5.66815144766147, + "grad_norm": 19.401214599609375, + "learning_rate": 1e-06, + "loss": 0.803, + "num_input_tokens_seen": 142605360, + "step": 2545 + }, + { + "epoch": 5.66815144766147, + "loss": 0.8005963563919067, + "loss_ce": 0.0003033963148482144, + "loss_iou": 0.345703125, + "loss_num": 0.021728515625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 142605360, + "step": 2545 + }, + { + "epoch": 5.6703786191536745, + "grad_norm": 15.095193862915039, + "learning_rate": 1e-06, + "loss": 0.7953, + "num_input_tokens_seen": 142660940, + "step": 2546 + }, + { + "epoch": 5.6703786191536745, + "loss": 0.6447733640670776, + "loss_ce": 0.00024208366812672466, + "loss_iou": 0.265625, + "loss_num": 0.022705078125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 142660940, + "step": 2546 + }, + { + "epoch": 5.67260579064588, + "grad_norm": 17.20547103881836, + "learning_rate": 1e-06, + "loss": 0.6511, + "num_input_tokens_seen": 142717892, + "step": 2547 + }, + { + "epoch": 5.67260579064588, + "loss": 0.6638010740280151, + "loss_ce": 0.0002268501848448068, + "loss_iou": 0.267578125, + "loss_num": 0.0257568359375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 142717892, + "step": 2547 + }, + { + "epoch": 5.674832962138085, + "grad_norm": 26.371519088745117, + "learning_rate": 1e-06, + "loss": 0.7449, + "num_input_tokens_seen": 142769660, + "step": 2548 + }, + { + "epoch": 5.674832962138085, + "loss": 0.6620455980300903, + "loss_ce": 0.0003024160396307707, + "loss_iou": 0.267578125, + "loss_num": 0.025390625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 142769660, + "step": 2548 + }, + { + "epoch": 5.67706013363029, + "grad_norm": 27.11166763305664, + "learning_rate": 1e-06, + "loss": 0.9627, + "num_input_tokens_seen": 142825764, + "step": 2549 + }, + { + "epoch": 5.67706013363029, + "loss": 1.015166997909546, + "loss_ce": 0.00027431544731371105, + "loss_iou": 0.3984375, + "loss_num": 0.043701171875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 142825764, + "step": 2549 + }, + { + "epoch": 5.679287305122495, + "grad_norm": 14.629166603088379, + "learning_rate": 1e-06, + "loss": 0.6238, + "num_input_tokens_seen": 142880500, + "step": 2550 + }, + { + "epoch": 5.679287305122495, + "loss": 0.4876967668533325, + "loss_ce": 0.00039207623922266066, + "loss_iou": 0.2109375, + "loss_num": 0.01318359375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 142880500, + "step": 2550 + }, + { + "epoch": 5.6815144766147, + "grad_norm": 41.370121002197266, + "learning_rate": 1e-06, + "loss": 0.7918, + "num_input_tokens_seen": 142932608, + "step": 2551 + }, + { + "epoch": 5.6815144766147, + "loss": 0.9631957411766052, + "loss_ce": 0.0004271506331861019, + "loss_iou": 0.3984375, + "loss_num": 0.033203125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 142932608, + "step": 2551 + }, + { + "epoch": 5.6837416481069045, + "grad_norm": 17.59296417236328, + "learning_rate": 1e-06, + "loss": 0.6625, + "num_input_tokens_seen": 142988896, + "step": 2552 + }, + { + "epoch": 5.6837416481069045, + "loss": 0.51399827003479, + "loss_ce": 0.0003264106926508248, + "loss_iou": 0.2333984375, + "loss_num": 0.00933837890625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 142988896, + "step": 2552 + }, + { + "epoch": 5.685968819599109, + "grad_norm": 28.058643341064453, + "learning_rate": 1e-06, + "loss": 0.7143, + "num_input_tokens_seen": 143043040, + "step": 2553 + }, + { + "epoch": 5.685968819599109, + "loss": 0.6291226744651794, + "loss_ce": 0.00021646139794029295, + "loss_iou": 0.2890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 143043040, + "step": 2553 + }, + { + "epoch": 5.688195991091314, + "grad_norm": 25.84493064880371, + "learning_rate": 1e-06, + "loss": 0.6757, + "num_input_tokens_seen": 143096936, + "step": 2554 + }, + { + "epoch": 5.688195991091314, + "loss": 0.8105074167251587, + "loss_ce": 0.0002046898298431188, + "loss_iou": 0.33203125, + "loss_num": 0.0291748046875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 143096936, + "step": 2554 + }, + { + "epoch": 5.690423162583519, + "grad_norm": 21.50450897216797, + "learning_rate": 1e-06, + "loss": 0.5824, + "num_input_tokens_seen": 143155644, + "step": 2555 + }, + { + "epoch": 5.690423162583519, + "loss": 0.5691958665847778, + "loss_ce": 0.00022610818268731236, + "loss_iou": 0.2578125, + "loss_num": 0.01043701171875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 143155644, + "step": 2555 + }, + { + "epoch": 5.692650334075724, + "grad_norm": 18.279918670654297, + "learning_rate": 1e-06, + "loss": 0.6493, + "num_input_tokens_seen": 143213008, + "step": 2556 + }, + { + "epoch": 5.692650334075724, + "loss": 0.828661322593689, + "loss_ce": 0.0002921561535913497, + "loss_iou": 0.373046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 143213008, + "step": 2556 + }, + { + "epoch": 5.694877505567929, + "grad_norm": 16.526641845703125, + "learning_rate": 1e-06, + "loss": 0.6586, + "num_input_tokens_seen": 143269428, + "step": 2557 + }, + { + "epoch": 5.694877505567929, + "loss": 0.6889902353286743, + "loss_ce": 0.00026959230308420956, + "loss_iou": 0.2890625, + "loss_num": 0.02197265625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 143269428, + "step": 2557 + }, + { + "epoch": 5.697104677060134, + "grad_norm": 16.533336639404297, + "learning_rate": 1e-06, + "loss": 0.5711, + "num_input_tokens_seen": 143326208, + "step": 2558 + }, + { + "epoch": 5.697104677060134, + "loss": 0.5453148484230042, + "loss_ce": 0.0002708985994104296, + "loss_iou": 0.23828125, + "loss_num": 0.013671875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 143326208, + "step": 2558 + }, + { + "epoch": 5.6993318485523385, + "grad_norm": 85.07674407958984, + "learning_rate": 1e-06, + "loss": 0.7295, + "num_input_tokens_seen": 143383116, + "step": 2559 + }, + { + "epoch": 5.6993318485523385, + "loss": 0.5694315433502197, + "loss_ce": 0.00021770322928205132, + "loss_iou": 0.2236328125, + "loss_num": 0.024169921875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 143383116, + "step": 2559 + }, + { + "epoch": 5.701559020044543, + "grad_norm": 23.484725952148438, + "learning_rate": 1e-06, + "loss": 0.6961, + "num_input_tokens_seen": 143437420, + "step": 2560 + }, + { + "epoch": 5.701559020044543, + "loss": 0.736332893371582, + "loss_ce": 0.0002489334437996149, + "loss_iou": 0.28515625, + "loss_num": 0.033203125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 143437420, + "step": 2560 + }, + { + "epoch": 5.703786191536748, + "grad_norm": 18.436580657958984, + "learning_rate": 1e-06, + "loss": 0.7144, + "num_input_tokens_seen": 143493204, + "step": 2561 + }, + { + "epoch": 5.703786191536748, + "loss": 0.8088289499282837, + "loss_ce": 0.0002351756556890905, + "loss_iou": 0.3671875, + "loss_num": 0.01513671875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 143493204, + "step": 2561 + }, + { + "epoch": 5.706013363028953, + "grad_norm": 406.9714660644531, + "learning_rate": 1e-06, + "loss": 0.8129, + "num_input_tokens_seen": 143548960, + "step": 2562 + }, + { + "epoch": 5.706013363028953, + "loss": 0.7016435861587524, + "loss_ce": 0.00022760604042559862, + "loss_iou": 0.318359375, + "loss_num": 0.01287841796875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 143548960, + "step": 2562 + }, + { + "epoch": 5.708240534521158, + "grad_norm": 20.201797485351562, + "learning_rate": 1e-06, + "loss": 0.7235, + "num_input_tokens_seen": 143606860, + "step": 2563 + }, + { + "epoch": 5.708240534521158, + "loss": 0.6854332685470581, + "loss_ce": 0.0003746763104572892, + "loss_iou": 0.3125, + "loss_num": 0.01190185546875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 143606860, + "step": 2563 + }, + { + "epoch": 5.710467706013363, + "grad_norm": 17.661972045898438, + "learning_rate": 1e-06, + "loss": 0.6369, + "num_input_tokens_seen": 143661512, + "step": 2564 + }, + { + "epoch": 5.710467706013363, + "loss": 0.589299201965332, + "loss_ce": 0.00018784166604746133, + "loss_iou": 0.2578125, + "loss_num": 0.01434326171875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 143661512, + "step": 2564 + }, + { + "epoch": 5.712694877505568, + "grad_norm": 30.762678146362305, + "learning_rate": 1e-06, + "loss": 0.8426, + "num_input_tokens_seen": 143719200, + "step": 2565 + }, + { + "epoch": 5.712694877505568, + "loss": 0.8480957746505737, + "loss_ce": 0.00019540796347428113, + "loss_iou": 0.345703125, + "loss_num": 0.03125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 143719200, + "step": 2565 + }, + { + "epoch": 5.714922048997773, + "grad_norm": 18.01441764831543, + "learning_rate": 1e-06, + "loss": 0.7997, + "num_input_tokens_seen": 143772696, + "step": 2566 + }, + { + "epoch": 5.714922048997773, + "loss": 0.9778006076812744, + "loss_ce": 0.00020047195721417665, + "loss_iou": 0.400390625, + "loss_num": 0.03515625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 143772696, + "step": 2566 + }, + { + "epoch": 5.717149220489977, + "grad_norm": 19.236597061157227, + "learning_rate": 1e-06, + "loss": 0.6094, + "num_input_tokens_seen": 143828632, + "step": 2567 + }, + { + "epoch": 5.717149220489977, + "loss": 0.7114354372024536, + "loss_ce": 0.0002537810942158103, + "loss_iou": 0.30859375, + "loss_num": 0.018798828125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 143828632, + "step": 2567 + }, + { + "epoch": 5.719376391982182, + "grad_norm": 16.775983810424805, + "learning_rate": 1e-06, + "loss": 0.5711, + "num_input_tokens_seen": 143882672, + "step": 2568 + }, + { + "epoch": 5.719376391982182, + "loss": 0.6285956501960754, + "loss_ce": 0.00017768185352906585, + "loss_iou": 0.2734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 143882672, + "step": 2568 + }, + { + "epoch": 5.721603563474387, + "grad_norm": 15.795881271362305, + "learning_rate": 1e-06, + "loss": 0.7436, + "num_input_tokens_seen": 143938316, + "step": 2569 + }, + { + "epoch": 5.721603563474387, + "loss": 0.8578826189041138, + "loss_ce": 0.0002165931509807706, + "loss_iou": 0.392578125, + "loss_num": 0.0147705078125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 143938316, + "step": 2569 + }, + { + "epoch": 5.723830734966592, + "grad_norm": 21.437118530273438, + "learning_rate": 1e-06, + "loss": 0.7572, + "num_input_tokens_seen": 143995348, + "step": 2570 + }, + { + "epoch": 5.723830734966592, + "loss": 0.7881063222885132, + "loss_ce": 0.00026450445875525475, + "loss_iou": 0.345703125, + "loss_num": 0.019287109375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 143995348, + "step": 2570 + }, + { + "epoch": 5.726057906458797, + "grad_norm": 16.0108585357666, + "learning_rate": 1e-06, + "loss": 0.7053, + "num_input_tokens_seen": 144052660, + "step": 2571 + }, + { + "epoch": 5.726057906458797, + "loss": 0.58664470911026, + "loss_ce": 0.00021891661162953824, + "loss_iou": 0.251953125, + "loss_num": 0.0164794921875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 144052660, + "step": 2571 + }, + { + "epoch": 5.728285077951003, + "grad_norm": 26.33615493774414, + "learning_rate": 1e-06, + "loss": 0.6633, + "num_input_tokens_seen": 144105148, + "step": 2572 + }, + { + "epoch": 5.728285077951003, + "loss": 0.4833102822303772, + "loss_ce": 0.00021702511003240943, + "loss_iou": 0.205078125, + "loss_num": 0.0146484375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 144105148, + "step": 2572 + }, + { + "epoch": 5.7305122494432075, + "grad_norm": 21.177268981933594, + "learning_rate": 1e-06, + "loss": 0.9853, + "num_input_tokens_seen": 144161172, + "step": 2573 + }, + { + "epoch": 5.7305122494432075, + "loss": 1.158959150314331, + "loss_ce": 0.0007559259538538754, + "loss_iou": 0.48046875, + "loss_num": 0.039306640625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 144161172, + "step": 2573 + }, + { + "epoch": 5.732739420935412, + "grad_norm": 25.209182739257812, + "learning_rate": 1e-06, + "loss": 0.6585, + "num_input_tokens_seen": 144215804, + "step": 2574 + }, + { + "epoch": 5.732739420935412, + "loss": 0.6748742461204529, + "loss_ce": 0.00031366333132609725, + "loss_iou": 0.291015625, + "loss_num": 0.0185546875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 144215804, + "step": 2574 + }, + { + "epoch": 5.734966592427617, + "grad_norm": 16.51605796813965, + "learning_rate": 1e-06, + "loss": 0.6987, + "num_input_tokens_seen": 144273436, + "step": 2575 + }, + { + "epoch": 5.734966592427617, + "loss": 0.564425528049469, + "loss_ce": 0.00021653338626492769, + "loss_iou": 0.248046875, + "loss_num": 0.0135498046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 144273436, + "step": 2575 + }, + { + "epoch": 5.737193763919822, + "grad_norm": 21.38966941833496, + "learning_rate": 1e-06, + "loss": 0.7136, + "num_input_tokens_seen": 144330508, + "step": 2576 + }, + { + "epoch": 5.737193763919822, + "loss": 0.7851696610450745, + "loss_ce": 0.00025755877140909433, + "loss_iou": 0.3125, + "loss_num": 0.0322265625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 144330508, + "step": 2576 + }, + { + "epoch": 5.739420935412027, + "grad_norm": 21.6599178314209, + "learning_rate": 1e-06, + "loss": 0.7422, + "num_input_tokens_seen": 144387136, + "step": 2577 + }, + { + "epoch": 5.739420935412027, + "loss": 0.8027091026306152, + "loss_ce": 0.00021889799972996116, + "loss_iou": 0.326171875, + "loss_num": 0.0303955078125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 144387136, + "step": 2577 + }, + { + "epoch": 5.741648106904232, + "grad_norm": 21.33940887451172, + "learning_rate": 1e-06, + "loss": 0.7533, + "num_input_tokens_seen": 144441500, + "step": 2578 + }, + { + "epoch": 5.741648106904232, + "loss": 0.6317015290260315, + "loss_ce": 0.0002318147598998621, + "loss_iou": 0.28515625, + "loss_num": 0.012451171875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 144441500, + "step": 2578 + }, + { + "epoch": 5.743875278396437, + "grad_norm": 18.354366302490234, + "learning_rate": 1e-06, + "loss": 0.6682, + "num_input_tokens_seen": 144499348, + "step": 2579 + }, + { + "epoch": 5.743875278396437, + "loss": 0.7804338335990906, + "loss_ce": 0.0002824625698849559, + "loss_iou": 0.28125, + "loss_num": 0.043701171875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 144499348, + "step": 2579 + }, + { + "epoch": 5.7461024498886415, + "grad_norm": 20.35249900817871, + "learning_rate": 1e-06, + "loss": 0.8315, + "num_input_tokens_seen": 144553584, + "step": 2580 + }, + { + "epoch": 5.7461024498886415, + "loss": 0.8381065130233765, + "loss_ce": 0.00021590120741166174, + "loss_iou": 0.365234375, + "loss_num": 0.0211181640625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 144553584, + "step": 2580 + }, + { + "epoch": 5.748329621380846, + "grad_norm": 16.173368453979492, + "learning_rate": 1e-06, + "loss": 0.4855, + "num_input_tokens_seen": 144610324, + "step": 2581 + }, + { + "epoch": 5.748329621380846, + "loss": 0.5163002610206604, + "loss_ce": 0.00018697154882829636, + "loss_iou": 0.2353515625, + "loss_num": 0.0091552734375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 144610324, + "step": 2581 + }, + { + "epoch": 5.750556792873051, + "grad_norm": 31.1335506439209, + "learning_rate": 1e-06, + "loss": 0.7519, + "num_input_tokens_seen": 144665984, + "step": 2582 + }, + { + "epoch": 5.750556792873051, + "loss": 0.5313022136688232, + "loss_ce": 0.0002963669830933213, + "loss_iou": 0.2353515625, + "loss_num": 0.0118408203125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 144665984, + "step": 2582 + }, + { + "epoch": 5.752783964365256, + "grad_norm": 24.663740158081055, + "learning_rate": 1e-06, + "loss": 0.5619, + "num_input_tokens_seen": 144722060, + "step": 2583 + }, + { + "epoch": 5.752783964365256, + "loss": 0.6767467260360718, + "loss_ce": 0.00023307063383981586, + "loss_iou": 0.30078125, + "loss_num": 0.015380859375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 144722060, + "step": 2583 + }, + { + "epoch": 5.755011135857461, + "grad_norm": 13.527139663696289, + "learning_rate": 1e-06, + "loss": 0.5107, + "num_input_tokens_seen": 144777528, + "step": 2584 + }, + { + "epoch": 5.755011135857461, + "loss": 0.5506859421730042, + "loss_ce": 0.0002709058462642133, + "loss_iou": 0.2373046875, + "loss_num": 0.01519775390625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 144777528, + "step": 2584 + }, + { + "epoch": 5.757238307349666, + "grad_norm": 19.172088623046875, + "learning_rate": 1e-06, + "loss": 0.7847, + "num_input_tokens_seen": 144833756, + "step": 2585 + }, + { + "epoch": 5.757238307349666, + "loss": 0.7893670797348022, + "loss_ce": 0.0003045589546673, + "loss_iou": 0.330078125, + "loss_num": 0.025634765625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 144833756, + "step": 2585 + }, + { + "epoch": 5.759465478841871, + "grad_norm": 17.034088134765625, + "learning_rate": 1e-06, + "loss": 0.6934, + "num_input_tokens_seen": 144892408, + "step": 2586 + }, + { + "epoch": 5.759465478841871, + "loss": 0.6261861324310303, + "loss_ce": 0.00020951575424987823, + "loss_iou": 0.26953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.625, + "num_input_tokens_seen": 144892408, + "step": 2586 + }, + { + "epoch": 5.7616926503340755, + "grad_norm": 22.121641159057617, + "learning_rate": 1e-06, + "loss": 0.7483, + "num_input_tokens_seen": 144946344, + "step": 2587 + }, + { + "epoch": 5.7616926503340755, + "loss": 0.8284502029418945, + "loss_ce": 0.00032524295966140926, + "loss_iou": 0.34765625, + "loss_num": 0.026611328125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 144946344, + "step": 2587 + }, + { + "epoch": 5.76391982182628, + "grad_norm": 18.808324813842773, + "learning_rate": 1e-06, + "loss": 0.706, + "num_input_tokens_seen": 145002944, + "step": 2588 + }, + { + "epoch": 5.76391982182628, + "loss": 0.5951967239379883, + "loss_ce": 0.00022597931092604995, + "loss_iou": 0.2578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 145002944, + "step": 2588 + }, + { + "epoch": 5.766146993318485, + "grad_norm": 17.422155380249023, + "learning_rate": 1e-06, + "loss": 0.689, + "num_input_tokens_seen": 145056836, + "step": 2589 + }, + { + "epoch": 5.766146993318485, + "loss": 0.7689008712768555, + "loss_ce": 0.00022416308638639748, + "loss_iou": 0.310546875, + "loss_num": 0.029541015625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 145056836, + "step": 2589 + }, + { + "epoch": 5.76837416481069, + "grad_norm": 19.195316314697266, + "learning_rate": 1e-06, + "loss": 0.7372, + "num_input_tokens_seen": 145113532, + "step": 2590 + }, + { + "epoch": 5.76837416481069, + "loss": 0.8400857448577881, + "loss_ce": 0.0002420624950900674, + "loss_iou": 0.34375, + "loss_num": 0.0303955078125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 145113532, + "step": 2590 + }, + { + "epoch": 5.770601336302895, + "grad_norm": 18.72977638244629, + "learning_rate": 1e-06, + "loss": 0.7385, + "num_input_tokens_seen": 145169580, + "step": 2591 + }, + { + "epoch": 5.770601336302895, + "loss": 0.9092113375663757, + "loss_ce": 0.0002757691836450249, + "loss_iou": 0.361328125, + "loss_num": 0.037841796875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 145169580, + "step": 2591 + }, + { + "epoch": 5.772828507795101, + "grad_norm": 26.984228134155273, + "learning_rate": 1e-06, + "loss": 0.7739, + "num_input_tokens_seen": 145225448, + "step": 2592 + }, + { + "epoch": 5.772828507795101, + "loss": 0.6938083171844482, + "loss_ce": 0.0004489576967898756, + "loss_iou": 0.2890625, + "loss_num": 0.023193359375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 145225448, + "step": 2592 + }, + { + "epoch": 5.775055679287306, + "grad_norm": 26.926746368408203, + "learning_rate": 1e-06, + "loss": 0.8225, + "num_input_tokens_seen": 145282560, + "step": 2593 + }, + { + "epoch": 5.775055679287306, + "loss": 0.8718444108963013, + "loss_ce": 0.00026242341846227646, + "loss_iou": 0.37109375, + "loss_num": 0.0262451171875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 145282560, + "step": 2593 + }, + { + "epoch": 5.77728285077951, + "grad_norm": 17.646190643310547, + "learning_rate": 1e-06, + "loss": 0.7529, + "num_input_tokens_seen": 145339244, + "step": 2594 + }, + { + "epoch": 5.77728285077951, + "loss": 0.878664493560791, + "loss_ce": 0.00024651282001286745, + "loss_iou": 0.34375, + "loss_num": 0.037841796875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 145339244, + "step": 2594 + }, + { + "epoch": 5.779510022271715, + "grad_norm": 18.98468780517578, + "learning_rate": 1e-06, + "loss": 0.6249, + "num_input_tokens_seen": 145394856, + "step": 2595 + }, + { + "epoch": 5.779510022271715, + "loss": 0.6960242390632629, + "loss_ce": 0.00022345452453009784, + "loss_iou": 0.318359375, + "loss_num": 0.01214599609375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 145394856, + "step": 2595 + }, + { + "epoch": 5.78173719376392, + "grad_norm": 31.605703353881836, + "learning_rate": 1e-06, + "loss": 0.6959, + "num_input_tokens_seen": 145450920, + "step": 2596 + }, + { + "epoch": 5.78173719376392, + "loss": 0.6217830181121826, + "loss_ce": 0.00020095528452657163, + "loss_iou": 0.263671875, + "loss_num": 0.0191650390625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 145450920, + "step": 2596 + }, + { + "epoch": 5.783964365256125, + "grad_norm": 17.69574546813965, + "learning_rate": 1e-06, + "loss": 0.6234, + "num_input_tokens_seen": 145504552, + "step": 2597 + }, + { + "epoch": 5.783964365256125, + "loss": 0.8066386580467224, + "loss_ce": 0.0002421344688627869, + "loss_iou": 0.328125, + "loss_num": 0.02978515625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 145504552, + "step": 2597 + }, + { + "epoch": 5.78619153674833, + "grad_norm": 31.138301849365234, + "learning_rate": 1e-06, + "loss": 0.7631, + "num_input_tokens_seen": 145559244, + "step": 2598 + }, + { + "epoch": 5.78619153674833, + "loss": 0.7720120549201965, + "loss_ce": 0.00028353132074698806, + "loss_iou": 0.3515625, + "loss_num": 0.01336669921875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 145559244, + "step": 2598 + }, + { + "epoch": 5.788418708240535, + "grad_norm": 22.075687408447266, + "learning_rate": 1e-06, + "loss": 0.7088, + "num_input_tokens_seen": 145617544, + "step": 2599 + }, + { + "epoch": 5.788418708240535, + "loss": 0.8822649717330933, + "loss_ce": 0.00042899814434349537, + "loss_iou": 0.392578125, + "loss_num": 0.0196533203125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 145617544, + "step": 2599 + }, + { + "epoch": 5.79064587973274, + "grad_norm": 22.63301658630371, + "learning_rate": 1e-06, + "loss": 0.7705, + "num_input_tokens_seen": 145675480, + "step": 2600 + }, + { + "epoch": 5.79064587973274, + "loss": 0.7863603234291077, + "loss_ce": 0.0002275097358506173, + "loss_iou": 0.33203125, + "loss_num": 0.024169921875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 145675480, + "step": 2600 + }, + { + "epoch": 5.7928730512249444, + "grad_norm": 23.069360733032227, + "learning_rate": 1e-06, + "loss": 0.7148, + "num_input_tokens_seen": 145726312, + "step": 2601 + }, + { + "epoch": 5.7928730512249444, + "loss": 0.8112291097640991, + "loss_ce": 0.00019390250963624567, + "loss_iou": 0.349609375, + "loss_num": 0.0224609375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 145726312, + "step": 2601 + }, + { + "epoch": 5.795100222717149, + "grad_norm": 27.435791015625, + "learning_rate": 1e-06, + "loss": 0.7378, + "num_input_tokens_seen": 145783560, + "step": 2602 + }, + { + "epoch": 5.795100222717149, + "loss": 0.7958614826202393, + "loss_ce": 0.00020716458675451577, + "loss_iou": 0.35546875, + "loss_num": 0.01708984375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 145783560, + "step": 2602 + }, + { + "epoch": 5.797327394209354, + "grad_norm": 24.120868682861328, + "learning_rate": 1e-06, + "loss": 0.7342, + "num_input_tokens_seen": 145836256, + "step": 2603 + }, + { + "epoch": 5.797327394209354, + "loss": 0.8412899971008301, + "loss_ce": 0.00028652820037677884, + "loss_iou": 0.33203125, + "loss_num": 0.035400390625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 145836256, + "step": 2603 + }, + { + "epoch": 5.799554565701559, + "grad_norm": 28.57222557067871, + "learning_rate": 1e-06, + "loss": 0.7984, + "num_input_tokens_seen": 145892380, + "step": 2604 + }, + { + "epoch": 5.799554565701559, + "loss": 0.7485337853431702, + "loss_ce": 0.00024279108038172126, + "loss_iou": 0.318359375, + "loss_num": 0.0228271484375, + "loss_xval": 0.75, + "num_input_tokens_seen": 145892380, + "step": 2604 + }, + { + "epoch": 5.801781737193764, + "grad_norm": 30.485336303710938, + "learning_rate": 1e-06, + "loss": 0.7688, + "num_input_tokens_seen": 145948016, + "step": 2605 + }, + { + "epoch": 5.801781737193764, + "loss": 0.7458430528640747, + "loss_ce": 0.0002376111369812861, + "loss_iou": 0.3125, + "loss_num": 0.02392578125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 145948016, + "step": 2605 + }, + { + "epoch": 5.804008908685969, + "grad_norm": 18.45177459716797, + "learning_rate": 1e-06, + "loss": 0.8566, + "num_input_tokens_seen": 146005492, + "step": 2606 + }, + { + "epoch": 5.804008908685969, + "loss": 0.9877591133117676, + "loss_ce": 0.00021025189198553562, + "loss_iou": 0.40625, + "loss_num": 0.034912109375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 146005492, + "step": 2606 + }, + { + "epoch": 5.806236080178174, + "grad_norm": 23.423572540283203, + "learning_rate": 1e-06, + "loss": 0.5956, + "num_input_tokens_seen": 146062328, + "step": 2607 + }, + { + "epoch": 5.806236080178174, + "loss": 0.6337399482727051, + "loss_ce": 0.00019499435438774526, + "loss_iou": 0.28125, + "loss_num": 0.013671875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 146062328, + "step": 2607 + }, + { + "epoch": 5.8084632516703785, + "grad_norm": 23.9626407623291, + "learning_rate": 1e-06, + "loss": 0.8475, + "num_input_tokens_seen": 146116388, + "step": 2608 + }, + { + "epoch": 5.8084632516703785, + "loss": 0.6824861764907837, + "loss_ce": 0.00023519776004832238, + "loss_iou": 0.2470703125, + "loss_num": 0.037841796875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 146116388, + "step": 2608 + }, + { + "epoch": 5.810690423162583, + "grad_norm": 19.422828674316406, + "learning_rate": 1e-06, + "loss": 0.7589, + "num_input_tokens_seen": 146172864, + "step": 2609 + }, + { + "epoch": 5.810690423162583, + "loss": 0.7359454035758972, + "loss_ce": 0.0002276489103678614, + "loss_iou": 0.330078125, + "loss_num": 0.0152587890625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 146172864, + "step": 2609 + }, + { + "epoch": 5.812917594654788, + "grad_norm": 26.152000427246094, + "learning_rate": 1e-06, + "loss": 0.6478, + "num_input_tokens_seen": 146230640, + "step": 2610 + }, + { + "epoch": 5.812917594654788, + "loss": 0.7595158815383911, + "loss_ce": 0.00023854889150243253, + "loss_iou": 0.310546875, + "loss_num": 0.0279541015625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 146230640, + "step": 2610 + }, + { + "epoch": 5.815144766146993, + "grad_norm": 40.59634780883789, + "learning_rate": 1e-06, + "loss": 0.5484, + "num_input_tokens_seen": 146287508, + "step": 2611 + }, + { + "epoch": 5.815144766146993, + "loss": 0.6367042660713196, + "loss_ce": 0.00022962281946092844, + "loss_iou": 0.259765625, + "loss_num": 0.023193359375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 146287508, + "step": 2611 + }, + { + "epoch": 5.817371937639198, + "grad_norm": 16.996164321899414, + "learning_rate": 1e-06, + "loss": 0.8022, + "num_input_tokens_seen": 146343108, + "step": 2612 + }, + { + "epoch": 5.817371937639198, + "loss": 0.7761521339416504, + "loss_ce": 0.00027319122455082834, + "loss_iou": 0.310546875, + "loss_num": 0.03125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 146343108, + "step": 2612 + }, + { + "epoch": 5.819599109131403, + "grad_norm": 14.195480346679688, + "learning_rate": 1e-06, + "loss": 0.5662, + "num_input_tokens_seen": 146401944, + "step": 2613 + }, + { + "epoch": 5.819599109131403, + "loss": 0.6162877082824707, + "loss_ce": 0.00032092921901494265, + "loss_iou": 0.2421875, + "loss_num": 0.0262451171875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 146401944, + "step": 2613 + }, + { + "epoch": 5.821826280623608, + "grad_norm": 13.408287048339844, + "learning_rate": 1e-06, + "loss": 0.676, + "num_input_tokens_seen": 146459796, + "step": 2614 + }, + { + "epoch": 5.821826280623608, + "loss": 0.6257205009460449, + "loss_ce": 0.00023219409922603518, + "loss_iou": 0.26953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.625, + "num_input_tokens_seen": 146459796, + "step": 2614 + }, + { + "epoch": 5.8240534521158125, + "grad_norm": 37.40284729003906, + "learning_rate": 1e-06, + "loss": 0.6556, + "num_input_tokens_seen": 146515484, + "step": 2615 + }, + { + "epoch": 5.8240534521158125, + "loss": 0.6754716634750366, + "loss_ce": 0.00017868283612187952, + "loss_iou": 0.27734375, + "loss_num": 0.023681640625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 146515484, + "step": 2615 + }, + { + "epoch": 5.826280623608017, + "grad_norm": 22.057910919189453, + "learning_rate": 1e-06, + "loss": 0.7252, + "num_input_tokens_seen": 146570828, + "step": 2616 + }, + { + "epoch": 5.826280623608017, + "loss": 0.7309498190879822, + "loss_ce": 0.00023692671675235033, + "loss_iou": 0.296875, + "loss_num": 0.02734375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 146570828, + "step": 2616 + }, + { + "epoch": 5.828507795100223, + "grad_norm": 63.05039596557617, + "learning_rate": 1e-06, + "loss": 0.577, + "num_input_tokens_seen": 146627412, + "step": 2617 + }, + { + "epoch": 5.828507795100223, + "loss": 0.5437142848968506, + "loss_ce": 0.0002571970981080085, + "loss_iou": 0.23828125, + "loss_num": 0.01336669921875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 146627412, + "step": 2617 + }, + { + "epoch": 5.830734966592428, + "grad_norm": 29.350282669067383, + "learning_rate": 1e-06, + "loss": 0.6785, + "num_input_tokens_seen": 146684332, + "step": 2618 + }, + { + "epoch": 5.830734966592428, + "loss": 0.5683233141899109, + "loss_ce": 0.00020807163673453033, + "loss_iou": 0.236328125, + "loss_num": 0.01904296875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 146684332, + "step": 2618 + }, + { + "epoch": 5.832962138084633, + "grad_norm": 23.34769058227539, + "learning_rate": 1e-06, + "loss": 0.6697, + "num_input_tokens_seen": 146737280, + "step": 2619 + }, + { + "epoch": 5.832962138084633, + "loss": 0.6157218217849731, + "loss_ce": 0.0004874598525930196, + "loss_iou": 0.271484375, + "loss_num": 0.01446533203125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 146737280, + "step": 2619 + }, + { + "epoch": 5.835189309576838, + "grad_norm": 22.184803009033203, + "learning_rate": 1e-06, + "loss": 0.6025, + "num_input_tokens_seen": 146794400, + "step": 2620 + }, + { + "epoch": 5.835189309576838, + "loss": 0.6838958263397217, + "loss_ce": 0.0003020889707840979, + "loss_iou": 0.3125, + "loss_num": 0.0115966796875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 146794400, + "step": 2620 + }, + { + "epoch": 5.8374164810690425, + "grad_norm": 16.25792694091797, + "learning_rate": 1e-06, + "loss": 0.71, + "num_input_tokens_seen": 146849192, + "step": 2621 + }, + { + "epoch": 5.8374164810690425, + "loss": 0.6701442003250122, + "loss_ce": 0.00022233014169614762, + "loss_iou": 0.25390625, + "loss_num": 0.032470703125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 146849192, + "step": 2621 + }, + { + "epoch": 5.839643652561247, + "grad_norm": 17.911888122558594, + "learning_rate": 1e-06, + "loss": 0.6373, + "num_input_tokens_seen": 146906416, + "step": 2622 + }, + { + "epoch": 5.839643652561247, + "loss": 0.5712316632270813, + "loss_ce": 0.00018675101455301046, + "loss_iou": 0.2421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 146906416, + "step": 2622 + }, + { + "epoch": 5.841870824053452, + "grad_norm": 15.35204029083252, + "learning_rate": 1e-06, + "loss": 0.6543, + "num_input_tokens_seen": 146963888, + "step": 2623 + }, + { + "epoch": 5.841870824053452, + "loss": 0.6473047733306885, + "loss_ce": 0.00021000676497351378, + "loss_iou": 0.265625, + "loss_num": 0.0233154296875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 146963888, + "step": 2623 + }, + { + "epoch": 5.844097995545657, + "grad_norm": 19.907550811767578, + "learning_rate": 1e-06, + "loss": 0.8551, + "num_input_tokens_seen": 147019984, + "step": 2624 + }, + { + "epoch": 5.844097995545657, + "loss": 0.6151929497718811, + "loss_ce": 0.0002027209848165512, + "loss_iou": 0.26953125, + "loss_num": 0.01483154296875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 147019984, + "step": 2624 + }, + { + "epoch": 5.846325167037862, + "grad_norm": 35.5843391418457, + "learning_rate": 1e-06, + "loss": 0.8085, + "num_input_tokens_seen": 147074124, + "step": 2625 + }, + { + "epoch": 5.846325167037862, + "loss": 0.6340126991271973, + "loss_ce": 0.00022365737822838128, + "loss_iou": 0.283203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 147074124, + "step": 2625 + }, + { + "epoch": 5.848552338530067, + "grad_norm": 22.104284286499023, + "learning_rate": 1e-06, + "loss": 0.7518, + "num_input_tokens_seen": 147129964, + "step": 2626 + }, + { + "epoch": 5.848552338530067, + "loss": 0.7092376947402954, + "loss_ce": 0.0002533411025069654, + "loss_iou": 0.310546875, + "loss_num": 0.017578125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 147129964, + "step": 2626 + }, + { + "epoch": 5.850779510022272, + "grad_norm": 18.39105987548828, + "learning_rate": 1e-06, + "loss": 0.6966, + "num_input_tokens_seen": 147184592, + "step": 2627 + }, + { + "epoch": 5.850779510022272, + "loss": 0.661689043045044, + "loss_ce": 0.0003120756009593606, + "loss_iou": 0.27734375, + "loss_num": 0.021728515625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 147184592, + "step": 2627 + }, + { + "epoch": 5.853006681514477, + "grad_norm": 22.63771629333496, + "learning_rate": 1e-06, + "loss": 0.9064, + "num_input_tokens_seen": 147241272, + "step": 2628 + }, + { + "epoch": 5.853006681514477, + "loss": 0.5712363719940186, + "loss_ce": 0.00019146442355122417, + "loss_iou": 0.251953125, + "loss_num": 0.0135498046875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 147241272, + "step": 2628 + }, + { + "epoch": 5.855233853006681, + "grad_norm": 21.009822845458984, + "learning_rate": 1e-06, + "loss": 0.8111, + "num_input_tokens_seen": 147294076, + "step": 2629 + }, + { + "epoch": 5.855233853006681, + "loss": 0.9777696132659912, + "loss_ce": 0.00023052690085023642, + "loss_iou": 0.443359375, + "loss_num": 0.0179443359375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 147294076, + "step": 2629 + }, + { + "epoch": 5.857461024498886, + "grad_norm": 19.79799461364746, + "learning_rate": 1e-06, + "loss": 0.7446, + "num_input_tokens_seen": 147349408, + "step": 2630 + }, + { + "epoch": 5.857461024498886, + "loss": 0.6237329244613647, + "loss_ce": 0.00019774649990722537, + "loss_iou": 0.263671875, + "loss_num": 0.01904296875, + "loss_xval": 0.625, + "num_input_tokens_seen": 147349408, + "step": 2630 + }, + { + "epoch": 5.859688195991091, + "grad_norm": 22.230148315429688, + "learning_rate": 1e-06, + "loss": 0.7763, + "num_input_tokens_seen": 147405948, + "step": 2631 + }, + { + "epoch": 5.859688195991091, + "loss": 0.8681101202964783, + "loss_ce": 0.0003122506313957274, + "loss_iou": 0.37890625, + "loss_num": 0.021728515625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 147405948, + "step": 2631 + }, + { + "epoch": 5.861915367483296, + "grad_norm": 16.93956756591797, + "learning_rate": 1e-06, + "loss": 0.7856, + "num_input_tokens_seen": 147460328, + "step": 2632 + }, + { + "epoch": 5.861915367483296, + "loss": 0.8456612825393677, + "loss_ce": 0.00020235308329574764, + "loss_iou": 0.34375, + "loss_num": 0.031494140625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 147460328, + "step": 2632 + }, + { + "epoch": 5.864142538975501, + "grad_norm": 28.430946350097656, + "learning_rate": 1e-06, + "loss": 0.7788, + "num_input_tokens_seen": 147515700, + "step": 2633 + }, + { + "epoch": 5.864142538975501, + "loss": 0.8373937606811523, + "loss_ce": 0.00023549118486698717, + "loss_iou": 0.369140625, + "loss_num": 0.0198974609375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 147515700, + "step": 2633 + }, + { + "epoch": 5.866369710467706, + "grad_norm": 111.71736145019531, + "learning_rate": 1e-06, + "loss": 0.9358, + "num_input_tokens_seen": 147570320, + "step": 2634 + }, + { + "epoch": 5.866369710467706, + "loss": 0.9798734188079834, + "loss_ce": 0.0003812336944974959, + "loss_iou": 0.400390625, + "loss_num": 0.035888671875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 147570320, + "step": 2634 + }, + { + "epoch": 5.868596881959911, + "grad_norm": 20.809370040893555, + "learning_rate": 1e-06, + "loss": 0.7524, + "num_input_tokens_seen": 147625424, + "step": 2635 + }, + { + "epoch": 5.868596881959911, + "loss": 0.6215413808822632, + "loss_ce": 0.00020347216923255473, + "loss_iou": 0.255859375, + "loss_num": 0.021728515625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 147625424, + "step": 2635 + }, + { + "epoch": 5.870824053452115, + "grad_norm": 23.472732543945312, + "learning_rate": 1e-06, + "loss": 0.896, + "num_input_tokens_seen": 147682576, + "step": 2636 + }, + { + "epoch": 5.870824053452115, + "loss": 0.7243614196777344, + "loss_ce": 0.00024032902729231864, + "loss_iou": 0.322265625, + "loss_num": 0.0159912109375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 147682576, + "step": 2636 + }, + { + "epoch": 5.873051224944321, + "grad_norm": 19.540515899658203, + "learning_rate": 1e-06, + "loss": 0.8665, + "num_input_tokens_seen": 147738480, + "step": 2637 + }, + { + "epoch": 5.873051224944321, + "loss": 0.94027179479599, + "loss_ce": 0.0003304004785604775, + "loss_iou": 0.33984375, + "loss_num": 0.0517578125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 147738480, + "step": 2637 + }, + { + "epoch": 5.875278396436526, + "grad_norm": 18.206157684326172, + "learning_rate": 1e-06, + "loss": 0.9815, + "num_input_tokens_seen": 147795268, + "step": 2638 + }, + { + "epoch": 5.875278396436526, + "loss": 0.8137099146842957, + "loss_ce": 0.00023330387193709612, + "loss_iou": 0.328125, + "loss_num": 0.03125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 147795268, + "step": 2638 + }, + { + "epoch": 5.877505567928731, + "grad_norm": 36.962310791015625, + "learning_rate": 1e-06, + "loss": 0.8831, + "num_input_tokens_seen": 147851048, + "step": 2639 + }, + { + "epoch": 5.877505567928731, + "loss": 1.2098288536071777, + "loss_ce": 0.0003562095225788653, + "loss_iou": 0.5234375, + "loss_num": 0.03271484375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 147851048, + "step": 2639 + }, + { + "epoch": 5.879732739420936, + "grad_norm": 20.242557525634766, + "learning_rate": 1e-06, + "loss": 0.6815, + "num_input_tokens_seen": 147908012, + "step": 2640 + }, + { + "epoch": 5.879732739420936, + "loss": 0.6303223371505737, + "loss_ce": 0.0001953527971636504, + "loss_iou": 0.27734375, + "loss_num": 0.01495361328125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 147908012, + "step": 2640 + }, + { + "epoch": 5.881959910913141, + "grad_norm": 34.177433013916016, + "learning_rate": 1e-06, + "loss": 0.7303, + "num_input_tokens_seen": 147960696, + "step": 2641 + }, + { + "epoch": 5.881959910913141, + "loss": 0.9157909154891968, + "loss_ce": 0.000263590132817626, + "loss_iou": 0.39453125, + "loss_num": 0.0255126953125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 147960696, + "step": 2641 + }, + { + "epoch": 5.8841870824053455, + "grad_norm": 19.002622604370117, + "learning_rate": 1e-06, + "loss": 0.6177, + "num_input_tokens_seen": 148016204, + "step": 2642 + }, + { + "epoch": 5.8841870824053455, + "loss": 0.528780460357666, + "loss_ce": 0.0002160129661206156, + "loss_iou": 0.2265625, + "loss_num": 0.0150146484375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 148016204, + "step": 2642 + }, + { + "epoch": 5.88641425389755, + "grad_norm": 20.485605239868164, + "learning_rate": 1e-06, + "loss": 0.7832, + "num_input_tokens_seen": 148072492, + "step": 2643 + }, + { + "epoch": 5.88641425389755, + "loss": 1.044980764389038, + "loss_ce": 0.0015236493200063705, + "loss_iou": 0.455078125, + "loss_num": 0.0269775390625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 148072492, + "step": 2643 + }, + { + "epoch": 5.888641425389755, + "grad_norm": 25.896760940551758, + "learning_rate": 1e-06, + "loss": 0.6733, + "num_input_tokens_seen": 148124560, + "step": 2644 + }, + { + "epoch": 5.888641425389755, + "loss": 0.5408051013946533, + "loss_ce": 0.00021672301227226853, + "loss_iou": 0.248046875, + "loss_num": 0.00909423828125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 148124560, + "step": 2644 + }, + { + "epoch": 5.89086859688196, + "grad_norm": 15.1829252243042, + "learning_rate": 1e-06, + "loss": 0.7851, + "num_input_tokens_seen": 148182164, + "step": 2645 + }, + { + "epoch": 5.89086859688196, + "loss": 0.6976854801177979, + "loss_ce": 0.000297736085485667, + "loss_iou": 0.287109375, + "loss_num": 0.0242919921875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 148182164, + "step": 2645 + }, + { + "epoch": 5.893095768374165, + "grad_norm": 20.109758377075195, + "learning_rate": 1e-06, + "loss": 0.4663, + "num_input_tokens_seen": 148237212, + "step": 2646 + }, + { + "epoch": 5.893095768374165, + "loss": 0.403061181306839, + "loss_ce": 0.00016811591922305524, + "loss_iou": 0.1708984375, + "loss_num": 0.0120849609375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 148237212, + "step": 2646 + }, + { + "epoch": 5.89532293986637, + "grad_norm": 22.928607940673828, + "learning_rate": 1e-06, + "loss": 0.7238, + "num_input_tokens_seen": 148294320, + "step": 2647 + }, + { + "epoch": 5.89532293986637, + "loss": 0.7370385527610779, + "loss_ce": 0.00022214508499018848, + "loss_iou": 0.3203125, + "loss_num": 0.01953125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 148294320, + "step": 2647 + }, + { + "epoch": 5.897550111358575, + "grad_norm": 18.266637802124023, + "learning_rate": 1e-06, + "loss": 0.7101, + "num_input_tokens_seen": 148350012, + "step": 2648 + }, + { + "epoch": 5.897550111358575, + "loss": 0.841547429561615, + "loss_ce": 0.00023882737150415778, + "loss_iou": 0.357421875, + "loss_num": 0.0250244140625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 148350012, + "step": 2648 + }, + { + "epoch": 5.8997772828507795, + "grad_norm": 16.207386016845703, + "learning_rate": 1e-06, + "loss": 0.7109, + "num_input_tokens_seen": 148405388, + "step": 2649 + }, + { + "epoch": 5.8997772828507795, + "loss": 0.6293390989303589, + "loss_ce": 0.00018869154155254364, + "loss_iou": 0.275390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 148405388, + "step": 2649 + }, + { + "epoch": 5.902004454342984, + "grad_norm": 16.40260124206543, + "learning_rate": 1e-06, + "loss": 0.4936, + "num_input_tokens_seen": 148462940, + "step": 2650 + }, + { + "epoch": 5.902004454342984, + "loss": 0.45450687408447266, + "loss_ce": 0.00016120026702992618, + "loss_iou": 0.1953125, + "loss_num": 0.01287841796875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 148462940, + "step": 2650 + }, + { + "epoch": 5.904231625835189, + "grad_norm": 22.90458869934082, + "learning_rate": 1e-06, + "loss": 0.7208, + "num_input_tokens_seen": 148518860, + "step": 2651 + }, + { + "epoch": 5.904231625835189, + "loss": 0.7937043309211731, + "loss_ce": 0.00024726998526602983, + "loss_iou": 0.34375, + "loss_num": 0.020751953125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 148518860, + "step": 2651 + }, + { + "epoch": 5.906458797327394, + "grad_norm": 15.67951488494873, + "learning_rate": 1e-06, + "loss": 0.478, + "num_input_tokens_seen": 148577372, + "step": 2652 + }, + { + "epoch": 5.906458797327394, + "loss": 0.41467005014419556, + "loss_ce": 0.0002413251786492765, + "loss_iou": 0.1962890625, + "loss_num": 0.00433349609375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 148577372, + "step": 2652 + }, + { + "epoch": 5.908685968819599, + "grad_norm": 23.35599708557129, + "learning_rate": 1e-06, + "loss": 0.698, + "num_input_tokens_seen": 148633988, + "step": 2653 + }, + { + "epoch": 5.908685968819599, + "loss": 0.7189466953277588, + "loss_ce": 0.0001966924173757434, + "loss_iou": 0.306640625, + "loss_num": 0.0211181640625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 148633988, + "step": 2653 + }, + { + "epoch": 5.910913140311804, + "grad_norm": 22.419492721557617, + "learning_rate": 1e-06, + "loss": 0.8223, + "num_input_tokens_seen": 148686904, + "step": 2654 + }, + { + "epoch": 5.910913140311804, + "loss": 0.5142409205436707, + "loss_ce": 0.0003248959837947041, + "loss_iou": 0.2138671875, + "loss_num": 0.01708984375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 148686904, + "step": 2654 + }, + { + "epoch": 5.913140311804009, + "grad_norm": 22.227079391479492, + "learning_rate": 1e-06, + "loss": 0.5734, + "num_input_tokens_seen": 148741860, + "step": 2655 + }, + { + "epoch": 5.913140311804009, + "loss": 0.6701195240020752, + "loss_ce": 0.00019767896446865052, + "loss_iou": 0.2578125, + "loss_num": 0.0308837890625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 148741860, + "step": 2655 + }, + { + "epoch": 5.9153674832962135, + "grad_norm": 24.240558624267578, + "learning_rate": 1e-06, + "loss": 0.7875, + "num_input_tokens_seen": 148798228, + "step": 2656 + }, + { + "epoch": 5.9153674832962135, + "loss": 0.7565721869468689, + "loss_ce": 0.0002245493233203888, + "loss_iou": 0.3203125, + "loss_num": 0.0228271484375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 148798228, + "step": 2656 + }, + { + "epoch": 5.917594654788418, + "grad_norm": 21.239978790283203, + "learning_rate": 1e-06, + "loss": 0.8062, + "num_input_tokens_seen": 148852728, + "step": 2657 + }, + { + "epoch": 5.917594654788418, + "loss": 0.9770278930664062, + "loss_ce": 0.00022124522365629673, + "loss_iou": 0.40234375, + "loss_num": 0.034423828125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 148852728, + "step": 2657 + }, + { + "epoch": 5.919821826280623, + "grad_norm": 21.198122024536133, + "learning_rate": 1e-06, + "loss": 0.5775, + "num_input_tokens_seen": 148910144, + "step": 2658 + }, + { + "epoch": 5.919821826280623, + "loss": 0.5937119126319885, + "loss_ce": 0.00020606812904588878, + "loss_iou": 0.25, + "loss_num": 0.0186767578125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 148910144, + "step": 2658 + }, + { + "epoch": 5.922048997772828, + "grad_norm": 21.191734313964844, + "learning_rate": 1e-06, + "loss": 0.6275, + "num_input_tokens_seen": 148965064, + "step": 2659 + }, + { + "epoch": 5.922048997772828, + "loss": 0.5399864912033081, + "loss_ce": 0.0001916133624035865, + "loss_iou": 0.244140625, + "loss_num": 0.01031494140625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 148965064, + "step": 2659 + }, + { + "epoch": 5.924276169265033, + "grad_norm": 22.334716796875, + "learning_rate": 1e-06, + "loss": 0.881, + "num_input_tokens_seen": 149019216, + "step": 2660 + }, + { + "epoch": 5.924276169265033, + "loss": 0.7107348442077637, + "loss_ce": 0.00022451579570770264, + "loss_iou": 0.298828125, + "loss_num": 0.0228271484375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 149019216, + "step": 2660 + }, + { + "epoch": 5.926503340757238, + "grad_norm": 26.160076141357422, + "learning_rate": 1e-06, + "loss": 0.6856, + "num_input_tokens_seen": 149076832, + "step": 2661 + }, + { + "epoch": 5.926503340757238, + "loss": 0.7112481594085693, + "loss_ce": 0.0001885854871943593, + "loss_iou": 0.294921875, + "loss_num": 0.0245361328125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 149076832, + "step": 2661 + }, + { + "epoch": 5.928730512249444, + "grad_norm": 35.00899887084961, + "learning_rate": 1e-06, + "loss": 0.7615, + "num_input_tokens_seen": 149133992, + "step": 2662 + }, + { + "epoch": 5.928730512249444, + "loss": 0.8671593070030212, + "loss_ce": 0.00021591814584098756, + "loss_iou": 0.396484375, + "loss_num": 0.01483154296875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 149133992, + "step": 2662 + }, + { + "epoch": 5.9309576837416484, + "grad_norm": 23.221233367919922, + "learning_rate": 1e-06, + "loss": 0.6566, + "num_input_tokens_seen": 149190436, + "step": 2663 + }, + { + "epoch": 5.9309576837416484, + "loss": 0.43981656432151794, + "loss_ce": 0.0002413657057331875, + "loss_iou": 0.1943359375, + "loss_num": 0.010009765625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 149190436, + "step": 2663 + }, + { + "epoch": 5.933184855233853, + "grad_norm": 19.708267211914062, + "learning_rate": 1e-06, + "loss": 0.7044, + "num_input_tokens_seen": 149247672, + "step": 2664 + }, + { + "epoch": 5.933184855233853, + "loss": 0.8448212742805481, + "loss_ce": 0.00021680007921531796, + "loss_iou": 0.375, + "loss_num": 0.018798828125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 149247672, + "step": 2664 + }, + { + "epoch": 5.935412026726058, + "grad_norm": 15.226134300231934, + "learning_rate": 1e-06, + "loss": 0.5869, + "num_input_tokens_seen": 149305572, + "step": 2665 + }, + { + "epoch": 5.935412026726058, + "loss": 0.5407141447067261, + "loss_ce": 0.00018680887296795845, + "loss_iou": 0.2333984375, + "loss_num": 0.0146484375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 149305572, + "step": 2665 + }, + { + "epoch": 5.937639198218263, + "grad_norm": 18.039369583129883, + "learning_rate": 1e-06, + "loss": 0.6593, + "num_input_tokens_seen": 149363100, + "step": 2666 + }, + { + "epoch": 5.937639198218263, + "loss": 0.4833727180957794, + "loss_ce": 0.00021840460249222815, + "loss_iou": 0.21875, + "loss_num": 0.0091552734375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 149363100, + "step": 2666 + }, + { + "epoch": 5.939866369710468, + "grad_norm": 14.021678924560547, + "learning_rate": 1e-06, + "loss": 0.9587, + "num_input_tokens_seen": 149418140, + "step": 2667 + }, + { + "epoch": 5.939866369710468, + "loss": 0.6920410990715027, + "loss_ce": 0.0002686180523596704, + "loss_iou": 0.306640625, + "loss_num": 0.0159912109375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 149418140, + "step": 2667 + }, + { + "epoch": 5.942093541202673, + "grad_norm": 17.604162216186523, + "learning_rate": 1e-06, + "loss": 0.5922, + "num_input_tokens_seen": 149474312, + "step": 2668 + }, + { + "epoch": 5.942093541202673, + "loss": 0.5115793347358704, + "loss_ce": 0.0002268127864226699, + "loss_iou": 0.2353515625, + "loss_num": 0.00811767578125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 149474312, + "step": 2668 + }, + { + "epoch": 5.944320712694878, + "grad_norm": 23.896390914916992, + "learning_rate": 1e-06, + "loss": 0.7063, + "num_input_tokens_seen": 149530356, + "step": 2669 + }, + { + "epoch": 5.944320712694878, + "loss": 0.5715218186378479, + "loss_ce": 0.00023274502018466592, + "loss_iou": 0.2490234375, + "loss_num": 0.0145263671875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 149530356, + "step": 2669 + }, + { + "epoch": 5.9465478841870825, + "grad_norm": 20.610628128051758, + "learning_rate": 1e-06, + "loss": 0.721, + "num_input_tokens_seen": 149587276, + "step": 2670 + }, + { + "epoch": 5.9465478841870825, + "loss": 0.7370386123657227, + "loss_ce": 0.00022224214626476169, + "loss_iou": 0.328125, + "loss_num": 0.0162353515625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 149587276, + "step": 2670 + }, + { + "epoch": 5.948775055679287, + "grad_norm": 18.218326568603516, + "learning_rate": 1e-06, + "loss": 0.8538, + "num_input_tokens_seen": 149643160, + "step": 2671 + }, + { + "epoch": 5.948775055679287, + "loss": 1.0529261827468872, + "loss_ce": 0.00043590826680883765, + "loss_iou": 0.40625, + "loss_num": 0.0478515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 149643160, + "step": 2671 + }, + { + "epoch": 5.951002227171492, + "grad_norm": 20.644004821777344, + "learning_rate": 1e-06, + "loss": 0.7755, + "num_input_tokens_seen": 149697316, + "step": 2672 + }, + { + "epoch": 5.951002227171492, + "loss": 0.9467422962188721, + "loss_ce": 0.0003311632899567485, + "loss_iou": 0.357421875, + "loss_num": 0.04638671875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 149697316, + "step": 2672 + }, + { + "epoch": 5.953229398663697, + "grad_norm": 15.27214527130127, + "learning_rate": 1e-06, + "loss": 0.5603, + "num_input_tokens_seen": 149754696, + "step": 2673 + }, + { + "epoch": 5.953229398663697, + "loss": 0.5694996118545532, + "loss_ce": 0.00040784955490380526, + "loss_iou": 0.23828125, + "loss_num": 0.0186767578125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 149754696, + "step": 2673 + }, + { + "epoch": 5.955456570155902, + "grad_norm": 18.75067138671875, + "learning_rate": 1e-06, + "loss": 0.7863, + "num_input_tokens_seen": 149809432, + "step": 2674 + }, + { + "epoch": 5.955456570155902, + "loss": 0.8103808760643005, + "loss_ce": 0.00032227032352238894, + "loss_iou": 0.365234375, + "loss_num": 0.01556396484375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 149809432, + "step": 2674 + }, + { + "epoch": 5.957683741648107, + "grad_norm": 25.840362548828125, + "learning_rate": 1e-06, + "loss": 0.8198, + "num_input_tokens_seen": 149865132, + "step": 2675 + }, + { + "epoch": 5.957683741648107, + "loss": 0.85858553647995, + "loss_ce": 0.00018709682626649737, + "loss_iou": 0.384765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 149865132, + "step": 2675 + }, + { + "epoch": 5.959910913140312, + "grad_norm": 15.168061256408691, + "learning_rate": 1e-06, + "loss": 0.5927, + "num_input_tokens_seen": 149921252, + "step": 2676 + }, + { + "epoch": 5.959910913140312, + "loss": 0.5525012016296387, + "loss_ce": 0.00025514926528558135, + "loss_iou": 0.2412109375, + "loss_num": 0.01385498046875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 149921252, + "step": 2676 + }, + { + "epoch": 5.9621380846325165, + "grad_norm": 359.7155456542969, + "learning_rate": 1e-06, + "loss": 0.8264, + "num_input_tokens_seen": 149975912, + "step": 2677 + }, + { + "epoch": 5.9621380846325165, + "loss": 0.7250722646713257, + "loss_ce": 0.00021878087136428803, + "loss_iou": 0.30859375, + "loss_num": 0.0218505859375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 149975912, + "step": 2677 + }, + { + "epoch": 5.964365256124721, + "grad_norm": 20.11498260498047, + "learning_rate": 1e-06, + "loss": 0.6048, + "num_input_tokens_seen": 150032272, + "step": 2678 + }, + { + "epoch": 5.964365256124721, + "loss": 0.6044542789459229, + "loss_ce": 0.00020618733833543956, + "loss_iou": 0.255859375, + "loss_num": 0.018310546875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 150032272, + "step": 2678 + }, + { + "epoch": 5.966592427616926, + "grad_norm": 40.01982116699219, + "learning_rate": 1e-06, + "loss": 0.6705, + "num_input_tokens_seen": 150089664, + "step": 2679 + }, + { + "epoch": 5.966592427616926, + "loss": 0.48896247148513794, + "loss_ce": 0.00019291002536192536, + "loss_iou": 0.1884765625, + "loss_num": 0.0223388671875, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 150089664, + "step": 2679 + }, + { + "epoch": 5.968819599109131, + "grad_norm": 13.833683013916016, + "learning_rate": 1e-06, + "loss": 0.5939, + "num_input_tokens_seen": 150145520, + "step": 2680 + }, + { + "epoch": 5.968819599109131, + "loss": 0.5350244641304016, + "loss_ce": 0.000356461969204247, + "loss_iou": 0.236328125, + "loss_num": 0.0125732421875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 150145520, + "step": 2680 + }, + { + "epoch": 5.971046770601336, + "grad_norm": 28.182466506958008, + "learning_rate": 1e-06, + "loss": 0.9304, + "num_input_tokens_seen": 150198876, + "step": 2681 + }, + { + "epoch": 5.971046770601336, + "loss": 0.5284985899925232, + "loss_ce": 0.00017829591524787247, + "loss_iou": 0.220703125, + "loss_num": 0.017333984375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 150198876, + "step": 2681 + }, + { + "epoch": 5.973273942093542, + "grad_norm": 15.853994369506836, + "learning_rate": 1e-06, + "loss": 0.6288, + "num_input_tokens_seen": 150255024, + "step": 2682 + }, + { + "epoch": 5.973273942093542, + "loss": 0.5141396522521973, + "loss_ce": 0.0004677815013565123, + "loss_iou": 0.1982421875, + "loss_num": 0.023681640625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 150255024, + "step": 2682 + }, + { + "epoch": 5.9755011135857465, + "grad_norm": 19.383615493774414, + "learning_rate": 1e-06, + "loss": 0.9422, + "num_input_tokens_seen": 150309836, + "step": 2683 + }, + { + "epoch": 5.9755011135857465, + "loss": 0.8364056944847107, + "loss_ce": 0.00022408382210414857, + "loss_iou": 0.33203125, + "loss_num": 0.03466796875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 150309836, + "step": 2683 + }, + { + "epoch": 5.977728285077951, + "grad_norm": 23.231369018554688, + "learning_rate": 1e-06, + "loss": 0.7372, + "num_input_tokens_seen": 150365848, + "step": 2684 + }, + { + "epoch": 5.977728285077951, + "loss": 0.8447408676147461, + "loss_ce": 0.0002584208268672228, + "loss_iou": 0.34765625, + "loss_num": 0.02978515625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 150365848, + "step": 2684 + }, + { + "epoch": 5.979955456570156, + "grad_norm": 22.7137393951416, + "learning_rate": 1e-06, + "loss": 0.6089, + "num_input_tokens_seen": 150423628, + "step": 2685 + }, + { + "epoch": 5.979955456570156, + "loss": 0.5462290048599243, + "loss_ce": 0.00020853537716902792, + "loss_iou": 0.23828125, + "loss_num": 0.0137939453125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 150423628, + "step": 2685 + }, + { + "epoch": 5.982182628062361, + "grad_norm": 24.549579620361328, + "learning_rate": 1e-06, + "loss": 0.7922, + "num_input_tokens_seen": 150478840, + "step": 2686 + }, + { + "epoch": 5.982182628062361, + "loss": 0.8291885256767273, + "loss_ce": 0.00020902017422486097, + "loss_iou": 0.373046875, + "loss_num": 0.0166015625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 150478840, + "step": 2686 + }, + { + "epoch": 5.984409799554566, + "grad_norm": 26.245058059692383, + "learning_rate": 1e-06, + "loss": 0.6099, + "num_input_tokens_seen": 150534692, + "step": 2687 + }, + { + "epoch": 5.984409799554566, + "loss": 0.49533137679100037, + "loss_ce": 0.00021419850236270577, + "loss_iou": 0.224609375, + "loss_num": 0.00909423828125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 150534692, + "step": 2687 + }, + { + "epoch": 5.986636971046771, + "grad_norm": 24.997560501098633, + "learning_rate": 1e-06, + "loss": 0.5429, + "num_input_tokens_seen": 150591508, + "step": 2688 + }, + { + "epoch": 5.986636971046771, + "loss": 0.5927173495292664, + "loss_ce": 0.00031011985265649855, + "loss_iou": 0.267578125, + "loss_num": 0.01177978515625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 150591508, + "step": 2688 + }, + { + "epoch": 5.988864142538976, + "grad_norm": 16.52888298034668, + "learning_rate": 1e-06, + "loss": 0.7054, + "num_input_tokens_seen": 150648216, + "step": 2689 + }, + { + "epoch": 5.988864142538976, + "loss": 0.44299080967903137, + "loss_ce": 0.00022651677136309445, + "loss_iou": 0.171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 150648216, + "step": 2689 + }, + { + "epoch": 5.991091314031181, + "grad_norm": 19.582372665405273, + "learning_rate": 1e-06, + "loss": 0.901, + "num_input_tokens_seen": 150705220, + "step": 2690 + }, + { + "epoch": 5.991091314031181, + "loss": 0.9914488196372986, + "loss_ce": 0.00023790652630850673, + "loss_iou": 0.43359375, + "loss_num": 0.0244140625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 150705220, + "step": 2690 + }, + { + "epoch": 5.993318485523385, + "grad_norm": 19.67958641052246, + "learning_rate": 1e-06, + "loss": 0.7656, + "num_input_tokens_seen": 150759608, + "step": 2691 + }, + { + "epoch": 5.993318485523385, + "loss": 0.586168110370636, + "loss_ce": 0.00023060785315465182, + "loss_iou": 0.2431640625, + "loss_num": 0.0198974609375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 150759608, + "step": 2691 + }, + { + "epoch": 5.99554565701559, + "grad_norm": 15.20195198059082, + "learning_rate": 1e-06, + "loss": 0.6715, + "num_input_tokens_seen": 150816908, + "step": 2692 + }, + { + "epoch": 5.99554565701559, + "loss": 0.6716079115867615, + "loss_ce": 0.00022117490880191326, + "loss_iou": 0.28515625, + "loss_num": 0.020263671875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 150816908, + "step": 2692 + }, + { + "epoch": 5.997772828507795, + "grad_norm": 25.80984878540039, + "learning_rate": 1e-06, + "loss": 0.8853, + "num_input_tokens_seen": 150872492, + "step": 2693 + }, + { + "epoch": 5.997772828507795, + "loss": 1.047957181930542, + "loss_ce": 0.00034974192385561764, + "loss_iou": 0.43359375, + "loss_num": 0.0361328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 150872492, + "step": 2693 + }, + { + "epoch": 6.0, + "grad_norm": 18.474491119384766, + "learning_rate": 1e-06, + "loss": 0.8475, + "num_input_tokens_seen": 150929208, + "step": 2694 + }, + { + "epoch": 6.0, + "loss": 1.0649967193603516, + "loss_ce": 0.0002994451788254082, + "loss_iou": 0.4453125, + "loss_num": 0.034912109375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 150929208, + "step": 2694 + }, + { + "epoch": 6.002227171492205, + "grad_norm": 25.486400604248047, + "learning_rate": 1e-06, + "loss": 0.7665, + "num_input_tokens_seen": 150983692, + "step": 2695 + }, + { + "epoch": 6.002227171492205, + "loss": 0.7017991542816162, + "loss_ce": 0.0002610695082694292, + "loss_iou": 0.283203125, + "loss_num": 0.0269775390625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 150983692, + "step": 2695 + }, + { + "epoch": 6.00445434298441, + "grad_norm": 15.804261207580566, + "learning_rate": 1e-06, + "loss": 0.621, + "num_input_tokens_seen": 151040928, + "step": 2696 + }, + { + "epoch": 6.00445434298441, + "loss": 0.6565641164779663, + "loss_ce": 0.000192045554285869, + "loss_iou": 0.28125, + "loss_num": 0.0185546875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 151040928, + "step": 2696 + }, + { + "epoch": 6.006681514476615, + "grad_norm": 18.53908348083496, + "learning_rate": 1e-06, + "loss": 0.7812, + "num_input_tokens_seen": 151095388, + "step": 2697 + }, + { + "epoch": 6.006681514476615, + "loss": 0.8815810084342957, + "loss_ce": 0.00023334722209256142, + "loss_iou": 0.369140625, + "loss_num": 0.02880859375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 151095388, + "step": 2697 + }, + { + "epoch": 6.008908685968819, + "grad_norm": 23.245758056640625, + "learning_rate": 1e-06, + "loss": 0.7773, + "num_input_tokens_seen": 151148968, + "step": 2698 + }, + { + "epoch": 6.008908685968819, + "loss": 0.8959696292877197, + "loss_ce": 0.00021769374143332243, + "loss_iou": 0.35546875, + "loss_num": 0.03759765625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 151148968, + "step": 2698 + }, + { + "epoch": 6.011135857461024, + "grad_norm": 13.859575271606445, + "learning_rate": 1e-06, + "loss": 0.54, + "num_input_tokens_seen": 151205420, + "step": 2699 + }, + { + "epoch": 6.011135857461024, + "loss": 0.5023019313812256, + "loss_ce": 0.00022672602790407836, + "loss_iou": 0.21484375, + "loss_num": 0.01434326171875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 151205420, + "step": 2699 + }, + { + "epoch": 6.013363028953229, + "grad_norm": 17.814104080200195, + "learning_rate": 1e-06, + "loss": 0.5899, + "num_input_tokens_seen": 151262892, + "step": 2700 + }, + { + "epoch": 6.013363028953229, + "loss": 0.6286102533340454, + "loss_ce": 0.0001923007657751441, + "loss_iou": 0.255859375, + "loss_num": 0.02294921875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 151262892, + "step": 2700 + }, + { + "epoch": 6.015590200445434, + "grad_norm": 20.40790557861328, + "learning_rate": 1e-06, + "loss": 0.7099, + "num_input_tokens_seen": 151316164, + "step": 2701 + }, + { + "epoch": 6.015590200445434, + "loss": 0.8200287818908691, + "loss_ce": 0.00020452812896110117, + "loss_iou": 0.34765625, + "loss_num": 0.025146484375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 151316164, + "step": 2701 + }, + { + "epoch": 6.017817371937639, + "grad_norm": 19.481016159057617, + "learning_rate": 1e-06, + "loss": 0.6958, + "num_input_tokens_seen": 151371160, + "step": 2702 + }, + { + "epoch": 6.017817371937639, + "loss": 0.6471884250640869, + "loss_ce": 0.00021577253937721252, + "loss_iou": 0.25, + "loss_num": 0.0296630859375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 151371160, + "step": 2702 + }, + { + "epoch": 6.020044543429844, + "grad_norm": 32.87615203857422, + "learning_rate": 1e-06, + "loss": 0.7168, + "num_input_tokens_seen": 151428440, + "step": 2703 + }, + { + "epoch": 6.020044543429844, + "loss": 0.737989604473114, + "loss_ce": 0.00019663787679746747, + "loss_iou": 0.330078125, + "loss_num": 0.015625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 151428440, + "step": 2703 + }, + { + "epoch": 6.022271714922049, + "grad_norm": 11.81971263885498, + "learning_rate": 1e-06, + "loss": 0.6217, + "num_input_tokens_seen": 151483012, + "step": 2704 + }, + { + "epoch": 6.022271714922049, + "loss": 0.6774269342422485, + "loss_ce": 0.00018085945339407772, + "loss_iou": 0.291015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 151483012, + "step": 2704 + }, + { + "epoch": 6.0244988864142535, + "grad_norm": 30.31116485595703, + "learning_rate": 1e-06, + "loss": 0.6544, + "num_input_tokens_seen": 151538432, + "step": 2705 + }, + { + "epoch": 6.0244988864142535, + "loss": 0.8250452280044556, + "loss_ce": 0.00033821084070950747, + "loss_iou": 0.341796875, + "loss_num": 0.0281982421875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 151538432, + "step": 2705 + }, + { + "epoch": 6.026726057906459, + "grad_norm": 18.400726318359375, + "learning_rate": 1e-06, + "loss": 0.5954, + "num_input_tokens_seen": 151594204, + "step": 2706 + }, + { + "epoch": 6.026726057906459, + "loss": 0.5231574177742004, + "loss_ce": 0.00020822283113375306, + "loss_iou": 0.21875, + "loss_num": 0.016845703125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 151594204, + "step": 2706 + }, + { + "epoch": 6.028953229398664, + "grad_norm": 15.375958442687988, + "learning_rate": 1e-06, + "loss": 0.7347, + "num_input_tokens_seen": 151651444, + "step": 2707 + }, + { + "epoch": 6.028953229398664, + "loss": 0.5390880107879639, + "loss_ce": 0.00026969151804223657, + "loss_iou": 0.2333984375, + "loss_num": 0.0146484375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 151651444, + "step": 2707 + }, + { + "epoch": 6.031180400890869, + "grad_norm": 20.725507736206055, + "learning_rate": 1e-06, + "loss": 0.546, + "num_input_tokens_seen": 151708908, + "step": 2708 + }, + { + "epoch": 6.031180400890869, + "loss": 0.49738240242004395, + "loss_ce": 0.0009224280365742743, + "loss_iou": 0.203125, + "loss_num": 0.01806640625, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 151708908, + "step": 2708 + }, + { + "epoch": 6.033407572383074, + "grad_norm": 21.394176483154297, + "learning_rate": 1e-06, + "loss": 0.4111, + "num_input_tokens_seen": 151765488, + "step": 2709 + }, + { + "epoch": 6.033407572383074, + "loss": 0.4496401846408844, + "loss_ce": 0.0001773049880284816, + "loss_iou": 0.1826171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 151765488, + "step": 2709 + }, + { + "epoch": 6.035634743875279, + "grad_norm": 26.451292037963867, + "learning_rate": 1e-06, + "loss": 0.5001, + "num_input_tokens_seen": 151820864, + "step": 2710 + }, + { + "epoch": 6.035634743875279, + "loss": 0.41302353143692017, + "loss_ce": 0.0001817169541027397, + "loss_iou": 0.177734375, + "loss_num": 0.0115966796875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 151820864, + "step": 2710 + }, + { + "epoch": 6.0378619153674835, + "grad_norm": 18.717567443847656, + "learning_rate": 1e-06, + "loss": 0.6072, + "num_input_tokens_seen": 151879976, + "step": 2711 + }, + { + "epoch": 6.0378619153674835, + "loss": 0.7275397181510925, + "loss_ce": 0.00024480142747052014, + "loss_iou": 0.31640625, + "loss_num": 0.0191650390625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 151879976, + "step": 2711 + }, + { + "epoch": 6.040089086859688, + "grad_norm": 15.812241554260254, + "learning_rate": 1e-06, + "loss": 0.6669, + "num_input_tokens_seen": 151938396, + "step": 2712 + }, + { + "epoch": 6.040089086859688, + "loss": 0.5586850047111511, + "loss_ce": 0.00021330438903532922, + "loss_iou": 0.2275390625, + "loss_num": 0.020751953125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 151938396, + "step": 2712 + }, + { + "epoch": 6.042316258351893, + "grad_norm": 28.703325271606445, + "learning_rate": 1e-06, + "loss": 1.0115, + "num_input_tokens_seen": 151992472, + "step": 2713 + }, + { + "epoch": 6.042316258351893, + "loss": 0.8724164366722107, + "loss_ce": 0.0003461412852630019, + "loss_iou": 0.384765625, + "loss_num": 0.020263671875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 151992472, + "step": 2713 + }, + { + "epoch": 6.044543429844098, + "grad_norm": 21.311323165893555, + "learning_rate": 1e-06, + "loss": 0.9992, + "num_input_tokens_seen": 152046528, + "step": 2714 + }, + { + "epoch": 6.044543429844098, + "loss": 0.9513915777206421, + "loss_ce": 0.00021972534887026995, + "loss_iou": 0.408203125, + "loss_num": 0.0269775390625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 152046528, + "step": 2714 + }, + { + "epoch": 6.046770601336303, + "grad_norm": 16.139442443847656, + "learning_rate": 1e-06, + "loss": 0.5864, + "num_input_tokens_seen": 152103856, + "step": 2715 + }, + { + "epoch": 6.046770601336303, + "loss": 0.7112746238708496, + "loss_ce": 0.00021506489429157227, + "loss_iou": 0.306640625, + "loss_num": 0.01953125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 152103856, + "step": 2715 + }, + { + "epoch": 6.048997772828508, + "grad_norm": 18.766185760498047, + "learning_rate": 1e-06, + "loss": 0.6731, + "num_input_tokens_seen": 152161252, + "step": 2716 + }, + { + "epoch": 6.048997772828508, + "loss": 0.6426092386245728, + "loss_ce": 0.0002752981963567436, + "loss_iou": 0.283203125, + "loss_num": 0.01483154296875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 152161252, + "step": 2716 + }, + { + "epoch": 6.051224944320713, + "grad_norm": 20.689594268798828, + "learning_rate": 1e-06, + "loss": 0.6403, + "num_input_tokens_seen": 152219268, + "step": 2717 + }, + { + "epoch": 6.051224944320713, + "loss": 0.6734442114830017, + "loss_ce": 0.00022642976546194404, + "loss_iou": 0.291015625, + "loss_num": 0.0181884765625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 152219268, + "step": 2717 + }, + { + "epoch": 6.0534521158129175, + "grad_norm": 20.202369689941406, + "learning_rate": 1e-06, + "loss": 0.9098, + "num_input_tokens_seen": 152275040, + "step": 2718 + }, + { + "epoch": 6.0534521158129175, + "loss": 1.088090181350708, + "loss_ce": 0.00019952133879996836, + "loss_iou": 0.462890625, + "loss_num": 0.032470703125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 152275040, + "step": 2718 + }, + { + "epoch": 6.055679287305122, + "grad_norm": 24.603801727294922, + "learning_rate": 1e-06, + "loss": 0.5698, + "num_input_tokens_seen": 152329728, + "step": 2719 + }, + { + "epoch": 6.055679287305122, + "loss": 0.7060960531234741, + "loss_ce": 0.0002855417551472783, + "loss_iou": 0.3125, + "loss_num": 0.0164794921875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 152329728, + "step": 2719 + }, + { + "epoch": 6.057906458797327, + "grad_norm": 17.9193172454834, + "learning_rate": 1e-06, + "loss": 0.528, + "num_input_tokens_seen": 152388868, + "step": 2720 + }, + { + "epoch": 6.057906458797327, + "loss": 0.4527170658111572, + "loss_ce": 0.00020240643061697483, + "loss_iou": 0.17578125, + "loss_num": 0.02001953125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 152388868, + "step": 2720 + }, + { + "epoch": 6.060133630289532, + "grad_norm": 16.661762237548828, + "learning_rate": 1e-06, + "loss": 0.6847, + "num_input_tokens_seen": 152442380, + "step": 2721 + }, + { + "epoch": 6.060133630289532, + "loss": 0.6767435073852539, + "loss_ce": 0.0007180861430242658, + "loss_iou": 0.263671875, + "loss_num": 0.030029296875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 152442380, + "step": 2721 + }, + { + "epoch": 6.062360801781737, + "grad_norm": 16.014999389648438, + "learning_rate": 1e-06, + "loss": 0.5119, + "num_input_tokens_seen": 152499432, + "step": 2722 + }, + { + "epoch": 6.062360801781737, + "loss": 0.574461817741394, + "loss_ce": 0.000243054106249474, + "loss_iou": 0.2431640625, + "loss_num": 0.017822265625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 152499432, + "step": 2722 + }, + { + "epoch": 6.064587973273942, + "grad_norm": 39.888118743896484, + "learning_rate": 1e-06, + "loss": 0.8712, + "num_input_tokens_seen": 152551644, + "step": 2723 + }, + { + "epoch": 6.064587973273942, + "loss": 0.9099514484405518, + "loss_ce": 0.0002834274200722575, + "loss_iou": 0.38671875, + "loss_num": 0.0277099609375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 152551644, + "step": 2723 + }, + { + "epoch": 6.066815144766147, + "grad_norm": 16.595260620117188, + "learning_rate": 1e-06, + "loss": 0.6684, + "num_input_tokens_seen": 152608960, + "step": 2724 + }, + { + "epoch": 6.066815144766147, + "loss": 0.7639758586883545, + "loss_ce": 0.0001819009194150567, + "loss_iou": 0.283203125, + "loss_num": 0.03955078125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 152608960, + "step": 2724 + }, + { + "epoch": 6.0690423162583516, + "grad_norm": 15.64448070526123, + "learning_rate": 1e-06, + "loss": 0.6383, + "num_input_tokens_seen": 152661612, + "step": 2725 + }, + { + "epoch": 6.0690423162583516, + "loss": 0.5945534706115723, + "loss_ce": 0.0003151525743305683, + "loss_iou": 0.234375, + "loss_num": 0.02490234375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 152661612, + "step": 2725 + }, + { + "epoch": 6.071269487750556, + "grad_norm": 14.511089324951172, + "learning_rate": 1e-06, + "loss": 0.7527, + "num_input_tokens_seen": 152717140, + "step": 2726 + }, + { + "epoch": 6.071269487750556, + "loss": 0.7612234354019165, + "loss_ce": 0.00023707791115157306, + "loss_iou": 0.337890625, + "loss_num": 0.0166015625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 152717140, + "step": 2726 + }, + { + "epoch": 6.073496659242761, + "grad_norm": 23.56868553161621, + "learning_rate": 1e-06, + "loss": 0.69, + "num_input_tokens_seen": 152772512, + "step": 2727 + }, + { + "epoch": 6.073496659242761, + "loss": 0.8910683393478394, + "loss_ce": 0.00019922200590372086, + "loss_iou": 0.373046875, + "loss_num": 0.029296875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 152772512, + "step": 2727 + }, + { + "epoch": 6.075723830734967, + "grad_norm": 15.447635650634766, + "learning_rate": 1e-06, + "loss": 0.5218, + "num_input_tokens_seen": 152828880, + "step": 2728 + }, + { + "epoch": 6.075723830734967, + "loss": 0.5673261284828186, + "loss_ce": 0.00018746175919659436, + "loss_iou": 0.21875, + "loss_num": 0.0257568359375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 152828880, + "step": 2728 + }, + { + "epoch": 6.077951002227172, + "grad_norm": 24.025293350219727, + "learning_rate": 1e-06, + "loss": 0.862, + "num_input_tokens_seen": 152884388, + "step": 2729 + }, + { + "epoch": 6.077951002227172, + "loss": 0.7360300421714783, + "loss_ce": 0.00019018063903786242, + "loss_iou": 0.32421875, + "loss_num": 0.01708984375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 152884388, + "step": 2729 + }, + { + "epoch": 6.080178173719377, + "grad_norm": 16.803165435791016, + "learning_rate": 1e-06, + "loss": 0.8949, + "num_input_tokens_seen": 152942756, + "step": 2730 + }, + { + "epoch": 6.080178173719377, + "loss": 0.9147862195968628, + "loss_ce": 0.00023543770657852292, + "loss_iou": 0.349609375, + "loss_num": 0.042724609375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 152942756, + "step": 2730 + }, + { + "epoch": 6.082405345211582, + "grad_norm": 18.61233139038086, + "learning_rate": 1e-06, + "loss": 0.6134, + "num_input_tokens_seen": 152998352, + "step": 2731 + }, + { + "epoch": 6.082405345211582, + "loss": 0.46365272998809814, + "loss_ce": 0.0004569324664771557, + "loss_iou": 0.2041015625, + "loss_num": 0.0108642578125, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 152998352, + "step": 2731 + }, + { + "epoch": 6.0846325167037865, + "grad_norm": 22.512348175048828, + "learning_rate": 1e-06, + "loss": 0.8523, + "num_input_tokens_seen": 153052908, + "step": 2732 + }, + { + "epoch": 6.0846325167037865, + "loss": 0.7741538882255554, + "loss_ce": 0.00022813121904619038, + "loss_iou": 0.310546875, + "loss_num": 0.03076171875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 153052908, + "step": 2732 + }, + { + "epoch": 6.086859688195991, + "grad_norm": 36.26032257080078, + "learning_rate": 1e-06, + "loss": 0.8099, + "num_input_tokens_seen": 153107940, + "step": 2733 + }, + { + "epoch": 6.086859688195991, + "loss": 0.8505562543869019, + "loss_ce": 0.00021446403115987778, + "loss_iou": 0.39453125, + "loss_num": 0.0123291015625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 153107940, + "step": 2733 + }, + { + "epoch": 6.089086859688196, + "grad_norm": 23.493366241455078, + "learning_rate": 1e-06, + "loss": 0.7301, + "num_input_tokens_seen": 153166016, + "step": 2734 + }, + { + "epoch": 6.089086859688196, + "loss": 0.6998932361602783, + "loss_ce": 0.00018625493976287544, + "loss_iou": 0.298828125, + "loss_num": 0.020263671875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 153166016, + "step": 2734 + }, + { + "epoch": 6.091314031180401, + "grad_norm": 14.597694396972656, + "learning_rate": 1e-06, + "loss": 0.6806, + "num_input_tokens_seen": 153222732, + "step": 2735 + }, + { + "epoch": 6.091314031180401, + "loss": 0.44916197657585144, + "loss_ce": 0.00018737564096227288, + "loss_iou": 0.1962890625, + "loss_num": 0.01123046875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 153222732, + "step": 2735 + }, + { + "epoch": 6.093541202672606, + "grad_norm": 22.396705627441406, + "learning_rate": 1e-06, + "loss": 0.7905, + "num_input_tokens_seen": 153279676, + "step": 2736 + }, + { + "epoch": 6.093541202672606, + "loss": 0.8422399163246155, + "loss_ce": 0.00019887213420588523, + "loss_iou": 0.361328125, + "loss_num": 0.02392578125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 153279676, + "step": 2736 + }, + { + "epoch": 6.095768374164811, + "grad_norm": 125.70928192138672, + "learning_rate": 1e-06, + "loss": 0.6704, + "num_input_tokens_seen": 153335272, + "step": 2737 + }, + { + "epoch": 6.095768374164811, + "loss": 0.6769283413887024, + "loss_ce": 0.0001705446484265849, + "loss_iou": 0.298828125, + "loss_num": 0.01611328125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 153335272, + "step": 2737 + }, + { + "epoch": 6.097995545657016, + "grad_norm": 16.32554817199707, + "learning_rate": 1e-06, + "loss": 0.6954, + "num_input_tokens_seen": 153390464, + "step": 2738 + }, + { + "epoch": 6.097995545657016, + "loss": 0.9421560764312744, + "loss_ce": 0.0002615359262563288, + "loss_iou": 0.419921875, + "loss_num": 0.0206298828125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 153390464, + "step": 2738 + }, + { + "epoch": 6.1002227171492205, + "grad_norm": 28.368751525878906, + "learning_rate": 1e-06, + "loss": 0.806, + "num_input_tokens_seen": 153448492, + "step": 2739 + }, + { + "epoch": 6.1002227171492205, + "loss": 0.846657395362854, + "loss_ce": 0.00022182743123266846, + "loss_iou": 0.357421875, + "loss_num": 0.0264892578125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 153448492, + "step": 2739 + }, + { + "epoch": 6.102449888641425, + "grad_norm": 16.504480361938477, + "learning_rate": 1e-06, + "loss": 0.651, + "num_input_tokens_seen": 153502140, + "step": 2740 + }, + { + "epoch": 6.102449888641425, + "loss": 0.6750026345252991, + "loss_ce": 0.0001979665830731392, + "loss_iou": 0.298828125, + "loss_num": 0.01531982421875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 153502140, + "step": 2740 + }, + { + "epoch": 6.10467706013363, + "grad_norm": 17.08942413330078, + "learning_rate": 1e-06, + "loss": 0.5972, + "num_input_tokens_seen": 153558592, + "step": 2741 + }, + { + "epoch": 6.10467706013363, + "loss": 0.7375233769416809, + "loss_ce": 0.00021869146439712495, + "loss_iou": 0.29296875, + "loss_num": 0.030029296875, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 153558592, + "step": 2741 + }, + { + "epoch": 6.106904231625835, + "grad_norm": 39.28758239746094, + "learning_rate": 1e-06, + "loss": 0.6328, + "num_input_tokens_seen": 153615344, + "step": 2742 + }, + { + "epoch": 6.106904231625835, + "loss": 0.6711956858634949, + "loss_ce": 0.00029724877094849944, + "loss_iou": 0.287109375, + "loss_num": 0.0194091796875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 153615344, + "step": 2742 + }, + { + "epoch": 6.10913140311804, + "grad_norm": 25.463825225830078, + "learning_rate": 1e-06, + "loss": 0.6359, + "num_input_tokens_seen": 153673836, + "step": 2743 + }, + { + "epoch": 6.10913140311804, + "loss": 0.6788397431373596, + "loss_ce": 0.0002203606127295643, + "loss_iou": 0.3046875, + "loss_num": 0.01385498046875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 153673836, + "step": 2743 + }, + { + "epoch": 6.111358574610245, + "grad_norm": 20.883134841918945, + "learning_rate": 1e-06, + "loss": 0.7075, + "num_input_tokens_seen": 153728812, + "step": 2744 + }, + { + "epoch": 6.111358574610245, + "loss": 0.9753950834274292, + "loss_ce": 0.00029741463367827237, + "loss_iou": 0.43359375, + "loss_num": 0.021728515625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 153728812, + "step": 2744 + }, + { + "epoch": 6.11358574610245, + "grad_norm": 24.50876808166504, + "learning_rate": 1e-06, + "loss": 0.8218, + "num_input_tokens_seen": 153784252, + "step": 2745 + }, + { + "epoch": 6.11358574610245, + "loss": 0.9287269115447998, + "loss_ce": 0.0009925166377797723, + "loss_iou": 0.3671875, + "loss_num": 0.038818359375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 153784252, + "step": 2745 + }, + { + "epoch": 6.1158129175946545, + "grad_norm": 22.58624839782715, + "learning_rate": 1e-06, + "loss": 0.6866, + "num_input_tokens_seen": 153839176, + "step": 2746 + }, + { + "epoch": 6.1158129175946545, + "loss": 0.7910230755805969, + "loss_ce": 0.0002516076201573014, + "loss_iou": 0.33203125, + "loss_num": 0.0247802734375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 153839176, + "step": 2746 + }, + { + "epoch": 6.118040089086859, + "grad_norm": 12.76305103302002, + "learning_rate": 1e-06, + "loss": 0.6154, + "num_input_tokens_seen": 153897360, + "step": 2747 + }, + { + "epoch": 6.118040089086859, + "loss": 0.6281359791755676, + "loss_ce": 0.0002062909334199503, + "loss_iou": 0.291015625, + "loss_num": 0.0089111328125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 153897360, + "step": 2747 + }, + { + "epoch": 6.120267260579064, + "grad_norm": 17.98402214050293, + "learning_rate": 1e-06, + "loss": 0.6908, + "num_input_tokens_seen": 153951844, + "step": 2748 + }, + { + "epoch": 6.120267260579064, + "loss": 0.8605327606201172, + "loss_ce": 0.0001811749825719744, + "loss_iou": 0.34765625, + "loss_num": 0.033203125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 153951844, + "step": 2748 + }, + { + "epoch": 6.122494432071269, + "grad_norm": 19.222530364990234, + "learning_rate": 1e-06, + "loss": 0.7594, + "num_input_tokens_seen": 154006004, + "step": 2749 + }, + { + "epoch": 6.122494432071269, + "loss": 0.7193889617919922, + "loss_ce": 0.0003948350786231458, + "loss_iou": 0.3046875, + "loss_num": 0.02197265625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 154006004, + "step": 2749 + }, + { + "epoch": 6.124721603563474, + "grad_norm": 16.589632034301758, + "learning_rate": 1e-06, + "loss": 0.7099, + "num_input_tokens_seen": 154061564, + "step": 2750 + }, + { + "epoch": 6.124721603563474, + "eval_seeclick_web_CIoU": 0.5717557966709137, + "eval_seeclick_web_GIoU": 0.5666035413742065, + "eval_seeclick_web_IoU": 0.5889425873756409, + "eval_seeclick_web_MAE_all": 0.016641407273709774, + "eval_seeclick_web_MAE_h": 0.009463720256462693, + "eval_seeclick_web_MAE_w": 0.0170047702267766, + "eval_seeclick_web_MAE_x_boxes": 0.009009606204926968, + "eval_seeclick_web_MAE_y_boxes": 0.022332632914185524, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9315496683120728, + "eval_seeclick_web_loss_ce": 0.0002911543124355376, + "eval_seeclick_web_loss_iou": 0.423828125, + "eval_seeclick_web_loss_num": 0.0130767822265625, + "eval_seeclick_web_loss_xval": 0.912841796875, + "eval_seeclick_web_runtime": 26.4518, + "eval_seeclick_web_samples_per_second": 1.89, + "eval_seeclick_web_steps_per_second": 0.076, + "num_input_tokens_seen": 154061564, + "step": 2750 + }, + { + "epoch": 6.124721603563474, + "eval_icons_CIoU": 0.2957966476678848, + "eval_icons_GIoU": 0.3207407593727112, + "eval_icons_IoU": 0.3739467114210129, + "eval_icons_MAE_all": 0.06642757169902325, + "eval_icons_MAE_h": 0.03895352780818939, + "eval_icons_MAE_w": 0.07084468938410282, + "eval_icons_MAE_x_boxes": 0.06239369884133339, + "eval_icons_MAE_y_boxes": 0.03859401401132345, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.7069156169891357, + "eval_icons_loss_ce": 0.00033613041159696877, + "eval_icons_loss_iou": 0.6629638671875, + "eval_icons_loss_num": 0.06306648254394531, + "eval_icons_loss_xval": 1.64111328125, + "eval_icons_runtime": 25.4698, + "eval_icons_samples_per_second": 1.963, + "eval_icons_steps_per_second": 0.079, + "num_input_tokens_seen": 154061564, + "step": 2750 + }, + { + "epoch": 6.124721603563474, + "eval_screenspot_CIoU": 0.3402452568213145, + "eval_screenspot_GIoU": 0.3564106822013855, + "eval_screenspot_IoU": 0.4236031075318654, + "eval_screenspot_MAE_all": 0.06354892750581105, + "eval_screenspot_MAE_h": 0.03823430463671684, + "eval_screenspot_MAE_w": 0.07551725829641025, + "eval_screenspot_MAE_x_boxes": 0.07483576859037082, + "eval_screenspot_MAE_y_boxes": 0.046273874429365, + "eval_screenspot_inside_bbox": 0.659583330154419, + "eval_screenspot_loss": 1.6718511581420898, + "eval_screenspot_loss_ce": 0.00034005365644892055, + "eval_screenspot_loss_iou": 0.6853841145833334, + "eval_screenspot_loss_num": 0.07482655843098958, + "eval_screenspot_loss_xval": 1.7449544270833333, + "eval_screenspot_runtime": 44.3938, + "eval_screenspot_samples_per_second": 2.005, + "eval_screenspot_steps_per_second": 0.068, + "num_input_tokens_seen": 154061564, + "step": 2750 + }, + { + "epoch": 6.124721603563474, + "eval_compot_CIoU": 0.3488970696926117, + "eval_compot_GIoU": 0.363851934671402, + "eval_compot_IoU": 0.40621405839920044, + "eval_compot_MAE_all": 0.017889784649014473, + "eval_compot_MAE_h": 0.008526601362973452, + "eval_compot_MAE_w": 0.021996816620230675, + "eval_compot_MAE_x_boxes": 0.02964367438107729, + "eval_compot_MAE_y_boxes": 0.006707766558974981, + "eval_compot_inside_bbox": 0.6458333432674408, + "eval_compot_loss": 1.3785548210144043, + "eval_compot_loss_ce": 0.0002755048044491559, + "eval_compot_loss_iou": 0.6339111328125, + "eval_compot_loss_num": 0.016744613647460938, + "eval_compot_loss_xval": 1.35205078125, + "eval_compot_runtime": 24.1215, + "eval_compot_samples_per_second": 2.073, + "eval_compot_steps_per_second": 0.083, + "num_input_tokens_seen": 154061564, + "step": 2750 + }, + { + "epoch": 6.124721603563474, + "eval_custom_ui_val_CIoU": 0.4712728477186627, + "eval_custom_ui_val_GIoU": 0.48705925544102985, + "eval_custom_ui_val_IoU": 0.5247346328364478, + "eval_custom_ui_val_MAE_all": 0.03072897769096825, + "eval_custom_ui_val_MAE_h": 0.017728664524232347, + "eval_custom_ui_val_MAE_w": 0.03841559050811662, + "eval_custom_ui_val_MAE_x_boxes": 0.03411081122855345, + "eval_custom_ui_val_MAE_y_boxes": 0.015722092292788956, + "eval_custom_ui_val_inside_bbox": 0.7353395091162788, + "eval_custom_ui_val_loss": 1.1931304931640625, + "eval_custom_ui_val_loss_ce": 0.0003192785694005175, + "eval_custom_ui_val_loss_iou": 0.5072292751736112, + "eval_custom_ui_val_loss_num": 0.02840868631998698, + "eval_custom_ui_val_loss_xval": 1.1561957465277777, + "eval_custom_ui_val_runtime": 72.27, + "eval_custom_ui_val_samples_per_second": 3.667, + "eval_custom_ui_val_steps_per_second": 0.125, + "num_input_tokens_seen": 154061564, + "step": 2750 + }, + { + "epoch": 6.124721603563474, + "loss": 0.908734917640686, + "loss_ce": 0.0002876613289117813, + "loss_iou": 0.39453125, + "loss_num": 0.024169921875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 154061564, + "step": 2750 + }, + { + "epoch": 6.12694877505568, + "grad_norm": 21.767990112304688, + "learning_rate": 1e-06, + "loss": 0.7365, + "num_input_tokens_seen": 154117696, + "step": 2751 + }, + { + "epoch": 6.12694877505568, + "loss": 0.7305013537406921, + "loss_ce": 0.0002767470432445407, + "loss_iou": 0.283203125, + "loss_num": 0.032958984375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 154117696, + "step": 2751 + }, + { + "epoch": 6.129175946547885, + "grad_norm": 26.993640899658203, + "learning_rate": 1e-06, + "loss": 0.8128, + "num_input_tokens_seen": 154173524, + "step": 2752 + }, + { + "epoch": 6.129175946547885, + "loss": 0.9162663221359253, + "loss_ce": 0.0002507681492716074, + "loss_iou": 0.3828125, + "loss_num": 0.0296630859375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 154173524, + "step": 2752 + }, + { + "epoch": 6.131403118040089, + "grad_norm": 18.245227813720703, + "learning_rate": 1e-06, + "loss": 0.5699, + "num_input_tokens_seen": 154231616, + "step": 2753 + }, + { + "epoch": 6.131403118040089, + "loss": 0.5690479278564453, + "loss_ce": 0.00020031025633215904, + "loss_iou": 0.251953125, + "loss_num": 0.0126953125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 154231616, + "step": 2753 + }, + { + "epoch": 6.133630289532294, + "grad_norm": 15.791337013244629, + "learning_rate": 1e-06, + "loss": 0.7701, + "num_input_tokens_seen": 154288064, + "step": 2754 + }, + { + "epoch": 6.133630289532294, + "loss": 0.6845183968544006, + "loss_ce": 0.00019223052368033677, + "loss_iou": 0.291015625, + "loss_num": 0.0206298828125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 154288064, + "step": 2754 + }, + { + "epoch": 6.135857461024499, + "grad_norm": 18.88831901550293, + "learning_rate": 1e-06, + "loss": 0.5883, + "num_input_tokens_seen": 154340732, + "step": 2755 + }, + { + "epoch": 6.135857461024499, + "loss": 0.5457676649093628, + "loss_ce": 0.00023538943787571043, + "loss_iou": 0.25390625, + "loss_num": 0.00787353515625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 154340732, + "step": 2755 + }, + { + "epoch": 6.138084632516704, + "grad_norm": 15.97834300994873, + "learning_rate": 1e-06, + "loss": 0.5491, + "num_input_tokens_seen": 154392456, + "step": 2756 + }, + { + "epoch": 6.138084632516704, + "loss": 0.5473060011863708, + "loss_ce": 0.00018683119560591877, + "loss_iou": 0.24609375, + "loss_num": 0.0107421875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 154392456, + "step": 2756 + }, + { + "epoch": 6.140311804008909, + "grad_norm": 13.842836380004883, + "learning_rate": 1e-06, + "loss": 0.6017, + "num_input_tokens_seen": 154448276, + "step": 2757 + }, + { + "epoch": 6.140311804008909, + "loss": 0.6372648477554321, + "loss_ce": 0.00017988680338021368, + "loss_iou": 0.271484375, + "loss_num": 0.018798828125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 154448276, + "step": 2757 + }, + { + "epoch": 6.142538975501114, + "grad_norm": 20.629444122314453, + "learning_rate": 1e-06, + "loss": 0.6205, + "num_input_tokens_seen": 154507936, + "step": 2758 + }, + { + "epoch": 6.142538975501114, + "loss": 0.643352210521698, + "loss_ce": 0.00028582755476236343, + "loss_iou": 0.25390625, + "loss_num": 0.0274658203125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 154507936, + "step": 2758 + }, + { + "epoch": 6.144766146993319, + "grad_norm": 26.82574462890625, + "learning_rate": 1e-06, + "loss": 0.7818, + "num_input_tokens_seen": 154564448, + "step": 2759 + }, + { + "epoch": 6.144766146993319, + "loss": 0.9775708913803101, + "loss_ce": 0.0002760253846645355, + "loss_iou": 0.427734375, + "loss_num": 0.025146484375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 154564448, + "step": 2759 + }, + { + "epoch": 6.146993318485523, + "grad_norm": 21.474916458129883, + "learning_rate": 1e-06, + "loss": 0.8228, + "num_input_tokens_seen": 154621152, + "step": 2760 + }, + { + "epoch": 6.146993318485523, + "loss": 0.8825445175170898, + "loss_ce": 0.00022031497792340815, + "loss_iou": 0.33203125, + "loss_num": 0.044189453125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 154621152, + "step": 2760 + }, + { + "epoch": 6.149220489977728, + "grad_norm": 21.773693084716797, + "learning_rate": 1e-06, + "loss": 0.9334, + "num_input_tokens_seen": 154676312, + "step": 2761 + }, + { + "epoch": 6.149220489977728, + "loss": 0.9210202693939209, + "loss_ce": 0.00024393736384809017, + "loss_iou": 0.384765625, + "loss_num": 0.0299072265625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 154676312, + "step": 2761 + }, + { + "epoch": 6.151447661469933, + "grad_norm": 18.154090881347656, + "learning_rate": 1e-06, + "loss": 0.563, + "num_input_tokens_seen": 154732704, + "step": 2762 + }, + { + "epoch": 6.151447661469933, + "loss": 0.3975493311882019, + "loss_ce": 0.00033253489527851343, + "loss_iou": 0.158203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 154732704, + "step": 2762 + }, + { + "epoch": 6.153674832962138, + "grad_norm": 20.072853088378906, + "learning_rate": 1e-06, + "loss": 0.5174, + "num_input_tokens_seen": 154790408, + "step": 2763 + }, + { + "epoch": 6.153674832962138, + "loss": 0.5634435415267944, + "loss_ce": 0.00021113727416377515, + "loss_iou": 0.2470703125, + "loss_num": 0.01397705078125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 154790408, + "step": 2763 + }, + { + "epoch": 6.155902004454343, + "grad_norm": 16.74744987487793, + "learning_rate": 1e-06, + "loss": 0.729, + "num_input_tokens_seen": 154847920, + "step": 2764 + }, + { + "epoch": 6.155902004454343, + "loss": 0.8036819696426392, + "loss_ce": 0.00021515009575523436, + "loss_iou": 0.34375, + "loss_num": 0.023193359375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 154847920, + "step": 2764 + }, + { + "epoch": 6.158129175946548, + "grad_norm": 31.824892044067383, + "learning_rate": 1e-06, + "loss": 0.8517, + "num_input_tokens_seen": 154904104, + "step": 2765 + }, + { + "epoch": 6.158129175946548, + "loss": 0.874335527420044, + "loss_ce": 0.00018998724408447742, + "loss_iou": 0.38671875, + "loss_num": 0.02001953125, + "loss_xval": 0.875, + "num_input_tokens_seen": 154904104, + "step": 2765 + }, + { + "epoch": 6.160356347438753, + "grad_norm": 18.871536254882812, + "learning_rate": 1e-06, + "loss": 0.6084, + "num_input_tokens_seen": 154960796, + "step": 2766 + }, + { + "epoch": 6.160356347438753, + "loss": 0.6629579067230225, + "loss_ce": 0.000360234291292727, + "loss_iou": 0.265625, + "loss_num": 0.0264892578125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 154960796, + "step": 2766 + }, + { + "epoch": 6.1625835189309575, + "grad_norm": 18.299470901489258, + "learning_rate": 1e-06, + "loss": 0.5538, + "num_input_tokens_seen": 155013768, + "step": 2767 + }, + { + "epoch": 6.1625835189309575, + "loss": 0.6405810117721558, + "loss_ce": 0.00020016740018036216, + "loss_iou": 0.28515625, + "loss_num": 0.01409912109375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 155013768, + "step": 2767 + }, + { + "epoch": 6.164810690423162, + "grad_norm": 13.490962982177734, + "learning_rate": 1e-06, + "loss": 0.8451, + "num_input_tokens_seen": 155069988, + "step": 2768 + }, + { + "epoch": 6.164810690423162, + "loss": 0.865939199924469, + "loss_ce": 0.00021657557226717472, + "loss_iou": 0.36328125, + "loss_num": 0.027587890625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 155069988, + "step": 2768 + }, + { + "epoch": 6.167037861915367, + "grad_norm": 15.8577241897583, + "learning_rate": 1e-06, + "loss": 0.6934, + "num_input_tokens_seen": 155125976, + "step": 2769 + }, + { + "epoch": 6.167037861915367, + "loss": 0.7875275611877441, + "loss_ce": 0.00017407389532309026, + "loss_iou": 0.3359375, + "loss_num": 0.023193359375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 155125976, + "step": 2769 + }, + { + "epoch": 6.169265033407572, + "grad_norm": 15.64920711517334, + "learning_rate": 1e-06, + "loss": 0.584, + "num_input_tokens_seen": 155181288, + "step": 2770 + }, + { + "epoch": 6.169265033407572, + "loss": 0.6932950615882874, + "loss_ce": 0.0004239326517563313, + "loss_iou": 0.291015625, + "loss_num": 0.0224609375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 155181288, + "step": 2770 + }, + { + "epoch": 6.171492204899777, + "grad_norm": 25.973970413208008, + "learning_rate": 1e-06, + "loss": 0.5113, + "num_input_tokens_seen": 155238888, + "step": 2771 + }, + { + "epoch": 6.171492204899777, + "loss": 0.5262500047683716, + "loss_ce": 0.0002490488113835454, + "loss_iou": 0.228515625, + "loss_num": 0.01409912109375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 155238888, + "step": 2771 + }, + { + "epoch": 6.173719376391982, + "grad_norm": 23.871335983276367, + "learning_rate": 1e-06, + "loss": 0.8139, + "num_input_tokens_seen": 155294792, + "step": 2772 + }, + { + "epoch": 6.173719376391982, + "loss": 0.7459384202957153, + "loss_ce": 0.0004549958393909037, + "loss_iou": 0.3125, + "loss_num": 0.0238037109375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 155294792, + "step": 2772 + }, + { + "epoch": 6.1759465478841875, + "grad_norm": 24.223966598510742, + "learning_rate": 1e-06, + "loss": 0.8061, + "num_input_tokens_seen": 155350512, + "step": 2773 + }, + { + "epoch": 6.1759465478841875, + "loss": 1.045411229133606, + "loss_ce": 0.00024516499252058566, + "loss_iou": 0.462890625, + "loss_num": 0.0235595703125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 155350512, + "step": 2773 + }, + { + "epoch": 6.178173719376392, + "grad_norm": 15.486029624938965, + "learning_rate": 1e-06, + "loss": 0.6705, + "num_input_tokens_seen": 155404252, + "step": 2774 + }, + { + "epoch": 6.178173719376392, + "loss": 0.8749101758003235, + "loss_ce": 0.0001542975369375199, + "loss_iou": 0.333984375, + "loss_num": 0.041748046875, + "loss_xval": 0.875, + "num_input_tokens_seen": 155404252, + "step": 2774 + }, + { + "epoch": 6.180400890868597, + "grad_norm": 20.75381851196289, + "learning_rate": 1e-06, + "loss": 0.6957, + "num_input_tokens_seen": 155459700, + "step": 2775 + }, + { + "epoch": 6.180400890868597, + "loss": 0.5211961269378662, + "loss_ce": 0.00020005203259643167, + "loss_iou": 0.2373046875, + "loss_num": 0.00921630859375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 155459700, + "step": 2775 + }, + { + "epoch": 6.182628062360802, + "grad_norm": 19.334497451782227, + "learning_rate": 1e-06, + "loss": 0.6821, + "num_input_tokens_seen": 155517152, + "step": 2776 + }, + { + "epoch": 6.182628062360802, + "loss": 0.7436808347702026, + "loss_ce": 0.0002726712264120579, + "loss_iou": 0.314453125, + "loss_num": 0.02294921875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 155517152, + "step": 2776 + }, + { + "epoch": 6.184855233853007, + "grad_norm": 13.525506019592285, + "learning_rate": 1e-06, + "loss": 0.5351, + "num_input_tokens_seen": 155574720, + "step": 2777 + }, + { + "epoch": 6.184855233853007, + "loss": 0.42132747173309326, + "loss_ce": 0.0001848653773777187, + "loss_iou": 0.1845703125, + "loss_num": 0.01055908203125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 155574720, + "step": 2777 + }, + { + "epoch": 6.187082405345212, + "grad_norm": 13.232222557067871, + "learning_rate": 1e-06, + "loss": 0.5728, + "num_input_tokens_seen": 155631268, + "step": 2778 + }, + { + "epoch": 6.187082405345212, + "loss": 0.5074321627616882, + "loss_ce": 0.00023002157104201615, + "loss_iou": 0.2021484375, + "loss_num": 0.020751953125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 155631268, + "step": 2778 + }, + { + "epoch": 6.189309576837417, + "grad_norm": 31.53081703186035, + "learning_rate": 1e-06, + "loss": 0.6913, + "num_input_tokens_seen": 155688784, + "step": 2779 + }, + { + "epoch": 6.189309576837417, + "loss": 0.9023219347000122, + "loss_ce": 0.0002223325427621603, + "loss_iou": 0.345703125, + "loss_num": 0.0419921875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 155688784, + "step": 2779 + }, + { + "epoch": 6.1915367483296215, + "grad_norm": 20.483375549316406, + "learning_rate": 1e-06, + "loss": 0.6798, + "num_input_tokens_seen": 155744636, + "step": 2780 + }, + { + "epoch": 6.1915367483296215, + "loss": 0.7736812233924866, + "loss_ce": 0.00024372681218665093, + "loss_iou": 0.34765625, + "loss_num": 0.0159912109375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 155744636, + "step": 2780 + }, + { + "epoch": 6.193763919821826, + "grad_norm": 13.428009986877441, + "learning_rate": 1e-06, + "loss": 0.5512, + "num_input_tokens_seen": 155803476, + "step": 2781 + }, + { + "epoch": 6.193763919821826, + "loss": 0.5040408372879028, + "loss_ce": 0.0002566420880611986, + "loss_iou": 0.2216796875, + "loss_num": 0.01202392578125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 155803476, + "step": 2781 + }, + { + "epoch": 6.195991091314031, + "grad_norm": 23.016326904296875, + "learning_rate": 1e-06, + "loss": 0.6668, + "num_input_tokens_seen": 155858004, + "step": 2782 + }, + { + "epoch": 6.195991091314031, + "loss": 0.6006724834442139, + "loss_ce": 0.0002086429885821417, + "loss_iou": 0.26953125, + "loss_num": 0.01202392578125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 155858004, + "step": 2782 + }, + { + "epoch": 6.198218262806236, + "grad_norm": 18.91625213623047, + "learning_rate": 1e-06, + "loss": 0.5554, + "num_input_tokens_seen": 155908832, + "step": 2783 + }, + { + "epoch": 6.198218262806236, + "loss": 0.4532051086425781, + "loss_ce": 0.00020219493308104575, + "loss_iou": 0.1884765625, + "loss_num": 0.0152587890625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 155908832, + "step": 2783 + }, + { + "epoch": 6.200445434298441, + "grad_norm": 21.05170440673828, + "learning_rate": 1e-06, + "loss": 0.5745, + "num_input_tokens_seen": 155965312, + "step": 2784 + }, + { + "epoch": 6.200445434298441, + "loss": 0.6314213275909424, + "loss_ce": 0.00019577116472646594, + "loss_iou": 0.279296875, + "loss_num": 0.01416015625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 155965312, + "step": 2784 + }, + { + "epoch": 6.202672605790646, + "grad_norm": 20.375423431396484, + "learning_rate": 1e-06, + "loss": 0.6475, + "num_input_tokens_seen": 156018284, + "step": 2785 + }, + { + "epoch": 6.202672605790646, + "loss": 0.7216565608978271, + "loss_ce": 0.0002210296515841037, + "loss_iou": 0.2890625, + "loss_num": 0.0284423828125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 156018284, + "step": 2785 + }, + { + "epoch": 6.204899777282851, + "grad_norm": 16.35026741027832, + "learning_rate": 1e-06, + "loss": 0.7937, + "num_input_tokens_seen": 156075156, + "step": 2786 + }, + { + "epoch": 6.204899777282851, + "loss": 0.8708457946777344, + "loss_ce": 0.00024033612862695009, + "loss_iou": 0.37890625, + "loss_num": 0.0228271484375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 156075156, + "step": 2786 + }, + { + "epoch": 6.2071269487750556, + "grad_norm": 23.919958114624023, + "learning_rate": 1e-06, + "loss": 0.5475, + "num_input_tokens_seen": 156130308, + "step": 2787 + }, + { + "epoch": 6.2071269487750556, + "loss": 0.5468391180038452, + "loss_ce": 0.00020827000844292343, + "loss_iou": 0.2021484375, + "loss_num": 0.0284423828125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 156130308, + "step": 2787 + }, + { + "epoch": 6.20935412026726, + "grad_norm": 14.023747444152832, + "learning_rate": 1e-06, + "loss": 0.5289, + "num_input_tokens_seen": 156185484, + "step": 2788 + }, + { + "epoch": 6.20935412026726, + "loss": 0.6045721769332886, + "loss_ce": 0.00020206648332532495, + "loss_iou": 0.25390625, + "loss_num": 0.0191650390625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 156185484, + "step": 2788 + }, + { + "epoch": 6.211581291759465, + "grad_norm": 33.71453094482422, + "learning_rate": 1e-06, + "loss": 0.7773, + "num_input_tokens_seen": 156240220, + "step": 2789 + }, + { + "epoch": 6.211581291759465, + "loss": 0.7553761601448059, + "loss_ce": 0.0002492480562068522, + "loss_iou": 0.318359375, + "loss_num": 0.0240478515625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 156240220, + "step": 2789 + }, + { + "epoch": 6.21380846325167, + "grad_norm": 15.885749816894531, + "learning_rate": 1e-06, + "loss": 0.7896, + "num_input_tokens_seen": 156294752, + "step": 2790 + }, + { + "epoch": 6.21380846325167, + "loss": 0.7173956632614136, + "loss_ce": 0.00035462420783005655, + "loss_iou": 0.326171875, + "loss_num": 0.01300048828125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 156294752, + "step": 2790 + }, + { + "epoch": 6.216035634743875, + "grad_norm": 23.221410751342773, + "learning_rate": 1e-06, + "loss": 0.5949, + "num_input_tokens_seen": 156350172, + "step": 2791 + }, + { + "epoch": 6.216035634743875, + "loss": 0.4679461717605591, + "loss_ce": 0.00017272785771638155, + "loss_iou": 0.1923828125, + "loss_num": 0.0166015625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 156350172, + "step": 2791 + }, + { + "epoch": 6.21826280623608, + "grad_norm": 18.390562057495117, + "learning_rate": 1e-06, + "loss": 0.7924, + "num_input_tokens_seen": 156405748, + "step": 2792 + }, + { + "epoch": 6.21826280623608, + "loss": 0.5683201551437378, + "loss_ce": 0.0002049248432740569, + "loss_iou": 0.251953125, + "loss_num": 0.012451171875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 156405748, + "step": 2792 + }, + { + "epoch": 6.220489977728285, + "grad_norm": 21.410785675048828, + "learning_rate": 1e-06, + "loss": 0.6411, + "num_input_tokens_seen": 156458592, + "step": 2793 + }, + { + "epoch": 6.220489977728285, + "loss": 0.49654620885849, + "loss_ce": 0.0002083319704979658, + "loss_iou": 0.21484375, + "loss_num": 0.01348876953125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 156458592, + "step": 2793 + }, + { + "epoch": 6.22271714922049, + "grad_norm": 16.62869644165039, + "learning_rate": 1e-06, + "loss": 0.6773, + "num_input_tokens_seen": 156515956, + "step": 2794 + }, + { + "epoch": 6.22271714922049, + "loss": 0.6638964414596558, + "loss_ce": 0.00020011054584756494, + "loss_iou": 0.2734375, + "loss_num": 0.0233154296875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 156515956, + "step": 2794 + }, + { + "epoch": 6.224944320712694, + "grad_norm": 17.955623626708984, + "learning_rate": 1e-06, + "loss": 0.5419, + "num_input_tokens_seen": 156571336, + "step": 2795 + }, + { + "epoch": 6.224944320712694, + "loss": 0.5947801470756531, + "loss_ce": 0.00017566801398061216, + "loss_iou": 0.25, + "loss_num": 0.01904296875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 156571336, + "step": 2795 + }, + { + "epoch": 6.2271714922049, + "grad_norm": 16.663782119750977, + "learning_rate": 1e-06, + "loss": 0.5893, + "num_input_tokens_seen": 156627776, + "step": 2796 + }, + { + "epoch": 6.2271714922049, + "loss": 0.7995222806930542, + "loss_ce": 0.00020588196639437228, + "loss_iou": 0.30859375, + "loss_num": 0.03662109375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 156627776, + "step": 2796 + }, + { + "epoch": 6.229398663697105, + "grad_norm": 16.558795928955078, + "learning_rate": 1e-06, + "loss": 0.7245, + "num_input_tokens_seen": 156682772, + "step": 2797 + }, + { + "epoch": 6.229398663697105, + "loss": 0.7288368344306946, + "loss_ce": 0.0003212342271581292, + "loss_iou": 0.306640625, + "loss_num": 0.022705078125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 156682772, + "step": 2797 + }, + { + "epoch": 6.23162583518931, + "grad_norm": 56.9061164855957, + "learning_rate": 1e-06, + "loss": 0.6461, + "num_input_tokens_seen": 156739268, + "step": 2798 + }, + { + "epoch": 6.23162583518931, + "loss": 0.5878466367721558, + "loss_ce": 0.00020016153575852513, + "loss_iou": 0.265625, + "loss_num": 0.0108642578125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 156739268, + "step": 2798 + }, + { + "epoch": 6.233853006681515, + "grad_norm": 17.540464401245117, + "learning_rate": 1e-06, + "loss": 0.8046, + "num_input_tokens_seen": 156797304, + "step": 2799 + }, + { + "epoch": 6.233853006681515, + "loss": 0.8160011768341064, + "loss_ce": 0.0002052470954367891, + "loss_iou": 0.357421875, + "loss_num": 0.0201416015625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 156797304, + "step": 2799 + }, + { + "epoch": 6.23608017817372, + "grad_norm": 23.366252899169922, + "learning_rate": 1e-06, + "loss": 0.7489, + "num_input_tokens_seen": 156852524, + "step": 2800 + }, + { + "epoch": 6.23608017817372, + "loss": 0.7832313776016235, + "loss_ce": 0.0002724075166042894, + "loss_iou": 0.298828125, + "loss_num": 0.037109375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 156852524, + "step": 2800 + }, + { + "epoch": 6.2383073496659245, + "grad_norm": 18.745590209960938, + "learning_rate": 1e-06, + "loss": 0.602, + "num_input_tokens_seen": 156909388, + "step": 2801 + }, + { + "epoch": 6.2383073496659245, + "loss": 0.377541720867157, + "loss_ce": 0.0002223837363999337, + "loss_iou": 0.16015625, + "loss_num": 0.01141357421875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 156909388, + "step": 2801 + }, + { + "epoch": 6.240534521158129, + "grad_norm": 16.821367263793945, + "learning_rate": 1e-06, + "loss": 0.6813, + "num_input_tokens_seen": 156965248, + "step": 2802 + }, + { + "epoch": 6.240534521158129, + "loss": 0.6255514621734619, + "loss_ce": 0.00018525280756875873, + "loss_iou": 0.2490234375, + "loss_num": 0.025634765625, + "loss_xval": 0.625, + "num_input_tokens_seen": 156965248, + "step": 2802 + }, + { + "epoch": 6.242761692650334, + "grad_norm": 17.60284996032715, + "learning_rate": 1e-06, + "loss": 0.6629, + "num_input_tokens_seen": 157021544, + "step": 2803 + }, + { + "epoch": 6.242761692650334, + "loss": 0.7033724784851074, + "loss_ce": 0.00024746524286456406, + "loss_iou": 0.310546875, + "loss_num": 0.0166015625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 157021544, + "step": 2803 + }, + { + "epoch": 6.244988864142539, + "grad_norm": 18.265342712402344, + "learning_rate": 1e-06, + "loss": 0.5529, + "num_input_tokens_seen": 157077008, + "step": 2804 + }, + { + "epoch": 6.244988864142539, + "loss": 0.495932936668396, + "loss_ce": 0.0002053846837952733, + "loss_iou": 0.1982421875, + "loss_num": 0.019775390625, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 157077008, + "step": 2804 + }, + { + "epoch": 6.247216035634744, + "grad_norm": 36.90097427368164, + "learning_rate": 1e-06, + "loss": 0.6764, + "num_input_tokens_seen": 157131556, + "step": 2805 + }, + { + "epoch": 6.247216035634744, + "loss": 0.6967709064483643, + "loss_ce": 0.00023769214749336243, + "loss_iou": 0.306640625, + "loss_num": 0.0164794921875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 157131556, + "step": 2805 + }, + { + "epoch": 6.249443207126949, + "grad_norm": 20.952041625976562, + "learning_rate": 1e-06, + "loss": 0.6065, + "num_input_tokens_seen": 157187128, + "step": 2806 + }, + { + "epoch": 6.249443207126949, + "loss": 0.496565043926239, + "loss_ce": 0.00022713570797350258, + "loss_iou": 0.208984375, + "loss_num": 0.0157470703125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 157187128, + "step": 2806 + }, + { + "epoch": 6.251670378619154, + "grad_norm": 27.254335403442383, + "learning_rate": 1e-06, + "loss": 0.7668, + "num_input_tokens_seen": 157242080, + "step": 2807 + }, + { + "epoch": 6.251670378619154, + "loss": 0.721343457698822, + "loss_ce": 0.0002741218195296824, + "loss_iou": 0.28125, + "loss_num": 0.03173828125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 157242080, + "step": 2807 + }, + { + "epoch": 6.2538975501113585, + "grad_norm": 19.683401107788086, + "learning_rate": 1e-06, + "loss": 0.4727, + "num_input_tokens_seen": 157299408, + "step": 2808 + }, + { + "epoch": 6.2538975501113585, + "loss": 0.5055980682373047, + "loss_ce": 0.00022694582003168762, + "loss_iou": 0.22265625, + "loss_num": 0.01202392578125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 157299408, + "step": 2808 + }, + { + "epoch": 6.256124721603563, + "grad_norm": 16.38343620300293, + "learning_rate": 1e-06, + "loss": 0.6176, + "num_input_tokens_seen": 157354524, + "step": 2809 + }, + { + "epoch": 6.256124721603563, + "loss": 0.5820066332817078, + "loss_ce": 0.0002195206907344982, + "loss_iou": 0.232421875, + "loss_num": 0.0234375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 157354524, + "step": 2809 + }, + { + "epoch": 6.258351893095768, + "grad_norm": 23.633302688598633, + "learning_rate": 1e-06, + "loss": 0.7708, + "num_input_tokens_seen": 157409136, + "step": 2810 + }, + { + "epoch": 6.258351893095768, + "loss": 1.0537309646606445, + "loss_ce": 0.00026410428108647466, + "loss_iou": 0.46875, + "loss_num": 0.0225830078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 157409136, + "step": 2810 + }, + { + "epoch": 6.260579064587973, + "grad_norm": 17.914230346679688, + "learning_rate": 1e-06, + "loss": 0.8414, + "num_input_tokens_seen": 157463620, + "step": 2811 + }, + { + "epoch": 6.260579064587973, + "loss": 1.0271397829055786, + "loss_ce": 0.00028427952202036977, + "loss_iou": 0.45703125, + "loss_num": 0.022216796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 157463620, + "step": 2811 + }, + { + "epoch": 6.262806236080178, + "grad_norm": 20.162099838256836, + "learning_rate": 1e-06, + "loss": 0.6692, + "num_input_tokens_seen": 157518172, + "step": 2812 + }, + { + "epoch": 6.262806236080178, + "loss": 0.709923267364502, + "loss_ce": 0.0004505990073084831, + "loss_iou": 0.30078125, + "loss_num": 0.0213623046875, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 157518172, + "step": 2812 + }, + { + "epoch": 6.265033407572383, + "grad_norm": 20.278295516967773, + "learning_rate": 1e-06, + "loss": 0.6405, + "num_input_tokens_seen": 157573484, + "step": 2813 + }, + { + "epoch": 6.265033407572383, + "loss": 0.7189103364944458, + "loss_ce": 0.0001603504060767591, + "loss_iou": 0.2890625, + "loss_num": 0.028076171875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 157573484, + "step": 2813 + }, + { + "epoch": 6.267260579064588, + "grad_norm": 19.324562072753906, + "learning_rate": 1e-06, + "loss": 0.6074, + "num_input_tokens_seen": 157627908, + "step": 2814 + }, + { + "epoch": 6.267260579064588, + "loss": 0.7765587568283081, + "loss_ce": 0.0002525679301470518, + "loss_iou": 0.330078125, + "loss_num": 0.0234375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 157627908, + "step": 2814 + }, + { + "epoch": 6.2694877505567925, + "grad_norm": 15.711827278137207, + "learning_rate": 1e-06, + "loss": 0.676, + "num_input_tokens_seen": 157685372, + "step": 2815 + }, + { + "epoch": 6.2694877505567925, + "loss": 0.6160228252410889, + "loss_ce": 0.0003001945442520082, + "loss_iou": 0.259765625, + "loss_num": 0.0189208984375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 157685372, + "step": 2815 + }, + { + "epoch": 6.271714922048997, + "grad_norm": 24.523929595947266, + "learning_rate": 1e-06, + "loss": 0.7507, + "num_input_tokens_seen": 157740784, + "step": 2816 + }, + { + "epoch": 6.271714922048997, + "loss": 0.6751787662506104, + "loss_ce": 0.0003741044201888144, + "loss_iou": 0.28125, + "loss_num": 0.0225830078125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 157740784, + "step": 2816 + }, + { + "epoch": 6.273942093541203, + "grad_norm": 17.52358627319336, + "learning_rate": 1e-06, + "loss": 0.627, + "num_input_tokens_seen": 157797720, + "step": 2817 + }, + { + "epoch": 6.273942093541203, + "loss": 0.6270310878753662, + "loss_ce": 0.00032214989187195897, + "loss_iou": 0.263671875, + "loss_num": 0.02001953125, + "loss_xval": 0.625, + "num_input_tokens_seen": 157797720, + "step": 2817 + }, + { + "epoch": 6.276169265033408, + "grad_norm": 15.597443580627441, + "learning_rate": 1e-06, + "loss": 0.511, + "num_input_tokens_seen": 157855748, + "step": 2818 + }, + { + "epoch": 6.276169265033408, + "loss": 0.45937642455101013, + "loss_ce": 0.0005141303990967572, + "loss_iou": 0.1962890625, + "loss_num": 0.01336669921875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 157855748, + "step": 2818 + }, + { + "epoch": 6.278396436525613, + "grad_norm": 20.574974060058594, + "learning_rate": 1e-06, + "loss": 0.8684, + "num_input_tokens_seen": 157906192, + "step": 2819 + }, + { + "epoch": 6.278396436525613, + "loss": 0.9832384586334229, + "loss_ce": 0.0003282871039118618, + "loss_iou": 0.4140625, + "loss_num": 0.0311279296875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 157906192, + "step": 2819 + }, + { + "epoch": 6.280623608017818, + "grad_norm": 16.417438507080078, + "learning_rate": 1e-06, + "loss": 0.878, + "num_input_tokens_seen": 157960736, + "step": 2820 + }, + { + "epoch": 6.280623608017818, + "loss": 1.0158679485321045, + "loss_ce": 0.0002430274907965213, + "loss_iou": 0.416015625, + "loss_num": 0.036865234375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 157960736, + "step": 2820 + }, + { + "epoch": 6.282850779510023, + "grad_norm": 21.210569381713867, + "learning_rate": 1e-06, + "loss": 0.724, + "num_input_tokens_seen": 158016856, + "step": 2821 + }, + { + "epoch": 6.282850779510023, + "loss": 0.6245869398117065, + "loss_ce": 0.0001973453618120402, + "loss_iou": 0.267578125, + "loss_num": 0.018310546875, + "loss_xval": 0.625, + "num_input_tokens_seen": 158016856, + "step": 2821 + }, + { + "epoch": 6.285077951002227, + "grad_norm": 18.306804656982422, + "learning_rate": 1e-06, + "loss": 0.7915, + "num_input_tokens_seen": 158071804, + "step": 2822 + }, + { + "epoch": 6.285077951002227, + "loss": 0.865458071231842, + "loss_ce": 0.0002236901200376451, + "loss_iou": 0.361328125, + "loss_num": 0.0284423828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 158071804, + "step": 2822 + }, + { + "epoch": 6.287305122494432, + "grad_norm": 18.746957778930664, + "learning_rate": 1e-06, + "loss": 0.8188, + "num_input_tokens_seen": 158126148, + "step": 2823 + }, + { + "epoch": 6.287305122494432, + "loss": 0.8055477142333984, + "loss_ce": 0.0003718675870914012, + "loss_iou": 0.359375, + "loss_num": 0.016845703125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 158126148, + "step": 2823 + }, + { + "epoch": 6.289532293986637, + "grad_norm": 20.189741134643555, + "learning_rate": 1e-06, + "loss": 0.7432, + "num_input_tokens_seen": 158181076, + "step": 2824 + }, + { + "epoch": 6.289532293986637, + "loss": 0.8805779218673706, + "loss_ce": 0.00020679559384007007, + "loss_iou": 0.375, + "loss_num": 0.026123046875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 158181076, + "step": 2824 + }, + { + "epoch": 6.291759465478842, + "grad_norm": 29.9582576751709, + "learning_rate": 1e-06, + "loss": 0.8211, + "num_input_tokens_seen": 158236796, + "step": 2825 + }, + { + "epoch": 6.291759465478842, + "loss": 0.7246193289756775, + "loss_ce": 0.0002540839195717126, + "loss_iou": 0.310546875, + "loss_num": 0.0206298828125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 158236796, + "step": 2825 + }, + { + "epoch": 6.293986636971047, + "grad_norm": 24.43955421447754, + "learning_rate": 1e-06, + "loss": 0.6298, + "num_input_tokens_seen": 158292708, + "step": 2826 + }, + { + "epoch": 6.293986636971047, + "loss": 0.560718297958374, + "loss_ce": 0.00017140517593361437, + "loss_iou": 0.251953125, + "loss_num": 0.01123046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 158292708, + "step": 2826 + }, + { + "epoch": 6.296213808463252, + "grad_norm": 15.927586555480957, + "learning_rate": 1e-06, + "loss": 0.7068, + "num_input_tokens_seen": 158347268, + "step": 2827 + }, + { + "epoch": 6.296213808463252, + "loss": 0.6577511429786682, + "loss_ce": 0.0002804349351208657, + "loss_iou": 0.291015625, + "loss_num": 0.01519775390625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 158347268, + "step": 2827 + }, + { + "epoch": 6.298440979955457, + "grad_norm": 17.409822463989258, + "learning_rate": 1e-06, + "loss": 0.6776, + "num_input_tokens_seen": 158402892, + "step": 2828 + }, + { + "epoch": 6.298440979955457, + "loss": 0.6074726581573486, + "loss_ce": 0.00029491446912288666, + "loss_iou": 0.26171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 158402892, + "step": 2828 + }, + { + "epoch": 6.3006681514476615, + "grad_norm": 27.617082595825195, + "learning_rate": 1e-06, + "loss": 0.604, + "num_input_tokens_seen": 158457584, + "step": 2829 + }, + { + "epoch": 6.3006681514476615, + "loss": 0.6342703104019165, + "loss_ce": 0.00023712051915936172, + "loss_iou": 0.267578125, + "loss_num": 0.0194091796875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 158457584, + "step": 2829 + }, + { + "epoch": 6.302895322939866, + "grad_norm": 15.794069290161133, + "learning_rate": 1e-06, + "loss": 0.5568, + "num_input_tokens_seen": 158514100, + "step": 2830 + }, + { + "epoch": 6.302895322939866, + "loss": 0.591174840927124, + "loss_ce": 0.0003545153886079788, + "loss_iou": 0.255859375, + "loss_num": 0.015625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 158514100, + "step": 2830 + }, + { + "epoch": 6.305122494432071, + "grad_norm": 19.552928924560547, + "learning_rate": 1e-06, + "loss": 0.7125, + "num_input_tokens_seen": 158571860, + "step": 2831 + }, + { + "epoch": 6.305122494432071, + "loss": 0.5463184118270874, + "loss_ce": 0.00017578111146576703, + "loss_iou": 0.2392578125, + "loss_num": 0.013671875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 158571860, + "step": 2831 + }, + { + "epoch": 6.307349665924276, + "grad_norm": 31.347753524780273, + "learning_rate": 1e-06, + "loss": 0.5719, + "num_input_tokens_seen": 158627292, + "step": 2832 + }, + { + "epoch": 6.307349665924276, + "loss": 0.46991389989852905, + "loss_ce": 0.00018733731121756136, + "loss_iou": 0.189453125, + "loss_num": 0.0181884765625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 158627292, + "step": 2832 + }, + { + "epoch": 6.309576837416481, + "grad_norm": 19.185462951660156, + "learning_rate": 1e-06, + "loss": 0.6727, + "num_input_tokens_seen": 158682192, + "step": 2833 + }, + { + "epoch": 6.309576837416481, + "loss": 0.6861748695373535, + "loss_ce": 0.0002617766731418669, + "loss_iou": 0.275390625, + "loss_num": 0.02685546875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 158682192, + "step": 2833 + }, + { + "epoch": 6.311804008908686, + "grad_norm": 23.759424209594727, + "learning_rate": 1e-06, + "loss": 0.5722, + "num_input_tokens_seen": 158739748, + "step": 2834 + }, + { + "epoch": 6.311804008908686, + "loss": 0.6177768707275391, + "loss_ce": 0.0002231545513495803, + "loss_iou": 0.263671875, + "loss_num": 0.01806640625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 158739748, + "step": 2834 + }, + { + "epoch": 6.314031180400891, + "grad_norm": 26.93903350830078, + "learning_rate": 1e-06, + "loss": 0.7831, + "num_input_tokens_seen": 158792348, + "step": 2835 + }, + { + "epoch": 6.314031180400891, + "loss": 1.0019464492797852, + "loss_ce": 0.0004816856817342341, + "loss_iou": 0.42578125, + "loss_num": 0.02978515625, + "loss_xval": 1.0, + "num_input_tokens_seen": 158792348, + "step": 2835 + }, + { + "epoch": 6.3162583518930955, + "grad_norm": 55.32765197753906, + "learning_rate": 1e-06, + "loss": 0.7881, + "num_input_tokens_seen": 158850492, + "step": 2836 + }, + { + "epoch": 6.3162583518930955, + "loss": 0.7746649384498596, + "loss_ce": 0.00025085339439101517, + "loss_iou": 0.302734375, + "loss_num": 0.033935546875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 158850492, + "step": 2836 + }, + { + "epoch": 6.3184855233853, + "grad_norm": 18.002099990844727, + "learning_rate": 1e-06, + "loss": 0.5655, + "num_input_tokens_seen": 158907516, + "step": 2837 + }, + { + "epoch": 6.3184855233853, + "loss": 0.5236201882362366, + "loss_ce": 0.00018271096632815897, + "loss_iou": 0.19140625, + "loss_num": 0.0283203125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 158907516, + "step": 2837 + }, + { + "epoch": 6.320712694877505, + "grad_norm": 20.92586326599121, + "learning_rate": 1e-06, + "loss": 0.6947, + "num_input_tokens_seen": 158962112, + "step": 2838 + }, + { + "epoch": 6.320712694877505, + "loss": 0.7641561031341553, + "loss_ce": 0.0002400341909378767, + "loss_iou": 0.328125, + "loss_num": 0.0213623046875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 158962112, + "step": 2838 + }, + { + "epoch": 6.32293986636971, + "grad_norm": 24.1262264251709, + "learning_rate": 1e-06, + "loss": 0.8267, + "num_input_tokens_seen": 159017948, + "step": 2839 + }, + { + "epoch": 6.32293986636971, + "loss": 0.6984615325927734, + "loss_ce": 0.0002193648397224024, + "loss_iou": 0.283203125, + "loss_num": 0.026611328125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 159017948, + "step": 2839 + }, + { + "epoch": 6.325167037861915, + "grad_norm": 14.128987312316895, + "learning_rate": 1e-06, + "loss": 0.7434, + "num_input_tokens_seen": 159072920, + "step": 2840 + }, + { + "epoch": 6.325167037861915, + "loss": 0.782150387763977, + "loss_ce": 0.00029003899544477463, + "loss_iou": 0.326171875, + "loss_num": 0.0257568359375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 159072920, + "step": 2840 + }, + { + "epoch": 6.327394209354121, + "grad_norm": 15.3905029296875, + "learning_rate": 1e-06, + "loss": 0.5419, + "num_input_tokens_seen": 159130120, + "step": 2841 + }, + { + "epoch": 6.327394209354121, + "loss": 0.3986847996711731, + "loss_ce": 0.00024727691197767854, + "loss_iou": 0.169921875, + "loss_num": 0.01165771484375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 159130120, + "step": 2841 + }, + { + "epoch": 6.3296213808463255, + "grad_norm": 22.618736267089844, + "learning_rate": 1e-06, + "loss": 0.7229, + "num_input_tokens_seen": 159185880, + "step": 2842 + }, + { + "epoch": 6.3296213808463255, + "loss": 0.5595074892044067, + "loss_ce": 0.00018136684957426041, + "loss_iou": 0.240234375, + "loss_num": 0.015869140625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 159185880, + "step": 2842 + }, + { + "epoch": 6.33184855233853, + "grad_norm": 26.56134605407715, + "learning_rate": 1e-06, + "loss": 0.5805, + "num_input_tokens_seen": 159243236, + "step": 2843 + }, + { + "epoch": 6.33184855233853, + "loss": 0.5183169841766357, + "loss_ce": 0.0002505767624825239, + "loss_iou": 0.2255859375, + "loss_num": 0.01348876953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 159243236, + "step": 2843 + }, + { + "epoch": 6.334075723830735, + "grad_norm": 18.414081573486328, + "learning_rate": 1e-06, + "loss": 0.833, + "num_input_tokens_seen": 159300220, + "step": 2844 + }, + { + "epoch": 6.334075723830735, + "loss": 0.8649712800979614, + "loss_ce": 0.00022521875507663935, + "loss_iou": 0.37109375, + "loss_num": 0.024658203125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 159300220, + "step": 2844 + }, + { + "epoch": 6.33630289532294, + "grad_norm": 55.4049072265625, + "learning_rate": 1e-06, + "loss": 0.5908, + "num_input_tokens_seen": 159358836, + "step": 2845 + }, + { + "epoch": 6.33630289532294, + "loss": 0.5839510560035706, + "loss_ce": 0.0002108067856170237, + "loss_iou": 0.24609375, + "loss_num": 0.0181884765625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 159358836, + "step": 2845 + }, + { + "epoch": 6.338530066815145, + "grad_norm": 15.77271556854248, + "learning_rate": 1e-06, + "loss": 0.6155, + "num_input_tokens_seen": 159416428, + "step": 2846 + }, + { + "epoch": 6.338530066815145, + "loss": 0.5473378896713257, + "loss_ce": 0.00021875005040783435, + "loss_iou": 0.2451171875, + "loss_num": 0.0115966796875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 159416428, + "step": 2846 + }, + { + "epoch": 6.34075723830735, + "grad_norm": 17.767343521118164, + "learning_rate": 1e-06, + "loss": 0.4326, + "num_input_tokens_seen": 159474868, + "step": 2847 + }, + { + "epoch": 6.34075723830735, + "loss": 0.420124888420105, + "loss_ce": 0.00020299111201893538, + "loss_iou": 0.1865234375, + "loss_num": 0.00921630859375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 159474868, + "step": 2847 + }, + { + "epoch": 6.342984409799555, + "grad_norm": 17.833757400512695, + "learning_rate": 1e-06, + "loss": 0.6052, + "num_input_tokens_seen": 159530900, + "step": 2848 + }, + { + "epoch": 6.342984409799555, + "loss": 0.6891921758651733, + "loss_ce": 0.0002273364079883322, + "loss_iou": 0.306640625, + "loss_num": 0.01519775390625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 159530900, + "step": 2848 + }, + { + "epoch": 6.3452115812917596, + "grad_norm": 36.907676696777344, + "learning_rate": 1e-06, + "loss": 0.9243, + "num_input_tokens_seen": 159587904, + "step": 2849 + }, + { + "epoch": 6.3452115812917596, + "loss": 0.7456858158111572, + "loss_ce": 0.00020244505139999092, + "loss_iou": 0.31640625, + "loss_num": 0.022705078125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 159587904, + "step": 2849 + }, + { + "epoch": 6.347438752783964, + "grad_norm": 25.969263076782227, + "learning_rate": 1e-06, + "loss": 0.671, + "num_input_tokens_seen": 159643948, + "step": 2850 + }, + { + "epoch": 6.347438752783964, + "loss": 0.7656386494636536, + "loss_ce": 0.000257776933722198, + "loss_iou": 0.330078125, + "loss_num": 0.021240234375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 159643948, + "step": 2850 + }, + { + "epoch": 6.349665924276169, + "grad_norm": 16.245384216308594, + "learning_rate": 1e-06, + "loss": 0.6789, + "num_input_tokens_seen": 159701440, + "step": 2851 + }, + { + "epoch": 6.349665924276169, + "loss": 0.6860156655311584, + "loss_ce": 0.00034670589957386255, + "loss_iou": 0.27734375, + "loss_num": 0.026611328125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 159701440, + "step": 2851 + }, + { + "epoch": 6.351893095768374, + "grad_norm": 23.11394691467285, + "learning_rate": 1e-06, + "loss": 0.6914, + "num_input_tokens_seen": 159759676, + "step": 2852 + }, + { + "epoch": 6.351893095768374, + "loss": 0.4833386540412903, + "loss_ce": 0.00018433517834637314, + "loss_iou": 0.2197265625, + "loss_num": 0.0089111328125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 159759676, + "step": 2852 + }, + { + "epoch": 6.354120267260579, + "grad_norm": 46.09978103637695, + "learning_rate": 1e-06, + "loss": 0.8392, + "num_input_tokens_seen": 159817828, + "step": 2853 + }, + { + "epoch": 6.354120267260579, + "loss": 0.5963437557220459, + "loss_ce": 0.00039652473060414195, + "loss_iou": 0.2333984375, + "loss_num": 0.02587890625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 159817828, + "step": 2853 + }, + { + "epoch": 6.356347438752784, + "grad_norm": 25.764183044433594, + "learning_rate": 1e-06, + "loss": 0.8404, + "num_input_tokens_seen": 159871560, + "step": 2854 + }, + { + "epoch": 6.356347438752784, + "loss": 0.6220239400863647, + "loss_ce": 0.00019775178225245327, + "loss_iou": 0.25, + "loss_num": 0.02392578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 159871560, + "step": 2854 + }, + { + "epoch": 6.358574610244989, + "grad_norm": 19.76352882385254, + "learning_rate": 1e-06, + "loss": 0.575, + "num_input_tokens_seen": 159923780, + "step": 2855 + }, + { + "epoch": 6.358574610244989, + "loss": 0.5392453074455261, + "loss_ce": 0.00018279827781952918, + "loss_iou": 0.2265625, + "loss_num": 0.01708984375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 159923780, + "step": 2855 + }, + { + "epoch": 6.360801781737194, + "grad_norm": 19.6981201171875, + "learning_rate": 1e-06, + "loss": 0.7724, + "num_input_tokens_seen": 159977408, + "step": 2856 + }, + { + "epoch": 6.360801781737194, + "loss": 0.7918369770050049, + "loss_ce": 0.00021093878604006022, + "loss_iou": 0.33203125, + "loss_num": 0.025390625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 159977408, + "step": 2856 + }, + { + "epoch": 6.363028953229398, + "grad_norm": 19.517127990722656, + "learning_rate": 1e-06, + "loss": 0.4262, + "num_input_tokens_seen": 160035044, + "step": 2857 + }, + { + "epoch": 6.363028953229398, + "loss": 0.45344364643096924, + "loss_ce": 0.00019658930250443518, + "loss_iou": 0.2001953125, + "loss_num": 0.0107421875, + "loss_xval": 0.453125, + "num_input_tokens_seen": 160035044, + "step": 2857 + }, + { + "epoch": 6.365256124721603, + "grad_norm": 27.655757904052734, + "learning_rate": 1e-06, + "loss": 0.6266, + "num_input_tokens_seen": 160091256, + "step": 2858 + }, + { + "epoch": 6.365256124721603, + "loss": 0.45751261711120605, + "loss_ce": 0.00023723256890662014, + "loss_iou": 0.1787109375, + "loss_num": 0.02001953125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 160091256, + "step": 2858 + }, + { + "epoch": 6.367483296213808, + "grad_norm": 13.697670936584473, + "learning_rate": 1e-06, + "loss": 0.6436, + "num_input_tokens_seen": 160146776, + "step": 2859 + }, + { + "epoch": 6.367483296213808, + "loss": 0.7326347827911377, + "loss_ce": 0.00021289548021741211, + "loss_iou": 0.306640625, + "loss_num": 0.02392578125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 160146776, + "step": 2859 + }, + { + "epoch": 6.369710467706013, + "grad_norm": 20.36834144592285, + "learning_rate": 1e-06, + "loss": 0.7276, + "num_input_tokens_seen": 160202160, + "step": 2860 + }, + { + "epoch": 6.369710467706013, + "loss": 0.8042212128639221, + "loss_ce": 0.0002660886093508452, + "loss_iou": 0.357421875, + "loss_num": 0.0179443359375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 160202160, + "step": 2860 + }, + { + "epoch": 6.371937639198218, + "grad_norm": 28.106962203979492, + "learning_rate": 1e-06, + "loss": 0.6299, + "num_input_tokens_seen": 160255840, + "step": 2861 + }, + { + "epoch": 6.371937639198218, + "loss": 0.5891226530075073, + "loss_ce": 0.00025547394761815667, + "loss_iou": 0.2734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 160255840, + "step": 2861 + }, + { + "epoch": 6.374164810690424, + "grad_norm": 17.660959243774414, + "learning_rate": 1e-06, + "loss": 0.6536, + "num_input_tokens_seen": 160312148, + "step": 2862 + }, + { + "epoch": 6.374164810690424, + "loss": 0.6094207763671875, + "loss_ce": 0.000167851394508034, + "loss_iou": 0.263671875, + "loss_num": 0.0166015625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 160312148, + "step": 2862 + }, + { + "epoch": 6.3763919821826285, + "grad_norm": 17.073429107666016, + "learning_rate": 1e-06, + "loss": 0.7522, + "num_input_tokens_seen": 160368156, + "step": 2863 + }, + { + "epoch": 6.3763919821826285, + "loss": 0.7373796105384827, + "loss_ce": 0.00019699129916261882, + "loss_iou": 0.33203125, + "loss_num": 0.01458740234375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 160368156, + "step": 2863 + }, + { + "epoch": 6.378619153674833, + "grad_norm": 92.50753021240234, + "learning_rate": 1e-06, + "loss": 0.7973, + "num_input_tokens_seen": 160426052, + "step": 2864 + }, + { + "epoch": 6.378619153674833, + "loss": 0.7172311544418335, + "loss_ce": 0.00019013139535672963, + "loss_iou": 0.318359375, + "loss_num": 0.01611328125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 160426052, + "step": 2864 + }, + { + "epoch": 6.380846325167038, + "grad_norm": 18.557222366333008, + "learning_rate": 1e-06, + "loss": 0.6129, + "num_input_tokens_seen": 160482740, + "step": 2865 + }, + { + "epoch": 6.380846325167038, + "loss": 0.49754250049591064, + "loss_ce": 0.00022808092762716115, + "loss_iou": 0.2373046875, + "loss_num": 0.004638671875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 160482740, + "step": 2865 + }, + { + "epoch": 6.383073496659243, + "grad_norm": 19.247840881347656, + "learning_rate": 1e-06, + "loss": 0.8082, + "num_input_tokens_seen": 160538876, + "step": 2866 + }, + { + "epoch": 6.383073496659243, + "loss": 0.753126859664917, + "loss_ce": 0.00019718983094207942, + "loss_iou": 0.33984375, + "loss_num": 0.01483154296875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 160538876, + "step": 2866 + }, + { + "epoch": 6.385300668151448, + "grad_norm": 19.878429412841797, + "learning_rate": 1e-06, + "loss": 0.5645, + "num_input_tokens_seen": 160592400, + "step": 2867 + }, + { + "epoch": 6.385300668151448, + "loss": 0.44196611642837524, + "loss_ce": 0.0001936689077410847, + "loss_iou": 0.189453125, + "loss_num": 0.0128173828125, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 160592400, + "step": 2867 + }, + { + "epoch": 6.387527839643653, + "grad_norm": 23.526199340820312, + "learning_rate": 1e-06, + "loss": 0.7953, + "num_input_tokens_seen": 160648336, + "step": 2868 + }, + { + "epoch": 6.387527839643653, + "loss": 0.9042606353759766, + "loss_ce": 0.00032996918889693916, + "loss_iou": 0.39453125, + "loss_num": 0.02294921875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 160648336, + "step": 2868 + }, + { + "epoch": 6.389755011135858, + "grad_norm": 24.269256591796875, + "learning_rate": 1e-06, + "loss": 0.6684, + "num_input_tokens_seen": 160706796, + "step": 2869 + }, + { + "epoch": 6.389755011135858, + "loss": 0.7116117477416992, + "loss_ce": 0.00030804408015683293, + "loss_iou": 0.283203125, + "loss_num": 0.0286865234375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 160706796, + "step": 2869 + }, + { + "epoch": 6.3919821826280625, + "grad_norm": 31.342178344726562, + "learning_rate": 1e-06, + "loss": 0.647, + "num_input_tokens_seen": 160762660, + "step": 2870 + }, + { + "epoch": 6.3919821826280625, + "loss": 0.4261830449104309, + "loss_ce": 0.00021868752082809806, + "loss_iou": 0.189453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 160762660, + "step": 2870 + }, + { + "epoch": 6.394209354120267, + "grad_norm": 16.151552200317383, + "learning_rate": 1e-06, + "loss": 0.5474, + "num_input_tokens_seen": 160818852, + "step": 2871 + }, + { + "epoch": 6.394209354120267, + "loss": 0.5044623017311096, + "loss_ce": 0.0001898359478218481, + "loss_iou": 0.2265625, + "loss_num": 0.01007080078125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 160818852, + "step": 2871 + }, + { + "epoch": 6.396436525612472, + "grad_norm": 15.111863136291504, + "learning_rate": 1e-06, + "loss": 0.6786, + "num_input_tokens_seen": 160875212, + "step": 2872 + }, + { + "epoch": 6.396436525612472, + "loss": 0.6591340899467468, + "loss_ce": 0.00019854362471960485, + "loss_iou": 0.28125, + "loss_num": 0.0189208984375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 160875212, + "step": 2872 + }, + { + "epoch": 6.398663697104677, + "grad_norm": 20.276626586914062, + "learning_rate": 1e-06, + "loss": 0.5893, + "num_input_tokens_seen": 160933880, + "step": 2873 + }, + { + "epoch": 6.398663697104677, + "loss": 0.4171835482120514, + "loss_ce": 0.00019134554895572364, + "loss_iou": 0.1865234375, + "loss_num": 0.00872802734375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 160933880, + "step": 2873 + }, + { + "epoch": 6.400890868596882, + "grad_norm": 33.857666015625, + "learning_rate": 1e-06, + "loss": 0.6731, + "num_input_tokens_seen": 160988996, + "step": 2874 + }, + { + "epoch": 6.400890868596882, + "loss": 0.8410516977310181, + "loss_ce": 0.00023136789968702942, + "loss_iou": 0.357421875, + "loss_num": 0.0252685546875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 160988996, + "step": 2874 + }, + { + "epoch": 6.403118040089087, + "grad_norm": 12.741671562194824, + "learning_rate": 1e-06, + "loss": 0.7232, + "num_input_tokens_seen": 161043496, + "step": 2875 + }, + { + "epoch": 6.403118040089087, + "loss": 0.6495035886764526, + "loss_ce": 0.00021157064475119114, + "loss_iou": 0.28125, + "loss_num": 0.0172119140625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 161043496, + "step": 2875 + }, + { + "epoch": 6.405345211581292, + "grad_norm": 18.550369262695312, + "learning_rate": 1e-06, + "loss": 0.6694, + "num_input_tokens_seen": 161099512, + "step": 2876 + }, + { + "epoch": 6.405345211581292, + "loss": 0.7444691061973572, + "loss_ce": 0.00032850331626832485, + "loss_iou": 0.296875, + "loss_num": 0.030029296875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 161099512, + "step": 2876 + }, + { + "epoch": 6.4075723830734965, + "grad_norm": 23.05417823791504, + "learning_rate": 1e-06, + "loss": 0.4981, + "num_input_tokens_seen": 161155008, + "step": 2877 + }, + { + "epoch": 6.4075723830734965, + "loss": 0.6640802621841431, + "loss_ce": 0.00020084393327124417, + "loss_iou": 0.3046875, + "loss_num": 0.0107421875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 161155008, + "step": 2877 + }, + { + "epoch": 6.409799554565701, + "grad_norm": 16.260465621948242, + "learning_rate": 1e-06, + "loss": 0.8791, + "num_input_tokens_seen": 161211428, + "step": 2878 + }, + { + "epoch": 6.409799554565701, + "loss": 0.9614474773406982, + "loss_ce": 0.0005100243142805994, + "loss_iou": 0.40234375, + "loss_num": 0.031005859375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 161211428, + "step": 2878 + }, + { + "epoch": 6.412026726057906, + "grad_norm": 25.339269638061523, + "learning_rate": 1e-06, + "loss": 0.6356, + "num_input_tokens_seen": 161268840, + "step": 2879 + }, + { + "epoch": 6.412026726057906, + "loss": 0.5295264720916748, + "loss_ce": 0.00022961897775530815, + "loss_iou": 0.2353515625, + "loss_num": 0.01165771484375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 161268840, + "step": 2879 + }, + { + "epoch": 6.414253897550111, + "grad_norm": 17.69098663330078, + "learning_rate": 1e-06, + "loss": 0.7186, + "num_input_tokens_seen": 161326592, + "step": 2880 + }, + { + "epoch": 6.414253897550111, + "loss": 0.6872239708900452, + "loss_ce": 0.00021221938368398696, + "loss_iou": 0.3046875, + "loss_num": 0.015380859375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 161326592, + "step": 2880 + }, + { + "epoch": 6.416481069042316, + "grad_norm": 20.64066505432129, + "learning_rate": 1e-06, + "loss": 0.6329, + "num_input_tokens_seen": 161379560, + "step": 2881 + }, + { + "epoch": 6.416481069042316, + "loss": 0.8198229074478149, + "loss_ce": 0.00024278149066958576, + "loss_iou": 0.36328125, + "loss_num": 0.0184326171875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 161379560, + "step": 2881 + }, + { + "epoch": 6.418708240534521, + "grad_norm": 16.438508987426758, + "learning_rate": 1e-06, + "loss": 0.6852, + "num_input_tokens_seen": 161436220, + "step": 2882 + }, + { + "epoch": 6.418708240534521, + "loss": 0.7768357992172241, + "loss_ce": 0.0002244812058052048, + "loss_iou": 0.298828125, + "loss_num": 0.0361328125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 161436220, + "step": 2882 + }, + { + "epoch": 6.420935412026726, + "grad_norm": 20.693113327026367, + "learning_rate": 1e-06, + "loss": 0.6301, + "num_input_tokens_seen": 161489760, + "step": 2883 + }, + { + "epoch": 6.420935412026726, + "loss": 0.6213924884796143, + "loss_ce": 0.00029870617436245084, + "loss_iou": 0.271484375, + "loss_num": 0.01531982421875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 161489760, + "step": 2883 + }, + { + "epoch": 6.4231625835189305, + "grad_norm": 89.39657592773438, + "learning_rate": 1e-06, + "loss": 0.8208, + "num_input_tokens_seen": 161546596, + "step": 2884 + }, + { + "epoch": 6.4231625835189305, + "loss": 0.8657987117767334, + "loss_ce": 0.00032019149512052536, + "loss_iou": 0.361328125, + "loss_num": 0.028564453125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 161546596, + "step": 2884 + }, + { + "epoch": 6.425389755011135, + "grad_norm": 19.063907623291016, + "learning_rate": 1e-06, + "loss": 0.5801, + "num_input_tokens_seen": 161600504, + "step": 2885 + }, + { + "epoch": 6.425389755011135, + "loss": 0.5580852627754211, + "loss_ce": 0.00019339279970154166, + "loss_iou": 0.2578125, + "loss_num": 0.00860595703125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 161600504, + "step": 2885 + }, + { + "epoch": 6.427616926503341, + "grad_norm": 21.565977096557617, + "learning_rate": 1e-06, + "loss": 0.6976, + "num_input_tokens_seen": 161659560, + "step": 2886 + }, + { + "epoch": 6.427616926503341, + "loss": 0.8639631271362305, + "loss_ce": 0.0004377923032734543, + "loss_iou": 0.400390625, + "loss_num": 0.01287841796875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 161659560, + "step": 2886 + }, + { + "epoch": 6.429844097995546, + "grad_norm": 17.81540870666504, + "learning_rate": 1e-06, + "loss": 0.4801, + "num_input_tokens_seen": 161717364, + "step": 2887 + }, + { + "epoch": 6.429844097995546, + "loss": 0.49559223651885986, + "loss_ce": 0.00023093473282642663, + "loss_iou": 0.2216796875, + "loss_num": 0.01043701171875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 161717364, + "step": 2887 + }, + { + "epoch": 6.432071269487751, + "grad_norm": 46.8631591796875, + "learning_rate": 1e-06, + "loss": 0.7548, + "num_input_tokens_seen": 161772008, + "step": 2888 + }, + { + "epoch": 6.432071269487751, + "loss": 0.8915398716926575, + "loss_ce": 0.0004266298783477396, + "loss_iou": 0.353515625, + "loss_num": 0.03662109375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 161772008, + "step": 2888 + }, + { + "epoch": 6.434298440979956, + "grad_norm": 17.266054153442383, + "learning_rate": 1e-06, + "loss": 0.5134, + "num_input_tokens_seen": 161831844, + "step": 2889 + }, + { + "epoch": 6.434298440979956, + "loss": 0.5959360003471375, + "loss_ce": 0.00023290744866244495, + "loss_iou": 0.25, + "loss_num": 0.0189208984375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 161831844, + "step": 2889 + }, + { + "epoch": 6.436525612472161, + "grad_norm": 14.699010848999023, + "learning_rate": 1e-06, + "loss": 0.7692, + "num_input_tokens_seen": 161889280, + "step": 2890 + }, + { + "epoch": 6.436525612472161, + "loss": 0.817835807800293, + "loss_ce": 0.0004529977450147271, + "loss_iou": 0.330078125, + "loss_num": 0.031494140625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 161889280, + "step": 2890 + }, + { + "epoch": 6.4387527839643655, + "grad_norm": 15.680800437927246, + "learning_rate": 1e-06, + "loss": 0.6009, + "num_input_tokens_seen": 161944376, + "step": 2891 + }, + { + "epoch": 6.4387527839643655, + "loss": 0.6331249475479126, + "loss_ce": 0.0003124309587292373, + "loss_iou": 0.27734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 161944376, + "step": 2891 + }, + { + "epoch": 6.44097995545657, + "grad_norm": 15.222108840942383, + "learning_rate": 1e-06, + "loss": 0.7805, + "num_input_tokens_seen": 162001860, + "step": 2892 + }, + { + "epoch": 6.44097995545657, + "loss": 0.8359357714653015, + "loss_ce": 0.00024238668265752494, + "loss_iou": 0.345703125, + "loss_num": 0.028564453125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 162001860, + "step": 2892 + }, + { + "epoch": 6.443207126948775, + "grad_norm": 44.775211334228516, + "learning_rate": 1e-06, + "loss": 0.7518, + "num_input_tokens_seen": 162057500, + "step": 2893 + }, + { + "epoch": 6.443207126948775, + "loss": 0.6410810351371765, + "loss_ce": 0.0007001558551564813, + "loss_iou": 0.287109375, + "loss_num": 0.01312255859375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 162057500, + "step": 2893 + }, + { + "epoch": 6.44543429844098, + "grad_norm": 16.15131378173828, + "learning_rate": 1e-06, + "loss": 0.7321, + "num_input_tokens_seen": 162112232, + "step": 2894 + }, + { + "epoch": 6.44543429844098, + "loss": 0.7305101156234741, + "loss_ce": 0.0002855030761566013, + "loss_iou": 0.32421875, + "loss_num": 0.0164794921875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 162112232, + "step": 2894 + }, + { + "epoch": 6.447661469933185, + "grad_norm": 62.584171295166016, + "learning_rate": 1e-06, + "loss": 0.6187, + "num_input_tokens_seen": 162166996, + "step": 2895 + }, + { + "epoch": 6.447661469933185, + "loss": 0.5436509847640991, + "loss_ce": 0.00019392551621422172, + "loss_iou": 0.2431640625, + "loss_num": 0.011474609375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 162166996, + "step": 2895 + }, + { + "epoch": 6.44988864142539, + "grad_norm": 25.94312858581543, + "learning_rate": 1e-06, + "loss": 0.8496, + "num_input_tokens_seen": 162221552, + "step": 2896 + }, + { + "epoch": 6.44988864142539, + "loss": 0.8812910914421082, + "loss_ce": 0.00018758632359094918, + "loss_iou": 0.37890625, + "loss_num": 0.0247802734375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 162221552, + "step": 2896 + }, + { + "epoch": 6.452115812917595, + "grad_norm": 17.239500045776367, + "learning_rate": 1e-06, + "loss": 0.6339, + "num_input_tokens_seen": 162278460, + "step": 2897 + }, + { + "epoch": 6.452115812917595, + "loss": 0.5580621957778931, + "loss_ce": 0.00020086884615011513, + "loss_iou": 0.220703125, + "loss_num": 0.0233154296875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 162278460, + "step": 2897 + }, + { + "epoch": 6.4543429844097995, + "grad_norm": 30.1453800201416, + "learning_rate": 1e-06, + "loss": 0.6447, + "num_input_tokens_seen": 162335104, + "step": 2898 + }, + { + "epoch": 6.4543429844097995, + "loss": 0.529179036617279, + "loss_ce": 0.00037046882789582014, + "loss_iou": 0.244140625, + "loss_num": 0.00799560546875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 162335104, + "step": 2898 + }, + { + "epoch": 6.456570155902004, + "grad_norm": 14.278246879577637, + "learning_rate": 1e-06, + "loss": 0.8011, + "num_input_tokens_seen": 162390036, + "step": 2899 + }, + { + "epoch": 6.456570155902004, + "loss": 0.6543991565704346, + "loss_ce": 0.00022433955746237189, + "loss_iou": 0.255859375, + "loss_num": 0.0283203125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 162390036, + "step": 2899 + }, + { + "epoch": 6.458797327394209, + "grad_norm": 27.82345962524414, + "learning_rate": 1e-06, + "loss": 0.5877, + "num_input_tokens_seen": 162446176, + "step": 2900 + }, + { + "epoch": 6.458797327394209, + "loss": 0.5277654528617859, + "loss_ce": 0.0001775633281795308, + "loss_iou": 0.2353515625, + "loss_num": 0.01129150390625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 162446176, + "step": 2900 + }, + { + "epoch": 6.461024498886414, + "grad_norm": 16.007659912109375, + "learning_rate": 1e-06, + "loss": 0.7475, + "num_input_tokens_seen": 162502660, + "step": 2901 + }, + { + "epoch": 6.461024498886414, + "loss": 0.9330655932426453, + "loss_ce": 0.00020428383140824735, + "loss_iou": 0.349609375, + "loss_num": 0.046630859375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 162502660, + "step": 2901 + }, + { + "epoch": 6.463251670378619, + "grad_norm": 21.829668045043945, + "learning_rate": 1e-06, + "loss": 0.6332, + "num_input_tokens_seen": 162559356, + "step": 2902 + }, + { + "epoch": 6.463251670378619, + "loss": 0.623560905456543, + "loss_ce": 0.0002699050819501281, + "loss_iou": 0.23828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.625, + "num_input_tokens_seen": 162559356, + "step": 2902 + }, + { + "epoch": 6.465478841870824, + "grad_norm": 16.68356704711914, + "learning_rate": 1e-06, + "loss": 0.6161, + "num_input_tokens_seen": 162617696, + "step": 2903 + }, + { + "epoch": 6.465478841870824, + "loss": 0.5705079436302185, + "loss_ce": 0.00019540925859473646, + "loss_iou": 0.240234375, + "loss_num": 0.01806640625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 162617696, + "step": 2903 + }, + { + "epoch": 6.467706013363029, + "grad_norm": 19.659059524536133, + "learning_rate": 1e-06, + "loss": 0.6401, + "num_input_tokens_seen": 162674516, + "step": 2904 + }, + { + "epoch": 6.467706013363029, + "loss": 0.7844167351722717, + "loss_ce": 0.00023700644669588655, + "loss_iou": 0.33203125, + "loss_num": 0.0238037109375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 162674516, + "step": 2904 + }, + { + "epoch": 6.4699331848552335, + "grad_norm": 21.870929718017578, + "learning_rate": 1e-06, + "loss": 0.6373, + "num_input_tokens_seen": 162731232, + "step": 2905 + }, + { + "epoch": 6.4699331848552335, + "loss": 0.6285732984542847, + "loss_ce": 0.00039952859515324235, + "loss_iou": 0.28515625, + "loss_num": 0.01171875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 162731232, + "step": 2905 + }, + { + "epoch": 6.472160356347438, + "grad_norm": 22.746726989746094, + "learning_rate": 1e-06, + "loss": 0.5828, + "num_input_tokens_seen": 162786624, + "step": 2906 + }, + { + "epoch": 6.472160356347438, + "loss": 0.6897099018096924, + "loss_ce": 0.0005009524757042527, + "loss_iou": 0.30078125, + "loss_num": 0.017822265625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 162786624, + "step": 2906 + }, + { + "epoch": 6.474387527839644, + "grad_norm": 22.810585021972656, + "learning_rate": 1e-06, + "loss": 0.858, + "num_input_tokens_seen": 162844052, + "step": 2907 + }, + { + "epoch": 6.474387527839644, + "loss": 0.7649703025817871, + "loss_ce": 0.00032186286989599466, + "loss_iou": 0.34375, + "loss_num": 0.01513671875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 162844052, + "step": 2907 + }, + { + "epoch": 6.476614699331849, + "grad_norm": 14.657524108886719, + "learning_rate": 1e-06, + "loss": 0.7491, + "num_input_tokens_seen": 162903604, + "step": 2908 + }, + { + "epoch": 6.476614699331849, + "loss": 0.6694676280021667, + "loss_ce": 0.00027815584326162934, + "loss_iou": 0.255859375, + "loss_num": 0.03173828125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 162903604, + "step": 2908 + }, + { + "epoch": 6.478841870824054, + "grad_norm": 24.23907470703125, + "learning_rate": 1e-06, + "loss": 0.6508, + "num_input_tokens_seen": 162959780, + "step": 2909 + }, + { + "epoch": 6.478841870824054, + "loss": 0.7415847778320312, + "loss_ce": 0.0001907538971863687, + "loss_iou": 0.326171875, + "loss_num": 0.01806640625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 162959780, + "step": 2909 + }, + { + "epoch": 6.481069042316259, + "grad_norm": 20.807043075561523, + "learning_rate": 1e-06, + "loss": 0.5773, + "num_input_tokens_seen": 163016212, + "step": 2910 + }, + { + "epoch": 6.481069042316259, + "loss": 0.7409608364105225, + "loss_ce": 0.00048227497609332204, + "loss_iou": 0.3203125, + "loss_num": 0.0196533203125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 163016212, + "step": 2910 + }, + { + "epoch": 6.4832962138084635, + "grad_norm": 22.38340187072754, + "learning_rate": 1e-06, + "loss": 0.69, + "num_input_tokens_seen": 163071812, + "step": 2911 + }, + { + "epoch": 6.4832962138084635, + "loss": 0.6240810751914978, + "loss_ce": 0.00030178556335158646, + "loss_iou": 0.26953125, + "loss_num": 0.017333984375, + "loss_xval": 0.625, + "num_input_tokens_seen": 163071812, + "step": 2911 + }, + { + "epoch": 6.485523385300668, + "grad_norm": 17.558237075805664, + "learning_rate": 1e-06, + "loss": 0.942, + "num_input_tokens_seen": 163127468, + "step": 2912 + }, + { + "epoch": 6.485523385300668, + "loss": 1.0024060010910034, + "loss_ce": 0.00020868651336058974, + "loss_iou": 0.443359375, + "loss_num": 0.02294921875, + "loss_xval": 1.0, + "num_input_tokens_seen": 163127468, + "step": 2912 + }, + { + "epoch": 6.487750556792873, + "grad_norm": 20.10137367248535, + "learning_rate": 1e-06, + "loss": 0.6397, + "num_input_tokens_seen": 163182160, + "step": 2913 + }, + { + "epoch": 6.487750556792873, + "loss": 0.37796449661254883, + "loss_ce": 0.0002789545978885144, + "loss_iou": 0.173828125, + "loss_num": 0.00579833984375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 163182160, + "step": 2913 + }, + { + "epoch": 6.489977728285078, + "grad_norm": 59.727386474609375, + "learning_rate": 1e-06, + "loss": 0.7155, + "num_input_tokens_seen": 163238980, + "step": 2914 + }, + { + "epoch": 6.489977728285078, + "loss": 0.6282615661621094, + "loss_ce": 0.00020979228429496288, + "loss_iou": 0.271484375, + "loss_num": 0.01708984375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 163238980, + "step": 2914 + }, + { + "epoch": 6.492204899777283, + "grad_norm": 25.564571380615234, + "learning_rate": 1e-06, + "loss": 0.737, + "num_input_tokens_seen": 163293456, + "step": 2915 + }, + { + "epoch": 6.492204899777283, + "loss": 0.5602612495422363, + "loss_ce": 0.00020264273916836828, + "loss_iou": 0.263671875, + "loss_num": 0.006744384765625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 163293456, + "step": 2915 + }, + { + "epoch": 6.494432071269488, + "grad_norm": 19.56485939025879, + "learning_rate": 1e-06, + "loss": 0.6112, + "num_input_tokens_seen": 163347456, + "step": 2916 + }, + { + "epoch": 6.494432071269488, + "loss": 0.6698689460754395, + "loss_ce": 0.00028277470846660435, + "loss_iou": 0.251953125, + "loss_num": 0.032958984375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 163347456, + "step": 2916 + }, + { + "epoch": 6.496659242761693, + "grad_norm": 16.429033279418945, + "learning_rate": 1e-06, + "loss": 0.6937, + "num_input_tokens_seen": 163405948, + "step": 2917 + }, + { + "epoch": 6.496659242761693, + "loss": 0.42375442385673523, + "loss_ce": 0.00017044274136424065, + "loss_iou": 0.17578125, + "loss_num": 0.014404296875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 163405948, + "step": 2917 + }, + { + "epoch": 6.498886414253898, + "grad_norm": 18.85441780090332, + "learning_rate": 1e-06, + "loss": 0.7792, + "num_input_tokens_seen": 163461656, + "step": 2918 + }, + { + "epoch": 6.498886414253898, + "loss": 0.5861859321594238, + "loss_ce": 0.00024842872517183423, + "loss_iou": 0.26171875, + "loss_num": 0.01263427734375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 163461656, + "step": 2918 + }, + { + "epoch": 6.501113585746102, + "grad_norm": 23.43665885925293, + "learning_rate": 1e-06, + "loss": 0.5503, + "num_input_tokens_seen": 163519632, + "step": 2919 + }, + { + "epoch": 6.501113585746102, + "loss": 0.4104507565498352, + "loss_ce": 0.00017243438924197108, + "loss_iou": 0.1845703125, + "loss_num": 0.0081787109375, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 163519632, + "step": 2919 + }, + { + "epoch": 6.503340757238307, + "grad_norm": 28.950546264648438, + "learning_rate": 1e-06, + "loss": 0.741, + "num_input_tokens_seen": 163577312, + "step": 2920 + }, + { + "epoch": 6.503340757238307, + "loss": 0.7677791714668274, + "loss_ce": 0.0002010307798627764, + "loss_iou": 0.330078125, + "loss_num": 0.0216064453125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 163577312, + "step": 2920 + }, + { + "epoch": 6.505567928730512, + "grad_norm": 24.427560806274414, + "learning_rate": 1e-06, + "loss": 0.5989, + "num_input_tokens_seen": 163632820, + "step": 2921 + }, + { + "epoch": 6.505567928730512, + "loss": 0.7374780178070068, + "loss_ce": 0.00017326742818113416, + "loss_iou": 0.296875, + "loss_num": 0.0281982421875, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 163632820, + "step": 2921 + }, + { + "epoch": 6.507795100222717, + "grad_norm": 18.299259185791016, + "learning_rate": 1e-06, + "loss": 0.676, + "num_input_tokens_seen": 163689372, + "step": 2922 + }, + { + "epoch": 6.507795100222717, + "loss": 0.6376951932907104, + "loss_ce": 0.00024403775751125067, + "loss_iou": 0.298828125, + "loss_num": 0.00830078125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 163689372, + "step": 2922 + }, + { + "epoch": 6.510022271714922, + "grad_norm": 31.762420654296875, + "learning_rate": 1e-06, + "loss": 0.8069, + "num_input_tokens_seen": 163745852, + "step": 2923 + }, + { + "epoch": 6.510022271714922, + "loss": 1.0176315307617188, + "loss_ce": 0.00029755313880741596, + "loss_iou": 0.4140625, + "loss_num": 0.037353515625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 163745852, + "step": 2923 + }, + { + "epoch": 6.512249443207127, + "grad_norm": 25.43143653869629, + "learning_rate": 1e-06, + "loss": 1.0766, + "num_input_tokens_seen": 163801732, + "step": 2924 + }, + { + "epoch": 6.512249443207127, + "loss": 1.3304567337036133, + "loss_ce": 0.00025651464238762856, + "loss_iou": 0.53125, + "loss_num": 0.0537109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 163801732, + "step": 2924 + }, + { + "epoch": 6.514476614699332, + "grad_norm": 15.741415977478027, + "learning_rate": 1e-06, + "loss": 0.6474, + "num_input_tokens_seen": 163859180, + "step": 2925 + }, + { + "epoch": 6.514476614699332, + "loss": 0.6122488975524902, + "loss_ce": 0.00018836073286365718, + "loss_iou": 0.279296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 163859180, + "step": 2925 + }, + { + "epoch": 6.5167037861915365, + "grad_norm": 20.761812210083008, + "learning_rate": 1e-06, + "loss": 0.8235, + "num_input_tokens_seen": 163916244, + "step": 2926 + }, + { + "epoch": 6.5167037861915365, + "loss": 0.5054827332496643, + "loss_ce": 0.00023371227143798023, + "loss_iou": 0.2216796875, + "loss_num": 0.01226806640625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 163916244, + "step": 2926 + }, + { + "epoch": 6.518930957683741, + "grad_norm": 61.213077545166016, + "learning_rate": 1e-06, + "loss": 0.9006, + "num_input_tokens_seen": 163970816, + "step": 2927 + }, + { + "epoch": 6.518930957683741, + "loss": 0.9136002659797668, + "loss_ce": 0.0003922681207768619, + "loss_iou": 0.3984375, + "loss_num": 0.023681640625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 163970816, + "step": 2927 + }, + { + "epoch": 6.521158129175946, + "grad_norm": 21.650136947631836, + "learning_rate": 1e-06, + "loss": 0.7797, + "num_input_tokens_seen": 164028208, + "step": 2928 + }, + { + "epoch": 6.521158129175946, + "loss": 0.8101509213447571, + "loss_ce": 0.00021437015675473958, + "loss_iou": 0.337890625, + "loss_num": 0.0269775390625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 164028208, + "step": 2928 + }, + { + "epoch": 6.523385300668151, + "grad_norm": 20.427627563476562, + "learning_rate": 1e-06, + "loss": 0.8737, + "num_input_tokens_seen": 164083608, + "step": 2929 + }, + { + "epoch": 6.523385300668151, + "loss": 0.9638548493385315, + "loss_ce": 0.00023182647419162095, + "loss_iou": 0.43359375, + "loss_num": 0.01953125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 164083608, + "step": 2929 + }, + { + "epoch": 6.525612472160356, + "grad_norm": 17.497631072998047, + "learning_rate": 1e-06, + "loss": 0.7885, + "num_input_tokens_seen": 164138104, + "step": 2930 + }, + { + "epoch": 6.525612472160356, + "loss": 0.9440581798553467, + "loss_ce": 0.00021053646923974156, + "loss_iou": 0.3515625, + "loss_num": 0.04833984375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 164138104, + "step": 2930 + }, + { + "epoch": 6.527839643652561, + "grad_norm": 26.011911392211914, + "learning_rate": 1e-06, + "loss": 0.7072, + "num_input_tokens_seen": 164194992, + "step": 2931 + }, + { + "epoch": 6.527839643652561, + "loss": 0.5633350610733032, + "loss_ce": 0.00022474557044915855, + "loss_iou": 0.251953125, + "loss_num": 0.0118408203125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 164194992, + "step": 2931 + }, + { + "epoch": 6.5300668151447665, + "grad_norm": 14.881753921508789, + "learning_rate": 1e-06, + "loss": 0.6276, + "num_input_tokens_seen": 164252436, + "step": 2932 + }, + { + "epoch": 6.5300668151447665, + "loss": 0.7083165645599365, + "loss_ce": 0.0005529068876057863, + "loss_iou": 0.2890625, + "loss_num": 0.02587890625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 164252436, + "step": 2932 + }, + { + "epoch": 6.532293986636971, + "grad_norm": 22.067697525024414, + "learning_rate": 1e-06, + "loss": 0.6472, + "num_input_tokens_seen": 164309220, + "step": 2933 + }, + { + "epoch": 6.532293986636971, + "loss": 0.6984990835189819, + "loss_ce": 0.0002568979107309133, + "loss_iou": 0.3125, + "loss_num": 0.01416015625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 164309220, + "step": 2933 + }, + { + "epoch": 6.534521158129176, + "grad_norm": 18.131603240966797, + "learning_rate": 1e-06, + "loss": 0.6463, + "num_input_tokens_seen": 164366052, + "step": 2934 + }, + { + "epoch": 6.534521158129176, + "loss": 0.6132932901382446, + "loss_ce": 0.00025620122323744, + "loss_iou": 0.228515625, + "loss_num": 0.031005859375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 164366052, + "step": 2934 + }, + { + "epoch": 6.536748329621381, + "grad_norm": 28.302825927734375, + "learning_rate": 1e-06, + "loss": 0.5853, + "num_input_tokens_seen": 164417760, + "step": 2935 + }, + { + "epoch": 6.536748329621381, + "loss": 0.7578645944595337, + "loss_ce": 0.0002961784484796226, + "loss_iou": 0.318359375, + "loss_num": 0.024169921875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 164417760, + "step": 2935 + }, + { + "epoch": 6.538975501113586, + "grad_norm": 18.224735260009766, + "learning_rate": 1e-06, + "loss": 0.6522, + "num_input_tokens_seen": 164472700, + "step": 2936 + }, + { + "epoch": 6.538975501113586, + "loss": 0.6816818118095398, + "loss_ce": 0.000285336805973202, + "loss_iou": 0.29296875, + "loss_num": 0.018798828125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 164472700, + "step": 2936 + }, + { + "epoch": 6.541202672605791, + "grad_norm": 21.562236785888672, + "learning_rate": 1e-06, + "loss": 0.6994, + "num_input_tokens_seen": 164528444, + "step": 2937 + }, + { + "epoch": 6.541202672605791, + "loss": 0.641094446182251, + "loss_ce": 0.00022529340640176088, + "loss_iou": 0.296875, + "loss_num": 0.00982666015625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 164528444, + "step": 2937 + }, + { + "epoch": 6.543429844097996, + "grad_norm": 44.151615142822266, + "learning_rate": 1e-06, + "loss": 0.7169, + "num_input_tokens_seen": 164583964, + "step": 2938 + }, + { + "epoch": 6.543429844097996, + "loss": 0.6977666616439819, + "loss_ce": 0.00025689357426017523, + "loss_iou": 0.31640625, + "loss_num": 0.01287841796875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 164583964, + "step": 2938 + }, + { + "epoch": 6.5456570155902005, + "grad_norm": 27.166345596313477, + "learning_rate": 1e-06, + "loss": 0.6231, + "num_input_tokens_seen": 164638820, + "step": 2939 + }, + { + "epoch": 6.5456570155902005, + "loss": 0.6275501251220703, + "loss_ce": 0.0002307354734512046, + "loss_iou": 0.251953125, + "loss_num": 0.0242919921875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 164638820, + "step": 2939 + }, + { + "epoch": 6.547884187082405, + "grad_norm": 15.208316802978516, + "learning_rate": 1e-06, + "loss": 0.4751, + "num_input_tokens_seen": 164693132, + "step": 2940 + }, + { + "epoch": 6.547884187082405, + "loss": 0.3183910846710205, + "loss_ce": 0.00018428399926051497, + "loss_iou": 0.1455078125, + "loss_num": 0.005584716796875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 164693132, + "step": 2940 + }, + { + "epoch": 6.55011135857461, + "grad_norm": 32.17951965332031, + "learning_rate": 1e-06, + "loss": 0.7, + "num_input_tokens_seen": 164749436, + "step": 2941 + }, + { + "epoch": 6.55011135857461, + "loss": 0.7634174823760986, + "loss_ce": 0.0004780220042448491, + "loss_iou": 0.33203125, + "loss_num": 0.02001953125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 164749436, + "step": 2941 + }, + { + "epoch": 6.552338530066815, + "grad_norm": 21.82436752319336, + "learning_rate": 1e-06, + "loss": 0.6542, + "num_input_tokens_seen": 164805000, + "step": 2942 + }, + { + "epoch": 6.552338530066815, + "loss": 0.6028134822845459, + "loss_ce": 0.0002744359662756324, + "loss_iou": 0.25390625, + "loss_num": 0.018798828125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 164805000, + "step": 2942 + }, + { + "epoch": 6.55456570155902, + "grad_norm": 20.991344451904297, + "learning_rate": 1e-06, + "loss": 0.7364, + "num_input_tokens_seen": 164861344, + "step": 2943 + }, + { + "epoch": 6.55456570155902, + "loss": 0.7187168002128601, + "loss_ce": 0.00021093602117616683, + "loss_iou": 0.26953125, + "loss_num": 0.03564453125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 164861344, + "step": 2943 + }, + { + "epoch": 6.556792873051225, + "grad_norm": 21.933055877685547, + "learning_rate": 1e-06, + "loss": 0.4837, + "num_input_tokens_seen": 164919076, + "step": 2944 + }, + { + "epoch": 6.556792873051225, + "loss": 0.5052962899208069, + "loss_ce": 0.00016931179561652243, + "loss_iou": 0.2099609375, + "loss_num": 0.0167236328125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 164919076, + "step": 2944 + }, + { + "epoch": 6.55902004454343, + "grad_norm": 19.509490966796875, + "learning_rate": 1e-06, + "loss": 0.9163, + "num_input_tokens_seen": 164972212, + "step": 2945 + }, + { + "epoch": 6.55902004454343, + "loss": 0.810613751411438, + "loss_ce": 0.00025000711320899427, + "loss_iou": 0.33984375, + "loss_num": 0.026123046875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 164972212, + "step": 2945 + }, + { + "epoch": 6.5612472160356345, + "grad_norm": 33.0357780456543, + "learning_rate": 1e-06, + "loss": 0.6847, + "num_input_tokens_seen": 165028004, + "step": 2946 + }, + { + "epoch": 6.5612472160356345, + "loss": 0.7473153471946716, + "loss_ce": 0.0002450351894367486, + "loss_iou": 0.31640625, + "loss_num": 0.0223388671875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 165028004, + "step": 2946 + }, + { + "epoch": 6.563474387527839, + "grad_norm": 19.9017391204834, + "learning_rate": 1e-06, + "loss": 0.6142, + "num_input_tokens_seen": 165083812, + "step": 2947 + }, + { + "epoch": 6.563474387527839, + "loss": 0.6369014382362366, + "loss_ce": 0.0001826911757234484, + "loss_iou": 0.28515625, + "loss_num": 0.0130615234375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 165083812, + "step": 2947 + }, + { + "epoch": 6.565701559020044, + "grad_norm": 30.455806732177734, + "learning_rate": 1e-06, + "loss": 0.601, + "num_input_tokens_seen": 165139816, + "step": 2948 + }, + { + "epoch": 6.565701559020044, + "loss": 0.6039369106292725, + "loss_ce": 0.00023821514332666993, + "loss_iou": 0.2333984375, + "loss_num": 0.027587890625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 165139816, + "step": 2948 + }, + { + "epoch": 6.567928730512249, + "grad_norm": 20.041481018066406, + "learning_rate": 1e-06, + "loss": 0.62, + "num_input_tokens_seen": 165195512, + "step": 2949 + }, + { + "epoch": 6.567928730512249, + "loss": 0.6124828457832336, + "loss_ce": 0.00017815582395996898, + "loss_iou": 0.2490234375, + "loss_num": 0.0228271484375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 165195512, + "step": 2949 + }, + { + "epoch": 6.570155902004454, + "grad_norm": 17.582809448242188, + "learning_rate": 1e-06, + "loss": 0.5694, + "num_input_tokens_seen": 165251664, + "step": 2950 + }, + { + "epoch": 6.570155902004454, + "loss": 0.6081294417381287, + "loss_ce": 0.00021927471971139312, + "loss_iou": 0.263671875, + "loss_num": 0.015869140625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 165251664, + "step": 2950 + }, + { + "epoch": 6.57238307349666, + "grad_norm": 18.174318313598633, + "learning_rate": 1e-06, + "loss": 0.4721, + "num_input_tokens_seen": 165307188, + "step": 2951 + }, + { + "epoch": 6.57238307349666, + "loss": 0.49299192428588867, + "loss_ce": 0.00016358881839551032, + "loss_iou": 0.2099609375, + "loss_num": 0.01434326171875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 165307188, + "step": 2951 + }, + { + "epoch": 6.574610244988865, + "grad_norm": 21.09537696838379, + "learning_rate": 1e-06, + "loss": 0.6628, + "num_input_tokens_seen": 165362820, + "step": 2952 + }, + { + "epoch": 6.574610244988865, + "loss": 0.7518377304077148, + "loss_ce": 0.0002508063626009971, + "loss_iou": 0.326171875, + "loss_num": 0.02001953125, + "loss_xval": 0.75, + "num_input_tokens_seen": 165362820, + "step": 2952 + }, + { + "epoch": 6.5768374164810695, + "grad_norm": 30.387908935546875, + "learning_rate": 1e-06, + "loss": 0.5611, + "num_input_tokens_seen": 165419408, + "step": 2953 + }, + { + "epoch": 6.5768374164810695, + "loss": 0.5768758058547974, + "loss_ce": 0.00021561238099820912, + "loss_iou": 0.2578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 165419408, + "step": 2953 + }, + { + "epoch": 6.579064587973274, + "grad_norm": 20.413816452026367, + "learning_rate": 1e-06, + "loss": 0.7898, + "num_input_tokens_seen": 165472128, + "step": 2954 + }, + { + "epoch": 6.579064587973274, + "loss": 0.7371137142181396, + "loss_ce": 0.0001751929521560669, + "loss_iou": 0.287109375, + "loss_num": 0.03271484375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 165472128, + "step": 2954 + }, + { + "epoch": 6.581291759465479, + "grad_norm": 20.204029083251953, + "learning_rate": 1e-06, + "loss": 0.5606, + "num_input_tokens_seen": 165529144, + "step": 2955 + }, + { + "epoch": 6.581291759465479, + "loss": 0.6010243892669678, + "loss_ce": 0.00019429507665336132, + "loss_iou": 0.28125, + "loss_num": 0.00787353515625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 165529144, + "step": 2955 + }, + { + "epoch": 6.583518930957684, + "grad_norm": 17.47038459777832, + "learning_rate": 1e-06, + "loss": 0.6566, + "num_input_tokens_seen": 165588264, + "step": 2956 + }, + { + "epoch": 6.583518930957684, + "loss": 0.7859352827072144, + "loss_ce": 0.0002907089365180582, + "loss_iou": 0.333984375, + "loss_num": 0.023681640625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 165588264, + "step": 2956 + }, + { + "epoch": 6.585746102449889, + "grad_norm": 16.517253875732422, + "learning_rate": 1e-06, + "loss": 0.547, + "num_input_tokens_seen": 165644784, + "step": 2957 + }, + { + "epoch": 6.585746102449889, + "loss": 0.6574386358261108, + "loss_ce": 0.00021205886150710285, + "loss_iou": 0.287109375, + "loss_num": 0.016357421875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 165644784, + "step": 2957 + }, + { + "epoch": 6.587973273942094, + "grad_norm": 18.46898078918457, + "learning_rate": 1e-06, + "loss": 0.6594, + "num_input_tokens_seen": 165701372, + "step": 2958 + }, + { + "epoch": 6.587973273942094, + "loss": 0.38792580366134644, + "loss_ce": 0.0003525797219481319, + "loss_iou": 0.162109375, + "loss_num": 0.01251220703125, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 165701372, + "step": 2958 + }, + { + "epoch": 6.590200445434299, + "grad_norm": 28.243539810180664, + "learning_rate": 1e-06, + "loss": 0.7292, + "num_input_tokens_seen": 165756960, + "step": 2959 + }, + { + "epoch": 6.590200445434299, + "loss": 0.9738781452178955, + "loss_ce": 0.0002453392662573606, + "loss_iou": 0.42578125, + "loss_num": 0.0244140625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 165756960, + "step": 2959 + }, + { + "epoch": 6.5924276169265035, + "grad_norm": 15.872271537780762, + "learning_rate": 1e-06, + "loss": 0.5439, + "num_input_tokens_seen": 165813860, + "step": 2960 + }, + { + "epoch": 6.5924276169265035, + "loss": 0.624319314956665, + "loss_ce": 0.00029592958162538707, + "loss_iou": 0.26171875, + "loss_num": 0.0201416015625, + "loss_xval": 0.625, + "num_input_tokens_seen": 165813860, + "step": 2960 + }, + { + "epoch": 6.594654788418708, + "grad_norm": 109.15684509277344, + "learning_rate": 1e-06, + "loss": 0.7413, + "num_input_tokens_seen": 165871416, + "step": 2961 + }, + { + "epoch": 6.594654788418708, + "loss": 0.9980896711349487, + "loss_ce": 0.00016483690706081688, + "loss_iou": 0.44140625, + "loss_num": 0.023193359375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 165871416, + "step": 2961 + }, + { + "epoch": 6.596881959910913, + "grad_norm": 16.21949577331543, + "learning_rate": 1e-06, + "loss": 0.9368, + "num_input_tokens_seen": 165925904, + "step": 2962 + }, + { + "epoch": 6.596881959910913, + "loss": 0.8550323247909546, + "loss_ce": 0.00017394759925082326, + "loss_iou": 0.33984375, + "loss_num": 0.034912109375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 165925904, + "step": 2962 + }, + { + "epoch": 6.599109131403118, + "grad_norm": 31.012943267822266, + "learning_rate": 1e-06, + "loss": 0.6417, + "num_input_tokens_seen": 165983760, + "step": 2963 + }, + { + "epoch": 6.599109131403118, + "loss": 0.6024689674377441, + "loss_ce": 0.000662269361782819, + "loss_iou": 0.25390625, + "loss_num": 0.018798828125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 165983760, + "step": 2963 + }, + { + "epoch": 6.601336302895323, + "grad_norm": 20.181808471679688, + "learning_rate": 1e-06, + "loss": 0.6337, + "num_input_tokens_seen": 166040296, + "step": 2964 + }, + { + "epoch": 6.601336302895323, + "loss": 0.724341094493866, + "loss_ce": 0.0003420562425162643, + "loss_iou": 0.298828125, + "loss_num": 0.0252685546875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 166040296, + "step": 2964 + }, + { + "epoch": 6.603563474387528, + "grad_norm": 28.568126678466797, + "learning_rate": 1e-06, + "loss": 0.7788, + "num_input_tokens_seen": 166092104, + "step": 2965 + }, + { + "epoch": 6.603563474387528, + "loss": 0.7236001491546631, + "loss_ce": 0.00021149440726730973, + "loss_iou": 0.275390625, + "loss_num": 0.034912109375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 166092104, + "step": 2965 + }, + { + "epoch": 6.605790645879733, + "grad_norm": 14.282336235046387, + "learning_rate": 1e-06, + "loss": 0.7572, + "num_input_tokens_seen": 166147648, + "step": 2966 + }, + { + "epoch": 6.605790645879733, + "loss": 0.9260650873184204, + "loss_ce": 0.0002839115622919053, + "loss_iou": 0.365234375, + "loss_num": 0.0390625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 166147648, + "step": 2966 + }, + { + "epoch": 6.6080178173719375, + "grad_norm": 18.43797492980957, + "learning_rate": 1e-06, + "loss": 0.8469, + "num_input_tokens_seen": 166205440, + "step": 2967 + }, + { + "epoch": 6.6080178173719375, + "loss": 1.090362548828125, + "loss_ce": 0.0002746962709352374, + "loss_iou": 0.4453125, + "loss_num": 0.03955078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 166205440, + "step": 2967 + }, + { + "epoch": 6.610244988864142, + "grad_norm": 20.853721618652344, + "learning_rate": 1e-06, + "loss": 0.7179, + "num_input_tokens_seen": 166262912, + "step": 2968 + }, + { + "epoch": 6.610244988864142, + "loss": 0.7870877981185913, + "loss_ce": 0.00022257049567997456, + "loss_iou": 0.34375, + "loss_num": 0.020263671875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 166262912, + "step": 2968 + }, + { + "epoch": 6.612472160356347, + "grad_norm": 119.40055847167969, + "learning_rate": 1e-06, + "loss": 0.6213, + "num_input_tokens_seen": 166320972, + "step": 2969 + }, + { + "epoch": 6.612472160356347, + "loss": 0.6227630376815796, + "loss_ce": 0.00020442584354896098, + "loss_iou": 0.275390625, + "loss_num": 0.01434326171875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 166320972, + "step": 2969 + }, + { + "epoch": 6.614699331848552, + "grad_norm": 21.16520881652832, + "learning_rate": 1e-06, + "loss": 0.7879, + "num_input_tokens_seen": 166377852, + "step": 2970 + }, + { + "epoch": 6.614699331848552, + "loss": 0.6298392415046692, + "loss_ce": 0.0003226206754334271, + "loss_iou": 0.27734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 166377852, + "step": 2970 + }, + { + "epoch": 6.616926503340757, + "grad_norm": 24.545520782470703, + "learning_rate": 1e-06, + "loss": 0.761, + "num_input_tokens_seen": 166434064, + "step": 2971 + }, + { + "epoch": 6.616926503340757, + "loss": 0.798297643661499, + "loss_ce": 0.00020199231221340597, + "loss_iou": 0.345703125, + "loss_num": 0.021240234375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 166434064, + "step": 2971 + }, + { + "epoch": 6.619153674832962, + "grad_norm": 14.242857933044434, + "learning_rate": 1e-06, + "loss": 0.7568, + "num_input_tokens_seen": 166486632, + "step": 2972 + }, + { + "epoch": 6.619153674832962, + "loss": 0.8938003778457642, + "loss_ce": 0.00024563493207097054, + "loss_iou": 0.35546875, + "loss_num": 0.036376953125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 166486632, + "step": 2972 + }, + { + "epoch": 6.621380846325167, + "grad_norm": 13.78774642944336, + "learning_rate": 1e-06, + "loss": 0.5507, + "num_input_tokens_seen": 166543024, + "step": 2973 + }, + { + "epoch": 6.621380846325167, + "loss": 0.5280297994613647, + "loss_ce": 0.00019778512069024146, + "loss_iou": 0.2314453125, + "loss_num": 0.01312255859375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 166543024, + "step": 2973 + }, + { + "epoch": 6.6236080178173715, + "grad_norm": 16.257577896118164, + "learning_rate": 1e-06, + "loss": 0.5946, + "num_input_tokens_seen": 166601376, + "step": 2974 + }, + { + "epoch": 6.6236080178173715, + "loss": 0.705248236656189, + "loss_ce": 0.00017008130089379847, + "loss_iou": 0.30859375, + "loss_num": 0.0179443359375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 166601376, + "step": 2974 + }, + { + "epoch": 6.625835189309576, + "grad_norm": 19.095983505249023, + "learning_rate": 1e-06, + "loss": 0.7031, + "num_input_tokens_seen": 166654948, + "step": 2975 + }, + { + "epoch": 6.625835189309576, + "loss": 0.48734956979751587, + "loss_ce": 0.00016694515943527222, + "loss_iou": 0.201171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 166654948, + "step": 2975 + }, + { + "epoch": 6.628062360801781, + "grad_norm": 21.924911499023438, + "learning_rate": 1e-06, + "loss": 0.6498, + "num_input_tokens_seen": 166711656, + "step": 2976 + }, + { + "epoch": 6.628062360801781, + "loss": 0.5872310996055603, + "loss_ce": 0.0001949925208464265, + "loss_iou": 0.25, + "loss_num": 0.0174560546875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 166711656, + "step": 2976 + }, + { + "epoch": 6.630289532293987, + "grad_norm": 20.422719955444336, + "learning_rate": 1e-06, + "loss": 0.736, + "num_input_tokens_seen": 166770256, + "step": 2977 + }, + { + "epoch": 6.630289532293987, + "loss": 0.9298211932182312, + "loss_ce": 0.0002557823609095067, + "loss_iou": 0.3515625, + "loss_num": 0.045654296875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 166770256, + "step": 2977 + }, + { + "epoch": 6.632516703786192, + "grad_norm": 18.542539596557617, + "learning_rate": 1e-06, + "loss": 0.6461, + "num_input_tokens_seen": 166826808, + "step": 2978 + }, + { + "epoch": 6.632516703786192, + "loss": 0.5421923398971558, + "loss_ce": 0.00020013400353491306, + "loss_iou": 0.22265625, + "loss_num": 0.0191650390625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 166826808, + "step": 2978 + }, + { + "epoch": 6.634743875278397, + "grad_norm": 21.711483001708984, + "learning_rate": 1e-06, + "loss": 0.833, + "num_input_tokens_seen": 166881468, + "step": 2979 + }, + { + "epoch": 6.634743875278397, + "loss": 0.8562052249908447, + "loss_ce": 0.000248211930738762, + "loss_iou": 0.33984375, + "loss_num": 0.034912109375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 166881468, + "step": 2979 + }, + { + "epoch": 6.636971046770602, + "grad_norm": 19.191953659057617, + "learning_rate": 1e-06, + "loss": 0.5379, + "num_input_tokens_seen": 166938348, + "step": 2980 + }, + { + "epoch": 6.636971046770602, + "loss": 0.47580990195274353, + "loss_ce": 0.0002239710884168744, + "loss_iou": 0.216796875, + "loss_num": 0.00836181640625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 166938348, + "step": 2980 + }, + { + "epoch": 6.639198218262806, + "grad_norm": 19.030431747436523, + "learning_rate": 1e-06, + "loss": 0.5368, + "num_input_tokens_seen": 166994888, + "step": 2981 + }, + { + "epoch": 6.639198218262806, + "loss": 0.5439237952232361, + "loss_ce": 0.00022264779545366764, + "loss_iou": 0.220703125, + "loss_num": 0.0205078125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 166994888, + "step": 2981 + }, + { + "epoch": 6.641425389755011, + "grad_norm": 17.75498390197754, + "learning_rate": 1e-06, + "loss": 0.5462, + "num_input_tokens_seen": 167050384, + "step": 2982 + }, + { + "epoch": 6.641425389755011, + "loss": 0.4909123182296753, + "loss_ce": 0.00018965097842738032, + "loss_iou": 0.2158203125, + "loss_num": 0.0118408203125, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 167050384, + "step": 2982 + }, + { + "epoch": 6.643652561247216, + "grad_norm": 15.332694053649902, + "learning_rate": 1e-06, + "loss": 0.4491, + "num_input_tokens_seen": 167108932, + "step": 2983 + }, + { + "epoch": 6.643652561247216, + "loss": 0.5531620979309082, + "loss_ce": 0.00018356410146225244, + "loss_iou": 0.2490234375, + "loss_num": 0.01116943359375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 167108932, + "step": 2983 + }, + { + "epoch": 6.645879732739421, + "grad_norm": 26.91922378540039, + "learning_rate": 1e-06, + "loss": 0.6703, + "num_input_tokens_seen": 167167308, + "step": 2984 + }, + { + "epoch": 6.645879732739421, + "loss": 0.5900508165359497, + "loss_ce": 0.0002071046328637749, + "loss_iou": 0.2490234375, + "loss_num": 0.0181884765625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 167167308, + "step": 2984 + }, + { + "epoch": 6.648106904231626, + "grad_norm": 15.314505577087402, + "learning_rate": 1e-06, + "loss": 0.6925, + "num_input_tokens_seen": 167223976, + "step": 2985 + }, + { + "epoch": 6.648106904231626, + "loss": 0.6426599621772766, + "loss_ce": 0.00032597355311736465, + "loss_iou": 0.28515625, + "loss_num": 0.0147705078125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 167223976, + "step": 2985 + }, + { + "epoch": 6.650334075723831, + "grad_norm": 20.561725616455078, + "learning_rate": 1e-06, + "loss": 0.7403, + "num_input_tokens_seen": 167279468, + "step": 2986 + }, + { + "epoch": 6.650334075723831, + "loss": 0.6292366981506348, + "loss_ce": 0.00020838108321186155, + "loss_iou": 0.26953125, + "loss_num": 0.0177001953125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 167279468, + "step": 2986 + }, + { + "epoch": 6.652561247216036, + "grad_norm": 22.052919387817383, + "learning_rate": 1e-06, + "loss": 0.6358, + "num_input_tokens_seen": 167338024, + "step": 2987 + }, + { + "epoch": 6.652561247216036, + "loss": 0.6393663287162781, + "loss_ce": 0.00032825471134856343, + "loss_iou": 0.27734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 167338024, + "step": 2987 + }, + { + "epoch": 6.6547884187082404, + "grad_norm": 25.92134666442871, + "learning_rate": 1e-06, + "loss": 0.9487, + "num_input_tokens_seen": 167394980, + "step": 2988 + }, + { + "epoch": 6.6547884187082404, + "loss": 0.8820292949676514, + "loss_ce": 0.00019336529658176005, + "loss_iou": 0.376953125, + "loss_num": 0.0255126953125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 167394980, + "step": 2988 + }, + { + "epoch": 6.657015590200445, + "grad_norm": 18.905071258544922, + "learning_rate": 1e-06, + "loss": 0.5956, + "num_input_tokens_seen": 167451864, + "step": 2989 + }, + { + "epoch": 6.657015590200445, + "loss": 0.5608692169189453, + "loss_ce": 0.00032231814111582935, + "loss_iou": 0.251953125, + "loss_num": 0.01153564453125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 167451864, + "step": 2989 + }, + { + "epoch": 6.65924276169265, + "grad_norm": 18.516260147094727, + "learning_rate": 1e-06, + "loss": 0.6818, + "num_input_tokens_seen": 167507760, + "step": 2990 + }, + { + "epoch": 6.65924276169265, + "loss": 0.5940439701080322, + "loss_ce": 0.0002939357655122876, + "loss_iou": 0.259765625, + "loss_num": 0.01519775390625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 167507760, + "step": 2990 + }, + { + "epoch": 6.661469933184855, + "grad_norm": 16.960506439208984, + "learning_rate": 1e-06, + "loss": 0.6908, + "num_input_tokens_seen": 167560344, + "step": 2991 + }, + { + "epoch": 6.661469933184855, + "loss": 0.620684802532196, + "loss_ce": 0.00020142148423474282, + "loss_iou": 0.263671875, + "loss_num": 0.018798828125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 167560344, + "step": 2991 + }, + { + "epoch": 6.66369710467706, + "grad_norm": 24.14487648010254, + "learning_rate": 1e-06, + "loss": 0.5732, + "num_input_tokens_seen": 167618800, + "step": 2992 + }, + { + "epoch": 6.66369710467706, + "loss": 0.6208098530769348, + "loss_ce": 0.0003264534752815962, + "loss_iou": 0.279296875, + "loss_num": 0.01226806640625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 167618800, + "step": 2992 + }, + { + "epoch": 6.665924276169265, + "grad_norm": 16.674419403076172, + "learning_rate": 1e-06, + "loss": 0.4944, + "num_input_tokens_seen": 167674316, + "step": 2993 + }, + { + "epoch": 6.665924276169265, + "loss": 0.5599268674850464, + "loss_ce": 0.0002344850217923522, + "loss_iou": 0.2255859375, + "loss_num": 0.0216064453125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 167674316, + "step": 2993 + }, + { + "epoch": 6.66815144766147, + "grad_norm": 14.498659133911133, + "learning_rate": 1e-06, + "loss": 0.8444, + "num_input_tokens_seen": 167730912, + "step": 2994 + }, + { + "epoch": 6.66815144766147, + "loss": 0.7760642766952515, + "loss_ce": 0.0009178139735013247, + "loss_iou": 0.31640625, + "loss_num": 0.0283203125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 167730912, + "step": 2994 + }, + { + "epoch": 6.6703786191536745, + "grad_norm": 25.62771987915039, + "learning_rate": 1e-06, + "loss": 0.7389, + "num_input_tokens_seen": 167786696, + "step": 2995 + }, + { + "epoch": 6.6703786191536745, + "loss": 0.6130115985870361, + "loss_ce": 0.00021857366664335132, + "loss_iou": 0.27734375, + "loss_num": 0.01190185546875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 167786696, + "step": 2995 + }, + { + "epoch": 6.67260579064588, + "grad_norm": 84.09690856933594, + "learning_rate": 1e-06, + "loss": 0.7173, + "num_input_tokens_seen": 167843364, + "step": 2996 + }, + { + "epoch": 6.67260579064588, + "loss": 0.5746430158615112, + "loss_ce": 0.00018016056856140494, + "loss_iou": 0.2578125, + "loss_num": 0.011962890625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 167843364, + "step": 2996 + }, + { + "epoch": 6.674832962138085, + "grad_norm": 25.761995315551758, + "learning_rate": 1e-06, + "loss": 0.7212, + "num_input_tokens_seen": 167900084, + "step": 2997 + }, + { + "epoch": 6.674832962138085, + "loss": 0.848706066608429, + "loss_ce": 0.0003174202283844352, + "loss_iou": 0.3671875, + "loss_num": 0.0230712890625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 167900084, + "step": 2997 + }, + { + "epoch": 6.67706013363029, + "grad_norm": 20.495059967041016, + "learning_rate": 1e-06, + "loss": 0.6009, + "num_input_tokens_seen": 167955952, + "step": 2998 + }, + { + "epoch": 6.67706013363029, + "loss": 0.6451590061187744, + "loss_ce": 0.0002615168341435492, + "loss_iou": 0.2890625, + "loss_num": 0.0135498046875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 167955952, + "step": 2998 + }, + { + "epoch": 6.679287305122495, + "grad_norm": 18.190731048583984, + "learning_rate": 1e-06, + "loss": 0.5682, + "num_input_tokens_seen": 168011540, + "step": 2999 + }, + { + "epoch": 6.679287305122495, + "loss": 0.5880811810493469, + "loss_ce": 0.0001905930694192648, + "loss_iou": 0.25390625, + "loss_num": 0.0157470703125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 168011540, + "step": 2999 + }, + { + "epoch": 6.6815144766147, + "grad_norm": 15.931896209716797, + "learning_rate": 1e-06, + "loss": 0.556, + "num_input_tokens_seen": 168068960, + "step": 3000 + }, + { + "epoch": 6.6815144766147, + "eval_seeclick_web_CIoU": 0.5745645761489868, + "eval_seeclick_web_GIoU": 0.5703821778297424, + "eval_seeclick_web_IoU": 0.5911202728748322, + "eval_seeclick_web_MAE_all": 0.016741125378757715, + "eval_seeclick_web_MAE_h": 0.009390837512910366, + "eval_seeclick_web_MAE_w": 0.01750754565000534, + "eval_seeclick_web_MAE_x_boxes": 0.009469148702919483, + "eval_seeclick_web_MAE_y_boxes": 0.022113264771178365, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9277820587158203, + "eval_seeclick_web_loss_ce": 0.00027503210003487766, + "eval_seeclick_web_loss_iou": 0.421875, + "eval_seeclick_web_loss_num": 0.013193130493164062, + "eval_seeclick_web_loss_xval": 0.909423828125, + "eval_seeclick_web_runtime": 22.6295, + "eval_seeclick_web_samples_per_second": 2.21, + "eval_seeclick_web_steps_per_second": 0.088, + "num_input_tokens_seen": 168068960, + "step": 3000 + }, + { + "epoch": 6.6815144766147, + "eval_icons_CIoU": 0.28626811504364014, + "eval_icons_GIoU": 0.31125396490097046, + "eval_icons_IoU": 0.36675940454006195, + "eval_icons_MAE_all": 0.06613602861762047, + "eval_icons_MAE_h": 0.04001020174473524, + "eval_icons_MAE_w": 0.07005784474313259, + "eval_icons_MAE_x_boxes": 0.06103345938026905, + "eval_icons_MAE_y_boxes": 0.039336864836513996, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.7287678718566895, + "eval_icons_loss_ce": 0.00033969079959206283, + "eval_icons_loss_iou": 0.669921875, + "eval_icons_loss_num": 0.06263542175292969, + "eval_icons_loss_xval": 1.65380859375, + "eval_icons_runtime": 20.8368, + "eval_icons_samples_per_second": 2.4, + "eval_icons_steps_per_second": 0.096, + "num_input_tokens_seen": 168068960, + "step": 3000 + }, + { + "epoch": 6.6815144766147, + "eval_screenspot_CIoU": 0.34714128573735553, + "eval_screenspot_GIoU": 0.36292509237925213, + "eval_screenspot_IoU": 0.42989224195480347, + "eval_screenspot_MAE_all": 0.06254567454258601, + "eval_screenspot_MAE_h": 0.03752323302129904, + "eval_screenspot_MAE_w": 0.07452885061502457, + "eval_screenspot_MAE_x_boxes": 0.07285770525534947, + "eval_screenspot_MAE_y_boxes": 0.04644376132637262, + "eval_screenspot_inside_bbox": 0.659583330154419, + "eval_screenspot_loss": 1.6588377952575684, + "eval_screenspot_loss_ce": 0.00031722194398753345, + "eval_screenspot_loss_iou": 0.6805826822916666, + "eval_screenspot_loss_num": 0.07418060302734375, + "eval_screenspot_loss_xval": 1.7322591145833333, + "eval_screenspot_runtime": 32.6151, + "eval_screenspot_samples_per_second": 2.729, + "eval_screenspot_steps_per_second": 0.092, + "num_input_tokens_seen": 168068960, + "step": 3000 + }, + { + "epoch": 6.6815144766147, + "eval_compot_CIoU": 0.3531677573919296, + "eval_compot_GIoU": 0.3627708703279495, + "eval_compot_IoU": 0.4095195233821869, + "eval_compot_MAE_all": 0.01802563015371561, + "eval_compot_MAE_h": 0.008967408444732428, + "eval_compot_MAE_w": 0.022056237794458866, + "eval_compot_MAE_x_boxes": 0.02982906997203827, + "eval_compot_MAE_y_boxes": 0.006724046776071191, + "eval_compot_inside_bbox": 0.6458333432674408, + "eval_compot_loss": 1.3883819580078125, + "eval_compot_loss_ce": 0.0002577164559625089, + "eval_compot_loss_iou": 0.633544921875, + "eval_compot_loss_num": 0.016773223876953125, + "eval_compot_loss_xval": 1.350830078125, + "eval_compot_runtime": 20.0639, + "eval_compot_samples_per_second": 2.492, + "eval_compot_steps_per_second": 0.1, + "num_input_tokens_seen": 168068960, + "step": 3000 + }, + { + "epoch": 6.6815144766147, + "eval_custom_ui_val_CIoU": 0.46863051421112484, + "eval_custom_ui_val_GIoU": 0.4837728473875258, + "eval_custom_ui_val_IoU": 0.5239554312494066, + "eval_custom_ui_val_MAE_all": 0.030405950939489737, + "eval_custom_ui_val_MAE_h": 0.01713582917323543, + "eval_custom_ui_val_MAE_w": 0.038816668921046786, + "eval_custom_ui_val_MAE_x_boxes": 0.034813721767730184, + "eval_custom_ui_val_MAE_y_boxes": 0.015315383543363877, + "eval_custom_ui_val_inside_bbox": 0.7422839535607232, + "eval_custom_ui_val_loss": 1.1846333742141724, + "eval_custom_ui_val_loss_ce": 0.00029205658291983936, + "eval_custom_ui_val_loss_iou": 0.5049913194444444, + "eval_custom_ui_val_loss_num": 0.02808909946017795, + "eval_custom_ui_val_loss_xval": 1.1504720052083333, + "eval_custom_ui_val_runtime": 56.2868, + "eval_custom_ui_val_samples_per_second": 4.708, + "eval_custom_ui_val_steps_per_second": 0.16, + "num_input_tokens_seen": 168068960, + "step": 3000 + }, + { + "epoch": 6.6815144766147, + "loss": 0.9355728030204773, + "loss_ce": 0.00027009687619283795, + "loss_iou": 0.40625, + "loss_num": 0.0244140625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 168068960, + "step": 3000 + }, + { + "epoch": 6.6837416481069045, + "grad_norm": 13.867417335510254, + "learning_rate": 1e-06, + "loss": 0.778, + "num_input_tokens_seen": 168123220, + "step": 3001 + }, + { + "epoch": 6.6837416481069045, + "loss": 0.8318126201629639, + "loss_ce": 0.00026967888697981834, + "loss_iou": 0.345703125, + "loss_num": 0.0283203125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 168123220, + "step": 3001 + }, + { + "epoch": 6.685968819599109, + "grad_norm": 23.010339736938477, + "learning_rate": 1e-06, + "loss": 0.8531, + "num_input_tokens_seen": 168177788, + "step": 3002 + }, + { + "epoch": 6.685968819599109, + "loss": 0.9224424958229065, + "loss_ce": 0.00032336413278244436, + "loss_iou": 0.390625, + "loss_num": 0.028076171875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 168177788, + "step": 3002 + }, + { + "epoch": 6.688195991091314, + "grad_norm": 17.75686264038086, + "learning_rate": 1e-06, + "loss": 0.6219, + "num_input_tokens_seen": 168232444, + "step": 3003 + }, + { + "epoch": 6.688195991091314, + "loss": 0.38138991594314575, + "loss_ce": 0.00016430205141659826, + "loss_iou": 0.166015625, + "loss_num": 0.00994873046875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 168232444, + "step": 3003 + }, + { + "epoch": 6.690423162583519, + "grad_norm": 25.14574432373047, + "learning_rate": 1e-06, + "loss": 0.6382, + "num_input_tokens_seen": 168285212, + "step": 3004 + }, + { + "epoch": 6.690423162583519, + "loss": 0.7175732851028442, + "loss_ce": 0.000288100796751678, + "loss_iou": 0.2890625, + "loss_num": 0.028076171875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 168285212, + "step": 3004 + }, + { + "epoch": 6.692650334075724, + "grad_norm": 15.96047592163086, + "learning_rate": 1e-06, + "loss": 0.8216, + "num_input_tokens_seen": 168341780, + "step": 3005 + }, + { + "epoch": 6.692650334075724, + "loss": 1.042626142501831, + "loss_ce": 0.0002676715375855565, + "loss_iou": 0.408203125, + "loss_num": 0.045166015625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 168341780, + "step": 3005 + }, + { + "epoch": 6.694877505567929, + "grad_norm": 14.52697467803955, + "learning_rate": 1e-06, + "loss": 0.4355, + "num_input_tokens_seen": 168397592, + "step": 3006 + }, + { + "epoch": 6.694877505567929, + "loss": 0.4221663177013397, + "loss_ce": 0.00023030643933452666, + "loss_iou": 0.17578125, + "loss_num": 0.0140380859375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 168397592, + "step": 3006 + }, + { + "epoch": 6.697104677060134, + "grad_norm": 17.11386489868164, + "learning_rate": 1e-06, + "loss": 0.7575, + "num_input_tokens_seen": 168453048, + "step": 3007 + }, + { + "epoch": 6.697104677060134, + "loss": 0.9401570558547974, + "loss_ce": 0.0002156144182663411, + "loss_iou": 0.408203125, + "loss_num": 0.0247802734375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 168453048, + "step": 3007 + }, + { + "epoch": 6.6993318485523385, + "grad_norm": 23.60494041442871, + "learning_rate": 1e-06, + "loss": 0.6955, + "num_input_tokens_seen": 168508760, + "step": 3008 + }, + { + "epoch": 6.6993318485523385, + "loss": 0.49571555852890015, + "loss_ce": 0.00023218031856231391, + "loss_iou": 0.2314453125, + "loss_num": 0.006805419921875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 168508760, + "step": 3008 + }, + { + "epoch": 6.701559020044543, + "grad_norm": 14.749323844909668, + "learning_rate": 1e-06, + "loss": 0.6877, + "num_input_tokens_seen": 168565604, + "step": 3009 + }, + { + "epoch": 6.701559020044543, + "loss": 0.6759235858917236, + "loss_ce": 0.0003864543105009943, + "loss_iou": 0.271484375, + "loss_num": 0.0267333984375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 168565604, + "step": 3009 + }, + { + "epoch": 6.703786191536748, + "grad_norm": 24.45879554748535, + "learning_rate": 1e-06, + "loss": 0.6726, + "num_input_tokens_seen": 168623400, + "step": 3010 + }, + { + "epoch": 6.703786191536748, + "loss": 0.5331798791885376, + "loss_ce": 0.00022087327670305967, + "loss_iou": 0.23828125, + "loss_num": 0.01153564453125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 168623400, + "step": 3010 + }, + { + "epoch": 6.706013363028953, + "grad_norm": 22.486263275146484, + "learning_rate": 1e-06, + "loss": 0.5951, + "num_input_tokens_seen": 168678412, + "step": 3011 + }, + { + "epoch": 6.706013363028953, + "loss": 0.6491339206695557, + "loss_ce": 0.0002081873535644263, + "loss_iou": 0.259765625, + "loss_num": 0.0263671875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 168678412, + "step": 3011 + }, + { + "epoch": 6.708240534521158, + "grad_norm": 12.50680923461914, + "learning_rate": 1e-06, + "loss": 0.721, + "num_input_tokens_seen": 168734048, + "step": 3012 + }, + { + "epoch": 6.708240534521158, + "loss": 0.7475176453590393, + "loss_ce": 0.00020322672207839787, + "loss_iou": 0.296875, + "loss_num": 0.0306396484375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 168734048, + "step": 3012 + }, + { + "epoch": 6.710467706013363, + "grad_norm": 28.51459503173828, + "learning_rate": 1e-06, + "loss": 0.5842, + "num_input_tokens_seen": 168790568, + "step": 3013 + }, + { + "epoch": 6.710467706013363, + "loss": 0.6308811902999878, + "loss_ce": 0.000265932030742988, + "loss_iou": 0.275390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 168790568, + "step": 3013 + }, + { + "epoch": 6.712694877505568, + "grad_norm": 18.395198822021484, + "learning_rate": 1e-06, + "loss": 0.8135, + "num_input_tokens_seen": 168844228, + "step": 3014 + }, + { + "epoch": 6.712694877505568, + "loss": 0.7819979190826416, + "loss_ce": 0.00025961300707422197, + "loss_iou": 0.333984375, + "loss_num": 0.0230712890625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 168844228, + "step": 3014 + }, + { + "epoch": 6.714922048997773, + "grad_norm": 23.87611198425293, + "learning_rate": 1e-06, + "loss": 0.7382, + "num_input_tokens_seen": 168899728, + "step": 3015 + }, + { + "epoch": 6.714922048997773, + "loss": 0.7516616582870483, + "loss_ce": 0.00019676925148814917, + "loss_iou": 0.326171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.75, + "num_input_tokens_seen": 168899728, + "step": 3015 + }, + { + "epoch": 6.717149220489977, + "grad_norm": 20.738792419433594, + "learning_rate": 1e-06, + "loss": 0.786, + "num_input_tokens_seen": 168956592, + "step": 3016 + }, + { + "epoch": 6.717149220489977, + "loss": 0.8078007102012634, + "loss_ce": 0.00018351592007093132, + "loss_iou": 0.306640625, + "loss_num": 0.0390625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 168956592, + "step": 3016 + }, + { + "epoch": 6.719376391982182, + "grad_norm": 19.863662719726562, + "learning_rate": 1e-06, + "loss": 0.7038, + "num_input_tokens_seen": 169012588, + "step": 3017 + }, + { + "epoch": 6.719376391982182, + "loss": 0.5791813135147095, + "loss_ce": 0.00020178730483166873, + "loss_iou": 0.24609375, + "loss_num": 0.017333984375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 169012588, + "step": 3017 + }, + { + "epoch": 6.721603563474387, + "grad_norm": 18.61119842529297, + "learning_rate": 1e-06, + "loss": 0.6261, + "num_input_tokens_seen": 169067608, + "step": 3018 + }, + { + "epoch": 6.721603563474387, + "loss": 0.6960831880569458, + "loss_ce": 0.0002824185648933053, + "loss_iou": 0.31640625, + "loss_num": 0.01263427734375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 169067608, + "step": 3018 + }, + { + "epoch": 6.723830734966592, + "grad_norm": 19.781635284423828, + "learning_rate": 1e-06, + "loss": 0.6468, + "num_input_tokens_seen": 169122988, + "step": 3019 + }, + { + "epoch": 6.723830734966592, + "loss": 0.7083573341369629, + "loss_ce": 0.0002274075523018837, + "loss_iou": 0.26953125, + "loss_num": 0.033447265625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 169122988, + "step": 3019 + }, + { + "epoch": 6.726057906458797, + "grad_norm": 17.748201370239258, + "learning_rate": 1e-06, + "loss": 0.5458, + "num_input_tokens_seen": 169178700, + "step": 3020 + }, + { + "epoch": 6.726057906458797, + "loss": 0.5851503610610962, + "loss_ce": 0.00018942491442430764, + "loss_iou": 0.25390625, + "loss_num": 0.01513671875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 169178700, + "step": 3020 + }, + { + "epoch": 6.728285077951003, + "grad_norm": 14.647541999816895, + "learning_rate": 1e-06, + "loss": 0.4199, + "num_input_tokens_seen": 169234748, + "step": 3021 + }, + { + "epoch": 6.728285077951003, + "loss": 0.4449549615383148, + "loss_ce": 0.00019176513887941837, + "loss_iou": 0.17578125, + "loss_num": 0.0185546875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 169234748, + "step": 3021 + }, + { + "epoch": 6.7305122494432075, + "grad_norm": 22.735586166381836, + "learning_rate": 1e-06, + "loss": 0.8144, + "num_input_tokens_seen": 169291708, + "step": 3022 + }, + { + "epoch": 6.7305122494432075, + "loss": 0.8748047351837158, + "loss_ce": 0.0002930166956502944, + "loss_iou": 0.328125, + "loss_num": 0.0439453125, + "loss_xval": 0.875, + "num_input_tokens_seen": 169291708, + "step": 3022 + }, + { + "epoch": 6.732739420935412, + "grad_norm": 10.555468559265137, + "learning_rate": 1e-06, + "loss": 0.8329, + "num_input_tokens_seen": 169346188, + "step": 3023 + }, + { + "epoch": 6.732739420935412, + "loss": 0.5163635611534119, + "loss_ce": 0.00018923793686553836, + "loss_iou": 0.1982421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 169346188, + "step": 3023 + }, + { + "epoch": 6.734966592427617, + "grad_norm": 25.756729125976562, + "learning_rate": 1e-06, + "loss": 0.7476, + "num_input_tokens_seen": 169401748, + "step": 3024 + }, + { + "epoch": 6.734966592427617, + "loss": 0.6750069856643677, + "loss_ce": 0.00020230711379554123, + "loss_iou": 0.291015625, + "loss_num": 0.0186767578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 169401748, + "step": 3024 + }, + { + "epoch": 6.737193763919822, + "grad_norm": 16.90264129638672, + "learning_rate": 1e-06, + "loss": 0.5714, + "num_input_tokens_seen": 169456820, + "step": 3025 + }, + { + "epoch": 6.737193763919822, + "loss": 0.4832545220851898, + "loss_ce": 0.0004664373118430376, + "loss_iou": 0.2041015625, + "loss_num": 0.01483154296875, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 169456820, + "step": 3025 + }, + { + "epoch": 6.739420935412027, + "grad_norm": 19.030052185058594, + "learning_rate": 1e-06, + "loss": 0.8233, + "num_input_tokens_seen": 169512288, + "step": 3026 + }, + { + "epoch": 6.739420935412027, + "loss": 0.8015903234481812, + "loss_ce": 0.00032082191319204867, + "loss_iou": 0.3515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 169512288, + "step": 3026 + }, + { + "epoch": 6.741648106904232, + "grad_norm": 22.391340255737305, + "learning_rate": 1e-06, + "loss": 0.5371, + "num_input_tokens_seen": 169568768, + "step": 3027 + }, + { + "epoch": 6.741648106904232, + "loss": 0.39635348320007324, + "loss_ce": 0.00017430493608117104, + "loss_iou": 0.154296875, + "loss_num": 0.017578125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 169568768, + "step": 3027 + }, + { + "epoch": 6.743875278396437, + "grad_norm": 23.041179656982422, + "learning_rate": 1e-06, + "loss": 0.6764, + "num_input_tokens_seen": 169623344, + "step": 3028 + }, + { + "epoch": 6.743875278396437, + "loss": 0.6840122938156128, + "loss_ce": 0.00017437028873246163, + "loss_iou": 0.296875, + "loss_num": 0.0185546875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 169623344, + "step": 3028 + }, + { + "epoch": 6.7461024498886415, + "grad_norm": 22.090497970581055, + "learning_rate": 1e-06, + "loss": 0.6771, + "num_input_tokens_seen": 169677776, + "step": 3029 + }, + { + "epoch": 6.7461024498886415, + "loss": 0.7656627297401428, + "loss_ce": 0.00015980687749106437, + "loss_iou": 0.33203125, + "loss_num": 0.020263671875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 169677776, + "step": 3029 + }, + { + "epoch": 6.748329621380846, + "grad_norm": 16.185291290283203, + "learning_rate": 1e-06, + "loss": 0.6104, + "num_input_tokens_seen": 169734036, + "step": 3030 + }, + { + "epoch": 6.748329621380846, + "loss": 0.643264651298523, + "loss_ce": 0.00019819998124148697, + "loss_iou": 0.279296875, + "loss_num": 0.01708984375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 169734036, + "step": 3030 + }, + { + "epoch": 6.750556792873051, + "grad_norm": 19.104719161987305, + "learning_rate": 1e-06, + "loss": 0.5948, + "num_input_tokens_seen": 169788020, + "step": 3031 + }, + { + "epoch": 6.750556792873051, + "loss": 0.5878357887268066, + "loss_ce": 0.00018932024249807, + "loss_iou": 0.251953125, + "loss_num": 0.0164794921875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 169788020, + "step": 3031 + }, + { + "epoch": 6.752783964365256, + "grad_norm": 23.02263641357422, + "learning_rate": 1e-06, + "loss": 0.629, + "num_input_tokens_seen": 169844864, + "step": 3032 + }, + { + "epoch": 6.752783964365256, + "loss": 0.5922929048538208, + "loss_ce": 0.0002518455730751157, + "loss_iou": 0.2373046875, + "loss_num": 0.0235595703125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 169844864, + "step": 3032 + }, + { + "epoch": 6.755011135857461, + "grad_norm": 21.84469985961914, + "learning_rate": 1e-06, + "loss": 0.6563, + "num_input_tokens_seen": 169901280, + "step": 3033 + }, + { + "epoch": 6.755011135857461, + "loss": 0.6547205448150635, + "loss_ce": 0.0001795107964426279, + "loss_iou": 0.263671875, + "loss_num": 0.025146484375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 169901280, + "step": 3033 + }, + { + "epoch": 6.757238307349666, + "grad_norm": 24.893165588378906, + "learning_rate": 1e-06, + "loss": 0.8051, + "num_input_tokens_seen": 169955720, + "step": 3034 + }, + { + "epoch": 6.757238307349666, + "loss": 0.9663440585136414, + "loss_ce": 0.00027958687860518694, + "loss_iou": 0.4140625, + "loss_num": 0.0274658203125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 169955720, + "step": 3034 + }, + { + "epoch": 6.759465478841871, + "grad_norm": 29.10569953918457, + "learning_rate": 1e-06, + "loss": 0.6784, + "num_input_tokens_seen": 170011236, + "step": 3035 + }, + { + "epoch": 6.759465478841871, + "loss": 0.6698566675186157, + "loss_ce": 0.00017896070494316518, + "loss_iou": 0.287109375, + "loss_num": 0.0191650390625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 170011236, + "step": 3035 + }, + { + "epoch": 6.7616926503340755, + "grad_norm": 22.953475952148438, + "learning_rate": 1e-06, + "loss": 1.0044, + "num_input_tokens_seen": 170068108, + "step": 3036 + }, + { + "epoch": 6.7616926503340755, + "loss": 1.014899492263794, + "loss_ce": 0.0002510766498744488, + "loss_iou": 0.416015625, + "loss_num": 0.0361328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 170068108, + "step": 3036 + }, + { + "epoch": 6.76391982182628, + "grad_norm": 18.038188934326172, + "learning_rate": 1e-06, + "loss": 0.6545, + "num_input_tokens_seen": 170127512, + "step": 3037 + }, + { + "epoch": 6.76391982182628, + "loss": 0.6023421883583069, + "loss_ce": 0.00029138405807316303, + "loss_iou": 0.271484375, + "loss_num": 0.0118408203125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 170127512, + "step": 3037 + }, + { + "epoch": 6.766146993318485, + "grad_norm": 20.037540435791016, + "learning_rate": 1e-06, + "loss": 0.5158, + "num_input_tokens_seen": 170181356, + "step": 3038 + }, + { + "epoch": 6.766146993318485, + "loss": 0.5402380228042603, + "loss_ce": 0.00019893058924935758, + "loss_iou": 0.2294921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 170181356, + "step": 3038 + }, + { + "epoch": 6.76837416481069, + "grad_norm": 16.7264347076416, + "learning_rate": 1e-06, + "loss": 0.9252, + "num_input_tokens_seen": 170237844, + "step": 3039 + }, + { + "epoch": 6.76837416481069, + "loss": 1.0988414287567139, + "loss_ce": 0.00020856756600551307, + "loss_iou": 0.46875, + "loss_num": 0.032470703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 170237844, + "step": 3039 + }, + { + "epoch": 6.770601336302895, + "grad_norm": 14.285926818847656, + "learning_rate": 1e-06, + "loss": 0.5733, + "num_input_tokens_seen": 170291264, + "step": 3040 + }, + { + "epoch": 6.770601336302895, + "loss": 0.44891494512557983, + "loss_ce": 0.00018448734772391617, + "loss_iou": 0.1826171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 170291264, + "step": 3040 + }, + { + "epoch": 6.772828507795101, + "grad_norm": 29.201885223388672, + "learning_rate": 1e-06, + "loss": 0.5023, + "num_input_tokens_seen": 170348048, + "step": 3041 + }, + { + "epoch": 6.772828507795101, + "loss": 0.4634665846824646, + "loss_ce": 0.000209752848604694, + "loss_iou": 0.2021484375, + "loss_num": 0.01190185546875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 170348048, + "step": 3041 + }, + { + "epoch": 6.775055679287306, + "grad_norm": 41.29672622680664, + "learning_rate": 1e-06, + "loss": 0.7947, + "num_input_tokens_seen": 170402500, + "step": 3042 + }, + { + "epoch": 6.775055679287306, + "loss": 0.8640490770339966, + "loss_ce": 0.00027952369418926537, + "loss_iou": 0.37109375, + "loss_num": 0.02392578125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 170402500, + "step": 3042 + }, + { + "epoch": 6.77728285077951, + "grad_norm": 31.665985107421875, + "learning_rate": 1e-06, + "loss": 0.5856, + "num_input_tokens_seen": 170458260, + "step": 3043 + }, + { + "epoch": 6.77728285077951, + "loss": 0.45639604330062866, + "loss_ce": 0.00021927341003902256, + "loss_iou": 0.18359375, + "loss_num": 0.0177001953125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 170458260, + "step": 3043 + }, + { + "epoch": 6.779510022271715, + "grad_norm": 43.43659591674805, + "learning_rate": 1e-06, + "loss": 0.7794, + "num_input_tokens_seen": 170514684, + "step": 3044 + }, + { + "epoch": 6.779510022271715, + "loss": 0.8676573038101196, + "loss_ce": 0.0009580720216035843, + "loss_iou": 0.365234375, + "loss_num": 0.02734375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 170514684, + "step": 3044 + }, + { + "epoch": 6.78173719376392, + "grad_norm": 24.82050323486328, + "learning_rate": 1e-06, + "loss": 0.531, + "num_input_tokens_seen": 170569476, + "step": 3045 + }, + { + "epoch": 6.78173719376392, + "loss": 0.5165289640426636, + "loss_ce": 0.00017151227802969515, + "loss_iou": 0.2255859375, + "loss_num": 0.01318359375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 170569476, + "step": 3045 + }, + { + "epoch": 6.783964365256125, + "grad_norm": 19.520343780517578, + "learning_rate": 1e-06, + "loss": 0.7418, + "num_input_tokens_seen": 170622728, + "step": 3046 + }, + { + "epoch": 6.783964365256125, + "loss": 0.9240363836288452, + "loss_ce": 0.00026935621281154454, + "loss_iou": 0.380859375, + "loss_num": 0.032470703125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 170622728, + "step": 3046 + }, + { + "epoch": 6.78619153674833, + "grad_norm": 18.211183547973633, + "learning_rate": 1e-06, + "loss": 0.6673, + "num_input_tokens_seen": 170679272, + "step": 3047 + }, + { + "epoch": 6.78619153674833, + "loss": 0.7375867962837219, + "loss_ce": 0.000282141612842679, + "loss_iou": 0.328125, + "loss_num": 0.0162353515625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 170679272, + "step": 3047 + }, + { + "epoch": 6.788418708240535, + "grad_norm": 21.0445499420166, + "learning_rate": 1e-06, + "loss": 0.5899, + "num_input_tokens_seen": 170733732, + "step": 3048 + }, + { + "epoch": 6.788418708240535, + "loss": 0.5784125924110413, + "loss_ce": 0.00016554353351239115, + "loss_iou": 0.248046875, + "loss_num": 0.016357421875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 170733732, + "step": 3048 + }, + { + "epoch": 6.79064587973274, + "grad_norm": 25.12030601501465, + "learning_rate": 1e-06, + "loss": 0.7525, + "num_input_tokens_seen": 170790864, + "step": 3049 + }, + { + "epoch": 6.79064587973274, + "loss": 0.8492100238800049, + "loss_ce": 0.0011875506024807692, + "loss_iou": 0.349609375, + "loss_num": 0.0299072265625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 170790864, + "step": 3049 + }, + { + "epoch": 6.7928730512249444, + "grad_norm": 14.534224510192871, + "learning_rate": 1e-06, + "loss": 0.5735, + "num_input_tokens_seen": 170846928, + "step": 3050 + }, + { + "epoch": 6.7928730512249444, + "loss": 0.6872592568397522, + "loss_ce": 0.00024754402693361044, + "loss_iou": 0.30078125, + "loss_num": 0.01708984375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 170846928, + "step": 3050 + }, + { + "epoch": 6.795100222717149, + "grad_norm": 32.43819808959961, + "learning_rate": 1e-06, + "loss": 0.7584, + "num_input_tokens_seen": 170901412, + "step": 3051 + }, + { + "epoch": 6.795100222717149, + "loss": 0.47796866297721863, + "loss_ce": 0.00018544365593697876, + "loss_iou": 0.2138671875, + "loss_num": 0.00994873046875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 170901412, + "step": 3051 + }, + { + "epoch": 6.797327394209354, + "grad_norm": 26.212574005126953, + "learning_rate": 1e-06, + "loss": 0.7598, + "num_input_tokens_seen": 170955608, + "step": 3052 + }, + { + "epoch": 6.797327394209354, + "loss": 0.8884332776069641, + "loss_ce": 0.0002496936358511448, + "loss_iou": 0.39453125, + "loss_num": 0.019775390625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 170955608, + "step": 3052 + }, + { + "epoch": 6.799554565701559, + "grad_norm": 18.317108154296875, + "learning_rate": 1e-06, + "loss": 0.5841, + "num_input_tokens_seen": 171010808, + "step": 3053 + }, + { + "epoch": 6.799554565701559, + "loss": 0.6397278308868408, + "loss_ce": 0.00020147013128735125, + "loss_iou": 0.29296875, + "loss_num": 0.0111083984375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 171010808, + "step": 3053 + }, + { + "epoch": 6.801781737193764, + "grad_norm": 17.737529754638672, + "learning_rate": 1e-06, + "loss": 0.8411, + "num_input_tokens_seen": 171066148, + "step": 3054 + }, + { + "epoch": 6.801781737193764, + "loss": 0.8929228186607361, + "loss_ce": 0.0003446534974500537, + "loss_iou": 0.375, + "loss_num": 0.0284423828125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 171066148, + "step": 3054 + }, + { + "epoch": 6.804008908685969, + "grad_norm": 18.264446258544922, + "learning_rate": 1e-06, + "loss": 0.8505, + "num_input_tokens_seen": 171121628, + "step": 3055 + }, + { + "epoch": 6.804008908685969, + "loss": 0.6899810433387756, + "loss_ce": 0.00028378370916470885, + "loss_iou": 0.296875, + "loss_num": 0.019287109375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 171121628, + "step": 3055 + }, + { + "epoch": 6.806236080178174, + "grad_norm": 16.88157844543457, + "learning_rate": 1e-06, + "loss": 0.5521, + "num_input_tokens_seen": 171177668, + "step": 3056 + }, + { + "epoch": 6.806236080178174, + "loss": 0.5917320251464844, + "loss_ce": 0.00017928854504134506, + "loss_iou": 0.263671875, + "loss_num": 0.0130615234375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 171177668, + "step": 3056 + }, + { + "epoch": 6.8084632516703785, + "grad_norm": 19.890636444091797, + "learning_rate": 1e-06, + "loss": 0.7256, + "num_input_tokens_seen": 171234924, + "step": 3057 + }, + { + "epoch": 6.8084632516703785, + "loss": 0.9203733205795288, + "loss_ce": 0.0002072805364150554, + "loss_iou": 0.35546875, + "loss_num": 0.0419921875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 171234924, + "step": 3057 + }, + { + "epoch": 6.810690423162583, + "grad_norm": 24.549684524536133, + "learning_rate": 1e-06, + "loss": 0.6066, + "num_input_tokens_seen": 171289624, + "step": 3058 + }, + { + "epoch": 6.810690423162583, + "loss": 0.6476849317550659, + "loss_ce": 0.00022400161833502352, + "loss_iou": 0.28125, + "loss_num": 0.0167236328125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 171289624, + "step": 3058 + }, + { + "epoch": 6.812917594654788, + "grad_norm": 29.96609878540039, + "learning_rate": 1e-06, + "loss": 0.544, + "num_input_tokens_seen": 171345904, + "step": 3059 + }, + { + "epoch": 6.812917594654788, + "loss": 0.37130942940711975, + "loss_ce": 0.00021568889496847987, + "loss_iou": 0.150390625, + "loss_num": 0.0137939453125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 171345904, + "step": 3059 + }, + { + "epoch": 6.815144766146993, + "grad_norm": 17.452735900878906, + "learning_rate": 1e-06, + "loss": 0.6359, + "num_input_tokens_seen": 171403776, + "step": 3060 + }, + { + "epoch": 6.815144766146993, + "loss": 0.5905120372772217, + "loss_ce": 0.00018000410636886954, + "loss_iou": 0.24609375, + "loss_num": 0.019775390625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 171403776, + "step": 3060 + }, + { + "epoch": 6.817371937639198, + "grad_norm": 24.2515926361084, + "learning_rate": 1e-06, + "loss": 0.6321, + "num_input_tokens_seen": 171459812, + "step": 3061 + }, + { + "epoch": 6.817371937639198, + "loss": 0.6198359131813049, + "loss_ce": 0.00020700221648439765, + "loss_iou": 0.27734375, + "loss_num": 0.01318359375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 171459812, + "step": 3061 + }, + { + "epoch": 6.819599109131403, + "grad_norm": 26.85552215576172, + "learning_rate": 1e-06, + "loss": 0.583, + "num_input_tokens_seen": 171517492, + "step": 3062 + }, + { + "epoch": 6.819599109131403, + "loss": 0.5180150270462036, + "loss_ce": 0.0001927339908434078, + "loss_iou": 0.1943359375, + "loss_num": 0.02587890625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 171517492, + "step": 3062 + }, + { + "epoch": 6.821826280623608, + "grad_norm": 17.88697624206543, + "learning_rate": 1e-06, + "loss": 0.587, + "num_input_tokens_seen": 171572508, + "step": 3063 + }, + { + "epoch": 6.821826280623608, + "loss": 0.6976819634437561, + "loss_ce": 0.00017221369489561766, + "loss_iou": 0.294921875, + "loss_num": 0.021728515625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 171572508, + "step": 3063 + }, + { + "epoch": 6.8240534521158125, + "grad_norm": 18.11661148071289, + "learning_rate": 1e-06, + "loss": 0.5901, + "num_input_tokens_seen": 171630664, + "step": 3064 + }, + { + "epoch": 6.8240534521158125, + "loss": 0.6340024471282959, + "loss_ce": 0.00021340540843084455, + "loss_iou": 0.2734375, + "loss_num": 0.017578125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 171630664, + "step": 3064 + }, + { + "epoch": 6.826280623608017, + "grad_norm": 15.912714004516602, + "learning_rate": 1e-06, + "loss": 0.7199, + "num_input_tokens_seen": 171688464, + "step": 3065 + }, + { + "epoch": 6.826280623608017, + "loss": 0.7723828554153442, + "loss_ce": 0.0002881219261325896, + "loss_iou": 0.34765625, + "loss_num": 0.015625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 171688464, + "step": 3065 + }, + { + "epoch": 6.828507795100223, + "grad_norm": 104.20018768310547, + "learning_rate": 1e-06, + "loss": 0.829, + "num_input_tokens_seen": 171743324, + "step": 3066 + }, + { + "epoch": 6.828507795100223, + "loss": 1.210741400718689, + "loss_ce": 0.0007805360364727676, + "loss_iou": 0.51953125, + "loss_num": 0.033935546875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 171743324, + "step": 3066 + }, + { + "epoch": 6.830734966592428, + "grad_norm": 16.49806785583496, + "learning_rate": 1e-06, + "loss": 0.6798, + "num_input_tokens_seen": 171799732, + "step": 3067 + }, + { + "epoch": 6.830734966592428, + "loss": 0.5802951455116272, + "loss_ce": 0.00021701655350625515, + "loss_iou": 0.2294921875, + "loss_num": 0.024169921875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 171799732, + "step": 3067 + }, + { + "epoch": 6.832962138084633, + "grad_norm": 20.636322021484375, + "learning_rate": 1e-06, + "loss": 0.6688, + "num_input_tokens_seen": 171855072, + "step": 3068 + }, + { + "epoch": 6.832962138084633, + "loss": 0.7048916816711426, + "loss_ce": 0.00017975937225855887, + "loss_iou": 0.310546875, + "loss_num": 0.016357421875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 171855072, + "step": 3068 + }, + { + "epoch": 6.835189309576838, + "grad_norm": 14.782151222229004, + "learning_rate": 1e-06, + "loss": 0.7034, + "num_input_tokens_seen": 171912412, + "step": 3069 + }, + { + "epoch": 6.835189309576838, + "loss": 0.6367747187614441, + "loss_ce": 0.0015207845717668533, + "loss_iou": 0.2734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 171912412, + "step": 3069 + }, + { + "epoch": 6.8374164810690425, + "grad_norm": 22.69580078125, + "learning_rate": 1e-06, + "loss": 0.6691, + "num_input_tokens_seen": 171967604, + "step": 3070 + }, + { + "epoch": 6.8374164810690425, + "loss": 0.6948800086975098, + "loss_ce": 0.00017786939861252904, + "loss_iou": 0.318359375, + "loss_num": 0.01171875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 171967604, + "step": 3070 + }, + { + "epoch": 6.839643652561247, + "grad_norm": 16.151691436767578, + "learning_rate": 1e-06, + "loss": 0.7264, + "num_input_tokens_seen": 172024480, + "step": 3071 + }, + { + "epoch": 6.839643652561247, + "loss": 0.7126206159591675, + "loss_ce": 0.0002182956231990829, + "loss_iou": 0.31640625, + "loss_num": 0.0157470703125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 172024480, + "step": 3071 + }, + { + "epoch": 6.841870824053452, + "grad_norm": 23.6954402923584, + "learning_rate": 1e-06, + "loss": 0.5101, + "num_input_tokens_seen": 172081368, + "step": 3072 + }, + { + "epoch": 6.841870824053452, + "loss": 0.41563284397125244, + "loss_ce": 0.00034961808705702424, + "loss_iou": 0.171875, + "loss_num": 0.014404296875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 172081368, + "step": 3072 + }, + { + "epoch": 6.844097995545657, + "grad_norm": 14.069080352783203, + "learning_rate": 1e-06, + "loss": 0.4567, + "num_input_tokens_seen": 172134880, + "step": 3073 + }, + { + "epoch": 6.844097995545657, + "loss": 0.4463517367839813, + "loss_ce": 0.00018475353135727346, + "loss_iou": 0.18359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 172134880, + "step": 3073 + }, + { + "epoch": 6.846325167037862, + "grad_norm": 16.129291534423828, + "learning_rate": 1e-06, + "loss": 0.7115, + "num_input_tokens_seen": 172191948, + "step": 3074 + }, + { + "epoch": 6.846325167037862, + "loss": 1.0690743923187256, + "loss_ce": 0.00022675658692605793, + "loss_iou": 0.486328125, + "loss_num": 0.0194091796875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 172191948, + "step": 3074 + }, + { + "epoch": 6.848552338530067, + "grad_norm": 15.745563507080078, + "learning_rate": 1e-06, + "loss": 0.4251, + "num_input_tokens_seen": 172250304, + "step": 3075 + }, + { + "epoch": 6.848552338530067, + "loss": 0.40329307317733765, + "loss_ce": 0.0002169150102417916, + "loss_iou": 0.1806640625, + "loss_num": 0.00848388671875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 172250304, + "step": 3075 + }, + { + "epoch": 6.850779510022272, + "grad_norm": 23.94293975830078, + "learning_rate": 1e-06, + "loss": 0.7518, + "num_input_tokens_seen": 172304700, + "step": 3076 + }, + { + "epoch": 6.850779510022272, + "loss": 0.6821050643920898, + "loss_ce": 0.00022032001288607717, + "loss_iou": 0.2890625, + "loss_num": 0.0205078125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 172304700, + "step": 3076 + }, + { + "epoch": 6.853006681514477, + "grad_norm": 22.777400970458984, + "learning_rate": 1e-06, + "loss": 0.6088, + "num_input_tokens_seen": 172360340, + "step": 3077 + }, + { + "epoch": 6.853006681514477, + "loss": 0.36834925413131714, + "loss_ce": 0.00030725146643817425, + "loss_iou": 0.1416015625, + "loss_num": 0.016845703125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 172360340, + "step": 3077 + }, + { + "epoch": 6.855233853006681, + "grad_norm": 23.746376037597656, + "learning_rate": 1e-06, + "loss": 0.592, + "num_input_tokens_seen": 172418912, + "step": 3078 + }, + { + "epoch": 6.855233853006681, + "loss": 0.6163959503173828, + "loss_ce": 0.0001850596017902717, + "loss_iou": 0.265625, + "loss_num": 0.0166015625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 172418912, + "step": 3078 + }, + { + "epoch": 6.857461024498886, + "grad_norm": 35.40282440185547, + "learning_rate": 1e-06, + "loss": 0.8302, + "num_input_tokens_seen": 172477360, + "step": 3079 + }, + { + "epoch": 6.857461024498886, + "loss": 0.8708090782165527, + "loss_ce": 0.00020358533947728574, + "loss_iou": 0.3828125, + "loss_num": 0.020751953125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 172477360, + "step": 3079 + }, + { + "epoch": 6.859688195991091, + "grad_norm": 15.791609764099121, + "learning_rate": 1e-06, + "loss": 0.5824, + "num_input_tokens_seen": 172533360, + "step": 3080 + }, + { + "epoch": 6.859688195991091, + "loss": 0.8971776962280273, + "loss_ce": 0.00020497874356806278, + "loss_iou": 0.388671875, + "loss_num": 0.0244140625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 172533360, + "step": 3080 + }, + { + "epoch": 6.861915367483296, + "grad_norm": 16.064542770385742, + "learning_rate": 1e-06, + "loss": 0.6309, + "num_input_tokens_seen": 172586820, + "step": 3081 + }, + { + "epoch": 6.861915367483296, + "loss": 0.5737239122390747, + "loss_ce": 0.0002375697804382071, + "loss_iou": 0.267578125, + "loss_num": 0.007598876953125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 172586820, + "step": 3081 + }, + { + "epoch": 6.864142538975501, + "grad_norm": 23.688007354736328, + "learning_rate": 1e-06, + "loss": 0.7019, + "num_input_tokens_seen": 172644792, + "step": 3082 + }, + { + "epoch": 6.864142538975501, + "loss": 0.7206803560256958, + "loss_ce": 0.00022136216284707189, + "loss_iou": 0.322265625, + "loss_num": 0.01519775390625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 172644792, + "step": 3082 + }, + { + "epoch": 6.866369710467706, + "grad_norm": 16.832897186279297, + "learning_rate": 1e-06, + "loss": 0.4801, + "num_input_tokens_seen": 172703448, + "step": 3083 + }, + { + "epoch": 6.866369710467706, + "loss": 0.4925019443035126, + "loss_ce": 0.00019236501248087734, + "loss_iou": 0.2158203125, + "loss_num": 0.01202392578125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 172703448, + "step": 3083 + }, + { + "epoch": 6.868596881959911, + "grad_norm": 19.060598373413086, + "learning_rate": 1e-06, + "loss": 0.6689, + "num_input_tokens_seen": 172759888, + "step": 3084 + }, + { + "epoch": 6.868596881959911, + "loss": 0.6370642185211182, + "loss_ce": 0.0003454496618360281, + "loss_iou": 0.275390625, + "loss_num": 0.01708984375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 172759888, + "step": 3084 + }, + { + "epoch": 6.870824053452115, + "grad_norm": 16.259754180908203, + "learning_rate": 1e-06, + "loss": 0.4995, + "num_input_tokens_seen": 172815816, + "step": 3085 + }, + { + "epoch": 6.870824053452115, + "loss": 0.529482364654541, + "loss_ce": 0.00018545490456745028, + "loss_iou": 0.228515625, + "loss_num": 0.01434326171875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 172815816, + "step": 3085 + }, + { + "epoch": 6.873051224944321, + "grad_norm": 21.378643035888672, + "learning_rate": 1e-06, + "loss": 0.6019, + "num_input_tokens_seen": 172871560, + "step": 3086 + }, + { + "epoch": 6.873051224944321, + "loss": 0.6235019564628601, + "loss_ce": 0.00021095495321787894, + "loss_iou": 0.255859375, + "loss_num": 0.02294921875, + "loss_xval": 0.625, + "num_input_tokens_seen": 172871560, + "step": 3086 + }, + { + "epoch": 6.875278396436526, + "grad_norm": 36.62589645385742, + "learning_rate": 1e-06, + "loss": 0.7479, + "num_input_tokens_seen": 172926412, + "step": 3087 + }, + { + "epoch": 6.875278396436526, + "loss": 0.9482687711715698, + "loss_ce": 0.00027069164207205176, + "loss_iou": 0.412109375, + "loss_num": 0.024658203125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 172926412, + "step": 3087 + }, + { + "epoch": 6.877505567928731, + "grad_norm": 21.01188087463379, + "learning_rate": 1e-06, + "loss": 0.548, + "num_input_tokens_seen": 172984452, + "step": 3088 + }, + { + "epoch": 6.877505567928731, + "loss": 0.5002177357673645, + "loss_ce": 0.00021776201901957393, + "loss_iou": 0.2216796875, + "loss_num": 0.01129150390625, + "loss_xval": 0.5, + "num_input_tokens_seen": 172984452, + "step": 3088 + }, + { + "epoch": 6.879732739420936, + "grad_norm": 16.86750030517578, + "learning_rate": 1e-06, + "loss": 0.6602, + "num_input_tokens_seen": 173040592, + "step": 3089 + }, + { + "epoch": 6.879732739420936, + "loss": 0.757439911365509, + "loss_ce": 0.0003598659241106361, + "loss_iou": 0.314453125, + "loss_num": 0.02587890625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 173040592, + "step": 3089 + }, + { + "epoch": 6.881959910913141, + "grad_norm": 21.569618225097656, + "learning_rate": 1e-06, + "loss": 0.7411, + "num_input_tokens_seen": 173097352, + "step": 3090 + }, + { + "epoch": 6.881959910913141, + "loss": 0.8854343891143799, + "loss_ce": 0.0001805470819817856, + "loss_iou": 0.376953125, + "loss_num": 0.0263671875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 173097352, + "step": 3090 + }, + { + "epoch": 6.8841870824053455, + "grad_norm": 49.518394470214844, + "learning_rate": 1e-06, + "loss": 0.5467, + "num_input_tokens_seen": 173156100, + "step": 3091 + }, + { + "epoch": 6.8841870824053455, + "loss": 0.465660035610199, + "loss_ce": 0.00020591478096321225, + "loss_iou": 0.2041015625, + "loss_num": 0.01153564453125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 173156100, + "step": 3091 + }, + { + "epoch": 6.88641425389755, + "grad_norm": 75.19503784179688, + "learning_rate": 1e-06, + "loss": 0.7018, + "num_input_tokens_seen": 173212776, + "step": 3092 + }, + { + "epoch": 6.88641425389755, + "loss": 0.8085219860076904, + "loss_ce": 0.00029449607245624065, + "loss_iou": 0.34375, + "loss_num": 0.024169921875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 173212776, + "step": 3092 + }, + { + "epoch": 6.888641425389755, + "grad_norm": 18.865760803222656, + "learning_rate": 1e-06, + "loss": 0.7596, + "num_input_tokens_seen": 173267624, + "step": 3093 + }, + { + "epoch": 6.888641425389755, + "loss": 0.8561751246452332, + "loss_ce": 0.00021812312479596585, + "loss_iou": 0.3671875, + "loss_num": 0.024658203125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 173267624, + "step": 3093 + }, + { + "epoch": 6.89086859688196, + "grad_norm": 26.241153717041016, + "learning_rate": 1e-06, + "loss": 0.6993, + "num_input_tokens_seen": 173324276, + "step": 3094 + }, + { + "epoch": 6.89086859688196, + "loss": 0.7165099382400513, + "loss_ce": 0.00020136788953095675, + "loss_iou": 0.294921875, + "loss_num": 0.025634765625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 173324276, + "step": 3094 + }, + { + "epoch": 6.893095768374165, + "grad_norm": 17.569229125976562, + "learning_rate": 1e-06, + "loss": 0.5308, + "num_input_tokens_seen": 173382708, + "step": 3095 + }, + { + "epoch": 6.893095768374165, + "loss": 0.4684421718120575, + "loss_ce": 0.0001804671046556905, + "loss_iou": 0.1953125, + "loss_num": 0.015625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 173382708, + "step": 3095 + }, + { + "epoch": 6.89532293986637, + "grad_norm": 13.874095916748047, + "learning_rate": 1e-06, + "loss": 0.7751, + "num_input_tokens_seen": 173438884, + "step": 3096 + }, + { + "epoch": 6.89532293986637, + "loss": 0.9164137840270996, + "loss_ce": 0.00021500332513824105, + "loss_iou": 0.345703125, + "loss_num": 0.045166015625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 173438884, + "step": 3096 + }, + { + "epoch": 6.897550111358575, + "grad_norm": 19.509395599365234, + "learning_rate": 1e-06, + "loss": 0.523, + "num_input_tokens_seen": 173493924, + "step": 3097 + }, + { + "epoch": 6.897550111358575, + "loss": 0.6444209814071655, + "loss_ce": 0.000255966791883111, + "loss_iou": 0.28125, + "loss_num": 0.0167236328125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 173493924, + "step": 3097 + }, + { + "epoch": 6.8997772828507795, + "grad_norm": 19.37684440612793, + "learning_rate": 1e-06, + "loss": 0.6321, + "num_input_tokens_seen": 173549456, + "step": 3098 + }, + { + "epoch": 6.8997772828507795, + "loss": 0.6488611698150635, + "loss_ce": 0.00017949687025975436, + "loss_iou": 0.271484375, + "loss_num": 0.021484375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 173549456, + "step": 3098 + }, + { + "epoch": 6.902004454342984, + "grad_norm": 17.55590057373047, + "learning_rate": 1e-06, + "loss": 0.5922, + "num_input_tokens_seen": 173608184, + "step": 3099 + }, + { + "epoch": 6.902004454342984, + "loss": 0.43987372517585754, + "loss_ce": 0.00017645125626586378, + "loss_iou": 0.19140625, + "loss_num": 0.01141357421875, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 173608184, + "step": 3099 + }, + { + "epoch": 6.904231625835189, + "grad_norm": 17.88399887084961, + "learning_rate": 1e-06, + "loss": 0.5861, + "num_input_tokens_seen": 173663616, + "step": 3100 + }, + { + "epoch": 6.904231625835189, + "loss": 0.5784924030303955, + "loss_ce": 0.00018430282943882048, + "loss_iou": 0.2470703125, + "loss_num": 0.016845703125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 173663616, + "step": 3100 + }, + { + "epoch": 6.906458797327394, + "grad_norm": 16.59260368347168, + "learning_rate": 1e-06, + "loss": 0.5021, + "num_input_tokens_seen": 173716408, + "step": 3101 + }, + { + "epoch": 6.906458797327394, + "loss": 0.5853925943374634, + "loss_ce": 0.0001875399611890316, + "loss_iou": 0.2392578125, + "loss_num": 0.0213623046875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 173716408, + "step": 3101 + }, + { + "epoch": 6.908685968819599, + "grad_norm": 17.521724700927734, + "learning_rate": 1e-06, + "loss": 0.6656, + "num_input_tokens_seen": 173772800, + "step": 3102 + }, + { + "epoch": 6.908685968819599, + "loss": 0.6075997948646545, + "loss_ce": 0.0001779411395546049, + "loss_iou": 0.2392578125, + "loss_num": 0.025634765625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 173772800, + "step": 3102 + }, + { + "epoch": 6.910913140311804, + "grad_norm": 28.117921829223633, + "learning_rate": 1e-06, + "loss": 0.6282, + "num_input_tokens_seen": 173829652, + "step": 3103 + }, + { + "epoch": 6.910913140311804, + "loss": 0.5315419435501099, + "loss_ce": 0.00016988006245810539, + "loss_iou": 0.2294921875, + "loss_num": 0.01446533203125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 173829652, + "step": 3103 + }, + { + "epoch": 6.913140311804009, + "grad_norm": 22.19889259338379, + "learning_rate": 1e-06, + "loss": 0.7273, + "num_input_tokens_seen": 173884456, + "step": 3104 + }, + { + "epoch": 6.913140311804009, + "loss": 0.7211758494377136, + "loss_ce": 0.0002286143571836874, + "loss_iou": 0.30859375, + "loss_num": 0.0208740234375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 173884456, + "step": 3104 + }, + { + "epoch": 6.9153674832962135, + "grad_norm": 11.665279388427734, + "learning_rate": 1e-06, + "loss": 0.62, + "num_input_tokens_seen": 173939820, + "step": 3105 + }, + { + "epoch": 6.9153674832962135, + "loss": 0.4306148886680603, + "loss_ce": 0.0001950013975147158, + "loss_iou": 0.1943359375, + "loss_num": 0.00836181640625, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 173939820, + "step": 3105 + }, + { + "epoch": 6.917594654788418, + "grad_norm": 20.158309936523438, + "learning_rate": 1e-06, + "loss": 0.5542, + "num_input_tokens_seen": 173995772, + "step": 3106 + }, + { + "epoch": 6.917594654788418, + "loss": 0.44402259588241577, + "loss_ce": 0.00017491859034635127, + "loss_iou": 0.1796875, + "loss_num": 0.0169677734375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 173995772, + "step": 3106 + }, + { + "epoch": 6.919821826280623, + "grad_norm": 18.162250518798828, + "learning_rate": 1e-06, + "loss": 0.482, + "num_input_tokens_seen": 174053676, + "step": 3107 + }, + { + "epoch": 6.919821826280623, + "loss": 0.42890018224716187, + "loss_ce": 0.0001892539585242048, + "loss_iou": 0.17578125, + "loss_num": 0.015380859375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 174053676, + "step": 3107 + }, + { + "epoch": 6.922048997772828, + "grad_norm": 48.48846435546875, + "learning_rate": 1e-06, + "loss": 0.7151, + "num_input_tokens_seen": 174112208, + "step": 3108 + }, + { + "epoch": 6.922048997772828, + "loss": 0.827580451965332, + "loss_ce": 0.00018786173313856125, + "loss_iou": 0.373046875, + "loss_num": 0.0164794921875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 174112208, + "step": 3108 + }, + { + "epoch": 6.924276169265033, + "grad_norm": 25.14827537536621, + "learning_rate": 1e-06, + "loss": 0.7384, + "num_input_tokens_seen": 174168272, + "step": 3109 + }, + { + "epoch": 6.924276169265033, + "loss": 0.7882997393608093, + "loss_ce": 0.00021379378449637443, + "loss_iou": 0.35546875, + "loss_num": 0.0157470703125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 174168272, + "step": 3109 + }, + { + "epoch": 6.926503340757238, + "grad_norm": 18.243032455444336, + "learning_rate": 1e-06, + "loss": 0.7986, + "num_input_tokens_seen": 174226660, + "step": 3110 + }, + { + "epoch": 6.926503340757238, + "loss": 0.8874292969703674, + "loss_ce": 0.00022228219313547015, + "loss_iou": 0.359375, + "loss_num": 0.033447265625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 174226660, + "step": 3110 + }, + { + "epoch": 6.928730512249444, + "grad_norm": 14.311541557312012, + "learning_rate": 1e-06, + "loss": 0.6963, + "num_input_tokens_seen": 174279336, + "step": 3111 + }, + { + "epoch": 6.928730512249444, + "loss": 0.7939878702163696, + "loss_ce": 0.00028668707818724215, + "loss_iou": 0.318359375, + "loss_num": 0.031494140625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 174279336, + "step": 3111 + }, + { + "epoch": 6.9309576837416484, + "grad_norm": 20.134523391723633, + "learning_rate": 1e-06, + "loss": 0.7007, + "num_input_tokens_seen": 174335124, + "step": 3112 + }, + { + "epoch": 6.9309576837416484, + "loss": 0.7140600681304932, + "loss_ce": 0.00019287687609903514, + "loss_iou": 0.302734375, + "loss_num": 0.021728515625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 174335124, + "step": 3112 + }, + { + "epoch": 6.933184855233853, + "grad_norm": 18.099767684936523, + "learning_rate": 1e-06, + "loss": 0.8233, + "num_input_tokens_seen": 174391556, + "step": 3113 + }, + { + "epoch": 6.933184855233853, + "loss": 0.7465390563011169, + "loss_ce": 0.00020114146173000336, + "loss_iou": 0.32421875, + "loss_num": 0.02001953125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 174391556, + "step": 3113 + }, + { + "epoch": 6.935412026726058, + "grad_norm": 22.79920768737793, + "learning_rate": 1e-06, + "loss": 0.6987, + "num_input_tokens_seen": 174446136, + "step": 3114 + }, + { + "epoch": 6.935412026726058, + "loss": 0.6254494190216064, + "loss_ce": 0.00020533311180770397, + "loss_iou": 0.2490234375, + "loss_num": 0.0252685546875, + "loss_xval": 0.625, + "num_input_tokens_seen": 174446136, + "step": 3114 + }, + { + "epoch": 6.937639198218263, + "grad_norm": 25.27695655822754, + "learning_rate": 1e-06, + "loss": 0.6646, + "num_input_tokens_seen": 174500684, + "step": 3115 + }, + { + "epoch": 6.937639198218263, + "loss": 0.7533596754074097, + "loss_ce": 0.000185904442332685, + "loss_iou": 0.328125, + "loss_num": 0.0191650390625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 174500684, + "step": 3115 + }, + { + "epoch": 6.939866369710468, + "grad_norm": 15.809171676635742, + "learning_rate": 1e-06, + "loss": 0.4947, + "num_input_tokens_seen": 174554360, + "step": 3116 + }, + { + "epoch": 6.939866369710468, + "loss": 0.6411548852920532, + "loss_ce": 0.0002857603249140084, + "loss_iou": 0.2734375, + "loss_num": 0.0184326171875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 174554360, + "step": 3116 + }, + { + "epoch": 6.942093541202673, + "grad_norm": 21.57350730895996, + "learning_rate": 1e-06, + "loss": 0.5634, + "num_input_tokens_seen": 174608692, + "step": 3117 + }, + { + "epoch": 6.942093541202673, + "loss": 0.550343930721283, + "loss_ce": 0.00017302096239291131, + "loss_iou": 0.2421875, + "loss_num": 0.0130615234375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 174608692, + "step": 3117 + }, + { + "epoch": 6.944320712694878, + "grad_norm": 16.204471588134766, + "learning_rate": 1e-06, + "loss": 0.7834, + "num_input_tokens_seen": 174665252, + "step": 3118 + }, + { + "epoch": 6.944320712694878, + "loss": 0.9167299270629883, + "loss_ce": 0.0008363361121155322, + "loss_iou": 0.41015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 174665252, + "step": 3118 + }, + { + "epoch": 6.9465478841870825, + "grad_norm": 17.026905059814453, + "learning_rate": 1e-06, + "loss": 0.5704, + "num_input_tokens_seen": 174719864, + "step": 3119 + }, + { + "epoch": 6.9465478841870825, + "loss": 0.657658576965332, + "loss_ce": 0.0001878339098766446, + "loss_iou": 0.28125, + "loss_num": 0.0191650390625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 174719864, + "step": 3119 + }, + { + "epoch": 6.948775055679287, + "grad_norm": 22.539409637451172, + "learning_rate": 1e-06, + "loss": 0.75, + "num_input_tokens_seen": 174773804, + "step": 3120 + }, + { + "epoch": 6.948775055679287, + "loss": 0.5996890068054199, + "loss_ce": 0.00020167973707430065, + "loss_iou": 0.26171875, + "loss_num": 0.014892578125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 174773804, + "step": 3120 + }, + { + "epoch": 6.951002227171492, + "grad_norm": 22.28173828125, + "learning_rate": 1e-06, + "loss": 0.7117, + "num_input_tokens_seen": 174828396, + "step": 3121 + }, + { + "epoch": 6.951002227171492, + "loss": 0.5873724818229675, + "loss_ce": 0.00021425656450446695, + "loss_iou": 0.25, + "loss_num": 0.017333984375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 174828396, + "step": 3121 + }, + { + "epoch": 6.953229398663697, + "grad_norm": 54.56577682495117, + "learning_rate": 1e-06, + "loss": 0.4691, + "num_input_tokens_seen": 174884540, + "step": 3122 + }, + { + "epoch": 6.953229398663697, + "loss": 0.5313279628753662, + "loss_ce": 0.0003220998914912343, + "loss_iou": 0.2333984375, + "loss_num": 0.0128173828125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 174884540, + "step": 3122 + }, + { + "epoch": 6.955456570155902, + "grad_norm": 33.31269454956055, + "learning_rate": 1e-06, + "loss": 0.8895, + "num_input_tokens_seen": 174939284, + "step": 3123 + }, + { + "epoch": 6.955456570155902, + "loss": 0.7173866033554077, + "loss_ce": 0.0003456372069194913, + "loss_iou": 0.2734375, + "loss_num": 0.03369140625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 174939284, + "step": 3123 + }, + { + "epoch": 6.957683741648107, + "grad_norm": 25.145395278930664, + "learning_rate": 1e-06, + "loss": 0.5721, + "num_input_tokens_seen": 174994868, + "step": 3124 + }, + { + "epoch": 6.957683741648107, + "loss": 0.6920799612998962, + "loss_ce": 0.00018543089390732348, + "loss_iou": 0.27734375, + "loss_num": 0.0277099609375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 174994868, + "step": 3124 + }, + { + "epoch": 6.959910913140312, + "grad_norm": 31.5352840423584, + "learning_rate": 1e-06, + "loss": 0.6128, + "num_input_tokens_seen": 175050648, + "step": 3125 + }, + { + "epoch": 6.959910913140312, + "loss": 0.5849337577819824, + "loss_ce": 0.00021696032490581274, + "loss_iou": 0.26171875, + "loss_num": 0.0123291015625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 175050648, + "step": 3125 + }, + { + "epoch": 6.9621380846325165, + "grad_norm": 28.711498260498047, + "learning_rate": 1e-06, + "loss": 0.7421, + "num_input_tokens_seen": 175109124, + "step": 3126 + }, + { + "epoch": 6.9621380846325165, + "loss": 0.5786285400390625, + "loss_ce": 0.0002593994140625, + "loss_iou": 0.25390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 175109124, + "step": 3126 + }, + { + "epoch": 6.964365256124721, + "grad_norm": 81.01925659179688, + "learning_rate": 1e-06, + "loss": 0.6552, + "num_input_tokens_seen": 175165652, + "step": 3127 + }, + { + "epoch": 6.964365256124721, + "loss": 0.4998403489589691, + "loss_ce": 0.00020655704429373145, + "loss_iou": 0.212890625, + "loss_num": 0.01483154296875, + "loss_xval": 0.5, + "num_input_tokens_seen": 175165652, + "step": 3127 + }, + { + "epoch": 6.966592427616926, + "grad_norm": 20.51897621154785, + "learning_rate": 1e-06, + "loss": 0.5369, + "num_input_tokens_seen": 175223100, + "step": 3128 + }, + { + "epoch": 6.966592427616926, + "loss": 0.5199558734893799, + "loss_ce": 0.00018052573432214558, + "loss_iou": 0.240234375, + "loss_num": 0.0081787109375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 175223100, + "step": 3128 + }, + { + "epoch": 6.968819599109131, + "grad_norm": 13.959333419799805, + "learning_rate": 1e-06, + "loss": 0.5609, + "num_input_tokens_seen": 175280568, + "step": 3129 + }, + { + "epoch": 6.968819599109131, + "loss": 0.7189445495605469, + "loss_ce": 0.0001945712574524805, + "loss_iou": 0.333984375, + "loss_num": 0.010009765625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 175280568, + "step": 3129 + }, + { + "epoch": 6.971046770601336, + "grad_norm": 22.97583770751953, + "learning_rate": 1e-06, + "loss": 1.0313, + "num_input_tokens_seen": 175337324, + "step": 3130 + }, + { + "epoch": 6.971046770601336, + "loss": 0.8275806903839111, + "loss_ce": 0.00018814984650816768, + "loss_iou": 0.359375, + "loss_num": 0.0213623046875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 175337324, + "step": 3130 + }, + { + "epoch": 6.973273942093542, + "grad_norm": 34.06120681762695, + "learning_rate": 1e-06, + "loss": 0.8319, + "num_input_tokens_seen": 175392008, + "step": 3131 + }, + { + "epoch": 6.973273942093542, + "loss": 0.9467490315437317, + "loss_ce": 0.00021585801732726395, + "loss_iou": 0.412109375, + "loss_num": 0.024658203125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 175392008, + "step": 3131 + }, + { + "epoch": 6.9755011135857465, + "grad_norm": 16.659931182861328, + "learning_rate": 1e-06, + "loss": 0.5561, + "num_input_tokens_seen": 175450048, + "step": 3132 + }, + { + "epoch": 6.9755011135857465, + "loss": 0.5903693437576294, + "loss_ce": 0.0002204178017564118, + "loss_iou": 0.255859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 175450048, + "step": 3132 + }, + { + "epoch": 6.977728285077951, + "grad_norm": 31.275850296020508, + "learning_rate": 1e-06, + "loss": 0.7978, + "num_input_tokens_seen": 175504996, + "step": 3133 + }, + { + "epoch": 6.977728285077951, + "loss": 1.0408833026885986, + "loss_ce": 0.0002338237245567143, + "loss_iou": 0.419921875, + "loss_num": 0.039794921875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 175504996, + "step": 3133 + }, + { + "epoch": 6.979955456570156, + "grad_norm": 19.884502410888672, + "learning_rate": 1e-06, + "loss": 0.5947, + "num_input_tokens_seen": 175562712, + "step": 3134 + }, + { + "epoch": 6.979955456570156, + "loss": 0.4893288016319275, + "loss_ce": 0.00019303697627037764, + "loss_iou": 0.2314453125, + "loss_num": 0.00555419921875, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 175562712, + "step": 3134 + }, + { + "epoch": 6.982182628062361, + "grad_norm": 13.180671691894531, + "learning_rate": 1e-06, + "loss": 0.7, + "num_input_tokens_seen": 175620280, + "step": 3135 + }, + { + "epoch": 6.982182628062361, + "loss": 0.8063207864761353, + "loss_ce": 0.0004125875420868397, + "loss_iou": 0.35546875, + "loss_num": 0.01904296875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 175620280, + "step": 3135 + }, + { + "epoch": 6.984409799554566, + "grad_norm": 21.996858596801758, + "learning_rate": 1e-06, + "loss": 0.5783, + "num_input_tokens_seen": 175678996, + "step": 3136 + }, + { + "epoch": 6.984409799554566, + "loss": 0.7316303253173828, + "loss_ce": 0.00018497416749596596, + "loss_iou": 0.302734375, + "loss_num": 0.025146484375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 175678996, + "step": 3136 + }, + { + "epoch": 6.986636971046771, + "grad_norm": 19.231781005859375, + "learning_rate": 1e-06, + "loss": 0.8218, + "num_input_tokens_seen": 175733860, + "step": 3137 + }, + { + "epoch": 6.986636971046771, + "loss": 0.6332485675811768, + "loss_ce": 0.00019193578918930143, + "loss_iou": 0.25, + "loss_num": 0.0262451171875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 175733860, + "step": 3137 + }, + { + "epoch": 6.988864142538976, + "grad_norm": 11.28582763671875, + "learning_rate": 1e-06, + "loss": 0.7356, + "num_input_tokens_seen": 175792092, + "step": 3138 + }, + { + "epoch": 6.988864142538976, + "loss": 0.7703654766082764, + "loss_ce": 0.00022392123355530202, + "loss_iou": 0.3046875, + "loss_num": 0.032470703125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 175792092, + "step": 3138 + }, + { + "epoch": 6.991091314031181, + "grad_norm": 18.971759796142578, + "learning_rate": 1e-06, + "loss": 0.6312, + "num_input_tokens_seen": 175848452, + "step": 3139 + }, + { + "epoch": 6.991091314031181, + "loss": 0.7722006440162659, + "loss_ce": 0.00016694323858246207, + "loss_iou": 0.3046875, + "loss_num": 0.0322265625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 175848452, + "step": 3139 + }, + { + "epoch": 6.993318485523385, + "grad_norm": 16.247879028320312, + "learning_rate": 1e-06, + "loss": 0.5357, + "num_input_tokens_seen": 175906920, + "step": 3140 + }, + { + "epoch": 6.993318485523385, + "loss": 0.5915122628211975, + "loss_ce": 0.00020364229567348957, + "loss_iou": 0.25390625, + "loss_num": 0.0162353515625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 175906920, + "step": 3140 + }, + { + "epoch": 6.99554565701559, + "grad_norm": 14.442233085632324, + "learning_rate": 1e-06, + "loss": 0.4958, + "num_input_tokens_seen": 175965616, + "step": 3141 + }, + { + "epoch": 6.99554565701559, + "loss": 0.5104349851608276, + "loss_ce": 0.00018110190285369754, + "loss_iou": 0.2373046875, + "loss_num": 0.0069580078125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 175965616, + "step": 3141 + }, + { + "epoch": 6.997772828507795, + "grad_norm": 13.822257995605469, + "learning_rate": 1e-06, + "loss": 0.9366, + "num_input_tokens_seen": 176025056, + "step": 3142 + }, + { + "epoch": 6.997772828507795, + "loss": 0.7665646076202393, + "loss_ce": 0.00045135943219065666, + "loss_iou": 0.3359375, + "loss_num": 0.0185546875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 176025056, + "step": 3142 + }, + { + "epoch": 7.0, + "grad_norm": 20.180612564086914, + "learning_rate": 1e-06, + "loss": 0.7645, + "num_input_tokens_seen": 176083108, + "step": 3143 + }, + { + "epoch": 7.0, + "loss": 0.6660090088844299, + "loss_ce": 0.00023752517881803215, + "loss_iou": 0.291015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 176083108, + "step": 3143 + }, + { + "epoch": 7.002227171492205, + "grad_norm": 15.68157958984375, + "learning_rate": 1e-06, + "loss": 0.6653, + "num_input_tokens_seen": 176139524, + "step": 3144 + }, + { + "epoch": 7.002227171492205, + "loss": 0.6556283831596375, + "loss_ce": 0.00023284553026314825, + "loss_iou": 0.287109375, + "loss_num": 0.0164794921875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 176139524, + "step": 3144 + }, + { + "epoch": 7.00445434298441, + "grad_norm": 31.179643630981445, + "learning_rate": 1e-06, + "loss": 0.5867, + "num_input_tokens_seen": 176194724, + "step": 3145 + }, + { + "epoch": 7.00445434298441, + "loss": 0.49978122115135193, + "loss_ce": 0.0002695125003810972, + "loss_iou": 0.23046875, + "loss_num": 0.00750732421875, + "loss_xval": 0.5, + "num_input_tokens_seen": 176194724, + "step": 3145 + }, + { + "epoch": 7.006681514476615, + "grad_norm": 32.13369369506836, + "learning_rate": 1e-06, + "loss": 0.7949, + "num_input_tokens_seen": 176250820, + "step": 3146 + }, + { + "epoch": 7.006681514476615, + "loss": 0.7021375298500061, + "loss_ce": 0.00023322663037106395, + "loss_iou": 0.302734375, + "loss_num": 0.019287109375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 176250820, + "step": 3146 + }, + { + "epoch": 7.008908685968819, + "grad_norm": 18.130083084106445, + "learning_rate": 1e-06, + "loss": 0.755, + "num_input_tokens_seen": 176306224, + "step": 3147 + }, + { + "epoch": 7.008908685968819, + "loss": 0.9072054624557495, + "loss_ce": 0.00022300847922451794, + "loss_iou": 0.365234375, + "loss_num": 0.03564453125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 176306224, + "step": 3147 + }, + { + "epoch": 7.011135857461024, + "grad_norm": 24.085601806640625, + "learning_rate": 1e-06, + "loss": 0.5465, + "num_input_tokens_seen": 176362740, + "step": 3148 + }, + { + "epoch": 7.011135857461024, + "loss": 0.5722109079360962, + "loss_ce": 0.00018940077279694378, + "loss_iou": 0.25390625, + "loss_num": 0.01275634765625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 176362740, + "step": 3148 + }, + { + "epoch": 7.013363028953229, + "grad_norm": 16.56767463684082, + "learning_rate": 1e-06, + "loss": 0.6452, + "num_input_tokens_seen": 176420184, + "step": 3149 + }, + { + "epoch": 7.013363028953229, + "loss": 0.7920135259628296, + "loss_ce": 0.00026545586297288537, + "loss_iou": 0.322265625, + "loss_num": 0.029052734375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 176420184, + "step": 3149 + }, + { + "epoch": 7.015590200445434, + "grad_norm": 21.7204532623291, + "learning_rate": 1e-06, + "loss": 0.7989, + "num_input_tokens_seen": 176475432, + "step": 3150 + }, + { + "epoch": 7.015590200445434, + "loss": 0.5816566944122314, + "loss_ce": 0.0003578995238058269, + "loss_iou": 0.251953125, + "loss_num": 0.01507568359375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 176475432, + "step": 3150 + }, + { + "epoch": 7.017817371937639, + "grad_norm": 18.18883514404297, + "learning_rate": 1e-06, + "loss": 0.8199, + "num_input_tokens_seen": 176532060, + "step": 3151 + }, + { + "epoch": 7.017817371937639, + "loss": 0.8285827040672302, + "loss_ce": 0.0002135752292815596, + "loss_iou": 0.349609375, + "loss_num": 0.02587890625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 176532060, + "step": 3151 + }, + { + "epoch": 7.020044543429844, + "grad_norm": 15.747713088989258, + "learning_rate": 1e-06, + "loss": 0.8188, + "num_input_tokens_seen": 176589504, + "step": 3152 + }, + { + "epoch": 7.020044543429844, + "loss": 0.8652061223983765, + "loss_ce": 0.00021594867575913668, + "loss_iou": 0.375, + "loss_num": 0.0228271484375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 176589504, + "step": 3152 + }, + { + "epoch": 7.022271714922049, + "grad_norm": 22.65471649169922, + "learning_rate": 1e-06, + "loss": 0.6007, + "num_input_tokens_seen": 176647648, + "step": 3153 + }, + { + "epoch": 7.022271714922049, + "loss": 0.7232741117477417, + "loss_ce": 0.0003737136139534414, + "loss_iou": 0.3203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 176647648, + "step": 3153 + }, + { + "epoch": 7.0244988864142535, + "grad_norm": 16.48569107055664, + "learning_rate": 1e-06, + "loss": 0.7161, + "num_input_tokens_seen": 176704740, + "step": 3154 + }, + { + "epoch": 7.0244988864142535, + "loss": 0.6486464738845825, + "loss_ce": 0.00045317187323234975, + "loss_iou": 0.265625, + "loss_num": 0.0235595703125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 176704740, + "step": 3154 + }, + { + "epoch": 7.026726057906459, + "grad_norm": 26.504276275634766, + "learning_rate": 1e-06, + "loss": 0.6456, + "num_input_tokens_seen": 176761396, + "step": 3155 + }, + { + "epoch": 7.026726057906459, + "loss": 0.7807806730270386, + "loss_ce": 0.00026307476218789816, + "loss_iou": 0.296875, + "loss_num": 0.037353515625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 176761396, + "step": 3155 + }, + { + "epoch": 7.028953229398664, + "grad_norm": 18.16960334777832, + "learning_rate": 1e-06, + "loss": 0.4767, + "num_input_tokens_seen": 176816940, + "step": 3156 + }, + { + "epoch": 7.028953229398664, + "loss": 0.40581727027893066, + "loss_ce": 0.00017763671348802745, + "loss_iou": 0.177734375, + "loss_num": 0.01019287109375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 176816940, + "step": 3156 + }, + { + "epoch": 7.031180400890869, + "grad_norm": 16.1363468170166, + "learning_rate": 1e-06, + "loss": 0.5441, + "num_input_tokens_seen": 176873404, + "step": 3157 + }, + { + "epoch": 7.031180400890869, + "loss": 0.65606689453125, + "loss_ce": 0.00018308302969671786, + "loss_iou": 0.25390625, + "loss_num": 0.0299072265625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 176873404, + "step": 3157 + }, + { + "epoch": 7.033407572383074, + "grad_norm": 21.781497955322266, + "learning_rate": 1e-06, + "loss": 0.4695, + "num_input_tokens_seen": 176931848, + "step": 3158 + }, + { + "epoch": 7.033407572383074, + "loss": 0.3050188720226288, + "loss_ce": 0.00020930425671394914, + "loss_iou": 0.1376953125, + "loss_num": 0.00604248046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 176931848, + "step": 3158 + }, + { + "epoch": 7.035634743875279, + "grad_norm": 23.22254753112793, + "learning_rate": 1e-06, + "loss": 0.7001, + "num_input_tokens_seen": 176988340, + "step": 3159 + }, + { + "epoch": 7.035634743875279, + "loss": 0.7503657341003418, + "loss_ce": 0.0020747713278979063, + "loss_iou": 0.33203125, + "loss_num": 0.01708984375, + "loss_xval": 0.75, + "num_input_tokens_seen": 176988340, + "step": 3159 + }, + { + "epoch": 7.0378619153674835, + "grad_norm": 15.618306159973145, + "learning_rate": 1e-06, + "loss": 0.5591, + "num_input_tokens_seen": 177043904, + "step": 3160 + }, + { + "epoch": 7.0378619153674835, + "loss": 0.41194963455200195, + "loss_ce": 0.0009388765902258456, + "loss_iou": 0.189453125, + "loss_num": 0.006500244140625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 177043904, + "step": 3160 + }, + { + "epoch": 7.040089086859688, + "grad_norm": 17.742382049560547, + "learning_rate": 1e-06, + "loss": 0.5555, + "num_input_tokens_seen": 177098532, + "step": 3161 + }, + { + "epoch": 7.040089086859688, + "loss": 0.557831883430481, + "loss_ce": 0.00021466660837177187, + "loss_iou": 0.265625, + "loss_num": 0.0050048828125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 177098532, + "step": 3161 + }, + { + "epoch": 7.042316258351893, + "grad_norm": 22.0341854095459, + "learning_rate": 1e-06, + "loss": 0.7129, + "num_input_tokens_seen": 177154748, + "step": 3162 + }, + { + "epoch": 7.042316258351893, + "loss": 0.8048324584960938, + "loss_ce": 0.0002669950481504202, + "loss_iou": 0.353515625, + "loss_num": 0.01904296875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 177154748, + "step": 3162 + }, + { + "epoch": 7.044543429844098, + "grad_norm": 19.422056198120117, + "learning_rate": 1e-06, + "loss": 0.5373, + "num_input_tokens_seen": 177208936, + "step": 3163 + }, + { + "epoch": 7.044543429844098, + "loss": 0.5412275791168213, + "loss_ce": 0.0031416614074259996, + "loss_iou": 0.2490234375, + "loss_num": 0.00799560546875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 177208936, + "step": 3163 + }, + { + "epoch": 7.046770601336303, + "grad_norm": 16.3929443359375, + "learning_rate": 1e-06, + "loss": 0.819, + "num_input_tokens_seen": 177262612, + "step": 3164 + }, + { + "epoch": 7.046770601336303, + "loss": 0.8707385063171387, + "loss_ce": 0.00025512618594802916, + "loss_iou": 0.369140625, + "loss_num": 0.0262451171875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 177262612, + "step": 3164 + }, + { + "epoch": 7.048997772828508, + "grad_norm": 28.08599281311035, + "learning_rate": 1e-06, + "loss": 0.7073, + "num_input_tokens_seen": 177319504, + "step": 3165 + }, + { + "epoch": 7.048997772828508, + "loss": 0.7279913425445557, + "loss_ce": 0.00020817822951357812, + "loss_iou": 0.3125, + "loss_num": 0.020751953125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 177319504, + "step": 3165 + }, + { + "epoch": 7.051224944320713, + "grad_norm": 18.15179443359375, + "learning_rate": 1e-06, + "loss": 0.6731, + "num_input_tokens_seen": 177375208, + "step": 3166 + }, + { + "epoch": 7.051224944320713, + "loss": 0.7211670279502869, + "loss_ce": 0.00021975839626975358, + "loss_iou": 0.32421875, + "loss_num": 0.01434326171875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 177375208, + "step": 3166 + }, + { + "epoch": 7.0534521158129175, + "grad_norm": 24.333948135375977, + "learning_rate": 1e-06, + "loss": 0.7331, + "num_input_tokens_seen": 177432468, + "step": 3167 + }, + { + "epoch": 7.0534521158129175, + "loss": 0.6586510539054871, + "loss_ce": 0.00020380858040880412, + "loss_iou": 0.26953125, + "loss_num": 0.02392578125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 177432468, + "step": 3167 + }, + { + "epoch": 7.055679287305122, + "grad_norm": 24.297325134277344, + "learning_rate": 1e-06, + "loss": 0.7255, + "num_input_tokens_seen": 177487920, + "step": 3168 + }, + { + "epoch": 7.055679287305122, + "loss": 0.5983210206031799, + "loss_ce": 0.0001764908665791154, + "loss_iou": 0.2578125, + "loss_num": 0.0166015625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 177487920, + "step": 3168 + }, + { + "epoch": 7.057906458797327, + "grad_norm": 24.517797470092773, + "learning_rate": 1e-06, + "loss": 0.7016, + "num_input_tokens_seen": 177543680, + "step": 3169 + }, + { + "epoch": 7.057906458797327, + "loss": 0.697583019733429, + "loss_ce": 0.00019531394354999065, + "loss_iou": 0.296875, + "loss_num": 0.0206298828125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 177543680, + "step": 3169 + }, + { + "epoch": 7.060133630289532, + "grad_norm": 15.318553924560547, + "learning_rate": 1e-06, + "loss": 0.6435, + "num_input_tokens_seen": 177600092, + "step": 3170 + }, + { + "epoch": 7.060133630289532, + "loss": 0.6004433035850525, + "loss_ce": 0.0002235766005469486, + "loss_iou": 0.26953125, + "loss_num": 0.012451171875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 177600092, + "step": 3170 + }, + { + "epoch": 7.062360801781737, + "grad_norm": 26.361791610717773, + "learning_rate": 1e-06, + "loss": 0.6517, + "num_input_tokens_seen": 177652892, + "step": 3171 + }, + { + "epoch": 7.062360801781737, + "loss": 0.7172492146492004, + "loss_ce": 0.00020820970530621707, + "loss_iou": 0.30859375, + "loss_num": 0.0203857421875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 177652892, + "step": 3171 + }, + { + "epoch": 7.064587973273942, + "grad_norm": 18.68074607849121, + "learning_rate": 1e-06, + "loss": 0.6691, + "num_input_tokens_seen": 177705568, + "step": 3172 + }, + { + "epoch": 7.064587973273942, + "loss": 0.696946382522583, + "loss_ce": 0.0001690261415205896, + "loss_iou": 0.30078125, + "loss_num": 0.018798828125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 177705568, + "step": 3172 + }, + { + "epoch": 7.066815144766147, + "grad_norm": 28.12506866455078, + "learning_rate": 1e-06, + "loss": 0.7585, + "num_input_tokens_seen": 177759380, + "step": 3173 + }, + { + "epoch": 7.066815144766147, + "loss": 0.8229764699935913, + "loss_ce": 0.0002226082724519074, + "loss_iou": 0.341796875, + "loss_num": 0.0281982421875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 177759380, + "step": 3173 + }, + { + "epoch": 7.0690423162583516, + "grad_norm": 25.993946075439453, + "learning_rate": 1e-06, + "loss": 0.6376, + "num_input_tokens_seen": 177814872, + "step": 3174 + }, + { + "epoch": 7.0690423162583516, + "loss": 0.8440980911254883, + "loss_ce": 0.0015688535058870912, + "loss_iou": 0.341796875, + "loss_num": 0.031494140625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 177814872, + "step": 3174 + }, + { + "epoch": 7.071269487750556, + "grad_norm": 30.0826416015625, + "learning_rate": 1e-06, + "loss": 0.656, + "num_input_tokens_seen": 177871144, + "step": 3175 + }, + { + "epoch": 7.071269487750556, + "loss": 0.46419858932495117, + "loss_ce": 0.00020933072664774954, + "loss_iou": 0.197265625, + "loss_num": 0.0140380859375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 177871144, + "step": 3175 + }, + { + "epoch": 7.073496659242761, + "grad_norm": 22.121746063232422, + "learning_rate": 1e-06, + "loss": 0.6732, + "num_input_tokens_seen": 177924664, + "step": 3176 + }, + { + "epoch": 7.073496659242761, + "loss": 0.5211669206619263, + "loss_ce": 0.00017085002036765218, + "loss_iou": 0.232421875, + "loss_num": 0.0111083984375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 177924664, + "step": 3176 + }, + { + "epoch": 7.075723830734967, + "grad_norm": 52.88250732421875, + "learning_rate": 1e-06, + "loss": 0.5979, + "num_input_tokens_seen": 177977040, + "step": 3177 + }, + { + "epoch": 7.075723830734967, + "loss": 0.6259975433349609, + "loss_ce": 0.00026510769384913146, + "loss_iou": 0.25, + "loss_num": 0.02490234375, + "loss_xval": 0.625, + "num_input_tokens_seen": 177977040, + "step": 3177 + }, + { + "epoch": 7.077951002227172, + "grad_norm": 23.79888916015625, + "learning_rate": 1e-06, + "loss": 0.8311, + "num_input_tokens_seen": 178030552, + "step": 3178 + }, + { + "epoch": 7.077951002227172, + "loss": 0.6989488005638123, + "loss_ce": 0.0002183011529268697, + "loss_iou": 0.298828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 178030552, + "step": 3178 + }, + { + "epoch": 7.080178173719377, + "grad_norm": 18.478776931762695, + "learning_rate": 1e-06, + "loss": 0.6773, + "num_input_tokens_seen": 178089092, + "step": 3179 + }, + { + "epoch": 7.080178173719377, + "loss": 0.6097142696380615, + "loss_ce": 0.0004613480414263904, + "loss_iou": 0.275390625, + "loss_num": 0.01141357421875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 178089092, + "step": 3179 + }, + { + "epoch": 7.082405345211582, + "grad_norm": 37.656944274902344, + "learning_rate": 1e-06, + "loss": 0.7103, + "num_input_tokens_seen": 178139908, + "step": 3180 + }, + { + "epoch": 7.082405345211582, + "loss": 0.7475607395172119, + "loss_ce": 0.001344920601695776, + "loss_iou": 0.333984375, + "loss_num": 0.015380859375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 178139908, + "step": 3180 + }, + { + "epoch": 7.0846325167037865, + "grad_norm": 21.057231903076172, + "learning_rate": 1e-06, + "loss": 0.7319, + "num_input_tokens_seen": 178196864, + "step": 3181 + }, + { + "epoch": 7.0846325167037865, + "loss": 0.6234728097915649, + "loss_ce": 0.00018174458818975836, + "loss_iou": 0.25390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.625, + "num_input_tokens_seen": 178196864, + "step": 3181 + }, + { + "epoch": 7.086859688195991, + "grad_norm": 13.602341651916504, + "learning_rate": 1e-06, + "loss": 0.4496, + "num_input_tokens_seen": 178254848, + "step": 3182 + }, + { + "epoch": 7.086859688195991, + "loss": 0.40568333864212036, + "loss_ce": 0.00016574125038459897, + "loss_iou": 0.17578125, + "loss_num": 0.01080322265625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 178254848, + "step": 3182 + }, + { + "epoch": 7.089086859688196, + "grad_norm": 19.736835479736328, + "learning_rate": 1e-06, + "loss": 0.6679, + "num_input_tokens_seen": 178312356, + "step": 3183 + }, + { + "epoch": 7.089086859688196, + "loss": 0.8520088195800781, + "loss_ce": 0.00020223407773301005, + "loss_iou": 0.373046875, + "loss_num": 0.0213623046875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 178312356, + "step": 3183 + }, + { + "epoch": 7.091314031180401, + "grad_norm": 18.998903274536133, + "learning_rate": 1e-06, + "loss": 0.577, + "num_input_tokens_seen": 178369760, + "step": 3184 + }, + { + "epoch": 7.091314031180401, + "loss": 0.6121712327003479, + "loss_ce": 0.0004158708034083247, + "loss_iou": 0.232421875, + "loss_num": 0.0294189453125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 178369760, + "step": 3184 + }, + { + "epoch": 7.093541202672606, + "grad_norm": 17.410633087158203, + "learning_rate": 1e-06, + "loss": 0.5794, + "num_input_tokens_seen": 178424408, + "step": 3185 + }, + { + "epoch": 7.093541202672606, + "loss": 0.6892697215080261, + "loss_ce": 0.00030487452750094235, + "loss_iou": 0.294921875, + "loss_num": 0.0196533203125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 178424408, + "step": 3185 + }, + { + "epoch": 7.095768374164811, + "grad_norm": 25.50342559814453, + "learning_rate": 1e-06, + "loss": 0.752, + "num_input_tokens_seen": 178478552, + "step": 3186 + }, + { + "epoch": 7.095768374164811, + "loss": 0.6170066595077515, + "loss_ce": 0.00018539902521297336, + "loss_iou": 0.267578125, + "loss_num": 0.0164794921875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 178478552, + "step": 3186 + }, + { + "epoch": 7.097995545657016, + "grad_norm": 23.802183151245117, + "learning_rate": 1e-06, + "loss": 0.6589, + "num_input_tokens_seen": 178533624, + "step": 3187 + }, + { + "epoch": 7.097995545657016, + "loss": 0.632292628288269, + "loss_ce": 0.0002125354076270014, + "loss_iou": 0.271484375, + "loss_num": 0.017578125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 178533624, + "step": 3187 + }, + { + "epoch": 7.1002227171492205, + "grad_norm": 22.64967155456543, + "learning_rate": 1e-06, + "loss": 0.4275, + "num_input_tokens_seen": 178590396, + "step": 3188 + }, + { + "epoch": 7.1002227171492205, + "loss": 0.49148237705230713, + "loss_ce": 0.00014936855586711317, + "loss_iou": 0.22265625, + "loss_num": 0.00933837890625, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 178590396, + "step": 3188 + }, + { + "epoch": 7.102449888641425, + "grad_norm": 17.186386108398438, + "learning_rate": 1e-06, + "loss": 0.6466, + "num_input_tokens_seen": 178648316, + "step": 3189 + }, + { + "epoch": 7.102449888641425, + "loss": 0.7294230461120605, + "loss_ce": 0.00017502682749181986, + "loss_iou": 0.333984375, + "loss_num": 0.0125732421875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 178648316, + "step": 3189 + }, + { + "epoch": 7.10467706013363, + "grad_norm": 14.459593772888184, + "learning_rate": 1e-06, + "loss": 0.7198, + "num_input_tokens_seen": 178705096, + "step": 3190 + }, + { + "epoch": 7.10467706013363, + "loss": 0.62459796667099, + "loss_ce": 0.00020834297174587846, + "loss_iou": 0.265625, + "loss_num": 0.018798828125, + "loss_xval": 0.625, + "num_input_tokens_seen": 178705096, + "step": 3190 + }, + { + "epoch": 7.106904231625835, + "grad_norm": 19.029233932495117, + "learning_rate": 1e-06, + "loss": 0.7772, + "num_input_tokens_seen": 178761896, + "step": 3191 + }, + { + "epoch": 7.106904231625835, + "loss": 0.5232353210449219, + "loss_ce": 0.00016402616165578365, + "loss_iou": 0.23046875, + "loss_num": 0.0125732421875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 178761896, + "step": 3191 + }, + { + "epoch": 7.10913140311804, + "grad_norm": 27.418760299682617, + "learning_rate": 1e-06, + "loss": 0.7062, + "num_input_tokens_seen": 178816456, + "step": 3192 + }, + { + "epoch": 7.10913140311804, + "loss": 0.9633630514144897, + "loss_ce": 0.0004724356404040009, + "loss_iou": 0.390625, + "loss_num": 0.036376953125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 178816456, + "step": 3192 + }, + { + "epoch": 7.111358574610245, + "grad_norm": 14.343728065490723, + "learning_rate": 1e-06, + "loss": 0.4866, + "num_input_tokens_seen": 178872960, + "step": 3193 + }, + { + "epoch": 7.111358574610245, + "loss": 0.5601356029510498, + "loss_ce": 0.0001990435121115297, + "loss_iou": 0.2080078125, + "loss_num": 0.0286865234375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 178872960, + "step": 3193 + }, + { + "epoch": 7.11358574610245, + "grad_norm": 32.39339828491211, + "learning_rate": 1e-06, + "loss": 0.6818, + "num_input_tokens_seen": 178927728, + "step": 3194 + }, + { + "epoch": 7.11358574610245, + "loss": 0.7690376043319702, + "loss_ce": 0.00023876590421423316, + "loss_iou": 0.296875, + "loss_num": 0.034912109375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 178927728, + "step": 3194 + }, + { + "epoch": 7.1158129175946545, + "grad_norm": 21.278362274169922, + "learning_rate": 1e-06, + "loss": 0.6444, + "num_input_tokens_seen": 178984676, + "step": 3195 + }, + { + "epoch": 7.1158129175946545, + "loss": 0.5341231822967529, + "loss_ce": 0.0001876165042631328, + "loss_iou": 0.2216796875, + "loss_num": 0.0181884765625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 178984676, + "step": 3195 + }, + { + "epoch": 7.118040089086859, + "grad_norm": 30.706249237060547, + "learning_rate": 1e-06, + "loss": 0.7029, + "num_input_tokens_seen": 179040412, + "step": 3196 + }, + { + "epoch": 7.118040089086859, + "loss": 0.627882719039917, + "loss_ce": 0.00019718779367394745, + "loss_iou": 0.291015625, + "loss_num": 0.00909423828125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 179040412, + "step": 3196 + }, + { + "epoch": 7.120267260579064, + "grad_norm": 14.424131393432617, + "learning_rate": 1e-06, + "loss": 0.4814, + "num_input_tokens_seen": 179096912, + "step": 3197 + }, + { + "epoch": 7.120267260579064, + "loss": 0.46601158380508423, + "loss_ce": 0.0001913084415718913, + "loss_iou": 0.2099609375, + "loss_num": 0.0093994140625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 179096912, + "step": 3197 + }, + { + "epoch": 7.122494432071269, + "grad_norm": 17.124177932739258, + "learning_rate": 1e-06, + "loss": 0.6929, + "num_input_tokens_seen": 179153884, + "step": 3198 + }, + { + "epoch": 7.122494432071269, + "loss": 0.7391812801361084, + "loss_ce": 0.00016765895998105407, + "loss_iou": 0.3203125, + "loss_num": 0.01953125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 179153884, + "step": 3198 + }, + { + "epoch": 7.124721603563474, + "grad_norm": 71.17576599121094, + "learning_rate": 1e-06, + "loss": 0.7984, + "num_input_tokens_seen": 179208364, + "step": 3199 + }, + { + "epoch": 7.124721603563474, + "loss": 0.8881849646568298, + "loss_ce": 0.0002455237554386258, + "loss_iou": 0.357421875, + "loss_num": 0.03466796875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 179208364, + "step": 3199 + }, + { + "epoch": 7.12694877505568, + "grad_norm": 16.409549713134766, + "learning_rate": 1e-06, + "loss": 0.5439, + "num_input_tokens_seen": 179262820, + "step": 3200 + }, + { + "epoch": 7.12694877505568, + "loss": 0.5343835353851318, + "loss_ce": 0.00020382186630740762, + "loss_iou": 0.232421875, + "loss_num": 0.01385498046875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 179262820, + "step": 3200 + }, + { + "epoch": 7.129175946547885, + "grad_norm": 19.769577026367188, + "learning_rate": 1e-06, + "loss": 0.8334, + "num_input_tokens_seen": 179320280, + "step": 3201 + }, + { + "epoch": 7.129175946547885, + "loss": 0.5122010111808777, + "loss_ce": 0.00017708051018416882, + "loss_iou": 0.224609375, + "loss_num": 0.0128173828125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 179320280, + "step": 3201 + }, + { + "epoch": 7.131403118040089, + "grad_norm": 15.5679292678833, + "learning_rate": 1e-06, + "loss": 0.5519, + "num_input_tokens_seen": 179377732, + "step": 3202 + }, + { + "epoch": 7.131403118040089, + "loss": 0.5272064805030823, + "loss_ce": 0.00022892479319125414, + "loss_iou": 0.240234375, + "loss_num": 0.00921630859375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 179377732, + "step": 3202 + }, + { + "epoch": 7.133630289532294, + "grad_norm": 20.086763381958008, + "learning_rate": 1e-06, + "loss": 0.7019, + "num_input_tokens_seen": 179433624, + "step": 3203 + }, + { + "epoch": 7.133630289532294, + "loss": 0.6670153141021729, + "loss_ce": 0.0002672579721547663, + "loss_iou": 0.2890625, + "loss_num": 0.01806640625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 179433624, + "step": 3203 + }, + { + "epoch": 7.135857461024499, + "grad_norm": 23.608802795410156, + "learning_rate": 1e-06, + "loss": 0.511, + "num_input_tokens_seen": 179491448, + "step": 3204 + }, + { + "epoch": 7.135857461024499, + "loss": 0.4480966031551361, + "loss_ce": 0.00022062801872380078, + "loss_iou": 0.1953125, + "loss_num": 0.01141357421875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 179491448, + "step": 3204 + }, + { + "epoch": 7.138084632516704, + "grad_norm": 15.496456146240234, + "learning_rate": 1e-06, + "loss": 0.4442, + "num_input_tokens_seen": 179550108, + "step": 3205 + }, + { + "epoch": 7.138084632516704, + "loss": 0.369751513004303, + "loss_ce": 0.00036674662260338664, + "loss_iou": 0.1572265625, + "loss_num": 0.01104736328125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 179550108, + "step": 3205 + }, + { + "epoch": 7.140311804008909, + "grad_norm": 16.712627410888672, + "learning_rate": 1e-06, + "loss": 0.6253, + "num_input_tokens_seen": 179609256, + "step": 3206 + }, + { + "epoch": 7.140311804008909, + "loss": 0.561935305595398, + "loss_ce": 0.00016772476374171674, + "loss_iou": 0.2353515625, + "loss_num": 0.0181884765625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 179609256, + "step": 3206 + }, + { + "epoch": 7.142538975501114, + "grad_norm": 18.983253479003906, + "learning_rate": 1e-06, + "loss": 0.8887, + "num_input_tokens_seen": 179664724, + "step": 3207 + }, + { + "epoch": 7.142538975501114, + "loss": 1.041954755783081, + "loss_ce": 0.00020668681827373803, + "loss_iou": 0.421875, + "loss_num": 0.039306640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 179664724, + "step": 3207 + }, + { + "epoch": 7.144766146993319, + "grad_norm": 31.973432540893555, + "learning_rate": 1e-06, + "loss": 0.6015, + "num_input_tokens_seen": 179723472, + "step": 3208 + }, + { + "epoch": 7.144766146993319, + "loss": 0.5833202600479126, + "loss_ce": 0.00019040628103539348, + "loss_iou": 0.267578125, + "loss_num": 0.0098876953125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 179723472, + "step": 3208 + }, + { + "epoch": 7.146993318485523, + "grad_norm": 17.996723175048828, + "learning_rate": 1e-06, + "loss": 0.5506, + "num_input_tokens_seen": 179781296, + "step": 3209 + }, + { + "epoch": 7.146993318485523, + "loss": 0.613503098487854, + "loss_ce": 0.00022186974820215255, + "loss_iou": 0.259765625, + "loss_num": 0.0185546875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 179781296, + "step": 3209 + }, + { + "epoch": 7.149220489977728, + "grad_norm": 12.357635498046875, + "learning_rate": 1e-06, + "loss": 0.5798, + "num_input_tokens_seen": 179836852, + "step": 3210 + }, + { + "epoch": 7.149220489977728, + "loss": 0.6682331562042236, + "loss_ce": 0.0002034001227002591, + "loss_iou": 0.2373046875, + "loss_num": 0.03857421875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 179836852, + "step": 3210 + }, + { + "epoch": 7.151447661469933, + "grad_norm": 16.912948608398438, + "learning_rate": 1e-06, + "loss": 0.6662, + "num_input_tokens_seen": 179892884, + "step": 3211 + }, + { + "epoch": 7.151447661469933, + "loss": 0.8070770502090454, + "loss_ce": 0.00019233419152442366, + "loss_iou": 0.337890625, + "loss_num": 0.0263671875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 179892884, + "step": 3211 + }, + { + "epoch": 7.153674832962138, + "grad_norm": 17.714216232299805, + "learning_rate": 1e-06, + "loss": 0.4681, + "num_input_tokens_seen": 179949768, + "step": 3212 + }, + { + "epoch": 7.153674832962138, + "loss": 0.4969853460788727, + "loss_ce": 0.00015915744006633759, + "loss_iou": 0.205078125, + "loss_num": 0.01708984375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 179949768, + "step": 3212 + }, + { + "epoch": 7.155902004454343, + "grad_norm": 26.61876106262207, + "learning_rate": 1e-06, + "loss": 0.8391, + "num_input_tokens_seen": 180005168, + "step": 3213 + }, + { + "epoch": 7.155902004454343, + "loss": 0.7213802337646484, + "loss_ce": 0.00018884365272242576, + "loss_iou": 0.31640625, + "loss_num": 0.0177001953125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 180005168, + "step": 3213 + }, + { + "epoch": 7.158129175946548, + "grad_norm": 15.515600204467773, + "learning_rate": 1e-06, + "loss": 0.6922, + "num_input_tokens_seen": 180061648, + "step": 3214 + }, + { + "epoch": 7.158129175946548, + "loss": 0.6254113912582397, + "loss_ce": 0.0001672459620749578, + "loss_iou": 0.26171875, + "loss_num": 0.02001953125, + "loss_xval": 0.625, + "num_input_tokens_seen": 180061648, + "step": 3214 + }, + { + "epoch": 7.160356347438753, + "grad_norm": 25.33653450012207, + "learning_rate": 1e-06, + "loss": 0.6162, + "num_input_tokens_seen": 180118080, + "step": 3215 + }, + { + "epoch": 7.160356347438753, + "loss": 0.5609911680221558, + "loss_ce": 0.00020018592476844788, + "loss_iou": 0.2431640625, + "loss_num": 0.0147705078125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 180118080, + "step": 3215 + }, + { + "epoch": 7.1625835189309575, + "grad_norm": 16.110671997070312, + "learning_rate": 1e-06, + "loss": 0.6583, + "num_input_tokens_seen": 180173604, + "step": 3216 + }, + { + "epoch": 7.1625835189309575, + "loss": 0.7682472467422485, + "loss_ce": 0.00018078596622217447, + "loss_iou": 0.328125, + "loss_num": 0.0223388671875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 180173604, + "step": 3216 + }, + { + "epoch": 7.164810690423162, + "grad_norm": 20.151470184326172, + "learning_rate": 1e-06, + "loss": 0.5956, + "num_input_tokens_seen": 180230020, + "step": 3217 + }, + { + "epoch": 7.164810690423162, + "loss": 0.5044952630996704, + "loss_ce": 0.0003448851057328284, + "loss_iou": 0.2265625, + "loss_num": 0.01007080078125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 180230020, + "step": 3217 + }, + { + "epoch": 7.167037861915367, + "grad_norm": 13.990897178649902, + "learning_rate": 1e-06, + "loss": 0.8846, + "num_input_tokens_seen": 180283952, + "step": 3218 + }, + { + "epoch": 7.167037861915367, + "loss": 0.638586699962616, + "loss_ce": 0.0002810558071359992, + "loss_iou": 0.283203125, + "loss_num": 0.0142822265625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 180283952, + "step": 3218 + }, + { + "epoch": 7.169265033407572, + "grad_norm": 17.823301315307617, + "learning_rate": 1e-06, + "loss": 0.8448, + "num_input_tokens_seen": 180340276, + "step": 3219 + }, + { + "epoch": 7.169265033407572, + "loss": 0.7374616861343384, + "loss_ce": 0.00015702965902164578, + "loss_iou": 0.310546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 180340276, + "step": 3219 + }, + { + "epoch": 7.171492204899777, + "grad_norm": 21.112295150756836, + "learning_rate": 1e-06, + "loss": 0.6465, + "num_input_tokens_seen": 180397884, + "step": 3220 + }, + { + "epoch": 7.171492204899777, + "loss": 0.4930199384689331, + "loss_ce": 0.00022211679606698453, + "loss_iou": 0.2197265625, + "loss_num": 0.01043701171875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 180397884, + "step": 3220 + }, + { + "epoch": 7.173719376391982, + "grad_norm": 14.755711555480957, + "learning_rate": 1e-06, + "loss": 0.4308, + "num_input_tokens_seen": 180452508, + "step": 3221 + }, + { + "epoch": 7.173719376391982, + "loss": 0.5091297030448914, + "loss_ce": 0.00021859334083274007, + "loss_iou": 0.2138671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 180452508, + "step": 3221 + }, + { + "epoch": 7.1759465478841875, + "grad_norm": 17.953371047973633, + "learning_rate": 1e-06, + "loss": 0.5174, + "num_input_tokens_seen": 180510496, + "step": 3222 + }, + { + "epoch": 7.1759465478841875, + "loss": 0.6082879304885864, + "loss_ce": 0.0004388437664601952, + "loss_iou": 0.25390625, + "loss_num": 0.0203857421875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 180510496, + "step": 3222 + }, + { + "epoch": 7.178173719376392, + "grad_norm": 27.752344131469727, + "learning_rate": 1e-06, + "loss": 0.6884, + "num_input_tokens_seen": 180567600, + "step": 3223 + }, + { + "epoch": 7.178173719376392, + "loss": 0.8676443696022034, + "loss_ce": 0.00021275154722388834, + "loss_iou": 0.35546875, + "loss_num": 0.03173828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 180567600, + "step": 3223 + }, + { + "epoch": 7.180400890868597, + "grad_norm": 20.237045288085938, + "learning_rate": 1e-06, + "loss": 0.7489, + "num_input_tokens_seen": 180622160, + "step": 3224 + }, + { + "epoch": 7.180400890868597, + "loss": 0.7350777387619019, + "loss_ce": 0.0002144520840374753, + "loss_iou": 0.318359375, + "loss_num": 0.01953125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 180622160, + "step": 3224 + }, + { + "epoch": 7.182628062360802, + "grad_norm": 17.729013442993164, + "learning_rate": 1e-06, + "loss": 0.6288, + "num_input_tokens_seen": 180678492, + "step": 3225 + }, + { + "epoch": 7.182628062360802, + "loss": 0.7737811803817749, + "loss_ce": 0.00034367607440799475, + "loss_iou": 0.3515625, + "loss_num": 0.01458740234375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 180678492, + "step": 3225 + }, + { + "epoch": 7.184855233853007, + "grad_norm": 17.447938919067383, + "learning_rate": 1e-06, + "loss": 0.8197, + "num_input_tokens_seen": 180734308, + "step": 3226 + }, + { + "epoch": 7.184855233853007, + "loss": 0.6977049708366394, + "loss_ce": 0.00019519682973623276, + "loss_iou": 0.298828125, + "loss_num": 0.02001953125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 180734308, + "step": 3226 + }, + { + "epoch": 7.187082405345212, + "grad_norm": 17.61049461364746, + "learning_rate": 1e-06, + "loss": 0.7649, + "num_input_tokens_seen": 180789952, + "step": 3227 + }, + { + "epoch": 7.187082405345212, + "loss": 0.6152649521827698, + "loss_ce": 0.00027470148052088916, + "loss_iou": 0.255859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 180789952, + "step": 3227 + }, + { + "epoch": 7.189309576837417, + "grad_norm": 17.236371994018555, + "learning_rate": 1e-06, + "loss": 0.6585, + "num_input_tokens_seen": 180844252, + "step": 3228 + }, + { + "epoch": 7.189309576837417, + "loss": 0.5438805818557739, + "loss_ce": 0.00017939825193025172, + "loss_iou": 0.25, + "loss_num": 0.00823974609375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 180844252, + "step": 3228 + }, + { + "epoch": 7.1915367483296215, + "grad_norm": 15.305548667907715, + "learning_rate": 1e-06, + "loss": 0.543, + "num_input_tokens_seen": 180898788, + "step": 3229 + }, + { + "epoch": 7.1915367483296215, + "loss": 0.5575687885284424, + "loss_ce": 0.00019571834127418697, + "loss_iou": 0.2421875, + "loss_num": 0.0146484375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 180898788, + "step": 3229 + }, + { + "epoch": 7.193763919821826, + "grad_norm": 36.117584228515625, + "learning_rate": 1e-06, + "loss": 0.8034, + "num_input_tokens_seen": 180955360, + "step": 3230 + }, + { + "epoch": 7.193763919821826, + "loss": 0.7606761455535889, + "loss_ce": 0.00017807254334911704, + "loss_iou": 0.31640625, + "loss_num": 0.0257568359375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 180955360, + "step": 3230 + }, + { + "epoch": 7.195991091314031, + "grad_norm": 26.037805557250977, + "learning_rate": 1e-06, + "loss": 0.5336, + "num_input_tokens_seen": 181010364, + "step": 3231 + }, + { + "epoch": 7.195991091314031, + "loss": 0.4867013990879059, + "loss_ce": 0.0001901786308735609, + "loss_iou": 0.2119140625, + "loss_num": 0.012451171875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 181010364, + "step": 3231 + }, + { + "epoch": 7.198218262806236, + "grad_norm": 20.942747116088867, + "learning_rate": 1e-06, + "loss": 0.6008, + "num_input_tokens_seen": 181065840, + "step": 3232 + }, + { + "epoch": 7.198218262806236, + "loss": 0.5934904217720032, + "loss_ce": 0.00022868883388582617, + "loss_iou": 0.25390625, + "loss_num": 0.01708984375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 181065840, + "step": 3232 + }, + { + "epoch": 7.200445434298441, + "grad_norm": 14.775635719299316, + "learning_rate": 1e-06, + "loss": 0.6616, + "num_input_tokens_seen": 181122380, + "step": 3233 + }, + { + "epoch": 7.200445434298441, + "loss": 0.5939116477966309, + "loss_ce": 0.0001616643276065588, + "loss_iou": 0.267578125, + "loss_num": 0.0115966796875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 181122380, + "step": 3233 + }, + { + "epoch": 7.202672605790646, + "grad_norm": 16.27145004272461, + "learning_rate": 1e-06, + "loss": 0.84, + "num_input_tokens_seen": 181179864, + "step": 3234 + }, + { + "epoch": 7.202672605790646, + "loss": 0.9337013959884644, + "loss_ce": 0.0008400785736739635, + "loss_iou": 0.38671875, + "loss_num": 0.03173828125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 181179864, + "step": 3234 + }, + { + "epoch": 7.204899777282851, + "grad_norm": 29.473827362060547, + "learning_rate": 1e-06, + "loss": 0.7444, + "num_input_tokens_seen": 181233212, + "step": 3235 + }, + { + "epoch": 7.204899777282851, + "loss": 0.8354470729827881, + "loss_ce": 0.0004861447378061712, + "loss_iou": 0.3671875, + "loss_num": 0.020263671875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 181233212, + "step": 3235 + }, + { + "epoch": 7.2071269487750556, + "grad_norm": 22.71228790283203, + "learning_rate": 1e-06, + "loss": 0.6487, + "num_input_tokens_seen": 181288260, + "step": 3236 + }, + { + "epoch": 7.2071269487750556, + "loss": 0.7758283615112305, + "loss_ce": 0.00019357707060407847, + "loss_iou": 0.32421875, + "loss_num": 0.025146484375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 181288260, + "step": 3236 + }, + { + "epoch": 7.20935412026726, + "grad_norm": 16.81915283203125, + "learning_rate": 1e-06, + "loss": 0.7454, + "num_input_tokens_seen": 181343576, + "step": 3237 + }, + { + "epoch": 7.20935412026726, + "loss": 0.8085384368896484, + "loss_ce": 0.00031090632546693087, + "loss_iou": 0.357421875, + "loss_num": 0.018310546875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 181343576, + "step": 3237 + }, + { + "epoch": 7.211581291759465, + "grad_norm": 15.39242172241211, + "learning_rate": 1e-06, + "loss": 0.5735, + "num_input_tokens_seen": 181400228, + "step": 3238 + }, + { + "epoch": 7.211581291759465, + "loss": 0.6538634300231934, + "loss_ce": 0.0002989704080391675, + "loss_iou": 0.279296875, + "loss_num": 0.018798828125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 181400228, + "step": 3238 + }, + { + "epoch": 7.21380846325167, + "grad_norm": 59.97997283935547, + "learning_rate": 1e-06, + "loss": 0.6083, + "num_input_tokens_seen": 181458112, + "step": 3239 + }, + { + "epoch": 7.21380846325167, + "loss": 0.7236316204071045, + "loss_ce": 0.0002429374580970034, + "loss_iou": 0.318359375, + "loss_num": 0.0172119140625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 181458112, + "step": 3239 + }, + { + "epoch": 7.216035634743875, + "grad_norm": 13.519067764282227, + "learning_rate": 1e-06, + "loss": 0.5369, + "num_input_tokens_seen": 181514996, + "step": 3240 + }, + { + "epoch": 7.216035634743875, + "loss": 0.36852580308914185, + "loss_ce": 0.00017863643006421626, + "loss_iou": 0.1572265625, + "loss_num": 0.01080322265625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 181514996, + "step": 3240 + }, + { + "epoch": 7.21826280623608, + "grad_norm": 26.277721405029297, + "learning_rate": 1e-06, + "loss": 0.4887, + "num_input_tokens_seen": 181571304, + "step": 3241 + }, + { + "epoch": 7.21826280623608, + "loss": 0.5072898268699646, + "loss_ce": 0.0006980198086239398, + "loss_iou": 0.2265625, + "loss_num": 0.01055908203125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 181571304, + "step": 3241 + }, + { + "epoch": 7.220489977728285, + "grad_norm": 146.0947265625, + "learning_rate": 1e-06, + "loss": 0.62, + "num_input_tokens_seen": 181626812, + "step": 3242 + }, + { + "epoch": 7.220489977728285, + "loss": 0.640555202960968, + "loss_ce": 0.000174342465470545, + "loss_iou": 0.294921875, + "loss_num": 0.010009765625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 181626812, + "step": 3242 + }, + { + "epoch": 7.22271714922049, + "grad_norm": 26.887720108032227, + "learning_rate": 1e-06, + "loss": 0.8177, + "num_input_tokens_seen": 181681800, + "step": 3243 + }, + { + "epoch": 7.22271714922049, + "loss": 0.6879905462265015, + "loss_ce": 0.0004905810346826911, + "loss_iou": 0.296875, + "loss_num": 0.018798828125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 181681800, + "step": 3243 + }, + { + "epoch": 7.224944320712694, + "grad_norm": 17.908056259155273, + "learning_rate": 1e-06, + "loss": 0.6145, + "num_input_tokens_seen": 181736656, + "step": 3244 + }, + { + "epoch": 7.224944320712694, + "loss": 0.5160667896270752, + "loss_ce": 0.00019765175238717347, + "loss_iou": 0.2314453125, + "loss_num": 0.0107421875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 181736656, + "step": 3244 + }, + { + "epoch": 7.2271714922049, + "grad_norm": 13.982433319091797, + "learning_rate": 1e-06, + "loss": 0.5749, + "num_input_tokens_seen": 181794728, + "step": 3245 + }, + { + "epoch": 7.2271714922049, + "loss": 0.4950827658176422, + "loss_ce": 0.0002097318647429347, + "loss_iou": 0.1953125, + "loss_num": 0.02099609375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 181794728, + "step": 3245 + }, + { + "epoch": 7.229398663697105, + "grad_norm": 19.076622009277344, + "learning_rate": 1e-06, + "loss": 0.5997, + "num_input_tokens_seen": 181850576, + "step": 3246 + }, + { + "epoch": 7.229398663697105, + "loss": 0.5747532844543457, + "loss_ce": 0.00029037147760391235, + "loss_iou": 0.23046875, + "loss_num": 0.0225830078125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 181850576, + "step": 3246 + }, + { + "epoch": 7.23162583518931, + "grad_norm": 36.7861442565918, + "learning_rate": 1e-06, + "loss": 0.8128, + "num_input_tokens_seen": 181905156, + "step": 3247 + }, + { + "epoch": 7.23162583518931, + "loss": 0.9604984521865845, + "loss_ce": 0.0010258200345560908, + "loss_iou": 0.40234375, + "loss_num": 0.03076171875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 181905156, + "step": 3247 + }, + { + "epoch": 7.233853006681515, + "grad_norm": 28.286663055419922, + "learning_rate": 1e-06, + "loss": 0.7149, + "num_input_tokens_seen": 181961164, + "step": 3248 + }, + { + "epoch": 7.233853006681515, + "loss": 0.7203893065452576, + "loss_ce": 0.0001744742039591074, + "loss_iou": 0.33203125, + "loss_num": 0.01141357421875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 181961164, + "step": 3248 + }, + { + "epoch": 7.23608017817372, + "grad_norm": 18.691064834594727, + "learning_rate": 1e-06, + "loss": 0.6303, + "num_input_tokens_seen": 182020592, + "step": 3249 + }, + { + "epoch": 7.23608017817372, + "loss": 0.6998783946037292, + "loss_ce": 0.00017135526286438107, + "loss_iou": 0.296875, + "loss_num": 0.0211181640625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 182020592, + "step": 3249 + }, + { + "epoch": 7.2383073496659245, + "grad_norm": 15.494166374206543, + "learning_rate": 1e-06, + "loss": 0.6059, + "num_input_tokens_seen": 182075240, + "step": 3250 + }, + { + "epoch": 7.2383073496659245, + "eval_seeclick_web_CIoU": 0.5736072361469269, + "eval_seeclick_web_GIoU": 0.5713272094726562, + "eval_seeclick_web_IoU": 0.5900914669036865, + "eval_seeclick_web_MAE_all": 0.01693468587473035, + "eval_seeclick_web_MAE_h": 0.009493312099948525, + "eval_seeclick_web_MAE_w": 0.017465373501181602, + "eval_seeclick_web_MAE_x_boxes": 0.009315244387835264, + "eval_seeclick_web_MAE_y_boxes": 0.022717589512467384, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9355779886245728, + "eval_seeclick_web_loss_ce": 0.00025881038163788617, + "eval_seeclick_web_loss_iou": 0.4263916015625, + "eval_seeclick_web_loss_num": 0.013437271118164062, + "eval_seeclick_web_loss_xval": 0.920166015625, + "eval_seeclick_web_runtime": 22.493, + "eval_seeclick_web_samples_per_second": 2.223, + "eval_seeclick_web_steps_per_second": 0.089, + "num_input_tokens_seen": 182075240, + "step": 3250 + }, + { + "epoch": 7.2383073496659245, + "eval_icons_CIoU": 0.2808741182088852, + "eval_icons_GIoU": 0.3052249103784561, + "eval_icons_IoU": 0.3607981503009796, + "eval_icons_MAE_all": 0.06516656465828419, + "eval_icons_MAE_h": 0.03764822334051132, + "eval_icons_MAE_w": 0.07068785838782787, + "eval_icons_MAE_x_boxes": 0.06141933798789978, + "eval_icons_MAE_y_boxes": 0.03828867059201002, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.7471874952316284, + "eval_icons_loss_ce": 0.0003105820360360667, + "eval_icons_loss_iou": 0.67919921875, + "eval_icons_loss_num": 0.06205558776855469, + "eval_icons_loss_xval": 1.66943359375, + "eval_icons_runtime": 21.7009, + "eval_icons_samples_per_second": 2.304, + "eval_icons_steps_per_second": 0.092, + "num_input_tokens_seen": 182075240, + "step": 3250 + }, + { + "epoch": 7.2383073496659245, + "eval_screenspot_CIoU": 0.342861811319987, + "eval_screenspot_GIoU": 0.3614034950733185, + "eval_screenspot_IoU": 0.42649880051612854, + "eval_screenspot_MAE_all": 0.06335503856341045, + "eval_screenspot_MAE_h": 0.03755492903292179, + "eval_screenspot_MAE_w": 0.07218488802512486, + "eval_screenspot_MAE_x_boxes": 0.07481345720589161, + "eval_screenspot_MAE_y_boxes": 0.04668992726753155, + "eval_screenspot_inside_bbox": 0.6729166706403097, + "eval_screenspot_loss": 1.6553105115890503, + "eval_screenspot_loss_ce": 0.00030076557110684615, + "eval_screenspot_loss_iou": 0.6756998697916666, + "eval_screenspot_loss_num": 0.07468668619791667, + "eval_screenspot_loss_xval": 1.7252604166666667, + "eval_screenspot_runtime": 36.4126, + "eval_screenspot_samples_per_second": 2.444, + "eval_screenspot_steps_per_second": 0.082, + "num_input_tokens_seen": 182075240, + "step": 3250 + }, + { + "epoch": 7.2383073496659245, + "eval_compot_CIoU": 0.35138705372810364, + "eval_compot_GIoU": 0.3623329848051071, + "eval_compot_IoU": 0.4089464396238327, + "eval_compot_MAE_all": 0.01784850051626563, + "eval_compot_MAE_h": 0.009071170818060637, + "eval_compot_MAE_w": 0.02207251265645027, + "eval_compot_MAE_x_boxes": 0.029478789307177067, + "eval_compot_MAE_y_boxes": 0.0062866308726370335, + "eval_compot_inside_bbox": 0.6458333432674408, + "eval_compot_loss": 1.3766494989395142, + "eval_compot_loss_ce": 0.00024336049682460725, + "eval_compot_loss_iou": 0.6270751953125, + "eval_compot_loss_num": 0.0166168212890625, + "eval_compot_loss_xval": 1.3369140625, + "eval_compot_runtime": 22.3641, + "eval_compot_samples_per_second": 2.236, + "eval_compot_steps_per_second": 0.089, + "num_input_tokens_seen": 182075240, + "step": 3250 + }, + { + "epoch": 7.2383073496659245, + "eval_custom_ui_val_CIoU": 0.46513410409291583, + "eval_custom_ui_val_GIoU": 0.47927956614229417, + "eval_custom_ui_val_IoU": 0.5227026873164706, + "eval_custom_ui_val_MAE_all": 0.03009833147128423, + "eval_custom_ui_val_MAE_h": 0.017143823982526857, + "eval_custom_ui_val_MAE_w": 0.038110896220637694, + "eval_custom_ui_val_MAE_x_boxes": 0.03446255738122596, + "eval_custom_ui_val_MAE_y_boxes": 0.015643620294415288, + "eval_custom_ui_val_inside_bbox": 0.7527006202273898, + "eval_custom_ui_val_loss": 1.198036789894104, + "eval_custom_ui_val_loss_ce": 0.000274412335582181, + "eval_custom_ui_val_loss_iou": 0.5103217230902778, + "eval_custom_ui_val_loss_num": 0.028072569105360243, + "eval_custom_ui_val_loss_xval": 1.1607801649305556, + "eval_custom_ui_val_runtime": 60.3528, + "eval_custom_ui_val_samples_per_second": 4.391, + "eval_custom_ui_val_steps_per_second": 0.149, + "num_input_tokens_seen": 182075240, + "step": 3250 + }, + { + "epoch": 7.2383073496659245, + "loss": 0.9084522724151611, + "loss_ce": 0.00024910393403843045, + "loss_iou": 0.396484375, + "loss_num": 0.023193359375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 182075240, + "step": 3250 + }, + { + "epoch": 7.240534521158129, + "grad_norm": 24.0264892578125, + "learning_rate": 1e-06, + "loss": 0.7723, + "num_input_tokens_seen": 182134612, + "step": 3251 + }, + { + "epoch": 7.240534521158129, + "loss": 0.7384548187255859, + "loss_ce": 0.00017352268332615495, + "loss_iou": 0.291015625, + "loss_num": 0.03125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 182134612, + "step": 3251 + }, + { + "epoch": 7.242761692650334, + "grad_norm": 17.387958526611328, + "learning_rate": 1e-06, + "loss": 0.4481, + "num_input_tokens_seen": 182192124, + "step": 3252 + }, + { + "epoch": 7.242761692650334, + "loss": 0.40355396270751953, + "loss_ce": 0.004872324876487255, + "loss_iou": 0.171875, + "loss_num": 0.01104736328125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 182192124, + "step": 3252 + }, + { + "epoch": 7.244988864142539, + "grad_norm": 26.076507568359375, + "learning_rate": 1e-06, + "loss": 0.6101, + "num_input_tokens_seen": 182248140, + "step": 3253 + }, + { + "epoch": 7.244988864142539, + "loss": 0.5168513059616089, + "loss_ce": 0.0002497411041986197, + "loss_iou": 0.220703125, + "loss_num": 0.01513671875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 182248140, + "step": 3253 + }, + { + "epoch": 7.247216035634744, + "grad_norm": 15.936118125915527, + "learning_rate": 1e-06, + "loss": 0.6972, + "num_input_tokens_seen": 182302560, + "step": 3254 + }, + { + "epoch": 7.247216035634744, + "loss": 0.6615881323814392, + "loss_ce": 0.00027220824267715216, + "loss_iou": 0.251953125, + "loss_num": 0.031982421875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 182302560, + "step": 3254 + }, + { + "epoch": 7.249443207126949, + "grad_norm": 21.23505210876465, + "learning_rate": 1e-06, + "loss": 0.6678, + "num_input_tokens_seen": 182359212, + "step": 3255 + }, + { + "epoch": 7.249443207126949, + "loss": 0.6819499135017395, + "loss_ce": 0.00024824903812259436, + "loss_iou": 0.263671875, + "loss_num": 0.030517578125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 182359212, + "step": 3255 + }, + { + "epoch": 7.251670378619154, + "grad_norm": 14.325440406799316, + "learning_rate": 1e-06, + "loss": 0.6858, + "num_input_tokens_seen": 182412908, + "step": 3256 + }, + { + "epoch": 7.251670378619154, + "loss": 0.6170239448547363, + "loss_ce": 0.0008129666093736887, + "loss_iou": 0.251953125, + "loss_num": 0.022705078125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 182412908, + "step": 3256 + }, + { + "epoch": 7.2538975501113585, + "grad_norm": 16.26470184326172, + "learning_rate": 1e-06, + "loss": 0.9041, + "num_input_tokens_seen": 182471924, + "step": 3257 + }, + { + "epoch": 7.2538975501113585, + "loss": 0.7431246042251587, + "loss_ce": 0.00044888071715831757, + "loss_iou": 0.310546875, + "loss_num": 0.0240478515625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 182471924, + "step": 3257 + }, + { + "epoch": 7.256124721603563, + "grad_norm": 18.07449722290039, + "learning_rate": 1e-06, + "loss": 0.5345, + "num_input_tokens_seen": 182526908, + "step": 3258 + }, + { + "epoch": 7.256124721603563, + "loss": 0.549538791179657, + "loss_ce": 0.00022235384676605463, + "loss_iou": 0.2421875, + "loss_num": 0.012939453125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 182526908, + "step": 3258 + }, + { + "epoch": 7.258351893095768, + "grad_norm": 15.7253999710083, + "learning_rate": 1e-06, + "loss": 0.6923, + "num_input_tokens_seen": 182583592, + "step": 3259 + }, + { + "epoch": 7.258351893095768, + "loss": 0.9013206958770752, + "loss_ce": 0.001052145496942103, + "loss_iou": 0.4140625, + "loss_num": 0.01446533203125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 182583592, + "step": 3259 + }, + { + "epoch": 7.260579064587973, + "grad_norm": 20.088714599609375, + "learning_rate": 1e-06, + "loss": 0.535, + "num_input_tokens_seen": 182641252, + "step": 3260 + }, + { + "epoch": 7.260579064587973, + "loss": 0.5077439546585083, + "loss_ce": 0.00017562185530550778, + "loss_iou": 0.228515625, + "loss_num": 0.01019287109375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 182641252, + "step": 3260 + }, + { + "epoch": 7.262806236080178, + "grad_norm": 15.09283447265625, + "learning_rate": 1e-06, + "loss": 0.5227, + "num_input_tokens_seen": 182697120, + "step": 3261 + }, + { + "epoch": 7.262806236080178, + "loss": 0.43932655453681946, + "loss_ce": 0.00017860504158306867, + "loss_iou": 0.1826171875, + "loss_num": 0.01470947265625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 182697120, + "step": 3261 + }, + { + "epoch": 7.265033407572383, + "grad_norm": 19.676998138427734, + "learning_rate": 1e-06, + "loss": 0.6918, + "num_input_tokens_seen": 182752892, + "step": 3262 + }, + { + "epoch": 7.265033407572383, + "loss": 0.630585789680481, + "loss_ce": 0.00021469607600010931, + "loss_iou": 0.275390625, + "loss_num": 0.016357421875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 182752892, + "step": 3262 + }, + { + "epoch": 7.267260579064588, + "grad_norm": 110.68122863769531, + "learning_rate": 1e-06, + "loss": 0.7325, + "num_input_tokens_seen": 182810764, + "step": 3263 + }, + { + "epoch": 7.267260579064588, + "loss": 0.7132278084754944, + "loss_ce": 0.0002150905056623742, + "loss_iou": 0.310546875, + "loss_num": 0.01806640625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 182810764, + "step": 3263 + }, + { + "epoch": 7.2694877505567925, + "grad_norm": 23.634044647216797, + "learning_rate": 1e-06, + "loss": 0.7038, + "num_input_tokens_seen": 182868244, + "step": 3264 + }, + { + "epoch": 7.2694877505567925, + "loss": 0.6935637593269348, + "loss_ce": 0.002157476032152772, + "loss_iou": 0.28515625, + "loss_num": 0.0244140625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 182868244, + "step": 3264 + }, + { + "epoch": 7.271714922048997, + "grad_norm": 23.025278091430664, + "learning_rate": 1e-06, + "loss": 0.5485, + "num_input_tokens_seen": 182924824, + "step": 3265 + }, + { + "epoch": 7.271714922048997, + "loss": 0.5591219067573547, + "loss_ce": 0.00028401942108757794, + "loss_iou": 0.2578125, + "loss_num": 0.00860595703125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 182924824, + "step": 3265 + }, + { + "epoch": 7.273942093541203, + "grad_norm": 15.47992992401123, + "learning_rate": 1e-06, + "loss": 0.6323, + "num_input_tokens_seen": 182979672, + "step": 3266 + }, + { + "epoch": 7.273942093541203, + "loss": 0.7570323944091797, + "loss_ce": 0.00019647592853289098, + "loss_iou": 0.31640625, + "loss_num": 0.0252685546875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 182979672, + "step": 3266 + }, + { + "epoch": 7.276169265033408, + "grad_norm": 19.16633415222168, + "learning_rate": 1e-06, + "loss": 0.6991, + "num_input_tokens_seen": 183035308, + "step": 3267 + }, + { + "epoch": 7.276169265033408, + "loss": 0.8601139783859253, + "loss_ce": 0.00037270993925631046, + "loss_iou": 0.34765625, + "loss_num": 0.03271484375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 183035308, + "step": 3267 + }, + { + "epoch": 7.278396436525613, + "grad_norm": 17.217559814453125, + "learning_rate": 1e-06, + "loss": 0.8387, + "num_input_tokens_seen": 183093172, + "step": 3268 + }, + { + "epoch": 7.278396436525613, + "loss": 0.6842477321624756, + "loss_ce": 0.00016567618877161294, + "loss_iou": 0.2734375, + "loss_num": 0.0274658203125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 183093172, + "step": 3268 + }, + { + "epoch": 7.280623608017818, + "grad_norm": 16.485475540161133, + "learning_rate": 1e-06, + "loss": 0.6404, + "num_input_tokens_seen": 183148780, + "step": 3269 + }, + { + "epoch": 7.280623608017818, + "loss": 0.7775435447692871, + "loss_ce": 0.00019982852973043919, + "loss_iou": 0.328125, + "loss_num": 0.0240478515625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 183148780, + "step": 3269 + }, + { + "epoch": 7.282850779510023, + "grad_norm": 14.587860107421875, + "learning_rate": 1e-06, + "loss": 0.6758, + "num_input_tokens_seen": 183205300, + "step": 3270 + }, + { + "epoch": 7.282850779510023, + "loss": 0.5569312572479248, + "loss_ce": 0.0001685761963017285, + "loss_iou": 0.2197265625, + "loss_num": 0.0233154296875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 183205300, + "step": 3270 + }, + { + "epoch": 7.285077951002227, + "grad_norm": 20.71013832092285, + "learning_rate": 1e-06, + "loss": 0.4886, + "num_input_tokens_seen": 183261900, + "step": 3271 + }, + { + "epoch": 7.285077951002227, + "loss": 0.45970138907432556, + "loss_ce": 0.00022872036788612604, + "loss_iou": 0.1806640625, + "loss_num": 0.01953125, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 183261900, + "step": 3271 + }, + { + "epoch": 7.287305122494432, + "grad_norm": 15.918169975280762, + "learning_rate": 1e-06, + "loss": 0.8508, + "num_input_tokens_seen": 183317060, + "step": 3272 + }, + { + "epoch": 7.287305122494432, + "loss": 0.6296719312667847, + "loss_ce": 0.00015531133976764977, + "loss_iou": 0.26171875, + "loss_num": 0.0213623046875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 183317060, + "step": 3272 + }, + { + "epoch": 7.289532293986637, + "grad_norm": 13.940874099731445, + "learning_rate": 1e-06, + "loss": 0.6049, + "num_input_tokens_seen": 183372896, + "step": 3273 + }, + { + "epoch": 7.289532293986637, + "loss": 0.6454752087593079, + "loss_ce": 0.00021151437249500304, + "loss_iou": 0.263671875, + "loss_num": 0.0233154296875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 183372896, + "step": 3273 + }, + { + "epoch": 7.291759465478842, + "grad_norm": 13.392332077026367, + "learning_rate": 1e-06, + "loss": 0.5429, + "num_input_tokens_seen": 183431176, + "step": 3274 + }, + { + "epoch": 7.291759465478842, + "loss": 0.5099714398384094, + "loss_ce": 0.00020580792624969035, + "loss_iou": 0.2314453125, + "loss_num": 0.0093994140625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 183431176, + "step": 3274 + }, + { + "epoch": 7.293986636971047, + "grad_norm": 31.767141342163086, + "learning_rate": 1e-06, + "loss": 0.6066, + "num_input_tokens_seen": 183485436, + "step": 3275 + }, + { + "epoch": 7.293986636971047, + "loss": 0.7388736605644226, + "loss_ce": 0.00022620504023507237, + "loss_iou": 0.287109375, + "loss_num": 0.03271484375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 183485436, + "step": 3275 + }, + { + "epoch": 7.296213808463252, + "grad_norm": 27.66456413269043, + "learning_rate": 1e-06, + "loss": 0.6369, + "num_input_tokens_seen": 183542084, + "step": 3276 + }, + { + "epoch": 7.296213808463252, + "loss": 0.4608932137489319, + "loss_ce": 0.00019985750259365886, + "loss_iou": 0.203125, + "loss_num": 0.010986328125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 183542084, + "step": 3276 + }, + { + "epoch": 7.298440979955457, + "grad_norm": 23.609827041625977, + "learning_rate": 1e-06, + "loss": 0.7595, + "num_input_tokens_seen": 183597960, + "step": 3277 + }, + { + "epoch": 7.298440979955457, + "loss": 0.683929443359375, + "loss_ce": 0.0003357165842317045, + "loss_iou": 0.3046875, + "loss_num": 0.01446533203125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 183597960, + "step": 3277 + }, + { + "epoch": 7.3006681514476615, + "grad_norm": 19.84480094909668, + "learning_rate": 1e-06, + "loss": 0.5777, + "num_input_tokens_seen": 183655488, + "step": 3278 + }, + { + "epoch": 7.3006681514476615, + "loss": 0.7001070976257324, + "loss_ce": 0.00015596086450386792, + "loss_iou": 0.3046875, + "loss_num": 0.017822265625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 183655488, + "step": 3278 + }, + { + "epoch": 7.302895322939866, + "grad_norm": 20.375669479370117, + "learning_rate": 1e-06, + "loss": 0.6575, + "num_input_tokens_seen": 183712908, + "step": 3279 + }, + { + "epoch": 7.302895322939866, + "loss": 0.7356512546539307, + "loss_ce": 0.00029969203751534224, + "loss_iou": 0.30859375, + "loss_num": 0.0235595703125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 183712908, + "step": 3279 + }, + { + "epoch": 7.305122494432071, + "grad_norm": 24.645275115966797, + "learning_rate": 1e-06, + "loss": 0.6105, + "num_input_tokens_seen": 183770608, + "step": 3280 + }, + { + "epoch": 7.305122494432071, + "loss": 0.6635428667068481, + "loss_ce": 0.0002127783081959933, + "loss_iou": 0.30078125, + "loss_num": 0.01239013671875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 183770608, + "step": 3280 + }, + { + "epoch": 7.307349665924276, + "grad_norm": 14.512449264526367, + "learning_rate": 1e-06, + "loss": 0.7044, + "num_input_tokens_seen": 183825576, + "step": 3281 + }, + { + "epoch": 7.307349665924276, + "loss": 0.9417954087257385, + "loss_ce": 0.00038921748637221754, + "loss_iou": 0.384765625, + "loss_num": 0.0341796875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 183825576, + "step": 3281 + }, + { + "epoch": 7.309576837416481, + "grad_norm": 19.870107650756836, + "learning_rate": 1e-06, + "loss": 0.6247, + "num_input_tokens_seen": 183881588, + "step": 3282 + }, + { + "epoch": 7.309576837416481, + "loss": 0.8251048922538757, + "loss_ce": 0.00015370306209661067, + "loss_iou": 0.345703125, + "loss_num": 0.026611328125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 183881588, + "step": 3282 + }, + { + "epoch": 7.311804008908686, + "grad_norm": 55.68107604980469, + "learning_rate": 1e-06, + "loss": 0.5833, + "num_input_tokens_seen": 183940448, + "step": 3283 + }, + { + "epoch": 7.311804008908686, + "loss": 0.564154326915741, + "loss_ce": 0.00018950334924738854, + "loss_iou": 0.244140625, + "loss_num": 0.0150146484375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 183940448, + "step": 3283 + }, + { + "epoch": 7.314031180400891, + "grad_norm": 22.890865325927734, + "learning_rate": 1e-06, + "loss": 0.5107, + "num_input_tokens_seen": 183997440, + "step": 3284 + }, + { + "epoch": 7.314031180400891, + "loss": 0.40723344683647156, + "loss_ce": 0.000251030403887853, + "loss_iou": 0.185546875, + "loss_num": 0.00732421875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 183997440, + "step": 3284 + }, + { + "epoch": 7.3162583518930955, + "grad_norm": 38.493743896484375, + "learning_rate": 1e-06, + "loss": 0.7763, + "num_input_tokens_seen": 184054300, + "step": 3285 + }, + { + "epoch": 7.3162583518930955, + "loss": 0.7445439100265503, + "loss_ce": 0.00015913081006146967, + "loss_iou": 0.330078125, + "loss_num": 0.01708984375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 184054300, + "step": 3285 + }, + { + "epoch": 7.3184855233853, + "grad_norm": 12.925439834594727, + "learning_rate": 1e-06, + "loss": 0.4989, + "num_input_tokens_seen": 184109764, + "step": 3286 + }, + { + "epoch": 7.3184855233853, + "loss": 0.5402973294258118, + "loss_ce": 0.00025825732154771686, + "loss_iou": 0.240234375, + "loss_num": 0.01171875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 184109764, + "step": 3286 + }, + { + "epoch": 7.320712694877505, + "grad_norm": 14.311683654785156, + "learning_rate": 1e-06, + "loss": 0.7607, + "num_input_tokens_seen": 184166476, + "step": 3287 + }, + { + "epoch": 7.320712694877505, + "loss": 0.5723039507865906, + "loss_ce": 0.0004045310488436371, + "loss_iou": 0.255859375, + "loss_num": 0.01214599609375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 184166476, + "step": 3287 + }, + { + "epoch": 7.32293986636971, + "grad_norm": 21.740203857421875, + "learning_rate": 1e-06, + "loss": 0.7618, + "num_input_tokens_seen": 184220904, + "step": 3288 + }, + { + "epoch": 7.32293986636971, + "loss": 0.7254713773727417, + "loss_ce": 0.00031269137980416417, + "loss_iou": 0.283203125, + "loss_num": 0.031982421875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 184220904, + "step": 3288 + }, + { + "epoch": 7.325167037861915, + "grad_norm": 20.65580177307129, + "learning_rate": 1e-06, + "loss": 0.7546, + "num_input_tokens_seen": 184277784, + "step": 3289 + }, + { + "epoch": 7.325167037861915, + "loss": 0.6745328903198242, + "loss_ce": 0.0002165070327464491, + "loss_iou": 0.271484375, + "loss_num": 0.0262451171875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 184277784, + "step": 3289 + }, + { + "epoch": 7.327394209354121, + "grad_norm": 22.88839340209961, + "learning_rate": 1e-06, + "loss": 0.6154, + "num_input_tokens_seen": 184334484, + "step": 3290 + }, + { + "epoch": 7.327394209354121, + "loss": 0.6290842294692993, + "loss_ce": 0.0004221061826683581, + "loss_iou": 0.287109375, + "loss_num": 0.01129150390625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 184334484, + "step": 3290 + }, + { + "epoch": 7.3296213808463255, + "grad_norm": 19.395837783813477, + "learning_rate": 1e-06, + "loss": 0.8562, + "num_input_tokens_seen": 184389964, + "step": 3291 + }, + { + "epoch": 7.3296213808463255, + "loss": 0.7204995155334473, + "loss_ce": 0.00028470673714764416, + "loss_iou": 0.30078125, + "loss_num": 0.023681640625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 184389964, + "step": 3291 + }, + { + "epoch": 7.33184855233853, + "grad_norm": 18.469728469848633, + "learning_rate": 1e-06, + "loss": 0.6447, + "num_input_tokens_seen": 184441476, + "step": 3292 + }, + { + "epoch": 7.33184855233853, + "loss": 0.6571934819221497, + "loss_ce": 0.0002110537316184491, + "loss_iou": 0.2890625, + "loss_num": 0.0162353515625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 184441476, + "step": 3292 + }, + { + "epoch": 7.334075723830735, + "grad_norm": 21.28434944152832, + "learning_rate": 1e-06, + "loss": 0.637, + "num_input_tokens_seen": 184499576, + "step": 3293 + }, + { + "epoch": 7.334075723830735, + "loss": 0.7633267045021057, + "loss_ce": 0.00026520711253397167, + "loss_iou": 0.294921875, + "loss_num": 0.034912109375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 184499576, + "step": 3293 + }, + { + "epoch": 7.33630289532294, + "grad_norm": 14.56684398651123, + "learning_rate": 1e-06, + "loss": 0.7462, + "num_input_tokens_seen": 184557016, + "step": 3294 + }, + { + "epoch": 7.33630289532294, + "loss": 0.6941665410995483, + "loss_ce": 0.0001968404685612768, + "loss_iou": 0.30859375, + "loss_num": 0.01519775390625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 184557016, + "step": 3294 + }, + { + "epoch": 7.338530066815145, + "grad_norm": 33.30550003051758, + "learning_rate": 1e-06, + "loss": 0.7178, + "num_input_tokens_seen": 184612596, + "step": 3295 + }, + { + "epoch": 7.338530066815145, + "loss": 0.8899809122085571, + "loss_ce": 0.00033249915577471256, + "loss_iou": 0.349609375, + "loss_num": 0.037841796875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 184612596, + "step": 3295 + }, + { + "epoch": 7.34075723830735, + "grad_norm": 77.08248138427734, + "learning_rate": 1e-06, + "loss": 0.5869, + "num_input_tokens_seen": 184669968, + "step": 3296 + }, + { + "epoch": 7.34075723830735, + "loss": 0.4362117350101471, + "loss_ce": 0.0004207201418466866, + "loss_iou": 0.189453125, + "loss_num": 0.01123046875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 184669968, + "step": 3296 + }, + { + "epoch": 7.342984409799555, + "grad_norm": 17.855411529541016, + "learning_rate": 1e-06, + "loss": 0.606, + "num_input_tokens_seen": 184727100, + "step": 3297 + }, + { + "epoch": 7.342984409799555, + "loss": 0.5758390426635742, + "loss_ce": 0.00015543993504252285, + "loss_iou": 0.2353515625, + "loss_num": 0.0208740234375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 184727100, + "step": 3297 + }, + { + "epoch": 7.3452115812917596, + "grad_norm": 18.587966918945312, + "learning_rate": 1e-06, + "loss": 0.5917, + "num_input_tokens_seen": 184781776, + "step": 3298 + }, + { + "epoch": 7.3452115812917596, + "loss": 0.4892728626728058, + "loss_ce": 0.0003202102379873395, + "loss_iou": 0.2109375, + "loss_num": 0.01318359375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 184781776, + "step": 3298 + }, + { + "epoch": 7.347438752783964, + "grad_norm": 45.11115646362305, + "learning_rate": 1e-06, + "loss": 0.5559, + "num_input_tokens_seen": 184833932, + "step": 3299 + }, + { + "epoch": 7.347438752783964, + "loss": 0.6328990459442139, + "loss_ce": 0.00020860567747149616, + "loss_iou": 0.271484375, + "loss_num": 0.017822265625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 184833932, + "step": 3299 + }, + { + "epoch": 7.349665924276169, + "grad_norm": 22.717544555664062, + "learning_rate": 1e-06, + "loss": 0.5208, + "num_input_tokens_seen": 184888688, + "step": 3300 + }, + { + "epoch": 7.349665924276169, + "loss": 0.46978557109832764, + "loss_ce": 0.00018108604126609862, + "loss_iou": 0.2099609375, + "loss_num": 0.00994873046875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 184888688, + "step": 3300 + }, + { + "epoch": 7.351893095768374, + "grad_norm": 20.01310920715332, + "learning_rate": 1e-06, + "loss": 0.5374, + "num_input_tokens_seen": 184943680, + "step": 3301 + }, + { + "epoch": 7.351893095768374, + "loss": 0.451114296913147, + "loss_ce": 0.00018655930762179196, + "loss_iou": 0.1767578125, + "loss_num": 0.01953125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 184943680, + "step": 3301 + }, + { + "epoch": 7.354120267260579, + "grad_norm": 36.787132263183594, + "learning_rate": 1e-06, + "loss": 0.8746, + "num_input_tokens_seen": 184996220, + "step": 3302 + }, + { + "epoch": 7.354120267260579, + "loss": 0.8494809865951538, + "loss_ce": 0.0003598902840167284, + "loss_iou": 0.33984375, + "loss_num": 0.033447265625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 184996220, + "step": 3302 + }, + { + "epoch": 7.356347438752784, + "grad_norm": 22.735063552856445, + "learning_rate": 1e-06, + "loss": 0.7577, + "num_input_tokens_seen": 185047952, + "step": 3303 + }, + { + "epoch": 7.356347438752784, + "loss": 0.5736503601074219, + "loss_ce": 0.00016400867025367916, + "loss_iou": 0.255859375, + "loss_num": 0.01214599609375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 185047952, + "step": 3303 + }, + { + "epoch": 7.358574610244989, + "grad_norm": 15.140356063842773, + "learning_rate": 1e-06, + "loss": 0.6265, + "num_input_tokens_seen": 185104504, + "step": 3304 + }, + { + "epoch": 7.358574610244989, + "loss": 0.8113583922386169, + "loss_ce": 0.00020119798136875033, + "loss_iou": 0.34375, + "loss_num": 0.0247802734375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 185104504, + "step": 3304 + }, + { + "epoch": 7.360801781737194, + "grad_norm": 25.078630447387695, + "learning_rate": 1e-06, + "loss": 0.7108, + "num_input_tokens_seen": 185162056, + "step": 3305 + }, + { + "epoch": 7.360801781737194, + "loss": 0.7138417959213257, + "loss_ce": 0.00021878939878661186, + "loss_iou": 0.3125, + "loss_num": 0.017822265625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 185162056, + "step": 3305 + }, + { + "epoch": 7.363028953229398, + "grad_norm": 21.57823371887207, + "learning_rate": 1e-06, + "loss": 0.7628, + "num_input_tokens_seen": 185218544, + "step": 3306 + }, + { + "epoch": 7.363028953229398, + "loss": 1.0595169067382812, + "loss_ce": 0.00019062710634898394, + "loss_iou": 0.466796875, + "loss_num": 0.0252685546875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 185218544, + "step": 3306 + }, + { + "epoch": 7.365256124721603, + "grad_norm": 17.32356071472168, + "learning_rate": 1e-06, + "loss": 0.574, + "num_input_tokens_seen": 185276144, + "step": 3307 + }, + { + "epoch": 7.365256124721603, + "loss": 0.49967584013938904, + "loss_ce": 0.00016412066179327667, + "loss_iou": 0.21484375, + "loss_num": 0.01397705078125, + "loss_xval": 0.5, + "num_input_tokens_seen": 185276144, + "step": 3307 + }, + { + "epoch": 7.367483296213808, + "grad_norm": 17.607749938964844, + "learning_rate": 1e-06, + "loss": 0.6742, + "num_input_tokens_seen": 185333240, + "step": 3308 + }, + { + "epoch": 7.367483296213808, + "loss": 0.6454328298568726, + "loss_ce": 0.00016912329010665417, + "loss_iou": 0.27734375, + "loss_num": 0.01806640625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 185333240, + "step": 3308 + }, + { + "epoch": 7.369710467706013, + "grad_norm": 16.388914108276367, + "learning_rate": 1e-06, + "loss": 0.5042, + "num_input_tokens_seen": 185390156, + "step": 3309 + }, + { + "epoch": 7.369710467706013, + "loss": 0.4991820454597473, + "loss_ce": 0.00021965075575280935, + "loss_iou": 0.2275390625, + "loss_num": 0.009033203125, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 185390156, + "step": 3309 + }, + { + "epoch": 7.371937639198218, + "grad_norm": 18.82445526123047, + "learning_rate": 1e-06, + "loss": 0.6255, + "num_input_tokens_seen": 185443792, + "step": 3310 + }, + { + "epoch": 7.371937639198218, + "loss": 0.8694823980331421, + "loss_ce": 0.00034184064134024084, + "loss_iou": 0.3671875, + "loss_num": 0.02685546875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 185443792, + "step": 3310 + }, + { + "epoch": 7.374164810690424, + "grad_norm": 23.051231384277344, + "learning_rate": 1e-06, + "loss": 0.5239, + "num_input_tokens_seen": 185500112, + "step": 3311 + }, + { + "epoch": 7.374164810690424, + "loss": 0.6354948282241821, + "loss_ce": 0.00024095734988804907, + "loss_iou": 0.2734375, + "loss_num": 0.01806640625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 185500112, + "step": 3311 + }, + { + "epoch": 7.3763919821826285, + "grad_norm": 12.503715515136719, + "learning_rate": 1e-06, + "loss": 0.6206, + "num_input_tokens_seen": 185556836, + "step": 3312 + }, + { + "epoch": 7.3763919821826285, + "loss": 0.6298484206199646, + "loss_ce": 0.00020972295897081494, + "loss_iou": 0.279296875, + "loss_num": 0.01434326171875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 185556836, + "step": 3312 + }, + { + "epoch": 7.378619153674833, + "grad_norm": 39.04258728027344, + "learning_rate": 1e-06, + "loss": 0.7489, + "num_input_tokens_seen": 185611472, + "step": 3313 + }, + { + "epoch": 7.378619153674833, + "loss": 0.6686286330223083, + "loss_ce": 0.000171576626598835, + "loss_iou": 0.279296875, + "loss_num": 0.0216064453125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 185611472, + "step": 3313 + }, + { + "epoch": 7.380846325167038, + "grad_norm": 20.53398323059082, + "learning_rate": 1e-06, + "loss": 0.5664, + "num_input_tokens_seen": 185664932, + "step": 3314 + }, + { + "epoch": 7.380846325167038, + "loss": 0.5182798504829407, + "loss_ce": 0.00021345698041841388, + "loss_iou": 0.2265625, + "loss_num": 0.01275634765625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 185664932, + "step": 3314 + }, + { + "epoch": 7.383073496659243, + "grad_norm": 17.228731155395508, + "learning_rate": 1e-06, + "loss": 0.7039, + "num_input_tokens_seen": 185719144, + "step": 3315 + }, + { + "epoch": 7.383073496659243, + "loss": 0.6190841197967529, + "loss_ce": 0.00018761484534479678, + "loss_iou": 0.2890625, + "loss_num": 0.00836181640625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 185719144, + "step": 3315 + }, + { + "epoch": 7.385300668151448, + "grad_norm": 25.301191329956055, + "learning_rate": 1e-06, + "loss": 0.6395, + "num_input_tokens_seen": 185773492, + "step": 3316 + }, + { + "epoch": 7.385300668151448, + "loss": 0.483335018157959, + "loss_ce": 0.00018073133833240718, + "loss_iou": 0.1982421875, + "loss_num": 0.017578125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 185773492, + "step": 3316 + }, + { + "epoch": 7.387527839643653, + "grad_norm": 21.02423095703125, + "learning_rate": 1e-06, + "loss": 0.4614, + "num_input_tokens_seen": 185830376, + "step": 3317 + }, + { + "epoch": 7.387527839643653, + "loss": 0.5402176380157471, + "loss_ce": 0.00017858328646980226, + "loss_iou": 0.2421875, + "loss_num": 0.010986328125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 185830376, + "step": 3317 + }, + { + "epoch": 7.389755011135858, + "grad_norm": 17.576339721679688, + "learning_rate": 1e-06, + "loss": 0.607, + "num_input_tokens_seen": 185884160, + "step": 3318 + }, + { + "epoch": 7.389755011135858, + "loss": 0.7509297728538513, + "loss_ce": 0.00019736467220354825, + "loss_iou": 0.33203125, + "loss_num": 0.0172119140625, + "loss_xval": 0.75, + "num_input_tokens_seen": 185884160, + "step": 3318 + }, + { + "epoch": 7.3919821826280625, + "grad_norm": 26.350475311279297, + "learning_rate": 1e-06, + "loss": 0.5901, + "num_input_tokens_seen": 185939524, + "step": 3319 + }, + { + "epoch": 7.3919821826280625, + "loss": 0.5638967156410217, + "loss_ce": 0.0001760525774443522, + "loss_iou": 0.2373046875, + "loss_num": 0.017822265625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 185939524, + "step": 3319 + }, + { + "epoch": 7.394209354120267, + "grad_norm": 16.871248245239258, + "learning_rate": 1e-06, + "loss": 0.6936, + "num_input_tokens_seen": 185996464, + "step": 3320 + }, + { + "epoch": 7.394209354120267, + "loss": 0.6505089998245239, + "loss_ce": 0.0007286987965926528, + "loss_iou": 0.28125, + "loss_num": 0.01708984375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 185996464, + "step": 3320 + }, + { + "epoch": 7.396436525612472, + "grad_norm": 28.474199295043945, + "learning_rate": 1e-06, + "loss": 0.5359, + "num_input_tokens_seen": 186051964, + "step": 3321 + }, + { + "epoch": 7.396436525612472, + "loss": 0.5825638771057129, + "loss_ce": 0.00016640947433188558, + "loss_iou": 0.25390625, + "loss_num": 0.0146484375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 186051964, + "step": 3321 + }, + { + "epoch": 7.398663697104677, + "grad_norm": 15.175790786743164, + "learning_rate": 1e-06, + "loss": 0.5039, + "num_input_tokens_seen": 186106580, + "step": 3322 + }, + { + "epoch": 7.398663697104677, + "loss": 0.4596654176712036, + "loss_ce": 0.00019277595856692642, + "loss_iou": 0.203125, + "loss_num": 0.0106201171875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 186106580, + "step": 3322 + }, + { + "epoch": 7.400890868596882, + "grad_norm": 20.897014617919922, + "learning_rate": 1e-06, + "loss": 0.6625, + "num_input_tokens_seen": 186163020, + "step": 3323 + }, + { + "epoch": 7.400890868596882, + "loss": 0.5817232131958008, + "loss_ce": 0.00018024235032498837, + "loss_iou": 0.265625, + "loss_num": 0.009765625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 186163020, + "step": 3323 + }, + { + "epoch": 7.403118040089087, + "grad_norm": 17.83477210998535, + "learning_rate": 1e-06, + "loss": 0.5627, + "num_input_tokens_seen": 186218364, + "step": 3324 + }, + { + "epoch": 7.403118040089087, + "loss": 0.580369234085083, + "loss_ce": 0.0002911181654781103, + "loss_iou": 0.2294921875, + "loss_num": 0.0242919921875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 186218364, + "step": 3324 + }, + { + "epoch": 7.405345211581292, + "grad_norm": 18.244247436523438, + "learning_rate": 1e-06, + "loss": 0.7525, + "num_input_tokens_seen": 186271824, + "step": 3325 + }, + { + "epoch": 7.405345211581292, + "loss": 0.5565927028656006, + "loss_ce": 0.00019619996601250023, + "loss_iou": 0.25, + "loss_num": 0.01092529296875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 186271824, + "step": 3325 + }, + { + "epoch": 7.4075723830734965, + "grad_norm": 25.529314041137695, + "learning_rate": 1e-06, + "loss": 0.7204, + "num_input_tokens_seen": 186326604, + "step": 3326 + }, + { + "epoch": 7.4075723830734965, + "loss": 0.7397571802139282, + "loss_ce": 0.0002552796504460275, + "loss_iou": 0.32421875, + "loss_num": 0.0181884765625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 186326604, + "step": 3326 + }, + { + "epoch": 7.409799554565701, + "grad_norm": 15.39941692352295, + "learning_rate": 1e-06, + "loss": 0.562, + "num_input_tokens_seen": 186383040, + "step": 3327 + }, + { + "epoch": 7.409799554565701, + "loss": 0.6352710723876953, + "loss_ce": 0.00026131048798561096, + "loss_iou": 0.27734375, + "loss_num": 0.016357421875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 186383040, + "step": 3327 + }, + { + "epoch": 7.412026726057906, + "grad_norm": 14.803224563598633, + "learning_rate": 1e-06, + "loss": 0.641, + "num_input_tokens_seen": 186435624, + "step": 3328 + }, + { + "epoch": 7.412026726057906, + "loss": 0.512509286403656, + "loss_ce": 0.0005464061396196485, + "loss_iou": 0.2080078125, + "loss_num": 0.0191650390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 186435624, + "step": 3328 + }, + { + "epoch": 7.414253897550111, + "grad_norm": 19.84122085571289, + "learning_rate": 1e-06, + "loss": 0.6622, + "num_input_tokens_seen": 186492116, + "step": 3329 + }, + { + "epoch": 7.414253897550111, + "loss": 0.579806923866272, + "loss_ce": 0.00021710841974709183, + "loss_iou": 0.2490234375, + "loss_num": 0.0162353515625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 186492116, + "step": 3329 + }, + { + "epoch": 7.416481069042316, + "grad_norm": 18.60784149169922, + "learning_rate": 1e-06, + "loss": 0.7456, + "num_input_tokens_seen": 186549676, + "step": 3330 + }, + { + "epoch": 7.416481069042316, + "loss": 0.7443963885307312, + "loss_ce": 0.0002557601546868682, + "loss_iou": 0.302734375, + "loss_num": 0.0274658203125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 186549676, + "step": 3330 + }, + { + "epoch": 7.418708240534521, + "grad_norm": 21.594797134399414, + "learning_rate": 1e-06, + "loss": 0.7149, + "num_input_tokens_seen": 186604212, + "step": 3331 + }, + { + "epoch": 7.418708240534521, + "loss": 0.7223727107048035, + "loss_ce": 0.00020475327619351447, + "loss_iou": 0.314453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 186604212, + "step": 3331 + }, + { + "epoch": 7.420935412026726, + "grad_norm": 20.443817138671875, + "learning_rate": 1e-06, + "loss": 0.6048, + "num_input_tokens_seen": 186658836, + "step": 3332 + }, + { + "epoch": 7.420935412026726, + "loss": 0.5924441814422607, + "loss_ce": 0.000281118496786803, + "loss_iou": 0.24609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 186658836, + "step": 3332 + }, + { + "epoch": 7.4231625835189305, + "grad_norm": 22.519840240478516, + "learning_rate": 1e-06, + "loss": 0.4982, + "num_input_tokens_seen": 186716272, + "step": 3333 + }, + { + "epoch": 7.4231625835189305, + "loss": 0.5731725096702576, + "loss_ce": 0.0002965159364975989, + "loss_iou": 0.263671875, + "loss_num": 0.00927734375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 186716272, + "step": 3333 + }, + { + "epoch": 7.425389755011135, + "grad_norm": 16.776288986206055, + "learning_rate": 1e-06, + "loss": 0.5875, + "num_input_tokens_seen": 186769964, + "step": 3334 + }, + { + "epoch": 7.425389755011135, + "loss": 0.5091996192932129, + "loss_ce": 0.0001664264709688723, + "loss_iou": 0.205078125, + "loss_num": 0.019775390625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 186769964, + "step": 3334 + }, + { + "epoch": 7.427616926503341, + "grad_norm": 19.484222412109375, + "learning_rate": 1e-06, + "loss": 0.5844, + "num_input_tokens_seen": 186826344, + "step": 3335 + }, + { + "epoch": 7.427616926503341, + "loss": 0.6198055744171143, + "loss_ce": 0.0001766337372828275, + "loss_iou": 0.259765625, + "loss_num": 0.0205078125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 186826344, + "step": 3335 + }, + { + "epoch": 7.429844097995546, + "grad_norm": 17.908937454223633, + "learning_rate": 1e-06, + "loss": 0.624, + "num_input_tokens_seen": 186881588, + "step": 3336 + }, + { + "epoch": 7.429844097995546, + "loss": 0.6411352753639221, + "loss_ce": 0.0002661352336872369, + "loss_iou": 0.279296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 186881588, + "step": 3336 + }, + { + "epoch": 7.432071269487751, + "grad_norm": 24.69992446899414, + "learning_rate": 1e-06, + "loss": 0.5701, + "num_input_tokens_seen": 186939068, + "step": 3337 + }, + { + "epoch": 7.432071269487751, + "loss": 0.5966298580169678, + "loss_ce": 0.00019431284454185516, + "loss_iou": 0.2392578125, + "loss_num": 0.0235595703125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 186939068, + "step": 3337 + }, + { + "epoch": 7.434298440979956, + "grad_norm": 14.345844268798828, + "learning_rate": 1e-06, + "loss": 0.4817, + "num_input_tokens_seen": 186996924, + "step": 3338 + }, + { + "epoch": 7.434298440979956, + "loss": 0.6793899536132812, + "loss_ce": 0.00019069564586970955, + "loss_iou": 0.296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 186996924, + "step": 3338 + }, + { + "epoch": 7.436525612472161, + "grad_norm": 14.285273551940918, + "learning_rate": 1e-06, + "loss": 0.5295, + "num_input_tokens_seen": 187053760, + "step": 3339 + }, + { + "epoch": 7.436525612472161, + "loss": 0.7244080305099487, + "loss_ce": 0.0001648097822908312, + "loss_iou": 0.3125, + "loss_num": 0.0201416015625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 187053760, + "step": 3339 + }, + { + "epoch": 7.4387527839643655, + "grad_norm": 16.611595153808594, + "learning_rate": 1e-06, + "loss": 0.8745, + "num_input_tokens_seen": 187108896, + "step": 3340 + }, + { + "epoch": 7.4387527839643655, + "loss": 1.1862220764160156, + "loss_ce": 0.0001869781408458948, + "loss_iou": 0.498046875, + "loss_num": 0.037353515625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 187108896, + "step": 3340 + }, + { + "epoch": 7.44097995545657, + "grad_norm": 22.56696891784668, + "learning_rate": 1e-06, + "loss": 0.6369, + "num_input_tokens_seen": 187166280, + "step": 3341 + }, + { + "epoch": 7.44097995545657, + "loss": 0.7133273482322693, + "loss_ce": 0.00019260836415924132, + "loss_iou": 0.298828125, + "loss_num": 0.022705078125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 187166280, + "step": 3341 + }, + { + "epoch": 7.443207126948775, + "grad_norm": 31.73601531982422, + "learning_rate": 1e-06, + "loss": 0.7095, + "num_input_tokens_seen": 187223724, + "step": 3342 + }, + { + "epoch": 7.443207126948775, + "loss": 0.5013738870620728, + "loss_ce": 0.00027527124620974064, + "loss_iou": 0.2109375, + "loss_num": 0.0157470703125, + "loss_xval": 0.5, + "num_input_tokens_seen": 187223724, + "step": 3342 + }, + { + "epoch": 7.44543429844098, + "grad_norm": 15.37706184387207, + "learning_rate": 1e-06, + "loss": 0.457, + "num_input_tokens_seen": 187281536, + "step": 3343 + }, + { + "epoch": 7.44543429844098, + "loss": 0.43623799085617065, + "loss_ce": 0.00020283354388084263, + "loss_iou": 0.185546875, + "loss_num": 0.01287841796875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 187281536, + "step": 3343 + }, + { + "epoch": 7.447661469933185, + "grad_norm": 14.091643333435059, + "learning_rate": 1e-06, + "loss": 0.7446, + "num_input_tokens_seen": 187339076, + "step": 3344 + }, + { + "epoch": 7.447661469933185, + "loss": 0.8033976554870605, + "loss_ce": 0.00026650671497918665, + "loss_iou": 0.318359375, + "loss_num": 0.033203125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 187339076, + "step": 3344 + }, + { + "epoch": 7.44988864142539, + "grad_norm": 17.009387969970703, + "learning_rate": 1e-06, + "loss": 0.6225, + "num_input_tokens_seen": 187395496, + "step": 3345 + }, + { + "epoch": 7.44988864142539, + "loss": 0.7989903688430786, + "loss_ce": 0.001260860008187592, + "loss_iou": 0.28125, + "loss_num": 0.046875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 187395496, + "step": 3345 + }, + { + "epoch": 7.452115812917595, + "grad_norm": 16.3013916015625, + "learning_rate": 1e-06, + "loss": 0.7418, + "num_input_tokens_seen": 187452228, + "step": 3346 + }, + { + "epoch": 7.452115812917595, + "loss": 0.7919469475746155, + "loss_ce": 0.0001989098673220724, + "loss_iou": 0.3359375, + "loss_num": 0.024169921875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 187452228, + "step": 3346 + }, + { + "epoch": 7.4543429844097995, + "grad_norm": 25.37590217590332, + "learning_rate": 1e-06, + "loss": 0.6981, + "num_input_tokens_seen": 187507640, + "step": 3347 + }, + { + "epoch": 7.4543429844097995, + "loss": 0.5329313278198242, + "loss_ce": 0.0007047686958685517, + "loss_iou": 0.2314453125, + "loss_num": 0.013916015625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 187507640, + "step": 3347 + }, + { + "epoch": 7.456570155902004, + "grad_norm": 15.719435691833496, + "learning_rate": 1e-06, + "loss": 0.6663, + "num_input_tokens_seen": 187564908, + "step": 3348 + }, + { + "epoch": 7.456570155902004, + "loss": 0.7233518362045288, + "loss_ce": 0.00020732844131998718, + "loss_iou": 0.30078125, + "loss_num": 0.0240478515625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 187564908, + "step": 3348 + }, + { + "epoch": 7.458797327394209, + "grad_norm": 14.450872421264648, + "learning_rate": 1e-06, + "loss": 0.6857, + "num_input_tokens_seen": 187616496, + "step": 3349 + }, + { + "epoch": 7.458797327394209, + "loss": 0.5809314250946045, + "loss_ce": 0.0006091895047575235, + "loss_iou": 0.25390625, + "loss_num": 0.0147705078125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 187616496, + "step": 3349 + }, + { + "epoch": 7.461024498886414, + "grad_norm": 16.598936080932617, + "learning_rate": 1e-06, + "loss": 0.6052, + "num_input_tokens_seen": 187670232, + "step": 3350 + }, + { + "epoch": 7.461024498886414, + "loss": 0.479515016078949, + "loss_ce": 0.00017542547720950097, + "loss_iou": 0.20703125, + "loss_num": 0.0130615234375, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 187670232, + "step": 3350 + }, + { + "epoch": 7.463251670378619, + "grad_norm": 29.393518447875977, + "learning_rate": 1e-06, + "loss": 0.6938, + "num_input_tokens_seen": 187724992, + "step": 3351 + }, + { + "epoch": 7.463251670378619, + "loss": 0.8341479897499084, + "loss_ce": 0.00040772499050945044, + "loss_iou": 0.375, + "loss_num": 0.0167236328125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 187724992, + "step": 3351 + }, + { + "epoch": 7.465478841870824, + "grad_norm": 17.579273223876953, + "learning_rate": 1e-06, + "loss": 0.6167, + "num_input_tokens_seen": 187782612, + "step": 3352 + }, + { + "epoch": 7.465478841870824, + "loss": 0.7424448132514954, + "loss_ce": 0.0002573099918663502, + "loss_iou": 0.31640625, + "loss_num": 0.021728515625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 187782612, + "step": 3352 + }, + { + "epoch": 7.467706013363029, + "grad_norm": 20.376893997192383, + "learning_rate": 1e-06, + "loss": 0.6322, + "num_input_tokens_seen": 187838060, + "step": 3353 + }, + { + "epoch": 7.467706013363029, + "loss": 0.6124618649482727, + "loss_ce": 0.00040132386493496597, + "loss_iou": 0.2578125, + "loss_num": 0.019287109375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 187838060, + "step": 3353 + }, + { + "epoch": 7.4699331848552335, + "grad_norm": 27.1313533782959, + "learning_rate": 1e-06, + "loss": 0.8042, + "num_input_tokens_seen": 187891800, + "step": 3354 + }, + { + "epoch": 7.4699331848552335, + "loss": 0.8244494795799255, + "loss_ce": 0.00023073022020980716, + "loss_iou": 0.3828125, + "loss_num": 0.01165771484375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 187891800, + "step": 3354 + }, + { + "epoch": 7.472160356347438, + "grad_norm": 17.113248825073242, + "learning_rate": 1e-06, + "loss": 0.5967, + "num_input_tokens_seen": 187949628, + "step": 3355 + }, + { + "epoch": 7.472160356347438, + "loss": 0.414481520652771, + "loss_ce": 0.00017491632024757564, + "loss_iou": 0.1767578125, + "loss_num": 0.0123291015625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 187949628, + "step": 3355 + }, + { + "epoch": 7.474387527839644, + "grad_norm": 34.95979309082031, + "learning_rate": 1e-06, + "loss": 0.7369, + "num_input_tokens_seen": 188006312, + "step": 3356 + }, + { + "epoch": 7.474387527839644, + "loss": 0.7557699680328369, + "loss_ce": 0.00030729081481695175, + "loss_iou": 0.298828125, + "loss_num": 0.031982421875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 188006312, + "step": 3356 + }, + { + "epoch": 7.476614699331849, + "grad_norm": 18.19429588317871, + "learning_rate": 1e-06, + "loss": 0.7208, + "num_input_tokens_seen": 188062748, + "step": 3357 + }, + { + "epoch": 7.476614699331849, + "loss": 0.5873679518699646, + "loss_ce": 0.00027081643929705024, + "loss_iou": 0.25, + "loss_num": 0.01708984375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 188062748, + "step": 3357 + }, + { + "epoch": 7.478841870824054, + "grad_norm": 25.23208236694336, + "learning_rate": 1e-06, + "loss": 0.462, + "num_input_tokens_seen": 188118952, + "step": 3358 + }, + { + "epoch": 7.478841870824054, + "loss": 0.6015306711196899, + "loss_ce": 0.00021232501603662968, + "loss_iou": 0.26171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 188118952, + "step": 3358 + }, + { + "epoch": 7.481069042316259, + "grad_norm": 13.118025779724121, + "learning_rate": 1e-06, + "loss": 0.7661, + "num_input_tokens_seen": 188172408, + "step": 3359 + }, + { + "epoch": 7.481069042316259, + "loss": 0.7937259674072266, + "loss_ce": 0.00026893243193626404, + "loss_iou": 0.33984375, + "loss_num": 0.0224609375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 188172408, + "step": 3359 + }, + { + "epoch": 7.4832962138084635, + "grad_norm": 21.09063148498535, + "learning_rate": 1e-06, + "loss": 0.655, + "num_input_tokens_seen": 188228848, + "step": 3360 + }, + { + "epoch": 7.4832962138084635, + "loss": 0.7106525897979736, + "loss_ce": 0.000203342831810005, + "loss_iou": 0.271484375, + "loss_num": 0.03369140625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 188228848, + "step": 3360 + }, + { + "epoch": 7.485523385300668, + "grad_norm": 19.18509292602539, + "learning_rate": 1e-06, + "loss": 0.737, + "num_input_tokens_seen": 188284528, + "step": 3361 + }, + { + "epoch": 7.485523385300668, + "loss": 0.8408026695251465, + "loss_ce": 0.00022645562421530485, + "loss_iou": 0.359375, + "loss_num": 0.024658203125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 188284528, + "step": 3361 + }, + { + "epoch": 7.487750556792873, + "grad_norm": 21.143802642822266, + "learning_rate": 1e-06, + "loss": 0.6618, + "num_input_tokens_seen": 188340712, + "step": 3362 + }, + { + "epoch": 7.487750556792873, + "loss": 0.7464203238487244, + "loss_ce": 0.00020453488104976714, + "loss_iou": 0.33203125, + "loss_num": 0.016357421875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 188340712, + "step": 3362 + }, + { + "epoch": 7.489977728285078, + "grad_norm": 25.710002899169922, + "learning_rate": 1e-06, + "loss": 0.7553, + "num_input_tokens_seen": 188395496, + "step": 3363 + }, + { + "epoch": 7.489977728285078, + "loss": 0.898524820804596, + "loss_ce": 0.00033146512578241527, + "loss_iou": 0.373046875, + "loss_num": 0.030517578125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 188395496, + "step": 3363 + }, + { + "epoch": 7.492204899777283, + "grad_norm": 18.37621307373047, + "learning_rate": 1e-06, + "loss": 0.6617, + "num_input_tokens_seen": 188451292, + "step": 3364 + }, + { + "epoch": 7.492204899777283, + "loss": 0.6165107488632202, + "loss_ce": 0.00029981633997522295, + "loss_iou": 0.25, + "loss_num": 0.0234375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 188451292, + "step": 3364 + }, + { + "epoch": 7.494432071269488, + "grad_norm": 23.23649787902832, + "learning_rate": 1e-06, + "loss": 0.723, + "num_input_tokens_seen": 188510116, + "step": 3365 + }, + { + "epoch": 7.494432071269488, + "loss": 0.5230201482772827, + "loss_ce": 0.00019301672000437975, + "loss_iou": 0.232421875, + "loss_num": 0.0115966796875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 188510116, + "step": 3365 + }, + { + "epoch": 7.496659242761693, + "grad_norm": 19.028898239135742, + "learning_rate": 1e-06, + "loss": 0.6373, + "num_input_tokens_seen": 188566168, + "step": 3366 + }, + { + "epoch": 7.496659242761693, + "loss": 0.5705129504203796, + "loss_ce": 0.00020043338008690625, + "loss_iou": 0.255859375, + "loss_num": 0.01165771484375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 188566168, + "step": 3366 + }, + { + "epoch": 7.498886414253898, + "grad_norm": 60.06413650512695, + "learning_rate": 1e-06, + "loss": 0.6584, + "num_input_tokens_seen": 188622556, + "step": 3367 + }, + { + "epoch": 7.498886414253898, + "loss": 0.5773051977157593, + "loss_ce": 0.00021782593103125691, + "loss_iou": 0.248046875, + "loss_num": 0.01611328125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 188622556, + "step": 3367 + }, + { + "epoch": 7.501113585746102, + "grad_norm": 29.536270141601562, + "learning_rate": 1e-06, + "loss": 0.67, + "num_input_tokens_seen": 188675804, + "step": 3368 + }, + { + "epoch": 7.501113585746102, + "loss": 0.7318795919418335, + "loss_ce": 0.0001901389186969027, + "loss_iou": 0.326171875, + "loss_num": 0.015869140625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 188675804, + "step": 3368 + }, + { + "epoch": 7.503340757238307, + "grad_norm": 19.667911529541016, + "learning_rate": 1e-06, + "loss": 0.7901, + "num_input_tokens_seen": 188729532, + "step": 3369 + }, + { + "epoch": 7.503340757238307, + "loss": 0.6141993999481201, + "loss_ce": 0.00018573581473901868, + "loss_iou": 0.275390625, + "loss_num": 0.0123291015625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 188729532, + "step": 3369 + }, + { + "epoch": 7.505567928730512, + "grad_norm": 19.440563201904297, + "learning_rate": 1e-06, + "loss": 0.6208, + "num_input_tokens_seen": 188784192, + "step": 3370 + }, + { + "epoch": 7.505567928730512, + "loss": 0.608165979385376, + "loss_ce": 0.0002558160631451756, + "loss_iou": 0.259765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 188784192, + "step": 3370 + }, + { + "epoch": 7.507795100222717, + "grad_norm": 20.731998443603516, + "learning_rate": 1e-06, + "loss": 0.6593, + "num_input_tokens_seen": 188838624, + "step": 3371 + }, + { + "epoch": 7.507795100222717, + "loss": 0.6856630444526672, + "loss_ce": 0.00023825542302802205, + "loss_iou": 0.322265625, + "loss_num": 0.008056640625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 188838624, + "step": 3371 + }, + { + "epoch": 7.510022271714922, + "grad_norm": 21.571271896362305, + "learning_rate": 1e-06, + "loss": 0.6617, + "num_input_tokens_seen": 188896352, + "step": 3372 + }, + { + "epoch": 7.510022271714922, + "loss": 0.7389297485351562, + "loss_ce": 0.0001602266274858266, + "loss_iou": 0.333984375, + "loss_num": 0.01397705078125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 188896352, + "step": 3372 + }, + { + "epoch": 7.512249443207127, + "grad_norm": 26.354827880859375, + "learning_rate": 1e-06, + "loss": 0.5974, + "num_input_tokens_seen": 188953156, + "step": 3373 + }, + { + "epoch": 7.512249443207127, + "loss": 0.5054886341094971, + "loss_ce": 0.00023960010730661452, + "loss_iou": 0.228515625, + "loss_num": 0.00946044921875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 188953156, + "step": 3373 + }, + { + "epoch": 7.514476614699332, + "grad_norm": 33.86724853515625, + "learning_rate": 1e-06, + "loss": 0.529, + "num_input_tokens_seen": 189007540, + "step": 3374 + }, + { + "epoch": 7.514476614699332, + "loss": 0.34347233176231384, + "loss_ce": 0.0002106213360093534, + "loss_iou": 0.1474609375, + "loss_num": 0.0096435546875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 189007540, + "step": 3374 + }, + { + "epoch": 7.5167037861915365, + "grad_norm": 23.846332550048828, + "learning_rate": 1e-06, + "loss": 0.8868, + "num_input_tokens_seen": 189063288, + "step": 3375 + }, + { + "epoch": 7.5167037861915365, + "loss": 1.2485597133636475, + "loss_ce": 0.00026864674873650074, + "loss_iou": 0.51953125, + "loss_num": 0.041748046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 189063288, + "step": 3375 + }, + { + "epoch": 7.518930957683741, + "grad_norm": 34.920448303222656, + "learning_rate": 1e-06, + "loss": 0.619, + "num_input_tokens_seen": 189121948, + "step": 3376 + }, + { + "epoch": 7.518930957683741, + "loss": 0.4434422254562378, + "loss_ce": 0.0002049226895906031, + "loss_iou": 0.1640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 189121948, + "step": 3376 + }, + { + "epoch": 7.521158129175946, + "grad_norm": 45.45869445800781, + "learning_rate": 1e-06, + "loss": 0.6966, + "num_input_tokens_seen": 189178620, + "step": 3377 + }, + { + "epoch": 7.521158129175946, + "loss": 0.9557803869247437, + "loss_ce": 0.00021394254872575402, + "loss_iou": 0.376953125, + "loss_num": 0.04052734375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 189178620, + "step": 3377 + }, + { + "epoch": 7.523385300668151, + "grad_norm": 21.550317764282227, + "learning_rate": 1e-06, + "loss": 0.5571, + "num_input_tokens_seen": 189234636, + "step": 3378 + }, + { + "epoch": 7.523385300668151, + "loss": 0.6044266819953918, + "loss_ce": 0.00017864728579297662, + "loss_iou": 0.279296875, + "loss_num": 0.00946044921875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 189234636, + "step": 3378 + }, + { + "epoch": 7.525612472160356, + "grad_norm": 17.31205177307129, + "learning_rate": 1e-06, + "loss": 0.5324, + "num_input_tokens_seen": 189293248, + "step": 3379 + }, + { + "epoch": 7.525612472160356, + "loss": 0.5417789220809937, + "loss_ce": 0.00027502982993610203, + "loss_iou": 0.2275390625, + "loss_num": 0.017333984375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 189293248, + "step": 3379 + }, + { + "epoch": 7.527839643652561, + "grad_norm": 18.366722106933594, + "learning_rate": 1e-06, + "loss": 0.6955, + "num_input_tokens_seen": 189351236, + "step": 3380 + }, + { + "epoch": 7.527839643652561, + "loss": 0.6718355417251587, + "loss_ce": 0.00020462839165702462, + "loss_iou": 0.30859375, + "loss_num": 0.01055908203125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 189351236, + "step": 3380 + }, + { + "epoch": 7.5300668151447665, + "grad_norm": 28.49734878540039, + "learning_rate": 1e-06, + "loss": 0.6942, + "num_input_tokens_seen": 189408204, + "step": 3381 + }, + { + "epoch": 7.5300668151447665, + "loss": 0.6963520050048828, + "loss_ce": 0.00018498743884265423, + "loss_iou": 0.306640625, + "loss_num": 0.0162353515625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 189408204, + "step": 3381 + }, + { + "epoch": 7.532293986636971, + "grad_norm": 21.700590133666992, + "learning_rate": 1e-06, + "loss": 0.4877, + "num_input_tokens_seen": 189466496, + "step": 3382 + }, + { + "epoch": 7.532293986636971, + "loss": 0.6716306209564209, + "loss_ce": 0.00024389507598243654, + "loss_iou": 0.28125, + "loss_num": 0.02197265625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 189466496, + "step": 3382 + }, + { + "epoch": 7.534521158129176, + "grad_norm": 18.741180419921875, + "learning_rate": 1e-06, + "loss": 0.6188, + "num_input_tokens_seen": 189524108, + "step": 3383 + }, + { + "epoch": 7.534521158129176, + "loss": 0.4249844551086426, + "loss_ce": 0.0003018215938936919, + "loss_iou": 0.185546875, + "loss_num": 0.0108642578125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 189524108, + "step": 3383 + }, + { + "epoch": 7.536748329621381, + "grad_norm": 18.38628578186035, + "learning_rate": 1e-06, + "loss": 0.5341, + "num_input_tokens_seen": 189580564, + "step": 3384 + }, + { + "epoch": 7.536748329621381, + "loss": 0.6349685192108154, + "loss_ce": 0.00020285190839786083, + "loss_iou": 0.263671875, + "loss_num": 0.0211181640625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 189580564, + "step": 3384 + }, + { + "epoch": 7.538975501113586, + "grad_norm": 17.587261199951172, + "learning_rate": 1e-06, + "loss": 0.5419, + "num_input_tokens_seen": 189633448, + "step": 3385 + }, + { + "epoch": 7.538975501113586, + "loss": 0.590072512626648, + "loss_ce": 0.00022877063020132482, + "loss_iou": 0.2353515625, + "loss_num": 0.023681640625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 189633448, + "step": 3385 + }, + { + "epoch": 7.541202672605791, + "grad_norm": 23.872756958007812, + "learning_rate": 1e-06, + "loss": 0.5987, + "num_input_tokens_seen": 189690868, + "step": 3386 + }, + { + "epoch": 7.541202672605791, + "loss": 0.568671464920044, + "loss_ce": 0.00019003619672730565, + "loss_iou": 0.24609375, + "loss_num": 0.01544189453125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 189690868, + "step": 3386 + }, + { + "epoch": 7.543429844097996, + "grad_norm": 20.22979164123535, + "learning_rate": 1e-06, + "loss": 0.585, + "num_input_tokens_seen": 189746736, + "step": 3387 + }, + { + "epoch": 7.543429844097996, + "loss": 0.5246177911758423, + "loss_ce": 0.00020370190031826496, + "loss_iou": 0.2177734375, + "loss_num": 0.0174560546875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 189746736, + "step": 3387 + }, + { + "epoch": 7.5456570155902005, + "grad_norm": 18.41916275024414, + "learning_rate": 1e-06, + "loss": 0.6426, + "num_input_tokens_seen": 189803012, + "step": 3388 + }, + { + "epoch": 7.5456570155902005, + "loss": 0.621086597442627, + "loss_ce": 0.0002370403817621991, + "loss_iou": 0.28125, + "loss_num": 0.01202392578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 189803012, + "step": 3388 + }, + { + "epoch": 7.547884187082405, + "grad_norm": 189.67298889160156, + "learning_rate": 1e-06, + "loss": 0.5411, + "num_input_tokens_seen": 189859212, + "step": 3389 + }, + { + "epoch": 7.547884187082405, + "loss": 0.5852431654930115, + "loss_ce": 0.0002822045935317874, + "loss_iou": 0.251953125, + "loss_num": 0.01611328125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 189859212, + "step": 3389 + }, + { + "epoch": 7.55011135857461, + "grad_norm": 15.547959327697754, + "learning_rate": 1e-06, + "loss": 0.8417, + "num_input_tokens_seen": 189916504, + "step": 3390 + }, + { + "epoch": 7.55011135857461, + "loss": 0.5157856941223145, + "loss_ce": 0.00022171747696120292, + "loss_iou": 0.2138671875, + "loss_num": 0.0174560546875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 189916504, + "step": 3390 + }, + { + "epoch": 7.552338530066815, + "grad_norm": 26.860252380371094, + "learning_rate": 1e-06, + "loss": 0.7319, + "num_input_tokens_seen": 189973312, + "step": 3391 + }, + { + "epoch": 7.552338530066815, + "loss": 0.7284436225891113, + "loss_ce": 0.00017210739315487444, + "loss_iou": 0.28515625, + "loss_num": 0.031494140625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 189973312, + "step": 3391 + }, + { + "epoch": 7.55456570155902, + "grad_norm": 19.536853790283203, + "learning_rate": 1e-06, + "loss": 0.4387, + "num_input_tokens_seen": 190030136, + "step": 3392 + }, + { + "epoch": 7.55456570155902, + "loss": 0.4148935079574585, + "loss_ce": 0.00022065457596909255, + "loss_iou": 0.173828125, + "loss_num": 0.013427734375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 190030136, + "step": 3392 + }, + { + "epoch": 7.556792873051225, + "grad_norm": 20.840858459472656, + "learning_rate": 1e-06, + "loss": 0.6976, + "num_input_tokens_seen": 190086148, + "step": 3393 + }, + { + "epoch": 7.556792873051225, + "loss": 0.8776832222938538, + "loss_ce": 0.00024186470545828342, + "loss_iou": 0.369140625, + "loss_num": 0.02783203125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 190086148, + "step": 3393 + }, + { + "epoch": 7.55902004454343, + "grad_norm": 16.58432960510254, + "learning_rate": 1e-06, + "loss": 0.736, + "num_input_tokens_seen": 190141764, + "step": 3394 + }, + { + "epoch": 7.55902004454343, + "loss": 0.7458630204200745, + "loss_ce": 0.0002575473044998944, + "loss_iou": 0.291015625, + "loss_num": 0.032470703125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 190141764, + "step": 3394 + }, + { + "epoch": 7.5612472160356345, + "grad_norm": 15.23847770690918, + "learning_rate": 1e-06, + "loss": 0.7859, + "num_input_tokens_seen": 190197356, + "step": 3395 + }, + { + "epoch": 7.5612472160356345, + "loss": 0.8410194516181946, + "loss_ce": 0.00019913521828129888, + "loss_iou": 0.33984375, + "loss_num": 0.032470703125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 190197356, + "step": 3395 + }, + { + "epoch": 7.563474387527839, + "grad_norm": 17.79561424255371, + "learning_rate": 1e-06, + "loss": 0.5632, + "num_input_tokens_seen": 190256636, + "step": 3396 + }, + { + "epoch": 7.563474387527839, + "loss": 0.458886057138443, + "loss_ce": 0.00026789220282807946, + "loss_iou": 0.2158203125, + "loss_num": 0.0054931640625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 190256636, + "step": 3396 + }, + { + "epoch": 7.565701559020044, + "grad_norm": 15.726015090942383, + "learning_rate": 1e-06, + "loss": 0.4185, + "num_input_tokens_seen": 190311620, + "step": 3397 + }, + { + "epoch": 7.565701559020044, + "loss": 0.4230450987815857, + "loss_ce": 0.0010480263736099005, + "loss_iou": 0.1787109375, + "loss_num": 0.0128173828125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 190311620, + "step": 3397 + }, + { + "epoch": 7.567928730512249, + "grad_norm": 25.931352615356445, + "learning_rate": 1e-06, + "loss": 0.7059, + "num_input_tokens_seen": 190366792, + "step": 3398 + }, + { + "epoch": 7.567928730512249, + "loss": 0.7654964923858643, + "loss_ce": 0.0003598046605475247, + "loss_iou": 0.337890625, + "loss_num": 0.017822265625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 190366792, + "step": 3398 + }, + { + "epoch": 7.570155902004454, + "grad_norm": 21.16999053955078, + "learning_rate": 1e-06, + "loss": 0.7098, + "num_input_tokens_seen": 190422324, + "step": 3399 + }, + { + "epoch": 7.570155902004454, + "loss": 0.48839089274406433, + "loss_ce": 0.0001706905895844102, + "loss_iou": 0.212890625, + "loss_num": 0.01214599609375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 190422324, + "step": 3399 + }, + { + "epoch": 7.57238307349666, + "grad_norm": 23.648658752441406, + "learning_rate": 1e-06, + "loss": 0.4927, + "num_input_tokens_seen": 190480076, + "step": 3400 + }, + { + "epoch": 7.57238307349666, + "loss": 0.5361147522926331, + "loss_ce": 0.00016503711231052876, + "loss_iou": 0.2236328125, + "loss_num": 0.0179443359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 190480076, + "step": 3400 + }, + { + "epoch": 7.574610244988865, + "grad_norm": 15.166297912597656, + "learning_rate": 1e-06, + "loss": 0.5387, + "num_input_tokens_seen": 190535360, + "step": 3401 + }, + { + "epoch": 7.574610244988865, + "loss": 0.6972156763076782, + "loss_ce": 0.0001941586670000106, + "loss_iou": 0.294921875, + "loss_num": 0.021240234375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 190535360, + "step": 3401 + }, + { + "epoch": 7.5768374164810695, + "grad_norm": 21.948759078979492, + "learning_rate": 1e-06, + "loss": 0.6897, + "num_input_tokens_seen": 190589160, + "step": 3402 + }, + { + "epoch": 7.5768374164810695, + "loss": 0.5853911638259888, + "loss_ce": 0.0001860598858911544, + "loss_iou": 0.26171875, + "loss_num": 0.0126953125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 190589160, + "step": 3402 + }, + { + "epoch": 7.579064587973274, + "grad_norm": 36.27766799926758, + "learning_rate": 1e-06, + "loss": 0.9359, + "num_input_tokens_seen": 190644804, + "step": 3403 + }, + { + "epoch": 7.579064587973274, + "loss": 0.9691004753112793, + "loss_ce": 0.00022840322344563901, + "loss_iou": 0.41015625, + "loss_num": 0.030029296875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 190644804, + "step": 3403 + }, + { + "epoch": 7.581291759465479, + "grad_norm": 27.834016799926758, + "learning_rate": 1e-06, + "loss": 0.6663, + "num_input_tokens_seen": 190702760, + "step": 3404 + }, + { + "epoch": 7.581291759465479, + "loss": 0.7742569446563721, + "loss_ce": 0.000209053119760938, + "loss_iou": 0.345703125, + "loss_num": 0.0167236328125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 190702760, + "step": 3404 + }, + { + "epoch": 7.583518930957684, + "grad_norm": 18.967069625854492, + "learning_rate": 1e-06, + "loss": 0.5449, + "num_input_tokens_seen": 190758396, + "step": 3405 + }, + { + "epoch": 7.583518930957684, + "loss": 0.6806260943412781, + "loss_ce": 0.0002061867417069152, + "loss_iou": 0.296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 190758396, + "step": 3405 + }, + { + "epoch": 7.585746102449889, + "grad_norm": 22.21535873413086, + "learning_rate": 1e-06, + "loss": 0.668, + "num_input_tokens_seen": 190810616, + "step": 3406 + }, + { + "epoch": 7.585746102449889, + "loss": 0.7401872873306274, + "loss_ce": 0.0001970837765838951, + "loss_iou": 0.32421875, + "loss_num": 0.01806640625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 190810616, + "step": 3406 + }, + { + "epoch": 7.587973273942094, + "grad_norm": 16.877315521240234, + "learning_rate": 1e-06, + "loss": 0.5691, + "num_input_tokens_seen": 190866152, + "step": 3407 + }, + { + "epoch": 7.587973273942094, + "loss": 0.545766294002533, + "loss_ce": 0.00023407851404044777, + "loss_iou": 0.232421875, + "loss_num": 0.01611328125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 190866152, + "step": 3407 + }, + { + "epoch": 7.590200445434299, + "grad_norm": 29.44357681274414, + "learning_rate": 1e-06, + "loss": 0.5684, + "num_input_tokens_seen": 190924720, + "step": 3408 + }, + { + "epoch": 7.590200445434299, + "loss": 0.6174308061599731, + "loss_ce": 0.0002433276386000216, + "loss_iou": 0.25390625, + "loss_num": 0.022216796875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 190924720, + "step": 3408 + }, + { + "epoch": 7.5924276169265035, + "grad_norm": 22.013757705688477, + "learning_rate": 1e-06, + "loss": 0.6058, + "num_input_tokens_seen": 190982220, + "step": 3409 + }, + { + "epoch": 7.5924276169265035, + "loss": 0.6500968337059021, + "loss_ce": 0.0001944488030858338, + "loss_iou": 0.28515625, + "loss_num": 0.015869140625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 190982220, + "step": 3409 + }, + { + "epoch": 7.594654788418708, + "grad_norm": 19.142648696899414, + "learning_rate": 1e-06, + "loss": 0.6046, + "num_input_tokens_seen": 191039340, + "step": 3410 + }, + { + "epoch": 7.594654788418708, + "loss": 0.35581400990486145, + "loss_ce": 0.0001621594128664583, + "loss_iou": 0.1640625, + "loss_num": 0.00531005859375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 191039340, + "step": 3410 + }, + { + "epoch": 7.596881959910913, + "grad_norm": 19.273056030273438, + "learning_rate": 1e-06, + "loss": 0.6247, + "num_input_tokens_seen": 191097160, + "step": 3411 + }, + { + "epoch": 7.596881959910913, + "loss": 0.5918649435043335, + "loss_ce": 0.00019010651158168912, + "loss_iou": 0.2490234375, + "loss_num": 0.0186767578125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 191097160, + "step": 3411 + }, + { + "epoch": 7.599109131403118, + "grad_norm": 12.594167709350586, + "learning_rate": 1e-06, + "loss": 0.5376, + "num_input_tokens_seen": 191153944, + "step": 3412 + }, + { + "epoch": 7.599109131403118, + "loss": 0.5286160707473755, + "loss_ce": 0.00017369385750498623, + "loss_iou": 0.2314453125, + "loss_num": 0.01287841796875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 191153944, + "step": 3412 + }, + { + "epoch": 7.601336302895323, + "grad_norm": 21.295249938964844, + "learning_rate": 1e-06, + "loss": 0.7192, + "num_input_tokens_seen": 191210128, + "step": 3413 + }, + { + "epoch": 7.601336302895323, + "loss": 0.8668521642684937, + "loss_ce": 0.00015292070747818798, + "loss_iou": 0.3359375, + "loss_num": 0.038330078125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 191210128, + "step": 3413 + }, + { + "epoch": 7.603563474387528, + "grad_norm": 20.785724639892578, + "learning_rate": 1e-06, + "loss": 0.7145, + "num_input_tokens_seen": 191267092, + "step": 3414 + }, + { + "epoch": 7.603563474387528, + "loss": 0.6779365539550781, + "loss_ce": 0.0002021369436988607, + "loss_iou": 0.27734375, + "loss_num": 0.0247802734375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 191267092, + "step": 3414 + }, + { + "epoch": 7.605790645879733, + "grad_norm": 15.27004337310791, + "learning_rate": 1e-06, + "loss": 0.6446, + "num_input_tokens_seen": 191323100, + "step": 3415 + }, + { + "epoch": 7.605790645879733, + "loss": 0.7496963739395142, + "loss_ce": 0.00018462681327946484, + "loss_iou": 0.328125, + "loss_num": 0.018310546875, + "loss_xval": 0.75, + "num_input_tokens_seen": 191323100, + "step": 3415 + }, + { + "epoch": 7.6080178173719375, + "grad_norm": 34.315914154052734, + "learning_rate": 1e-06, + "loss": 0.5219, + "num_input_tokens_seen": 191379484, + "step": 3416 + }, + { + "epoch": 7.6080178173719375, + "loss": 0.558527410030365, + "loss_ce": 0.00017779026529751718, + "loss_iou": 0.25, + "loss_num": 0.01190185546875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 191379484, + "step": 3416 + }, + { + "epoch": 7.610244988864142, + "grad_norm": 16.876907348632812, + "learning_rate": 1e-06, + "loss": 0.875, + "num_input_tokens_seen": 191434760, + "step": 3417 + }, + { + "epoch": 7.610244988864142, + "loss": 1.0747464895248413, + "loss_ce": 0.00028365751495584846, + "loss_iou": 0.47265625, + "loss_num": 0.02587890625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 191434760, + "step": 3417 + }, + { + "epoch": 7.612472160356347, + "grad_norm": 13.883415222167969, + "learning_rate": 1e-06, + "loss": 0.7571, + "num_input_tokens_seen": 191492628, + "step": 3418 + }, + { + "epoch": 7.612472160356347, + "loss": 0.7818785309791565, + "loss_ce": 0.00026232993695884943, + "loss_iou": 0.28515625, + "loss_num": 0.04248046875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 191492628, + "step": 3418 + }, + { + "epoch": 7.614699331848552, + "grad_norm": 12.954174995422363, + "learning_rate": 1e-06, + "loss": 0.5369, + "num_input_tokens_seen": 191549316, + "step": 3419 + }, + { + "epoch": 7.614699331848552, + "loss": 0.5483337640762329, + "loss_ce": 0.00023806520039215684, + "loss_iou": 0.2236328125, + "loss_num": 0.02001953125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 191549316, + "step": 3419 + }, + { + "epoch": 7.616926503340757, + "grad_norm": 14.275961875915527, + "learning_rate": 1e-06, + "loss": 0.7559, + "num_input_tokens_seen": 191605056, + "step": 3420 + }, + { + "epoch": 7.616926503340757, + "loss": 0.5231179594993591, + "loss_ce": 0.0001687241019681096, + "loss_iou": 0.23046875, + "loss_num": 0.01239013671875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 191605056, + "step": 3420 + }, + { + "epoch": 7.619153674832962, + "grad_norm": 18.700197219848633, + "learning_rate": 1e-06, + "loss": 0.6053, + "num_input_tokens_seen": 191662236, + "step": 3421 + }, + { + "epoch": 7.619153674832962, + "loss": 0.7925351858139038, + "loss_ce": 0.00029885018011555076, + "loss_iou": 0.341796875, + "loss_num": 0.0218505859375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 191662236, + "step": 3421 + }, + { + "epoch": 7.621380846325167, + "grad_norm": 24.45431900024414, + "learning_rate": 1e-06, + "loss": 0.6113, + "num_input_tokens_seen": 191720748, + "step": 3422 + }, + { + "epoch": 7.621380846325167, + "loss": 0.6171329617500305, + "loss_ce": 0.00018957394058816135, + "loss_iou": 0.267578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 191720748, + "step": 3422 + }, + { + "epoch": 7.6236080178173715, + "grad_norm": 20.672542572021484, + "learning_rate": 1e-06, + "loss": 0.8138, + "num_input_tokens_seen": 191774632, + "step": 3423 + }, + { + "epoch": 7.6236080178173715, + "loss": 0.8238697052001953, + "loss_ce": 0.00013917218893766403, + "loss_iou": 0.361328125, + "loss_num": 0.0205078125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 191774632, + "step": 3423 + }, + { + "epoch": 7.625835189309576, + "grad_norm": 23.883867263793945, + "learning_rate": 1e-06, + "loss": 0.7723, + "num_input_tokens_seen": 191831068, + "step": 3424 + }, + { + "epoch": 7.625835189309576, + "loss": 0.6435463428497314, + "loss_ce": 0.0002357645716983825, + "loss_iou": 0.283203125, + "loss_num": 0.01495361328125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 191831068, + "step": 3424 + }, + { + "epoch": 7.628062360801781, + "grad_norm": 17.740406036376953, + "learning_rate": 1e-06, + "loss": 0.4917, + "num_input_tokens_seen": 191888888, + "step": 3425 + }, + { + "epoch": 7.628062360801781, + "loss": 0.5690467953681946, + "loss_ce": 0.00019914188305847347, + "loss_iou": 0.2412109375, + "loss_num": 0.0174560546875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 191888888, + "step": 3425 + }, + { + "epoch": 7.630289532293987, + "grad_norm": 34.307125091552734, + "learning_rate": 1e-06, + "loss": 0.518, + "num_input_tokens_seen": 191944488, + "step": 3426 + }, + { + "epoch": 7.630289532293987, + "loss": 0.5533403158187866, + "loss_ce": 0.00036179396556690335, + "loss_iou": 0.20703125, + "loss_num": 0.0277099609375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 191944488, + "step": 3426 + }, + { + "epoch": 7.632516703786192, + "grad_norm": 15.701261520385742, + "learning_rate": 1e-06, + "loss": 0.5397, + "num_input_tokens_seen": 191999632, + "step": 3427 + }, + { + "epoch": 7.632516703786192, + "loss": 0.6593526601791382, + "loss_ce": 0.0004170782049186528, + "loss_iou": 0.28125, + "loss_num": 0.019287109375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 191999632, + "step": 3427 + }, + { + "epoch": 7.634743875278397, + "grad_norm": 20.72135353088379, + "learning_rate": 1e-06, + "loss": 0.6254, + "num_input_tokens_seen": 192055488, + "step": 3428 + }, + { + "epoch": 7.634743875278397, + "loss": 0.6992897987365723, + "loss_ce": 0.00019309617346152663, + "loss_iou": 0.302734375, + "loss_num": 0.0185546875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 192055488, + "step": 3428 + }, + { + "epoch": 7.636971046770602, + "grad_norm": 30.77797508239746, + "learning_rate": 1e-06, + "loss": 0.5652, + "num_input_tokens_seen": 192112044, + "step": 3429 + }, + { + "epoch": 7.636971046770602, + "loss": 0.5464463233947754, + "loss_ce": 0.00018164291395805776, + "loss_iou": 0.2470703125, + "loss_num": 0.0106201171875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 192112044, + "step": 3429 + }, + { + "epoch": 7.639198218262806, + "grad_norm": 17.127363204956055, + "learning_rate": 1e-06, + "loss": 0.942, + "num_input_tokens_seen": 192165832, + "step": 3430 + }, + { + "epoch": 7.639198218262806, + "loss": 0.7089158892631531, + "loss_ce": 0.00017566155293025076, + "loss_iou": 0.294921875, + "loss_num": 0.0240478515625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 192165832, + "step": 3430 + }, + { + "epoch": 7.641425389755011, + "grad_norm": 19.867774963378906, + "learning_rate": 1e-06, + "loss": 0.5599, + "num_input_tokens_seen": 192219304, + "step": 3431 + }, + { + "epoch": 7.641425389755011, + "loss": 0.4574645757675171, + "loss_ce": 0.0001891769061330706, + "loss_iou": 0.2119140625, + "loss_num": 0.0067138671875, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 192219304, + "step": 3431 + }, + { + "epoch": 7.643652561247216, + "grad_norm": 13.734804153442383, + "learning_rate": 1e-06, + "loss": 0.6357, + "num_input_tokens_seen": 192276704, + "step": 3432 + }, + { + "epoch": 7.643652561247216, + "loss": 0.6115978956222534, + "loss_ce": 0.0002697639574762434, + "loss_iou": 0.2451171875, + "loss_num": 0.0245361328125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 192276704, + "step": 3432 + }, + { + "epoch": 7.645879732739421, + "grad_norm": 18.749895095825195, + "learning_rate": 1e-06, + "loss": 0.7049, + "num_input_tokens_seen": 192335292, + "step": 3433 + }, + { + "epoch": 7.645879732739421, + "loss": 0.6146724820137024, + "loss_ce": 0.00017052568728104234, + "loss_iou": 0.26171875, + "loss_num": 0.0185546875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 192335292, + "step": 3433 + }, + { + "epoch": 7.648106904231626, + "grad_norm": 17.647464752197266, + "learning_rate": 1e-06, + "loss": 0.5601, + "num_input_tokens_seen": 192391092, + "step": 3434 + }, + { + "epoch": 7.648106904231626, + "loss": 0.519446611404419, + "loss_ce": 0.00015951888053677976, + "loss_iou": 0.228515625, + "loss_num": 0.0123291015625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 192391092, + "step": 3434 + }, + { + "epoch": 7.650334075723831, + "grad_norm": 17.00111961364746, + "learning_rate": 1e-06, + "loss": 0.5881, + "num_input_tokens_seen": 192447236, + "step": 3435 + }, + { + "epoch": 7.650334075723831, + "loss": 0.4986279606819153, + "loss_ce": 0.00033695262391120195, + "loss_iou": 0.1923828125, + "loss_num": 0.0225830078125, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 192447236, + "step": 3435 + }, + { + "epoch": 7.652561247216036, + "grad_norm": 16.545228958129883, + "learning_rate": 1e-06, + "loss": 0.7619, + "num_input_tokens_seen": 192502120, + "step": 3436 + }, + { + "epoch": 7.652561247216036, + "loss": 0.8621032238006592, + "loss_ce": 0.0002867898438125849, + "loss_iou": 0.353515625, + "loss_num": 0.0306396484375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 192502120, + "step": 3436 + }, + { + "epoch": 7.6547884187082404, + "grad_norm": 26.11361312866211, + "learning_rate": 1e-06, + "loss": 0.7085, + "num_input_tokens_seen": 192556024, + "step": 3437 + }, + { + "epoch": 7.6547884187082404, + "loss": 0.9188729524612427, + "loss_ce": 0.00017173260857816786, + "loss_iou": 0.375, + "loss_num": 0.0341796875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 192556024, + "step": 3437 + }, + { + "epoch": 7.657015590200445, + "grad_norm": 14.627326011657715, + "learning_rate": 1e-06, + "loss": 0.4922, + "num_input_tokens_seen": 192614920, + "step": 3438 + }, + { + "epoch": 7.657015590200445, + "loss": 0.47406378388404846, + "loss_ce": 0.0007971928571350873, + "loss_iou": 0.212890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 192614920, + "step": 3438 + }, + { + "epoch": 7.65924276169265, + "grad_norm": 22.492679595947266, + "learning_rate": 1e-06, + "loss": 0.502, + "num_input_tokens_seen": 192670472, + "step": 3439 + }, + { + "epoch": 7.65924276169265, + "loss": 0.49001675844192505, + "loss_ce": 0.000270665914285928, + "loss_iou": 0.220703125, + "loss_num": 0.00982666015625, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 192670472, + "step": 3439 + }, + { + "epoch": 7.661469933184855, + "grad_norm": 25.153066635131836, + "learning_rate": 1e-06, + "loss": 0.6003, + "num_input_tokens_seen": 192724972, + "step": 3440 + }, + { + "epoch": 7.661469933184855, + "loss": 0.6613759994506836, + "loss_ce": 0.0002431726170470938, + "loss_iou": 0.2890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 192724972, + "step": 3440 + }, + { + "epoch": 7.66369710467706, + "grad_norm": 23.217100143432617, + "learning_rate": 1e-06, + "loss": 0.4673, + "num_input_tokens_seen": 192780272, + "step": 3441 + }, + { + "epoch": 7.66369710467706, + "loss": 0.49449318647384644, + "loss_ce": 0.00023047745344229043, + "loss_iou": 0.21484375, + "loss_num": 0.0128173828125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 192780272, + "step": 3441 + }, + { + "epoch": 7.665924276169265, + "grad_norm": 19.738222122192383, + "learning_rate": 1e-06, + "loss": 0.5292, + "num_input_tokens_seen": 192834056, + "step": 3442 + }, + { + "epoch": 7.665924276169265, + "loss": 0.4528142213821411, + "loss_ce": 0.0001774845877662301, + "loss_iou": 0.193359375, + "loss_num": 0.01318359375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 192834056, + "step": 3442 + }, + { + "epoch": 7.66815144766147, + "grad_norm": 17.538352966308594, + "learning_rate": 1e-06, + "loss": 0.5892, + "num_input_tokens_seen": 192887184, + "step": 3443 + }, + { + "epoch": 7.66815144766147, + "loss": 0.5604883432388306, + "loss_ce": 0.00018561651813797653, + "loss_iou": 0.2451171875, + "loss_num": 0.01416015625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 192887184, + "step": 3443 + }, + { + "epoch": 7.6703786191536745, + "grad_norm": 20.0687198638916, + "learning_rate": 1e-06, + "loss": 0.6592, + "num_input_tokens_seen": 192944640, + "step": 3444 + }, + { + "epoch": 7.6703786191536745, + "loss": 0.5089391469955444, + "loss_ce": 0.00027216560556553304, + "loss_iou": 0.228515625, + "loss_num": 0.01043701171875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 192944640, + "step": 3444 + }, + { + "epoch": 7.67260579064588, + "grad_norm": 15.960465431213379, + "learning_rate": 1e-06, + "loss": 0.5891, + "num_input_tokens_seen": 193000164, + "step": 3445 + }, + { + "epoch": 7.67260579064588, + "loss": 0.6576277017593384, + "loss_ce": 0.00040116519085131586, + "loss_iou": 0.267578125, + "loss_num": 0.024169921875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 193000164, + "step": 3445 + }, + { + "epoch": 7.674832962138085, + "grad_norm": 18.63033103942871, + "learning_rate": 1e-06, + "loss": 0.623, + "num_input_tokens_seen": 193053128, + "step": 3446 + }, + { + "epoch": 7.674832962138085, + "loss": 0.5889174342155457, + "loss_ce": 0.00017231784295290709, + "loss_iou": 0.25, + "loss_num": 0.017822265625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 193053128, + "step": 3446 + }, + { + "epoch": 7.67706013363029, + "grad_norm": 14.499951362609863, + "learning_rate": 1e-06, + "loss": 0.5724, + "num_input_tokens_seen": 193108676, + "step": 3447 + }, + { + "epoch": 7.67706013363029, + "loss": 0.657649576663971, + "loss_ce": 0.000178877409780398, + "loss_iou": 0.3046875, + "loss_num": 0.009765625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 193108676, + "step": 3447 + }, + { + "epoch": 7.679287305122495, + "grad_norm": 18.459136962890625, + "learning_rate": 1e-06, + "loss": 0.5688, + "num_input_tokens_seen": 193166308, + "step": 3448 + }, + { + "epoch": 7.679287305122495, + "loss": 0.7646270990371704, + "loss_ce": 0.00022284439182840288, + "loss_iou": 0.34375, + "loss_num": 0.015625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 193166308, + "step": 3448 + }, + { + "epoch": 7.6815144766147, + "grad_norm": 35.68458938598633, + "learning_rate": 1e-06, + "loss": 0.5712, + "num_input_tokens_seen": 193222788, + "step": 3449 + }, + { + "epoch": 7.6815144766147, + "loss": 0.48069441318511963, + "loss_ce": 0.0001646471064304933, + "loss_iou": 0.212890625, + "loss_num": 0.01104736328125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 193222788, + "step": 3449 + }, + { + "epoch": 7.6837416481069045, + "grad_norm": 23.960975646972656, + "learning_rate": 1e-06, + "loss": 0.7771, + "num_input_tokens_seen": 193273416, + "step": 3450 + }, + { + "epoch": 7.6837416481069045, + "loss": 0.5310087203979492, + "loss_ce": 0.0002469586324878037, + "loss_iou": 0.2255859375, + "loss_num": 0.015869140625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 193273416, + "step": 3450 + }, + { + "epoch": 7.685968819599109, + "grad_norm": 21.065771102905273, + "learning_rate": 1e-06, + "loss": 0.6134, + "num_input_tokens_seen": 193327328, + "step": 3451 + }, + { + "epoch": 7.685968819599109, + "loss": 0.8937476873397827, + "loss_ce": 0.0001929743157234043, + "loss_iou": 0.365234375, + "loss_num": 0.032470703125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 193327328, + "step": 3451 + }, + { + "epoch": 7.688195991091314, + "grad_norm": 16.559165954589844, + "learning_rate": 1e-06, + "loss": 0.5826, + "num_input_tokens_seen": 193384244, + "step": 3452 + }, + { + "epoch": 7.688195991091314, + "loss": 0.6634224653244019, + "loss_ce": 0.0002144278259947896, + "loss_iou": 0.302734375, + "loss_num": 0.01177978515625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 193384244, + "step": 3452 + }, + { + "epoch": 7.690423162583519, + "grad_norm": 16.13030433654785, + "learning_rate": 1e-06, + "loss": 0.6468, + "num_input_tokens_seen": 193439784, + "step": 3453 + }, + { + "epoch": 7.690423162583519, + "loss": 0.6554673910140991, + "loss_ce": 0.0001939490030054003, + "loss_iou": 0.283203125, + "loss_num": 0.0179443359375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 193439784, + "step": 3453 + }, + { + "epoch": 7.692650334075724, + "grad_norm": 17.711097717285156, + "learning_rate": 1e-06, + "loss": 0.6227, + "num_input_tokens_seen": 193493800, + "step": 3454 + }, + { + "epoch": 7.692650334075724, + "loss": 0.6291646957397461, + "loss_ce": 0.0002584658795967698, + "loss_iou": 0.25390625, + "loss_num": 0.0244140625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 193493800, + "step": 3454 + }, + { + "epoch": 7.694877505567929, + "grad_norm": 16.89303207397461, + "learning_rate": 1e-06, + "loss": 0.5846, + "num_input_tokens_seen": 193546256, + "step": 3455 + }, + { + "epoch": 7.694877505567929, + "loss": 0.550576388835907, + "loss_ce": 0.00016140022489707917, + "loss_iou": 0.2060546875, + "loss_num": 0.027587890625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 193546256, + "step": 3455 + }, + { + "epoch": 7.697104677060134, + "grad_norm": 34.59210968017578, + "learning_rate": 1e-06, + "loss": 0.6684, + "num_input_tokens_seen": 193602852, + "step": 3456 + }, + { + "epoch": 7.697104677060134, + "loss": 0.5592398643493652, + "loss_ce": 0.000157813512487337, + "loss_iou": 0.263671875, + "loss_num": 0.00653076171875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 193602852, + "step": 3456 + }, + { + "epoch": 7.6993318485523385, + "grad_norm": 20.46678352355957, + "learning_rate": 1e-06, + "loss": 0.5734, + "num_input_tokens_seen": 193661456, + "step": 3457 + }, + { + "epoch": 7.6993318485523385, + "loss": 0.5596364140510559, + "loss_ce": 0.00018814706709235907, + "loss_iou": 0.2421875, + "loss_num": 0.01513671875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 193661456, + "step": 3457 + }, + { + "epoch": 7.701559020044543, + "grad_norm": 14.73276424407959, + "learning_rate": 1e-06, + "loss": 0.6287, + "num_input_tokens_seen": 193720104, + "step": 3458 + }, + { + "epoch": 7.701559020044543, + "loss": 0.5451168417930603, + "loss_ce": 0.00019496420281939209, + "loss_iou": 0.25, + "loss_num": 0.00860595703125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 193720104, + "step": 3458 + }, + { + "epoch": 7.703786191536748, + "grad_norm": 14.44981575012207, + "learning_rate": 1e-06, + "loss": 0.3805, + "num_input_tokens_seen": 193776696, + "step": 3459 + }, + { + "epoch": 7.703786191536748, + "loss": 0.3983895778656006, + "loss_ce": 0.00019621921819634736, + "loss_iou": 0.177734375, + "loss_num": 0.00830078125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 193776696, + "step": 3459 + }, + { + "epoch": 7.706013363028953, + "grad_norm": 19.433345794677734, + "learning_rate": 1e-06, + "loss": 0.4632, + "num_input_tokens_seen": 193834220, + "step": 3460 + }, + { + "epoch": 7.706013363028953, + "loss": 0.4960872530937195, + "loss_ce": 0.00023767323000356555, + "loss_iou": 0.185546875, + "loss_num": 0.0247802734375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 193834220, + "step": 3460 + }, + { + "epoch": 7.708240534521158, + "grad_norm": 55.87489318847656, + "learning_rate": 1e-06, + "loss": 0.5689, + "num_input_tokens_seen": 193888720, + "step": 3461 + }, + { + "epoch": 7.708240534521158, + "loss": 0.6605603098869324, + "loss_ce": 0.0004040383209940046, + "loss_iou": 0.2890625, + "loss_num": 0.0162353515625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 193888720, + "step": 3461 + }, + { + "epoch": 7.710467706013363, + "grad_norm": 28.30753517150879, + "learning_rate": 1e-06, + "loss": 0.7928, + "num_input_tokens_seen": 193943548, + "step": 3462 + }, + { + "epoch": 7.710467706013363, + "loss": 0.6849916577339172, + "loss_ce": 0.00017721363110467792, + "loss_iou": 0.287109375, + "loss_num": 0.02197265625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 193943548, + "step": 3462 + }, + { + "epoch": 7.712694877505568, + "grad_norm": 23.885019302368164, + "learning_rate": 1e-06, + "loss": 0.7603, + "num_input_tokens_seen": 194000004, + "step": 3463 + }, + { + "epoch": 7.712694877505568, + "loss": 0.6882323026657104, + "loss_ce": 0.00024399746325798333, + "loss_iou": 0.27734375, + "loss_num": 0.0262451171875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 194000004, + "step": 3463 + }, + { + "epoch": 7.714922048997773, + "grad_norm": 14.701726913452148, + "learning_rate": 1e-06, + "loss": 0.5217, + "num_input_tokens_seen": 194056588, + "step": 3464 + }, + { + "epoch": 7.714922048997773, + "loss": 0.5858080387115479, + "loss_ce": 0.00020622329611796886, + "loss_iou": 0.240234375, + "loss_num": 0.0208740234375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 194056588, + "step": 3464 + }, + { + "epoch": 7.717149220489977, + "grad_norm": 16.439701080322266, + "learning_rate": 1e-06, + "loss": 0.5724, + "num_input_tokens_seen": 194111836, + "step": 3465 + }, + { + "epoch": 7.717149220489977, + "loss": 0.5037325620651245, + "loss_ce": 0.0001925197138916701, + "loss_iou": 0.21484375, + "loss_num": 0.0146484375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 194111836, + "step": 3465 + }, + { + "epoch": 7.719376391982182, + "grad_norm": 21.691720962524414, + "learning_rate": 1e-06, + "loss": 0.628, + "num_input_tokens_seen": 194168228, + "step": 3466 + }, + { + "epoch": 7.719376391982182, + "loss": 0.7247686982154846, + "loss_ce": 0.00015930971130728722, + "loss_iou": 0.306640625, + "loss_num": 0.0218505859375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 194168228, + "step": 3466 + }, + { + "epoch": 7.721603563474387, + "grad_norm": 13.828973770141602, + "learning_rate": 1e-06, + "loss": 0.5437, + "num_input_tokens_seen": 194223784, + "step": 3467 + }, + { + "epoch": 7.721603563474387, + "loss": 0.5051822662353516, + "loss_ce": 0.00017740536713972688, + "loss_iou": 0.2333984375, + "loss_num": 0.007659912109375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 194223784, + "step": 3467 + }, + { + "epoch": 7.723830734966592, + "grad_norm": 12.506599426269531, + "learning_rate": 1e-06, + "loss": 0.5575, + "num_input_tokens_seen": 194280580, + "step": 3468 + }, + { + "epoch": 7.723830734966592, + "loss": 0.5337764620780945, + "loss_ce": 0.00032918865326792, + "loss_iou": 0.2119140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 194280580, + "step": 3468 + }, + { + "epoch": 7.726057906458797, + "grad_norm": 52.32142639160156, + "learning_rate": 1e-06, + "loss": 0.6348, + "num_input_tokens_seen": 194336084, + "step": 3469 + }, + { + "epoch": 7.726057906458797, + "loss": 0.6026872992515564, + "loss_ce": 0.0001482292718719691, + "loss_iou": 0.27734375, + "loss_num": 0.0098876953125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 194336084, + "step": 3469 + }, + { + "epoch": 7.728285077951003, + "grad_norm": 26.636455535888672, + "learning_rate": 1e-06, + "loss": 0.5941, + "num_input_tokens_seen": 194389144, + "step": 3470 + }, + { + "epoch": 7.728285077951003, + "loss": 0.5668909549713135, + "loss_ce": 0.00024056396796368062, + "loss_iou": 0.234375, + "loss_num": 0.0196533203125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 194389144, + "step": 3470 + }, + { + "epoch": 7.7305122494432075, + "grad_norm": 25.504764556884766, + "learning_rate": 1e-06, + "loss": 0.6929, + "num_input_tokens_seen": 194446564, + "step": 3471 + }, + { + "epoch": 7.7305122494432075, + "loss": 0.7231018543243408, + "loss_ce": 0.00020141596905887127, + "loss_iou": 0.31640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 194446564, + "step": 3471 + }, + { + "epoch": 7.732739420935412, + "grad_norm": 18.57636070251465, + "learning_rate": 1e-06, + "loss": 0.7205, + "num_input_tokens_seen": 194501216, + "step": 3472 + }, + { + "epoch": 7.732739420935412, + "loss": 0.757434606552124, + "loss_ce": 0.00017141405260190368, + "loss_iou": 0.326171875, + "loss_num": 0.021240234375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 194501216, + "step": 3472 + }, + { + "epoch": 7.734966592427617, + "grad_norm": 23.02729034423828, + "learning_rate": 1e-06, + "loss": 0.6108, + "num_input_tokens_seen": 194558088, + "step": 3473 + }, + { + "epoch": 7.734966592427617, + "loss": 0.591259241104126, + "loss_ce": 0.0001947856944752857, + "loss_iou": 0.248046875, + "loss_num": 0.0189208984375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 194558088, + "step": 3473 + }, + { + "epoch": 7.737193763919822, + "grad_norm": 16.5152587890625, + "learning_rate": 1e-06, + "loss": 0.591, + "num_input_tokens_seen": 194616640, + "step": 3474 + }, + { + "epoch": 7.737193763919822, + "loss": 0.42105597257614136, + "loss_ce": 0.00015753699699416757, + "loss_iou": 0.189453125, + "loss_num": 0.00830078125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 194616640, + "step": 3474 + }, + { + "epoch": 7.739420935412027, + "grad_norm": 20.325769424438477, + "learning_rate": 1e-06, + "loss": 0.8947, + "num_input_tokens_seen": 194674240, + "step": 3475 + }, + { + "epoch": 7.739420935412027, + "loss": 1.23799729347229, + "loss_ce": 0.0003264172119088471, + "loss_iou": 0.478515625, + "loss_num": 0.055908203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 194674240, + "step": 3475 + }, + { + "epoch": 7.741648106904232, + "grad_norm": 21.625873565673828, + "learning_rate": 1e-06, + "loss": 0.5786, + "num_input_tokens_seen": 194729012, + "step": 3476 + }, + { + "epoch": 7.741648106904232, + "loss": 0.391615629196167, + "loss_ce": 0.000136150760226883, + "loss_iou": 0.16796875, + "loss_num": 0.01104736328125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 194729012, + "step": 3476 + }, + { + "epoch": 7.743875278396437, + "grad_norm": 14.393267631530762, + "learning_rate": 1e-06, + "loss": 0.6558, + "num_input_tokens_seen": 194786480, + "step": 3477 + }, + { + "epoch": 7.743875278396437, + "loss": 0.5332167148590088, + "loss_ce": 0.0001356468565063551, + "loss_iou": 0.220703125, + "loss_num": 0.0181884765625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 194786480, + "step": 3477 + }, + { + "epoch": 7.7461024498886415, + "grad_norm": 33.26962661743164, + "learning_rate": 1e-06, + "loss": 0.9039, + "num_input_tokens_seen": 194842640, + "step": 3478 + }, + { + "epoch": 7.7461024498886415, + "loss": 0.8585567474365234, + "loss_ce": 0.00015828575124032795, + "loss_iou": 0.37109375, + "loss_num": 0.0234375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 194842640, + "step": 3478 + }, + { + "epoch": 7.748329621380846, + "grad_norm": 20.943241119384766, + "learning_rate": 1e-06, + "loss": 0.5986, + "num_input_tokens_seen": 194900196, + "step": 3479 + }, + { + "epoch": 7.748329621380846, + "loss": 0.5145847797393799, + "loss_ce": 0.00018048740457743406, + "loss_iou": 0.232421875, + "loss_num": 0.01007080078125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 194900196, + "step": 3479 + }, + { + "epoch": 7.750556792873051, + "grad_norm": 27.85867691040039, + "learning_rate": 1e-06, + "loss": 0.4849, + "num_input_tokens_seen": 194956328, + "step": 3480 + }, + { + "epoch": 7.750556792873051, + "loss": 0.5256341695785522, + "loss_ce": 0.0007318383431993425, + "loss_iou": 0.2421875, + "loss_num": 0.00799560546875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 194956328, + "step": 3480 + }, + { + "epoch": 7.752783964365256, + "grad_norm": 23.426788330078125, + "learning_rate": 1e-06, + "loss": 0.8095, + "num_input_tokens_seen": 195011508, + "step": 3481 + }, + { + "epoch": 7.752783964365256, + "loss": 0.9079303741455078, + "loss_ce": 0.00021555817511398345, + "loss_iou": 0.3828125, + "loss_num": 0.02880859375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 195011508, + "step": 3481 + }, + { + "epoch": 7.755011135857461, + "grad_norm": 26.646705627441406, + "learning_rate": 1e-06, + "loss": 0.6848, + "num_input_tokens_seen": 195066276, + "step": 3482 + }, + { + "epoch": 7.755011135857461, + "loss": 0.5551425218582153, + "loss_ce": 0.00021085041225887835, + "loss_iou": 0.2314453125, + "loss_num": 0.0185546875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 195066276, + "step": 3482 + }, + { + "epoch": 7.757238307349666, + "grad_norm": 21.877819061279297, + "learning_rate": 1e-06, + "loss": 0.75, + "num_input_tokens_seen": 195121820, + "step": 3483 + }, + { + "epoch": 7.757238307349666, + "loss": 0.87198805809021, + "loss_ce": 0.00016191550821531564, + "loss_iou": 0.37890625, + "loss_num": 0.0224609375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 195121820, + "step": 3483 + }, + { + "epoch": 7.759465478841871, + "grad_norm": 17.727195739746094, + "learning_rate": 1e-06, + "loss": 0.7847, + "num_input_tokens_seen": 195179468, + "step": 3484 + }, + { + "epoch": 7.759465478841871, + "loss": 0.8190126419067383, + "loss_ce": 0.00016502838116139174, + "loss_iou": 0.353515625, + "loss_num": 0.0224609375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 195179468, + "step": 3484 + }, + { + "epoch": 7.7616926503340755, + "grad_norm": 18.596248626708984, + "learning_rate": 1e-06, + "loss": 0.4576, + "num_input_tokens_seen": 195233616, + "step": 3485 + }, + { + "epoch": 7.7616926503340755, + "loss": 0.4784582555294037, + "loss_ce": 0.0001867622195277363, + "loss_iou": 0.203125, + "loss_num": 0.01446533203125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 195233616, + "step": 3485 + }, + { + "epoch": 7.76391982182628, + "grad_norm": 21.53468894958496, + "learning_rate": 1e-06, + "loss": 0.5638, + "num_input_tokens_seen": 195289464, + "step": 3486 + }, + { + "epoch": 7.76391982182628, + "loss": 0.6328760981559753, + "loss_ce": 0.00018564131460152566, + "loss_iou": 0.265625, + "loss_num": 0.0198974609375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 195289464, + "step": 3486 + }, + { + "epoch": 7.766146993318485, + "grad_norm": 17.920970916748047, + "learning_rate": 1e-06, + "loss": 0.8169, + "num_input_tokens_seen": 195344536, + "step": 3487 + }, + { + "epoch": 7.766146993318485, + "loss": 0.45805519819259644, + "loss_ce": 0.00016945481183938682, + "loss_iou": 0.2080078125, + "loss_num": 0.0084228515625, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 195344536, + "step": 3487 + }, + { + "epoch": 7.76837416481069, + "grad_norm": 17.782272338867188, + "learning_rate": 1e-06, + "loss": 0.5641, + "num_input_tokens_seen": 195399244, + "step": 3488 + }, + { + "epoch": 7.76837416481069, + "loss": 0.5128250122070312, + "loss_ce": 0.00025179749354720116, + "loss_iou": 0.2255859375, + "loss_num": 0.01214599609375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 195399244, + "step": 3488 + }, + { + "epoch": 7.770601336302895, + "grad_norm": 28.824478149414062, + "learning_rate": 1e-06, + "loss": 0.4858, + "num_input_tokens_seen": 195455096, + "step": 3489 + }, + { + "epoch": 7.770601336302895, + "loss": 0.5841382741928101, + "loss_ce": 0.0001538892393000424, + "loss_iou": 0.259765625, + "loss_num": 0.0125732421875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 195455096, + "step": 3489 + }, + { + "epoch": 7.772828507795101, + "grad_norm": 16.21584129333496, + "learning_rate": 1e-06, + "loss": 0.6751, + "num_input_tokens_seen": 195507752, + "step": 3490 + }, + { + "epoch": 7.772828507795101, + "loss": 0.7475310564041138, + "loss_ce": 0.0002166137855965644, + "loss_iou": 0.322265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 195507752, + "step": 3490 + }, + { + "epoch": 7.775055679287306, + "grad_norm": 16.735774993896484, + "learning_rate": 1e-06, + "loss": 0.5796, + "num_input_tokens_seen": 195565384, + "step": 3491 + }, + { + "epoch": 7.775055679287306, + "loss": 0.6273143291473389, + "loss_ce": 0.00023911299649626017, + "loss_iou": 0.25390625, + "loss_num": 0.02392578125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 195565384, + "step": 3491 + }, + { + "epoch": 7.77728285077951, + "grad_norm": 17.522539138793945, + "learning_rate": 1e-06, + "loss": 0.7027, + "num_input_tokens_seen": 195619800, + "step": 3492 + }, + { + "epoch": 7.77728285077951, + "loss": 0.7621063590049744, + "loss_ce": 0.00014349556295201182, + "loss_iou": 0.322265625, + "loss_num": 0.0235595703125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 195619800, + "step": 3492 + }, + { + "epoch": 7.779510022271715, + "grad_norm": 26.973304748535156, + "learning_rate": 1e-06, + "loss": 0.9673, + "num_input_tokens_seen": 195673328, + "step": 3493 + }, + { + "epoch": 7.779510022271715, + "loss": 1.0272767543792725, + "loss_ce": 0.0006654445314779878, + "loss_iou": 0.435546875, + "loss_num": 0.03125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 195673328, + "step": 3493 + }, + { + "epoch": 7.78173719376392, + "grad_norm": 19.321470260620117, + "learning_rate": 1e-06, + "loss": 0.6188, + "num_input_tokens_seen": 195728884, + "step": 3494 + }, + { + "epoch": 7.78173719376392, + "loss": 0.5602940320968628, + "loss_ce": 0.0002354723692405969, + "loss_iou": 0.2265625, + "loss_num": 0.0213623046875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 195728884, + "step": 3494 + }, + { + "epoch": 7.783964365256125, + "grad_norm": 19.93840789794922, + "learning_rate": 1e-06, + "loss": 0.5698, + "num_input_tokens_seen": 195783412, + "step": 3495 + }, + { + "epoch": 7.783964365256125, + "loss": 0.5731680393218994, + "loss_ce": 0.00016997376224026084, + "loss_iou": 0.224609375, + "loss_num": 0.02490234375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 195783412, + "step": 3495 + }, + { + "epoch": 7.78619153674833, + "grad_norm": 23.796375274658203, + "learning_rate": 1e-06, + "loss": 0.6692, + "num_input_tokens_seen": 195838768, + "step": 3496 + }, + { + "epoch": 7.78619153674833, + "loss": 0.717685341835022, + "loss_ce": 0.00015606911620125175, + "loss_iou": 0.3125, + "loss_num": 0.01806640625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 195838768, + "step": 3496 + }, + { + "epoch": 7.788418708240535, + "grad_norm": 15.419715881347656, + "learning_rate": 1e-06, + "loss": 0.6021, + "num_input_tokens_seen": 195894208, + "step": 3497 + }, + { + "epoch": 7.788418708240535, + "loss": 0.7013325691223145, + "loss_ce": 0.00016067746037151664, + "loss_iou": 0.310546875, + "loss_num": 0.0159912109375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 195894208, + "step": 3497 + }, + { + "epoch": 7.79064587973274, + "grad_norm": 21.454118728637695, + "learning_rate": 1e-06, + "loss": 0.6689, + "num_input_tokens_seen": 195949980, + "step": 3498 + }, + { + "epoch": 7.79064587973274, + "loss": 0.6081476211547852, + "loss_ce": 0.00023741269251331687, + "loss_iou": 0.2470703125, + "loss_num": 0.02294921875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 195949980, + "step": 3498 + }, + { + "epoch": 7.7928730512249444, + "grad_norm": 21.61153793334961, + "learning_rate": 1e-06, + "loss": 0.7103, + "num_input_tokens_seen": 196005100, + "step": 3499 + }, + { + "epoch": 7.7928730512249444, + "loss": 0.5294746160507202, + "loss_ce": 0.0012153343996033072, + "loss_iou": 0.2353515625, + "loss_num": 0.0113525390625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 196005100, + "step": 3499 + }, + { + "epoch": 7.795100222717149, + "grad_norm": 15.124077796936035, + "learning_rate": 1e-06, + "loss": 0.7598, + "num_input_tokens_seen": 196060760, + "step": 3500 + }, + { + "epoch": 7.795100222717149, + "eval_seeclick_web_CIoU": 0.5788059234619141, + "eval_seeclick_web_GIoU": 0.5771978497505188, + "eval_seeclick_web_IoU": 0.5963517725467682, + "eval_seeclick_web_MAE_all": 0.01642331574112177, + "eval_seeclick_web_MAE_h": 0.008296339772641659, + "eval_seeclick_web_MAE_w": 0.01685692649334669, + "eval_seeclick_web_MAE_x_boxes": 0.009794581681489944, + "eval_seeclick_web_MAE_y_boxes": 0.02204720745794475, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9286947846412659, + "eval_seeclick_web_loss_ce": 0.0002503915602574125, + "eval_seeclick_web_loss_iou": 0.4237060546875, + "eval_seeclick_web_loss_num": 0.013225555419921875, + "eval_seeclick_web_loss_xval": 0.913330078125, + "eval_seeclick_web_runtime": 20.0349, + "eval_seeclick_web_samples_per_second": 2.496, + "eval_seeclick_web_steps_per_second": 0.1, + "num_input_tokens_seen": 196060760, + "step": 3500 + }, + { + "epoch": 7.795100222717149, + "eval_icons_CIoU": 0.3098849505186081, + "eval_icons_GIoU": 0.3192085027694702, + "eval_icons_IoU": 0.3817008137702942, + "eval_icons_MAE_all": 0.0535985603928566, + "eval_icons_MAE_h": 0.038005659356713295, + "eval_icons_MAE_w": 0.04143812507390976, + "eval_icons_MAE_x_boxes": 0.05823635682463646, + "eval_icons_MAE_y_boxes": 0.037908594124019146, + "eval_icons_inside_bbox": 0.6493055522441864, + "eval_icons_loss": 1.6437243223190308, + "eval_icons_loss_ce": 0.00029484537662938237, + "eval_icons_loss_iou": 0.649658203125, + "eval_icons_loss_num": 0.04685401916503906, + "eval_icons_loss_xval": 1.533203125, + "eval_icons_runtime": 21.1399, + "eval_icons_samples_per_second": 2.365, + "eval_icons_steps_per_second": 0.095, + "num_input_tokens_seen": 196060760, + "step": 3500 + }, + { + "epoch": 7.795100222717149, + "eval_screenspot_CIoU": 0.34179479877154034, + "eval_screenspot_GIoU": 0.35942623019218445, + "eval_screenspot_IoU": 0.4252944588661194, + "eval_screenspot_MAE_all": 0.06343474984169006, + "eval_screenspot_MAE_h": 0.037477921694517136, + "eval_screenspot_MAE_w": 0.07201713944474857, + "eval_screenspot_MAE_x_boxes": 0.07516818679869175, + "eval_screenspot_MAE_y_boxes": 0.046938162917892136, + "eval_screenspot_inside_bbox": 0.6966666579246521, + "eval_screenspot_loss": 1.6630264520645142, + "eval_screenspot_loss_ce": 0.0002933433085369567, + "eval_screenspot_loss_iou": 0.6790364583333334, + "eval_screenspot_loss_num": 0.07456461588541667, + "eval_screenspot_loss_xval": 1.7312825520833333, + "eval_screenspot_runtime": 33.7448, + "eval_screenspot_samples_per_second": 2.637, + "eval_screenspot_steps_per_second": 0.089, + "num_input_tokens_seen": 196060760, + "step": 3500 + }, + { + "epoch": 7.795100222717149, + "eval_compot_CIoU": 0.3524170517921448, + "eval_compot_GIoU": 0.3639347553253174, + "eval_compot_IoU": 0.40965285897254944, + "eval_compot_MAE_all": 0.017891014460474253, + "eval_compot_MAE_h": 0.00863239774480462, + "eval_compot_MAE_w": 0.022040129639208317, + "eval_compot_MAE_x_boxes": 0.029601704329252243, + "eval_compot_MAE_y_boxes": 0.00674438988789916, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3697645664215088, + "eval_compot_loss_ce": 0.00023254087136592716, + "eval_compot_loss_iou": 0.62646484375, + "eval_compot_loss_num": 0.016546249389648438, + "eval_compot_loss_xval": 1.335205078125, + "eval_compot_runtime": 20.9161, + "eval_compot_samples_per_second": 2.391, + "eval_compot_steps_per_second": 0.096, + "num_input_tokens_seen": 196060760, + "step": 3500 + }, + { + "epoch": 7.795100222717149, + "eval_custom_ui_val_CIoU": 0.4692361321714189, + "eval_custom_ui_val_GIoU": 0.48255549867947894, + "eval_custom_ui_val_IoU": 0.5274171696768867, + "eval_custom_ui_val_MAE_all": 0.029507537372410297, + "eval_custom_ui_val_MAE_h": 0.016804457098866504, + "eval_custom_ui_val_MAE_w": 0.03681204499055942, + "eval_custom_ui_val_MAE_x_boxes": 0.03406558599736956, + "eval_custom_ui_val_MAE_y_boxes": 0.015829176952441532, + "eval_custom_ui_val_inside_bbox": 0.7457561757829454, + "eval_custom_ui_val_loss": 1.1831005811691284, + "eval_custom_ui_val_loss_ce": 0.0002701325170669912, + "eval_custom_ui_val_loss_iou": 0.5026584201388888, + "eval_custom_ui_val_loss_num": 0.027642779880099826, + "eval_custom_ui_val_loss_xval": 1.1439887152777777, + "eval_custom_ui_val_runtime": 59.4212, + "eval_custom_ui_val_samples_per_second": 4.46, + "eval_custom_ui_val_steps_per_second": 0.151, + "num_input_tokens_seen": 196060760, + "step": 3500 + }, + { + "epoch": 7.795100222717149, + "loss": 0.8771896958351135, + "loss_ce": 0.0002365570981055498, + "loss_iou": 0.3828125, + "loss_num": 0.0220947265625, + "loss_xval": 0.875, + "num_input_tokens_seen": 196060760, + "step": 3500 + }, + { + "epoch": 7.797327394209354, + "grad_norm": 20.44601058959961, + "learning_rate": 1e-06, + "loss": 0.7573, + "num_input_tokens_seen": 196116308, + "step": 3501 + }, + { + "epoch": 7.797327394209354, + "loss": 0.790895402431488, + "loss_ce": 0.0003680819063447416, + "loss_iou": 0.306640625, + "loss_num": 0.035400390625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 196116308, + "step": 3501 + }, + { + "epoch": 7.799554565701559, + "grad_norm": 19.4777774810791, + "learning_rate": 1e-06, + "loss": 0.6974, + "num_input_tokens_seen": 196172864, + "step": 3502 + }, + { + "epoch": 7.799554565701559, + "loss": 0.7237052917480469, + "loss_ce": 0.00019457802409306169, + "loss_iou": 0.3125, + "loss_num": 0.0196533203125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 196172864, + "step": 3502 + }, + { + "epoch": 7.801781737193764, + "grad_norm": 18.440637588500977, + "learning_rate": 1e-06, + "loss": 0.5529, + "num_input_tokens_seen": 196229132, + "step": 3503 + }, + { + "epoch": 7.801781737193764, + "loss": 0.5999140739440918, + "loss_ce": 0.0005488179158419371, + "loss_iou": 0.2734375, + "loss_num": 0.0107421875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 196229132, + "step": 3503 + }, + { + "epoch": 7.804008908685969, + "grad_norm": 24.30689811706543, + "learning_rate": 1e-06, + "loss": 0.4758, + "num_input_tokens_seen": 196287452, + "step": 3504 + }, + { + "epoch": 7.804008908685969, + "loss": 0.4462117552757263, + "loss_ce": 0.00016680760018061846, + "loss_iou": 0.1962890625, + "loss_num": 0.0106201171875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 196287452, + "step": 3504 + }, + { + "epoch": 7.806236080178174, + "grad_norm": 21.83576011657715, + "learning_rate": 1e-06, + "loss": 0.7566, + "num_input_tokens_seen": 196340952, + "step": 3505 + }, + { + "epoch": 7.806236080178174, + "loss": 0.8626251220703125, + "loss_ce": 0.0003204582317266613, + "loss_iou": 0.38671875, + "loss_num": 0.018310546875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 196340952, + "step": 3505 + }, + { + "epoch": 7.8084632516703785, + "grad_norm": 41.28773880004883, + "learning_rate": 1e-06, + "loss": 0.5113, + "num_input_tokens_seen": 196397792, + "step": 3506 + }, + { + "epoch": 7.8084632516703785, + "loss": 0.5087121725082397, + "loss_ce": 0.0001672878279350698, + "loss_iou": 0.216796875, + "loss_num": 0.01519775390625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 196397792, + "step": 3506 + }, + { + "epoch": 7.810690423162583, + "grad_norm": 14.272880554199219, + "learning_rate": 1e-06, + "loss": 0.505, + "num_input_tokens_seen": 196455836, + "step": 3507 + }, + { + "epoch": 7.810690423162583, + "loss": 0.4430527091026306, + "loss_ce": 0.0001816436561057344, + "loss_iou": 0.19140625, + "loss_num": 0.0120849609375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 196455836, + "step": 3507 + }, + { + "epoch": 7.812917594654788, + "grad_norm": 16.481369018554688, + "learning_rate": 1e-06, + "loss": 0.6081, + "num_input_tokens_seen": 196513684, + "step": 3508 + }, + { + "epoch": 7.812917594654788, + "loss": 0.6896613836288452, + "loss_ce": 0.00020824806415475905, + "loss_iou": 0.3046875, + "loss_num": 0.01611328125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 196513684, + "step": 3508 + }, + { + "epoch": 7.815144766146993, + "grad_norm": 16.66682243347168, + "learning_rate": 1e-06, + "loss": 0.561, + "num_input_tokens_seen": 196566484, + "step": 3509 + }, + { + "epoch": 7.815144766146993, + "loss": 0.47554415464401245, + "loss_ce": 0.00020236926502548158, + "loss_iou": 0.1865234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 196566484, + "step": 3509 + }, + { + "epoch": 7.817371937639198, + "grad_norm": 31.580455780029297, + "learning_rate": 1e-06, + "loss": 0.584, + "num_input_tokens_seen": 196621400, + "step": 3510 + }, + { + "epoch": 7.817371937639198, + "loss": 0.6484927535057068, + "loss_ce": 0.00017732605920173228, + "loss_iou": 0.275390625, + "loss_num": 0.019775390625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 196621400, + "step": 3510 + }, + { + "epoch": 7.819599109131403, + "grad_norm": 14.395856857299805, + "learning_rate": 1e-06, + "loss": 0.5939, + "num_input_tokens_seen": 196678068, + "step": 3511 + }, + { + "epoch": 7.819599109131403, + "loss": 0.5976732969284058, + "loss_ce": 0.0001391283149132505, + "loss_iou": 0.248046875, + "loss_num": 0.020263671875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 196678068, + "step": 3511 + }, + { + "epoch": 7.821826280623608, + "grad_norm": 22.546255111694336, + "learning_rate": 1e-06, + "loss": 0.5801, + "num_input_tokens_seen": 196733972, + "step": 3512 + }, + { + "epoch": 7.821826280623608, + "loss": 0.467131644487381, + "loss_ce": 0.00015164985961746424, + "loss_iou": 0.212890625, + "loss_num": 0.00811767578125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 196733972, + "step": 3512 + }, + { + "epoch": 7.8240534521158125, + "grad_norm": 16.682117462158203, + "learning_rate": 1e-06, + "loss": 0.5618, + "num_input_tokens_seen": 196790868, + "step": 3513 + }, + { + "epoch": 7.8240534521158125, + "loss": 0.6464253664016724, + "loss_ce": 0.00018512400856707245, + "loss_iou": 0.267578125, + "loss_num": 0.02197265625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 196790868, + "step": 3513 + }, + { + "epoch": 7.826280623608017, + "grad_norm": 18.157453536987305, + "learning_rate": 1e-06, + "loss": 0.6781, + "num_input_tokens_seen": 196847364, + "step": 3514 + }, + { + "epoch": 7.826280623608017, + "loss": 0.6460493803024292, + "loss_ce": 0.0001753381366143003, + "loss_iou": 0.279296875, + "loss_num": 0.017578125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 196847364, + "step": 3514 + }, + { + "epoch": 7.828507795100223, + "grad_norm": 20.77778434753418, + "learning_rate": 1e-06, + "loss": 0.743, + "num_input_tokens_seen": 196904288, + "step": 3515 + }, + { + "epoch": 7.828507795100223, + "loss": 0.7536814212799072, + "loss_ce": 0.00026343436911702156, + "loss_iou": 0.318359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 196904288, + "step": 3515 + }, + { + "epoch": 7.830734966592428, + "grad_norm": 15.515265464782715, + "learning_rate": 1e-06, + "loss": 0.4926, + "num_input_tokens_seen": 196961928, + "step": 3516 + }, + { + "epoch": 7.830734966592428, + "loss": 0.4176163673400879, + "loss_ce": 0.00013586709974333644, + "loss_iou": 0.1689453125, + "loss_num": 0.01611328125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 196961928, + "step": 3516 + }, + { + "epoch": 7.832962138084633, + "grad_norm": 23.900028228759766, + "learning_rate": 1e-06, + "loss": 0.4868, + "num_input_tokens_seen": 197017236, + "step": 3517 + }, + { + "epoch": 7.832962138084633, + "loss": 0.49435505270957947, + "loss_ce": 0.00015339165111072361, + "loss_iou": 0.2275390625, + "loss_num": 0.0079345703125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 197017236, + "step": 3517 + }, + { + "epoch": 7.835189309576838, + "grad_norm": 15.74838638305664, + "learning_rate": 1e-06, + "loss": 0.7655, + "num_input_tokens_seen": 197073028, + "step": 3518 + }, + { + "epoch": 7.835189309576838, + "loss": 1.0873568058013916, + "loss_ce": 0.00019860133761540055, + "loss_iou": 0.443359375, + "loss_num": 0.04052734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 197073028, + "step": 3518 + }, + { + "epoch": 7.8374164810690425, + "grad_norm": 25.064544677734375, + "learning_rate": 1e-06, + "loss": 0.552, + "num_input_tokens_seen": 197130308, + "step": 3519 + }, + { + "epoch": 7.8374164810690425, + "loss": 0.512490451335907, + "loss_ce": 0.0001613302156329155, + "loss_iou": 0.2158203125, + "loss_num": 0.01611328125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 197130308, + "step": 3519 + }, + { + "epoch": 7.839643652561247, + "grad_norm": 20.62727928161621, + "learning_rate": 1e-06, + "loss": 0.4757, + "num_input_tokens_seen": 197186840, + "step": 3520 + }, + { + "epoch": 7.839643652561247, + "loss": 0.6267750263214111, + "loss_ce": 0.000188069258001633, + "loss_iou": 0.2734375, + "loss_num": 0.015625, + "loss_xval": 0.625, + "num_input_tokens_seen": 197186840, + "step": 3520 + }, + { + "epoch": 7.841870824053452, + "grad_norm": 15.101804733276367, + "learning_rate": 1e-06, + "loss": 0.6523, + "num_input_tokens_seen": 197244324, + "step": 3521 + }, + { + "epoch": 7.841870824053452, + "loss": 0.8312093019485474, + "loss_ce": 0.0002156283298972994, + "loss_iou": 0.357421875, + "loss_num": 0.0233154296875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 197244324, + "step": 3521 + }, + { + "epoch": 7.844097995545657, + "grad_norm": 27.03868865966797, + "learning_rate": 1e-06, + "loss": 0.52, + "num_input_tokens_seen": 197299300, + "step": 3522 + }, + { + "epoch": 7.844097995545657, + "loss": 0.5687766075134277, + "loss_ce": 0.0002951917704194784, + "loss_iou": 0.244140625, + "loss_num": 0.015869140625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 197299300, + "step": 3522 + }, + { + "epoch": 7.846325167037862, + "grad_norm": 51.59347915649414, + "learning_rate": 1e-06, + "loss": 0.6587, + "num_input_tokens_seen": 197355036, + "step": 3523 + }, + { + "epoch": 7.846325167037862, + "loss": 0.6813713908195496, + "loss_ce": 0.0002190661762142554, + "loss_iou": 0.298828125, + "loss_num": 0.0166015625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 197355036, + "step": 3523 + }, + { + "epoch": 7.848552338530067, + "grad_norm": 14.566356658935547, + "learning_rate": 1e-06, + "loss": 0.5917, + "num_input_tokens_seen": 197410556, + "step": 3524 + }, + { + "epoch": 7.848552338530067, + "loss": 0.5617358684539795, + "loss_ce": 0.0002124165475834161, + "loss_iou": 0.2255859375, + "loss_num": 0.0220947265625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 197410556, + "step": 3524 + }, + { + "epoch": 7.850779510022272, + "grad_norm": 17.55548858642578, + "learning_rate": 1e-06, + "loss": 0.5818, + "num_input_tokens_seen": 197468284, + "step": 3525 + }, + { + "epoch": 7.850779510022272, + "loss": 0.6032076478004456, + "loss_ce": 0.0001802719198167324, + "loss_iou": 0.26953125, + "loss_num": 0.01300048828125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 197468284, + "step": 3525 + }, + { + "epoch": 7.853006681514477, + "grad_norm": 48.917110443115234, + "learning_rate": 1e-06, + "loss": 0.7346, + "num_input_tokens_seen": 197525216, + "step": 3526 + }, + { + "epoch": 7.853006681514477, + "loss": 0.9218271970748901, + "loss_ce": 0.0001963673421414569, + "loss_iou": 0.375, + "loss_num": 0.033935546875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 197525216, + "step": 3526 + }, + { + "epoch": 7.855233853006681, + "grad_norm": 32.04833984375, + "learning_rate": 1e-06, + "loss": 0.5597, + "num_input_tokens_seen": 197580472, + "step": 3527 + }, + { + "epoch": 7.855233853006681, + "loss": 0.5170796513557434, + "loss_ce": 0.00023395352764055133, + "loss_iou": 0.2265625, + "loss_num": 0.012451171875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 197580472, + "step": 3527 + }, + { + "epoch": 7.857461024498886, + "grad_norm": 20.859832763671875, + "learning_rate": 1e-06, + "loss": 0.6578, + "num_input_tokens_seen": 197636904, + "step": 3528 + }, + { + "epoch": 7.857461024498886, + "loss": 0.8837323188781738, + "loss_ce": 0.00018741836538538337, + "loss_iou": 0.34765625, + "loss_num": 0.03759765625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 197636904, + "step": 3528 + }, + { + "epoch": 7.859688195991091, + "grad_norm": 20.60695457458496, + "learning_rate": 1e-06, + "loss": 0.7797, + "num_input_tokens_seen": 197689184, + "step": 3529 + }, + { + "epoch": 7.859688195991091, + "loss": 0.9426758289337158, + "loss_ce": 0.0003540791803970933, + "loss_iou": 0.39453125, + "loss_num": 0.0303955078125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 197689184, + "step": 3529 + }, + { + "epoch": 7.861915367483296, + "grad_norm": 23.411840438842773, + "learning_rate": 1e-06, + "loss": 0.7374, + "num_input_tokens_seen": 197745056, + "step": 3530 + }, + { + "epoch": 7.861915367483296, + "loss": 0.8417353630065918, + "loss_ce": 0.00018263611127622426, + "loss_iou": 0.337890625, + "loss_num": 0.032958984375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 197745056, + "step": 3530 + }, + { + "epoch": 7.864142538975501, + "grad_norm": 14.93423843383789, + "learning_rate": 1e-06, + "loss": 0.4174, + "num_input_tokens_seen": 197801836, + "step": 3531 + }, + { + "epoch": 7.864142538975501, + "loss": 0.4481639862060547, + "loss_ce": 0.00016593134205322713, + "loss_iou": 0.171875, + "loss_num": 0.0208740234375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 197801836, + "step": 3531 + }, + { + "epoch": 7.866369710467706, + "grad_norm": 19.369924545288086, + "learning_rate": 1e-06, + "loss": 0.6988, + "num_input_tokens_seen": 197858596, + "step": 3532 + }, + { + "epoch": 7.866369710467706, + "loss": 0.987027108669281, + "loss_ce": 0.0002106587344314903, + "loss_iou": 0.412109375, + "loss_num": 0.032470703125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 197858596, + "step": 3532 + }, + { + "epoch": 7.868596881959911, + "grad_norm": 37.832054138183594, + "learning_rate": 1e-06, + "loss": 0.7192, + "num_input_tokens_seen": 197915224, + "step": 3533 + }, + { + "epoch": 7.868596881959911, + "loss": 0.7180832028388977, + "loss_ce": 0.000309744878904894, + "loss_iou": 0.30078125, + "loss_num": 0.023681640625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 197915224, + "step": 3533 + }, + { + "epoch": 7.870824053452115, + "grad_norm": 13.525866508483887, + "learning_rate": 1e-06, + "loss": 0.4544, + "num_input_tokens_seen": 197971236, + "step": 3534 + }, + { + "epoch": 7.870824053452115, + "loss": 0.5098298788070679, + "loss_ce": 0.0001863364304881543, + "loss_iou": 0.2158203125, + "loss_num": 0.015380859375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 197971236, + "step": 3534 + }, + { + "epoch": 7.873051224944321, + "grad_norm": 20.633840560913086, + "learning_rate": 1e-06, + "loss": 0.6103, + "num_input_tokens_seen": 198029292, + "step": 3535 + }, + { + "epoch": 7.873051224944321, + "loss": 0.7734463810920715, + "loss_ce": 0.0002530375204514712, + "loss_iou": 0.361328125, + "loss_num": 0.0103759765625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 198029292, + "step": 3535 + }, + { + "epoch": 7.875278396436526, + "grad_norm": 169.28097534179688, + "learning_rate": 1e-06, + "loss": 0.6063, + "num_input_tokens_seen": 198083980, + "step": 3536 + }, + { + "epoch": 7.875278396436526, + "loss": 0.52978515625, + "loss_ce": 0.0004882930079475045, + "loss_iou": 0.2294921875, + "loss_num": 0.0140380859375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 198083980, + "step": 3536 + }, + { + "epoch": 7.877505567928731, + "grad_norm": 20.492658615112305, + "learning_rate": 1e-06, + "loss": 0.5758, + "num_input_tokens_seen": 198139948, + "step": 3537 + }, + { + "epoch": 7.877505567928731, + "loss": 0.6573839783668518, + "loss_ce": 0.00015742500545457006, + "loss_iou": 0.271484375, + "loss_num": 0.0230712890625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 198139948, + "step": 3537 + }, + { + "epoch": 7.879732739420936, + "grad_norm": 27.89311981201172, + "learning_rate": 1e-06, + "loss": 0.8226, + "num_input_tokens_seen": 198198180, + "step": 3538 + }, + { + "epoch": 7.879732739420936, + "loss": 0.9203274250030518, + "loss_ce": 0.00016145262634381652, + "loss_iou": 0.40625, + "loss_num": 0.021728515625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 198198180, + "step": 3538 + }, + { + "epoch": 7.881959910913141, + "grad_norm": 21.136817932128906, + "learning_rate": 1e-06, + "loss": 0.5017, + "num_input_tokens_seen": 198253928, + "step": 3539 + }, + { + "epoch": 7.881959910913141, + "loss": 0.5626224279403687, + "loss_ce": 0.0003665737749543041, + "loss_iou": 0.248046875, + "loss_num": 0.0133056640625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 198253928, + "step": 3539 + }, + { + "epoch": 7.8841870824053455, + "grad_norm": 28.12201499938965, + "learning_rate": 1e-06, + "loss": 0.4507, + "num_input_tokens_seen": 198309852, + "step": 3540 + }, + { + "epoch": 7.8841870824053455, + "loss": 0.5494855642318726, + "loss_ce": 0.00016917182074394077, + "loss_iou": 0.22265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 198309852, + "step": 3540 + }, + { + "epoch": 7.88641425389755, + "grad_norm": 47.74600601196289, + "learning_rate": 1e-06, + "loss": 0.5923, + "num_input_tokens_seen": 198365504, + "step": 3541 + }, + { + "epoch": 7.88641425389755, + "loss": 0.6581393480300903, + "loss_ce": 0.00042452660272829235, + "loss_iou": 0.267578125, + "loss_num": 0.0240478515625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 198365504, + "step": 3541 + }, + { + "epoch": 7.888641425389755, + "grad_norm": 11.964970588684082, + "learning_rate": 1e-06, + "loss": 0.4837, + "num_input_tokens_seen": 198420992, + "step": 3542 + }, + { + "epoch": 7.888641425389755, + "loss": 0.46798640489578247, + "loss_ce": 0.00021299449144862592, + "loss_iou": 0.2021484375, + "loss_num": 0.01263427734375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 198420992, + "step": 3542 + }, + { + "epoch": 7.89086859688196, + "grad_norm": 14.015028953552246, + "learning_rate": 1e-06, + "loss": 0.5561, + "num_input_tokens_seen": 198478588, + "step": 3543 + }, + { + "epoch": 7.89086859688196, + "loss": 0.5441027879714966, + "loss_ce": 0.0001574681227793917, + "loss_iou": 0.234375, + "loss_num": 0.01495361328125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 198478588, + "step": 3543 + }, + { + "epoch": 7.893095768374165, + "grad_norm": 13.727971076965332, + "learning_rate": 1e-06, + "loss": 0.7564, + "num_input_tokens_seen": 198532012, + "step": 3544 + }, + { + "epoch": 7.893095768374165, + "loss": 0.9394014477729797, + "loss_ce": 0.0001924369134940207, + "loss_iou": 0.3671875, + "loss_num": 0.040771484375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 198532012, + "step": 3544 + }, + { + "epoch": 7.89532293986637, + "grad_norm": 24.72553253173828, + "learning_rate": 1e-06, + "loss": 0.7666, + "num_input_tokens_seen": 198587972, + "step": 3545 + }, + { + "epoch": 7.89532293986637, + "loss": 0.6909983158111572, + "loss_ce": 0.0003245328553020954, + "loss_iou": 0.2470703125, + "loss_num": 0.0390625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 198587972, + "step": 3545 + }, + { + "epoch": 7.897550111358575, + "grad_norm": 16.695913314819336, + "learning_rate": 1e-06, + "loss": 0.6233, + "num_input_tokens_seen": 198644912, + "step": 3546 + }, + { + "epoch": 7.897550111358575, + "loss": 0.5989300608634949, + "loss_ce": 0.00017520345863886178, + "loss_iou": 0.2412109375, + "loss_num": 0.023193359375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 198644912, + "step": 3546 + }, + { + "epoch": 7.8997772828507795, + "grad_norm": 23.642194747924805, + "learning_rate": 1e-06, + "loss": 0.8229, + "num_input_tokens_seen": 198701832, + "step": 3547 + }, + { + "epoch": 7.8997772828507795, + "loss": 0.7591318488121033, + "loss_ce": 0.0008310843259096146, + "loss_iou": 0.310546875, + "loss_num": 0.0274658203125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 198701832, + "step": 3547 + }, + { + "epoch": 7.902004454342984, + "grad_norm": 17.38911247253418, + "learning_rate": 1e-06, + "loss": 0.4997, + "num_input_tokens_seen": 198759072, + "step": 3548 + }, + { + "epoch": 7.902004454342984, + "loss": 0.4457376301288605, + "loss_ce": 0.004087251145392656, + "loss_iou": 0.1796875, + "loss_num": 0.016357421875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 198759072, + "step": 3548 + }, + { + "epoch": 7.904231625835189, + "grad_norm": 15.332737922668457, + "learning_rate": 1e-06, + "loss": 0.9554, + "num_input_tokens_seen": 198815520, + "step": 3549 + }, + { + "epoch": 7.904231625835189, + "loss": 0.7602176666259766, + "loss_ce": 0.00020788329129572958, + "loss_iou": 0.322265625, + "loss_num": 0.0235595703125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 198815520, + "step": 3549 + }, + { + "epoch": 7.906458797327394, + "grad_norm": 17.26343536376953, + "learning_rate": 1e-06, + "loss": 0.6921, + "num_input_tokens_seen": 198871008, + "step": 3550 + }, + { + "epoch": 7.906458797327394, + "loss": 0.7331159710884094, + "loss_ce": 0.00020580444834195077, + "loss_iou": 0.30078125, + "loss_num": 0.026123046875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 198871008, + "step": 3550 + }, + { + "epoch": 7.908685968819599, + "grad_norm": 18.0777587890625, + "learning_rate": 1e-06, + "loss": 0.9367, + "num_input_tokens_seen": 198927764, + "step": 3551 + }, + { + "epoch": 7.908685968819599, + "loss": 0.9931344985961914, + "loss_ce": 0.0002145655162166804, + "loss_iou": 0.4140625, + "loss_num": 0.032958984375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 198927764, + "step": 3551 + }, + { + "epoch": 7.910913140311804, + "grad_norm": 15.072328567504883, + "learning_rate": 1e-06, + "loss": 0.4566, + "num_input_tokens_seen": 198983208, + "step": 3552 + }, + { + "epoch": 7.910913140311804, + "loss": 0.3726545572280884, + "loss_ce": 0.00021803012350574136, + "loss_iou": 0.1689453125, + "loss_num": 0.00689697265625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 198983208, + "step": 3552 + }, + { + "epoch": 7.913140311804009, + "grad_norm": 23.05716896057129, + "learning_rate": 1e-06, + "loss": 0.541, + "num_input_tokens_seen": 199038544, + "step": 3553 + }, + { + "epoch": 7.913140311804009, + "loss": 0.4792807102203369, + "loss_ce": 0.000154730340000242, + "loss_iou": 0.2138671875, + "loss_num": 0.0101318359375, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 199038544, + "step": 3553 + }, + { + "epoch": 7.9153674832962135, + "grad_norm": 14.205634117126465, + "learning_rate": 1e-06, + "loss": 0.511, + "num_input_tokens_seen": 199093300, + "step": 3554 + }, + { + "epoch": 7.9153674832962135, + "loss": 0.5586905479431152, + "loss_ce": 0.00021891451615374535, + "loss_iou": 0.21875, + "loss_num": 0.0242919921875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 199093300, + "step": 3554 + }, + { + "epoch": 7.917594654788418, + "grad_norm": 20.449451446533203, + "learning_rate": 1e-06, + "loss": 0.529, + "num_input_tokens_seen": 199151908, + "step": 3555 + }, + { + "epoch": 7.917594654788418, + "loss": 0.3755338788032532, + "loss_ce": 0.0001676726678851992, + "loss_iou": 0.1689453125, + "loss_num": 0.00750732421875, + "loss_xval": 0.375, + "num_input_tokens_seen": 199151908, + "step": 3555 + }, + { + "epoch": 7.919821826280623, + "grad_norm": 24.3996639251709, + "learning_rate": 1e-06, + "loss": 0.775, + "num_input_tokens_seen": 199204824, + "step": 3556 + }, + { + "epoch": 7.919821826280623, + "loss": 0.6280686855316162, + "loss_ce": 0.00016949971904978156, + "loss_iou": 0.265625, + "loss_num": 0.01953125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 199204824, + "step": 3556 + }, + { + "epoch": 7.922048997772828, + "grad_norm": 16.553987503051758, + "learning_rate": 1e-06, + "loss": 0.7373, + "num_input_tokens_seen": 199259004, + "step": 3557 + }, + { + "epoch": 7.922048997772828, + "loss": 0.8635649681091309, + "loss_ce": 0.00016163403051905334, + "loss_iou": 0.3671875, + "loss_num": 0.0255126953125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 199259004, + "step": 3557 + }, + { + "epoch": 7.924276169265033, + "grad_norm": 20.715532302856445, + "learning_rate": 1e-06, + "loss": 0.5043, + "num_input_tokens_seen": 199314500, + "step": 3558 + }, + { + "epoch": 7.924276169265033, + "loss": 0.5213112235069275, + "loss_ce": 0.00019308787886984646, + "loss_iou": 0.234375, + "loss_num": 0.01043701171875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 199314500, + "step": 3558 + }, + { + "epoch": 7.926503340757238, + "grad_norm": 33.767356872558594, + "learning_rate": 1e-06, + "loss": 0.4129, + "num_input_tokens_seen": 199372168, + "step": 3559 + }, + { + "epoch": 7.926503340757238, + "loss": 0.31914597749710083, + "loss_ce": 0.0001762679312378168, + "loss_iou": 0.1484375, + "loss_num": 0.0042724609375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 199372168, + "step": 3559 + }, + { + "epoch": 7.928730512249444, + "grad_norm": 23.694744110107422, + "learning_rate": 1e-06, + "loss": 0.713, + "num_input_tokens_seen": 199425588, + "step": 3560 + }, + { + "epoch": 7.928730512249444, + "loss": 0.7644591331481934, + "loss_ce": 0.00042102765291929245, + "loss_iou": 0.302734375, + "loss_num": 0.031494140625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 199425588, + "step": 3560 + }, + { + "epoch": 7.9309576837416484, + "grad_norm": 22.84273338317871, + "learning_rate": 1e-06, + "loss": 0.6157, + "num_input_tokens_seen": 199483636, + "step": 3561 + }, + { + "epoch": 7.9309576837416484, + "loss": 0.646425724029541, + "loss_ce": 0.00018549279775470495, + "loss_iou": 0.275390625, + "loss_num": 0.01904296875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 199483636, + "step": 3561 + }, + { + "epoch": 7.933184855233853, + "grad_norm": 16.44258689880371, + "learning_rate": 1e-06, + "loss": 0.507, + "num_input_tokens_seen": 199538272, + "step": 3562 + }, + { + "epoch": 7.933184855233853, + "loss": 0.5787392854690552, + "loss_ce": 0.00024805517750792205, + "loss_iou": 0.2578125, + "loss_num": 0.0125732421875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 199538272, + "step": 3562 + }, + { + "epoch": 7.935412026726058, + "grad_norm": 20.36518669128418, + "learning_rate": 1e-06, + "loss": 0.6295, + "num_input_tokens_seen": 199596328, + "step": 3563 + }, + { + "epoch": 7.935412026726058, + "loss": 0.641238272190094, + "loss_ce": 0.00036911331699229777, + "loss_iou": 0.296875, + "loss_num": 0.009521484375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 199596328, + "step": 3563 + }, + { + "epoch": 7.937639198218263, + "grad_norm": 49.14147186279297, + "learning_rate": 1e-06, + "loss": 0.6839, + "num_input_tokens_seen": 199653252, + "step": 3564 + }, + { + "epoch": 7.937639198218263, + "loss": 0.6464834213256836, + "loss_ce": 0.00024316800408996642, + "loss_iou": 0.30078125, + "loss_num": 0.00927734375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 199653252, + "step": 3564 + }, + { + "epoch": 7.939866369710468, + "grad_norm": 16.839582443237305, + "learning_rate": 1e-06, + "loss": 0.6352, + "num_input_tokens_seen": 199708264, + "step": 3565 + }, + { + "epoch": 7.939866369710468, + "loss": 0.49529188871383667, + "loss_ce": 0.0001746913476381451, + "loss_iou": 0.212890625, + "loss_num": 0.01397705078125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 199708264, + "step": 3565 + }, + { + "epoch": 7.942093541202673, + "grad_norm": 16.844385147094727, + "learning_rate": 1e-06, + "loss": 0.6126, + "num_input_tokens_seen": 199762920, + "step": 3566 + }, + { + "epoch": 7.942093541202673, + "loss": 0.6229934096336365, + "loss_ce": 0.00019066702225245535, + "loss_iou": 0.263671875, + "loss_num": 0.0194091796875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 199762920, + "step": 3566 + }, + { + "epoch": 7.944320712694878, + "grad_norm": 23.8248233795166, + "learning_rate": 1e-06, + "loss": 0.6024, + "num_input_tokens_seen": 199820520, + "step": 3567 + }, + { + "epoch": 7.944320712694878, + "loss": 0.6216274499893188, + "loss_ce": 0.00016751314979046583, + "loss_iou": 0.26953125, + "loss_num": 0.0164794921875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 199820520, + "step": 3567 + }, + { + "epoch": 7.9465478841870825, + "grad_norm": 13.384178161621094, + "learning_rate": 1e-06, + "loss": 0.6889, + "num_input_tokens_seen": 199874968, + "step": 3568 + }, + { + "epoch": 7.9465478841870825, + "loss": 0.6415170431137085, + "loss_ce": 0.00022065843222662807, + "loss_iou": 0.259765625, + "loss_num": 0.0242919921875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 199874968, + "step": 3568 + }, + { + "epoch": 7.948775055679287, + "grad_norm": 17.003244400024414, + "learning_rate": 1e-06, + "loss": 0.7046, + "num_input_tokens_seen": 199931032, + "step": 3569 + }, + { + "epoch": 7.948775055679287, + "loss": 0.7286696434020996, + "loss_ce": 0.00015403382712975144, + "loss_iou": 0.326171875, + "loss_num": 0.0150146484375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 199931032, + "step": 3569 + }, + { + "epoch": 7.951002227171492, + "grad_norm": 18.784358978271484, + "learning_rate": 1e-06, + "loss": 0.6467, + "num_input_tokens_seen": 199986240, + "step": 3570 + }, + { + "epoch": 7.951002227171492, + "loss": 0.711881160736084, + "loss_ce": 0.00021116853167768568, + "loss_iou": 0.3203125, + "loss_num": 0.01458740234375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 199986240, + "step": 3570 + }, + { + "epoch": 7.953229398663697, + "grad_norm": 25.438982009887695, + "learning_rate": 1e-06, + "loss": 0.6809, + "num_input_tokens_seen": 200041048, + "step": 3571 + }, + { + "epoch": 7.953229398663697, + "loss": 0.8664193153381348, + "loss_ce": 0.00020840237266384065, + "loss_iou": 0.34375, + "loss_num": 0.03564453125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 200041048, + "step": 3571 + }, + { + "epoch": 7.955456570155902, + "grad_norm": 18.567123413085938, + "learning_rate": 1e-06, + "loss": 0.6805, + "num_input_tokens_seen": 200098136, + "step": 3572 + }, + { + "epoch": 7.955456570155902, + "loss": 0.8434309363365173, + "loss_ce": 0.0001692144141998142, + "loss_iou": 0.35546875, + "loss_num": 0.02685546875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 200098136, + "step": 3572 + }, + { + "epoch": 7.957683741648107, + "grad_norm": 271.4332275390625, + "learning_rate": 1e-06, + "loss": 0.6131, + "num_input_tokens_seen": 200152360, + "step": 3573 + }, + { + "epoch": 7.957683741648107, + "loss": 0.7838395833969116, + "loss_ce": 0.00014815322356298566, + "loss_iou": 0.3359375, + "loss_num": 0.0225830078125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 200152360, + "step": 3573 + }, + { + "epoch": 7.959910913140312, + "grad_norm": 18.194717407226562, + "learning_rate": 1e-06, + "loss": 0.5837, + "num_input_tokens_seen": 200208800, + "step": 3574 + }, + { + "epoch": 7.959910913140312, + "loss": 0.3856315314769745, + "loss_ce": 0.00013348979700822383, + "loss_iou": 0.15625, + "loss_num": 0.0146484375, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 200208800, + "step": 3574 + }, + { + "epoch": 7.9621380846325165, + "grad_norm": 23.228425979614258, + "learning_rate": 1e-06, + "loss": 0.8644, + "num_input_tokens_seen": 200265476, + "step": 3575 + }, + { + "epoch": 7.9621380846325165, + "loss": 0.6910591125488281, + "loss_ce": 0.0001411863195244223, + "loss_iou": 0.263671875, + "loss_num": 0.032958984375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 200265476, + "step": 3575 + }, + { + "epoch": 7.964365256124721, + "grad_norm": 28.046964645385742, + "learning_rate": 1e-06, + "loss": 0.6866, + "num_input_tokens_seen": 200322152, + "step": 3576 + }, + { + "epoch": 7.964365256124721, + "loss": 0.7025761604309082, + "loss_ce": 0.00018362130504101515, + "loss_iou": 0.294921875, + "loss_num": 0.0224609375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 200322152, + "step": 3576 + }, + { + "epoch": 7.966592427616926, + "grad_norm": 23.809234619140625, + "learning_rate": 1e-06, + "loss": 0.5926, + "num_input_tokens_seen": 200375664, + "step": 3577 + }, + { + "epoch": 7.966592427616926, + "loss": 0.6842349767684937, + "loss_ce": 0.00015298397920560092, + "loss_iou": 0.31640625, + "loss_num": 0.010498046875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 200375664, + "step": 3577 + }, + { + "epoch": 7.968819599109131, + "grad_norm": 35.44462966918945, + "learning_rate": 1e-06, + "loss": 0.6293, + "num_input_tokens_seen": 200430944, + "step": 3578 + }, + { + "epoch": 7.968819599109131, + "loss": 0.6284739375114441, + "loss_ce": 0.0001780486200004816, + "loss_iou": 0.28515625, + "loss_num": 0.01129150390625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 200430944, + "step": 3578 + }, + { + "epoch": 7.971046770601336, + "grad_norm": 22.342145919799805, + "learning_rate": 1e-06, + "loss": 0.5531, + "num_input_tokens_seen": 200486764, + "step": 3579 + }, + { + "epoch": 7.971046770601336, + "loss": 0.5724341869354248, + "loss_ce": 0.0001685347524471581, + "loss_iou": 0.259765625, + "loss_num": 0.01055908203125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 200486764, + "step": 3579 + }, + { + "epoch": 7.973273942093542, + "grad_norm": 18.398874282836914, + "learning_rate": 1e-06, + "loss": 0.641, + "num_input_tokens_seen": 200544420, + "step": 3580 + }, + { + "epoch": 7.973273942093542, + "loss": 0.6851996779441833, + "loss_ce": 0.0001410768600180745, + "loss_iou": 0.28515625, + "loss_num": 0.0228271484375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 200544420, + "step": 3580 + }, + { + "epoch": 7.9755011135857465, + "grad_norm": 12.79372787475586, + "learning_rate": 1e-06, + "loss": 0.3946, + "num_input_tokens_seen": 200601156, + "step": 3581 + }, + { + "epoch": 7.9755011135857465, + "loss": 0.40876662731170654, + "loss_ce": 0.0010518098715692759, + "loss_iou": 0.177734375, + "loss_num": 0.01043701171875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 200601156, + "step": 3581 + }, + { + "epoch": 7.977728285077951, + "grad_norm": 15.637197494506836, + "learning_rate": 1e-06, + "loss": 0.8963, + "num_input_tokens_seen": 200657656, + "step": 3582 + }, + { + "epoch": 7.977728285077951, + "loss": 1.1127707958221436, + "loss_ce": 0.0002219329762738198, + "loss_iou": 0.4453125, + "loss_num": 0.043701171875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 200657656, + "step": 3582 + }, + { + "epoch": 7.979955456570156, + "grad_norm": 19.720239639282227, + "learning_rate": 1e-06, + "loss": 0.7492, + "num_input_tokens_seen": 200713320, + "step": 3583 + }, + { + "epoch": 7.979955456570156, + "loss": 0.5831568241119385, + "loss_ce": 0.000149025785503909, + "loss_iou": 0.251953125, + "loss_num": 0.0155029296875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 200713320, + "step": 3583 + }, + { + "epoch": 7.982182628062361, + "grad_norm": 13.927251815795898, + "learning_rate": 1e-06, + "loss": 0.371, + "num_input_tokens_seen": 200768308, + "step": 3584 + }, + { + "epoch": 7.982182628062361, + "loss": 0.3358955383300781, + "loss_ce": 0.00044634263031184673, + "loss_iou": 0.1416015625, + "loss_num": 0.01055908203125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 200768308, + "step": 3584 + }, + { + "epoch": 7.984409799554566, + "grad_norm": 19.705570220947266, + "learning_rate": 1e-06, + "loss": 0.5878, + "num_input_tokens_seen": 200821804, + "step": 3585 + }, + { + "epoch": 7.984409799554566, + "loss": 0.4889172911643982, + "loss_ce": 0.00020879982912447304, + "loss_iou": 0.212890625, + "loss_num": 0.01275634765625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 200821804, + "step": 3585 + }, + { + "epoch": 7.986636971046771, + "grad_norm": 125.53588104248047, + "learning_rate": 1e-06, + "loss": 0.778, + "num_input_tokens_seen": 200880704, + "step": 3586 + }, + { + "epoch": 7.986636971046771, + "loss": 0.7231355905532837, + "loss_ce": 0.00023522632545791566, + "loss_iou": 0.296875, + "loss_num": 0.0257568359375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 200880704, + "step": 3586 + }, + { + "epoch": 7.988864142538976, + "grad_norm": 15.870854377746582, + "learning_rate": 1e-06, + "loss": 0.7217, + "num_input_tokens_seen": 200937412, + "step": 3587 + }, + { + "epoch": 7.988864142538976, + "loss": 0.3839578628540039, + "loss_ce": 0.00022984019597060978, + "loss_iou": 0.1318359375, + "loss_num": 0.02392578125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 200937412, + "step": 3587 + }, + { + "epoch": 7.991091314031181, + "grad_norm": 17.978586196899414, + "learning_rate": 1e-06, + "loss": 0.495, + "num_input_tokens_seen": 200992028, + "step": 3588 + }, + { + "epoch": 7.991091314031181, + "loss": 0.49199825525283813, + "loss_ce": 0.0001769806258380413, + "loss_iou": 0.2138671875, + "loss_num": 0.0126953125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 200992028, + "step": 3588 + }, + { + "epoch": 7.993318485523385, + "grad_norm": 18.851573944091797, + "learning_rate": 1e-06, + "loss": 0.6107, + "num_input_tokens_seen": 201049456, + "step": 3589 + }, + { + "epoch": 7.993318485523385, + "loss": 0.6161330938339233, + "loss_ce": 0.00016627684817649424, + "loss_iou": 0.26171875, + "loss_num": 0.01904296875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 201049456, + "step": 3589 + }, + { + "epoch": 7.99554565701559, + "grad_norm": 24.63906478881836, + "learning_rate": 1e-06, + "loss": 0.6317, + "num_input_tokens_seen": 201104932, + "step": 3590 + }, + { + "epoch": 7.99554565701559, + "loss": 0.6456716060638428, + "loss_ce": 0.00016378730651922524, + "loss_iou": 0.298828125, + "loss_num": 0.00946044921875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 201104932, + "step": 3590 + }, + { + "epoch": 7.997772828507795, + "grad_norm": 19.594945907592773, + "learning_rate": 1e-06, + "loss": 0.6995, + "num_input_tokens_seen": 201161408, + "step": 3591 + }, + { + "epoch": 7.997772828507795, + "loss": 0.6261618137359619, + "loss_ce": 0.00018528125656303018, + "loss_iou": 0.2734375, + "loss_num": 0.015869140625, + "loss_xval": 0.625, + "num_input_tokens_seen": 201161408, + "step": 3591 + }, + { + "epoch": 8.0, + "grad_norm": 13.603879928588867, + "learning_rate": 1e-06, + "loss": 0.6194, + "num_input_tokens_seen": 201216936, + "step": 3592 + }, + { + "epoch": 8.0, + "loss": 0.7476929426193237, + "loss_ce": 0.00013434665743261576, + "loss_iou": 0.33203125, + "loss_num": 0.0172119140625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 201216936, + "step": 3592 + }, + { + "epoch": 8.002227171492205, + "grad_norm": 18.30463409423828, + "learning_rate": 1e-06, + "loss": 0.7265, + "num_input_tokens_seen": 201274140, + "step": 3593 + }, + { + "epoch": 8.002227171492205, + "loss": 0.8002690076828003, + "loss_ce": 0.00022022916527930647, + "loss_iou": 0.353515625, + "loss_num": 0.0185546875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 201274140, + "step": 3593 + }, + { + "epoch": 8.00445434298441, + "grad_norm": 21.524507522583008, + "learning_rate": 1e-06, + "loss": 0.7233, + "num_input_tokens_seen": 201330520, + "step": 3594 + }, + { + "epoch": 8.00445434298441, + "loss": 0.7201758027076721, + "loss_ce": 0.00020510726608335972, + "loss_iou": 0.333984375, + "loss_num": 0.01055908203125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 201330520, + "step": 3594 + }, + { + "epoch": 8.006681514476615, + "grad_norm": 23.734619140625, + "learning_rate": 1e-06, + "loss": 0.8149, + "num_input_tokens_seen": 201386452, + "step": 3595 + }, + { + "epoch": 8.006681514476615, + "loss": 0.6124993562698364, + "loss_ce": 0.00019466172670945525, + "loss_iou": 0.2734375, + "loss_num": 0.01312255859375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 201386452, + "step": 3595 + }, + { + "epoch": 8.00890868596882, + "grad_norm": 20.260971069335938, + "learning_rate": 1e-06, + "loss": 0.8065, + "num_input_tokens_seen": 201444920, + "step": 3596 + }, + { + "epoch": 8.00890868596882, + "loss": 0.48429691791534424, + "loss_ce": 0.00016606590361334383, + "loss_iou": 0.20703125, + "loss_num": 0.01397705078125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 201444920, + "step": 3596 + }, + { + "epoch": 8.011135857461024, + "grad_norm": 13.375497817993164, + "learning_rate": 1e-06, + "loss": 0.4723, + "num_input_tokens_seen": 201504192, + "step": 3597 + }, + { + "epoch": 8.011135857461024, + "loss": 0.42642343044281006, + "loss_ce": 0.00015389773761853576, + "loss_iou": 0.1943359375, + "loss_num": 0.0074462890625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 201504192, + "step": 3597 + }, + { + "epoch": 8.01336302895323, + "grad_norm": 13.759163856506348, + "learning_rate": 1e-06, + "loss": 0.4227, + "num_input_tokens_seen": 201560760, + "step": 3598 + }, + { + "epoch": 8.01336302895323, + "loss": 0.5462967157363892, + "loss_ce": 0.0002762216317933053, + "loss_iou": 0.2431640625, + "loss_num": 0.01190185546875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 201560760, + "step": 3598 + }, + { + "epoch": 8.015590200445434, + "grad_norm": 13.41096305847168, + "learning_rate": 1e-06, + "loss": 0.6107, + "num_input_tokens_seen": 201614240, + "step": 3599 + }, + { + "epoch": 8.015590200445434, + "loss": 0.5510572791099548, + "loss_ce": 0.0001539345394121483, + "loss_iou": 0.2470703125, + "loss_num": 0.01123046875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 201614240, + "step": 3599 + }, + { + "epoch": 8.017817371937639, + "grad_norm": 23.751489639282227, + "learning_rate": 1e-06, + "loss": 0.6214, + "num_input_tokens_seen": 201672716, + "step": 3600 + }, + { + "epoch": 8.017817371937639, + "loss": 0.7004799842834473, + "loss_ce": 0.00040674611227586865, + "loss_iou": 0.314453125, + "loss_num": 0.0142822265625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 201672716, + "step": 3600 + }, + { + "epoch": 8.020044543429844, + "grad_norm": 14.586628913879395, + "learning_rate": 1e-06, + "loss": 0.7327, + "num_input_tokens_seen": 201729168, + "step": 3601 + }, + { + "epoch": 8.020044543429844, + "loss": 0.636683464050293, + "loss_ce": 0.00020884581317659467, + "loss_iou": 0.263671875, + "loss_num": 0.021728515625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 201729168, + "step": 3601 + }, + { + "epoch": 8.022271714922049, + "grad_norm": 19.21128273010254, + "learning_rate": 1e-06, + "loss": 0.5544, + "num_input_tokens_seen": 201784708, + "step": 3602 + }, + { + "epoch": 8.022271714922049, + "loss": 0.4761182367801666, + "loss_ce": 0.00016609346494078636, + "loss_iou": 0.2001953125, + "loss_num": 0.0152587890625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 201784708, + "step": 3602 + }, + { + "epoch": 8.024498886414253, + "grad_norm": 16.912395477294922, + "learning_rate": 1e-06, + "loss": 0.671, + "num_input_tokens_seen": 201843236, + "step": 3603 + }, + { + "epoch": 8.024498886414253, + "loss": 0.7302785515785217, + "loss_ce": 0.00017601058061700314, + "loss_iou": 0.3046875, + "loss_num": 0.023681640625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 201843236, + "step": 3603 + }, + { + "epoch": 8.026726057906458, + "grad_norm": 18.214143753051758, + "learning_rate": 1e-06, + "loss": 0.6397, + "num_input_tokens_seen": 201897720, + "step": 3604 + }, + { + "epoch": 8.026726057906458, + "loss": 0.41655105352401733, + "loss_ce": 0.00016921485075727105, + "loss_iou": 0.19140625, + "loss_num": 0.006988525390625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 201897720, + "step": 3604 + }, + { + "epoch": 8.028953229398663, + "grad_norm": 18.966310501098633, + "learning_rate": 1e-06, + "loss": 0.6243, + "num_input_tokens_seen": 201953360, + "step": 3605 + }, + { + "epoch": 8.028953229398663, + "loss": 0.5837928056716919, + "loss_ce": 0.00017461951938457787, + "loss_iou": 0.232421875, + "loss_num": 0.0238037109375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 201953360, + "step": 3605 + }, + { + "epoch": 8.031180400890868, + "grad_norm": 18.62992286682129, + "learning_rate": 1e-06, + "loss": 0.6118, + "num_input_tokens_seen": 202008704, + "step": 3606 + }, + { + "epoch": 8.031180400890868, + "loss": 0.5211237668991089, + "loss_ce": 0.0003718439256772399, + "loss_iou": 0.216796875, + "loss_num": 0.0177001953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 202008704, + "step": 3606 + }, + { + "epoch": 8.033407572383073, + "grad_norm": 24.204988479614258, + "learning_rate": 1e-06, + "loss": 0.5671, + "num_input_tokens_seen": 202068704, + "step": 3607 + }, + { + "epoch": 8.033407572383073, + "loss": 0.4836837947368622, + "loss_ce": 0.0005294845905154943, + "loss_iou": 0.1923828125, + "loss_num": 0.01953125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 202068704, + "step": 3607 + }, + { + "epoch": 8.035634743875278, + "grad_norm": 13.87259292602539, + "learning_rate": 1e-06, + "loss": 0.6244, + "num_input_tokens_seen": 202123772, + "step": 3608 + }, + { + "epoch": 8.035634743875278, + "loss": 0.5231030583381653, + "loss_ce": 0.0001537989010103047, + "loss_iou": 0.220703125, + "loss_num": 0.01611328125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 202123772, + "step": 3608 + }, + { + "epoch": 8.037861915367483, + "grad_norm": 23.374591827392578, + "learning_rate": 1e-06, + "loss": 0.7716, + "num_input_tokens_seen": 202179184, + "step": 3609 + }, + { + "epoch": 8.037861915367483, + "loss": 0.7146378755569458, + "loss_ce": 0.00016029010294005275, + "loss_iou": 0.310546875, + "loss_num": 0.0185546875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 202179184, + "step": 3609 + }, + { + "epoch": 8.040089086859687, + "grad_norm": 18.180028915405273, + "learning_rate": 1e-06, + "loss": 0.6477, + "num_input_tokens_seen": 202234064, + "step": 3610 + }, + { + "epoch": 8.040089086859687, + "loss": 0.8393857479095459, + "loss_ce": 0.0001523745886515826, + "loss_iou": 0.359375, + "loss_num": 0.024169921875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 202234064, + "step": 3610 + }, + { + "epoch": 8.042316258351892, + "grad_norm": 22.855134963989258, + "learning_rate": 1e-06, + "loss": 0.565, + "num_input_tokens_seen": 202290848, + "step": 3611 + }, + { + "epoch": 8.042316258351892, + "loss": 0.4523416757583618, + "loss_ce": 0.0001932748273247853, + "loss_iou": 0.193359375, + "loss_num": 0.01300048828125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 202290848, + "step": 3611 + }, + { + "epoch": 8.044543429844097, + "grad_norm": 22.91286849975586, + "learning_rate": 1e-06, + "loss": 0.7699, + "num_input_tokens_seen": 202349456, + "step": 3612 + }, + { + "epoch": 8.044543429844097, + "loss": 0.7767907381057739, + "loss_ce": 0.0001794241979951039, + "loss_iou": 0.322265625, + "loss_num": 0.0267333984375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 202349456, + "step": 3612 + }, + { + "epoch": 8.046770601336302, + "grad_norm": 22.00181770324707, + "learning_rate": 1e-06, + "loss": 0.5741, + "num_input_tokens_seen": 202406208, + "step": 3613 + }, + { + "epoch": 8.046770601336302, + "loss": 0.6100356578826904, + "loss_ce": 0.00017238240980077535, + "loss_iou": 0.2451171875, + "loss_num": 0.0238037109375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 202406208, + "step": 3613 + }, + { + "epoch": 8.048997772828507, + "grad_norm": 23.352352142333984, + "learning_rate": 1e-06, + "loss": 0.4346, + "num_input_tokens_seen": 202461324, + "step": 3614 + }, + { + "epoch": 8.048997772828507, + "loss": 0.4557119607925415, + "loss_ce": 0.00014553280198015273, + "loss_iou": 0.1845703125, + "loss_num": 0.0172119140625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 202461324, + "step": 3614 + }, + { + "epoch": 8.051224944320714, + "grad_norm": 24.288408279418945, + "learning_rate": 1e-06, + "loss": 0.5669, + "num_input_tokens_seen": 202520016, + "step": 3615 + }, + { + "epoch": 8.051224944320714, + "loss": 0.8215472102165222, + "loss_ce": 0.0006243445095606148, + "loss_iou": 0.33203125, + "loss_num": 0.0311279296875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 202520016, + "step": 3615 + }, + { + "epoch": 8.053452115812918, + "grad_norm": 18.481889724731445, + "learning_rate": 1e-06, + "loss": 0.44, + "num_input_tokens_seen": 202576916, + "step": 3616 + }, + { + "epoch": 8.053452115812918, + "loss": 0.38955333828926086, + "loss_ce": 0.0001490543072577566, + "loss_iou": 0.1708984375, + "loss_num": 0.0096435546875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 202576916, + "step": 3616 + }, + { + "epoch": 8.055679287305123, + "grad_norm": 23.070777893066406, + "learning_rate": 1e-06, + "loss": 0.6434, + "num_input_tokens_seen": 202633128, + "step": 3617 + }, + { + "epoch": 8.055679287305123, + "loss": 0.6288352608680725, + "loss_ce": 0.00017316042794845998, + "loss_iou": 0.267578125, + "loss_num": 0.0186767578125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 202633128, + "step": 3617 + }, + { + "epoch": 8.057906458797328, + "grad_norm": 21.540128707885742, + "learning_rate": 1e-06, + "loss": 0.5371, + "num_input_tokens_seen": 202686364, + "step": 3618 + }, + { + "epoch": 8.057906458797328, + "loss": 0.5090727210044861, + "loss_ce": 0.0001615592191228643, + "loss_iou": 0.224609375, + "loss_num": 0.01177978515625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 202686364, + "step": 3618 + }, + { + "epoch": 8.060133630289533, + "grad_norm": 29.778921127319336, + "learning_rate": 1e-06, + "loss": 0.8185, + "num_input_tokens_seen": 202740904, + "step": 3619 + }, + { + "epoch": 8.060133630289533, + "loss": 0.6977142691612244, + "loss_ce": 0.000204495110665448, + "loss_iou": 0.30078125, + "loss_num": 0.0194091796875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 202740904, + "step": 3619 + }, + { + "epoch": 8.062360801781738, + "grad_norm": 15.424278259277344, + "learning_rate": 1e-06, + "loss": 0.5548, + "num_input_tokens_seen": 202796944, + "step": 3620 + }, + { + "epoch": 8.062360801781738, + "loss": 0.6742639541625977, + "loss_ce": 0.00019166519632562995, + "loss_iou": 0.279296875, + "loss_num": 0.023193359375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 202796944, + "step": 3620 + }, + { + "epoch": 8.064587973273943, + "grad_norm": 15.881271362304688, + "learning_rate": 1e-06, + "loss": 0.6079, + "num_input_tokens_seen": 202852488, + "step": 3621 + }, + { + "epoch": 8.064587973273943, + "loss": 0.6368354558944702, + "loss_ce": 0.00017772393766790628, + "loss_iou": 0.29296875, + "loss_num": 0.01031494140625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 202852488, + "step": 3621 + }, + { + "epoch": 8.066815144766148, + "grad_norm": 20.29325294494629, + "learning_rate": 1e-06, + "loss": 0.7275, + "num_input_tokens_seen": 202905252, + "step": 3622 + }, + { + "epoch": 8.066815144766148, + "loss": 0.5565529465675354, + "loss_ce": 0.00015647773398086429, + "loss_iou": 0.234375, + "loss_num": 0.0174560546875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 202905252, + "step": 3622 + }, + { + "epoch": 8.069042316258352, + "grad_norm": 18.05845069885254, + "learning_rate": 1e-06, + "loss": 0.5679, + "num_input_tokens_seen": 202962516, + "step": 3623 + }, + { + "epoch": 8.069042316258352, + "loss": 0.5240879058837891, + "loss_ce": 0.0001620947732590139, + "loss_iou": 0.2314453125, + "loss_num": 0.01239013671875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 202962516, + "step": 3623 + }, + { + "epoch": 8.071269487750557, + "grad_norm": 16.5575008392334, + "learning_rate": 1e-06, + "loss": 0.3642, + "num_input_tokens_seen": 203018252, + "step": 3624 + }, + { + "epoch": 8.071269487750557, + "loss": 0.23819105327129364, + "loss_ce": 0.00015395878290291876, + "loss_iou": 0.10693359375, + "loss_num": 0.00482177734375, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 203018252, + "step": 3624 + }, + { + "epoch": 8.073496659242762, + "grad_norm": 14.785902976989746, + "learning_rate": 1e-06, + "loss": 0.7054, + "num_input_tokens_seen": 203075692, + "step": 3625 + }, + { + "epoch": 8.073496659242762, + "loss": 0.8658159971237183, + "loss_ce": 0.0003374355146661401, + "loss_iou": 0.34765625, + "loss_num": 0.0341796875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 203075692, + "step": 3625 + }, + { + "epoch": 8.075723830734967, + "grad_norm": 30.066858291625977, + "learning_rate": 1e-06, + "loss": 0.6224, + "num_input_tokens_seen": 203132688, + "step": 3626 + }, + { + "epoch": 8.075723830734967, + "loss": 0.5541576743125916, + "loss_ce": 0.00020257396681699902, + "loss_iou": 0.21875, + "loss_num": 0.0233154296875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 203132688, + "step": 3626 + }, + { + "epoch": 8.077951002227172, + "grad_norm": 13.862289428710938, + "learning_rate": 1e-06, + "loss": 0.6046, + "num_input_tokens_seen": 203189732, + "step": 3627 + }, + { + "epoch": 8.077951002227172, + "loss": 0.6278433203697205, + "loss_ce": 0.00015777646331116557, + "loss_iou": 0.2578125, + "loss_num": 0.0220947265625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 203189732, + "step": 3627 + }, + { + "epoch": 8.080178173719377, + "grad_norm": 14.706859588623047, + "learning_rate": 1e-06, + "loss": 0.5169, + "num_input_tokens_seen": 203247688, + "step": 3628 + }, + { + "epoch": 8.080178173719377, + "loss": 0.48699110746383667, + "loss_ce": 0.00017472410399932414, + "loss_iou": 0.220703125, + "loss_num": 0.00909423828125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 203247688, + "step": 3628 + }, + { + "epoch": 8.082405345211582, + "grad_norm": 15.710479736328125, + "learning_rate": 1e-06, + "loss": 0.661, + "num_input_tokens_seen": 203304544, + "step": 3629 + }, + { + "epoch": 8.082405345211582, + "loss": 0.6681832075119019, + "loss_ce": 0.00021445300080813468, + "loss_iou": 0.271484375, + "loss_num": 0.0247802734375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 203304544, + "step": 3629 + }, + { + "epoch": 8.084632516703786, + "grad_norm": 64.82049560546875, + "learning_rate": 1e-06, + "loss": 0.6353, + "num_input_tokens_seen": 203358916, + "step": 3630 + }, + { + "epoch": 8.084632516703786, + "loss": 0.7306484580039978, + "loss_ce": 0.00024074350949376822, + "loss_iou": 0.314453125, + "loss_num": 0.0203857421875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 203358916, + "step": 3630 + }, + { + "epoch": 8.086859688195991, + "grad_norm": 17.783905029296875, + "learning_rate": 1e-06, + "loss": 0.558, + "num_input_tokens_seen": 203414864, + "step": 3631 + }, + { + "epoch": 8.086859688195991, + "loss": 0.6720456480979919, + "loss_ce": 0.00017064949497580528, + "loss_iou": 0.298828125, + "loss_num": 0.01458740234375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 203414864, + "step": 3631 + }, + { + "epoch": 8.089086859688196, + "grad_norm": 13.846236228942871, + "learning_rate": 1e-06, + "loss": 0.5735, + "num_input_tokens_seen": 203469972, + "step": 3632 + }, + { + "epoch": 8.089086859688196, + "loss": 0.61847984790802, + "loss_ce": 0.00019372851238586009, + "loss_iou": 0.27734375, + "loss_num": 0.01312255859375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 203469972, + "step": 3632 + }, + { + "epoch": 8.091314031180401, + "grad_norm": 49.66973876953125, + "learning_rate": 1e-06, + "loss": 0.7761, + "num_input_tokens_seen": 203528200, + "step": 3633 + }, + { + "epoch": 8.091314031180401, + "loss": 0.869318962097168, + "loss_ce": 0.00017833446327131242, + "loss_iou": 0.365234375, + "loss_num": 0.0274658203125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 203528200, + "step": 3633 + }, + { + "epoch": 8.093541202672606, + "grad_norm": 23.811777114868164, + "learning_rate": 1e-06, + "loss": 0.4735, + "num_input_tokens_seen": 203584436, + "step": 3634 + }, + { + "epoch": 8.093541202672606, + "loss": 0.4863722026348114, + "loss_ce": 0.00016612093895673752, + "loss_iou": 0.21875, + "loss_num": 0.00982666015625, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 203584436, + "step": 3634 + }, + { + "epoch": 8.09576837416481, + "grad_norm": 15.80212116241455, + "learning_rate": 1e-06, + "loss": 0.5608, + "num_input_tokens_seen": 203641428, + "step": 3635 + }, + { + "epoch": 8.09576837416481, + "loss": 0.48964816331863403, + "loss_ce": 0.00014622273738496006, + "loss_iou": 0.2099609375, + "loss_num": 0.0140380859375, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 203641428, + "step": 3635 + }, + { + "epoch": 8.097995545657016, + "grad_norm": 24.546077728271484, + "learning_rate": 1e-06, + "loss": 0.6726, + "num_input_tokens_seen": 203693980, + "step": 3636 + }, + { + "epoch": 8.097995545657016, + "loss": 0.569033682346344, + "loss_ce": 0.0001860179763752967, + "loss_iou": 0.2578125, + "loss_num": 0.01104736328125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 203693980, + "step": 3636 + }, + { + "epoch": 8.10022271714922, + "grad_norm": 45.66901397705078, + "learning_rate": 1e-06, + "loss": 0.8868, + "num_input_tokens_seen": 203747768, + "step": 3637 + }, + { + "epoch": 8.10022271714922, + "loss": 0.6677889227867126, + "loss_ce": 0.00018637420726008713, + "loss_iou": 0.291015625, + "loss_num": 0.016845703125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 203747768, + "step": 3637 + }, + { + "epoch": 8.102449888641425, + "grad_norm": 25.512130737304688, + "learning_rate": 1e-06, + "loss": 0.5878, + "num_input_tokens_seen": 203805264, + "step": 3638 + }, + { + "epoch": 8.102449888641425, + "loss": 0.47341108322143555, + "loss_ce": 0.00014449466834776103, + "loss_iou": 0.201171875, + "loss_num": 0.01422119140625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 203805264, + "step": 3638 + }, + { + "epoch": 8.10467706013363, + "grad_norm": 16.746042251586914, + "learning_rate": 1e-06, + "loss": 0.5147, + "num_input_tokens_seen": 203862852, + "step": 3639 + }, + { + "epoch": 8.10467706013363, + "loss": 0.5026716589927673, + "loss_ce": 0.0003523434279486537, + "loss_iou": 0.2060546875, + "loss_num": 0.0179443359375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 203862852, + "step": 3639 + }, + { + "epoch": 8.106904231625835, + "grad_norm": 14.684697151184082, + "learning_rate": 1e-06, + "loss": 0.6076, + "num_input_tokens_seen": 203920432, + "step": 3640 + }, + { + "epoch": 8.106904231625835, + "loss": 0.7979079484939575, + "loss_ce": 0.00017849741561803967, + "loss_iou": 0.30859375, + "loss_num": 0.035888671875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 203920432, + "step": 3640 + }, + { + "epoch": 8.10913140311804, + "grad_norm": 29.783206939697266, + "learning_rate": 1e-06, + "loss": 0.6487, + "num_input_tokens_seen": 203977000, + "step": 3641 + }, + { + "epoch": 8.10913140311804, + "loss": 0.7514079809188843, + "loss_ce": 0.00043139857007190585, + "loss_iou": 0.34375, + "loss_num": 0.012451171875, + "loss_xval": 0.75, + "num_input_tokens_seen": 203977000, + "step": 3641 + }, + { + "epoch": 8.111358574610245, + "grad_norm": 17.605884552001953, + "learning_rate": 1e-06, + "loss": 0.5144, + "num_input_tokens_seen": 204034360, + "step": 3642 + }, + { + "epoch": 8.111358574610245, + "loss": 0.6749836206436157, + "loss_ce": 0.00017892984033096582, + "loss_iou": 0.30859375, + "loss_num": 0.01153564453125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 204034360, + "step": 3642 + }, + { + "epoch": 8.11358574610245, + "grad_norm": 17.724246978759766, + "learning_rate": 1e-06, + "loss": 0.6901, + "num_input_tokens_seen": 204093164, + "step": 3643 + }, + { + "epoch": 8.11358574610245, + "loss": 0.8570829033851624, + "loss_ce": 0.00033243943471461535, + "loss_iou": 0.365234375, + "loss_num": 0.0250244140625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 204093164, + "step": 3643 + }, + { + "epoch": 8.115812917594655, + "grad_norm": 23.620380401611328, + "learning_rate": 1e-06, + "loss": 0.6023, + "num_input_tokens_seen": 204147504, + "step": 3644 + }, + { + "epoch": 8.115812917594655, + "loss": 0.5590322017669678, + "loss_ce": 0.0001943043462233618, + "loss_iou": 0.25, + "loss_num": 0.0113525390625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 204147504, + "step": 3644 + }, + { + "epoch": 8.11804008908686, + "grad_norm": 20.3369140625, + "learning_rate": 1e-06, + "loss": 0.5411, + "num_input_tokens_seen": 204204044, + "step": 3645 + }, + { + "epoch": 8.11804008908686, + "loss": 0.5622153878211975, + "loss_ce": 0.00020367707475088537, + "loss_iou": 0.22265625, + "loss_num": 0.0233154296875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 204204044, + "step": 3645 + }, + { + "epoch": 8.120267260579064, + "grad_norm": 20.101959228515625, + "learning_rate": 1e-06, + "loss": 0.5644, + "num_input_tokens_seen": 204262612, + "step": 3646 + }, + { + "epoch": 8.120267260579064, + "loss": 0.5232473611831665, + "loss_ce": 0.0001760848390404135, + "loss_iou": 0.228515625, + "loss_num": 0.01318359375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 204262612, + "step": 3646 + }, + { + "epoch": 8.122494432071269, + "grad_norm": 40.90657043457031, + "learning_rate": 1e-06, + "loss": 0.6132, + "num_input_tokens_seen": 204318668, + "step": 3647 + }, + { + "epoch": 8.122494432071269, + "loss": 0.5699893832206726, + "loss_ce": 0.00016517913900315762, + "loss_iou": 0.2294921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 204318668, + "step": 3647 + }, + { + "epoch": 8.124721603563474, + "grad_norm": 22.63620948791504, + "learning_rate": 1e-06, + "loss": 0.5056, + "num_input_tokens_seen": 204376132, + "step": 3648 + }, + { + "epoch": 8.124721603563474, + "loss": 0.478777140378952, + "loss_ce": 0.00013943444355390966, + "loss_iou": 0.2158203125, + "loss_num": 0.00921630859375, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 204376132, + "step": 3648 + }, + { + "epoch": 8.126948775055679, + "grad_norm": 13.594147682189941, + "learning_rate": 1e-06, + "loss": 0.4997, + "num_input_tokens_seen": 204435292, + "step": 3649 + }, + { + "epoch": 8.126948775055679, + "loss": 0.5463213920593262, + "loss_ce": 0.00017879356164485216, + "loss_iou": 0.2578125, + "loss_num": 0.006378173828125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 204435292, + "step": 3649 + }, + { + "epoch": 8.129175946547884, + "grad_norm": 22.09722137451172, + "learning_rate": 1e-06, + "loss": 0.6183, + "num_input_tokens_seen": 204491100, + "step": 3650 + }, + { + "epoch": 8.129175946547884, + "loss": 0.5094276666641235, + "loss_ce": 0.00015031076327431947, + "loss_iou": 0.208984375, + "loss_num": 0.0181884765625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 204491100, + "step": 3650 + }, + { + "epoch": 8.131403118040089, + "grad_norm": 19.736661911010742, + "learning_rate": 1e-06, + "loss": 0.6618, + "num_input_tokens_seen": 204546644, + "step": 3651 + }, + { + "epoch": 8.131403118040089, + "loss": 0.7262549996376038, + "loss_ce": 0.00018080734298564494, + "loss_iou": 0.298828125, + "loss_num": 0.0260009765625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 204546644, + "step": 3651 + }, + { + "epoch": 8.133630289532293, + "grad_norm": 46.30784606933594, + "learning_rate": 1e-06, + "loss": 0.78, + "num_input_tokens_seen": 204601144, + "step": 3652 + }, + { + "epoch": 8.133630289532293, + "loss": 0.8595629930496216, + "loss_ce": 0.00018801141413860023, + "loss_iou": 0.333984375, + "loss_num": 0.0380859375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 204601144, + "step": 3652 + }, + { + "epoch": 8.135857461024498, + "grad_norm": 19.52750587463379, + "learning_rate": 1e-06, + "loss": 0.7199, + "num_input_tokens_seen": 204657164, + "step": 3653 + }, + { + "epoch": 8.135857461024498, + "loss": 0.9219685196876526, + "loss_ce": 0.0002156377595383674, + "loss_iou": 0.38671875, + "loss_num": 0.029296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 204657164, + "step": 3653 + }, + { + "epoch": 8.138084632516703, + "grad_norm": 16.912254333496094, + "learning_rate": 1e-06, + "loss": 0.7982, + "num_input_tokens_seen": 204714952, + "step": 3654 + }, + { + "epoch": 8.138084632516703, + "loss": 0.9020793437957764, + "loss_ce": 0.00022387836361303926, + "loss_iou": 0.3828125, + "loss_num": 0.0272216796875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 204714952, + "step": 3654 + }, + { + "epoch": 8.140311804008908, + "grad_norm": 16.71441650390625, + "learning_rate": 1e-06, + "loss": 0.4875, + "num_input_tokens_seen": 204768916, + "step": 3655 + }, + { + "epoch": 8.140311804008908, + "loss": 0.5125894546508789, + "loss_ce": 0.00013829523231834173, + "loss_iou": 0.19921875, + "loss_num": 0.0228271484375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 204768916, + "step": 3655 + }, + { + "epoch": 8.142538975501113, + "grad_norm": 20.34796714782715, + "learning_rate": 1e-06, + "loss": 0.6757, + "num_input_tokens_seen": 204825592, + "step": 3656 + }, + { + "epoch": 8.142538975501113, + "loss": 0.7248254418373108, + "loss_ce": 0.00021602565539069474, + "loss_iou": 0.28515625, + "loss_num": 0.031005859375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 204825592, + "step": 3656 + }, + { + "epoch": 8.144766146993318, + "grad_norm": 29.74313735961914, + "learning_rate": 1e-06, + "loss": 0.7219, + "num_input_tokens_seen": 204878420, + "step": 3657 + }, + { + "epoch": 8.144766146993318, + "loss": 0.5412226319313049, + "loss_ce": 0.0002070144983008504, + "loss_iou": 0.2392578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 204878420, + "step": 3657 + }, + { + "epoch": 8.146993318485523, + "grad_norm": 17.01011085510254, + "learning_rate": 1e-06, + "loss": 0.8195, + "num_input_tokens_seen": 204933092, + "step": 3658 + }, + { + "epoch": 8.146993318485523, + "loss": 0.9253840446472168, + "loss_ce": 0.0003352175117470324, + "loss_iou": 0.38671875, + "loss_num": 0.030029296875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 204933092, + "step": 3658 + }, + { + "epoch": 8.14922048997773, + "grad_norm": 44.65693664550781, + "learning_rate": 1e-06, + "loss": 0.7511, + "num_input_tokens_seen": 204988252, + "step": 3659 + }, + { + "epoch": 8.14922048997773, + "loss": 0.5185383558273315, + "loss_ce": 0.00016674870857968926, + "loss_iou": 0.234375, + "loss_num": 0.0098876953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 204988252, + "step": 3659 + }, + { + "epoch": 8.151447661469934, + "grad_norm": 12.71816635131836, + "learning_rate": 1e-06, + "loss": 0.4983, + "num_input_tokens_seen": 205044848, + "step": 3660 + }, + { + "epoch": 8.151447661469934, + "loss": 0.5829252600669861, + "loss_ce": 0.0003446881892159581, + "loss_iou": 0.2578125, + "loss_num": 0.0130615234375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 205044848, + "step": 3660 + }, + { + "epoch": 8.153674832962139, + "grad_norm": 16.42379379272461, + "learning_rate": 1e-06, + "loss": 0.6397, + "num_input_tokens_seen": 205101712, + "step": 3661 + }, + { + "epoch": 8.153674832962139, + "loss": 0.8078476190567017, + "loss_ce": 0.00023041786334943026, + "loss_iou": 0.34375, + "loss_num": 0.0240478515625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 205101712, + "step": 3661 + }, + { + "epoch": 8.155902004454344, + "grad_norm": 17.691226959228516, + "learning_rate": 1e-06, + "loss": 0.5595, + "num_input_tokens_seen": 205158256, + "step": 3662 + }, + { + "epoch": 8.155902004454344, + "loss": 0.5641229152679443, + "loss_ce": 0.00015803641872480512, + "loss_iou": 0.2578125, + "loss_num": 0.0093994140625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 205158256, + "step": 3662 + }, + { + "epoch": 8.158129175946549, + "grad_norm": 27.215301513671875, + "learning_rate": 1e-06, + "loss": 0.738, + "num_input_tokens_seen": 205215004, + "step": 3663 + }, + { + "epoch": 8.158129175946549, + "loss": 0.6344413757324219, + "loss_ce": 0.0002250707766506821, + "loss_iou": 0.24609375, + "loss_num": 0.028564453125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 205215004, + "step": 3663 + }, + { + "epoch": 8.160356347438753, + "grad_norm": 27.16131591796875, + "learning_rate": 1e-06, + "loss": 0.5939, + "num_input_tokens_seen": 205269476, + "step": 3664 + }, + { + "epoch": 8.160356347438753, + "loss": 0.729198694229126, + "loss_ce": 0.00019480480113998055, + "loss_iou": 0.306640625, + "loss_num": 0.022705078125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 205269476, + "step": 3664 + }, + { + "epoch": 8.162583518930958, + "grad_norm": 14.612595558166504, + "learning_rate": 1e-06, + "loss": 0.7695, + "num_input_tokens_seen": 205321936, + "step": 3665 + }, + { + "epoch": 8.162583518930958, + "loss": 0.7426295280456543, + "loss_ce": 0.00019791701924987137, + "loss_iou": 0.3125, + "loss_num": 0.02392578125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 205321936, + "step": 3665 + }, + { + "epoch": 8.164810690423163, + "grad_norm": 23.188566207885742, + "learning_rate": 1e-06, + "loss": 0.9123, + "num_input_tokens_seen": 205373464, + "step": 3666 + }, + { + "epoch": 8.164810690423163, + "loss": 1.0600320100784302, + "loss_ce": 0.0003396637039259076, + "loss_iou": 0.455078125, + "loss_num": 0.0301513671875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 205373464, + "step": 3666 + }, + { + "epoch": 8.167037861915368, + "grad_norm": 17.917818069458008, + "learning_rate": 1e-06, + "loss": 0.6325, + "num_input_tokens_seen": 205424324, + "step": 3667 + }, + { + "epoch": 8.167037861915368, + "loss": 0.43619126081466675, + "loss_ce": 0.00015609552792739123, + "loss_iou": 0.189453125, + "loss_num": 0.011474609375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 205424324, + "step": 3667 + }, + { + "epoch": 8.169265033407573, + "grad_norm": 22.07512092590332, + "learning_rate": 1e-06, + "loss": 1.011, + "num_input_tokens_seen": 205479044, + "step": 3668 + }, + { + "epoch": 8.169265033407573, + "loss": 1.0271341800689697, + "loss_ce": 0.00027870899066329, + "loss_iou": 0.3984375, + "loss_num": 0.0458984375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 205479044, + "step": 3668 + }, + { + "epoch": 8.171492204899778, + "grad_norm": 47.829078674316406, + "learning_rate": 1e-06, + "loss": 0.4878, + "num_input_tokens_seen": 205533932, + "step": 3669 + }, + { + "epoch": 8.171492204899778, + "loss": 0.40675288438796997, + "loss_ce": 0.0001671954378252849, + "loss_iou": 0.18359375, + "loss_num": 0.007720947265625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 205533932, + "step": 3669 + }, + { + "epoch": 8.173719376391983, + "grad_norm": 18.006383895874023, + "learning_rate": 1e-06, + "loss": 0.4668, + "num_input_tokens_seen": 205589416, + "step": 3670 + }, + { + "epoch": 8.173719376391983, + "loss": 0.3985961675643921, + "loss_ce": 0.00015864020679146051, + "loss_iou": 0.17578125, + "loss_num": 0.00921630859375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 205589416, + "step": 3670 + }, + { + "epoch": 8.175946547884188, + "grad_norm": 27.27997589111328, + "learning_rate": 1e-06, + "loss": 0.5638, + "num_input_tokens_seen": 205643196, + "step": 3671 + }, + { + "epoch": 8.175946547884188, + "loss": 0.5613173246383667, + "loss_ce": 0.00016013637650758028, + "loss_iou": 0.2216796875, + "loss_num": 0.0234375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 205643196, + "step": 3671 + }, + { + "epoch": 8.178173719376392, + "grad_norm": 19.659027099609375, + "learning_rate": 1e-06, + "loss": 0.7405, + "num_input_tokens_seen": 205698748, + "step": 3672 + }, + { + "epoch": 8.178173719376392, + "loss": 0.9643779397010803, + "loss_ce": 0.0002666152431629598, + "loss_iou": 0.369140625, + "loss_num": 0.044921875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 205698748, + "step": 3672 + }, + { + "epoch": 8.180400890868597, + "grad_norm": 25.99626922607422, + "learning_rate": 1e-06, + "loss": 0.6958, + "num_input_tokens_seen": 205753176, + "step": 3673 + }, + { + "epoch": 8.180400890868597, + "loss": 0.4478331208229065, + "loss_ce": 0.00020127877360209823, + "loss_iou": 0.203125, + "loss_num": 0.0084228515625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 205753176, + "step": 3673 + }, + { + "epoch": 8.182628062360802, + "grad_norm": 15.863686561584473, + "learning_rate": 1e-06, + "loss": 0.5438, + "num_input_tokens_seen": 205807732, + "step": 3674 + }, + { + "epoch": 8.182628062360802, + "loss": 0.5248715877532959, + "loss_ce": 0.00015238260675687343, + "loss_iou": 0.2353515625, + "loss_num": 0.0107421875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 205807732, + "step": 3674 + }, + { + "epoch": 8.184855233853007, + "grad_norm": 21.04659652709961, + "learning_rate": 1e-06, + "loss": 0.6401, + "num_input_tokens_seen": 205860244, + "step": 3675 + }, + { + "epoch": 8.184855233853007, + "loss": 0.5119701623916626, + "loss_ce": 0.0001293848908971995, + "loss_iou": 0.2177734375, + "loss_num": 0.01519775390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 205860244, + "step": 3675 + }, + { + "epoch": 8.187082405345212, + "grad_norm": 15.499350547790527, + "learning_rate": 1e-06, + "loss": 0.6712, + "num_input_tokens_seen": 205918972, + "step": 3676 + }, + { + "epoch": 8.187082405345212, + "loss": 0.7695894241333008, + "loss_ce": 0.00018028570048045367, + "loss_iou": 0.302734375, + "loss_num": 0.033203125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 205918972, + "step": 3676 + }, + { + "epoch": 8.189309576837417, + "grad_norm": 19.32471466064453, + "learning_rate": 1e-06, + "loss": 0.592, + "num_input_tokens_seen": 205975060, + "step": 3677 + }, + { + "epoch": 8.189309576837417, + "loss": 0.6343322992324829, + "loss_ce": 0.00017700789612717927, + "loss_iou": 0.283203125, + "loss_num": 0.01336669921875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 205975060, + "step": 3677 + }, + { + "epoch": 8.191536748329622, + "grad_norm": 18.84845542907715, + "learning_rate": 1e-06, + "loss": 0.69, + "num_input_tokens_seen": 206030576, + "step": 3678 + }, + { + "epoch": 8.191536748329622, + "loss": 0.8888704776763916, + "loss_ce": 0.00019854953279718757, + "loss_iou": 0.375, + "loss_num": 0.02734375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 206030576, + "step": 3678 + }, + { + "epoch": 8.193763919821826, + "grad_norm": 16.970340728759766, + "learning_rate": 1e-06, + "loss": 0.6626, + "num_input_tokens_seen": 206087296, + "step": 3679 + }, + { + "epoch": 8.193763919821826, + "loss": 0.6373680830001831, + "loss_ce": 0.0002831476158462465, + "loss_iou": 0.28125, + "loss_num": 0.01470947265625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 206087296, + "step": 3679 + }, + { + "epoch": 8.195991091314031, + "grad_norm": 31.894102096557617, + "learning_rate": 1e-06, + "loss": 0.5481, + "num_input_tokens_seen": 206141220, + "step": 3680 + }, + { + "epoch": 8.195991091314031, + "loss": 0.3562151789665222, + "loss_ce": 0.0001360624737571925, + "loss_iou": 0.16015625, + "loss_num": 0.007049560546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 206141220, + "step": 3680 + }, + { + "epoch": 8.198218262806236, + "grad_norm": 15.175886154174805, + "learning_rate": 1e-06, + "loss": 0.4565, + "num_input_tokens_seen": 206199092, + "step": 3681 + }, + { + "epoch": 8.198218262806236, + "loss": 0.38468533754348755, + "loss_ce": 0.00022489193361252546, + "loss_iou": 0.1669921875, + "loss_num": 0.01019287109375, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 206199092, + "step": 3681 + }, + { + "epoch": 8.200445434298441, + "grad_norm": 14.856992721557617, + "learning_rate": 1e-06, + "loss": 0.5933, + "num_input_tokens_seen": 206255596, + "step": 3682 + }, + { + "epoch": 8.200445434298441, + "loss": 0.6677369475364685, + "loss_ce": 0.0002565070753917098, + "loss_iou": 0.28125, + "loss_num": 0.0211181640625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 206255596, + "step": 3682 + }, + { + "epoch": 8.202672605790646, + "grad_norm": 26.978513717651367, + "learning_rate": 1e-06, + "loss": 0.6554, + "num_input_tokens_seen": 206310404, + "step": 3683 + }, + { + "epoch": 8.202672605790646, + "loss": 0.6696029901504517, + "loss_ce": 0.0004135652561672032, + "loss_iou": 0.28125, + "loss_num": 0.021240234375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 206310404, + "step": 3683 + }, + { + "epoch": 8.20489977728285, + "grad_norm": 15.110715866088867, + "learning_rate": 1e-06, + "loss": 0.5443, + "num_input_tokens_seen": 206364188, + "step": 3684 + }, + { + "epoch": 8.20489977728285, + "loss": 0.4510454833507538, + "loss_ce": 0.00017877297068480402, + "loss_iou": 0.1943359375, + "loss_num": 0.01251220703125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 206364188, + "step": 3684 + }, + { + "epoch": 8.207126948775056, + "grad_norm": 18.641998291015625, + "learning_rate": 1e-06, + "loss": 0.563, + "num_input_tokens_seen": 206419384, + "step": 3685 + }, + { + "epoch": 8.207126948775056, + "loss": 0.5365896821022034, + "loss_ce": 0.00021273433230817318, + "loss_iou": 0.2197265625, + "loss_num": 0.01953125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 206419384, + "step": 3685 + }, + { + "epoch": 8.20935412026726, + "grad_norm": 16.16904640197754, + "learning_rate": 1e-06, + "loss": 0.6996, + "num_input_tokens_seen": 206474860, + "step": 3686 + }, + { + "epoch": 8.20935412026726, + "loss": 0.9597395062446594, + "loss_ce": 0.00026684050681069493, + "loss_iou": 0.369140625, + "loss_num": 0.0439453125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 206474860, + "step": 3686 + }, + { + "epoch": 8.211581291759465, + "grad_norm": 14.339240074157715, + "learning_rate": 1e-06, + "loss": 0.5036, + "num_input_tokens_seen": 206532756, + "step": 3687 + }, + { + "epoch": 8.211581291759465, + "loss": 0.5385290384292603, + "loss_ce": 0.0001989899465115741, + "loss_iou": 0.2392578125, + "loss_num": 0.01202392578125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 206532756, + "step": 3687 + }, + { + "epoch": 8.21380846325167, + "grad_norm": 16.02851104736328, + "learning_rate": 1e-06, + "loss": 0.7048, + "num_input_tokens_seen": 206588744, + "step": 3688 + }, + { + "epoch": 8.21380846325167, + "loss": 0.8307337164878845, + "loss_ce": 0.000167329068062827, + "loss_iou": 0.36328125, + "loss_num": 0.0205078125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 206588744, + "step": 3688 + }, + { + "epoch": 8.216035634743875, + "grad_norm": 20.067358016967773, + "learning_rate": 1e-06, + "loss": 0.5148, + "num_input_tokens_seen": 206645832, + "step": 3689 + }, + { + "epoch": 8.216035634743875, + "loss": 0.4788054823875427, + "loss_ce": 0.0001677550608292222, + "loss_iou": 0.2001953125, + "loss_num": 0.01544189453125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 206645832, + "step": 3689 + }, + { + "epoch": 8.21826280623608, + "grad_norm": 13.61365795135498, + "learning_rate": 1e-06, + "loss": 0.4008, + "num_input_tokens_seen": 206701388, + "step": 3690 + }, + { + "epoch": 8.21826280623608, + "loss": 0.34245729446411133, + "loss_ce": 0.00017215096158906817, + "loss_iou": 0.1513671875, + "loss_num": 0.007781982421875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 206701388, + "step": 3690 + }, + { + "epoch": 8.220489977728285, + "grad_norm": 16.062326431274414, + "learning_rate": 1e-06, + "loss": 0.6755, + "num_input_tokens_seen": 206756396, + "step": 3691 + }, + { + "epoch": 8.220489977728285, + "loss": 0.7819403409957886, + "loss_ce": 0.00020207473426125944, + "loss_iou": 0.337890625, + "loss_num": 0.02099609375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 206756396, + "step": 3691 + }, + { + "epoch": 8.22271714922049, + "grad_norm": 16.20405387878418, + "learning_rate": 1e-06, + "loss": 0.8364, + "num_input_tokens_seen": 206813452, + "step": 3692 + }, + { + "epoch": 8.22271714922049, + "loss": 0.7428736686706543, + "loss_ce": 0.00019789818907156587, + "loss_iou": 0.34765625, + "loss_num": 0.00885009765625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 206813452, + "step": 3692 + }, + { + "epoch": 8.224944320712694, + "grad_norm": 15.899574279785156, + "learning_rate": 1e-06, + "loss": 0.5448, + "num_input_tokens_seen": 206871936, + "step": 3693 + }, + { + "epoch": 8.224944320712694, + "loss": 0.4710107445716858, + "loss_ce": 0.00018556615395937115, + "loss_iou": 0.2080078125, + "loss_num": 0.010986328125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 206871936, + "step": 3693 + }, + { + "epoch": 8.2271714922049, + "grad_norm": 21.231388092041016, + "learning_rate": 1e-06, + "loss": 0.6426, + "num_input_tokens_seen": 206927444, + "step": 3694 + }, + { + "epoch": 8.2271714922049, + "loss": 0.717461347579956, + "loss_ce": 0.00017620844300836325, + "loss_iou": 0.291015625, + "loss_num": 0.02685546875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 206927444, + "step": 3694 + }, + { + "epoch": 8.229398663697104, + "grad_norm": 14.745061874389648, + "learning_rate": 1e-06, + "loss": 0.4019, + "num_input_tokens_seen": 206982152, + "step": 3695 + }, + { + "epoch": 8.229398663697104, + "loss": 0.32421159744262695, + "loss_ce": 0.00014541992277372628, + "loss_iou": 0.1279296875, + "loss_num": 0.01361083984375, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 206982152, + "step": 3695 + }, + { + "epoch": 8.231625835189309, + "grad_norm": 16.86565589904785, + "learning_rate": 1e-06, + "loss": 0.577, + "num_input_tokens_seen": 207036748, + "step": 3696 + }, + { + "epoch": 8.231625835189309, + "loss": 0.5805333852767944, + "loss_ce": 0.00015002592408563942, + "loss_iou": 0.23828125, + "loss_num": 0.020751953125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 207036748, + "step": 3696 + }, + { + "epoch": 8.233853006681514, + "grad_norm": 16.574954986572266, + "learning_rate": 1e-06, + "loss": 0.751, + "num_input_tokens_seen": 207090424, + "step": 3697 + }, + { + "epoch": 8.233853006681514, + "loss": 0.7723691463470459, + "loss_ce": 0.0001523814134998247, + "loss_iou": 0.33984375, + "loss_num": 0.018798828125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 207090424, + "step": 3697 + }, + { + "epoch": 8.236080178173719, + "grad_norm": 32.75465774536133, + "learning_rate": 1e-06, + "loss": 0.4883, + "num_input_tokens_seen": 207146916, + "step": 3698 + }, + { + "epoch": 8.236080178173719, + "loss": 0.5894994735717773, + "loss_ce": 0.0001440244377590716, + "loss_iou": 0.263671875, + "loss_num": 0.0126953125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 207146916, + "step": 3698 + }, + { + "epoch": 8.238307349665924, + "grad_norm": 19.782665252685547, + "learning_rate": 1e-06, + "loss": 1.013, + "num_input_tokens_seen": 207202692, + "step": 3699 + }, + { + "epoch": 8.238307349665924, + "loss": 1.0240821838378906, + "loss_ce": 0.00015636572788935155, + "loss_iou": 0.384765625, + "loss_num": 0.050537109375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 207202692, + "step": 3699 + }, + { + "epoch": 8.240534521158128, + "grad_norm": 25.966341018676758, + "learning_rate": 1e-06, + "loss": 0.6403, + "num_input_tokens_seen": 207253188, + "step": 3700 + }, + { + "epoch": 8.240534521158128, + "loss": 0.602780818939209, + "loss_ce": 0.00024172097619157284, + "loss_iou": 0.232421875, + "loss_num": 0.027587890625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 207253188, + "step": 3700 + }, + { + "epoch": 8.242761692650333, + "grad_norm": 19.4698543548584, + "learning_rate": 1e-06, + "loss": 0.565, + "num_input_tokens_seen": 207307820, + "step": 3701 + }, + { + "epoch": 8.242761692650333, + "loss": 0.6106410026550293, + "loss_ce": 0.00016741504077799618, + "loss_iou": 0.2734375, + "loss_num": 0.0128173828125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 207307820, + "step": 3701 + }, + { + "epoch": 8.244988864142538, + "grad_norm": 15.045053482055664, + "learning_rate": 1e-06, + "loss": 0.4617, + "num_input_tokens_seen": 207363340, + "step": 3702 + }, + { + "epoch": 8.244988864142538, + "loss": 0.40211230516433716, + "loss_ce": 0.00013477080210577697, + "loss_iou": 0.1767578125, + "loss_num": 0.0096435546875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 207363340, + "step": 3702 + }, + { + "epoch": 8.247216035634743, + "grad_norm": 15.657466888427734, + "learning_rate": 1e-06, + "loss": 0.721, + "num_input_tokens_seen": 207416744, + "step": 3703 + }, + { + "epoch": 8.247216035634743, + "loss": 0.7376276254653931, + "loss_ce": 0.00026186800096184015, + "loss_iou": 0.298828125, + "loss_num": 0.0281982421875, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 207416744, + "step": 3703 + }, + { + "epoch": 8.249443207126948, + "grad_norm": 16.417505264282227, + "learning_rate": 1e-06, + "loss": 0.4049, + "num_input_tokens_seen": 207473136, + "step": 3704 + }, + { + "epoch": 8.249443207126948, + "loss": 0.3186110258102417, + "loss_ce": 0.00012959179002791643, + "loss_iou": 0.1259765625, + "loss_num": 0.01324462890625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 207473136, + "step": 3704 + }, + { + "epoch": 8.251670378619155, + "grad_norm": 21.20426368713379, + "learning_rate": 1e-06, + "loss": 0.5388, + "num_input_tokens_seen": 207528736, + "step": 3705 + }, + { + "epoch": 8.251670378619155, + "loss": 0.6479160189628601, + "loss_ce": 0.00014987270697019994, + "loss_iou": 0.275390625, + "loss_num": 0.01953125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 207528736, + "step": 3705 + }, + { + "epoch": 8.25389755011136, + "grad_norm": 27.81067657470703, + "learning_rate": 1e-06, + "loss": 0.6099, + "num_input_tokens_seen": 207579912, + "step": 3706 + }, + { + "epoch": 8.25389755011136, + "loss": 0.6510103344917297, + "loss_ce": 0.00019250249897595495, + "loss_iou": 0.267578125, + "loss_num": 0.023193359375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 207579912, + "step": 3706 + }, + { + "epoch": 8.256124721603564, + "grad_norm": 24.32451629638672, + "learning_rate": 1e-06, + "loss": 0.6658, + "num_input_tokens_seen": 207635324, + "step": 3707 + }, + { + "epoch": 8.256124721603564, + "loss": 0.6035555601119995, + "loss_ce": 0.00016204667917918414, + "loss_iou": 0.25390625, + "loss_num": 0.018798828125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 207635324, + "step": 3707 + }, + { + "epoch": 8.25835189309577, + "grad_norm": 19.096254348754883, + "learning_rate": 1e-06, + "loss": 0.6809, + "num_input_tokens_seen": 207691984, + "step": 3708 + }, + { + "epoch": 8.25835189309577, + "loss": 0.6975338459014893, + "loss_ce": 0.0002682044287212193, + "loss_iou": 0.298828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 207691984, + "step": 3708 + }, + { + "epoch": 8.260579064587974, + "grad_norm": 19.96952247619629, + "learning_rate": 1e-06, + "loss": 0.9264, + "num_input_tokens_seen": 207742476, + "step": 3709 + }, + { + "epoch": 8.260579064587974, + "loss": 0.9751963019371033, + "loss_ce": 0.00022070904378779233, + "loss_iou": 0.416015625, + "loss_num": 0.0283203125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 207742476, + "step": 3709 + }, + { + "epoch": 8.262806236080179, + "grad_norm": 15.585442543029785, + "learning_rate": 1e-06, + "loss": 0.4606, + "num_input_tokens_seen": 207800484, + "step": 3710 + }, + { + "epoch": 8.262806236080179, + "loss": 0.5031048655509949, + "loss_ce": 0.00017520022811368108, + "loss_iou": 0.2265625, + "loss_num": 0.0098876953125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 207800484, + "step": 3710 + }, + { + "epoch": 8.265033407572384, + "grad_norm": 36.97969055175781, + "learning_rate": 1e-06, + "loss": 0.9413, + "num_input_tokens_seen": 207855020, + "step": 3711 + }, + { + "epoch": 8.265033407572384, + "loss": 0.9603962898254395, + "loss_ce": 0.00019123686070088297, + "loss_iou": 0.373046875, + "loss_num": 0.043212890625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 207855020, + "step": 3711 + }, + { + "epoch": 8.267260579064589, + "grad_norm": 15.044690132141113, + "learning_rate": 1e-06, + "loss": 0.7137, + "num_input_tokens_seen": 207911256, + "step": 3712 + }, + { + "epoch": 8.267260579064589, + "loss": 0.6639913320541382, + "loss_ce": 0.00017301499610766768, + "loss_iou": 0.279296875, + "loss_num": 0.0211181640625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 207911256, + "step": 3712 + }, + { + "epoch": 8.269487750556793, + "grad_norm": 19.051246643066406, + "learning_rate": 1e-06, + "loss": 0.6152, + "num_input_tokens_seen": 207968744, + "step": 3713 + }, + { + "epoch": 8.269487750556793, + "loss": 0.5926859378814697, + "loss_ce": 0.00015662802616134286, + "loss_iou": 0.259765625, + "loss_num": 0.0145263671875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 207968744, + "step": 3713 + }, + { + "epoch": 8.271714922048998, + "grad_norm": 17.641752243041992, + "learning_rate": 1e-06, + "loss": 0.6043, + "num_input_tokens_seen": 208024108, + "step": 3714 + }, + { + "epoch": 8.271714922048998, + "loss": 0.5748984813690186, + "loss_ce": 0.0001914296008180827, + "loss_iou": 0.251953125, + "loss_num": 0.01397705078125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 208024108, + "step": 3714 + }, + { + "epoch": 8.273942093541203, + "grad_norm": 17.993663787841797, + "learning_rate": 1e-06, + "loss": 0.7099, + "num_input_tokens_seen": 208075840, + "step": 3715 + }, + { + "epoch": 8.273942093541203, + "loss": 0.6750204563140869, + "loss_ce": 0.0004599187523126602, + "loss_iou": 0.30078125, + "loss_num": 0.014892578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 208075840, + "step": 3715 + }, + { + "epoch": 8.276169265033408, + "grad_norm": 23.302898406982422, + "learning_rate": 1e-06, + "loss": 0.4742, + "num_input_tokens_seen": 208132584, + "step": 3716 + }, + { + "epoch": 8.276169265033408, + "loss": 0.5063949823379517, + "loss_ce": 0.0001694063248578459, + "loss_iou": 0.2275390625, + "loss_num": 0.01031494140625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 208132584, + "step": 3716 + }, + { + "epoch": 8.278396436525613, + "grad_norm": 19.531972885131836, + "learning_rate": 1e-06, + "loss": 0.5614, + "num_input_tokens_seen": 208189256, + "step": 3717 + }, + { + "epoch": 8.278396436525613, + "loss": 0.46560224890708923, + "loss_ce": 0.0001481281651649624, + "loss_iou": 0.1953125, + "loss_num": 0.014892578125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 208189256, + "step": 3717 + }, + { + "epoch": 8.280623608017818, + "grad_norm": 23.255966186523438, + "learning_rate": 1e-06, + "loss": 0.6233, + "num_input_tokens_seen": 208247268, + "step": 3718 + }, + { + "epoch": 8.280623608017818, + "loss": 0.630403995513916, + "loss_ce": 0.0002770504215732217, + "loss_iou": 0.263671875, + "loss_num": 0.020263671875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 208247268, + "step": 3718 + }, + { + "epoch": 8.282850779510023, + "grad_norm": 31.8346004486084, + "learning_rate": 1e-06, + "loss": 0.4895, + "num_input_tokens_seen": 208303824, + "step": 3719 + }, + { + "epoch": 8.282850779510023, + "loss": 0.49452388286590576, + "loss_ce": 0.00020015303744003177, + "loss_iou": 0.212890625, + "loss_num": 0.0140380859375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 208303824, + "step": 3719 + }, + { + "epoch": 8.285077951002227, + "grad_norm": 16.450027465820312, + "learning_rate": 1e-06, + "loss": 0.8151, + "num_input_tokens_seen": 208361756, + "step": 3720 + }, + { + "epoch": 8.285077951002227, + "loss": 0.8614362478256226, + "loss_ce": 0.00023015934857539833, + "loss_iou": 0.361328125, + "loss_num": 0.0274658203125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 208361756, + "step": 3720 + }, + { + "epoch": 8.287305122494432, + "grad_norm": 73.26863098144531, + "learning_rate": 1e-06, + "loss": 0.8532, + "num_input_tokens_seen": 208418208, + "step": 3721 + }, + { + "epoch": 8.287305122494432, + "loss": 1.0102782249450684, + "loss_ce": 0.0002685172366909683, + "loss_iou": 0.3515625, + "loss_num": 0.0615234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 208418208, + "step": 3721 + }, + { + "epoch": 8.289532293986637, + "grad_norm": 14.410170555114746, + "learning_rate": 1e-06, + "loss": 0.5911, + "num_input_tokens_seen": 208475988, + "step": 3722 + }, + { + "epoch": 8.289532293986637, + "loss": 0.736660897731781, + "loss_ce": 0.00033277933835051954, + "loss_iou": 0.322265625, + "loss_num": 0.018310546875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 208475988, + "step": 3722 + }, + { + "epoch": 8.291759465478842, + "grad_norm": 17.486080169677734, + "learning_rate": 1e-06, + "loss": 0.6389, + "num_input_tokens_seen": 208532072, + "step": 3723 + }, + { + "epoch": 8.291759465478842, + "loss": 0.7401440143585205, + "loss_ce": 0.00015374486974906176, + "loss_iou": 0.333984375, + "loss_num": 0.01446533203125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 208532072, + "step": 3723 + }, + { + "epoch": 8.293986636971047, + "grad_norm": 27.663610458374023, + "learning_rate": 1e-06, + "loss": 0.5406, + "num_input_tokens_seen": 208587856, + "step": 3724 + }, + { + "epoch": 8.293986636971047, + "loss": 0.46732282638549805, + "loss_ce": 0.0001597225054865703, + "loss_iou": 0.2021484375, + "loss_num": 0.0125732421875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 208587856, + "step": 3724 + }, + { + "epoch": 8.296213808463252, + "grad_norm": 23.040565490722656, + "learning_rate": 1e-06, + "loss": 0.7325, + "num_input_tokens_seen": 208640652, + "step": 3725 + }, + { + "epoch": 8.296213808463252, + "loss": 0.9635574817657471, + "loss_ce": 0.00017856716294772923, + "loss_iou": 0.42578125, + "loss_num": 0.0220947265625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 208640652, + "step": 3725 + }, + { + "epoch": 8.298440979955457, + "grad_norm": 17.724363327026367, + "learning_rate": 1e-06, + "loss": 0.4894, + "num_input_tokens_seen": 208694956, + "step": 3726 + }, + { + "epoch": 8.298440979955457, + "loss": 0.42843109369277954, + "loss_ce": 0.0002084198349621147, + "loss_iou": 0.189453125, + "loss_num": 0.00970458984375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 208694956, + "step": 3726 + }, + { + "epoch": 8.300668151447661, + "grad_norm": 21.658283233642578, + "learning_rate": 1e-06, + "loss": 0.5328, + "num_input_tokens_seen": 208749444, + "step": 3727 + }, + { + "epoch": 8.300668151447661, + "loss": 0.5249356627464294, + "loss_ce": 0.0001553678303025663, + "loss_iou": 0.2138671875, + "loss_num": 0.019287109375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 208749444, + "step": 3727 + }, + { + "epoch": 8.302895322939866, + "grad_norm": 25.13924789428711, + "learning_rate": 1e-06, + "loss": 0.5197, + "num_input_tokens_seen": 208807968, + "step": 3728 + }, + { + "epoch": 8.302895322939866, + "loss": 0.7015814781188965, + "loss_ce": 0.00016545310791116208, + "loss_iou": 0.3046875, + "loss_num": 0.01806640625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 208807968, + "step": 3728 + }, + { + "epoch": 8.305122494432071, + "grad_norm": 19.753189086914062, + "learning_rate": 1e-06, + "loss": 0.6141, + "num_input_tokens_seen": 208865056, + "step": 3729 + }, + { + "epoch": 8.305122494432071, + "loss": 0.7641258835792542, + "loss_ce": 0.0002098674012813717, + "loss_iou": 0.330078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 208865056, + "step": 3729 + }, + { + "epoch": 8.307349665924276, + "grad_norm": 18.014732360839844, + "learning_rate": 1e-06, + "loss": 0.5394, + "num_input_tokens_seen": 208923552, + "step": 3730 + }, + { + "epoch": 8.307349665924276, + "loss": 0.6470487713813782, + "loss_ce": 0.0001981953828362748, + "loss_iou": 0.28515625, + "loss_num": 0.01507568359375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 208923552, + "step": 3730 + }, + { + "epoch": 8.309576837416481, + "grad_norm": 35.129364013671875, + "learning_rate": 1e-06, + "loss": 0.6561, + "num_input_tokens_seen": 208979572, + "step": 3731 + }, + { + "epoch": 8.309576837416481, + "loss": 0.6719157695770264, + "loss_ce": 0.0002849046722985804, + "loss_iou": 0.30078125, + "loss_num": 0.01373291015625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 208979572, + "step": 3731 + }, + { + "epoch": 8.311804008908686, + "grad_norm": 22.689146041870117, + "learning_rate": 1e-06, + "loss": 0.7743, + "num_input_tokens_seen": 209034148, + "step": 3732 + }, + { + "epoch": 8.311804008908686, + "loss": 0.7121527194976807, + "loss_ce": 0.00023866846458986402, + "loss_iou": 0.28125, + "loss_num": 0.0302734375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 209034148, + "step": 3732 + }, + { + "epoch": 8.31403118040089, + "grad_norm": 91.16738891601562, + "learning_rate": 1e-06, + "loss": 0.5123, + "num_input_tokens_seen": 209090968, + "step": 3733 + }, + { + "epoch": 8.31403118040089, + "loss": 0.5099412202835083, + "loss_ce": 0.00017555063823238015, + "loss_iou": 0.2294921875, + "loss_num": 0.010009765625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 209090968, + "step": 3733 + }, + { + "epoch": 8.316258351893095, + "grad_norm": 19.895280838012695, + "learning_rate": 1e-06, + "loss": 0.6358, + "num_input_tokens_seen": 209145588, + "step": 3734 + }, + { + "epoch": 8.316258351893095, + "loss": 0.8830050826072693, + "loss_ce": 0.00019260949920862913, + "loss_iou": 0.369140625, + "loss_num": 0.0294189453125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 209145588, + "step": 3734 + }, + { + "epoch": 8.3184855233853, + "grad_norm": 28.7868709564209, + "learning_rate": 1e-06, + "loss": 0.6875, + "num_input_tokens_seen": 209201476, + "step": 3735 + }, + { + "epoch": 8.3184855233853, + "loss": 0.7821846008300781, + "loss_ce": 0.00020213823881931603, + "loss_iou": 0.3515625, + "loss_num": 0.015380859375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 209201476, + "step": 3735 + }, + { + "epoch": 8.320712694877505, + "grad_norm": 21.692697525024414, + "learning_rate": 1e-06, + "loss": 0.5106, + "num_input_tokens_seen": 209257012, + "step": 3736 + }, + { + "epoch": 8.320712694877505, + "loss": 0.5338290929794312, + "loss_ce": 0.0001377178414259106, + "loss_iou": 0.2392578125, + "loss_num": 0.01104736328125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 209257012, + "step": 3736 + }, + { + "epoch": 8.32293986636971, + "grad_norm": 24.94318199157715, + "learning_rate": 1e-06, + "loss": 0.8265, + "num_input_tokens_seen": 209312432, + "step": 3737 + }, + { + "epoch": 8.32293986636971, + "loss": 0.6608332395553589, + "loss_ce": 0.0001887211692519486, + "loss_iou": 0.2734375, + "loss_num": 0.022705078125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 209312432, + "step": 3737 + }, + { + "epoch": 8.325167037861915, + "grad_norm": 17.53243637084961, + "learning_rate": 1e-06, + "loss": 0.5902, + "num_input_tokens_seen": 209368084, + "step": 3738 + }, + { + "epoch": 8.325167037861915, + "loss": 0.7750497460365295, + "loss_ce": 0.00014740778715349734, + "loss_iou": 0.306640625, + "loss_num": 0.032470703125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 209368084, + "step": 3738 + }, + { + "epoch": 8.32739420935412, + "grad_norm": 14.224679946899414, + "learning_rate": 1e-06, + "loss": 0.5397, + "num_input_tokens_seen": 209421340, + "step": 3739 + }, + { + "epoch": 8.32739420935412, + "loss": 0.43520334362983704, + "loss_ce": 0.00014473804912995547, + "loss_iou": 0.1962890625, + "loss_num": 0.0086669921875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 209421340, + "step": 3739 + }, + { + "epoch": 8.329621380846325, + "grad_norm": 17.133176803588867, + "learning_rate": 1e-06, + "loss": 0.5827, + "num_input_tokens_seen": 209478168, + "step": 3740 + }, + { + "epoch": 8.329621380846325, + "loss": 0.5902957916259766, + "loss_ce": 0.0002079373225569725, + "loss_iou": 0.2431640625, + "loss_num": 0.0206298828125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 209478168, + "step": 3740 + }, + { + "epoch": 8.33184855233853, + "grad_norm": 23.81924057006836, + "learning_rate": 1e-06, + "loss": 0.5458, + "num_input_tokens_seen": 209533220, + "step": 3741 + }, + { + "epoch": 8.33184855233853, + "loss": 0.7213869094848633, + "loss_ce": 0.00019548808631952852, + "loss_iou": 0.322265625, + "loss_num": 0.0155029296875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 209533220, + "step": 3741 + }, + { + "epoch": 8.334075723830734, + "grad_norm": 16.98560333251953, + "learning_rate": 1e-06, + "loss": 0.4944, + "num_input_tokens_seen": 209589144, + "step": 3742 + }, + { + "epoch": 8.334075723830734, + "loss": 0.3810575604438782, + "loss_ce": 0.00019817678548861295, + "loss_iou": 0.1640625, + "loss_num": 0.01055908203125, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 209589144, + "step": 3742 + }, + { + "epoch": 8.33630289532294, + "grad_norm": 18.857044219970703, + "learning_rate": 1e-06, + "loss": 0.4678, + "num_input_tokens_seen": 209640328, + "step": 3743 + }, + { + "epoch": 8.33630289532294, + "loss": 0.413556307554245, + "loss_ce": 0.00016517053882125765, + "loss_iou": 0.173828125, + "loss_num": 0.01312255859375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 209640328, + "step": 3743 + }, + { + "epoch": 8.338530066815144, + "grad_norm": 16.573524475097656, + "learning_rate": 1e-06, + "loss": 0.6259, + "num_input_tokens_seen": 209696820, + "step": 3744 + }, + { + "epoch": 8.338530066815144, + "loss": 0.6830825805664062, + "loss_ce": 0.00022125753457657993, + "loss_iou": 0.296875, + "loss_num": 0.01806640625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 209696820, + "step": 3744 + }, + { + "epoch": 8.340757238307349, + "grad_norm": 29.868751525878906, + "learning_rate": 1e-06, + "loss": 0.8029, + "num_input_tokens_seen": 209750812, + "step": 3745 + }, + { + "epoch": 8.340757238307349, + "loss": 0.7501974701881409, + "loss_ce": 0.0001974825281649828, + "loss_iou": 0.31640625, + "loss_num": 0.0238037109375, + "loss_xval": 0.75, + "num_input_tokens_seen": 209750812, + "step": 3745 + }, + { + "epoch": 8.342984409799554, + "grad_norm": 13.687642097473145, + "learning_rate": 1e-06, + "loss": 0.6552, + "num_input_tokens_seen": 209805656, + "step": 3746 + }, + { + "epoch": 8.342984409799554, + "loss": 0.8280588984489441, + "loss_ce": 0.00017803689115680754, + "loss_iou": 0.337890625, + "loss_num": 0.0308837890625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 209805656, + "step": 3746 + }, + { + "epoch": 8.345211581291759, + "grad_norm": 40.525962829589844, + "learning_rate": 1e-06, + "loss": 0.7054, + "num_input_tokens_seen": 209859280, + "step": 3747 + }, + { + "epoch": 8.345211581291759, + "loss": 0.7600662708282471, + "loss_ce": 0.00017863856919575483, + "loss_iou": 0.3203125, + "loss_num": 0.02392578125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 209859280, + "step": 3747 + }, + { + "epoch": 8.347438752783964, + "grad_norm": 81.74958801269531, + "learning_rate": 1e-06, + "loss": 0.5614, + "num_input_tokens_seen": 209917268, + "step": 3748 + }, + { + "epoch": 8.347438752783964, + "loss": 0.37694764137268066, + "loss_ce": 0.0002996893017552793, + "loss_iou": 0.16015625, + "loss_num": 0.01129150390625, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 209917268, + "step": 3748 + }, + { + "epoch": 8.34966592427617, + "grad_norm": 58.23908996582031, + "learning_rate": 1e-06, + "loss": 0.6929, + "num_input_tokens_seen": 209973232, + "step": 3749 + }, + { + "epoch": 8.34966592427617, + "loss": 0.5084662437438965, + "loss_ce": 0.0001654803636483848, + "loss_iou": 0.2294921875, + "loss_num": 0.00994873046875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 209973232, + "step": 3749 + }, + { + "epoch": 8.351893095768375, + "grad_norm": 14.04624080657959, + "learning_rate": 1e-06, + "loss": 0.4782, + "num_input_tokens_seen": 210030984, + "step": 3750 + }, + { + "epoch": 8.351893095768375, + "eval_seeclick_web_CIoU": 0.5785337090492249, + "eval_seeclick_web_GIoU": 0.5769274234771729, + "eval_seeclick_web_IoU": 0.5960685312747955, + "eval_seeclick_web_MAE_all": 0.01612033136188984, + "eval_seeclick_web_MAE_h": 0.008123957552015781, + "eval_seeclick_web_MAE_w": 0.016420952044427395, + "eval_seeclick_web_MAE_x_boxes": 0.009630883112549782, + "eval_seeclick_web_MAE_y_boxes": 0.021957224002107978, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9174290299415588, + "eval_seeclick_web_loss_ce": 0.00023504487762693316, + "eval_seeclick_web_loss_iou": 0.41943359375, + "eval_seeclick_web_loss_num": 0.012887954711914062, + "eval_seeclick_web_loss_xval": 0.90283203125, + "eval_seeclick_web_runtime": 22.7475, + "eval_seeclick_web_samples_per_second": 2.198, + "eval_seeclick_web_steps_per_second": 0.088, + "num_input_tokens_seen": 210030984, + "step": 3750 + }, + { + "epoch": 8.351893095768375, + "eval_icons_CIoU": 0.2745140343904495, + "eval_icons_GIoU": 0.2994941622018814, + "eval_icons_IoU": 0.35809725522994995, + "eval_icons_MAE_all": 0.06353667378425598, + "eval_icons_MAE_h": 0.03699115989729762, + "eval_icons_MAE_w": 0.06941121257841587, + "eval_icons_MAE_x_boxes": 0.05955472029745579, + "eval_icons_MAE_y_boxes": 0.038531024008989334, + "eval_icons_inside_bbox": 0.6059027910232544, + "eval_icons_loss": 1.762790560722351, + "eval_icons_loss_ce": 0.0002956779644591734, + "eval_icons_loss_iou": 0.687744140625, + "eval_icons_loss_num": 0.06077384948730469, + "eval_icons_loss_xval": 1.6787109375, + "eval_icons_runtime": 22.6059, + "eval_icons_samples_per_second": 2.212, + "eval_icons_steps_per_second": 0.088, + "num_input_tokens_seen": 210030984, + "step": 3750 + }, + { + "epoch": 8.351893095768375, + "eval_screenspot_CIoU": 0.34976951281229657, + "eval_screenspot_GIoU": 0.36631672581036884, + "eval_screenspot_IoU": 0.4275648792584737, + "eval_screenspot_MAE_all": 0.05975629389286041, + "eval_screenspot_MAE_h": 0.03780995992322763, + "eval_screenspot_MAE_w": 0.06876554464300473, + "eval_screenspot_MAE_x_boxes": 0.06643692528208096, + "eval_screenspot_MAE_y_boxes": 0.04602641022453705, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 1.6282535791397095, + "eval_screenspot_loss_ce": 0.0002775423345156014, + "eval_screenspot_loss_iou": 0.6735026041666666, + "eval_screenspot_loss_num": 0.07061513264973958, + "eval_screenspot_loss_xval": 1.7000325520833333, + "eval_screenspot_runtime": 35.522, + "eval_screenspot_samples_per_second": 2.505, + "eval_screenspot_steps_per_second": 0.084, + "num_input_tokens_seen": 210030984, + "step": 3750 + }, + { + "epoch": 8.351893095768375, + "eval_compot_CIoU": 0.3505849689245224, + "eval_compot_GIoU": 0.3601333200931549, + "eval_compot_IoU": 0.40839655697345734, + "eval_compot_MAE_all": 0.017904515843838453, + "eval_compot_MAE_h": 0.009080663323402405, + "eval_compot_MAE_w": 0.022023603320121765, + "eval_compot_MAE_x_boxes": 0.0294346297159791, + "eval_compot_MAE_y_boxes": 0.0067735526245087385, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3908497095108032, + "eval_compot_loss_ce": 0.00022490947594633326, + "eval_compot_loss_iou": 0.63720703125, + "eval_compot_loss_num": 0.01686859130859375, + "eval_compot_loss_xval": 1.358642578125, + "eval_compot_runtime": 19.7321, + "eval_compot_samples_per_second": 2.534, + "eval_compot_steps_per_second": 0.101, + "num_input_tokens_seen": 210030984, + "step": 3750 + }, + { + "epoch": 8.351893095768375, + "eval_custom_ui_val_CIoU": 0.470332317882114, + "eval_custom_ui_val_GIoU": 0.48397915727562374, + "eval_custom_ui_val_IoU": 0.530869291888343, + "eval_custom_ui_val_MAE_all": 0.0304783143930965, + "eval_custom_ui_val_MAE_h": 0.015579992827648917, + "eval_custom_ui_val_MAE_w": 0.04120294677300586, + "eval_custom_ui_val_MAE_x_boxes": 0.039027334190905094, + "eval_custom_ui_val_MAE_y_boxes": 0.015175239571059743, + "eval_custom_ui_val_inside_bbox": 0.738811731338501, + "eval_custom_ui_val_loss": 1.183510184288025, + "eval_custom_ui_val_loss_ce": 0.00025910464238323685, + "eval_custom_ui_val_loss_iou": 0.5034722222222222, + "eval_custom_ui_val_loss_num": 0.02816009521484375, + "eval_custom_ui_val_loss_xval": 1.1477593315972223, + "eval_custom_ui_val_runtime": 64.4781, + "eval_custom_ui_val_samples_per_second": 4.11, + "eval_custom_ui_val_steps_per_second": 0.14, + "num_input_tokens_seen": 210030984, + "step": 3750 + }, + { + "epoch": 8.351893095768375, + "loss": 0.9277232885360718, + "loss_ce": 0.00023301383771467954, + "loss_iou": 0.40234375, + "loss_num": 0.024169921875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 210030984, + "step": 3750 + }, + { + "epoch": 8.35412026726058, + "grad_norm": 20.638172149658203, + "learning_rate": 1e-06, + "loss": 0.6147, + "num_input_tokens_seen": 210089604, + "step": 3751 + }, + { + "epoch": 8.35412026726058, + "loss": 0.7052416205406189, + "loss_ce": 0.00016353550017811358, + "loss_iou": 0.291015625, + "loss_num": 0.025146484375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 210089604, + "step": 3751 + }, + { + "epoch": 8.356347438752785, + "grad_norm": 17.781017303466797, + "learning_rate": 1e-06, + "loss": 0.5201, + "num_input_tokens_seen": 210145624, + "step": 3752 + }, + { + "epoch": 8.356347438752785, + "loss": 0.6434928178787231, + "loss_ce": 0.00018225307576358318, + "loss_iou": 0.265625, + "loss_num": 0.02197265625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 210145624, + "step": 3752 + }, + { + "epoch": 8.35857461024499, + "grad_norm": 88.13903045654297, + "learning_rate": 1e-06, + "loss": 0.6379, + "num_input_tokens_seen": 210203036, + "step": 3753 + }, + { + "epoch": 8.35857461024499, + "loss": 0.6185895204544067, + "loss_ce": 0.00018133444245904684, + "loss_iou": 0.28515625, + "loss_num": 0.00958251953125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 210203036, + "step": 3753 + }, + { + "epoch": 8.360801781737194, + "grad_norm": 15.603206634521484, + "learning_rate": 1e-06, + "loss": 0.738, + "num_input_tokens_seen": 210259024, + "step": 3754 + }, + { + "epoch": 8.360801781737194, + "loss": 0.7939836978912354, + "loss_ce": 0.0006487197242677212, + "loss_iou": 0.33984375, + "loss_num": 0.02294921875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 210259024, + "step": 3754 + }, + { + "epoch": 8.3630289532294, + "grad_norm": 21.302343368530273, + "learning_rate": 1e-06, + "loss": 0.7286, + "num_input_tokens_seen": 210316388, + "step": 3755 + }, + { + "epoch": 8.3630289532294, + "loss": 0.8514755964279175, + "loss_ce": 0.00015728248399682343, + "loss_iou": 0.34375, + "loss_num": 0.03271484375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 210316388, + "step": 3755 + }, + { + "epoch": 8.365256124721604, + "grad_norm": 15.152788162231445, + "learning_rate": 1e-06, + "loss": 0.5393, + "num_input_tokens_seen": 210373268, + "step": 3756 + }, + { + "epoch": 8.365256124721604, + "loss": 0.6328523755073547, + "loss_ce": 0.00016193960618693382, + "loss_iou": 0.275390625, + "loss_num": 0.0166015625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 210373268, + "step": 3756 + }, + { + "epoch": 8.367483296213809, + "grad_norm": 19.08024787902832, + "learning_rate": 1e-06, + "loss": 0.6706, + "num_input_tokens_seen": 210426048, + "step": 3757 + }, + { + "epoch": 8.367483296213809, + "loss": 0.7494222521781921, + "loss_ce": 0.0001546498097013682, + "loss_iou": 0.33984375, + "loss_num": 0.013916015625, + "loss_xval": 0.75, + "num_input_tokens_seen": 210426048, + "step": 3757 + }, + { + "epoch": 8.369710467706014, + "grad_norm": 19.812162399291992, + "learning_rate": 1e-06, + "loss": 0.6775, + "num_input_tokens_seen": 210483032, + "step": 3758 + }, + { + "epoch": 8.369710467706014, + "loss": 0.6561557650566101, + "loss_ce": 0.0001498895580880344, + "loss_iou": 0.287109375, + "loss_num": 0.016357421875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 210483032, + "step": 3758 + }, + { + "epoch": 8.371937639198219, + "grad_norm": 22.67157554626465, + "learning_rate": 1e-06, + "loss": 0.7241, + "num_input_tokens_seen": 210540188, + "step": 3759 + }, + { + "epoch": 8.371937639198219, + "loss": 0.745628833770752, + "loss_ce": 0.0002675401628948748, + "loss_iou": 0.322265625, + "loss_num": 0.0201416015625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 210540188, + "step": 3759 + }, + { + "epoch": 8.374164810690424, + "grad_norm": 24.26649284362793, + "learning_rate": 1e-06, + "loss": 0.6399, + "num_input_tokens_seen": 210597540, + "step": 3760 + }, + { + "epoch": 8.374164810690424, + "loss": 0.5113849639892578, + "loss_ce": 0.0001544796396046877, + "loss_iou": 0.224609375, + "loss_num": 0.0125732421875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 210597540, + "step": 3760 + }, + { + "epoch": 8.376391982182628, + "grad_norm": 18.92394256591797, + "learning_rate": 1e-06, + "loss": 0.6188, + "num_input_tokens_seen": 210651116, + "step": 3761 + }, + { + "epoch": 8.376391982182628, + "loss": 0.6872795224189758, + "loss_ce": 0.0002677679876796901, + "loss_iou": 0.279296875, + "loss_num": 0.025634765625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 210651116, + "step": 3761 + }, + { + "epoch": 8.378619153674833, + "grad_norm": 20.54817771911621, + "learning_rate": 1e-06, + "loss": 0.5593, + "num_input_tokens_seen": 210705180, + "step": 3762 + }, + { + "epoch": 8.378619153674833, + "loss": 0.4156179428100586, + "loss_ce": 0.00018215348245576024, + "loss_iou": 0.1796875, + "loss_num": 0.01104736328125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 210705180, + "step": 3762 + }, + { + "epoch": 8.380846325167038, + "grad_norm": 17.212587356567383, + "learning_rate": 1e-06, + "loss": 0.4299, + "num_input_tokens_seen": 210763276, + "step": 3763 + }, + { + "epoch": 8.380846325167038, + "loss": 0.373112291097641, + "loss_ce": 0.00018747567082755268, + "loss_iou": 0.171875, + "loss_num": 0.0059814453125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 210763276, + "step": 3763 + }, + { + "epoch": 8.383073496659243, + "grad_norm": 19.85719871520996, + "learning_rate": 1e-06, + "loss": 0.5694, + "num_input_tokens_seen": 210818156, + "step": 3764 + }, + { + "epoch": 8.383073496659243, + "loss": 0.5267565250396729, + "loss_ce": 0.00014518463285639882, + "loss_iou": 0.2392578125, + "loss_num": 0.00946044921875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 210818156, + "step": 3764 + }, + { + "epoch": 8.385300668151448, + "grad_norm": 22.43527603149414, + "learning_rate": 1e-06, + "loss": 0.7907, + "num_input_tokens_seen": 210873660, + "step": 3765 + }, + { + "epoch": 8.385300668151448, + "loss": 0.8294920921325684, + "loss_ce": 0.0001463915396016091, + "loss_iou": 0.330078125, + "loss_num": 0.033935546875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 210873660, + "step": 3765 + }, + { + "epoch": 8.387527839643653, + "grad_norm": 15.67119312286377, + "learning_rate": 1e-06, + "loss": 0.4889, + "num_input_tokens_seen": 210928384, + "step": 3766 + }, + { + "epoch": 8.387527839643653, + "loss": 0.5981723070144653, + "loss_ce": 0.0002108315529767424, + "loss_iou": 0.251953125, + "loss_num": 0.018798828125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 210928384, + "step": 3766 + }, + { + "epoch": 8.389755011135858, + "grad_norm": 84.53565216064453, + "learning_rate": 1e-06, + "loss": 0.6099, + "num_input_tokens_seen": 210983112, + "step": 3767 + }, + { + "epoch": 8.389755011135858, + "loss": 0.5748885869979858, + "loss_ce": 0.00018153036944568157, + "loss_iou": 0.251953125, + "loss_num": 0.01446533203125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 210983112, + "step": 3767 + }, + { + "epoch": 8.391982182628063, + "grad_norm": 21.295007705688477, + "learning_rate": 1e-06, + "loss": 0.5424, + "num_input_tokens_seen": 211039244, + "step": 3768 + }, + { + "epoch": 8.391982182628063, + "loss": 0.5405082702636719, + "loss_ce": 0.0002250939724035561, + "loss_iou": 0.23828125, + "loss_num": 0.01275634765625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 211039244, + "step": 3768 + }, + { + "epoch": 8.394209354120267, + "grad_norm": 28.580411911010742, + "learning_rate": 1e-06, + "loss": 0.4886, + "num_input_tokens_seen": 211093216, + "step": 3769 + }, + { + "epoch": 8.394209354120267, + "loss": 0.4398452043533325, + "loss_ce": 0.0001479371276218444, + "loss_iou": 0.1953125, + "loss_num": 0.009765625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 211093216, + "step": 3769 + }, + { + "epoch": 8.396436525612472, + "grad_norm": 23.4449462890625, + "learning_rate": 1e-06, + "loss": 0.6098, + "num_input_tokens_seen": 211145896, + "step": 3770 + }, + { + "epoch": 8.396436525612472, + "loss": 0.4624381363391876, + "loss_ce": 0.0002799497451633215, + "loss_iou": 0.201171875, + "loss_num": 0.0120849609375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 211145896, + "step": 3770 + }, + { + "epoch": 8.398663697104677, + "grad_norm": 21.414167404174805, + "learning_rate": 1e-06, + "loss": 0.5727, + "num_input_tokens_seen": 211200436, + "step": 3771 + }, + { + "epoch": 8.398663697104677, + "loss": 0.6622140407562256, + "loss_ce": 0.00022672140039503574, + "loss_iou": 0.287109375, + "loss_num": 0.017333984375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 211200436, + "step": 3771 + }, + { + "epoch": 8.400890868596882, + "grad_norm": 24.094797134399414, + "learning_rate": 1e-06, + "loss": 0.4669, + "num_input_tokens_seen": 211253492, + "step": 3772 + }, + { + "epoch": 8.400890868596882, + "loss": 0.3054664731025696, + "loss_ce": 0.00016864070494193584, + "loss_iou": 0.1337890625, + "loss_num": 0.0076904296875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 211253492, + "step": 3772 + }, + { + "epoch": 8.403118040089087, + "grad_norm": 16.602676391601562, + "learning_rate": 1e-06, + "loss": 0.6977, + "num_input_tokens_seen": 211309996, + "step": 3773 + }, + { + "epoch": 8.403118040089087, + "loss": 0.8234143853187561, + "loss_ce": 0.00017219953588210046, + "loss_iou": 0.3046875, + "loss_num": 0.042724609375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 211309996, + "step": 3773 + }, + { + "epoch": 8.405345211581292, + "grad_norm": 15.6812744140625, + "learning_rate": 1e-06, + "loss": 0.5358, + "num_input_tokens_seen": 211365484, + "step": 3774 + }, + { + "epoch": 8.405345211581292, + "loss": 0.5414432287216187, + "loss_ce": 0.0001529736036900431, + "loss_iou": 0.2333984375, + "loss_num": 0.01495361328125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 211365484, + "step": 3774 + }, + { + "epoch": 8.407572383073497, + "grad_norm": 15.80637264251709, + "learning_rate": 1e-06, + "loss": 0.5815, + "num_input_tokens_seen": 211421912, + "step": 3775 + }, + { + "epoch": 8.407572383073497, + "loss": 0.4960188865661621, + "loss_ce": 0.000169286533491686, + "loss_iou": 0.212890625, + "loss_num": 0.01409912109375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 211421912, + "step": 3775 + }, + { + "epoch": 8.409799554565701, + "grad_norm": 22.33266830444336, + "learning_rate": 1e-06, + "loss": 0.7466, + "num_input_tokens_seen": 211478792, + "step": 3776 + }, + { + "epoch": 8.409799554565701, + "loss": 0.7512620091438293, + "loss_ce": 0.00016335461987182498, + "loss_iou": 0.302734375, + "loss_num": 0.0286865234375, + "loss_xval": 0.75, + "num_input_tokens_seen": 211478792, + "step": 3776 + }, + { + "epoch": 8.412026726057906, + "grad_norm": 18.270977020263672, + "learning_rate": 1e-06, + "loss": 0.6434, + "num_input_tokens_seen": 211535448, + "step": 3777 + }, + { + "epoch": 8.412026726057906, + "loss": 0.44083136320114136, + "loss_ce": 0.00015753594925627112, + "loss_iou": 0.1962890625, + "loss_num": 0.00946044921875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 211535448, + "step": 3777 + }, + { + "epoch": 8.414253897550111, + "grad_norm": 26.987186431884766, + "learning_rate": 1e-06, + "loss": 0.6894, + "num_input_tokens_seen": 211590956, + "step": 3778 + }, + { + "epoch": 8.414253897550111, + "loss": 0.9005333781242371, + "loss_ce": 0.00014275358989834785, + "loss_iou": 0.3671875, + "loss_num": 0.032958984375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 211590956, + "step": 3778 + }, + { + "epoch": 8.416481069042316, + "grad_norm": 36.464210510253906, + "learning_rate": 1e-06, + "loss": 0.5899, + "num_input_tokens_seen": 211645452, + "step": 3779 + }, + { + "epoch": 8.416481069042316, + "loss": 0.5824276804924011, + "loss_ce": 0.00015230441931635141, + "loss_iou": 0.251953125, + "loss_num": 0.0157470703125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 211645452, + "step": 3779 + }, + { + "epoch": 8.41870824053452, + "grad_norm": 18.14861297607422, + "learning_rate": 1e-06, + "loss": 0.6356, + "num_input_tokens_seen": 211701036, + "step": 3780 + }, + { + "epoch": 8.41870824053452, + "loss": 0.8778401613235474, + "loss_ce": 0.0001545833656564355, + "loss_iou": 0.353515625, + "loss_num": 0.03369140625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 211701036, + "step": 3780 + }, + { + "epoch": 8.420935412026726, + "grad_norm": 13.912912368774414, + "learning_rate": 1e-06, + "loss": 0.6765, + "num_input_tokens_seen": 211757800, + "step": 3781 + }, + { + "epoch": 8.420935412026726, + "loss": 0.7191988229751587, + "loss_ce": 0.0002046736772172153, + "loss_iou": 0.314453125, + "loss_num": 0.0181884765625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 211757800, + "step": 3781 + }, + { + "epoch": 8.42316258351893, + "grad_norm": 28.029550552368164, + "learning_rate": 1e-06, + "loss": 0.5842, + "num_input_tokens_seen": 211815608, + "step": 3782 + }, + { + "epoch": 8.42316258351893, + "loss": 0.5234848260879517, + "loss_ce": 0.00016937771579250693, + "loss_iou": 0.220703125, + "loss_num": 0.01611328125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 211815608, + "step": 3782 + }, + { + "epoch": 8.425389755011135, + "grad_norm": 14.431218147277832, + "learning_rate": 1e-06, + "loss": 0.7867, + "num_input_tokens_seen": 211872108, + "step": 3783 + }, + { + "epoch": 8.425389755011135, + "loss": 0.6559756994247437, + "loss_ce": 0.0002139731077477336, + "loss_iou": 0.26171875, + "loss_num": 0.0262451171875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 211872108, + "step": 3783 + }, + { + "epoch": 8.42761692650334, + "grad_norm": 35.63063430786133, + "learning_rate": 1e-06, + "loss": 0.7438, + "num_input_tokens_seen": 211927844, + "step": 3784 + }, + { + "epoch": 8.42761692650334, + "loss": 0.7180646657943726, + "loss_ce": 0.0001691640354692936, + "loss_iou": 0.330078125, + "loss_num": 0.01165771484375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 211927844, + "step": 3784 + }, + { + "epoch": 8.429844097995545, + "grad_norm": 29.35708999633789, + "learning_rate": 1e-06, + "loss": 0.574, + "num_input_tokens_seen": 211979040, + "step": 3785 + }, + { + "epoch": 8.429844097995545, + "loss": 0.4887080192565918, + "loss_ce": 0.0001826368534239009, + "loss_iou": 0.2197265625, + "loss_num": 0.00982666015625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 211979040, + "step": 3785 + }, + { + "epoch": 8.43207126948775, + "grad_norm": 21.319759368896484, + "learning_rate": 1e-06, + "loss": 0.5513, + "num_input_tokens_seen": 212035452, + "step": 3786 + }, + { + "epoch": 8.43207126948775, + "loss": 0.363552451133728, + "loss_ce": 0.00014914579514879733, + "loss_iou": 0.15625, + "loss_num": 0.0101318359375, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 212035452, + "step": 3786 + }, + { + "epoch": 8.434298440979955, + "grad_norm": 23.589679718017578, + "learning_rate": 1e-06, + "loss": 0.7261, + "num_input_tokens_seen": 212093292, + "step": 3787 + }, + { + "epoch": 8.434298440979955, + "loss": 0.6076536178588867, + "loss_ce": 0.00023178444826044142, + "loss_iou": 0.240234375, + "loss_num": 0.025634765625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 212093292, + "step": 3787 + }, + { + "epoch": 8.43652561247216, + "grad_norm": 14.83303451538086, + "learning_rate": 1e-06, + "loss": 0.4061, + "num_input_tokens_seen": 212150376, + "step": 3788 + }, + { + "epoch": 8.43652561247216, + "loss": 0.35096991062164307, + "loss_ce": 0.0001398474269080907, + "loss_iou": 0.1474609375, + "loss_num": 0.0111083984375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 212150376, + "step": 3788 + }, + { + "epoch": 8.438752783964365, + "grad_norm": 20.81564712524414, + "learning_rate": 1e-06, + "loss": 0.6, + "num_input_tokens_seen": 212202624, + "step": 3789 + }, + { + "epoch": 8.438752783964365, + "loss": 0.5509481430053711, + "loss_ce": 0.00016686462913639843, + "loss_iou": 0.2451171875, + "loss_num": 0.0120849609375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 212202624, + "step": 3789 + }, + { + "epoch": 8.44097995545657, + "grad_norm": 15.132460594177246, + "learning_rate": 1e-06, + "loss": 0.7095, + "num_input_tokens_seen": 212258148, + "step": 3790 + }, + { + "epoch": 8.44097995545657, + "loss": 0.7777798175811768, + "loss_ce": 0.00019189229351468384, + "loss_iou": 0.322265625, + "loss_num": 0.0269775390625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 212258148, + "step": 3790 + }, + { + "epoch": 8.443207126948774, + "grad_norm": 16.23349952697754, + "learning_rate": 1e-06, + "loss": 0.4949, + "num_input_tokens_seen": 212315600, + "step": 3791 + }, + { + "epoch": 8.443207126948774, + "loss": 0.6320212483406067, + "loss_ce": 0.00018532070680521429, + "loss_iou": 0.25390625, + "loss_num": 0.02490234375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 212315600, + "step": 3791 + }, + { + "epoch": 8.44543429844098, + "grad_norm": 29.736553192138672, + "learning_rate": 1e-06, + "loss": 0.6874, + "num_input_tokens_seen": 212371436, + "step": 3792 + }, + { + "epoch": 8.44543429844098, + "loss": 0.7760206460952759, + "loss_ce": 0.00014173590170685202, + "loss_iou": 0.34765625, + "loss_num": 0.0164794921875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 212371436, + "step": 3792 + }, + { + "epoch": 8.447661469933184, + "grad_norm": 17.218870162963867, + "learning_rate": 1e-06, + "loss": 0.5741, + "num_input_tokens_seen": 212427368, + "step": 3793 + }, + { + "epoch": 8.447661469933184, + "loss": 0.7538229823112488, + "loss_ce": 0.00016089789278339595, + "loss_iou": 0.30859375, + "loss_num": 0.0272216796875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 212427368, + "step": 3793 + }, + { + "epoch": 8.449888641425389, + "grad_norm": 17.727853775024414, + "learning_rate": 1e-06, + "loss": 0.4507, + "num_input_tokens_seen": 212482836, + "step": 3794 + }, + { + "epoch": 8.449888641425389, + "loss": 0.5649484395980835, + "loss_ce": 0.0004953413736075163, + "loss_iou": 0.2353515625, + "loss_num": 0.018798828125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 212482836, + "step": 3794 + }, + { + "epoch": 8.452115812917596, + "grad_norm": 21.763526916503906, + "learning_rate": 1e-06, + "loss": 0.518, + "num_input_tokens_seen": 212537884, + "step": 3795 + }, + { + "epoch": 8.452115812917596, + "loss": 0.43735483288764954, + "loss_ce": 0.00022105529205873609, + "loss_iou": 0.189453125, + "loss_num": 0.01190185546875, + "loss_xval": 0.4375, + "num_input_tokens_seen": 212537884, + "step": 3795 + }, + { + "epoch": 8.4543429844098, + "grad_norm": 29.629959106445312, + "learning_rate": 1e-06, + "loss": 0.5047, + "num_input_tokens_seen": 212592316, + "step": 3796 + }, + { + "epoch": 8.4543429844098, + "loss": 0.4535437524318695, + "loss_ce": 0.00017461413517594337, + "loss_iou": 0.1845703125, + "loss_num": 0.0167236328125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 212592316, + "step": 3796 + }, + { + "epoch": 8.456570155902005, + "grad_norm": 33.515464782714844, + "learning_rate": 1e-06, + "loss": 0.7209, + "num_input_tokens_seen": 212649160, + "step": 3797 + }, + { + "epoch": 8.456570155902005, + "loss": 0.8188025951385498, + "loss_ce": 0.0001990534656215459, + "loss_iou": 0.357421875, + "loss_num": 0.02099609375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 212649160, + "step": 3797 + }, + { + "epoch": 8.45879732739421, + "grad_norm": 25.849811553955078, + "learning_rate": 1e-06, + "loss": 0.6433, + "num_input_tokens_seen": 212705208, + "step": 3798 + }, + { + "epoch": 8.45879732739421, + "loss": 0.7545800805091858, + "loss_ce": 0.0001855643349699676, + "loss_iou": 0.34375, + "loss_num": 0.01324462890625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 212705208, + "step": 3798 + }, + { + "epoch": 8.461024498886415, + "grad_norm": 27.304214477539062, + "learning_rate": 1e-06, + "loss": 0.6453, + "num_input_tokens_seen": 212760512, + "step": 3799 + }, + { + "epoch": 8.461024498886415, + "loss": 0.5861043334007263, + "loss_ce": 0.00016680179396644235, + "loss_iou": 0.240234375, + "loss_num": 0.02099609375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 212760512, + "step": 3799 + }, + { + "epoch": 8.46325167037862, + "grad_norm": 23.8879451751709, + "learning_rate": 1e-06, + "loss": 0.6665, + "num_input_tokens_seen": 212817608, + "step": 3800 + }, + { + "epoch": 8.46325167037862, + "loss": 0.6634845733642578, + "loss_ce": 0.00015447995974682271, + "loss_iou": 0.310546875, + "loss_num": 0.00811767578125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 212817608, + "step": 3800 + }, + { + "epoch": 8.465478841870825, + "grad_norm": 42.28864288330078, + "learning_rate": 1e-06, + "loss": 0.7315, + "num_input_tokens_seen": 212873236, + "step": 3801 + }, + { + "epoch": 8.465478841870825, + "loss": 0.7518316507339478, + "loss_ce": 0.0003667787532322109, + "loss_iou": 0.3203125, + "loss_num": 0.0223388671875, + "loss_xval": 0.75, + "num_input_tokens_seen": 212873236, + "step": 3801 + }, + { + "epoch": 8.46770601336303, + "grad_norm": 17.901504516601562, + "learning_rate": 1e-06, + "loss": 0.6369, + "num_input_tokens_seen": 212929028, + "step": 3802 + }, + { + "epoch": 8.46770601336303, + "loss": 0.6398534178733826, + "loss_ce": 0.000204975571250543, + "loss_iou": 0.26953125, + "loss_num": 0.020263671875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 212929028, + "step": 3802 + }, + { + "epoch": 8.469933184855234, + "grad_norm": 12.724729537963867, + "learning_rate": 1e-06, + "loss": 0.726, + "num_input_tokens_seen": 212985068, + "step": 3803 + }, + { + "epoch": 8.469933184855234, + "loss": 0.5726535320281982, + "loss_ce": 0.00014379943604581058, + "loss_iou": 0.23046875, + "loss_num": 0.0224609375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 212985068, + "step": 3803 + }, + { + "epoch": 8.47216035634744, + "grad_norm": 17.61178207397461, + "learning_rate": 1e-06, + "loss": 0.4873, + "num_input_tokens_seen": 213042936, + "step": 3804 + }, + { + "epoch": 8.47216035634744, + "loss": 0.37965720891952515, + "loss_ce": 0.00014062756963539869, + "loss_iou": 0.1708984375, + "loss_num": 0.00750732421875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 213042936, + "step": 3804 + }, + { + "epoch": 8.474387527839644, + "grad_norm": 17.672943115234375, + "learning_rate": 1e-06, + "loss": 0.5562, + "num_input_tokens_seen": 213095496, + "step": 3805 + }, + { + "epoch": 8.474387527839644, + "loss": 0.5798885822296143, + "loss_ce": 0.00017669444787316024, + "loss_iou": 0.251953125, + "loss_num": 0.015380859375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 213095496, + "step": 3805 + }, + { + "epoch": 8.476614699331849, + "grad_norm": 15.858698844909668, + "learning_rate": 1e-06, + "loss": 0.5357, + "num_input_tokens_seen": 213151276, + "step": 3806 + }, + { + "epoch": 8.476614699331849, + "loss": 0.3039921820163727, + "loss_ce": 0.00015917010023258626, + "loss_iou": 0.126953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 213151276, + "step": 3806 + }, + { + "epoch": 8.478841870824054, + "grad_norm": 19.456628799438477, + "learning_rate": 1e-06, + "loss": 0.5587, + "num_input_tokens_seen": 213208404, + "step": 3807 + }, + { + "epoch": 8.478841870824054, + "loss": 0.4867333769798279, + "loss_ce": 0.0001611171173863113, + "loss_iou": 0.208984375, + "loss_num": 0.01385498046875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 213208404, + "step": 3807 + }, + { + "epoch": 8.481069042316259, + "grad_norm": 16.330257415771484, + "learning_rate": 1e-06, + "loss": 0.592, + "num_input_tokens_seen": 213266032, + "step": 3808 + }, + { + "epoch": 8.481069042316259, + "loss": 0.8233369588851929, + "loss_ce": 0.00015580856415908784, + "loss_iou": 0.330078125, + "loss_num": 0.033203125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 213266032, + "step": 3808 + }, + { + "epoch": 8.483296213808464, + "grad_norm": 18.079784393310547, + "learning_rate": 1e-06, + "loss": 0.5542, + "num_input_tokens_seen": 213321096, + "step": 3809 + }, + { + "epoch": 8.483296213808464, + "loss": 0.5775792598724365, + "loss_ce": 0.00018672105215955526, + "loss_iou": 0.267578125, + "loss_num": 0.00811767578125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 213321096, + "step": 3809 + }, + { + "epoch": 8.485523385300668, + "grad_norm": 14.717164039611816, + "learning_rate": 1e-06, + "loss": 0.5484, + "num_input_tokens_seen": 213379256, + "step": 3810 + }, + { + "epoch": 8.485523385300668, + "loss": 0.6559404134750366, + "loss_ce": 0.00017873873002827168, + "loss_iou": 0.2890625, + "loss_num": 0.01531982421875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 213379256, + "step": 3810 + }, + { + "epoch": 8.487750556792873, + "grad_norm": 15.925959587097168, + "learning_rate": 1e-06, + "loss": 0.5738, + "num_input_tokens_seen": 213436860, + "step": 3811 + }, + { + "epoch": 8.487750556792873, + "loss": 0.6409112811088562, + "loss_ce": 0.00016421903274022043, + "loss_iou": 0.275390625, + "loss_num": 0.01806640625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 213436860, + "step": 3811 + }, + { + "epoch": 8.489977728285078, + "grad_norm": 16.596834182739258, + "learning_rate": 1e-06, + "loss": 0.5186, + "num_input_tokens_seen": 213494208, + "step": 3812 + }, + { + "epoch": 8.489977728285078, + "loss": 0.4774528443813324, + "loss_ce": 0.00015789938333909959, + "loss_iou": 0.2109375, + "loss_num": 0.0113525390625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 213494208, + "step": 3812 + }, + { + "epoch": 8.492204899777283, + "grad_norm": 20.76615333557129, + "learning_rate": 1e-06, + "loss": 0.5608, + "num_input_tokens_seen": 213550704, + "step": 3813 + }, + { + "epoch": 8.492204899777283, + "loss": 0.6620367765426636, + "loss_ce": 0.00017153860244434327, + "loss_iou": 0.296875, + "loss_num": 0.01361083984375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 213550704, + "step": 3813 + }, + { + "epoch": 8.494432071269488, + "grad_norm": 35.46400451660156, + "learning_rate": 1e-06, + "loss": 0.4233, + "num_input_tokens_seen": 213605856, + "step": 3814 + }, + { + "epoch": 8.494432071269488, + "loss": 0.4266274869441986, + "loss_ce": 0.00011381316289771348, + "loss_iou": 0.1982421875, + "loss_num": 0.00592041015625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 213605856, + "step": 3814 + }, + { + "epoch": 8.496659242761693, + "grad_norm": 17.748003005981445, + "learning_rate": 1e-06, + "loss": 0.4427, + "num_input_tokens_seen": 213661372, + "step": 3815 + }, + { + "epoch": 8.496659242761693, + "loss": 0.5624146461486816, + "loss_ce": 0.00015880668070167303, + "loss_iou": 0.2216796875, + "loss_num": 0.023681640625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 213661372, + "step": 3815 + }, + { + "epoch": 8.498886414253898, + "grad_norm": 17.674402236938477, + "learning_rate": 1e-06, + "loss": 0.5981, + "num_input_tokens_seen": 213716564, + "step": 3816 + }, + { + "epoch": 8.498886414253898, + "loss": 0.5442743897438049, + "loss_ce": 0.00020702678011730313, + "loss_iou": 0.2412109375, + "loss_num": 0.0125732421875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 213716564, + "step": 3816 + }, + { + "epoch": 8.501113585746102, + "grad_norm": 15.914905548095703, + "learning_rate": 1e-06, + "loss": 0.6234, + "num_input_tokens_seen": 213775080, + "step": 3817 + }, + { + "epoch": 8.501113585746102, + "loss": 0.6505662202835083, + "loss_ce": 0.00017557844694238156, + "loss_iou": 0.296875, + "loss_num": 0.0111083984375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 213775080, + "step": 3817 + }, + { + "epoch": 8.503340757238307, + "grad_norm": 13.035726547241211, + "learning_rate": 1e-06, + "loss": 0.4907, + "num_input_tokens_seen": 213832808, + "step": 3818 + }, + { + "epoch": 8.503340757238307, + "loss": 0.5575791001319885, + "loss_ce": 0.00020606406906154007, + "loss_iou": 0.2578125, + "loss_num": 0.00848388671875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 213832808, + "step": 3818 + }, + { + "epoch": 8.505567928730512, + "grad_norm": 21.998821258544922, + "learning_rate": 1e-06, + "loss": 0.6187, + "num_input_tokens_seen": 213892096, + "step": 3819 + }, + { + "epoch": 8.505567928730512, + "loss": 0.5462846755981445, + "loss_ce": 0.0001420707703800872, + "loss_iou": 0.2353515625, + "loss_num": 0.01495361328125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 213892096, + "step": 3819 + }, + { + "epoch": 8.507795100222717, + "grad_norm": 38.21397399902344, + "learning_rate": 1e-06, + "loss": 0.6985, + "num_input_tokens_seen": 213946800, + "step": 3820 + }, + { + "epoch": 8.507795100222717, + "loss": 0.7257634401321411, + "loss_ce": 0.00017748677055351436, + "loss_iou": 0.326171875, + "loss_num": 0.01470947265625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 213946800, + "step": 3820 + }, + { + "epoch": 8.510022271714922, + "grad_norm": 24.83524513244629, + "learning_rate": 1e-06, + "loss": 0.5032, + "num_input_tokens_seen": 214003200, + "step": 3821 + }, + { + "epoch": 8.510022271714922, + "loss": 0.464314341545105, + "loss_ce": 0.0004471480497159064, + "loss_iou": 0.1953125, + "loss_num": 0.014892578125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 214003200, + "step": 3821 + }, + { + "epoch": 8.512249443207127, + "grad_norm": 18.419172286987305, + "learning_rate": 1e-06, + "loss": 0.6254, + "num_input_tokens_seen": 214059832, + "step": 3822 + }, + { + "epoch": 8.512249443207127, + "loss": 0.5757325887680054, + "loss_ce": 0.00017112254863604903, + "loss_iou": 0.255859375, + "loss_num": 0.0125732421875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 214059832, + "step": 3822 + }, + { + "epoch": 8.514476614699332, + "grad_norm": 23.88714599609375, + "learning_rate": 1e-06, + "loss": 0.7178, + "num_input_tokens_seen": 214115268, + "step": 3823 + }, + { + "epoch": 8.514476614699332, + "loss": 0.9213709235191345, + "loss_ce": 0.00022837005963083357, + "loss_iou": 0.365234375, + "loss_num": 0.037841796875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 214115268, + "step": 3823 + }, + { + "epoch": 8.516703786191536, + "grad_norm": 16.07703399658203, + "learning_rate": 1e-06, + "loss": 0.774, + "num_input_tokens_seen": 214173736, + "step": 3824 + }, + { + "epoch": 8.516703786191536, + "loss": 0.5605735778808594, + "loss_ce": 0.00014879286754876375, + "loss_iou": 0.23828125, + "loss_num": 0.0167236328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 214173736, + "step": 3824 + }, + { + "epoch": 8.518930957683741, + "grad_norm": 17.80256462097168, + "learning_rate": 1e-06, + "loss": 0.5559, + "num_input_tokens_seen": 214231872, + "step": 3825 + }, + { + "epoch": 8.518930957683741, + "loss": 0.5602927207946777, + "loss_ce": 0.00017310409748461097, + "loss_iou": 0.251953125, + "loss_num": 0.01141357421875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 214231872, + "step": 3825 + }, + { + "epoch": 8.521158129175946, + "grad_norm": 15.230652809143066, + "learning_rate": 1e-06, + "loss": 0.5637, + "num_input_tokens_seen": 214289616, + "step": 3826 + }, + { + "epoch": 8.521158129175946, + "loss": 0.6788380146026611, + "loss_ce": 0.0002491388004273176, + "loss_iou": 0.28125, + "loss_num": 0.0234375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 214289616, + "step": 3826 + }, + { + "epoch": 8.523385300668151, + "grad_norm": 19.55664825439453, + "learning_rate": 1e-06, + "loss": 0.4918, + "num_input_tokens_seen": 214347108, + "step": 3827 + }, + { + "epoch": 8.523385300668151, + "loss": 0.4482664167881012, + "loss_ce": 0.00014631151861976832, + "loss_iou": 0.185546875, + "loss_num": 0.01519775390625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 214347108, + "step": 3827 + }, + { + "epoch": 8.525612472160356, + "grad_norm": 19.749784469604492, + "learning_rate": 1e-06, + "loss": 0.5827, + "num_input_tokens_seen": 214399800, + "step": 3828 + }, + { + "epoch": 8.525612472160356, + "loss": 0.7083353996276855, + "loss_ce": 0.00020547436724882573, + "loss_iou": 0.3125, + "loss_num": 0.0166015625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 214399800, + "step": 3828 + }, + { + "epoch": 8.52783964365256, + "grad_norm": 16.604900360107422, + "learning_rate": 1e-06, + "loss": 0.7405, + "num_input_tokens_seen": 214454464, + "step": 3829 + }, + { + "epoch": 8.52783964365256, + "loss": 0.6651185750961304, + "loss_ce": 0.00020155691890977323, + "loss_iou": 0.298828125, + "loss_num": 0.01373291015625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 214454464, + "step": 3829 + }, + { + "epoch": 8.530066815144766, + "grad_norm": 39.781227111816406, + "learning_rate": 1e-06, + "loss": 0.5991, + "num_input_tokens_seen": 214511448, + "step": 3830 + }, + { + "epoch": 8.530066815144766, + "loss": 0.589692234992981, + "loss_ce": 0.0002146851911675185, + "loss_iou": 0.24609375, + "loss_num": 0.0191650390625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 214511448, + "step": 3830 + }, + { + "epoch": 8.53229398663697, + "grad_norm": 26.230976104736328, + "learning_rate": 1e-06, + "loss": 0.517, + "num_input_tokens_seen": 214565856, + "step": 3831 + }, + { + "epoch": 8.53229398663697, + "loss": 0.389570951461792, + "loss_ce": 0.00016667514864820987, + "loss_iou": 0.1767578125, + "loss_num": 0.007354736328125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 214565856, + "step": 3831 + }, + { + "epoch": 8.534521158129175, + "grad_norm": 10.636022567749023, + "learning_rate": 1e-06, + "loss": 0.4324, + "num_input_tokens_seen": 214623292, + "step": 3832 + }, + { + "epoch": 8.534521158129175, + "loss": 0.4710007309913635, + "loss_ce": 0.00017552266945131123, + "loss_iou": 0.2138671875, + "loss_num": 0.00836181640625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 214623292, + "step": 3832 + }, + { + "epoch": 8.53674832962138, + "grad_norm": 20.143753051757812, + "learning_rate": 1e-06, + "loss": 0.5645, + "num_input_tokens_seen": 214679760, + "step": 3833 + }, + { + "epoch": 8.53674832962138, + "loss": 0.5450610518455505, + "loss_ce": 0.00013918953482061625, + "loss_iou": 0.2294921875, + "loss_num": 0.01708984375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 214679760, + "step": 3833 + }, + { + "epoch": 8.538975501113585, + "grad_norm": 29.548900604248047, + "learning_rate": 1e-06, + "loss": 0.6879, + "num_input_tokens_seen": 214734852, + "step": 3834 + }, + { + "epoch": 8.538975501113585, + "loss": 0.43624764680862427, + "loss_ce": 0.00015146093210205436, + "loss_iou": 0.193359375, + "loss_num": 0.00994873046875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 214734852, + "step": 3834 + }, + { + "epoch": 8.54120267260579, + "grad_norm": 21.732810974121094, + "learning_rate": 1e-06, + "loss": 0.639, + "num_input_tokens_seen": 214787916, + "step": 3835 + }, + { + "epoch": 8.54120267260579, + "loss": 0.7973939180374146, + "loss_ce": 0.0001527119311504066, + "loss_iou": 0.3515625, + "loss_num": 0.01904296875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 214787916, + "step": 3835 + }, + { + "epoch": 8.543429844097995, + "grad_norm": 10.797616958618164, + "learning_rate": 1e-06, + "loss": 0.4759, + "num_input_tokens_seen": 214842424, + "step": 3836 + }, + { + "epoch": 8.543429844097995, + "loss": 0.5132704377174377, + "loss_ce": 0.00020891126769129187, + "loss_iou": 0.197265625, + "loss_num": 0.0235595703125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 214842424, + "step": 3836 + }, + { + "epoch": 8.5456570155902, + "grad_norm": 21.894956588745117, + "learning_rate": 1e-06, + "loss": 0.5809, + "num_input_tokens_seen": 214897440, + "step": 3837 + }, + { + "epoch": 8.5456570155902, + "loss": 0.5928073525428772, + "loss_ce": 0.0001559797237860039, + "loss_iou": 0.255859375, + "loss_num": 0.0162353515625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 214897440, + "step": 3837 + }, + { + "epoch": 8.547884187082406, + "grad_norm": 16.91735076904297, + "learning_rate": 1e-06, + "loss": 0.6461, + "num_input_tokens_seen": 214951216, + "step": 3838 + }, + { + "epoch": 8.547884187082406, + "loss": 0.7435756325721741, + "loss_ce": 0.00016743276501074433, + "loss_iou": 0.328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 214951216, + "step": 3838 + }, + { + "epoch": 8.550111358574611, + "grad_norm": 16.6099796295166, + "learning_rate": 1e-06, + "loss": 0.7447, + "num_input_tokens_seen": 215004404, + "step": 3839 + }, + { + "epoch": 8.550111358574611, + "loss": 0.4547840356826782, + "loss_ce": 0.00019417837029322982, + "loss_iou": 0.2041015625, + "loss_num": 0.0093994140625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 215004404, + "step": 3839 + }, + { + "epoch": 8.552338530066816, + "grad_norm": 18.755542755126953, + "learning_rate": 1e-06, + "loss": 0.7003, + "num_input_tokens_seen": 215061452, + "step": 3840 + }, + { + "epoch": 8.552338530066816, + "loss": 0.7906391620635986, + "loss_ce": 0.00023385511303786188, + "loss_iou": 0.34375, + "loss_num": 0.02099609375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 215061452, + "step": 3840 + }, + { + "epoch": 8.55456570155902, + "grad_norm": 15.474801063537598, + "learning_rate": 1e-06, + "loss": 0.5902, + "num_input_tokens_seen": 215119176, + "step": 3841 + }, + { + "epoch": 8.55456570155902, + "loss": 0.6494476199150085, + "loss_ce": 0.00015564775094389915, + "loss_iou": 0.271484375, + "loss_num": 0.0211181640625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 215119176, + "step": 3841 + }, + { + "epoch": 8.556792873051226, + "grad_norm": 22.1204891204834, + "learning_rate": 1e-06, + "loss": 0.5713, + "num_input_tokens_seen": 215174944, + "step": 3842 + }, + { + "epoch": 8.556792873051226, + "loss": 0.5475232601165771, + "loss_ce": 0.00015993315901141614, + "loss_iou": 0.2392578125, + "loss_num": 0.01397705078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 215174944, + "step": 3842 + }, + { + "epoch": 8.55902004454343, + "grad_norm": 13.736021995544434, + "learning_rate": 1e-06, + "loss": 0.5063, + "num_input_tokens_seen": 215230864, + "step": 3843 + }, + { + "epoch": 8.55902004454343, + "loss": 0.4863549768924713, + "loss_ce": 0.00014891059254296124, + "loss_iou": 0.2138671875, + "loss_num": 0.01171875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 215230864, + "step": 3843 + }, + { + "epoch": 8.561247216035635, + "grad_norm": 22.418909072875977, + "learning_rate": 1e-06, + "loss": 0.7348, + "num_input_tokens_seen": 215289312, + "step": 3844 + }, + { + "epoch": 8.561247216035635, + "loss": 0.7660500407218933, + "loss_ce": 0.00018092009122483432, + "loss_iou": 0.361328125, + "loss_num": 0.00830078125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 215289312, + "step": 3844 + }, + { + "epoch": 8.56347438752784, + "grad_norm": 16.7957820892334, + "learning_rate": 1e-06, + "loss": 0.6157, + "num_input_tokens_seen": 215346324, + "step": 3845 + }, + { + "epoch": 8.56347438752784, + "loss": 0.4588056802749634, + "loss_ce": 0.0001875289308372885, + "loss_iou": 0.201171875, + "loss_num": 0.01129150390625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 215346324, + "step": 3845 + }, + { + "epoch": 8.565701559020045, + "grad_norm": 18.19455909729004, + "learning_rate": 1e-06, + "loss": 0.4958, + "num_input_tokens_seen": 215402928, + "step": 3846 + }, + { + "epoch": 8.565701559020045, + "loss": 0.4269152879714966, + "loss_ce": 0.00015747133875265718, + "loss_iou": 0.1884765625, + "loss_num": 0.00994873046875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 215402928, + "step": 3846 + }, + { + "epoch": 8.56792873051225, + "grad_norm": 21.939851760864258, + "learning_rate": 1e-06, + "loss": 0.649, + "num_input_tokens_seen": 215458620, + "step": 3847 + }, + { + "epoch": 8.56792873051225, + "loss": 0.5985796451568604, + "loss_ce": 0.00019095309835392982, + "loss_iou": 0.251953125, + "loss_num": 0.018798828125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 215458620, + "step": 3847 + }, + { + "epoch": 8.570155902004455, + "grad_norm": 17.403221130371094, + "learning_rate": 1e-06, + "loss": 0.5387, + "num_input_tokens_seen": 215515404, + "step": 3848 + }, + { + "epoch": 8.570155902004455, + "loss": 0.6373581886291504, + "loss_ce": 0.0001511847076471895, + "loss_iou": 0.283203125, + "loss_num": 0.0146484375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 215515404, + "step": 3848 + }, + { + "epoch": 8.57238307349666, + "grad_norm": 17.169862747192383, + "learning_rate": 1e-06, + "loss": 0.5251, + "num_input_tokens_seen": 215571552, + "step": 3849 + }, + { + "epoch": 8.57238307349666, + "loss": 0.4762542247772217, + "loss_ce": 0.0001799672027118504, + "loss_iou": 0.2119140625, + "loss_num": 0.01043701171875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 215571552, + "step": 3849 + }, + { + "epoch": 8.574610244988865, + "grad_norm": 15.021171569824219, + "learning_rate": 1e-06, + "loss": 0.737, + "num_input_tokens_seen": 215627964, + "step": 3850 + }, + { + "epoch": 8.574610244988865, + "loss": 0.8246517777442932, + "loss_ce": 0.00018888208433054388, + "loss_iou": 0.328125, + "loss_num": 0.033203125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 215627964, + "step": 3850 + }, + { + "epoch": 8.57683741648107, + "grad_norm": 25.07132339477539, + "learning_rate": 1e-06, + "loss": 0.6764, + "num_input_tokens_seen": 215683576, + "step": 3851 + }, + { + "epoch": 8.57683741648107, + "loss": 0.5619097352027893, + "loss_ce": 0.00014213821850717068, + "loss_iou": 0.25, + "loss_num": 0.01202392578125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 215683576, + "step": 3851 + }, + { + "epoch": 8.579064587973274, + "grad_norm": 15.985095024108887, + "learning_rate": 1e-06, + "loss": 0.5649, + "num_input_tokens_seen": 215740424, + "step": 3852 + }, + { + "epoch": 8.579064587973274, + "loss": 0.5519498586654663, + "loss_ce": 0.000802424328867346, + "loss_iou": 0.2275390625, + "loss_num": 0.0189208984375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 215740424, + "step": 3852 + }, + { + "epoch": 8.58129175946548, + "grad_norm": 17.084421157836914, + "learning_rate": 1e-06, + "loss": 0.4632, + "num_input_tokens_seen": 215797432, + "step": 3853 + }, + { + "epoch": 8.58129175946548, + "loss": 0.45524704456329346, + "loss_ce": 0.00016892084386199713, + "loss_iou": 0.2060546875, + "loss_num": 0.0087890625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 215797432, + "step": 3853 + }, + { + "epoch": 8.583518930957684, + "grad_norm": 25.7215633392334, + "learning_rate": 1e-06, + "loss": 0.827, + "num_input_tokens_seen": 215855860, + "step": 3854 + }, + { + "epoch": 8.583518930957684, + "loss": 0.8243607878684998, + "loss_ce": 0.0001420074113411829, + "loss_iou": 0.322265625, + "loss_num": 0.035888671875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 215855860, + "step": 3854 + }, + { + "epoch": 8.585746102449889, + "grad_norm": 20.765113830566406, + "learning_rate": 1e-06, + "loss": 0.5694, + "num_input_tokens_seen": 215913020, + "step": 3855 + }, + { + "epoch": 8.585746102449889, + "loss": 0.4681831896305084, + "loss_ce": 0.00016561683150939643, + "loss_iou": 0.201171875, + "loss_num": 0.01318359375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 215913020, + "step": 3855 + }, + { + "epoch": 8.587973273942094, + "grad_norm": 22.44451141357422, + "learning_rate": 1e-06, + "loss": 0.5467, + "num_input_tokens_seen": 215968572, + "step": 3856 + }, + { + "epoch": 8.587973273942094, + "loss": 0.5845038890838623, + "loss_ce": 0.00015330014866776764, + "loss_iou": 0.26171875, + "loss_num": 0.01251220703125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 215968572, + "step": 3856 + }, + { + "epoch": 8.590200445434299, + "grad_norm": 20.8133487701416, + "learning_rate": 1e-06, + "loss": 0.5643, + "num_input_tokens_seen": 216022120, + "step": 3857 + }, + { + "epoch": 8.590200445434299, + "loss": 0.5692482590675354, + "loss_ce": 0.00015648285625502467, + "loss_iou": 0.248046875, + "loss_num": 0.0145263671875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 216022120, + "step": 3857 + }, + { + "epoch": 8.592427616926503, + "grad_norm": 15.802310943603516, + "learning_rate": 1e-06, + "loss": 0.7726, + "num_input_tokens_seen": 216077540, + "step": 3858 + }, + { + "epoch": 8.592427616926503, + "loss": 0.732232391834259, + "loss_ce": 0.00017669174121692777, + "loss_iou": 0.30859375, + "loss_num": 0.0224609375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 216077540, + "step": 3858 + }, + { + "epoch": 8.594654788418708, + "grad_norm": 21.7590389251709, + "learning_rate": 1e-06, + "loss": 0.4892, + "num_input_tokens_seen": 216132424, + "step": 3859 + }, + { + "epoch": 8.594654788418708, + "loss": 0.3683167099952698, + "loss_ce": 0.0001526352425571531, + "loss_iou": 0.1318359375, + "loss_num": 0.0206298828125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 216132424, + "step": 3859 + }, + { + "epoch": 8.596881959910913, + "grad_norm": 29.03900718688965, + "learning_rate": 1e-06, + "loss": 0.7531, + "num_input_tokens_seen": 216185120, + "step": 3860 + }, + { + "epoch": 8.596881959910913, + "loss": 0.7123351097106934, + "loss_ce": 0.0001769287046045065, + "loss_iou": 0.3203125, + "loss_num": 0.01458740234375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 216185120, + "step": 3860 + }, + { + "epoch": 8.599109131403118, + "grad_norm": 14.911413192749023, + "learning_rate": 1e-06, + "loss": 0.4934, + "num_input_tokens_seen": 216239524, + "step": 3861 + }, + { + "epoch": 8.599109131403118, + "loss": 0.5576772689819336, + "loss_ce": 0.00018213198927696794, + "loss_iou": 0.25390625, + "loss_num": 0.00982666015625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 216239524, + "step": 3861 + }, + { + "epoch": 8.601336302895323, + "grad_norm": 23.764856338500977, + "learning_rate": 1e-06, + "loss": 0.535, + "num_input_tokens_seen": 216295068, + "step": 3862 + }, + { + "epoch": 8.601336302895323, + "loss": 0.6164814233779907, + "loss_ce": 0.0001483721862314269, + "loss_iou": 0.27734375, + "loss_num": 0.01263427734375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 216295068, + "step": 3862 + }, + { + "epoch": 8.603563474387528, + "grad_norm": 43.42782211303711, + "learning_rate": 1e-06, + "loss": 0.8414, + "num_input_tokens_seen": 216349744, + "step": 3863 + }, + { + "epoch": 8.603563474387528, + "loss": 0.4933130145072937, + "loss_ce": 0.00014895344793330878, + "loss_iou": 0.224609375, + "loss_num": 0.00872802734375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 216349744, + "step": 3863 + }, + { + "epoch": 8.605790645879733, + "grad_norm": 20.517236709594727, + "learning_rate": 1e-06, + "loss": 0.6078, + "num_input_tokens_seen": 216407720, + "step": 3864 + }, + { + "epoch": 8.605790645879733, + "loss": 0.5921906232833862, + "loss_ce": 0.00014958585961721838, + "loss_iou": 0.26171875, + "loss_num": 0.01373291015625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 216407720, + "step": 3864 + }, + { + "epoch": 8.608017817371937, + "grad_norm": 11.897843360900879, + "learning_rate": 1e-06, + "loss": 0.3836, + "num_input_tokens_seen": 216466340, + "step": 3865 + }, + { + "epoch": 8.608017817371937, + "loss": 0.3420777916908264, + "loss_ce": 0.00015885230095591396, + "loss_iou": 0.1552734375, + "loss_num": 0.006317138671875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 216466340, + "step": 3865 + }, + { + "epoch": 8.610244988864142, + "grad_norm": 20.564950942993164, + "learning_rate": 1e-06, + "loss": 0.6276, + "num_input_tokens_seen": 216521392, + "step": 3866 + }, + { + "epoch": 8.610244988864142, + "loss": 0.4767248034477234, + "loss_ce": 0.0001622969430172816, + "loss_iou": 0.2080078125, + "loss_num": 0.01226806640625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 216521392, + "step": 3866 + }, + { + "epoch": 8.612472160356347, + "grad_norm": 22.76060676574707, + "learning_rate": 1e-06, + "loss": 0.9585, + "num_input_tokens_seen": 216572244, + "step": 3867 + }, + { + "epoch": 8.612472160356347, + "loss": 1.402675747871399, + "loss_ce": 0.00033200866892002523, + "loss_iou": 0.5546875, + "loss_num": 0.05810546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 216572244, + "step": 3867 + }, + { + "epoch": 8.614699331848552, + "grad_norm": 17.371736526489258, + "learning_rate": 1e-06, + "loss": 0.5416, + "num_input_tokens_seen": 216630272, + "step": 3868 + }, + { + "epoch": 8.614699331848552, + "loss": 0.49856650829315186, + "loss_ce": 0.000153428060002625, + "loss_iou": 0.2216796875, + "loss_num": 0.01092529296875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 216630272, + "step": 3868 + }, + { + "epoch": 8.616926503340757, + "grad_norm": 18.579317092895508, + "learning_rate": 1e-06, + "loss": 0.5887, + "num_input_tokens_seen": 216686540, + "step": 3869 + }, + { + "epoch": 8.616926503340757, + "loss": 0.5541001558303833, + "loss_ce": 0.00014507281593978405, + "loss_iou": 0.2138671875, + "loss_num": 0.0252685546875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 216686540, + "step": 3869 + }, + { + "epoch": 8.619153674832962, + "grad_norm": 17.833322525024414, + "learning_rate": 1e-06, + "loss": 0.5787, + "num_input_tokens_seen": 216742544, + "step": 3870 + }, + { + "epoch": 8.619153674832962, + "loss": 0.5232726335525513, + "loss_ce": 0.00020131460041739047, + "loss_iou": 0.23046875, + "loss_num": 0.01263427734375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 216742544, + "step": 3870 + }, + { + "epoch": 8.621380846325167, + "grad_norm": 16.937902450561523, + "learning_rate": 1e-06, + "loss": 0.6145, + "num_input_tokens_seen": 216798684, + "step": 3871 + }, + { + "epoch": 8.621380846325167, + "loss": 0.4764711260795593, + "loss_ce": 0.000152770007844083, + "loss_iou": 0.203125, + "loss_num": 0.01397705078125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 216798684, + "step": 3871 + }, + { + "epoch": 8.623608017817372, + "grad_norm": 15.312004089355469, + "learning_rate": 1e-06, + "loss": 0.5381, + "num_input_tokens_seen": 216852448, + "step": 3872 + }, + { + "epoch": 8.623608017817372, + "loss": 0.47357112169265747, + "loss_ce": 0.00018245888350065798, + "loss_iou": 0.2119140625, + "loss_num": 0.00982666015625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 216852448, + "step": 3872 + }, + { + "epoch": 8.625835189309576, + "grad_norm": 19.796167373657227, + "learning_rate": 1e-06, + "loss": 0.5406, + "num_input_tokens_seen": 216906968, + "step": 3873 + }, + { + "epoch": 8.625835189309576, + "loss": 0.42670130729675293, + "loss_ce": 0.00018764834385365248, + "loss_iou": 0.19921875, + "loss_num": 0.0057373046875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 216906968, + "step": 3873 + }, + { + "epoch": 8.628062360801781, + "grad_norm": 17.857717514038086, + "learning_rate": 1e-06, + "loss": 0.8762, + "num_input_tokens_seen": 216959064, + "step": 3874 + }, + { + "epoch": 8.628062360801781, + "loss": 0.8671058416366577, + "loss_ce": 0.00016246383893303573, + "loss_iou": 0.40234375, + "loss_num": 0.01300048828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 216959064, + "step": 3874 + }, + { + "epoch": 8.630289532293986, + "grad_norm": 26.22123908996582, + "learning_rate": 1e-06, + "loss": 0.571, + "num_input_tokens_seen": 217016020, + "step": 3875 + }, + { + "epoch": 8.630289532293986, + "loss": 0.6762186288833618, + "loss_ce": 0.0001932468730956316, + "loss_iou": 0.310546875, + "loss_num": 0.0108642578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 217016020, + "step": 3875 + }, + { + "epoch": 8.632516703786191, + "grad_norm": 16.814098358154297, + "learning_rate": 1e-06, + "loss": 0.5477, + "num_input_tokens_seen": 217069532, + "step": 3876 + }, + { + "epoch": 8.632516703786191, + "loss": 0.3917490839958191, + "loss_ce": 0.00014753625146113336, + "loss_iou": 0.1708984375, + "loss_num": 0.00994873046875, + "loss_xval": 0.390625, + "num_input_tokens_seen": 217069532, + "step": 3876 + }, + { + "epoch": 8.634743875278396, + "grad_norm": 14.337661743164062, + "learning_rate": 1e-06, + "loss": 0.4624, + "num_input_tokens_seen": 217128076, + "step": 3877 + }, + { + "epoch": 8.634743875278396, + "loss": 0.49962735176086426, + "loss_ce": 0.00023766841331962496, + "loss_iou": 0.2236328125, + "loss_num": 0.0107421875, + "loss_xval": 0.5, + "num_input_tokens_seen": 217128076, + "step": 3877 + }, + { + "epoch": 8.6369710467706, + "grad_norm": 19.950851440429688, + "learning_rate": 1e-06, + "loss": 0.4853, + "num_input_tokens_seen": 217187092, + "step": 3878 + }, + { + "epoch": 8.6369710467706, + "loss": 0.4780520796775818, + "loss_ce": 0.0001620454277144745, + "loss_iou": 0.197265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 217187092, + "step": 3878 + }, + { + "epoch": 8.639198218262806, + "grad_norm": 16.780813217163086, + "learning_rate": 1e-06, + "loss": 0.6026, + "num_input_tokens_seen": 217244100, + "step": 3879 + }, + { + "epoch": 8.639198218262806, + "loss": 0.7703104019165039, + "loss_ce": 0.00029087584698572755, + "loss_iou": 0.314453125, + "loss_num": 0.02783203125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 217244100, + "step": 3879 + }, + { + "epoch": 8.64142538975501, + "grad_norm": 20.776565551757812, + "learning_rate": 1e-06, + "loss": 0.5547, + "num_input_tokens_seen": 217300800, + "step": 3880 + }, + { + "epoch": 8.64142538975501, + "loss": 0.5758423805236816, + "loss_ce": 0.0001587883016327396, + "loss_iou": 0.244140625, + "loss_num": 0.017578125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 217300800, + "step": 3880 + }, + { + "epoch": 8.643652561247215, + "grad_norm": 16.412261962890625, + "learning_rate": 1e-06, + "loss": 0.5354, + "num_input_tokens_seen": 217357076, + "step": 3881 + }, + { + "epoch": 8.643652561247215, + "loss": 0.48521143198013306, + "loss_ce": 0.00047024147352203727, + "loss_iou": 0.2080078125, + "loss_num": 0.013671875, + "loss_xval": 0.484375, + "num_input_tokens_seen": 217357076, + "step": 3881 + }, + { + "epoch": 8.64587973273942, + "grad_norm": 40.7680778503418, + "learning_rate": 1e-06, + "loss": 0.7236, + "num_input_tokens_seen": 217413124, + "step": 3882 + }, + { + "epoch": 8.64587973273942, + "loss": 0.7792381644248962, + "loss_ce": 0.00018543524492997676, + "loss_iou": 0.341796875, + "loss_num": 0.019287109375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 217413124, + "step": 3882 + }, + { + "epoch": 8.648106904231625, + "grad_norm": 29.648727416992188, + "learning_rate": 1e-06, + "loss": 0.6271, + "num_input_tokens_seen": 217468736, + "step": 3883 + }, + { + "epoch": 8.648106904231625, + "loss": 0.6764873266220093, + "loss_ce": 0.00027885870076715946, + "loss_iou": 0.27734375, + "loss_num": 0.024169921875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 217468736, + "step": 3883 + }, + { + "epoch": 8.65033407572383, + "grad_norm": 16.55933380126953, + "learning_rate": 1e-06, + "loss": 0.6713, + "num_input_tokens_seen": 217524312, + "step": 3884 + }, + { + "epoch": 8.65033407572383, + "loss": 0.46768736839294434, + "loss_ce": 0.00015806582814548165, + "loss_iou": 0.201171875, + "loss_num": 0.01300048828125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 217524312, + "step": 3884 + }, + { + "epoch": 8.652561247216035, + "grad_norm": 24.027820587158203, + "learning_rate": 1e-06, + "loss": 0.6617, + "num_input_tokens_seen": 217582960, + "step": 3885 + }, + { + "epoch": 8.652561247216035, + "loss": 0.5760955810546875, + "loss_ce": 0.00016780085570644587, + "loss_iou": 0.251953125, + "loss_num": 0.01446533203125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 217582960, + "step": 3885 + }, + { + "epoch": 8.654788418708241, + "grad_norm": 12.898533821105957, + "learning_rate": 1e-06, + "loss": 0.4816, + "num_input_tokens_seen": 217640700, + "step": 3886 + }, + { + "epoch": 8.654788418708241, + "loss": 0.39870375394821167, + "loss_ce": 0.00014417324564419687, + "loss_iou": 0.169921875, + "loss_num": 0.01171875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 217640700, + "step": 3886 + }, + { + "epoch": 8.657015590200446, + "grad_norm": 24.518152236938477, + "learning_rate": 1e-06, + "loss": 0.7044, + "num_input_tokens_seen": 217698148, + "step": 3887 + }, + { + "epoch": 8.657015590200446, + "loss": 0.8448777198791504, + "loss_ce": 0.0006394129595719278, + "loss_iou": 0.357421875, + "loss_num": 0.0257568359375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 217698148, + "step": 3887 + }, + { + "epoch": 8.659242761692651, + "grad_norm": 20.064247131347656, + "learning_rate": 1e-06, + "loss": 0.5771, + "num_input_tokens_seen": 217755788, + "step": 3888 + }, + { + "epoch": 8.659242761692651, + "loss": 0.602916955947876, + "loss_ce": 0.00013376196147873998, + "loss_iou": 0.26171875, + "loss_num": 0.01611328125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 217755788, + "step": 3888 + }, + { + "epoch": 8.661469933184856, + "grad_norm": 25.421236038208008, + "learning_rate": 1e-06, + "loss": 0.4772, + "num_input_tokens_seen": 217813576, + "step": 3889 + }, + { + "epoch": 8.661469933184856, + "loss": 0.5371390581130981, + "loss_ce": 0.00015175854787230492, + "loss_iou": 0.2470703125, + "loss_num": 0.0086669921875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 217813576, + "step": 3889 + }, + { + "epoch": 8.66369710467706, + "grad_norm": 23.681997299194336, + "learning_rate": 1e-06, + "loss": 0.6563, + "num_input_tokens_seen": 217868180, + "step": 3890 + }, + { + "epoch": 8.66369710467706, + "loss": 0.8451223373413086, + "loss_ce": 0.00015169865218922496, + "loss_iou": 0.36328125, + "loss_num": 0.0238037109375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 217868180, + "step": 3890 + }, + { + "epoch": 8.665924276169266, + "grad_norm": 19.213150024414062, + "learning_rate": 1e-06, + "loss": 0.7518, + "num_input_tokens_seen": 217926136, + "step": 3891 + }, + { + "epoch": 8.665924276169266, + "loss": 0.5428782105445862, + "loss_ce": 0.00015362550038844347, + "loss_iou": 0.23046875, + "loss_num": 0.0166015625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 217926136, + "step": 3891 + }, + { + "epoch": 8.66815144766147, + "grad_norm": 22.411832809448242, + "learning_rate": 1e-06, + "loss": 0.6165, + "num_input_tokens_seen": 217983252, + "step": 3892 + }, + { + "epoch": 8.66815144766147, + "loss": 0.7387982606887817, + "loss_ce": 0.0001508084242232144, + "loss_iou": 0.3203125, + "loss_num": 0.01953125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 217983252, + "step": 3892 + }, + { + "epoch": 8.670378619153675, + "grad_norm": 22.501161575317383, + "learning_rate": 1e-06, + "loss": 0.5534, + "num_input_tokens_seen": 218039972, + "step": 3893 + }, + { + "epoch": 8.670378619153675, + "loss": 0.3819955587387085, + "loss_ce": 0.0002206652716267854, + "loss_iou": 0.1591796875, + "loss_num": 0.01275634765625, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 218039972, + "step": 3893 + }, + { + "epoch": 8.67260579064588, + "grad_norm": 27.5329532623291, + "learning_rate": 1e-06, + "loss": 0.6429, + "num_input_tokens_seen": 218097220, + "step": 3894 + }, + { + "epoch": 8.67260579064588, + "loss": 0.3702659010887146, + "loss_ce": 0.0001486857217969373, + "loss_iou": 0.1640625, + "loss_num": 0.00860595703125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 218097220, + "step": 3894 + }, + { + "epoch": 8.674832962138085, + "grad_norm": 13.485652923583984, + "learning_rate": 1e-06, + "loss": 0.432, + "num_input_tokens_seen": 218154216, + "step": 3895 + }, + { + "epoch": 8.674832962138085, + "loss": 0.40601831674575806, + "loss_ce": 0.00013454348663799465, + "loss_iou": 0.1640625, + "loss_num": 0.0157470703125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 218154216, + "step": 3895 + }, + { + "epoch": 8.67706013363029, + "grad_norm": 26.522533416748047, + "learning_rate": 1e-06, + "loss": 0.5957, + "num_input_tokens_seen": 218206464, + "step": 3896 + }, + { + "epoch": 8.67706013363029, + "loss": 0.751849889755249, + "loss_ce": 0.0001408641110174358, + "loss_iou": 0.32421875, + "loss_num": 0.0206298828125, + "loss_xval": 0.75, + "num_input_tokens_seen": 218206464, + "step": 3896 + }, + { + "epoch": 8.679287305122495, + "grad_norm": 24.25346565246582, + "learning_rate": 1e-06, + "loss": 0.8235, + "num_input_tokens_seen": 218263664, + "step": 3897 + }, + { + "epoch": 8.679287305122495, + "loss": 0.8781342506408691, + "loss_ce": 0.0002045307046500966, + "loss_iou": 0.333984375, + "loss_num": 0.041748046875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 218263664, + "step": 3897 + }, + { + "epoch": 8.6815144766147, + "grad_norm": 17.56336212158203, + "learning_rate": 1e-06, + "loss": 0.7218, + "num_input_tokens_seen": 218319228, + "step": 3898 + }, + { + "epoch": 8.6815144766147, + "loss": 0.8000770807266235, + "loss_ce": 0.0001502884115325287, + "loss_iou": 0.361328125, + "loss_num": 0.01507568359375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 218319228, + "step": 3898 + }, + { + "epoch": 8.683741648106905, + "grad_norm": 21.46102523803711, + "learning_rate": 1e-06, + "loss": 0.5721, + "num_input_tokens_seen": 218374208, + "step": 3899 + }, + { + "epoch": 8.683741648106905, + "loss": 0.6461607813835144, + "loss_ce": 0.00016468125977553427, + "loss_iou": 0.287109375, + "loss_num": 0.0146484375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 218374208, + "step": 3899 + }, + { + "epoch": 8.68596881959911, + "grad_norm": 15.952383995056152, + "learning_rate": 1e-06, + "loss": 0.6203, + "num_input_tokens_seen": 218432844, + "step": 3900 + }, + { + "epoch": 8.68596881959911, + "loss": 0.5890233516693115, + "loss_ce": 0.0001561893877806142, + "loss_iou": 0.265625, + "loss_num": 0.01165771484375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 218432844, + "step": 3900 + }, + { + "epoch": 8.688195991091314, + "grad_norm": 20.294042587280273, + "learning_rate": 1e-06, + "loss": 0.5964, + "num_input_tokens_seen": 218489864, + "step": 3901 + }, + { + "epoch": 8.688195991091314, + "loss": 0.6337062120437622, + "loss_ce": 0.00040536391315981746, + "loss_iou": 0.28125, + "loss_num": 0.013916015625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 218489864, + "step": 3901 + }, + { + "epoch": 8.690423162583519, + "grad_norm": 14.53677749633789, + "learning_rate": 1e-06, + "loss": 0.6919, + "num_input_tokens_seen": 218545668, + "step": 3902 + }, + { + "epoch": 8.690423162583519, + "loss": 0.4630255401134491, + "loss_ce": 0.00013489379489328712, + "loss_iou": 0.2080078125, + "loss_num": 0.0093994140625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 218545668, + "step": 3902 + }, + { + "epoch": 8.692650334075724, + "grad_norm": 21.7016544342041, + "learning_rate": 1e-06, + "loss": 0.614, + "num_input_tokens_seen": 218602996, + "step": 3903 + }, + { + "epoch": 8.692650334075724, + "loss": 0.573477566242218, + "loss_ce": 0.000479543989058584, + "loss_iou": 0.2392578125, + "loss_num": 0.0189208984375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 218602996, + "step": 3903 + }, + { + "epoch": 8.694877505567929, + "grad_norm": 17.698266983032227, + "learning_rate": 1e-06, + "loss": 0.4877, + "num_input_tokens_seen": 218659176, + "step": 3904 + }, + { + "epoch": 8.694877505567929, + "loss": 0.47621312737464905, + "loss_ce": 0.00013888333342038095, + "loss_iou": 0.203125, + "loss_num": 0.0137939453125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 218659176, + "step": 3904 + }, + { + "epoch": 8.697104677060134, + "grad_norm": 21.86427116394043, + "learning_rate": 1e-06, + "loss": 0.5896, + "num_input_tokens_seen": 218713336, + "step": 3905 + }, + { + "epoch": 8.697104677060134, + "loss": 0.5707519054412842, + "loss_ce": 0.0001952478487510234, + "loss_iou": 0.259765625, + "loss_num": 0.01007080078125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 218713336, + "step": 3905 + }, + { + "epoch": 8.699331848552339, + "grad_norm": 21.313528060913086, + "learning_rate": 1e-06, + "loss": 0.5622, + "num_input_tokens_seen": 218771932, + "step": 3906 + }, + { + "epoch": 8.699331848552339, + "loss": 0.5350602865219116, + "loss_ce": 0.00014820430078543723, + "loss_iou": 0.23828125, + "loss_num": 0.01177978515625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 218771932, + "step": 3906 + }, + { + "epoch": 8.701559020044543, + "grad_norm": 25.534914016723633, + "learning_rate": 1e-06, + "loss": 0.4969, + "num_input_tokens_seen": 218829644, + "step": 3907 + }, + { + "epoch": 8.701559020044543, + "loss": 0.4723511338233948, + "loss_ce": 0.00018313938926439732, + "loss_iou": 0.21875, + "loss_num": 0.00689697265625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 218829644, + "step": 3907 + }, + { + "epoch": 8.703786191536748, + "grad_norm": 31.614242553710938, + "learning_rate": 1e-06, + "loss": 0.7103, + "num_input_tokens_seen": 218884672, + "step": 3908 + }, + { + "epoch": 8.703786191536748, + "loss": 0.6449835300445557, + "loss_ce": 0.0002081613929476589, + "loss_iou": 0.27734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 218884672, + "step": 3908 + }, + { + "epoch": 8.706013363028953, + "grad_norm": 15.506339073181152, + "learning_rate": 1e-06, + "loss": 0.5959, + "num_input_tokens_seen": 218938064, + "step": 3909 + }, + { + "epoch": 8.706013363028953, + "loss": 0.729173481464386, + "loss_ce": 0.00016962323570623994, + "loss_iou": 0.283203125, + "loss_num": 0.031982421875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 218938064, + "step": 3909 + }, + { + "epoch": 8.708240534521158, + "grad_norm": 23.147897720336914, + "learning_rate": 1e-06, + "loss": 0.4788, + "num_input_tokens_seen": 218995120, + "step": 3910 + }, + { + "epoch": 8.708240534521158, + "loss": 0.3938792645931244, + "loss_ce": 0.0002025177382165566, + "loss_iou": 0.1787109375, + "loss_num": 0.007080078125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 218995120, + "step": 3910 + }, + { + "epoch": 8.710467706013363, + "grad_norm": 23.020849227905273, + "learning_rate": 1e-06, + "loss": 0.6817, + "num_input_tokens_seen": 219050528, + "step": 3911 + }, + { + "epoch": 8.710467706013363, + "loss": 0.6399117708206177, + "loss_ce": 0.00026331457775086164, + "loss_iou": 0.255859375, + "loss_num": 0.02587890625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 219050528, + "step": 3911 + }, + { + "epoch": 8.712694877505568, + "grad_norm": 16.904033660888672, + "learning_rate": 1e-06, + "loss": 0.5594, + "num_input_tokens_seen": 219105692, + "step": 3912 + }, + { + "epoch": 8.712694877505568, + "loss": 0.7058773636817932, + "loss_ce": 0.00018892009393312037, + "loss_iou": 0.3046875, + "loss_num": 0.01904296875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 219105692, + "step": 3912 + }, + { + "epoch": 8.714922048997773, + "grad_norm": 15.886432647705078, + "learning_rate": 1e-06, + "loss": 0.5245, + "num_input_tokens_seen": 219160744, + "step": 3913 + }, + { + "epoch": 8.714922048997773, + "loss": 0.6087524890899658, + "loss_ce": 0.00023196196707431227, + "loss_iou": 0.2578125, + "loss_num": 0.01904296875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 219160744, + "step": 3913 + }, + { + "epoch": 8.717149220489977, + "grad_norm": 14.26981258392334, + "learning_rate": 1e-06, + "loss": 0.5525, + "num_input_tokens_seen": 219217212, + "step": 3914 + }, + { + "epoch": 8.717149220489977, + "loss": 0.7599831819534302, + "loss_ce": 0.00021756268688477576, + "loss_iou": 0.279296875, + "loss_num": 0.040283203125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 219217212, + "step": 3914 + }, + { + "epoch": 8.719376391982182, + "grad_norm": 15.31400203704834, + "learning_rate": 1e-06, + "loss": 0.6814, + "num_input_tokens_seen": 219275172, + "step": 3915 + }, + { + "epoch": 8.719376391982182, + "loss": 0.5874415040016174, + "loss_ce": 0.00016121604130603373, + "loss_iou": 0.236328125, + "loss_num": 0.022705078125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 219275172, + "step": 3915 + }, + { + "epoch": 8.721603563474387, + "grad_norm": 13.000571250915527, + "learning_rate": 1e-06, + "loss": 0.43, + "num_input_tokens_seen": 219329864, + "step": 3916 + }, + { + "epoch": 8.721603563474387, + "loss": 0.35176119208335876, + "loss_ce": 0.00019871644326485693, + "loss_iou": 0.158203125, + "loss_num": 0.007232666015625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 219329864, + "step": 3916 + }, + { + "epoch": 8.723830734966592, + "grad_norm": 24.805957794189453, + "learning_rate": 1e-06, + "loss": 0.6412, + "num_input_tokens_seen": 219387476, + "step": 3917 + }, + { + "epoch": 8.723830734966592, + "loss": 0.6872168183326721, + "loss_ce": 0.00020506742293946445, + "loss_iou": 0.26171875, + "loss_num": 0.032958984375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 219387476, + "step": 3917 + }, + { + "epoch": 8.726057906458797, + "grad_norm": 17.83942413330078, + "learning_rate": 1e-06, + "loss": 0.6982, + "num_input_tokens_seen": 219445780, + "step": 3918 + }, + { + "epoch": 8.726057906458797, + "loss": 0.7072159051895142, + "loss_ce": 0.00018466576875653118, + "loss_iou": 0.29296875, + "loss_num": 0.02392578125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 219445780, + "step": 3918 + }, + { + "epoch": 8.728285077951002, + "grad_norm": 18.335899353027344, + "learning_rate": 1e-06, + "loss": 0.554, + "num_input_tokens_seen": 219501992, + "step": 3919 + }, + { + "epoch": 8.728285077951002, + "loss": 0.6365430355072021, + "loss_ce": 0.0001904442033264786, + "loss_iou": 0.279296875, + "loss_num": 0.015380859375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 219501992, + "step": 3919 + }, + { + "epoch": 8.730512249443207, + "grad_norm": 54.56409454345703, + "learning_rate": 1e-06, + "loss": 0.7071, + "num_input_tokens_seen": 219557492, + "step": 3920 + }, + { + "epoch": 8.730512249443207, + "loss": 0.7284532785415649, + "loss_ce": 0.00018174288561567664, + "loss_iou": 0.29296875, + "loss_num": 0.0289306640625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 219557492, + "step": 3920 + }, + { + "epoch": 8.732739420935411, + "grad_norm": 19.81359100341797, + "learning_rate": 1e-06, + "loss": 0.4295, + "num_input_tokens_seen": 219612520, + "step": 3921 + }, + { + "epoch": 8.732739420935411, + "loss": 0.35280388593673706, + "loss_ce": 0.00014273943088483065, + "loss_iou": 0.15234375, + "loss_num": 0.009521484375, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 219612520, + "step": 3921 + }, + { + "epoch": 8.734966592427616, + "grad_norm": 20.431453704833984, + "learning_rate": 1e-06, + "loss": 0.6836, + "num_input_tokens_seen": 219666936, + "step": 3922 + }, + { + "epoch": 8.734966592427616, + "loss": 0.7043083906173706, + "loss_ce": 0.00020681662135757506, + "loss_iou": 0.29296875, + "loss_num": 0.02392578125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 219666936, + "step": 3922 + }, + { + "epoch": 8.737193763919821, + "grad_norm": 74.53073120117188, + "learning_rate": 1e-06, + "loss": 0.6355, + "num_input_tokens_seen": 219721856, + "step": 3923 + }, + { + "epoch": 8.737193763919821, + "loss": 0.5503079891204834, + "loss_ce": 0.00013712375948671252, + "loss_iou": 0.2216796875, + "loss_num": 0.0216064453125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 219721856, + "step": 3923 + }, + { + "epoch": 8.739420935412026, + "grad_norm": 18.951021194458008, + "learning_rate": 1e-06, + "loss": 0.4582, + "num_input_tokens_seen": 219777316, + "step": 3924 + }, + { + "epoch": 8.739420935412026, + "loss": 0.4863456189632416, + "loss_ce": 0.00026161997811868787, + "loss_iou": 0.22265625, + "loss_num": 0.00836181640625, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 219777316, + "step": 3924 + }, + { + "epoch": 8.74164810690423, + "grad_norm": 17.503719329833984, + "learning_rate": 1e-06, + "loss": 0.6413, + "num_input_tokens_seen": 219834932, + "step": 3925 + }, + { + "epoch": 8.74164810690423, + "loss": 0.6336848735809326, + "loss_ce": 0.0001399153989041224, + "loss_iou": 0.244140625, + "loss_num": 0.0291748046875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 219834932, + "step": 3925 + }, + { + "epoch": 8.743875278396436, + "grad_norm": 15.789238929748535, + "learning_rate": 1e-06, + "loss": 0.5391, + "num_input_tokens_seen": 219891608, + "step": 3926 + }, + { + "epoch": 8.743875278396436, + "loss": 0.559868574142456, + "loss_ce": 0.00029824947705492377, + "loss_iou": 0.236328125, + "loss_num": 0.0172119140625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 219891608, + "step": 3926 + }, + { + "epoch": 8.74610244988864, + "grad_norm": 22.618701934814453, + "learning_rate": 1e-06, + "loss": 0.6993, + "num_input_tokens_seen": 219948044, + "step": 3927 + }, + { + "epoch": 8.74610244988864, + "loss": 0.7226395606994629, + "loss_ce": 0.00022747760522179306, + "loss_iou": 0.3046875, + "loss_num": 0.022216796875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 219948044, + "step": 3927 + }, + { + "epoch": 8.748329621380847, + "grad_norm": 16.85779571533203, + "learning_rate": 1e-06, + "loss": 0.5143, + "num_input_tokens_seen": 220002632, + "step": 3928 + }, + { + "epoch": 8.748329621380847, + "loss": 0.6813238859176636, + "loss_ce": 0.00017151121573988348, + "loss_iou": 0.291015625, + "loss_num": 0.0201416015625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 220002632, + "step": 3928 + }, + { + "epoch": 8.750556792873052, + "grad_norm": 22.382169723510742, + "learning_rate": 1e-06, + "loss": 0.6557, + "num_input_tokens_seen": 220058412, + "step": 3929 + }, + { + "epoch": 8.750556792873052, + "loss": 0.7504002451896667, + "loss_ce": 0.00015609902038704604, + "loss_iou": 0.3203125, + "loss_num": 0.02197265625, + "loss_xval": 0.75, + "num_input_tokens_seen": 220058412, + "step": 3929 + }, + { + "epoch": 8.752783964365257, + "grad_norm": 15.235098838806152, + "learning_rate": 1e-06, + "loss": 0.6484, + "num_input_tokens_seen": 220114060, + "step": 3930 + }, + { + "epoch": 8.752783964365257, + "loss": 0.7623621225357056, + "loss_ce": 0.00015506052295677364, + "loss_iou": 0.337890625, + "loss_num": 0.017333984375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 220114060, + "step": 3930 + }, + { + "epoch": 8.755011135857462, + "grad_norm": 19.23377227783203, + "learning_rate": 1e-06, + "loss": 0.7264, + "num_input_tokens_seen": 220169768, + "step": 3931 + }, + { + "epoch": 8.755011135857462, + "loss": 0.7468116283416748, + "loss_ce": 0.0007179292151704431, + "loss_iou": 0.3203125, + "loss_num": 0.0206298828125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 220169768, + "step": 3931 + }, + { + "epoch": 8.757238307349667, + "grad_norm": 24.401901245117188, + "learning_rate": 1e-06, + "loss": 0.603, + "num_input_tokens_seen": 220227336, + "step": 3932 + }, + { + "epoch": 8.757238307349667, + "loss": 0.422521710395813, + "loss_ce": 0.00015843103756196797, + "loss_iou": 0.1865234375, + "loss_num": 0.010009765625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 220227336, + "step": 3932 + }, + { + "epoch": 8.759465478841872, + "grad_norm": 24.561513900756836, + "learning_rate": 1e-06, + "loss": 0.5883, + "num_input_tokens_seen": 220281280, + "step": 3933 + }, + { + "epoch": 8.759465478841872, + "loss": 0.6444734930992126, + "loss_ce": 0.00018640572670847178, + "loss_iou": 0.291015625, + "loss_num": 0.0126953125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 220281280, + "step": 3933 + }, + { + "epoch": 8.761692650334076, + "grad_norm": 22.442155838012695, + "learning_rate": 1e-06, + "loss": 0.5317, + "num_input_tokens_seen": 220337648, + "step": 3934 + }, + { + "epoch": 8.761692650334076, + "loss": 0.662487268447876, + "loss_ce": 0.00013376369315665215, + "loss_iou": 0.287109375, + "loss_num": 0.017333984375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 220337648, + "step": 3934 + }, + { + "epoch": 8.763919821826281, + "grad_norm": 17.563997268676758, + "learning_rate": 1e-06, + "loss": 0.578, + "num_input_tokens_seen": 220395500, + "step": 3935 + }, + { + "epoch": 8.763919821826281, + "loss": 0.7258895635604858, + "loss_ce": 0.0007919379277154803, + "loss_iou": 0.30078125, + "loss_num": 0.0244140625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 220395500, + "step": 3935 + }, + { + "epoch": 8.766146993318486, + "grad_norm": 21.255687713623047, + "learning_rate": 1e-06, + "loss": 0.6404, + "num_input_tokens_seen": 220450940, + "step": 3936 + }, + { + "epoch": 8.766146993318486, + "loss": 0.6532711386680603, + "loss_ce": 0.00019493838772177696, + "loss_iou": 0.26171875, + "loss_num": 0.0257568359375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 220450940, + "step": 3936 + }, + { + "epoch": 8.768374164810691, + "grad_norm": 14.73167610168457, + "learning_rate": 1e-06, + "loss": 0.605, + "num_input_tokens_seen": 220505768, + "step": 3937 + }, + { + "epoch": 8.768374164810691, + "loss": 0.4992978870868683, + "loss_ce": 0.00015238078776746988, + "loss_iou": 0.2197265625, + "loss_num": 0.01214599609375, + "loss_xval": 0.5, + "num_input_tokens_seen": 220505768, + "step": 3937 + }, + { + "epoch": 8.770601336302896, + "grad_norm": 20.76053237915039, + "learning_rate": 1e-06, + "loss": 0.5365, + "num_input_tokens_seen": 220562928, + "step": 3938 + }, + { + "epoch": 8.770601336302896, + "loss": 0.5216740369796753, + "loss_ce": 0.00018964534683618695, + "loss_iou": 0.2373046875, + "loss_num": 0.0093994140625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 220562928, + "step": 3938 + }, + { + "epoch": 8.7728285077951, + "grad_norm": 16.428634643554688, + "learning_rate": 1e-06, + "loss": 0.6054, + "num_input_tokens_seen": 220619840, + "step": 3939 + }, + { + "epoch": 8.7728285077951, + "loss": 0.6325225830078125, + "loss_ce": 0.00019834056729450822, + "loss_iou": 0.2578125, + "loss_num": 0.023681640625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 220619840, + "step": 3939 + }, + { + "epoch": 8.775055679287306, + "grad_norm": 14.64647388458252, + "learning_rate": 1e-06, + "loss": 0.5719, + "num_input_tokens_seen": 220674660, + "step": 3940 + }, + { + "epoch": 8.775055679287306, + "loss": 0.6691886782646179, + "loss_ce": 0.00024336397473234683, + "loss_iou": 0.275390625, + "loss_num": 0.023681640625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 220674660, + "step": 3940 + }, + { + "epoch": 8.77728285077951, + "grad_norm": 12.952407836914062, + "learning_rate": 1e-06, + "loss": 0.4744, + "num_input_tokens_seen": 220733140, + "step": 3941 + }, + { + "epoch": 8.77728285077951, + "loss": 0.6572137475013733, + "loss_ce": 0.00023132127535063773, + "loss_iou": 0.28125, + "loss_num": 0.019287109375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 220733140, + "step": 3941 + }, + { + "epoch": 8.779510022271715, + "grad_norm": 18.830053329467773, + "learning_rate": 1e-06, + "loss": 0.6262, + "num_input_tokens_seen": 220789288, + "step": 3942 + }, + { + "epoch": 8.779510022271715, + "loss": 0.6996396780014038, + "loss_ce": 0.0001767998473951593, + "loss_iou": 0.279296875, + "loss_num": 0.0283203125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 220789288, + "step": 3942 + }, + { + "epoch": 8.78173719376392, + "grad_norm": 24.526212692260742, + "learning_rate": 1e-06, + "loss": 0.7996, + "num_input_tokens_seen": 220844188, + "step": 3943 + }, + { + "epoch": 8.78173719376392, + "loss": 0.6259015798568726, + "loss_ce": 0.0001691749203018844, + "loss_iou": 0.283203125, + "loss_num": 0.01190185546875, + "loss_xval": 0.625, + "num_input_tokens_seen": 220844188, + "step": 3943 + }, + { + "epoch": 8.783964365256125, + "grad_norm": 17.90644073486328, + "learning_rate": 1e-06, + "loss": 0.5722, + "num_input_tokens_seen": 220901768, + "step": 3944 + }, + { + "epoch": 8.783964365256125, + "loss": 0.5738731622695923, + "loss_ce": 0.00014269730309024453, + "loss_iou": 0.26171875, + "loss_num": 0.0098876953125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 220901768, + "step": 3944 + }, + { + "epoch": 8.78619153674833, + "grad_norm": 40.18233871459961, + "learning_rate": 1e-06, + "loss": 0.7066, + "num_input_tokens_seen": 220957076, + "step": 3945 + }, + { + "epoch": 8.78619153674833, + "loss": 0.8366864919662476, + "loss_ce": 0.0001997254294110462, + "loss_iou": 0.359375, + "loss_num": 0.0233154296875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 220957076, + "step": 3945 + }, + { + "epoch": 8.788418708240535, + "grad_norm": 35.03602600097656, + "learning_rate": 1e-06, + "loss": 0.683, + "num_input_tokens_seen": 221011744, + "step": 3946 + }, + { + "epoch": 8.788418708240535, + "loss": 0.5396614074707031, + "loss_ce": 0.00023272512771654874, + "loss_iou": 0.224609375, + "loss_num": 0.0179443359375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 221011744, + "step": 3946 + }, + { + "epoch": 8.79064587973274, + "grad_norm": 19.676462173461914, + "learning_rate": 1e-06, + "loss": 0.6669, + "num_input_tokens_seen": 221068528, + "step": 3947 + }, + { + "epoch": 8.79064587973274, + "loss": 0.7179461717605591, + "loss_ce": 0.00017279275925830007, + "loss_iou": 0.291015625, + "loss_num": 0.02685546875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 221068528, + "step": 3947 + }, + { + "epoch": 8.792873051224944, + "grad_norm": 26.63570785522461, + "learning_rate": 1e-06, + "loss": 0.5588, + "num_input_tokens_seen": 221125112, + "step": 3948 + }, + { + "epoch": 8.792873051224944, + "loss": 0.6211436986923218, + "loss_ce": 0.0005382976960390806, + "loss_iou": 0.259765625, + "loss_num": 0.0201416015625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 221125112, + "step": 3948 + }, + { + "epoch": 8.79510022271715, + "grad_norm": 17.29355239868164, + "learning_rate": 1e-06, + "loss": 0.5911, + "num_input_tokens_seen": 221181924, + "step": 3949 + }, + { + "epoch": 8.79510022271715, + "loss": 0.6676521897315979, + "loss_ce": 0.0001717579725664109, + "loss_iou": 0.298828125, + "loss_num": 0.01373291015625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 221181924, + "step": 3949 + }, + { + "epoch": 8.797327394209354, + "grad_norm": 20.77406120300293, + "learning_rate": 1e-06, + "loss": 0.6492, + "num_input_tokens_seen": 221239948, + "step": 3950 + }, + { + "epoch": 8.797327394209354, + "loss": 0.7762324213981628, + "loss_ce": 0.00035347635275684297, + "loss_iou": 0.310546875, + "loss_num": 0.0311279296875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 221239948, + "step": 3950 + }, + { + "epoch": 8.799554565701559, + "grad_norm": 25.013608932495117, + "learning_rate": 1e-06, + "loss": 0.7604, + "num_input_tokens_seen": 221296624, + "step": 3951 + }, + { + "epoch": 8.799554565701559, + "loss": 0.6159054040908813, + "loss_ce": 0.0001827623782446608, + "loss_iou": 0.263671875, + "loss_num": 0.017578125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 221296624, + "step": 3951 + }, + { + "epoch": 8.801781737193764, + "grad_norm": 22.501018524169922, + "learning_rate": 1e-06, + "loss": 0.6662, + "num_input_tokens_seen": 221352544, + "step": 3952 + }, + { + "epoch": 8.801781737193764, + "loss": 0.8238818645477295, + "loss_ce": 0.0001514081668574363, + "loss_iou": 0.341796875, + "loss_num": 0.0277099609375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 221352544, + "step": 3952 + }, + { + "epoch": 8.804008908685969, + "grad_norm": 16.182146072387695, + "learning_rate": 1e-06, + "loss": 0.7461, + "num_input_tokens_seen": 221409016, + "step": 3953 + }, + { + "epoch": 8.804008908685969, + "loss": 1.03929603099823, + "loss_ce": 0.00023350719129666686, + "loss_iou": 0.41015625, + "loss_num": 0.04345703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 221409016, + "step": 3953 + }, + { + "epoch": 8.806236080178174, + "grad_norm": 20.251689910888672, + "learning_rate": 1e-06, + "loss": 0.625, + "num_input_tokens_seen": 221464600, + "step": 3954 + }, + { + "epoch": 8.806236080178174, + "loss": 0.6593621969223022, + "loss_ce": 0.00018251534493174404, + "loss_iou": 0.296875, + "loss_num": 0.01324462890625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 221464600, + "step": 3954 + }, + { + "epoch": 8.808463251670378, + "grad_norm": 22.65058708190918, + "learning_rate": 1e-06, + "loss": 0.4196, + "num_input_tokens_seen": 221519316, + "step": 3955 + }, + { + "epoch": 8.808463251670378, + "loss": 0.5017726421356201, + "loss_ce": 0.0001857746101450175, + "loss_iou": 0.205078125, + "loss_num": 0.01806640625, + "loss_xval": 0.5, + "num_input_tokens_seen": 221519316, + "step": 3955 + }, + { + "epoch": 8.810690423162583, + "grad_norm": 16.896793365478516, + "learning_rate": 1e-06, + "loss": 0.6443, + "num_input_tokens_seen": 221574936, + "step": 3956 + }, + { + "epoch": 8.810690423162583, + "loss": 0.4596463441848755, + "loss_ce": 0.00017369160195812583, + "loss_iou": 0.2001953125, + "loss_num": 0.0120849609375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 221574936, + "step": 3956 + }, + { + "epoch": 8.812917594654788, + "grad_norm": 19.71438217163086, + "learning_rate": 1e-06, + "loss": 0.589, + "num_input_tokens_seen": 221629572, + "step": 3957 + }, + { + "epoch": 8.812917594654788, + "loss": 0.5652260780334473, + "loss_ce": 0.00016256351955235004, + "loss_iou": 0.2421875, + "loss_num": 0.01611328125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 221629572, + "step": 3957 + }, + { + "epoch": 8.815144766146993, + "grad_norm": 14.786736488342285, + "learning_rate": 1e-06, + "loss": 0.4319, + "num_input_tokens_seen": 221687656, + "step": 3958 + }, + { + "epoch": 8.815144766146993, + "loss": 0.45927944779396057, + "loss_ce": 0.00017301308980677277, + "loss_iou": 0.20703125, + "loss_num": 0.00921630859375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 221687656, + "step": 3958 + }, + { + "epoch": 8.817371937639198, + "grad_norm": 17.631956100463867, + "learning_rate": 1e-06, + "loss": 0.6813, + "num_input_tokens_seen": 221745792, + "step": 3959 + }, + { + "epoch": 8.817371937639198, + "loss": 0.5624639987945557, + "loss_ce": 0.0002081410784740001, + "loss_iou": 0.2431640625, + "loss_num": 0.01544189453125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 221745792, + "step": 3959 + }, + { + "epoch": 8.819599109131403, + "grad_norm": 22.315784454345703, + "learning_rate": 1e-06, + "loss": 0.7343, + "num_input_tokens_seen": 221800120, + "step": 3960 + }, + { + "epoch": 8.819599109131403, + "loss": 0.8773585557937622, + "loss_ce": 0.00016129494179040194, + "loss_iou": 0.37890625, + "loss_num": 0.0238037109375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 221800120, + "step": 3960 + }, + { + "epoch": 8.821826280623608, + "grad_norm": 14.803606986999512, + "learning_rate": 1e-06, + "loss": 0.6183, + "num_input_tokens_seen": 221856540, + "step": 3961 + }, + { + "epoch": 8.821826280623608, + "loss": 0.5055595636367798, + "loss_ce": 0.00018847928731702268, + "loss_iou": 0.2236328125, + "loss_num": 0.0115966796875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 221856540, + "step": 3961 + }, + { + "epoch": 8.824053452115812, + "grad_norm": 22.032180786132812, + "learning_rate": 1e-06, + "loss": 0.5228, + "num_input_tokens_seen": 221908880, + "step": 3962 + }, + { + "epoch": 8.824053452115812, + "loss": 0.551054835319519, + "loss_ce": 0.00027354987105354667, + "loss_iou": 0.24609375, + "loss_num": 0.0115966796875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 221908880, + "step": 3962 + }, + { + "epoch": 8.826280623608017, + "grad_norm": 64.0085678100586, + "learning_rate": 1e-06, + "loss": 0.8304, + "num_input_tokens_seen": 221964260, + "step": 3963 + }, + { + "epoch": 8.826280623608017, + "loss": 0.7420934438705444, + "loss_ce": 0.00015010150673333555, + "loss_iou": 0.314453125, + "loss_num": 0.0225830078125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 221964260, + "step": 3963 + }, + { + "epoch": 8.828507795100222, + "grad_norm": 16.832103729248047, + "learning_rate": 1e-06, + "loss": 0.8408, + "num_input_tokens_seen": 222022236, + "step": 3964 + }, + { + "epoch": 8.828507795100222, + "loss": 1.2050738334655762, + "loss_ce": 0.00023982246057130396, + "loss_iou": 0.466796875, + "loss_num": 0.053955078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 222022236, + "step": 3964 + }, + { + "epoch": 8.830734966592427, + "grad_norm": 20.819835662841797, + "learning_rate": 1e-06, + "loss": 0.7145, + "num_input_tokens_seen": 222077780, + "step": 3965 + }, + { + "epoch": 8.830734966592427, + "loss": 0.9205887317657471, + "loss_ce": 0.00017864895926322788, + "loss_iou": 0.37109375, + "loss_num": 0.035400390625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 222077780, + "step": 3965 + }, + { + "epoch": 8.832962138084632, + "grad_norm": 18.16783332824707, + "learning_rate": 1e-06, + "loss": 0.4841, + "num_input_tokens_seen": 222136840, + "step": 3966 + }, + { + "epoch": 8.832962138084632, + "loss": 0.5124315023422241, + "loss_ce": 0.00019395150593481958, + "loss_iou": 0.2080078125, + "loss_num": 0.01953125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 222136840, + "step": 3966 + }, + { + "epoch": 8.835189309576837, + "grad_norm": 25.93811798095703, + "learning_rate": 1e-06, + "loss": 0.4811, + "num_input_tokens_seen": 222191848, + "step": 3967 + }, + { + "epoch": 8.835189309576837, + "loss": 0.5905265212059021, + "loss_ce": 0.00019449519459158182, + "loss_iou": 0.248046875, + "loss_num": 0.018798828125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 222191848, + "step": 3967 + }, + { + "epoch": 8.837416481069042, + "grad_norm": 26.04351043701172, + "learning_rate": 1e-06, + "loss": 0.4815, + "num_input_tokens_seen": 222246416, + "step": 3968 + }, + { + "epoch": 8.837416481069042, + "loss": 0.5717356204986572, + "loss_ce": 0.00020242987375240773, + "loss_iou": 0.251953125, + "loss_num": 0.013671875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 222246416, + "step": 3968 + }, + { + "epoch": 8.839643652561247, + "grad_norm": 32.86327362060547, + "learning_rate": 1e-06, + "loss": 0.6211, + "num_input_tokens_seen": 222300948, + "step": 3969 + }, + { + "epoch": 8.839643652561247, + "loss": 0.7585898637771606, + "loss_ce": 0.000167002814123407, + "loss_iou": 0.3203125, + "loss_num": 0.023681640625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 222300948, + "step": 3969 + }, + { + "epoch": 8.841870824053451, + "grad_norm": 29.39252471923828, + "learning_rate": 1e-06, + "loss": 0.7207, + "num_input_tokens_seen": 222356800, + "step": 3970 + }, + { + "epoch": 8.841870824053451, + "loss": 0.7579694390296936, + "loss_ce": 0.00015694380272179842, + "loss_iou": 0.310546875, + "loss_num": 0.0277099609375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 222356800, + "step": 3970 + }, + { + "epoch": 8.844097995545656, + "grad_norm": 47.64936828613281, + "learning_rate": 1e-06, + "loss": 0.7561, + "num_input_tokens_seen": 222414448, + "step": 3971 + }, + { + "epoch": 8.844097995545656, + "loss": 0.6730362772941589, + "loss_ce": 0.00018474232638254762, + "loss_iou": 0.2890625, + "loss_num": 0.01904296875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 222414448, + "step": 3971 + }, + { + "epoch": 8.846325167037861, + "grad_norm": 19.512876510620117, + "learning_rate": 1e-06, + "loss": 0.7139, + "num_input_tokens_seen": 222468120, + "step": 3972 + }, + { + "epoch": 8.846325167037861, + "loss": 0.6079658269882202, + "loss_ce": 0.0001777580037014559, + "loss_iou": 0.263671875, + "loss_num": 0.015869140625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 222468120, + "step": 3972 + }, + { + "epoch": 8.848552338530066, + "grad_norm": 17.036836624145508, + "learning_rate": 1e-06, + "loss": 0.6141, + "num_input_tokens_seen": 222523584, + "step": 3973 + }, + { + "epoch": 8.848552338530066, + "loss": 0.5712729692459106, + "loss_ce": 0.00016703552682884037, + "loss_iou": 0.23828125, + "loss_num": 0.0189208984375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 222523584, + "step": 3973 + }, + { + "epoch": 8.85077951002227, + "grad_norm": 17.4189395904541, + "learning_rate": 1e-06, + "loss": 0.4694, + "num_input_tokens_seen": 222581532, + "step": 3974 + }, + { + "epoch": 8.85077951002227, + "loss": 0.42311620712280273, + "loss_ce": 0.00014257154543884099, + "loss_iou": 0.1904296875, + "loss_num": 0.00860595703125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 222581532, + "step": 3974 + }, + { + "epoch": 8.853006681514476, + "grad_norm": 40.079444885253906, + "learning_rate": 1e-06, + "loss": 0.5725, + "num_input_tokens_seen": 222638036, + "step": 3975 + }, + { + "epoch": 8.853006681514476, + "loss": 0.61489337682724, + "loss_ce": 0.00014727030065841973, + "loss_iou": 0.2734375, + "loss_num": 0.0135498046875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 222638036, + "step": 3975 + }, + { + "epoch": 8.855233853006682, + "grad_norm": 29.330018997192383, + "learning_rate": 1e-06, + "loss": 0.468, + "num_input_tokens_seen": 222693340, + "step": 3976 + }, + { + "epoch": 8.855233853006682, + "loss": 0.531175971031189, + "loss_ce": 0.00017008304712362587, + "loss_iou": 0.23828125, + "loss_num": 0.01104736328125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 222693340, + "step": 3976 + }, + { + "epoch": 8.857461024498887, + "grad_norm": 11.950536727905273, + "learning_rate": 1e-06, + "loss": 0.4903, + "num_input_tokens_seen": 222750828, + "step": 3977 + }, + { + "epoch": 8.857461024498887, + "loss": 0.36172372102737427, + "loss_ce": 0.00015144153439905494, + "loss_iou": 0.1708984375, + "loss_num": 0.00408935546875, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 222750828, + "step": 3977 + }, + { + "epoch": 8.859688195991092, + "grad_norm": 17.319873809814453, + "learning_rate": 1e-06, + "loss": 0.569, + "num_input_tokens_seen": 222807080, + "step": 3978 + }, + { + "epoch": 8.859688195991092, + "loss": 0.5596229434013367, + "loss_ce": 0.00017472056788392365, + "loss_iou": 0.212890625, + "loss_num": 0.026611328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 222807080, + "step": 3978 + }, + { + "epoch": 8.861915367483297, + "grad_norm": 25.0523624420166, + "learning_rate": 1e-06, + "loss": 0.6712, + "num_input_tokens_seen": 222864000, + "step": 3979 + }, + { + "epoch": 8.861915367483297, + "loss": 0.670589804649353, + "loss_ce": 0.00017965443839784712, + "loss_iou": 0.3046875, + "loss_num": 0.01214599609375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 222864000, + "step": 3979 + }, + { + "epoch": 8.864142538975502, + "grad_norm": 18.790050506591797, + "learning_rate": 1e-06, + "loss": 0.4437, + "num_input_tokens_seen": 222918468, + "step": 3980 + }, + { + "epoch": 8.864142538975502, + "loss": 0.5265804529190063, + "loss_ce": 0.00021330692106857896, + "loss_iou": 0.2333984375, + "loss_num": 0.01177978515625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 222918468, + "step": 3980 + }, + { + "epoch": 8.866369710467707, + "grad_norm": 39.062259674072266, + "learning_rate": 1e-06, + "loss": 0.5382, + "num_input_tokens_seen": 222971988, + "step": 3981 + }, + { + "epoch": 8.866369710467707, + "loss": 0.4984826147556305, + "loss_ce": 0.00019161765521857888, + "loss_iou": 0.2177734375, + "loss_num": 0.0126953125, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 222971988, + "step": 3981 + }, + { + "epoch": 8.868596881959911, + "grad_norm": 17.326452255249023, + "learning_rate": 1e-06, + "loss": 0.6576, + "num_input_tokens_seen": 223027000, + "step": 3982 + }, + { + "epoch": 8.868596881959911, + "loss": 0.6256543397903442, + "loss_ce": 0.00041018627234734595, + "loss_iou": 0.275390625, + "loss_num": 0.01513671875, + "loss_xval": 0.625, + "num_input_tokens_seen": 223027000, + "step": 3982 + }, + { + "epoch": 8.870824053452116, + "grad_norm": 16.484027862548828, + "learning_rate": 1e-06, + "loss": 0.7406, + "num_input_tokens_seen": 223081684, + "step": 3983 + }, + { + "epoch": 8.870824053452116, + "loss": 0.6387948989868164, + "loss_ce": 0.0001230332418344915, + "loss_iou": 0.275390625, + "loss_num": 0.017578125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 223081684, + "step": 3983 + }, + { + "epoch": 8.873051224944321, + "grad_norm": 26.997020721435547, + "learning_rate": 1e-06, + "loss": 0.6737, + "num_input_tokens_seen": 223135544, + "step": 3984 + }, + { + "epoch": 8.873051224944321, + "loss": 0.745303213596344, + "loss_ce": 0.00018600517068989575, + "loss_iou": 0.310546875, + "loss_num": 0.024658203125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 223135544, + "step": 3984 + }, + { + "epoch": 8.875278396436526, + "grad_norm": 16.463109970092773, + "learning_rate": 1e-06, + "loss": 0.6391, + "num_input_tokens_seen": 223192264, + "step": 3985 + }, + { + "epoch": 8.875278396436526, + "loss": 0.6673752069473267, + "loss_ce": 0.00038302806206047535, + "loss_iou": 0.283203125, + "loss_num": 0.0205078125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 223192264, + "step": 3985 + }, + { + "epoch": 8.877505567928731, + "grad_norm": 26.982946395874023, + "learning_rate": 1e-06, + "loss": 0.5375, + "num_input_tokens_seen": 223247844, + "step": 3986 + }, + { + "epoch": 8.877505567928731, + "loss": 0.5868437886238098, + "loss_ce": 0.00041803409112617373, + "loss_iou": 0.255859375, + "loss_num": 0.0147705078125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 223247844, + "step": 3986 + }, + { + "epoch": 8.879732739420936, + "grad_norm": 17.86455726623535, + "learning_rate": 1e-06, + "loss": 0.5355, + "num_input_tokens_seen": 223304620, + "step": 3987 + }, + { + "epoch": 8.879732739420936, + "loss": 0.46156710386276245, + "loss_ce": 0.0001413182180840522, + "loss_iou": 0.203125, + "loss_num": 0.01080322265625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 223304620, + "step": 3987 + }, + { + "epoch": 8.88195991091314, + "grad_norm": 19.752405166625977, + "learning_rate": 1e-06, + "loss": 0.3951, + "num_input_tokens_seen": 223361520, + "step": 3988 + }, + { + "epoch": 8.88195991091314, + "loss": 0.3074072003364563, + "loss_ce": 0.0001561975514050573, + "loss_iou": 0.140625, + "loss_num": 0.0052490234375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 223361520, + "step": 3988 + }, + { + "epoch": 8.884187082405345, + "grad_norm": 12.55148983001709, + "learning_rate": 1e-06, + "loss": 0.7263, + "num_input_tokens_seen": 223419580, + "step": 3989 + }, + { + "epoch": 8.884187082405345, + "loss": 1.0404853820800781, + "loss_ce": 0.00020214373944327235, + "loss_iou": 0.3984375, + "loss_num": 0.048583984375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 223419580, + "step": 3989 + }, + { + "epoch": 8.88641425389755, + "grad_norm": 19.007347106933594, + "learning_rate": 1e-06, + "loss": 0.5547, + "num_input_tokens_seen": 223476520, + "step": 3990 + }, + { + "epoch": 8.88641425389755, + "loss": 0.45726656913757324, + "loss_ce": 0.00017428494174964726, + "loss_iou": 0.1953125, + "loss_num": 0.01312255859375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 223476520, + "step": 3990 + }, + { + "epoch": 8.888641425389755, + "grad_norm": 21.701292037963867, + "learning_rate": 1e-06, + "loss": 0.6151, + "num_input_tokens_seen": 223532396, + "step": 3991 + }, + { + "epoch": 8.888641425389755, + "loss": 0.6684781908988953, + "loss_ce": 0.00014323292998597026, + "loss_iou": 0.291015625, + "loss_num": 0.0174560546875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 223532396, + "step": 3991 + }, + { + "epoch": 8.89086859688196, + "grad_norm": 20.25054359436035, + "learning_rate": 1e-06, + "loss": 0.7418, + "num_input_tokens_seen": 223588248, + "step": 3992 + }, + { + "epoch": 8.89086859688196, + "loss": 0.6505246162414551, + "loss_ce": 0.00013402706827037036, + "loss_iou": 0.294921875, + "loss_num": 0.011962890625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 223588248, + "step": 3992 + }, + { + "epoch": 8.893095768374165, + "grad_norm": 39.547183990478516, + "learning_rate": 1e-06, + "loss": 0.6393, + "num_input_tokens_seen": 223643372, + "step": 3993 + }, + { + "epoch": 8.893095768374165, + "loss": 0.47359946370124817, + "loss_ce": 0.0009432291844859719, + "loss_iou": 0.2080078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 223643372, + "step": 3993 + }, + { + "epoch": 8.89532293986637, + "grad_norm": 15.449313163757324, + "learning_rate": 1e-06, + "loss": 0.4557, + "num_input_tokens_seen": 223701152, + "step": 3994 + }, + { + "epoch": 8.89532293986637, + "loss": 0.4065554440021515, + "loss_ce": 0.00018337275832891464, + "loss_iou": 0.17578125, + "loss_num": 0.01092529296875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 223701152, + "step": 3994 + }, + { + "epoch": 8.897550111358575, + "grad_norm": 20.84811782836914, + "learning_rate": 1e-06, + "loss": 0.5546, + "num_input_tokens_seen": 223754800, + "step": 3995 + }, + { + "epoch": 8.897550111358575, + "loss": 0.5366966724395752, + "loss_ce": 0.00013655968359671533, + "loss_iou": 0.2275390625, + "loss_num": 0.0162353515625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 223754800, + "step": 3995 + }, + { + "epoch": 8.89977728285078, + "grad_norm": 22.598325729370117, + "learning_rate": 1e-06, + "loss": 0.4089, + "num_input_tokens_seen": 223811716, + "step": 3996 + }, + { + "epoch": 8.89977728285078, + "loss": 0.5239717364311218, + "loss_ce": 0.00016802808386273682, + "loss_iou": 0.2158203125, + "loss_num": 0.0185546875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 223811716, + "step": 3996 + }, + { + "epoch": 8.902004454342984, + "grad_norm": 16.426435470581055, + "learning_rate": 1e-06, + "loss": 0.5465, + "num_input_tokens_seen": 223867872, + "step": 3997 + }, + { + "epoch": 8.902004454342984, + "loss": 0.5754693746566772, + "loss_ce": 0.00015196282765828073, + "loss_iou": 0.24609375, + "loss_num": 0.0164794921875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 223867872, + "step": 3997 + }, + { + "epoch": 8.90423162583519, + "grad_norm": 35.01568603515625, + "learning_rate": 1e-06, + "loss": 0.8874, + "num_input_tokens_seen": 223922524, + "step": 3998 + }, + { + "epoch": 8.90423162583519, + "loss": 0.7457464337348938, + "loss_ce": 0.00014096120139583945, + "loss_iou": 0.326171875, + "loss_num": 0.0185546875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 223922524, + "step": 3998 + }, + { + "epoch": 8.906458797327394, + "grad_norm": 52.03001403808594, + "learning_rate": 1e-06, + "loss": 0.7269, + "num_input_tokens_seen": 223977692, + "step": 3999 + }, + { + "epoch": 8.906458797327394, + "loss": 0.9718111753463745, + "loss_ce": 0.00013149988080840558, + "loss_iou": 0.392578125, + "loss_num": 0.036865234375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 223977692, + "step": 3999 + }, + { + "epoch": 8.908685968819599, + "grad_norm": 23.651729583740234, + "learning_rate": 1e-06, + "loss": 0.6773, + "num_input_tokens_seen": 224031804, + "step": 4000 + }, + { + "epoch": 8.908685968819599, + "eval_seeclick_web_CIoU": 0.584255576133728, + "eval_seeclick_web_GIoU": 0.5831271409988403, + "eval_seeclick_web_IoU": 0.6019724309444427, + "eval_seeclick_web_MAE_all": 0.0160904498770833, + "eval_seeclick_web_MAE_h": 0.00791524676606059, + "eval_seeclick_web_MAE_w": 0.016444522887468338, + "eval_seeclick_web_MAE_x_boxes": 0.009520682971924543, + "eval_seeclick_web_MAE_y_boxes": 0.022019447991624475, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9091385006904602, + "eval_seeclick_web_loss_ce": 0.00022534980962518603, + "eval_seeclick_web_loss_iou": 0.41326904296875, + "eval_seeclick_web_loss_num": 0.012880325317382812, + "eval_seeclick_web_loss_xval": 0.8909912109375, + "eval_seeclick_web_runtime": 23.4695, + "eval_seeclick_web_samples_per_second": 2.13, + "eval_seeclick_web_steps_per_second": 0.085, + "num_input_tokens_seen": 224031804, + "step": 4000 + }, + { + "epoch": 8.908685968819599, + "eval_icons_CIoU": 0.280209019780159, + "eval_icons_GIoU": 0.3042156249284744, + "eval_icons_IoU": 0.3636113852262497, + "eval_icons_MAE_all": 0.06306547299027443, + "eval_icons_MAE_h": 0.03832197841256857, + "eval_icons_MAE_w": 0.06806911528110504, + "eval_icons_MAE_x_boxes": 0.05768297426402569, + "eval_icons_MAE_y_boxes": 0.03917842544615269, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.7296451330184937, + "eval_icons_loss_ce": 0.00027250249695498496, + "eval_icons_loss_iou": 0.6729736328125, + "eval_icons_loss_num": 0.06075096130371094, + "eval_icons_loss_xval": 1.648193359375, + "eval_icons_runtime": 20.6404, + "eval_icons_samples_per_second": 2.422, + "eval_icons_steps_per_second": 0.097, + "num_input_tokens_seen": 224031804, + "step": 4000 + }, + { + "epoch": 8.908685968819599, + "eval_screenspot_CIoU": 0.344086229801178, + "eval_screenspot_GIoU": 0.36166812976201373, + "eval_screenspot_IoU": 0.42560062805811566, + "eval_screenspot_MAE_all": 0.06235171233614286, + "eval_screenspot_MAE_h": 0.038458424930771194, + "eval_screenspot_MAE_w": 0.07053530837098758, + "eval_screenspot_MAE_x_boxes": 0.07286067555348079, + "eval_screenspot_MAE_y_boxes": 0.04639405757188797, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 1.6449319124221802, + "eval_screenspot_loss_ce": 0.0002812407910823822, + "eval_screenspot_loss_iou": 0.6747233072916666, + "eval_screenspot_loss_num": 0.07325236002604167, + "eval_screenspot_loss_xval": 1.7154947916666667, + "eval_screenspot_runtime": 37.0539, + "eval_screenspot_samples_per_second": 2.402, + "eval_screenspot_steps_per_second": 0.081, + "num_input_tokens_seen": 224031804, + "step": 4000 + }, + { + "epoch": 8.908685968819599, + "eval_compot_CIoU": 0.3489241451025009, + "eval_compot_GIoU": 0.3593568354845047, + "eval_compot_IoU": 0.4082530289888382, + "eval_compot_MAE_all": 0.01762966625392437, + "eval_compot_MAE_h": 0.009061001241207123, + "eval_compot_MAE_w": 0.022062174044549465, + "eval_compot_MAE_x_boxes": 0.028892694041132927, + "eval_compot_MAE_y_boxes": 0.006829841528087854, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.407405138015747, + "eval_compot_loss_ce": 0.00021665637177648023, + "eval_compot_loss_iou": 0.6473388671875, + "eval_compot_loss_num": 0.016569137573242188, + "eval_compot_loss_xval": 1.377197265625, + "eval_compot_runtime": 21.8205, + "eval_compot_samples_per_second": 2.291, + "eval_compot_steps_per_second": 0.092, + "num_input_tokens_seen": 224031804, + "step": 4000 + }, + { + "epoch": 8.908685968819599, + "eval_custom_ui_val_CIoU": 0.4698154992527432, + "eval_custom_ui_val_GIoU": 0.483290907409456, + "eval_custom_ui_val_IoU": 0.5295593506760068, + "eval_custom_ui_val_MAE_all": 0.030179924021164577, + "eval_custom_ui_val_MAE_h": 0.016545448245273695, + "eval_custom_ui_val_MAE_w": 0.03918004294650422, + "eval_custom_ui_val_MAE_x_boxes": 0.03695020234833161, + "eval_custom_ui_val_MAE_y_boxes": 0.01497822223852078, + "eval_custom_ui_val_inside_bbox": 0.7457561757829454, + "eval_custom_ui_val_loss": 1.1855908632278442, + "eval_custom_ui_val_loss_ce": 0.00024014294386789616, + "eval_custom_ui_val_loss_iou": 0.5057237413194444, + "eval_custom_ui_val_loss_num": 0.027684529622395832, + "eval_custom_ui_val_loss_xval": 1.1498480902777777, + "eval_custom_ui_val_runtime": 70.3473, + "eval_custom_ui_val_samples_per_second": 3.767, + "eval_custom_ui_val_steps_per_second": 0.128, + "num_input_tokens_seen": 224031804, + "step": 4000 + }, + { + "epoch": 8.908685968819599, + "loss": 0.9238047003746033, + "loss_ce": 0.00022073142463341355, + "loss_iou": 0.400390625, + "loss_num": 0.0245361328125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 224031804, + "step": 4000 + }, + { + "epoch": 8.910913140311804, + "grad_norm": 17.125661849975586, + "learning_rate": 1e-06, + "loss": 0.6028, + "num_input_tokens_seen": 224088664, + "step": 4001 + }, + { + "epoch": 8.910913140311804, + "loss": 0.6688809394836426, + "loss_ce": 0.00017975796072278172, + "loss_iou": 0.279296875, + "loss_num": 0.0224609375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 224088664, + "step": 4001 + }, + { + "epoch": 8.913140311804009, + "grad_norm": 11.797565460205078, + "learning_rate": 1e-06, + "loss": 0.4654, + "num_input_tokens_seen": 224146092, + "step": 4002 + }, + { + "epoch": 8.913140311804009, + "loss": 0.43066659569740295, + "loss_ce": 0.00012459905701689422, + "loss_iou": 0.181640625, + "loss_num": 0.013427734375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 224146092, + "step": 4002 + }, + { + "epoch": 8.915367483296214, + "grad_norm": 18.511592864990234, + "learning_rate": 1e-06, + "loss": 0.4735, + "num_input_tokens_seen": 224201576, + "step": 4003 + }, + { + "epoch": 8.915367483296214, + "loss": 0.44720643758773804, + "loss_ce": 0.00018495078256819397, + "loss_iou": 0.1953125, + "loss_num": 0.0113525390625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 224201576, + "step": 4003 + }, + { + "epoch": 8.917594654788418, + "grad_norm": 19.493896484375, + "learning_rate": 1e-06, + "loss": 0.658, + "num_input_tokens_seen": 224258016, + "step": 4004 + }, + { + "epoch": 8.917594654788418, + "loss": 0.9191592931747437, + "loss_ce": 0.00021400017431005836, + "loss_iou": 0.388671875, + "loss_num": 0.0281982421875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 224258016, + "step": 4004 + }, + { + "epoch": 8.919821826280623, + "grad_norm": 19.006305694580078, + "learning_rate": 1e-06, + "loss": 0.6289, + "num_input_tokens_seen": 224311936, + "step": 4005 + }, + { + "epoch": 8.919821826280623, + "loss": 0.7552786469459534, + "loss_ce": 0.0001517195487394929, + "loss_iou": 0.322265625, + "loss_num": 0.021728515625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 224311936, + "step": 4005 + }, + { + "epoch": 8.922048997772828, + "grad_norm": 58.83499526977539, + "learning_rate": 1e-06, + "loss": 0.6666, + "num_input_tokens_seen": 224368388, + "step": 4006 + }, + { + "epoch": 8.922048997772828, + "loss": 0.746427595615387, + "loss_ce": 0.00021177891176193953, + "loss_iou": 0.33984375, + "loss_num": 0.01336669921875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 224368388, + "step": 4006 + }, + { + "epoch": 8.924276169265033, + "grad_norm": 17.04109001159668, + "learning_rate": 1e-06, + "loss": 0.6045, + "num_input_tokens_seen": 224423224, + "step": 4007 + }, + { + "epoch": 8.924276169265033, + "loss": 0.6434751749038696, + "loss_ce": 0.00016464036889374256, + "loss_iou": 0.298828125, + "loss_num": 0.00958251953125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 224423224, + "step": 4007 + }, + { + "epoch": 8.926503340757238, + "grad_norm": 16.078716278076172, + "learning_rate": 1e-06, + "loss": 0.4969, + "num_input_tokens_seen": 224479088, + "step": 4008 + }, + { + "epoch": 8.926503340757238, + "loss": 0.47645998001098633, + "loss_ce": 0.00014161772560328245, + "loss_iou": 0.2080078125, + "loss_num": 0.011962890625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 224479088, + "step": 4008 + }, + { + "epoch": 8.928730512249443, + "grad_norm": 18.10431671142578, + "learning_rate": 1e-06, + "loss": 0.5381, + "num_input_tokens_seen": 224536168, + "step": 4009 + }, + { + "epoch": 8.928730512249443, + "loss": 0.5730590224266052, + "loss_ce": 0.00018303040997125208, + "loss_iou": 0.220703125, + "loss_num": 0.0264892578125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 224536168, + "step": 4009 + }, + { + "epoch": 8.930957683741648, + "grad_norm": 20.611072540283203, + "learning_rate": 1e-06, + "loss": 0.5397, + "num_input_tokens_seen": 224594284, + "step": 4010 + }, + { + "epoch": 8.930957683741648, + "loss": 0.5585442781448364, + "loss_ce": 0.00019466917729005218, + "loss_iou": 0.2578125, + "loss_num": 0.0087890625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 224594284, + "step": 4010 + }, + { + "epoch": 8.933184855233852, + "grad_norm": 20.54092788696289, + "learning_rate": 1e-06, + "loss": 0.5272, + "num_input_tokens_seen": 224651224, + "step": 4011 + }, + { + "epoch": 8.933184855233852, + "loss": 0.577430248260498, + "loss_ce": 0.00015977630391716957, + "loss_iou": 0.259765625, + "loss_num": 0.01165771484375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 224651224, + "step": 4011 + }, + { + "epoch": 8.935412026726057, + "grad_norm": 15.826013565063477, + "learning_rate": 1e-06, + "loss": 0.6706, + "num_input_tokens_seen": 224707328, + "step": 4012 + }, + { + "epoch": 8.935412026726057, + "loss": 0.4253334701061249, + "loss_ce": 0.0001625774457352236, + "loss_iou": 0.193359375, + "loss_num": 0.007568359375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 224707328, + "step": 4012 + }, + { + "epoch": 8.937639198218262, + "grad_norm": 25.152671813964844, + "learning_rate": 1e-06, + "loss": 0.5631, + "num_input_tokens_seen": 224766028, + "step": 4013 + }, + { + "epoch": 8.937639198218262, + "loss": 0.7184352874755859, + "loss_ce": 0.0001735725672915578, + "loss_iou": 0.31640625, + "loss_num": 0.01708984375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 224766028, + "step": 4013 + }, + { + "epoch": 8.939866369710467, + "grad_norm": 30.11623191833496, + "learning_rate": 1e-06, + "loss": 0.605, + "num_input_tokens_seen": 224822620, + "step": 4014 + }, + { + "epoch": 8.939866369710467, + "loss": 0.7723772525787354, + "loss_ce": 0.00016042383504100144, + "loss_iou": 0.326171875, + "loss_num": 0.0244140625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 224822620, + "step": 4014 + }, + { + "epoch": 8.942093541202672, + "grad_norm": 45.63913345336914, + "learning_rate": 1e-06, + "loss": 0.5652, + "num_input_tokens_seen": 224878404, + "step": 4015 + }, + { + "epoch": 8.942093541202672, + "loss": 0.6024213433265686, + "loss_ce": 0.0001264153397642076, + "loss_iou": 0.2314453125, + "loss_num": 0.0281982421875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 224878404, + "step": 4015 + }, + { + "epoch": 8.944320712694877, + "grad_norm": 17.508010864257812, + "learning_rate": 1e-06, + "loss": 0.4491, + "num_input_tokens_seen": 224936428, + "step": 4016 + }, + { + "epoch": 8.944320712694877, + "loss": 0.5184711217880249, + "loss_ce": 0.0001606106961844489, + "loss_iou": 0.220703125, + "loss_num": 0.01531982421875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 224936428, + "step": 4016 + }, + { + "epoch": 8.946547884187082, + "grad_norm": 15.416549682617188, + "learning_rate": 1e-06, + "loss": 0.6399, + "num_input_tokens_seen": 224994188, + "step": 4017 + }, + { + "epoch": 8.946547884187082, + "loss": 0.7011144161224365, + "loss_ce": 0.00018664849631022662, + "loss_iou": 0.287109375, + "loss_num": 0.0252685546875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 224994188, + "step": 4017 + }, + { + "epoch": 8.948775055679288, + "grad_norm": 21.26276969909668, + "learning_rate": 1e-06, + "loss": 0.4372, + "num_input_tokens_seen": 225051360, + "step": 4018 + }, + { + "epoch": 8.948775055679288, + "loss": 0.3914967179298401, + "loss_ce": 0.00013928582484368235, + "loss_iou": 0.171875, + "loss_num": 0.00958251953125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 225051360, + "step": 4018 + }, + { + "epoch": 8.951002227171493, + "grad_norm": 13.906608581542969, + "learning_rate": 1e-06, + "loss": 0.4571, + "num_input_tokens_seen": 225108952, + "step": 4019 + }, + { + "epoch": 8.951002227171493, + "loss": 0.4027478098869324, + "loss_ce": 0.00015992176486179233, + "loss_iou": 0.1767578125, + "loss_num": 0.00970458984375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 225108952, + "step": 4019 + }, + { + "epoch": 8.953229398663698, + "grad_norm": 13.286585807800293, + "learning_rate": 1e-06, + "loss": 0.6289, + "num_input_tokens_seen": 225165484, + "step": 4020 + }, + { + "epoch": 8.953229398663698, + "loss": 0.37464502453804016, + "loss_ce": 0.00013330676301848143, + "loss_iou": 0.158203125, + "loss_num": 0.011474609375, + "loss_xval": 0.375, + "num_input_tokens_seen": 225165484, + "step": 4020 + }, + { + "epoch": 8.955456570155903, + "grad_norm": 27.786767959594727, + "learning_rate": 1e-06, + "loss": 0.843, + "num_input_tokens_seen": 225219788, + "step": 4021 + }, + { + "epoch": 8.955456570155903, + "loss": 0.6932044625282288, + "loss_ce": 0.00015029762289486825, + "loss_iou": 0.30078125, + "loss_num": 0.0179443359375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 225219788, + "step": 4021 + }, + { + "epoch": 8.957683741648108, + "grad_norm": 15.158284187316895, + "learning_rate": 1e-06, + "loss": 0.5056, + "num_input_tokens_seen": 225277592, + "step": 4022 + }, + { + "epoch": 8.957683741648108, + "loss": 0.5200361013412476, + "loss_ce": 0.00013863734784536064, + "loss_iou": 0.212890625, + "loss_num": 0.018798828125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 225277592, + "step": 4022 + }, + { + "epoch": 8.959910913140313, + "grad_norm": 45.792728424072266, + "learning_rate": 1e-06, + "loss": 0.4745, + "num_input_tokens_seen": 225334828, + "step": 4023 + }, + { + "epoch": 8.959910913140313, + "loss": 0.40143540501594543, + "loss_ce": 0.0001902944641187787, + "loss_iou": 0.1826171875, + "loss_num": 0.00714111328125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 225334828, + "step": 4023 + }, + { + "epoch": 8.962138084632517, + "grad_norm": 19.045509338378906, + "learning_rate": 1e-06, + "loss": 0.5239, + "num_input_tokens_seen": 225389700, + "step": 4024 + }, + { + "epoch": 8.962138084632517, + "loss": 0.4376333951950073, + "loss_ce": 0.00013340538134798408, + "loss_iou": 0.1962890625, + "loss_num": 0.0091552734375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 225389700, + "step": 4024 + }, + { + "epoch": 8.964365256124722, + "grad_norm": 15.347426414489746, + "learning_rate": 1e-06, + "loss": 0.6214, + "num_input_tokens_seen": 225447624, + "step": 4025 + }, + { + "epoch": 8.964365256124722, + "loss": 0.5260591506958008, + "loss_ce": 0.000180262781213969, + "loss_iou": 0.2216796875, + "loss_num": 0.016357421875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 225447624, + "step": 4025 + }, + { + "epoch": 8.966592427616927, + "grad_norm": 14.499673843383789, + "learning_rate": 1e-06, + "loss": 0.5167, + "num_input_tokens_seen": 225506444, + "step": 4026 + }, + { + "epoch": 8.966592427616927, + "loss": 0.3981502652168274, + "loss_ce": 0.00014002776879351586, + "loss_iou": 0.171875, + "loss_num": 0.010986328125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 225506444, + "step": 4026 + }, + { + "epoch": 8.968819599109132, + "grad_norm": 19.23043441772461, + "learning_rate": 1e-06, + "loss": 0.5612, + "num_input_tokens_seen": 225560052, + "step": 4027 + }, + { + "epoch": 8.968819599109132, + "loss": 0.32940202951431274, + "loss_ce": 0.00017840655345935374, + "loss_iou": 0.140625, + "loss_num": 0.00958251953125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 225560052, + "step": 4027 + }, + { + "epoch": 8.971046770601337, + "grad_norm": 34.80836486816406, + "learning_rate": 1e-06, + "loss": 0.5679, + "num_input_tokens_seen": 225618236, + "step": 4028 + }, + { + "epoch": 8.971046770601337, + "loss": 0.6726021766662598, + "loss_ce": 0.00023891603632364422, + "loss_iou": 0.30078125, + "loss_num": 0.0142822265625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 225618236, + "step": 4028 + }, + { + "epoch": 8.973273942093542, + "grad_norm": 19.638723373413086, + "learning_rate": 1e-06, + "loss": 0.5453, + "num_input_tokens_seen": 225674116, + "step": 4029 + }, + { + "epoch": 8.973273942093542, + "loss": 0.520553708076477, + "loss_ce": 0.0012665874091908336, + "loss_iou": 0.1845703125, + "loss_num": 0.030029296875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 225674116, + "step": 4029 + }, + { + "epoch": 8.975501113585747, + "grad_norm": 18.89423942565918, + "learning_rate": 1e-06, + "loss": 0.6397, + "num_input_tokens_seen": 225730220, + "step": 4030 + }, + { + "epoch": 8.975501113585747, + "loss": 0.5465627908706665, + "loss_ce": 0.00017607388144824654, + "loss_iou": 0.244140625, + "loss_num": 0.01165771484375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 225730220, + "step": 4030 + }, + { + "epoch": 8.977728285077951, + "grad_norm": 13.650494575500488, + "learning_rate": 1e-06, + "loss": 0.5068, + "num_input_tokens_seen": 225786488, + "step": 4031 + }, + { + "epoch": 8.977728285077951, + "loss": 0.5636086463928223, + "loss_ce": 0.00013209109602030367, + "loss_iou": 0.2275390625, + "loss_num": 0.021728515625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 225786488, + "step": 4031 + }, + { + "epoch": 8.979955456570156, + "grad_norm": 16.062467575073242, + "learning_rate": 1e-06, + "loss": 0.4167, + "num_input_tokens_seen": 225841372, + "step": 4032 + }, + { + "epoch": 8.979955456570156, + "loss": 0.29786229133605957, + "loss_ce": 0.00013277304242365062, + "loss_iou": 0.1201171875, + "loss_num": 0.011474609375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 225841372, + "step": 4032 + }, + { + "epoch": 8.982182628062361, + "grad_norm": 17.630416870117188, + "learning_rate": 1e-06, + "loss": 0.5697, + "num_input_tokens_seen": 225894908, + "step": 4033 + }, + { + "epoch": 8.982182628062361, + "loss": 0.676677942276001, + "loss_ce": 0.00016425049398094416, + "loss_iou": 0.2734375, + "loss_num": 0.0257568359375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 225894908, + "step": 4033 + }, + { + "epoch": 8.984409799554566, + "grad_norm": 20.209087371826172, + "learning_rate": 1e-06, + "loss": 0.4613, + "num_input_tokens_seen": 225948016, + "step": 4034 + }, + { + "epoch": 8.984409799554566, + "loss": 0.6483420729637146, + "loss_ce": 0.00014869487495161593, + "loss_iou": 0.298828125, + "loss_num": 0.00982666015625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 225948016, + "step": 4034 + }, + { + "epoch": 8.98663697104677, + "grad_norm": 31.209794998168945, + "learning_rate": 1e-06, + "loss": 0.62, + "num_input_tokens_seen": 226005860, + "step": 4035 + }, + { + "epoch": 8.98663697104677, + "loss": 0.6758050918579102, + "loss_ce": 0.0002679667086340487, + "loss_iou": 0.287109375, + "loss_num": 0.0205078125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 226005860, + "step": 4035 + }, + { + "epoch": 8.988864142538976, + "grad_norm": 13.588601112365723, + "learning_rate": 1e-06, + "loss": 0.4508, + "num_input_tokens_seen": 226061580, + "step": 4036 + }, + { + "epoch": 8.988864142538976, + "loss": 0.2981112003326416, + "loss_ce": 0.00013759495050180703, + "loss_iou": 0.134765625, + "loss_num": 0.00555419921875, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 226061580, + "step": 4036 + }, + { + "epoch": 8.99109131403118, + "grad_norm": 12.43724250793457, + "learning_rate": 1e-06, + "loss": 0.6268, + "num_input_tokens_seen": 226121088, + "step": 4037 + }, + { + "epoch": 8.99109131403118, + "loss": 0.7063800096511841, + "loss_ce": 0.00020320256589911878, + "loss_iou": 0.298828125, + "loss_num": 0.02197265625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 226121088, + "step": 4037 + }, + { + "epoch": 8.993318485523385, + "grad_norm": 14.459733963012695, + "learning_rate": 1e-06, + "loss": 0.7679, + "num_input_tokens_seen": 226178000, + "step": 4038 + }, + { + "epoch": 8.993318485523385, + "loss": 0.6036978363990784, + "loss_ce": 0.00018220869242213666, + "loss_iou": 0.2294921875, + "loss_num": 0.029052734375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 226178000, + "step": 4038 + }, + { + "epoch": 8.99554565701559, + "grad_norm": 20.82482147216797, + "learning_rate": 1e-06, + "loss": 0.5971, + "num_input_tokens_seen": 226234736, + "step": 4039 + }, + { + "epoch": 8.99554565701559, + "loss": 0.47330033779144287, + "loss_ce": 0.00015583749336656183, + "loss_iou": 0.2158203125, + "loss_num": 0.00830078125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 226234736, + "step": 4039 + }, + { + "epoch": 8.997772828507795, + "grad_norm": 18.3863468170166, + "learning_rate": 1e-06, + "loss": 0.5675, + "num_input_tokens_seen": 226287252, + "step": 4040 + }, + { + "epoch": 8.997772828507795, + "loss": 0.5164123177528381, + "loss_ce": 0.00017695256974548101, + "loss_iou": 0.2314453125, + "loss_num": 0.01068115234375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 226287252, + "step": 4040 + }, + { + "epoch": 9.0, + "grad_norm": 18.648279190063477, + "learning_rate": 1e-06, + "loss": 0.5718, + "num_input_tokens_seen": 226345288, + "step": 4041 + }, + { + "epoch": 9.0, + "loss": 0.6733693480491638, + "loss_ce": 0.00015158146561589092, + "loss_iou": 0.283203125, + "loss_num": 0.0211181640625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 226345288, + "step": 4041 + }, + { + "epoch": 9.002227171492205, + "grad_norm": 56.01642608642578, + "learning_rate": 1e-06, + "loss": 0.5703, + "num_input_tokens_seen": 226400596, + "step": 4042 + }, + { + "epoch": 9.002227171492205, + "loss": 0.6565755605697632, + "loss_ce": 0.00020350792328827083, + "loss_iou": 0.25, + "loss_num": 0.03125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 226400596, + "step": 4042 + }, + { + "epoch": 9.00445434298441, + "grad_norm": 21.13946533203125, + "learning_rate": 1e-06, + "loss": 0.5926, + "num_input_tokens_seen": 226455584, + "step": 4043 + }, + { + "epoch": 9.00445434298441, + "loss": 0.5227538347244263, + "loss_ce": 0.00017086087609641254, + "loss_iou": 0.240234375, + "loss_num": 0.00830078125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 226455584, + "step": 4043 + }, + { + "epoch": 9.006681514476615, + "grad_norm": 23.61444091796875, + "learning_rate": 1e-06, + "loss": 0.5942, + "num_input_tokens_seen": 226508220, + "step": 4044 + }, + { + "epoch": 9.006681514476615, + "loss": 0.588532030582428, + "loss_ce": 0.00015310911112464964, + "loss_iou": 0.24609375, + "loss_num": 0.0194091796875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 226508220, + "step": 4044 + }, + { + "epoch": 9.00890868596882, + "grad_norm": 26.408849716186523, + "learning_rate": 1e-06, + "loss": 0.4008, + "num_input_tokens_seen": 226563516, + "step": 4045 + }, + { + "epoch": 9.00890868596882, + "loss": 0.4769752323627472, + "loss_ce": 0.00016859016614034772, + "loss_iou": 0.20703125, + "loss_num": 0.01251220703125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 226563516, + "step": 4045 + }, + { + "epoch": 9.011135857461024, + "grad_norm": 16.130887985229492, + "learning_rate": 1e-06, + "loss": 0.4924, + "num_input_tokens_seen": 226619892, + "step": 4046 + }, + { + "epoch": 9.011135857461024, + "loss": 0.3370331823825836, + "loss_ce": 0.0001191033807117492, + "loss_iou": 0.11083984375, + "loss_num": 0.0230712890625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 226619892, + "step": 4046 + }, + { + "epoch": 9.01336302895323, + "grad_norm": 17.81732749938965, + "learning_rate": 1e-06, + "loss": 0.6139, + "num_input_tokens_seen": 226678680, + "step": 4047 + }, + { + "epoch": 9.01336302895323, + "loss": 0.826948881149292, + "loss_ce": 0.00028875278076156974, + "loss_iou": 0.345703125, + "loss_num": 0.0274658203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 226678680, + "step": 4047 + }, + { + "epoch": 9.015590200445434, + "grad_norm": 19.63979721069336, + "learning_rate": 1e-06, + "loss": 0.7731, + "num_input_tokens_seen": 226736108, + "step": 4048 + }, + { + "epoch": 9.015590200445434, + "loss": 0.8490115404129028, + "loss_ce": 0.00025667509180493653, + "loss_iou": 0.37890625, + "loss_num": 0.0179443359375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 226736108, + "step": 4048 + }, + { + "epoch": 9.017817371937639, + "grad_norm": 23.76055335998535, + "learning_rate": 1e-06, + "loss": 0.5958, + "num_input_tokens_seen": 226791264, + "step": 4049 + }, + { + "epoch": 9.017817371937639, + "loss": 0.4640924334526062, + "loss_ce": 0.00022527157852891833, + "loss_iou": 0.201171875, + "loss_num": 0.012451171875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 226791264, + "step": 4049 + }, + { + "epoch": 9.020044543429844, + "grad_norm": 12.326661109924316, + "learning_rate": 1e-06, + "loss": 0.49, + "num_input_tokens_seen": 226849248, + "step": 4050 + }, + { + "epoch": 9.020044543429844, + "loss": 0.4276803731918335, + "loss_ce": 0.00019013047858607024, + "loss_iou": 0.1953125, + "loss_num": 0.007476806640625, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 226849248, + "step": 4050 + }, + { + "epoch": 9.022271714922049, + "grad_norm": 25.97770118713379, + "learning_rate": 1e-06, + "loss": 0.704, + "num_input_tokens_seen": 226903776, + "step": 4051 + }, + { + "epoch": 9.022271714922049, + "loss": 0.8058446049690247, + "loss_ce": 0.0001805661740945652, + "loss_iou": 0.361328125, + "loss_num": 0.016357421875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 226903776, + "step": 4051 + }, + { + "epoch": 9.024498886414253, + "grad_norm": 23.6370792388916, + "learning_rate": 1e-06, + "loss": 0.4962, + "num_input_tokens_seen": 226960756, + "step": 4052 + }, + { + "epoch": 9.024498886414253, + "loss": 0.5954891443252563, + "loss_ce": 0.00015223291120491922, + "loss_iou": 0.2734375, + "loss_num": 0.010009765625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 226960756, + "step": 4052 + }, + { + "epoch": 9.026726057906458, + "grad_norm": 22.727136611938477, + "learning_rate": 1e-06, + "loss": 0.792, + "num_input_tokens_seen": 227015196, + "step": 4053 + }, + { + "epoch": 9.026726057906458, + "loss": 0.6129428148269653, + "loss_ce": 0.000394008937291801, + "loss_iou": 0.267578125, + "loss_num": 0.01519775390625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 227015196, + "step": 4053 + }, + { + "epoch": 9.028953229398663, + "grad_norm": 21.844358444213867, + "learning_rate": 1e-06, + "loss": 0.6825, + "num_input_tokens_seen": 227069252, + "step": 4054 + }, + { + "epoch": 9.028953229398663, + "loss": 0.7161485552787781, + "loss_ce": 0.00020616035908460617, + "loss_iou": 0.3125, + "loss_num": 0.0185546875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 227069252, + "step": 4054 + }, + { + "epoch": 9.031180400890868, + "grad_norm": 12.784194946289062, + "learning_rate": 1e-06, + "loss": 0.5135, + "num_input_tokens_seen": 227124728, + "step": 4055 + }, + { + "epoch": 9.031180400890868, + "loss": 0.46670782566070557, + "loss_ce": 0.00015509186778217554, + "loss_iou": 0.21484375, + "loss_num": 0.00750732421875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 227124728, + "step": 4055 + }, + { + "epoch": 9.033407572383073, + "grad_norm": 48.46086120605469, + "learning_rate": 1e-06, + "loss": 0.7413, + "num_input_tokens_seen": 227181172, + "step": 4056 + }, + { + "epoch": 9.033407572383073, + "loss": 1.0057790279388428, + "loss_ce": 0.00016376111307181418, + "loss_iou": 0.40625, + "loss_num": 0.0390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 227181172, + "step": 4056 + }, + { + "epoch": 9.035634743875278, + "grad_norm": 20.736644744873047, + "learning_rate": 1e-06, + "loss": 0.5321, + "num_input_tokens_seen": 227237508, + "step": 4057 + }, + { + "epoch": 9.035634743875278, + "loss": 0.46913978457450867, + "loss_ce": 0.0007559881778433919, + "loss_iou": 0.2099609375, + "loss_num": 0.00982666015625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 227237508, + "step": 4057 + }, + { + "epoch": 9.037861915367483, + "grad_norm": 69.1377944946289, + "learning_rate": 1e-06, + "loss": 0.7385, + "num_input_tokens_seen": 227289364, + "step": 4058 + }, + { + "epoch": 9.037861915367483, + "loss": 0.597074568271637, + "loss_ce": 0.00015077766147442162, + "loss_iou": 0.26171875, + "loss_num": 0.01483154296875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 227289364, + "step": 4058 + }, + { + "epoch": 9.040089086859687, + "grad_norm": 15.685859680175781, + "learning_rate": 1e-06, + "loss": 0.5525, + "num_input_tokens_seen": 227345812, + "step": 4059 + }, + { + "epoch": 9.040089086859687, + "loss": 0.5543001890182495, + "loss_ce": 0.00016202838742174208, + "loss_iou": 0.2373046875, + "loss_num": 0.0159912109375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 227345812, + "step": 4059 + }, + { + "epoch": 9.042316258351892, + "grad_norm": 15.678596496582031, + "learning_rate": 1e-06, + "loss": 0.6039, + "num_input_tokens_seen": 227402332, + "step": 4060 + }, + { + "epoch": 9.042316258351892, + "loss": 0.6879202127456665, + "loss_ce": 0.00017606788605917245, + "loss_iou": 0.28125, + "loss_num": 0.02490234375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 227402332, + "step": 4060 + }, + { + "epoch": 9.044543429844097, + "grad_norm": 24.40289306640625, + "learning_rate": 1e-06, + "loss": 0.6604, + "num_input_tokens_seen": 227456080, + "step": 4061 + }, + { + "epoch": 9.044543429844097, + "loss": 0.7870203256607056, + "loss_ce": 0.00015513686230406165, + "loss_iou": 0.3046875, + "loss_num": 0.035400390625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 227456080, + "step": 4061 + }, + { + "epoch": 9.046770601336302, + "grad_norm": 20.38033103942871, + "learning_rate": 1e-06, + "loss": 0.5911, + "num_input_tokens_seen": 227512720, + "step": 4062 + }, + { + "epoch": 9.046770601336302, + "loss": 0.541382908821106, + "loss_ce": 0.0001231818023370579, + "loss_iou": 0.25, + "loss_num": 0.00848388671875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 227512720, + "step": 4062 + }, + { + "epoch": 9.048997772828507, + "grad_norm": 16.504854202270508, + "learning_rate": 1e-06, + "loss": 0.5148, + "num_input_tokens_seen": 227570688, + "step": 4063 + }, + { + "epoch": 9.048997772828507, + "loss": 0.4388590455055237, + "loss_ce": 0.00013835716526955366, + "loss_iou": 0.1884765625, + "loss_num": 0.01214599609375, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 227570688, + "step": 4063 + }, + { + "epoch": 9.051224944320714, + "grad_norm": 23.736780166625977, + "learning_rate": 1e-06, + "loss": 0.5917, + "num_input_tokens_seen": 227628128, + "step": 4064 + }, + { + "epoch": 9.051224944320714, + "loss": 0.7519105672836304, + "loss_ce": 0.0002015778300119564, + "loss_iou": 0.31640625, + "loss_num": 0.02392578125, + "loss_xval": 0.75, + "num_input_tokens_seen": 227628128, + "step": 4064 + }, + { + "epoch": 9.053452115812918, + "grad_norm": 17.67232894897461, + "learning_rate": 1e-06, + "loss": 0.6101, + "num_input_tokens_seen": 227683884, + "step": 4065 + }, + { + "epoch": 9.053452115812918, + "loss": 0.618810772895813, + "loss_ce": 0.00015848156181164086, + "loss_iou": 0.265625, + "loss_num": 0.017578125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 227683884, + "step": 4065 + }, + { + "epoch": 9.055679287305123, + "grad_norm": 19.514118194580078, + "learning_rate": 1e-06, + "loss": 0.8178, + "num_input_tokens_seen": 227735504, + "step": 4066 + }, + { + "epoch": 9.055679287305123, + "loss": 0.6703531742095947, + "loss_ce": 0.00018715695478022099, + "loss_iou": 0.287109375, + "loss_num": 0.01904296875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 227735504, + "step": 4066 + }, + { + "epoch": 9.057906458797328, + "grad_norm": 18.996246337890625, + "learning_rate": 1e-06, + "loss": 0.4703, + "num_input_tokens_seen": 227793336, + "step": 4067 + }, + { + "epoch": 9.057906458797328, + "loss": 0.37396594882011414, + "loss_ce": 0.00018664645904209465, + "loss_iou": 0.1630859375, + "loss_num": 0.0096435546875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 227793336, + "step": 4067 + }, + { + "epoch": 9.060133630289533, + "grad_norm": 13.91976547241211, + "learning_rate": 1e-06, + "loss": 0.4175, + "num_input_tokens_seen": 227851892, + "step": 4068 + }, + { + "epoch": 9.060133630289533, + "loss": 0.5668018460273743, + "loss_ce": 0.00015145693032536656, + "loss_iou": 0.255859375, + "loss_num": 0.0108642578125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 227851892, + "step": 4068 + }, + { + "epoch": 9.062360801781738, + "grad_norm": 15.925460815429688, + "learning_rate": 1e-06, + "loss": 0.4987, + "num_input_tokens_seen": 227904312, + "step": 4069 + }, + { + "epoch": 9.062360801781738, + "loss": 0.5191997289657593, + "loss_ce": 0.00015678332420066, + "loss_iou": 0.2431640625, + "loss_num": 0.00665283203125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 227904312, + "step": 4069 + }, + { + "epoch": 9.064587973273943, + "grad_norm": 19.193389892578125, + "learning_rate": 1e-06, + "loss": 0.6064, + "num_input_tokens_seen": 227959496, + "step": 4070 + }, + { + "epoch": 9.064587973273943, + "loss": 0.5843898057937622, + "loss_ce": 0.00016130355652421713, + "loss_iou": 0.25, + "loss_num": 0.0169677734375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 227959496, + "step": 4070 + }, + { + "epoch": 9.066815144766148, + "grad_norm": 23.820255279541016, + "learning_rate": 1e-06, + "loss": 0.4375, + "num_input_tokens_seen": 228018492, + "step": 4071 + }, + { + "epoch": 9.066815144766148, + "loss": 0.4991843104362488, + "loss_ce": 0.00016086628602351993, + "loss_iou": 0.228515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.5, + "num_input_tokens_seen": 228018492, + "step": 4071 + }, + { + "epoch": 9.069042316258352, + "grad_norm": 17.966339111328125, + "learning_rate": 1e-06, + "loss": 0.4902, + "num_input_tokens_seen": 228072824, + "step": 4072 + }, + { + "epoch": 9.069042316258352, + "loss": 0.33019495010375977, + "loss_ce": 0.00020839170610997826, + "loss_iou": 0.1337890625, + "loss_num": 0.01251220703125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 228072824, + "step": 4072 + }, + { + "epoch": 9.071269487750557, + "grad_norm": 19.144359588623047, + "learning_rate": 1e-06, + "loss": 0.5818, + "num_input_tokens_seen": 228126552, + "step": 4073 + }, + { + "epoch": 9.071269487750557, + "loss": 0.5044497847557068, + "loss_ce": 0.00017729168757796288, + "loss_iou": 0.20703125, + "loss_num": 0.01806640625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 228126552, + "step": 4073 + }, + { + "epoch": 9.073496659242762, + "grad_norm": 27.93861961364746, + "learning_rate": 1e-06, + "loss": 0.542, + "num_input_tokens_seen": 228181208, + "step": 4074 + }, + { + "epoch": 9.073496659242762, + "loss": 0.4734833836555481, + "loss_ce": 0.00015576003352180123, + "loss_iou": 0.220703125, + "loss_num": 0.006256103515625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 228181208, + "step": 4074 + }, + { + "epoch": 9.075723830734967, + "grad_norm": 16.074905395507812, + "learning_rate": 1e-06, + "loss": 0.5862, + "num_input_tokens_seen": 228237728, + "step": 4075 + }, + { + "epoch": 9.075723830734967, + "loss": 0.6505739092826843, + "loss_ce": 0.00018325743440072984, + "loss_iou": 0.27734375, + "loss_num": 0.019287109375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 228237728, + "step": 4075 + }, + { + "epoch": 9.077951002227172, + "grad_norm": 26.28703498840332, + "learning_rate": 1e-06, + "loss": 0.5817, + "num_input_tokens_seen": 228294284, + "step": 4076 + }, + { + "epoch": 9.077951002227172, + "loss": 0.46036121249198914, + "loss_ce": 0.00015614047879353166, + "loss_iou": 0.205078125, + "loss_num": 0.010009765625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 228294284, + "step": 4076 + }, + { + "epoch": 9.080178173719377, + "grad_norm": 19.399211883544922, + "learning_rate": 1e-06, + "loss": 0.5473, + "num_input_tokens_seen": 228349092, + "step": 4077 + }, + { + "epoch": 9.080178173719377, + "loss": 0.5990505218505859, + "loss_ce": 0.00017354718875139952, + "loss_iou": 0.251953125, + "loss_num": 0.0189208984375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 228349092, + "step": 4077 + }, + { + "epoch": 9.082405345211582, + "grad_norm": 25.406312942504883, + "learning_rate": 1e-06, + "loss": 0.5724, + "num_input_tokens_seen": 228404620, + "step": 4078 + }, + { + "epoch": 9.082405345211582, + "loss": 0.5832569003105164, + "loss_ce": 0.00012698877253569663, + "loss_iou": 0.251953125, + "loss_num": 0.0157470703125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 228404620, + "step": 4078 + }, + { + "epoch": 9.084632516703786, + "grad_norm": 21.80425262451172, + "learning_rate": 1e-06, + "loss": 0.4839, + "num_input_tokens_seen": 228460136, + "step": 4079 + }, + { + "epoch": 9.084632516703786, + "loss": 0.3352894186973572, + "loss_ce": 0.00014540574920829386, + "loss_iou": 0.1376953125, + "loss_num": 0.011962890625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 228460136, + "step": 4079 + }, + { + "epoch": 9.086859688195991, + "grad_norm": 20.531923294067383, + "learning_rate": 1e-06, + "loss": 0.491, + "num_input_tokens_seen": 228517568, + "step": 4080 + }, + { + "epoch": 9.086859688195991, + "loss": 0.6657657623291016, + "loss_ce": 0.00014688099327031523, + "loss_iou": 0.2734375, + "loss_num": 0.0234375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 228517568, + "step": 4080 + }, + { + "epoch": 9.089086859688196, + "grad_norm": 21.13003921508789, + "learning_rate": 1e-06, + "loss": 0.6183, + "num_input_tokens_seen": 228571092, + "step": 4081 + }, + { + "epoch": 9.089086859688196, + "loss": 0.7186765670776367, + "loss_ce": 0.0001707267656456679, + "loss_iou": 0.314453125, + "loss_num": 0.0181884765625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 228571092, + "step": 4081 + }, + { + "epoch": 9.091314031180401, + "grad_norm": 17.36870002746582, + "learning_rate": 1e-06, + "loss": 0.7185, + "num_input_tokens_seen": 228626888, + "step": 4082 + }, + { + "epoch": 9.091314031180401, + "loss": 0.9348142147064209, + "loss_ce": 0.000365984917152673, + "loss_iou": 0.384765625, + "loss_num": 0.032470703125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 228626888, + "step": 4082 + }, + { + "epoch": 9.093541202672606, + "grad_norm": 16.720142364501953, + "learning_rate": 1e-06, + "loss": 0.6332, + "num_input_tokens_seen": 228684328, + "step": 4083 + }, + { + "epoch": 9.093541202672606, + "loss": 0.5341998934745789, + "loss_ce": 0.0001422719651600346, + "loss_iou": 0.2265625, + "loss_num": 0.01611328125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 228684328, + "step": 4083 + }, + { + "epoch": 9.09576837416481, + "grad_norm": 21.377214431762695, + "learning_rate": 1e-06, + "loss": 0.6319, + "num_input_tokens_seen": 228741784, + "step": 4084 + }, + { + "epoch": 9.09576837416481, + "loss": 0.5814862251281738, + "loss_ce": 0.0011639189906418324, + "loss_iou": 0.2431640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 228741784, + "step": 4084 + }, + { + "epoch": 9.097995545657016, + "grad_norm": 18.07195472717285, + "learning_rate": 1e-06, + "loss": 0.7059, + "num_input_tokens_seen": 228797852, + "step": 4085 + }, + { + "epoch": 9.097995545657016, + "loss": 0.8597797155380249, + "loss_ce": 0.00016060096095316112, + "loss_iou": 0.3828125, + "loss_num": 0.0189208984375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 228797852, + "step": 4085 + }, + { + "epoch": 9.10022271714922, + "grad_norm": 15.08352279663086, + "learning_rate": 1e-06, + "loss": 0.5056, + "num_input_tokens_seen": 228852356, + "step": 4086 + }, + { + "epoch": 9.10022271714922, + "loss": 0.5499671697616577, + "loss_ce": 0.00016245490405708551, + "loss_iou": 0.25390625, + "loss_num": 0.00830078125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 228852356, + "step": 4086 + }, + { + "epoch": 9.102449888641425, + "grad_norm": 23.534421920776367, + "learning_rate": 1e-06, + "loss": 0.5026, + "num_input_tokens_seen": 228904152, + "step": 4087 + }, + { + "epoch": 9.102449888641425, + "loss": 0.4977412819862366, + "loss_ce": 0.00018270721193403006, + "loss_iou": 0.2080078125, + "loss_num": 0.016357421875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 228904152, + "step": 4087 + }, + { + "epoch": 9.10467706013363, + "grad_norm": 19.300045013427734, + "learning_rate": 1e-06, + "loss": 0.9714, + "num_input_tokens_seen": 228957696, + "step": 4088 + }, + { + "epoch": 9.10467706013363, + "loss": 0.8054395914077759, + "loss_ce": 0.00014173405361361802, + "loss_iou": 0.32421875, + "loss_num": 0.03173828125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 228957696, + "step": 4088 + }, + { + "epoch": 9.106904231625835, + "grad_norm": 20.3017578125, + "learning_rate": 1e-06, + "loss": 0.3923, + "num_input_tokens_seen": 229012032, + "step": 4089 + }, + { + "epoch": 9.106904231625835, + "loss": 0.40640291571617126, + "loss_ce": 0.00015291740419343114, + "loss_iou": 0.1865234375, + "loss_num": 0.006866455078125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 229012032, + "step": 4089 + }, + { + "epoch": 9.10913140311804, + "grad_norm": 20.25347137451172, + "learning_rate": 1e-06, + "loss": 0.5511, + "num_input_tokens_seen": 229067576, + "step": 4090 + }, + { + "epoch": 9.10913140311804, + "loss": 0.5202056169509888, + "loss_ce": 0.00012503171456046402, + "loss_iou": 0.2177734375, + "loss_num": 0.0169677734375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 229067576, + "step": 4090 + }, + { + "epoch": 9.111358574610245, + "grad_norm": 19.562305450439453, + "learning_rate": 1e-06, + "loss": 0.6281, + "num_input_tokens_seen": 229127092, + "step": 4091 + }, + { + "epoch": 9.111358574610245, + "loss": 0.7418767213821411, + "loss_ce": 0.00017745466902852058, + "loss_iou": 0.298828125, + "loss_num": 0.02880859375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 229127092, + "step": 4091 + }, + { + "epoch": 9.11358574610245, + "grad_norm": 19.772968292236328, + "learning_rate": 1e-06, + "loss": 0.5598, + "num_input_tokens_seen": 229181696, + "step": 4092 + }, + { + "epoch": 9.11358574610245, + "loss": 0.6595751047134399, + "loss_ce": 0.00015126551443245262, + "loss_iou": 0.2890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 229181696, + "step": 4092 + }, + { + "epoch": 9.115812917594655, + "grad_norm": 21.81011962890625, + "learning_rate": 1e-06, + "loss": 0.6088, + "num_input_tokens_seen": 229235572, + "step": 4093 + }, + { + "epoch": 9.115812917594655, + "loss": 0.6007593870162964, + "loss_ce": 0.00017348321853205562, + "loss_iou": 0.265625, + "loss_num": 0.0135498046875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 229235572, + "step": 4093 + }, + { + "epoch": 9.11804008908686, + "grad_norm": 19.07417106628418, + "learning_rate": 1e-06, + "loss": 0.4561, + "num_input_tokens_seen": 229291708, + "step": 4094 + }, + { + "epoch": 9.11804008908686, + "loss": 0.5785167217254639, + "loss_ce": 0.00020860986842308193, + "loss_iou": 0.236328125, + "loss_num": 0.02099609375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 229291708, + "step": 4094 + }, + { + "epoch": 9.120267260579064, + "grad_norm": 27.573549270629883, + "learning_rate": 1e-06, + "loss": 0.6232, + "num_input_tokens_seen": 229349472, + "step": 4095 + }, + { + "epoch": 9.120267260579064, + "loss": 0.7934918403625488, + "loss_ce": 0.0002789198188111186, + "loss_iou": 0.322265625, + "loss_num": 0.0296630859375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 229349472, + "step": 4095 + }, + { + "epoch": 9.122494432071269, + "grad_norm": 13.100459098815918, + "learning_rate": 1e-06, + "loss": 0.7147, + "num_input_tokens_seen": 229405976, + "step": 4096 + }, + { + "epoch": 9.122494432071269, + "loss": 0.48514890670776367, + "loss_ce": 0.00016353695536963642, + "loss_iou": 0.22265625, + "loss_num": 0.008056640625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 229405976, + "step": 4096 + }, + { + "epoch": 9.124721603563474, + "grad_norm": 25.375526428222656, + "learning_rate": 1e-06, + "loss": 0.6361, + "num_input_tokens_seen": 229459716, + "step": 4097 + }, + { + "epoch": 9.124721603563474, + "loss": 0.4884394705295563, + "loss_ce": 0.00015823112335056067, + "loss_iou": 0.216796875, + "loss_num": 0.01080322265625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 229459716, + "step": 4097 + }, + { + "epoch": 9.126948775055679, + "grad_norm": 18.099225997924805, + "learning_rate": 1e-06, + "loss": 0.6709, + "num_input_tokens_seen": 229516212, + "step": 4098 + }, + { + "epoch": 9.126948775055679, + "loss": 0.5384310483932495, + "loss_ce": 0.0007113110623322427, + "loss_iou": 0.2353515625, + "loss_num": 0.01348876953125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 229516212, + "step": 4098 + }, + { + "epoch": 9.129175946547884, + "grad_norm": 22.159957885742188, + "learning_rate": 1e-06, + "loss": 0.5206, + "num_input_tokens_seen": 229570184, + "step": 4099 + }, + { + "epoch": 9.129175946547884, + "loss": 0.638085126876831, + "loss_ce": 0.00014572580403182656, + "loss_iou": 0.26171875, + "loss_num": 0.0234375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 229570184, + "step": 4099 + }, + { + "epoch": 9.131403118040089, + "grad_norm": 19.935977935791016, + "learning_rate": 1e-06, + "loss": 0.5385, + "num_input_tokens_seen": 229624652, + "step": 4100 + }, + { + "epoch": 9.131403118040089, + "loss": 0.6349242925643921, + "loss_ce": 0.00015866165631450713, + "loss_iou": 0.28125, + "loss_num": 0.01446533203125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 229624652, + "step": 4100 + }, + { + "epoch": 9.133630289532293, + "grad_norm": 17.789283752441406, + "learning_rate": 1e-06, + "loss": 0.5755, + "num_input_tokens_seen": 229681300, + "step": 4101 + }, + { + "epoch": 9.133630289532293, + "loss": 0.5260428190231323, + "loss_ce": 0.00016388525546062738, + "loss_iou": 0.232421875, + "loss_num": 0.01239013671875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 229681300, + "step": 4101 + }, + { + "epoch": 9.135857461024498, + "grad_norm": 29.85516357421875, + "learning_rate": 1e-06, + "loss": 0.5757, + "num_input_tokens_seen": 229735816, + "step": 4102 + }, + { + "epoch": 9.135857461024498, + "loss": 0.6520152688026428, + "loss_ce": 0.00015980206080712378, + "loss_iou": 0.28125, + "loss_num": 0.0179443359375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 229735816, + "step": 4102 + }, + { + "epoch": 9.138084632516703, + "grad_norm": 17.78467559814453, + "learning_rate": 1e-06, + "loss": 0.4811, + "num_input_tokens_seen": 229789440, + "step": 4103 + }, + { + "epoch": 9.138084632516703, + "loss": 0.5187010169029236, + "loss_ce": 0.00014634460967499763, + "loss_iou": 0.2041015625, + "loss_num": 0.02197265625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 229789440, + "step": 4103 + }, + { + "epoch": 9.140311804008908, + "grad_norm": 13.813704490661621, + "learning_rate": 1e-06, + "loss": 0.5981, + "num_input_tokens_seen": 229845256, + "step": 4104 + }, + { + "epoch": 9.140311804008908, + "loss": 0.6361100673675537, + "loss_ce": 0.000245812872890383, + "loss_iou": 0.25390625, + "loss_num": 0.0252685546875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 229845256, + "step": 4104 + }, + { + "epoch": 9.142538975501113, + "grad_norm": 17.91861343383789, + "learning_rate": 1e-06, + "loss": 0.7475, + "num_input_tokens_seen": 229902832, + "step": 4105 + }, + { + "epoch": 9.142538975501113, + "loss": 0.8098193407058716, + "loss_ce": 0.0002490263432264328, + "loss_iou": 0.369140625, + "loss_num": 0.0142822265625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 229902832, + "step": 4105 + }, + { + "epoch": 9.144766146993318, + "grad_norm": 34.62513732910156, + "learning_rate": 1e-06, + "loss": 0.5171, + "num_input_tokens_seen": 229959432, + "step": 4106 + }, + { + "epoch": 9.144766146993318, + "loss": 0.6218581199645996, + "loss_ce": 0.00015404779696837068, + "loss_iou": 0.265625, + "loss_num": 0.017822265625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 229959432, + "step": 4106 + }, + { + "epoch": 9.146993318485523, + "grad_norm": 16.90599822998047, + "learning_rate": 1e-06, + "loss": 0.6897, + "num_input_tokens_seen": 230015140, + "step": 4107 + }, + { + "epoch": 9.146993318485523, + "loss": 0.9310408234596252, + "loss_ce": 0.00013258628314360976, + "loss_iou": 0.400390625, + "loss_num": 0.0262451171875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 230015140, + "step": 4107 + }, + { + "epoch": 9.14922048997773, + "grad_norm": 18.637083053588867, + "learning_rate": 1e-06, + "loss": 0.5436, + "num_input_tokens_seen": 230074668, + "step": 4108 + }, + { + "epoch": 9.14922048997773, + "loss": 0.5741239786148071, + "loss_ce": 0.0001493622548878193, + "loss_iou": 0.26171875, + "loss_num": 0.0101318359375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 230074668, + "step": 4108 + }, + { + "epoch": 9.151447661469934, + "grad_norm": 26.293113708496094, + "learning_rate": 1e-06, + "loss": 0.5738, + "num_input_tokens_seen": 230131008, + "step": 4109 + }, + { + "epoch": 9.151447661469934, + "loss": 0.5387140512466431, + "loss_ce": 0.00013981794472783804, + "loss_iou": 0.220703125, + "loss_num": 0.0196533203125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 230131008, + "step": 4109 + }, + { + "epoch": 9.153674832962139, + "grad_norm": 99.2131576538086, + "learning_rate": 1e-06, + "loss": 0.6848, + "num_input_tokens_seen": 230186684, + "step": 4110 + }, + { + "epoch": 9.153674832962139, + "loss": 0.8059197068214417, + "loss_ce": 0.00025563541566953063, + "loss_iou": 0.35546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 230186684, + "step": 4110 + }, + { + "epoch": 9.155902004454344, + "grad_norm": 11.691954612731934, + "learning_rate": 1e-06, + "loss": 0.5424, + "num_input_tokens_seen": 230244428, + "step": 4111 + }, + { + "epoch": 9.155902004454344, + "loss": 0.5812520980834961, + "loss_ce": 0.00019743737357202917, + "loss_iou": 0.22265625, + "loss_num": 0.0269775390625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 230244428, + "step": 4111 + }, + { + "epoch": 9.158129175946549, + "grad_norm": 23.4112606048584, + "learning_rate": 1e-06, + "loss": 0.5201, + "num_input_tokens_seen": 230298452, + "step": 4112 + }, + { + "epoch": 9.158129175946549, + "loss": 0.6187844276428223, + "loss_ce": 0.00013208799646236002, + "loss_iou": 0.2490234375, + "loss_num": 0.024169921875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 230298452, + "step": 4112 + }, + { + "epoch": 9.160356347438753, + "grad_norm": 24.178136825561523, + "learning_rate": 1e-06, + "loss": 0.4805, + "num_input_tokens_seen": 230352684, + "step": 4113 + }, + { + "epoch": 9.160356347438753, + "loss": 0.5137137174606323, + "loss_ce": 0.00016394033445976675, + "loss_iou": 0.2451171875, + "loss_num": 0.0045166015625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 230352684, + "step": 4113 + }, + { + "epoch": 9.162583518930958, + "grad_norm": 17.317472457885742, + "learning_rate": 1e-06, + "loss": 0.5563, + "num_input_tokens_seen": 230408152, + "step": 4114 + }, + { + "epoch": 9.162583518930958, + "loss": 0.6397294998168945, + "loss_ce": 0.0001421074557583779, + "loss_iou": 0.26953125, + "loss_num": 0.0198974609375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 230408152, + "step": 4114 + }, + { + "epoch": 9.164810690423163, + "grad_norm": 18.378246307373047, + "learning_rate": 1e-06, + "loss": 0.57, + "num_input_tokens_seen": 230464552, + "step": 4115 + }, + { + "epoch": 9.164810690423163, + "loss": 0.5939757823944092, + "loss_ce": 0.00016474167932756245, + "loss_iou": 0.212890625, + "loss_num": 0.03369140625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 230464552, + "step": 4115 + }, + { + "epoch": 9.167037861915368, + "grad_norm": 13.769119262695312, + "learning_rate": 1e-06, + "loss": 0.6041, + "num_input_tokens_seen": 230520760, + "step": 4116 + }, + { + "epoch": 9.167037861915368, + "loss": 0.7275907397270203, + "loss_ce": 0.0001737515558488667, + "loss_iou": 0.306640625, + "loss_num": 0.0224609375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 230520760, + "step": 4116 + }, + { + "epoch": 9.169265033407573, + "grad_norm": 16.299108505249023, + "learning_rate": 1e-06, + "loss": 0.6447, + "num_input_tokens_seen": 230577540, + "step": 4117 + }, + { + "epoch": 9.169265033407573, + "loss": 0.7198707461357117, + "loss_ce": 0.0002662655897438526, + "loss_iou": 0.29296875, + "loss_num": 0.0269775390625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 230577540, + "step": 4117 + }, + { + "epoch": 9.171492204899778, + "grad_norm": 20.457033157348633, + "learning_rate": 1e-06, + "loss": 0.7404, + "num_input_tokens_seen": 230632328, + "step": 4118 + }, + { + "epoch": 9.171492204899778, + "loss": 0.7303974032402039, + "loss_ce": 0.0001728244824334979, + "loss_iou": 0.3125, + "loss_num": 0.021240234375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 230632328, + "step": 4118 + }, + { + "epoch": 9.173719376391983, + "grad_norm": 17.638404846191406, + "learning_rate": 1e-06, + "loss": 0.5783, + "num_input_tokens_seen": 230687228, + "step": 4119 + }, + { + "epoch": 9.173719376391983, + "loss": 0.46454915404319763, + "loss_ce": 0.00019369515939615667, + "loss_iou": 0.177734375, + "loss_num": 0.021728515625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 230687228, + "step": 4119 + }, + { + "epoch": 9.175946547884188, + "grad_norm": 18.509496688842773, + "learning_rate": 1e-06, + "loss": 0.6066, + "num_input_tokens_seen": 230743560, + "step": 4120 + }, + { + "epoch": 9.175946547884188, + "loss": 0.7997314929962158, + "loss_ce": 0.00017094583017751575, + "loss_iou": 0.33984375, + "loss_num": 0.0234375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 230743560, + "step": 4120 + }, + { + "epoch": 9.178173719376392, + "grad_norm": 16.593032836914062, + "learning_rate": 1e-06, + "loss": 0.6728, + "num_input_tokens_seen": 230800968, + "step": 4121 + }, + { + "epoch": 9.178173719376392, + "loss": 0.949892520904541, + "loss_ce": 0.00018545052444096655, + "loss_iou": 0.37890625, + "loss_num": 0.0380859375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 230800968, + "step": 4121 + }, + { + "epoch": 9.180400890868597, + "grad_norm": 18.3552303314209, + "learning_rate": 1e-06, + "loss": 0.6935, + "num_input_tokens_seen": 230857948, + "step": 4122 + }, + { + "epoch": 9.180400890868597, + "loss": 0.7443253993988037, + "loss_ce": 0.00018475553952157497, + "loss_iou": 0.3125, + "loss_num": 0.02392578125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 230857948, + "step": 4122 + }, + { + "epoch": 9.182628062360802, + "grad_norm": 17.54131317138672, + "learning_rate": 1e-06, + "loss": 0.4531, + "num_input_tokens_seen": 230912532, + "step": 4123 + }, + { + "epoch": 9.182628062360802, + "loss": 0.43069642782211304, + "loss_ce": 0.0001544352126074955, + "loss_iou": 0.1796875, + "loss_num": 0.01409912109375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 230912532, + "step": 4123 + }, + { + "epoch": 9.184855233853007, + "grad_norm": 15.92164421081543, + "learning_rate": 1e-06, + "loss": 0.416, + "num_input_tokens_seen": 230967364, + "step": 4124 + }, + { + "epoch": 9.184855233853007, + "loss": 0.37866830825805664, + "loss_ce": 0.00012826549937017262, + "loss_iou": 0.158203125, + "loss_num": 0.0123291015625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 230967364, + "step": 4124 + }, + { + "epoch": 9.187082405345212, + "grad_norm": 14.523744583129883, + "learning_rate": 1e-06, + "loss": 0.5998, + "num_input_tokens_seen": 231023780, + "step": 4125 + }, + { + "epoch": 9.187082405345212, + "loss": 0.4754905700683594, + "loss_ce": 0.00014873658074066043, + "loss_iou": 0.2177734375, + "loss_num": 0.00799560546875, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 231023780, + "step": 4125 + }, + { + "epoch": 9.189309576837417, + "grad_norm": 41.82172393798828, + "learning_rate": 1e-06, + "loss": 0.6878, + "num_input_tokens_seen": 231081652, + "step": 4126 + }, + { + "epoch": 9.189309576837417, + "loss": 0.8278828859329224, + "loss_ce": 0.0002461716067045927, + "loss_iou": 0.33984375, + "loss_num": 0.029296875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 231081652, + "step": 4126 + }, + { + "epoch": 9.191536748329622, + "grad_norm": 13.996521949768066, + "learning_rate": 1e-06, + "loss": 0.627, + "num_input_tokens_seen": 231137804, + "step": 4127 + }, + { + "epoch": 9.191536748329622, + "loss": 0.6197777390480042, + "loss_ce": 0.0001488196139689535, + "loss_iou": 0.2470703125, + "loss_num": 0.0250244140625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 231137804, + "step": 4127 + }, + { + "epoch": 9.193763919821826, + "grad_norm": 23.2529239654541, + "learning_rate": 1e-06, + "loss": 0.5141, + "num_input_tokens_seen": 231192268, + "step": 4128 + }, + { + "epoch": 9.193763919821826, + "loss": 0.5340644717216492, + "loss_ce": 0.00015944175538606942, + "loss_iou": 0.236328125, + "loss_num": 0.0120849609375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 231192268, + "step": 4128 + }, + { + "epoch": 9.195991091314031, + "grad_norm": 16.674602508544922, + "learning_rate": 1e-06, + "loss": 0.5281, + "num_input_tokens_seen": 231246748, + "step": 4129 + }, + { + "epoch": 9.195991091314031, + "loss": 0.5064660310745239, + "loss_ce": 0.00011834965698653832, + "loss_iou": 0.2197265625, + "loss_num": 0.0133056640625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 231246748, + "step": 4129 + }, + { + "epoch": 9.198218262806236, + "grad_norm": 18.663047790527344, + "learning_rate": 1e-06, + "loss": 0.6424, + "num_input_tokens_seen": 231302740, + "step": 4130 + }, + { + "epoch": 9.198218262806236, + "loss": 0.5936636924743652, + "loss_ce": 0.0001578406518092379, + "loss_iou": 0.2392578125, + "loss_num": 0.02294921875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 231302740, + "step": 4130 + }, + { + "epoch": 9.200445434298441, + "grad_norm": 39.11851119995117, + "learning_rate": 1e-06, + "loss": 0.4673, + "num_input_tokens_seen": 231356712, + "step": 4131 + }, + { + "epoch": 9.200445434298441, + "loss": 0.3150908052921295, + "loss_ce": 0.0001494048337917775, + "loss_iou": 0.126953125, + "loss_num": 0.0123291015625, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 231356712, + "step": 4131 + }, + { + "epoch": 9.202672605790646, + "grad_norm": 16.948453903198242, + "learning_rate": 1e-06, + "loss": 0.537, + "num_input_tokens_seen": 231411692, + "step": 4132 + }, + { + "epoch": 9.202672605790646, + "loss": 0.3508445620536804, + "loss_ce": 0.00013653644418809563, + "loss_iou": 0.1572265625, + "loss_num": 0.00726318359375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 231411692, + "step": 4132 + }, + { + "epoch": 9.20489977728285, + "grad_norm": 18.197696685791016, + "learning_rate": 1e-06, + "loss": 0.5971, + "num_input_tokens_seen": 231468140, + "step": 4133 + }, + { + "epoch": 9.20489977728285, + "loss": 0.47916704416275024, + "loss_ce": 0.0001631466147955507, + "loss_iou": 0.203125, + "loss_num": 0.01470947265625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 231468140, + "step": 4133 + }, + { + "epoch": 9.207126948775056, + "grad_norm": 18.140928268432617, + "learning_rate": 1e-06, + "loss": 0.4389, + "num_input_tokens_seen": 231524076, + "step": 4134 + }, + { + "epoch": 9.207126948775056, + "loss": 0.484072208404541, + "loss_ce": 0.0001854776928666979, + "loss_iou": 0.2021484375, + "loss_num": 0.0159912109375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 231524076, + "step": 4134 + }, + { + "epoch": 9.20935412026726, + "grad_norm": 31.31038475036621, + "learning_rate": 1e-06, + "loss": 0.5013, + "num_input_tokens_seen": 231580384, + "step": 4135 + }, + { + "epoch": 9.20935412026726, + "loss": 0.5428678393363953, + "loss_ce": 0.00014321647176984698, + "loss_iou": 0.23046875, + "loss_num": 0.016357421875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 231580384, + "step": 4135 + }, + { + "epoch": 9.211581291759465, + "grad_norm": 18.321279525756836, + "learning_rate": 1e-06, + "loss": 0.482, + "num_input_tokens_seen": 231635920, + "step": 4136 + }, + { + "epoch": 9.211581291759465, + "loss": 0.5265001058578491, + "loss_ce": 0.0001329151273239404, + "loss_iou": 0.2314453125, + "loss_num": 0.01263427734375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 231635920, + "step": 4136 + }, + { + "epoch": 9.21380846325167, + "grad_norm": 15.607020378112793, + "learning_rate": 1e-06, + "loss": 0.7133, + "num_input_tokens_seen": 231692860, + "step": 4137 + }, + { + "epoch": 9.21380846325167, + "loss": 0.6896048784255981, + "loss_ce": 0.00015171918494161218, + "loss_iou": 0.298828125, + "loss_num": 0.0181884765625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 231692860, + "step": 4137 + }, + { + "epoch": 9.216035634743875, + "grad_norm": 20.895606994628906, + "learning_rate": 1e-06, + "loss": 0.5896, + "num_input_tokens_seen": 231752072, + "step": 4138 + }, + { + "epoch": 9.216035634743875, + "loss": 0.5861691236495972, + "loss_ce": 0.00017053935152944177, + "loss_iou": 0.2431640625, + "loss_num": 0.0198974609375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 231752072, + "step": 4138 + }, + { + "epoch": 9.21826280623608, + "grad_norm": 21.63951873779297, + "learning_rate": 1e-06, + "loss": 0.5425, + "num_input_tokens_seen": 231806848, + "step": 4139 + }, + { + "epoch": 9.21826280623608, + "loss": 0.6595204472541809, + "loss_ce": 0.0004628373426385224, + "loss_iou": 0.296875, + "loss_num": 0.0133056640625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 231806848, + "step": 4139 + }, + { + "epoch": 9.220489977728285, + "grad_norm": 17.077104568481445, + "learning_rate": 1e-06, + "loss": 0.6683, + "num_input_tokens_seen": 231862472, + "step": 4140 + }, + { + "epoch": 9.220489977728285, + "loss": 0.7157517075538635, + "loss_ce": 0.00017556847888045013, + "loss_iou": 0.30078125, + "loss_num": 0.0228271484375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 231862472, + "step": 4140 + }, + { + "epoch": 9.22271714922049, + "grad_norm": 15.992926597595215, + "learning_rate": 1e-06, + "loss": 0.5696, + "num_input_tokens_seen": 231916984, + "step": 4141 + }, + { + "epoch": 9.22271714922049, + "loss": 0.48255985975265503, + "loss_ce": 0.00013797251449432224, + "loss_iou": 0.2138671875, + "loss_num": 0.01080322265625, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 231916984, + "step": 4141 + }, + { + "epoch": 9.224944320712694, + "grad_norm": 17.786970138549805, + "learning_rate": 1e-06, + "loss": 0.6369, + "num_input_tokens_seen": 231971832, + "step": 4142 + }, + { + "epoch": 9.224944320712694, + "loss": 0.581957221031189, + "loss_ce": 0.00017014719196595252, + "loss_iou": 0.267578125, + "loss_num": 0.009521484375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 231971832, + "step": 4142 + }, + { + "epoch": 9.2271714922049, + "grad_norm": 17.536426544189453, + "learning_rate": 1e-06, + "loss": 0.7411, + "num_input_tokens_seen": 232029912, + "step": 4143 + }, + { + "epoch": 9.2271714922049, + "loss": 0.6547312140464783, + "loss_ce": 0.00019021191110368818, + "loss_iou": 0.28515625, + "loss_num": 0.01708984375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 232029912, + "step": 4143 + }, + { + "epoch": 9.229398663697104, + "grad_norm": 17.012691497802734, + "learning_rate": 1e-06, + "loss": 0.5203, + "num_input_tokens_seen": 232089236, + "step": 4144 + }, + { + "epoch": 9.229398663697104, + "loss": 0.5632002353668213, + "loss_ce": 0.00021197632304392755, + "loss_iou": 0.2451171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 232089236, + "step": 4144 + }, + { + "epoch": 9.231625835189309, + "grad_norm": 17.848848342895508, + "learning_rate": 1e-06, + "loss": 0.4004, + "num_input_tokens_seen": 232144528, + "step": 4145 + }, + { + "epoch": 9.231625835189309, + "loss": 0.3932184875011444, + "loss_ce": 0.0002741398348007351, + "loss_iou": 0.169921875, + "loss_num": 0.0106201171875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 232144528, + "step": 4145 + }, + { + "epoch": 9.233853006681514, + "grad_norm": 14.590682983398438, + "learning_rate": 1e-06, + "loss": 0.476, + "num_input_tokens_seen": 232202272, + "step": 4146 + }, + { + "epoch": 9.233853006681514, + "loss": 0.40485870838165283, + "loss_ce": 0.00013458858302328736, + "loss_iou": 0.1728515625, + "loss_num": 0.0118408203125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 232202272, + "step": 4146 + }, + { + "epoch": 9.236080178173719, + "grad_norm": 22.08498764038086, + "learning_rate": 1e-06, + "loss": 0.5566, + "num_input_tokens_seen": 232257404, + "step": 4147 + }, + { + "epoch": 9.236080178173719, + "loss": 0.687615156173706, + "loss_ce": 0.0002372571761952713, + "loss_iou": 0.27734375, + "loss_num": 0.0263671875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 232257404, + "step": 4147 + }, + { + "epoch": 9.238307349665924, + "grad_norm": 33.60602569580078, + "learning_rate": 1e-06, + "loss": 0.5149, + "num_input_tokens_seen": 232313104, + "step": 4148 + }, + { + "epoch": 9.238307349665924, + "loss": 0.46257176995277405, + "loss_ce": 0.00016943998343776911, + "loss_iou": 0.208984375, + "loss_num": 0.0087890625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 232313104, + "step": 4148 + }, + { + "epoch": 9.240534521158128, + "grad_norm": 20.825382232666016, + "learning_rate": 1e-06, + "loss": 0.6308, + "num_input_tokens_seen": 232368704, + "step": 4149 + }, + { + "epoch": 9.240534521158128, + "loss": 0.7248323559761047, + "loss_ce": 0.00022297201212495565, + "loss_iou": 0.291015625, + "loss_num": 0.0283203125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 232368704, + "step": 4149 + }, + { + "epoch": 9.242761692650333, + "grad_norm": 16.274980545043945, + "learning_rate": 1e-06, + "loss": 0.611, + "num_input_tokens_seen": 232423564, + "step": 4150 + }, + { + "epoch": 9.242761692650333, + "loss": 0.8920093178749084, + "loss_ce": 0.0001636209199205041, + "loss_iou": 0.337890625, + "loss_num": 0.04296875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 232423564, + "step": 4150 + }, + { + "epoch": 9.244988864142538, + "grad_norm": 17.573843002319336, + "learning_rate": 1e-06, + "loss": 0.4551, + "num_input_tokens_seen": 232480304, + "step": 4151 + }, + { + "epoch": 9.244988864142538, + "loss": 0.4830518662929535, + "loss_ce": 0.0001416985469404608, + "loss_iou": 0.2060546875, + "loss_num": 0.01409912109375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 232480304, + "step": 4151 + }, + { + "epoch": 9.247216035634743, + "grad_norm": 20.984481811523438, + "learning_rate": 1e-06, + "loss": 0.5364, + "num_input_tokens_seen": 232536816, + "step": 4152 + }, + { + "epoch": 9.247216035634743, + "loss": 0.46505963802337646, + "loss_ce": 0.00015485798940062523, + "loss_iou": 0.1982421875, + "loss_num": 0.01373291015625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 232536816, + "step": 4152 + }, + { + "epoch": 9.249443207126948, + "grad_norm": 16.171337127685547, + "learning_rate": 1e-06, + "loss": 0.4752, + "num_input_tokens_seen": 232592992, + "step": 4153 + }, + { + "epoch": 9.249443207126948, + "loss": 0.5165247917175293, + "loss_ce": 0.0004115123301744461, + "loss_iou": 0.220703125, + "loss_num": 0.01519775390625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 232592992, + "step": 4153 + }, + { + "epoch": 9.251670378619155, + "grad_norm": 21.20688247680664, + "learning_rate": 1e-06, + "loss": 0.5654, + "num_input_tokens_seen": 232649224, + "step": 4154 + }, + { + "epoch": 9.251670378619155, + "loss": 0.4512087404727936, + "loss_ce": 0.00015892238297965378, + "loss_iou": 0.1865234375, + "loss_num": 0.01544189453125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 232649224, + "step": 4154 + }, + { + "epoch": 9.25389755011136, + "grad_norm": 12.871101379394531, + "learning_rate": 1e-06, + "loss": 0.4954, + "num_input_tokens_seen": 232706068, + "step": 4155 + }, + { + "epoch": 9.25389755011136, + "loss": 0.5504859685897827, + "loss_ce": 0.00019304068700876087, + "loss_iou": 0.248046875, + "loss_num": 0.010986328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 232706068, + "step": 4155 + }, + { + "epoch": 9.256124721603564, + "grad_norm": 21.039005279541016, + "learning_rate": 1e-06, + "loss": 0.6367, + "num_input_tokens_seen": 232759908, + "step": 4156 + }, + { + "epoch": 9.256124721603564, + "loss": 0.6510411500930786, + "loss_ce": 0.00016224203864112496, + "loss_iou": 0.2890625, + "loss_num": 0.014404296875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 232759908, + "step": 4156 + }, + { + "epoch": 9.25835189309577, + "grad_norm": 17.5001163482666, + "learning_rate": 1e-06, + "loss": 0.5683, + "num_input_tokens_seen": 232815104, + "step": 4157 + }, + { + "epoch": 9.25835189309577, + "loss": 0.57974773645401, + "loss_ce": 0.00015791512851137668, + "loss_iou": 0.23046875, + "loss_num": 0.0238037109375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 232815104, + "step": 4157 + }, + { + "epoch": 9.260579064587974, + "grad_norm": 13.760968208312988, + "learning_rate": 1e-06, + "loss": 0.49, + "num_input_tokens_seen": 232870704, + "step": 4158 + }, + { + "epoch": 9.260579064587974, + "loss": 0.553850531578064, + "loss_ce": 0.00013953927555121481, + "loss_iou": 0.2392578125, + "loss_num": 0.01495361328125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 232870704, + "step": 4158 + }, + { + "epoch": 9.262806236080179, + "grad_norm": 20.684478759765625, + "learning_rate": 1e-06, + "loss": 0.578, + "num_input_tokens_seen": 232925348, + "step": 4159 + }, + { + "epoch": 9.262806236080179, + "loss": 0.4780513644218445, + "loss_ce": 0.000146102363942191, + "loss_iou": 0.212890625, + "loss_num": 0.010498046875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 232925348, + "step": 4159 + }, + { + "epoch": 9.265033407572384, + "grad_norm": 13.17013931274414, + "learning_rate": 1e-06, + "loss": 0.6326, + "num_input_tokens_seen": 232982220, + "step": 4160 + }, + { + "epoch": 9.265033407572384, + "loss": 0.7166314125061035, + "loss_ce": 0.00020072060578968376, + "loss_iou": 0.302734375, + "loss_num": 0.0224609375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 232982220, + "step": 4160 + }, + { + "epoch": 9.267260579064589, + "grad_norm": 21.762144088745117, + "learning_rate": 1e-06, + "loss": 0.6647, + "num_input_tokens_seen": 233039268, + "step": 4161 + }, + { + "epoch": 9.267260579064589, + "loss": 0.6170930862426758, + "loss_ce": 0.00014968152390792966, + "loss_iou": 0.2451171875, + "loss_num": 0.025390625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 233039268, + "step": 4161 + }, + { + "epoch": 9.269487750556793, + "grad_norm": 19.671171188354492, + "learning_rate": 1e-06, + "loss": 0.4354, + "num_input_tokens_seen": 233095824, + "step": 4162 + }, + { + "epoch": 9.269487750556793, + "loss": 0.41189324855804443, + "loss_ce": 0.00015009564231149852, + "loss_iou": 0.1845703125, + "loss_num": 0.0087890625, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 233095824, + "step": 4162 + }, + { + "epoch": 9.271714922048998, + "grad_norm": 19.537986755371094, + "learning_rate": 1e-06, + "loss": 0.5711, + "num_input_tokens_seen": 233152836, + "step": 4163 + }, + { + "epoch": 9.271714922048998, + "loss": 0.654711902141571, + "loss_ce": 0.00017089179891627282, + "loss_iou": 0.298828125, + "loss_num": 0.01141357421875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 233152836, + "step": 4163 + }, + { + "epoch": 9.273942093541203, + "grad_norm": 15.77131462097168, + "learning_rate": 1e-06, + "loss": 0.566, + "num_input_tokens_seen": 233207496, + "step": 4164 + }, + { + "epoch": 9.273942093541203, + "loss": 0.5520115494728088, + "loss_ce": 0.00019268158939667046, + "loss_iou": 0.2353515625, + "loss_num": 0.01611328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 233207496, + "step": 4164 + }, + { + "epoch": 9.276169265033408, + "grad_norm": 13.012890815734863, + "learning_rate": 1e-06, + "loss": 0.5237, + "num_input_tokens_seen": 233264596, + "step": 4165 + }, + { + "epoch": 9.276169265033408, + "loss": 0.46829238533973694, + "loss_ce": 0.00015275325858965516, + "loss_iou": 0.1875, + "loss_num": 0.018798828125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 233264596, + "step": 4165 + }, + { + "epoch": 9.278396436525613, + "grad_norm": 24.705101013183594, + "learning_rate": 1e-06, + "loss": 0.6364, + "num_input_tokens_seen": 233321472, + "step": 4166 + }, + { + "epoch": 9.278396436525613, + "loss": 0.5150579214096069, + "loss_ce": 0.00016535192844457924, + "loss_iou": 0.228515625, + "loss_num": 0.0115966796875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 233321472, + "step": 4166 + }, + { + "epoch": 9.280623608017818, + "grad_norm": 15.74944019317627, + "learning_rate": 1e-06, + "loss": 0.5495, + "num_input_tokens_seen": 233377064, + "step": 4167 + }, + { + "epoch": 9.280623608017818, + "loss": 0.5138007402420044, + "loss_ce": 0.00022045343939680606, + "loss_iou": 0.23046875, + "loss_num": 0.01055908203125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 233377064, + "step": 4167 + }, + { + "epoch": 9.282850779510023, + "grad_norm": 25.632081985473633, + "learning_rate": 1e-06, + "loss": 0.5602, + "num_input_tokens_seen": 233432460, + "step": 4168 + }, + { + "epoch": 9.282850779510023, + "loss": 0.6908320784568787, + "loss_ce": 0.0001582444820087403, + "loss_iou": 0.26953125, + "loss_num": 0.0299072265625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 233432460, + "step": 4168 + }, + { + "epoch": 9.285077951002227, + "grad_norm": 21.129497528076172, + "learning_rate": 1e-06, + "loss": 0.6149, + "num_input_tokens_seen": 233489932, + "step": 4169 + }, + { + "epoch": 9.285077951002227, + "loss": 0.8304713368415833, + "loss_ce": 0.00014905552961863577, + "loss_iou": 0.337890625, + "loss_num": 0.0306396484375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 233489932, + "step": 4169 + }, + { + "epoch": 9.287305122494432, + "grad_norm": 18.864761352539062, + "learning_rate": 1e-06, + "loss": 0.6289, + "num_input_tokens_seen": 233546048, + "step": 4170 + }, + { + "epoch": 9.287305122494432, + "loss": 0.4682886600494385, + "loss_ce": 0.00014900718815624714, + "loss_iou": 0.2109375, + "loss_num": 0.00946044921875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 233546048, + "step": 4170 + }, + { + "epoch": 9.289532293986637, + "grad_norm": 20.65398597717285, + "learning_rate": 1e-06, + "loss": 0.5547, + "num_input_tokens_seen": 233601528, + "step": 4171 + }, + { + "epoch": 9.289532293986637, + "loss": 0.6244362592697144, + "loss_ce": 0.00016863204655237496, + "loss_iou": 0.2734375, + "loss_num": 0.01556396484375, + "loss_xval": 0.625, + "num_input_tokens_seen": 233601528, + "step": 4171 + }, + { + "epoch": 9.291759465478842, + "grad_norm": 23.496076583862305, + "learning_rate": 1e-06, + "loss": 0.5856, + "num_input_tokens_seen": 233657160, + "step": 4172 + }, + { + "epoch": 9.291759465478842, + "loss": 0.5482483506202698, + "loss_ce": 0.00015264737885445356, + "loss_iou": 0.1982421875, + "loss_num": 0.0303955078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 233657160, + "step": 4172 + }, + { + "epoch": 9.293986636971047, + "grad_norm": 19.034543991088867, + "learning_rate": 1e-06, + "loss": 0.7883, + "num_input_tokens_seen": 233713276, + "step": 4173 + }, + { + "epoch": 9.293986636971047, + "loss": 0.7952208518981934, + "loss_ce": 0.00017690191452857107, + "loss_iou": 0.3203125, + "loss_num": 0.03076171875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 233713276, + "step": 4173 + }, + { + "epoch": 9.296213808463252, + "grad_norm": 17.01585578918457, + "learning_rate": 1e-06, + "loss": 0.3602, + "num_input_tokens_seen": 233769732, + "step": 4174 + }, + { + "epoch": 9.296213808463252, + "loss": 0.4717066287994385, + "loss_ce": 0.0001490233844378963, + "loss_iou": 0.20703125, + "loss_num": 0.01141357421875, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 233769732, + "step": 4174 + }, + { + "epoch": 9.298440979955457, + "grad_norm": 18.97732925415039, + "learning_rate": 1e-06, + "loss": 0.4866, + "num_input_tokens_seen": 233827660, + "step": 4175 + }, + { + "epoch": 9.298440979955457, + "loss": 0.5509434938430786, + "loss_ce": 0.0001622582640266046, + "loss_iou": 0.25, + "loss_num": 0.0098876953125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 233827660, + "step": 4175 + }, + { + "epoch": 9.300668151447661, + "grad_norm": 23.303890228271484, + "learning_rate": 1e-06, + "loss": 0.67, + "num_input_tokens_seen": 233884728, + "step": 4176 + }, + { + "epoch": 9.300668151447661, + "loss": 0.754581093788147, + "loss_ce": 0.00018660849309526384, + "loss_iou": 0.298828125, + "loss_num": 0.031494140625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 233884728, + "step": 4176 + }, + { + "epoch": 9.302895322939866, + "grad_norm": 15.087565422058105, + "learning_rate": 1e-06, + "loss": 0.5322, + "num_input_tokens_seen": 233942264, + "step": 4177 + }, + { + "epoch": 9.302895322939866, + "loss": 0.692180871963501, + "loss_ce": 0.00016421612235717475, + "loss_iou": 0.296875, + "loss_num": 0.019775390625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 233942264, + "step": 4177 + }, + { + "epoch": 9.305122494432071, + "grad_norm": 21.872156143188477, + "learning_rate": 1e-06, + "loss": 0.4854, + "num_input_tokens_seen": 233999788, + "step": 4178 + }, + { + "epoch": 9.305122494432071, + "loss": 0.37598371505737305, + "loss_ce": 0.00012923183385282755, + "loss_iou": 0.166015625, + "loss_num": 0.0089111328125, + "loss_xval": 0.375, + "num_input_tokens_seen": 233999788, + "step": 4178 + }, + { + "epoch": 9.307349665924276, + "grad_norm": 15.20783519744873, + "learning_rate": 1e-06, + "loss": 0.7171, + "num_input_tokens_seen": 234058404, + "step": 4179 + }, + { + "epoch": 9.307349665924276, + "loss": 0.7457845211029053, + "loss_ce": 0.00017908678273670375, + "loss_iou": 0.3046875, + "loss_num": 0.0274658203125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 234058404, + "step": 4179 + }, + { + "epoch": 9.309576837416481, + "grad_norm": 18.751060485839844, + "learning_rate": 1e-06, + "loss": 0.5907, + "num_input_tokens_seen": 234115236, + "step": 4180 + }, + { + "epoch": 9.309576837416481, + "loss": 0.5246922969818115, + "loss_ce": 0.00015614343283232301, + "loss_iou": 0.2294921875, + "loss_num": 0.013427734375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 234115236, + "step": 4180 + }, + { + "epoch": 9.311804008908686, + "grad_norm": 21.91156005859375, + "learning_rate": 1e-06, + "loss": 0.4344, + "num_input_tokens_seen": 234173444, + "step": 4181 + }, + { + "epoch": 9.311804008908686, + "loss": 0.5160158276557922, + "loss_ce": 0.0001467098481953144, + "loss_iou": 0.244140625, + "loss_num": 0.005767822265625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 234173444, + "step": 4181 + }, + { + "epoch": 9.31403118040089, + "grad_norm": 31.379457473754883, + "learning_rate": 1e-06, + "loss": 0.8267, + "num_input_tokens_seen": 234228884, + "step": 4182 + }, + { + "epoch": 9.31403118040089, + "loss": 0.8775807619094849, + "loss_ce": 0.00013937248149886727, + "loss_iou": 0.369140625, + "loss_num": 0.027587890625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 234228884, + "step": 4182 + }, + { + "epoch": 9.316258351893095, + "grad_norm": 18.9929256439209, + "learning_rate": 1e-06, + "loss": 0.5908, + "num_input_tokens_seen": 234286752, + "step": 4183 + }, + { + "epoch": 9.316258351893095, + "loss": 0.8116600513458252, + "loss_ce": 0.000136615228257142, + "loss_iou": 0.337890625, + "loss_num": 0.02685546875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 234286752, + "step": 4183 + }, + { + "epoch": 9.3184855233853, + "grad_norm": 16.527360916137695, + "learning_rate": 1e-06, + "loss": 0.6648, + "num_input_tokens_seen": 234342332, + "step": 4184 + }, + { + "epoch": 9.3184855233853, + "loss": 0.7774823904037476, + "loss_ce": 0.00013861866318620741, + "loss_iou": 0.34765625, + "loss_num": 0.0167236328125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 234342332, + "step": 4184 + }, + { + "epoch": 9.320712694877505, + "grad_norm": 16.93600845336914, + "learning_rate": 1e-06, + "loss": 0.5782, + "num_input_tokens_seen": 234398820, + "step": 4185 + }, + { + "epoch": 9.320712694877505, + "loss": 0.7552863359451294, + "loss_ce": 0.0001593705965206027, + "loss_iou": 0.322265625, + "loss_num": 0.022216796875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 234398820, + "step": 4185 + }, + { + "epoch": 9.32293986636971, + "grad_norm": 22.869224548339844, + "learning_rate": 1e-06, + "loss": 0.7147, + "num_input_tokens_seen": 234455760, + "step": 4186 + }, + { + "epoch": 9.32293986636971, + "loss": 0.5804110169410706, + "loss_ce": 0.00024132244288921356, + "loss_iou": 0.24609375, + "loss_num": 0.0177001953125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 234455760, + "step": 4186 + }, + { + "epoch": 9.325167037861915, + "grad_norm": 15.121347427368164, + "learning_rate": 1e-06, + "loss": 0.4896, + "num_input_tokens_seen": 234510508, + "step": 4187 + }, + { + "epoch": 9.325167037861915, + "loss": 0.3788001537322998, + "loss_ce": 0.00013804002082906663, + "loss_iou": 0.166015625, + "loss_num": 0.0093994140625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 234510508, + "step": 4187 + }, + { + "epoch": 9.32739420935412, + "grad_norm": 18.335205078125, + "learning_rate": 1e-06, + "loss": 0.7204, + "num_input_tokens_seen": 234567480, + "step": 4188 + }, + { + "epoch": 9.32739420935412, + "loss": 0.6255242824554443, + "loss_ce": 0.0001580739044584334, + "loss_iou": 0.251953125, + "loss_num": 0.024658203125, + "loss_xval": 0.625, + "num_input_tokens_seen": 234567480, + "step": 4188 + }, + { + "epoch": 9.329621380846325, + "grad_norm": 12.408183097839355, + "learning_rate": 1e-06, + "loss": 0.58, + "num_input_tokens_seen": 234625836, + "step": 4189 + }, + { + "epoch": 9.329621380846325, + "loss": 0.36714544892311096, + "loss_ce": 0.0002021001128014177, + "loss_iou": 0.146484375, + "loss_num": 0.0147705078125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 234625836, + "step": 4189 + }, + { + "epoch": 9.33184855233853, + "grad_norm": 20.51274299621582, + "learning_rate": 1e-06, + "loss": 0.5372, + "num_input_tokens_seen": 234683824, + "step": 4190 + }, + { + "epoch": 9.33184855233853, + "loss": 0.4362949728965759, + "loss_ce": 0.00013776315608993173, + "loss_iou": 0.1865234375, + "loss_num": 0.01263427734375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 234683824, + "step": 4190 + }, + { + "epoch": 9.334075723830734, + "grad_norm": 67.99667358398438, + "learning_rate": 1e-06, + "loss": 0.5763, + "num_input_tokens_seen": 234739016, + "step": 4191 + }, + { + "epoch": 9.334075723830734, + "loss": 0.4585108757019043, + "loss_ce": 0.00013684081204701215, + "loss_iou": 0.1923828125, + "loss_num": 0.0146484375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 234739016, + "step": 4191 + }, + { + "epoch": 9.33630289532294, + "grad_norm": 17.7122859954834, + "learning_rate": 1e-06, + "loss": 0.6762, + "num_input_tokens_seen": 234793360, + "step": 4192 + }, + { + "epoch": 9.33630289532294, + "loss": 0.6960656642913818, + "loss_ce": 0.00014278030721470714, + "loss_iou": 0.32421875, + "loss_num": 0.0093994140625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 234793360, + "step": 4192 + }, + { + "epoch": 9.338530066815144, + "grad_norm": 14.239766120910645, + "learning_rate": 1e-06, + "loss": 0.4748, + "num_input_tokens_seen": 234849088, + "step": 4193 + }, + { + "epoch": 9.338530066815144, + "loss": 0.40485137701034546, + "loss_ce": 0.0001883181685116142, + "loss_iou": 0.18359375, + "loss_num": 0.007598876953125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 234849088, + "step": 4193 + }, + { + "epoch": 9.340757238307349, + "grad_norm": 22.02215576171875, + "learning_rate": 1e-06, + "loss": 0.4715, + "num_input_tokens_seen": 234905732, + "step": 4194 + }, + { + "epoch": 9.340757238307349, + "loss": 0.4357007145881653, + "loss_ce": 0.0001538339420221746, + "loss_iou": 0.193359375, + "loss_num": 0.00982666015625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 234905732, + "step": 4194 + }, + { + "epoch": 9.342984409799554, + "grad_norm": 24.65700340270996, + "learning_rate": 1e-06, + "loss": 0.6571, + "num_input_tokens_seen": 234959588, + "step": 4195 + }, + { + "epoch": 9.342984409799554, + "loss": 0.6442734003067017, + "loss_ce": 0.00023038909421302378, + "loss_iou": 0.2353515625, + "loss_num": 0.03466796875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 234959588, + "step": 4195 + }, + { + "epoch": 9.345211581291759, + "grad_norm": 18.189048767089844, + "learning_rate": 1e-06, + "loss": 0.5611, + "num_input_tokens_seen": 235018980, + "step": 4196 + }, + { + "epoch": 9.345211581291759, + "loss": 0.6549949645996094, + "loss_ce": 0.00020979381224606186, + "loss_iou": 0.29296875, + "loss_num": 0.0140380859375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 235018980, + "step": 4196 + }, + { + "epoch": 9.347438752783964, + "grad_norm": 43.657958984375, + "learning_rate": 1e-06, + "loss": 0.7156, + "num_input_tokens_seen": 235076428, + "step": 4197 + }, + { + "epoch": 9.347438752783964, + "loss": 0.6480019092559814, + "loss_ce": 0.0002968419576063752, + "loss_iou": 0.28125, + "loss_num": 0.0172119140625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 235076428, + "step": 4197 + }, + { + "epoch": 9.34966592427617, + "grad_norm": 18.815326690673828, + "learning_rate": 1e-06, + "loss": 0.526, + "num_input_tokens_seen": 235134848, + "step": 4198 + }, + { + "epoch": 9.34966592427617, + "loss": 0.5375351905822754, + "loss_ce": 0.00018168592941947281, + "loss_iou": 0.2373046875, + "loss_num": 0.01263427734375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 235134848, + "step": 4198 + }, + { + "epoch": 9.351893095768375, + "grad_norm": 16.838205337524414, + "learning_rate": 1e-06, + "loss": 0.6095, + "num_input_tokens_seen": 235189616, + "step": 4199 + }, + { + "epoch": 9.351893095768375, + "loss": 0.5999011993408203, + "loss_ce": 0.00016978610074147582, + "loss_iou": 0.26171875, + "loss_num": 0.0152587890625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 235189616, + "step": 4199 + }, + { + "epoch": 9.35412026726058, + "grad_norm": 22.742488861083984, + "learning_rate": 1e-06, + "loss": 0.6383, + "num_input_tokens_seen": 235243244, + "step": 4200 + }, + { + "epoch": 9.35412026726058, + "loss": 0.49684494733810425, + "loss_ce": 0.00014081134577281773, + "loss_iou": 0.205078125, + "loss_num": 0.0174560546875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 235243244, + "step": 4200 + }, + { + "epoch": 9.356347438752785, + "grad_norm": 18.40520668029785, + "learning_rate": 1e-06, + "loss": 0.6173, + "num_input_tokens_seen": 235300852, + "step": 4201 + }, + { + "epoch": 9.356347438752785, + "loss": 0.7159720659255981, + "loss_ce": 0.00015178298053797334, + "loss_iou": 0.291015625, + "loss_num": 0.02685546875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 235300852, + "step": 4201 + }, + { + "epoch": 9.35857461024499, + "grad_norm": 18.26913833618164, + "learning_rate": 1e-06, + "loss": 0.6494, + "num_input_tokens_seen": 235357404, + "step": 4202 + }, + { + "epoch": 9.35857461024499, + "loss": 0.7315636873245239, + "loss_ce": 0.00024043236044235528, + "loss_iou": 0.328125, + "loss_num": 0.0145263671875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 235357404, + "step": 4202 + }, + { + "epoch": 9.360801781737194, + "grad_norm": 20.889400482177734, + "learning_rate": 1e-06, + "loss": 0.6694, + "num_input_tokens_seen": 235412144, + "step": 4203 + }, + { + "epoch": 9.360801781737194, + "loss": 0.6847151517868042, + "loss_ce": 0.00014484771236311644, + "loss_iou": 0.2890625, + "loss_num": 0.0208740234375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 235412144, + "step": 4203 + }, + { + "epoch": 9.3630289532294, + "grad_norm": 14.52232551574707, + "learning_rate": 1e-06, + "loss": 0.522, + "num_input_tokens_seen": 235469972, + "step": 4204 + }, + { + "epoch": 9.3630289532294, + "loss": 0.5159176588058472, + "loss_ce": 0.00017058770754374564, + "loss_iou": 0.2197265625, + "loss_num": 0.01531982421875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 235469972, + "step": 4204 + }, + { + "epoch": 9.365256124721604, + "grad_norm": 18.92323875427246, + "learning_rate": 1e-06, + "loss": 0.5651, + "num_input_tokens_seen": 235526988, + "step": 4205 + }, + { + "epoch": 9.365256124721604, + "loss": 0.7476789951324463, + "loss_ce": 0.00036454031942412257, + "loss_iou": 0.330078125, + "loss_num": 0.0172119140625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 235526988, + "step": 4205 + }, + { + "epoch": 9.367483296213809, + "grad_norm": 25.92852210998535, + "learning_rate": 1e-06, + "loss": 0.4669, + "num_input_tokens_seen": 235584824, + "step": 4206 + }, + { + "epoch": 9.367483296213809, + "loss": 0.36491858959198, + "loss_ce": 0.00017249592929147184, + "loss_iou": 0.166015625, + "loss_num": 0.006622314453125, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 235584824, + "step": 4206 + }, + { + "epoch": 9.369710467706014, + "grad_norm": 23.176660537719727, + "learning_rate": 1e-06, + "loss": 0.5446, + "num_input_tokens_seen": 235642972, + "step": 4207 + }, + { + "epoch": 9.369710467706014, + "loss": 0.425027072429657, + "loss_ce": 0.00022239354439079762, + "loss_iou": 0.171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 235642972, + "step": 4207 + }, + { + "epoch": 9.371937639198219, + "grad_norm": 47.62214660644531, + "learning_rate": 1e-06, + "loss": 0.7125, + "num_input_tokens_seen": 235698332, + "step": 4208 + }, + { + "epoch": 9.371937639198219, + "loss": 0.8402742743492126, + "loss_ce": 0.00018637791799847037, + "loss_iou": 0.33203125, + "loss_num": 0.035400390625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 235698332, + "step": 4208 + }, + { + "epoch": 9.374164810690424, + "grad_norm": 15.558445930480957, + "learning_rate": 1e-06, + "loss": 0.4657, + "num_input_tokens_seen": 235754988, + "step": 4209 + }, + { + "epoch": 9.374164810690424, + "loss": 0.354988694190979, + "loss_ce": 0.000130318061565049, + "loss_iou": 0.1494140625, + "loss_num": 0.0111083984375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 235754988, + "step": 4209 + }, + { + "epoch": 9.376391982182628, + "grad_norm": 21.12288475036621, + "learning_rate": 1e-06, + "loss": 0.6511, + "num_input_tokens_seen": 235812532, + "step": 4210 + }, + { + "epoch": 9.376391982182628, + "loss": 0.7051099538803101, + "loss_ce": 0.0001538842625450343, + "loss_iou": 0.302734375, + "loss_num": 0.01953125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 235812532, + "step": 4210 + }, + { + "epoch": 9.378619153674833, + "grad_norm": 16.031126022338867, + "learning_rate": 1e-06, + "loss": 0.4852, + "num_input_tokens_seen": 235867920, + "step": 4211 + }, + { + "epoch": 9.378619153674833, + "loss": 0.5213667750358582, + "loss_ce": 0.0004927542759105563, + "loss_iou": 0.2333984375, + "loss_num": 0.0107421875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 235867920, + "step": 4211 + }, + { + "epoch": 9.380846325167038, + "grad_norm": 16.82216453552246, + "learning_rate": 1e-06, + "loss": 0.468, + "num_input_tokens_seen": 235923424, + "step": 4212 + }, + { + "epoch": 9.380846325167038, + "loss": 0.5014048218727112, + "loss_ce": 0.00018410818302072585, + "loss_iou": 0.2109375, + "loss_num": 0.015869140625, + "loss_xval": 0.5, + "num_input_tokens_seen": 235923424, + "step": 4212 + }, + { + "epoch": 9.383073496659243, + "grad_norm": 15.369388580322266, + "learning_rate": 1e-06, + "loss": 0.4188, + "num_input_tokens_seen": 235977656, + "step": 4213 + }, + { + "epoch": 9.383073496659243, + "loss": 0.466952919960022, + "loss_ce": 0.0001560571399750188, + "loss_iou": 0.1943359375, + "loss_num": 0.015625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 235977656, + "step": 4213 + }, + { + "epoch": 9.385300668151448, + "grad_norm": 12.31820011138916, + "learning_rate": 1e-06, + "loss": 0.6936, + "num_input_tokens_seen": 236034136, + "step": 4214 + }, + { + "epoch": 9.385300668151448, + "loss": 0.6039036512374878, + "loss_ce": 0.00014389277203008533, + "loss_iou": 0.265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 236034136, + "step": 4214 + }, + { + "epoch": 9.387527839643653, + "grad_norm": 20.034059524536133, + "learning_rate": 1e-06, + "loss": 0.5376, + "num_input_tokens_seen": 236092456, + "step": 4215 + }, + { + "epoch": 9.387527839643653, + "loss": 0.5938976407051086, + "loss_ce": 0.0001476521574659273, + "loss_iou": 0.267578125, + "loss_num": 0.01141357421875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 236092456, + "step": 4215 + }, + { + "epoch": 9.389755011135858, + "grad_norm": 46.42024612426758, + "learning_rate": 1e-06, + "loss": 0.5741, + "num_input_tokens_seen": 236147020, + "step": 4216 + }, + { + "epoch": 9.389755011135858, + "loss": 0.5294545292854309, + "loss_ce": 0.00015764265845064074, + "loss_iou": 0.244140625, + "loss_num": 0.0079345703125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 236147020, + "step": 4216 + }, + { + "epoch": 9.391982182628063, + "grad_norm": 26.644548416137695, + "learning_rate": 1e-06, + "loss": 0.6237, + "num_input_tokens_seen": 236200772, + "step": 4217 + }, + { + "epoch": 9.391982182628063, + "loss": 0.7529926300048828, + "loss_ce": 0.00018498601275496185, + "loss_iou": 0.33984375, + "loss_num": 0.01483154296875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 236200772, + "step": 4217 + }, + { + "epoch": 9.394209354120267, + "grad_norm": 18.652217864990234, + "learning_rate": 1e-06, + "loss": 0.5676, + "num_input_tokens_seen": 236255604, + "step": 4218 + }, + { + "epoch": 9.394209354120267, + "loss": 0.6091314554214478, + "loss_ce": 0.00024474196834489703, + "loss_iou": 0.28125, + "loss_num": 0.0091552734375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 236255604, + "step": 4218 + }, + { + "epoch": 9.396436525612472, + "grad_norm": 14.043095588684082, + "learning_rate": 1e-06, + "loss": 0.7096, + "num_input_tokens_seen": 236310220, + "step": 4219 + }, + { + "epoch": 9.396436525612472, + "loss": 0.8942954540252686, + "loss_ce": 0.0002524922601878643, + "loss_iou": 0.390625, + "loss_num": 0.022705078125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 236310220, + "step": 4219 + }, + { + "epoch": 9.398663697104677, + "grad_norm": 17.981460571289062, + "learning_rate": 1e-06, + "loss": 0.5667, + "num_input_tokens_seen": 236364604, + "step": 4220 + }, + { + "epoch": 9.398663697104677, + "loss": 0.4448683559894562, + "loss_ce": 0.0001662159920670092, + "loss_iou": 0.2001953125, + "loss_num": 0.00872802734375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 236364604, + "step": 4220 + }, + { + "epoch": 9.400890868596882, + "grad_norm": 14.154129028320312, + "learning_rate": 1e-06, + "loss": 0.6947, + "num_input_tokens_seen": 236421912, + "step": 4221 + }, + { + "epoch": 9.400890868596882, + "loss": 0.7411311268806458, + "loss_ce": 0.00016432552365586162, + "loss_iou": 0.34765625, + "loss_num": 0.00933837890625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 236421912, + "step": 4221 + }, + { + "epoch": 9.403118040089087, + "grad_norm": 34.250003814697266, + "learning_rate": 1e-06, + "loss": 0.5965, + "num_input_tokens_seen": 236478284, + "step": 4222 + }, + { + "epoch": 9.403118040089087, + "loss": 0.5931603312492371, + "loss_ce": 0.00014277252194005996, + "loss_iou": 0.25, + "loss_num": 0.0185546875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 236478284, + "step": 4222 + }, + { + "epoch": 9.405345211581292, + "grad_norm": 21.780054092407227, + "learning_rate": 1e-06, + "loss": 0.5467, + "num_input_tokens_seen": 236534108, + "step": 4223 + }, + { + "epoch": 9.405345211581292, + "loss": 0.5391287803649902, + "loss_ce": 0.0001272944500669837, + "loss_iou": 0.2158203125, + "loss_num": 0.0213623046875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 236534108, + "step": 4223 + }, + { + "epoch": 9.407572383073497, + "grad_norm": 22.980321884155273, + "learning_rate": 1e-06, + "loss": 0.7141, + "num_input_tokens_seen": 236590772, + "step": 4224 + }, + { + "epoch": 9.407572383073497, + "loss": 0.6423543691635132, + "loss_ce": 0.00014242672477848828, + "loss_iou": 0.27734375, + "loss_num": 0.01708984375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 236590772, + "step": 4224 + }, + { + "epoch": 9.409799554565701, + "grad_norm": 21.113697052001953, + "learning_rate": 1e-06, + "loss": 0.4206, + "num_input_tokens_seen": 236648468, + "step": 4225 + }, + { + "epoch": 9.409799554565701, + "loss": 0.31469690799713135, + "loss_ce": 0.0001217244571307674, + "loss_iou": 0.12060546875, + "loss_num": 0.0146484375, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 236648468, + "step": 4225 + }, + { + "epoch": 9.412026726057906, + "grad_norm": 9.035064697265625, + "learning_rate": 1e-06, + "loss": 0.3957, + "num_input_tokens_seen": 236707008, + "step": 4226 + }, + { + "epoch": 9.412026726057906, + "loss": 0.30737414956092834, + "loss_ce": 0.00012316979700699449, + "loss_iou": 0.125, + "loss_num": 0.011474609375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 236707008, + "step": 4226 + }, + { + "epoch": 9.414253897550111, + "grad_norm": 16.591636657714844, + "learning_rate": 1e-06, + "loss": 0.6342, + "num_input_tokens_seen": 236761664, + "step": 4227 + }, + { + "epoch": 9.414253897550111, + "loss": 0.7024285793304443, + "loss_ce": 0.00015806331066414714, + "loss_iou": 0.29296875, + "loss_num": 0.0234375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 236761664, + "step": 4227 + }, + { + "epoch": 9.416481069042316, + "grad_norm": 21.84915542602539, + "learning_rate": 1e-06, + "loss": 0.6148, + "num_input_tokens_seen": 236819440, + "step": 4228 + }, + { + "epoch": 9.416481069042316, + "loss": 0.5935507416725159, + "loss_ce": 0.0002889999595936388, + "loss_iou": 0.2412109375, + "loss_num": 0.02197265625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 236819440, + "step": 4228 + }, + { + "epoch": 9.41870824053452, + "grad_norm": 17.725332260131836, + "learning_rate": 1e-06, + "loss": 0.5218, + "num_input_tokens_seen": 236876512, + "step": 4229 + }, + { + "epoch": 9.41870824053452, + "loss": 0.546193540096283, + "loss_ce": 0.00017301872139796615, + "loss_iou": 0.24609375, + "loss_num": 0.0106201171875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 236876512, + "step": 4229 + }, + { + "epoch": 9.420935412026726, + "grad_norm": 14.556755065917969, + "learning_rate": 1e-06, + "loss": 0.4876, + "num_input_tokens_seen": 236933876, + "step": 4230 + }, + { + "epoch": 9.420935412026726, + "loss": 0.5147203207015991, + "loss_ce": 0.0008042675326578319, + "loss_iou": 0.220703125, + "loss_num": 0.0147705078125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 236933876, + "step": 4230 + }, + { + "epoch": 9.42316258351893, + "grad_norm": 30.021310806274414, + "learning_rate": 1e-06, + "loss": 0.5368, + "num_input_tokens_seen": 236989724, + "step": 4231 + }, + { + "epoch": 9.42316258351893, + "loss": 0.6030337810516357, + "loss_ce": 0.000128521875012666, + "loss_iou": 0.26171875, + "loss_num": 0.01544189453125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 236989724, + "step": 4231 + }, + { + "epoch": 9.425389755011135, + "grad_norm": 17.476322174072266, + "learning_rate": 1e-06, + "loss": 0.6046, + "num_input_tokens_seen": 237044832, + "step": 4232 + }, + { + "epoch": 9.425389755011135, + "loss": 0.6286413669586182, + "loss_ce": 0.00022339157294481993, + "loss_iou": 0.291015625, + "loss_num": 0.0089111328125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 237044832, + "step": 4232 + }, + { + "epoch": 9.42761692650334, + "grad_norm": 18.216716766357422, + "learning_rate": 1e-06, + "loss": 0.6325, + "num_input_tokens_seen": 237102544, + "step": 4233 + }, + { + "epoch": 9.42761692650334, + "loss": 0.7180312871932983, + "loss_ce": 0.0002578936982899904, + "loss_iou": 0.29296875, + "loss_num": 0.026611328125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 237102544, + "step": 4233 + }, + { + "epoch": 9.429844097995545, + "grad_norm": 20.78913688659668, + "learning_rate": 1e-06, + "loss": 0.5074, + "num_input_tokens_seen": 237158060, + "step": 4234 + }, + { + "epoch": 9.429844097995545, + "loss": 0.5021035671234131, + "loss_ce": 0.00015046056068968028, + "loss_iou": 0.2275390625, + "loss_num": 0.00933837890625, + "loss_xval": 0.5, + "num_input_tokens_seen": 237158060, + "step": 4234 + }, + { + "epoch": 9.43207126948775, + "grad_norm": 138.009033203125, + "learning_rate": 1e-06, + "loss": 0.5155, + "num_input_tokens_seen": 237213952, + "step": 4235 + }, + { + "epoch": 9.43207126948775, + "loss": 0.5691501498222351, + "loss_ce": 0.0005466500297188759, + "loss_iou": 0.240234375, + "loss_num": 0.017578125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 237213952, + "step": 4235 + }, + { + "epoch": 9.434298440979955, + "grad_norm": 18.69287109375, + "learning_rate": 1e-06, + "loss": 0.5015, + "num_input_tokens_seen": 237269936, + "step": 4236 + }, + { + "epoch": 9.434298440979955, + "loss": 0.6309968829154968, + "loss_ce": 0.00013751011283602566, + "loss_iou": 0.251953125, + "loss_num": 0.0255126953125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 237269936, + "step": 4236 + }, + { + "epoch": 9.43652561247216, + "grad_norm": 11.285958290100098, + "learning_rate": 1e-06, + "loss": 0.5439, + "num_input_tokens_seen": 237325496, + "step": 4237 + }, + { + "epoch": 9.43652561247216, + "loss": 0.6378533840179443, + "loss_ce": 0.0001580730895511806, + "loss_iou": 0.27734375, + "loss_num": 0.01708984375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 237325496, + "step": 4237 + }, + { + "epoch": 9.438752783964365, + "grad_norm": 24.240333557128906, + "learning_rate": 1e-06, + "loss": 0.5876, + "num_input_tokens_seen": 237383104, + "step": 4238 + }, + { + "epoch": 9.438752783964365, + "loss": 0.6686908006668091, + "loss_ce": 0.0002032603952102363, + "loss_iou": 0.2734375, + "loss_num": 0.02392578125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 237383104, + "step": 4238 + }, + { + "epoch": 9.44097995545657, + "grad_norm": 16.92664337158203, + "learning_rate": 1e-06, + "loss": 0.4389, + "num_input_tokens_seen": 237434992, + "step": 4239 + }, + { + "epoch": 9.44097995545657, + "loss": 0.38501814007759094, + "loss_ce": 0.00013044432853348553, + "loss_iou": 0.1611328125, + "loss_num": 0.012451171875, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 237434992, + "step": 4239 + }, + { + "epoch": 9.443207126948774, + "grad_norm": 20.529521942138672, + "learning_rate": 1e-06, + "loss": 0.6429, + "num_input_tokens_seen": 237489668, + "step": 4240 + }, + { + "epoch": 9.443207126948774, + "loss": 0.703424870967865, + "loss_ce": 0.00017782000941224396, + "loss_iou": 0.283203125, + "loss_num": 0.0274658203125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 237489668, + "step": 4240 + }, + { + "epoch": 9.44543429844098, + "grad_norm": 18.514591217041016, + "learning_rate": 1e-06, + "loss": 0.5228, + "num_input_tokens_seen": 237545776, + "step": 4241 + }, + { + "epoch": 9.44543429844098, + "loss": 0.508231520652771, + "loss_ce": 0.00041901745134964585, + "loss_iou": 0.236328125, + "loss_num": 0.007171630859375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 237545776, + "step": 4241 + }, + { + "epoch": 9.447661469933184, + "grad_norm": 22.630306243896484, + "learning_rate": 1e-06, + "loss": 0.5125, + "num_input_tokens_seen": 237603000, + "step": 4242 + }, + { + "epoch": 9.447661469933184, + "loss": 0.5746460556983948, + "loss_ce": 0.00018314782937522978, + "loss_iou": 0.22265625, + "loss_num": 0.02587890625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 237603000, + "step": 4242 + }, + { + "epoch": 9.449888641425389, + "grad_norm": 21.890134811401367, + "learning_rate": 1e-06, + "loss": 0.5389, + "num_input_tokens_seen": 237658508, + "step": 4243 + }, + { + "epoch": 9.449888641425389, + "loss": 0.5602293610572815, + "loss_ce": 0.00017077414668165147, + "loss_iou": 0.248046875, + "loss_num": 0.01263427734375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 237658508, + "step": 4243 + }, + { + "epoch": 9.452115812917596, + "grad_norm": 17.784313201904297, + "learning_rate": 1e-06, + "loss": 0.5817, + "num_input_tokens_seen": 237714064, + "step": 4244 + }, + { + "epoch": 9.452115812917596, + "loss": 0.5421576499938965, + "loss_ce": 0.0001654581428738311, + "loss_iou": 0.2470703125, + "loss_num": 0.00933837890625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 237714064, + "step": 4244 + }, + { + "epoch": 9.4543429844098, + "grad_norm": 17.17974853515625, + "learning_rate": 1e-06, + "loss": 0.4908, + "num_input_tokens_seen": 237768912, + "step": 4245 + }, + { + "epoch": 9.4543429844098, + "loss": 0.34260523319244385, + "loss_ce": 0.00013697342365048826, + "loss_iou": 0.13671875, + "loss_num": 0.01361083984375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 237768912, + "step": 4245 + }, + { + "epoch": 9.456570155902005, + "grad_norm": 22.440263748168945, + "learning_rate": 1e-06, + "loss": 0.8688, + "num_input_tokens_seen": 237822376, + "step": 4246 + }, + { + "epoch": 9.456570155902005, + "loss": 0.7955905795097351, + "loss_ce": 0.00018044964235741645, + "loss_iou": 0.322265625, + "loss_num": 0.030029296875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 237822376, + "step": 4246 + }, + { + "epoch": 9.45879732739421, + "grad_norm": 20.348644256591797, + "learning_rate": 1e-06, + "loss": 0.5448, + "num_input_tokens_seen": 237879852, + "step": 4247 + }, + { + "epoch": 9.45879732739421, + "loss": 0.7884389758110046, + "loss_ce": 0.00023097131634131074, + "loss_iou": 0.314453125, + "loss_num": 0.031982421875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 237879852, + "step": 4247 + }, + { + "epoch": 9.461024498886415, + "grad_norm": 98.13805389404297, + "learning_rate": 1e-06, + "loss": 0.7861, + "num_input_tokens_seen": 237933556, + "step": 4248 + }, + { + "epoch": 9.461024498886415, + "loss": 1.0614817142486572, + "loss_ce": 0.0002023179258685559, + "loss_iou": 0.453125, + "loss_num": 0.03076171875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 237933556, + "step": 4248 + }, + { + "epoch": 9.46325167037862, + "grad_norm": 18.363706588745117, + "learning_rate": 1e-06, + "loss": 0.5792, + "num_input_tokens_seen": 237990416, + "step": 4249 + }, + { + "epoch": 9.46325167037862, + "loss": 0.6430014371871948, + "loss_ce": 0.00017919728998094797, + "loss_iou": 0.2734375, + "loss_num": 0.01904296875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 237990416, + "step": 4249 + }, + { + "epoch": 9.465478841870825, + "grad_norm": 22.31871223449707, + "learning_rate": 1e-06, + "loss": 0.5357, + "num_input_tokens_seen": 238048204, + "step": 4250 + }, + { + "epoch": 9.465478841870825, + "eval_seeclick_web_CIoU": 0.5783384740352631, + "eval_seeclick_web_GIoU": 0.5768938064575195, + "eval_seeclick_web_IoU": 0.596380203962326, + "eval_seeclick_web_MAE_all": 0.016195162199437618, + "eval_seeclick_web_MAE_h": 0.007964757736772299, + "eval_seeclick_web_MAE_w": 0.01662321202456951, + "eval_seeclick_web_MAE_x_boxes": 0.00983009533956647, + "eval_seeclick_web_MAE_y_boxes": 0.021932302275672555, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9355359077453613, + "eval_seeclick_web_loss_ce": 0.00021685881074517965, + "eval_seeclick_web_loss_iou": 0.424072265625, + "eval_seeclick_web_loss_num": 0.013062477111816406, + "eval_seeclick_web_loss_xval": 0.9130859375, + "eval_seeclick_web_runtime": 23.3851, + "eval_seeclick_web_samples_per_second": 2.138, + "eval_seeclick_web_steps_per_second": 0.086, + "num_input_tokens_seen": 238048204, + "step": 4250 + }, + { + "epoch": 9.465478841870825, + "eval_icons_CIoU": 0.2803712487220764, + "eval_icons_GIoU": 0.30777665972709656, + "eval_icons_IoU": 0.3619535565376282, + "eval_icons_MAE_all": 0.06462299637496471, + "eval_icons_MAE_h": 0.039737068116664886, + "eval_icons_MAE_w": 0.06902601942420006, + "eval_icons_MAE_x_boxes": 0.05829739384353161, + "eval_icons_MAE_y_boxes": 0.039782424457371235, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.738389253616333, + "eval_icons_loss_ce": 0.00025170089793391526, + "eval_icons_loss_iou": 0.6759033203125, + "eval_icons_loss_num": 0.061977386474609375, + "eval_icons_loss_xval": 1.659912109375, + "eval_icons_runtime": 22.6548, + "eval_icons_samples_per_second": 2.207, + "eval_icons_steps_per_second": 0.088, + "num_input_tokens_seen": 238048204, + "step": 4250 + }, + { + "epoch": 9.465478841870825, + "eval_screenspot_CIoU": 0.34290045499801636, + "eval_screenspot_GIoU": 0.36323591073354083, + "eval_screenspot_IoU": 0.4251118103663127, + "eval_screenspot_MAE_all": 0.06305227304498355, + "eval_screenspot_MAE_h": 0.03816718918581804, + "eval_screenspot_MAE_w": 0.07184332360823949, + "eval_screenspot_MAE_x_boxes": 0.07595415661732356, + "eval_screenspot_MAE_y_boxes": 0.04537342426677545, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 1.6450954675674438, + "eval_screenspot_loss_ce": 0.0002674317511264235, + "eval_screenspot_loss_iou": 0.6758626302083334, + "eval_screenspot_loss_num": 0.07347997029622395, + "eval_screenspot_loss_xval": 1.7189127604166667, + "eval_screenspot_runtime": 39.9574, + "eval_screenspot_samples_per_second": 2.227, + "eval_screenspot_steps_per_second": 0.075, + "num_input_tokens_seen": 238048204, + "step": 4250 + }, + { + "epoch": 9.465478841870825, + "eval_compot_CIoU": 0.3499128520488739, + "eval_compot_GIoU": 0.3589301258325577, + "eval_compot_IoU": 0.40847519040107727, + "eval_compot_MAE_all": 0.018001767806708813, + "eval_compot_MAE_h": 0.008998606353998184, + "eval_compot_MAE_w": 0.021249551326036453, + "eval_compot_MAE_x_boxes": 0.029959955252707005, + "eval_compot_MAE_y_boxes": 0.007053635781630874, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3970842361450195, + "eval_compot_loss_ce": 0.00020831655274378136, + "eval_compot_loss_iou": 0.6414794921875, + "eval_compot_loss_num": 0.016920089721679688, + "eval_compot_loss_xval": 1.3681640625, + "eval_compot_runtime": 21.8924, + "eval_compot_samples_per_second": 2.284, + "eval_compot_steps_per_second": 0.091, + "num_input_tokens_seen": 238048204, + "step": 4250 + }, + { + "epoch": 9.465478841870825, + "eval_custom_ui_val_CIoU": 0.4645145701037513, + "eval_custom_ui_val_GIoU": 0.4788343757390976, + "eval_custom_ui_val_IoU": 0.5250709156195322, + "eval_custom_ui_val_MAE_all": 0.03104415287574132, + "eval_custom_ui_val_MAE_h": 0.01666957584934102, + "eval_custom_ui_val_MAE_w": 0.039596209612985454, + "eval_custom_ui_val_MAE_x_boxes": 0.0380424867487616, + "eval_custom_ui_val_MAE_y_boxes": 0.015655622765835788, + "eval_custom_ui_val_inside_bbox": 0.7353395091162788, + "eval_custom_ui_val_loss": 1.2056481838226318, + "eval_custom_ui_val_loss_ce": 0.00023572022231140485, + "eval_custom_ui_val_loss_iou": 0.5117323133680556, + "eval_custom_ui_val_loss_num": 0.028479894002278645, + "eval_custom_ui_val_loss_xval": 1.1659071180555556, + "eval_custom_ui_val_runtime": 64.1276, + "eval_custom_ui_val_samples_per_second": 4.132, + "eval_custom_ui_val_steps_per_second": 0.14, + "num_input_tokens_seen": 238048204, + "step": 4250 + }, + { + "epoch": 9.465478841870825, + "loss": 0.9052478671073914, + "loss_ce": 0.00021855966770090163, + "loss_iou": 0.390625, + "loss_num": 0.0250244140625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 238048204, + "step": 4250 + }, + { + "epoch": 9.46770601336303, + "grad_norm": 13.239431381225586, + "learning_rate": 1e-06, + "loss": 0.5228, + "num_input_tokens_seen": 238102764, + "step": 4251 + }, + { + "epoch": 9.46770601336303, + "loss": 0.4459550380706787, + "loss_ce": 0.00015426415484398603, + "loss_iou": 0.1982421875, + "loss_num": 0.00994873046875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 238102764, + "step": 4251 + }, + { + "epoch": 9.469933184855234, + "grad_norm": 16.412900924682617, + "learning_rate": 1e-06, + "loss": 0.5837, + "num_input_tokens_seen": 238160024, + "step": 4252 + }, + { + "epoch": 9.469933184855234, + "loss": 0.6651994585990906, + "loss_ce": 0.00016038438479881734, + "loss_iou": 0.265625, + "loss_num": 0.0269775390625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 238160024, + "step": 4252 + }, + { + "epoch": 9.47216035634744, + "grad_norm": 16.5249080657959, + "learning_rate": 1e-06, + "loss": 0.535, + "num_input_tokens_seen": 238218512, + "step": 4253 + }, + { + "epoch": 9.47216035634744, + "loss": 0.5461084246635437, + "loss_ce": 0.00020996385137550533, + "loss_iou": 0.248046875, + "loss_num": 0.01007080078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 238218512, + "step": 4253 + }, + { + "epoch": 9.474387527839644, + "grad_norm": 17.52018165588379, + "learning_rate": 1e-06, + "loss": 0.6211, + "num_input_tokens_seen": 238277132, + "step": 4254 + }, + { + "epoch": 9.474387527839644, + "loss": 0.5265886783599854, + "loss_ce": 0.00022152790916152298, + "loss_iou": 0.2216796875, + "loss_num": 0.0166015625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 238277132, + "step": 4254 + }, + { + "epoch": 9.476614699331849, + "grad_norm": 18.231037139892578, + "learning_rate": 1e-06, + "loss": 0.5999, + "num_input_tokens_seen": 238331752, + "step": 4255 + }, + { + "epoch": 9.476614699331849, + "loss": 0.7489705085754395, + "loss_ce": 0.00019125922699458897, + "loss_iou": 0.318359375, + "loss_num": 0.022216796875, + "loss_xval": 0.75, + "num_input_tokens_seen": 238331752, + "step": 4255 + }, + { + "epoch": 9.478841870824054, + "grad_norm": 27.039873123168945, + "learning_rate": 1e-06, + "loss": 0.6843, + "num_input_tokens_seen": 238387640, + "step": 4256 + }, + { + "epoch": 9.478841870824054, + "loss": 0.7631433010101318, + "loss_ce": 0.00020384913659654558, + "loss_iou": 0.3359375, + "loss_num": 0.018310546875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 238387640, + "step": 4256 + }, + { + "epoch": 9.481069042316259, + "grad_norm": 33.0074577331543, + "learning_rate": 1e-06, + "loss": 0.6398, + "num_input_tokens_seen": 238440696, + "step": 4257 + }, + { + "epoch": 9.481069042316259, + "loss": 0.8715030550956726, + "loss_ce": 0.0001651808270253241, + "loss_iou": 0.3828125, + "loss_num": 0.0213623046875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 238440696, + "step": 4257 + }, + { + "epoch": 9.483296213808464, + "grad_norm": 27.93197250366211, + "learning_rate": 1e-06, + "loss": 0.8218, + "num_input_tokens_seen": 238494316, + "step": 4258 + }, + { + "epoch": 9.483296213808464, + "loss": 0.602424681186676, + "loss_ce": 0.0001297968119615689, + "loss_iou": 0.255859375, + "loss_num": 0.0184326171875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 238494316, + "step": 4258 + }, + { + "epoch": 9.485523385300668, + "grad_norm": 23.008649826049805, + "learning_rate": 1e-06, + "loss": 0.7639, + "num_input_tokens_seen": 238550660, + "step": 4259 + }, + { + "epoch": 9.485523385300668, + "loss": 0.7036758065223694, + "loss_ce": 0.0001846282830229029, + "loss_iou": 0.296875, + "loss_num": 0.0223388671875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 238550660, + "step": 4259 + }, + { + "epoch": 9.487750556792873, + "grad_norm": 18.077539443969727, + "learning_rate": 1e-06, + "loss": 0.5111, + "num_input_tokens_seen": 238606896, + "step": 4260 + }, + { + "epoch": 9.487750556792873, + "loss": 0.44276681542396545, + "loss_ce": 0.00013988392311148345, + "loss_iou": 0.1943359375, + "loss_num": 0.010498046875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 238606896, + "step": 4260 + }, + { + "epoch": 9.489977728285078, + "grad_norm": 12.999273300170898, + "learning_rate": 1e-06, + "loss": 0.6034, + "num_input_tokens_seen": 238662636, + "step": 4261 + }, + { + "epoch": 9.489977728285078, + "loss": 0.6766662001609802, + "loss_ce": 0.00015251210425049067, + "loss_iou": 0.283203125, + "loss_num": 0.0216064453125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 238662636, + "step": 4261 + }, + { + "epoch": 9.492204899777283, + "grad_norm": 22.135456085205078, + "learning_rate": 1e-06, + "loss": 0.5358, + "num_input_tokens_seen": 238717560, + "step": 4262 + }, + { + "epoch": 9.492204899777283, + "loss": 0.38586312532424927, + "loss_ce": 0.000181969502591528, + "loss_iou": 0.173828125, + "loss_num": 0.00762939453125, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 238717560, + "step": 4262 + }, + { + "epoch": 9.494432071269488, + "grad_norm": 50.78253173828125, + "learning_rate": 1e-06, + "loss": 0.6531, + "num_input_tokens_seen": 238775784, + "step": 4263 + }, + { + "epoch": 9.494432071269488, + "loss": 0.7694522142410278, + "loss_ce": 0.00016509005217812955, + "loss_iou": 0.32421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 238775784, + "step": 4263 + }, + { + "epoch": 9.496659242761693, + "grad_norm": 19.99468421936035, + "learning_rate": 1e-06, + "loss": 0.4683, + "num_input_tokens_seen": 238834380, + "step": 4264 + }, + { + "epoch": 9.496659242761693, + "loss": 0.5259066820144653, + "loss_ce": 0.00014983075379859656, + "loss_iou": 0.2197265625, + "loss_num": 0.0169677734375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 238834380, + "step": 4264 + }, + { + "epoch": 9.498886414253898, + "grad_norm": 20.870853424072266, + "learning_rate": 1e-06, + "loss": 0.7663, + "num_input_tokens_seen": 238892804, + "step": 4265 + }, + { + "epoch": 9.498886414253898, + "loss": 0.5954189300537109, + "loss_ce": 0.00014309046673588455, + "loss_iou": 0.2421875, + "loss_num": 0.02197265625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 238892804, + "step": 4265 + }, + { + "epoch": 9.501113585746102, + "grad_norm": 16.311065673828125, + "learning_rate": 1e-06, + "loss": 0.5614, + "num_input_tokens_seen": 238949684, + "step": 4266 + }, + { + "epoch": 9.501113585746102, + "loss": 0.5311484336853027, + "loss_ce": 0.00014256531721912324, + "loss_iou": 0.2412109375, + "loss_num": 0.010009765625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 238949684, + "step": 4266 + }, + { + "epoch": 9.503340757238307, + "grad_norm": 234.5572509765625, + "learning_rate": 1e-06, + "loss": 0.6156, + "num_input_tokens_seen": 239004348, + "step": 4267 + }, + { + "epoch": 9.503340757238307, + "loss": 0.7360097169876099, + "loss_ce": 0.00041402934584766626, + "loss_iou": 0.306640625, + "loss_num": 0.0247802734375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 239004348, + "step": 4267 + }, + { + "epoch": 9.505567928730512, + "grad_norm": 22.837961196899414, + "learning_rate": 1e-06, + "loss": 0.5485, + "num_input_tokens_seen": 239061392, + "step": 4268 + }, + { + "epoch": 9.505567928730512, + "loss": 0.5970693826675415, + "loss_ce": 0.0001455858291592449, + "loss_iou": 0.2470703125, + "loss_num": 0.0206298828125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 239061392, + "step": 4268 + }, + { + "epoch": 9.507795100222717, + "grad_norm": 24.823139190673828, + "learning_rate": 1e-06, + "loss": 0.6112, + "num_input_tokens_seen": 239115904, + "step": 4269 + }, + { + "epoch": 9.507795100222717, + "loss": 0.7151393890380859, + "loss_ce": 0.00017360490164719522, + "loss_iou": 0.265625, + "loss_num": 0.037109375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 239115904, + "step": 4269 + }, + { + "epoch": 9.510022271714922, + "grad_norm": 31.860271453857422, + "learning_rate": 1e-06, + "loss": 0.5909, + "num_input_tokens_seen": 239170464, + "step": 4270 + }, + { + "epoch": 9.510022271714922, + "loss": 0.7071930170059204, + "loss_ce": 0.00025335949612781405, + "loss_iou": 0.3046875, + "loss_num": 0.019287109375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 239170464, + "step": 4270 + }, + { + "epoch": 9.512249443207127, + "grad_norm": 32.744850158691406, + "learning_rate": 1e-06, + "loss": 0.4666, + "num_input_tokens_seen": 239227924, + "step": 4271 + }, + { + "epoch": 9.512249443207127, + "loss": 0.31606927514076233, + "loss_ce": 0.00027338817017152905, + "loss_iou": 0.1435546875, + "loss_num": 0.00567626953125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 239227924, + "step": 4271 + }, + { + "epoch": 9.514476614699332, + "grad_norm": 24.312049865722656, + "learning_rate": 1e-06, + "loss": 0.6541, + "num_input_tokens_seen": 239285676, + "step": 4272 + }, + { + "epoch": 9.514476614699332, + "loss": 0.7336921691894531, + "loss_ce": 0.0002937186509370804, + "loss_iou": 0.291015625, + "loss_num": 0.030517578125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 239285676, + "step": 4272 + }, + { + "epoch": 9.516703786191536, + "grad_norm": 16.174928665161133, + "learning_rate": 1e-06, + "loss": 0.6278, + "num_input_tokens_seen": 239340228, + "step": 4273 + }, + { + "epoch": 9.516703786191536, + "loss": 0.6358986496925354, + "loss_ce": 0.0001564474805491045, + "loss_iou": 0.263671875, + "loss_num": 0.0216064453125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 239340228, + "step": 4273 + }, + { + "epoch": 9.518930957683741, + "grad_norm": 46.78654861450195, + "learning_rate": 1e-06, + "loss": 0.5724, + "num_input_tokens_seen": 239396412, + "step": 4274 + }, + { + "epoch": 9.518930957683741, + "loss": 0.6387228965759277, + "loss_ce": 0.00029518798692151904, + "loss_iou": 0.26953125, + "loss_num": 0.0198974609375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 239396412, + "step": 4274 + }, + { + "epoch": 9.521158129175946, + "grad_norm": 14.414083480834961, + "learning_rate": 1e-06, + "loss": 0.374, + "num_input_tokens_seen": 239450920, + "step": 4275 + }, + { + "epoch": 9.521158129175946, + "loss": 0.27442389726638794, + "loss_ce": 0.00013192339974921197, + "loss_iou": 0.1044921875, + "loss_num": 0.01318359375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 239450920, + "step": 4275 + }, + { + "epoch": 9.523385300668151, + "grad_norm": 25.984390258789062, + "learning_rate": 1e-06, + "loss": 0.584, + "num_input_tokens_seen": 239506768, + "step": 4276 + }, + { + "epoch": 9.523385300668151, + "loss": 0.7418030500411987, + "loss_ce": 0.0003479471488390118, + "loss_iou": 0.314453125, + "loss_num": 0.0220947265625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 239506768, + "step": 4276 + }, + { + "epoch": 9.525612472160356, + "grad_norm": 25.130739212036133, + "learning_rate": 1e-06, + "loss": 0.5816, + "num_input_tokens_seen": 239559000, + "step": 4277 + }, + { + "epoch": 9.525612472160356, + "loss": 0.566669225692749, + "loss_ce": 0.00014092770288698375, + "loss_iou": 0.2333984375, + "loss_num": 0.0201416015625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 239559000, + "step": 4277 + }, + { + "epoch": 9.52783964365256, + "grad_norm": 18.2275447845459, + "learning_rate": 1e-06, + "loss": 0.4939, + "num_input_tokens_seen": 239616448, + "step": 4278 + }, + { + "epoch": 9.52783964365256, + "loss": 0.45471665263175964, + "loss_ce": 0.0001268136693397537, + "loss_iou": 0.2109375, + "loss_num": 0.006591796875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 239616448, + "step": 4278 + }, + { + "epoch": 9.530066815144766, + "grad_norm": 17.732545852661133, + "learning_rate": 1e-06, + "loss": 0.389, + "num_input_tokens_seen": 239674876, + "step": 4279 + }, + { + "epoch": 9.530066815144766, + "loss": 0.4159805476665497, + "loss_ce": 0.00014803148224018514, + "loss_iou": 0.17578125, + "loss_num": 0.012939453125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 239674876, + "step": 4279 + }, + { + "epoch": 9.53229398663697, + "grad_norm": 39.37736511230469, + "learning_rate": 1e-06, + "loss": 0.5871, + "num_input_tokens_seen": 239732948, + "step": 4280 + }, + { + "epoch": 9.53229398663697, + "loss": 0.49721166491508484, + "loss_ce": 0.0001413416030118242, + "loss_iou": 0.224609375, + "loss_num": 0.00958251953125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 239732948, + "step": 4280 + }, + { + "epoch": 9.534521158129175, + "grad_norm": 32.132293701171875, + "learning_rate": 1e-06, + "loss": 0.4756, + "num_input_tokens_seen": 239789608, + "step": 4281 + }, + { + "epoch": 9.534521158129175, + "loss": 0.44500744342803955, + "loss_ce": 0.00018322419782634825, + "loss_iou": 0.1904296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 239789608, + "step": 4281 + }, + { + "epoch": 9.53674832962138, + "grad_norm": 14.220974922180176, + "learning_rate": 1e-06, + "loss": 0.4334, + "num_input_tokens_seen": 239844376, + "step": 4282 + }, + { + "epoch": 9.53674832962138, + "loss": 0.40624189376831055, + "loss_ce": 0.00023602474539075047, + "loss_iou": 0.1484375, + "loss_num": 0.02197265625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 239844376, + "step": 4282 + }, + { + "epoch": 9.538975501113585, + "grad_norm": 23.4123592376709, + "learning_rate": 1e-06, + "loss": 0.5361, + "num_input_tokens_seen": 239896980, + "step": 4283 + }, + { + "epoch": 9.538975501113585, + "loss": 0.43205326795578003, + "loss_ce": 0.00016851615509949625, + "loss_iou": 0.1884765625, + "loss_num": 0.01104736328125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 239896980, + "step": 4283 + }, + { + "epoch": 9.54120267260579, + "grad_norm": 18.76468276977539, + "learning_rate": 1e-06, + "loss": 0.6264, + "num_input_tokens_seen": 239953088, + "step": 4284 + }, + { + "epoch": 9.54120267260579, + "loss": 0.6263715624809265, + "loss_ce": 0.00015087236533872783, + "loss_iou": 0.287109375, + "loss_num": 0.01068115234375, + "loss_xval": 0.625, + "num_input_tokens_seen": 239953088, + "step": 4284 + }, + { + "epoch": 9.543429844097995, + "grad_norm": 15.258724212646484, + "learning_rate": 1e-06, + "loss": 0.5531, + "num_input_tokens_seen": 240011052, + "step": 4285 + }, + { + "epoch": 9.543429844097995, + "loss": 0.5455443859100342, + "loss_ce": 0.00013422755000647157, + "loss_iou": 0.19921875, + "loss_num": 0.029541015625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 240011052, + "step": 4285 + }, + { + "epoch": 9.5456570155902, + "grad_norm": 20.07108497619629, + "learning_rate": 1e-06, + "loss": 0.6354, + "num_input_tokens_seen": 240061388, + "step": 4286 + }, + { + "epoch": 9.5456570155902, + "loss": 0.670364260673523, + "loss_ce": 0.00019821789464913309, + "loss_iou": 0.287109375, + "loss_num": 0.0194091796875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 240061388, + "step": 4286 + }, + { + "epoch": 9.547884187082406, + "grad_norm": 16.95429229736328, + "learning_rate": 1e-06, + "loss": 0.5388, + "num_input_tokens_seen": 240120732, + "step": 4287 + }, + { + "epoch": 9.547884187082406, + "loss": 0.6176313161849976, + "loss_ce": 0.00013861627667210996, + "loss_iou": 0.27734375, + "loss_num": 0.012939453125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 240120732, + "step": 4287 + }, + { + "epoch": 9.550111358574611, + "grad_norm": 30.3793888092041, + "learning_rate": 1e-06, + "loss": 0.7166, + "num_input_tokens_seen": 240176776, + "step": 4288 + }, + { + "epoch": 9.550111358574611, + "loss": 0.8734985589981079, + "loss_ce": 0.000207598292035982, + "loss_iou": 0.359375, + "loss_num": 0.0308837890625, + "loss_xval": 0.875, + "num_input_tokens_seen": 240176776, + "step": 4288 + }, + { + "epoch": 9.552338530066816, + "grad_norm": 23.587675094604492, + "learning_rate": 1e-06, + "loss": 0.6852, + "num_input_tokens_seen": 240232852, + "step": 4289 + }, + { + "epoch": 9.552338530066816, + "loss": 0.5840327739715576, + "loss_ce": 0.00017050979658961296, + "loss_iou": 0.263671875, + "loss_num": 0.011474609375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 240232852, + "step": 4289 + }, + { + "epoch": 9.55456570155902, + "grad_norm": 33.9605598449707, + "learning_rate": 1e-06, + "loss": 0.5779, + "num_input_tokens_seen": 240288524, + "step": 4290 + }, + { + "epoch": 9.55456570155902, + "loss": 0.6915979385375977, + "loss_ce": 0.00019171604071743786, + "loss_iou": 0.310546875, + "loss_num": 0.01434326171875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 240288524, + "step": 4290 + }, + { + "epoch": 9.556792873051226, + "grad_norm": 13.900967597961426, + "learning_rate": 1e-06, + "loss": 0.4135, + "num_input_tokens_seen": 240344628, + "step": 4291 + }, + { + "epoch": 9.556792873051226, + "loss": 0.5288236737251282, + "loss_ce": 0.00013714321539737284, + "loss_iou": 0.2255859375, + "loss_num": 0.01544189453125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 240344628, + "step": 4291 + }, + { + "epoch": 9.55902004454343, + "grad_norm": 40.7745246887207, + "learning_rate": 1e-06, + "loss": 0.5172, + "num_input_tokens_seen": 240399768, + "step": 4292 + }, + { + "epoch": 9.55902004454343, + "loss": 0.6885933876037598, + "loss_ce": 0.0001778678852133453, + "loss_iou": 0.30078125, + "loss_num": 0.017333984375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 240399768, + "step": 4292 + }, + { + "epoch": 9.561247216035635, + "grad_norm": 24.22138786315918, + "learning_rate": 1e-06, + "loss": 0.6703, + "num_input_tokens_seen": 240456252, + "step": 4293 + }, + { + "epoch": 9.561247216035635, + "loss": 0.6952459812164307, + "loss_ce": 0.00023865490220487118, + "loss_iou": 0.265625, + "loss_num": 0.03271484375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 240456252, + "step": 4293 + }, + { + "epoch": 9.56347438752784, + "grad_norm": 19.527559280395508, + "learning_rate": 1e-06, + "loss": 0.6914, + "num_input_tokens_seen": 240512636, + "step": 4294 + }, + { + "epoch": 9.56347438752784, + "loss": 0.63869708776474, + "loss_ce": 0.0001473345619160682, + "loss_iou": 0.2451171875, + "loss_num": 0.0301513671875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 240512636, + "step": 4294 + }, + { + "epoch": 9.565701559020045, + "grad_norm": 17.53272819519043, + "learning_rate": 1e-06, + "loss": 0.6336, + "num_input_tokens_seen": 240571148, + "step": 4295 + }, + { + "epoch": 9.565701559020045, + "loss": 0.7293701767921448, + "loss_ce": 0.00024417019449174404, + "loss_iou": 0.310546875, + "loss_num": 0.0218505859375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 240571148, + "step": 4295 + }, + { + "epoch": 9.56792873051225, + "grad_norm": 20.05321502685547, + "learning_rate": 1e-06, + "loss": 0.6674, + "num_input_tokens_seen": 240628292, + "step": 4296 + }, + { + "epoch": 9.56792873051225, + "loss": 0.6266350746154785, + "loss_ce": 0.0001702026347629726, + "loss_iou": 0.28125, + "loss_num": 0.0128173828125, + "loss_xval": 0.625, + "num_input_tokens_seen": 240628292, + "step": 4296 + }, + { + "epoch": 9.570155902004455, + "grad_norm": 15.255680084228516, + "learning_rate": 1e-06, + "loss": 0.4882, + "num_input_tokens_seen": 240686052, + "step": 4297 + }, + { + "epoch": 9.570155902004455, + "loss": 0.5545688271522522, + "loss_ce": 0.0001254748785868287, + "loss_iou": 0.2265625, + "loss_num": 0.020263671875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 240686052, + "step": 4297 + }, + { + "epoch": 9.57238307349666, + "grad_norm": 18.931354522705078, + "learning_rate": 1e-06, + "loss": 0.5507, + "num_input_tokens_seen": 240744448, + "step": 4298 + }, + { + "epoch": 9.57238307349666, + "loss": 0.571701169013977, + "loss_ce": 0.00016800707089714706, + "loss_iou": 0.25390625, + "loss_num": 0.0130615234375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 240744448, + "step": 4298 + }, + { + "epoch": 9.574610244988865, + "grad_norm": 38.29421615600586, + "learning_rate": 1e-06, + "loss": 0.5789, + "num_input_tokens_seen": 240800872, + "step": 4299 + }, + { + "epoch": 9.574610244988865, + "loss": 0.6190475821495056, + "loss_ce": 0.0001510926231276244, + "loss_iou": 0.27734375, + "loss_num": 0.01275634765625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 240800872, + "step": 4299 + }, + { + "epoch": 9.57683741648107, + "grad_norm": 15.936655044555664, + "learning_rate": 1e-06, + "loss": 0.655, + "num_input_tokens_seen": 240855308, + "step": 4300 + }, + { + "epoch": 9.57683741648107, + "loss": 0.7067993879318237, + "loss_ce": 0.00013432535342872143, + "loss_iou": 0.29296875, + "loss_num": 0.024658203125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 240855308, + "step": 4300 + }, + { + "epoch": 9.579064587973274, + "grad_norm": 14.852972030639648, + "learning_rate": 1e-06, + "loss": 0.5593, + "num_input_tokens_seen": 240910724, + "step": 4301 + }, + { + "epoch": 9.579064587973274, + "loss": 0.6589754819869995, + "loss_ce": 0.00016200373647734523, + "loss_iou": 0.27734375, + "loss_num": 0.0211181640625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 240910724, + "step": 4301 + }, + { + "epoch": 9.58129175946548, + "grad_norm": 18.125749588012695, + "learning_rate": 1e-06, + "loss": 0.5183, + "num_input_tokens_seen": 240967040, + "step": 4302 + }, + { + "epoch": 9.58129175946548, + "loss": 0.404930055141449, + "loss_ce": 0.00014489417662844062, + "loss_iou": 0.166015625, + "loss_num": 0.0142822265625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 240967040, + "step": 4302 + }, + { + "epoch": 9.583518930957684, + "grad_norm": 16.510257720947266, + "learning_rate": 1e-06, + "loss": 0.6927, + "num_input_tokens_seen": 241021748, + "step": 4303 + }, + { + "epoch": 9.583518930957684, + "loss": 0.7239894866943359, + "loss_ce": 0.0027980851009488106, + "loss_iou": 0.3046875, + "loss_num": 0.0220947265625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 241021748, + "step": 4303 + }, + { + "epoch": 9.585746102449889, + "grad_norm": 17.652618408203125, + "learning_rate": 1e-06, + "loss": 0.5067, + "num_input_tokens_seen": 241078380, + "step": 4304 + }, + { + "epoch": 9.585746102449889, + "loss": 0.45593586564064026, + "loss_ce": 0.00012532059918157756, + "loss_iou": 0.18359375, + "loss_num": 0.017578125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 241078380, + "step": 4304 + }, + { + "epoch": 9.587973273942094, + "grad_norm": 21.635093688964844, + "learning_rate": 1e-06, + "loss": 0.6214, + "num_input_tokens_seen": 241131336, + "step": 4305 + }, + { + "epoch": 9.587973273942094, + "loss": 0.6010667085647583, + "loss_ce": 0.00023660104488953948, + "loss_iou": 0.263671875, + "loss_num": 0.01507568359375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 241131336, + "step": 4305 + }, + { + "epoch": 9.590200445434299, + "grad_norm": 21.13984489440918, + "learning_rate": 1e-06, + "loss": 0.5725, + "num_input_tokens_seen": 241188112, + "step": 4306 + }, + { + "epoch": 9.590200445434299, + "loss": 0.526428759098053, + "loss_ce": 0.0003057234571315348, + "loss_iou": 0.208984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 241188112, + "step": 4306 + }, + { + "epoch": 9.592427616926503, + "grad_norm": 16.988536834716797, + "learning_rate": 1e-06, + "loss": 0.614, + "num_input_tokens_seen": 241248100, + "step": 4307 + }, + { + "epoch": 9.592427616926503, + "loss": 0.6281224489212036, + "loss_ce": 0.00019275453814771026, + "loss_iou": 0.2734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 241248100, + "step": 4307 + }, + { + "epoch": 9.594654788418708, + "grad_norm": 21.287412643432617, + "learning_rate": 1e-06, + "loss": 0.7941, + "num_input_tokens_seen": 241303028, + "step": 4308 + }, + { + "epoch": 9.594654788418708, + "loss": 1.0175116062164307, + "loss_ce": 0.00017758479225449264, + "loss_iou": 0.416015625, + "loss_num": 0.036865234375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 241303028, + "step": 4308 + }, + { + "epoch": 9.596881959910913, + "grad_norm": 17.50885009765625, + "learning_rate": 1e-06, + "loss": 0.5592, + "num_input_tokens_seen": 241359508, + "step": 4309 + }, + { + "epoch": 9.596881959910913, + "loss": 0.6614413261413574, + "loss_ce": 0.00018642100621946156, + "loss_iou": 0.283203125, + "loss_num": 0.0194091796875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 241359508, + "step": 4309 + }, + { + "epoch": 9.599109131403118, + "grad_norm": 23.472171783447266, + "learning_rate": 1e-06, + "loss": 0.7445, + "num_input_tokens_seen": 241417160, + "step": 4310 + }, + { + "epoch": 9.599109131403118, + "loss": 0.797520101070404, + "loss_ce": 0.00015685184916947037, + "loss_iou": 0.3359375, + "loss_num": 0.0247802734375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 241417160, + "step": 4310 + }, + { + "epoch": 9.601336302895323, + "grad_norm": 29.62855339050293, + "learning_rate": 1e-06, + "loss": 0.7193, + "num_input_tokens_seen": 241473892, + "step": 4311 + }, + { + "epoch": 9.601336302895323, + "loss": 0.7771183848381042, + "loss_ce": 0.00014080063556320965, + "loss_iou": 0.33203125, + "loss_num": 0.0230712890625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 241473892, + "step": 4311 + }, + { + "epoch": 9.603563474387528, + "grad_norm": 19.706748962402344, + "learning_rate": 1e-06, + "loss": 0.6076, + "num_input_tokens_seen": 241532188, + "step": 4312 + }, + { + "epoch": 9.603563474387528, + "loss": 0.7907249927520752, + "loss_ce": 0.00031976206810213625, + "loss_iou": 0.333984375, + "loss_num": 0.0250244140625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 241532188, + "step": 4312 + }, + { + "epoch": 9.605790645879733, + "grad_norm": 19.344236373901367, + "learning_rate": 1e-06, + "loss": 0.565, + "num_input_tokens_seen": 241587928, + "step": 4313 + }, + { + "epoch": 9.605790645879733, + "loss": 0.6804521083831787, + "loss_ce": 0.0001542992249596864, + "loss_iou": 0.30078125, + "loss_num": 0.01556396484375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 241587928, + "step": 4313 + }, + { + "epoch": 9.608017817371937, + "grad_norm": 15.50113582611084, + "learning_rate": 1e-06, + "loss": 0.399, + "num_input_tokens_seen": 241640704, + "step": 4314 + }, + { + "epoch": 9.608017817371937, + "loss": 0.44986775517463684, + "loss_ce": 0.00013020676851738244, + "loss_iou": 0.2041015625, + "loss_num": 0.00836181640625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 241640704, + "step": 4314 + }, + { + "epoch": 9.610244988864142, + "grad_norm": 36.39356994628906, + "learning_rate": 1e-06, + "loss": 0.559, + "num_input_tokens_seen": 241693484, + "step": 4315 + }, + { + "epoch": 9.610244988864142, + "loss": 0.7055351138114929, + "loss_ce": 0.000456988753285259, + "loss_iou": 0.28125, + "loss_num": 0.028076171875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 241693484, + "step": 4315 + }, + { + "epoch": 9.612472160356347, + "grad_norm": 16.385406494140625, + "learning_rate": 1e-06, + "loss": 0.6239, + "num_input_tokens_seen": 241747860, + "step": 4316 + }, + { + "epoch": 9.612472160356347, + "loss": 0.7735809087753296, + "loss_ce": 0.0005096091772429645, + "loss_iou": 0.345703125, + "loss_num": 0.0166015625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 241747860, + "step": 4316 + }, + { + "epoch": 9.614699331848552, + "grad_norm": 18.57633399963379, + "learning_rate": 1e-06, + "loss": 0.6124, + "num_input_tokens_seen": 241802996, + "step": 4317 + }, + { + "epoch": 9.614699331848552, + "loss": 0.6089174747467041, + "loss_ce": 0.0001528042630525306, + "loss_iou": 0.2578125, + "loss_num": 0.0189208984375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 241802996, + "step": 4317 + }, + { + "epoch": 9.616926503340757, + "grad_norm": 17.626914978027344, + "learning_rate": 1e-06, + "loss": 0.4423, + "num_input_tokens_seen": 241857684, + "step": 4318 + }, + { + "epoch": 9.616926503340757, + "loss": 0.41496047377586365, + "loss_ce": 0.0001655758242122829, + "loss_iou": 0.1796875, + "loss_num": 0.01092529296875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 241857684, + "step": 4318 + }, + { + "epoch": 9.619153674832962, + "grad_norm": 23.88527488708496, + "learning_rate": 1e-06, + "loss": 0.6528, + "num_input_tokens_seen": 241911896, + "step": 4319 + }, + { + "epoch": 9.619153674832962, + "loss": 0.8219267129898071, + "loss_ce": 0.00014938012463971972, + "loss_iou": 0.34765625, + "loss_num": 0.0250244140625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 241911896, + "step": 4319 + }, + { + "epoch": 9.621380846325167, + "grad_norm": 14.68041706085205, + "learning_rate": 1e-06, + "loss": 0.3875, + "num_input_tokens_seen": 241969944, + "step": 4320 + }, + { + "epoch": 9.621380846325167, + "loss": 0.4286039471626282, + "loss_ce": 0.00013716876856051385, + "loss_iou": 0.1865234375, + "loss_num": 0.01116943359375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 241969944, + "step": 4320 + }, + { + "epoch": 9.623608017817372, + "grad_norm": 24.28359603881836, + "learning_rate": 1e-06, + "loss": 0.6069, + "num_input_tokens_seen": 242029492, + "step": 4321 + }, + { + "epoch": 9.623608017817372, + "loss": 0.6774266958236694, + "loss_ce": 0.00018064204778056592, + "loss_iou": 0.318359375, + "loss_num": 0.00811767578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 242029492, + "step": 4321 + }, + { + "epoch": 9.625835189309576, + "grad_norm": 22.979413986206055, + "learning_rate": 1e-06, + "loss": 0.6953, + "num_input_tokens_seen": 242084892, + "step": 4322 + }, + { + "epoch": 9.625835189309576, + "loss": 0.7274185419082642, + "loss_ce": 0.0001846232480602339, + "loss_iou": 0.31640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 242084892, + "step": 4322 + }, + { + "epoch": 9.628062360801781, + "grad_norm": 17.841794967651367, + "learning_rate": 1e-06, + "loss": 0.468, + "num_input_tokens_seen": 242139172, + "step": 4323 + }, + { + "epoch": 9.628062360801781, + "loss": 0.5101624131202698, + "loss_ce": 0.00015266051923390478, + "loss_iou": 0.2314453125, + "loss_num": 0.00958251953125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 242139172, + "step": 4323 + }, + { + "epoch": 9.630289532293986, + "grad_norm": 20.742664337158203, + "learning_rate": 1e-06, + "loss": 0.517, + "num_input_tokens_seen": 242196196, + "step": 4324 + }, + { + "epoch": 9.630289532293986, + "loss": 0.5766589045524597, + "loss_ce": 0.0008532421197742224, + "loss_iou": 0.2373046875, + "loss_num": 0.020263671875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 242196196, + "step": 4324 + }, + { + "epoch": 9.632516703786191, + "grad_norm": 60.28476333618164, + "learning_rate": 1e-06, + "loss": 0.6853, + "num_input_tokens_seen": 242251812, + "step": 4325 + }, + { + "epoch": 9.632516703786191, + "loss": 0.5879150629043579, + "loss_ce": 0.00014649088552687317, + "loss_iou": 0.2470703125, + "loss_num": 0.0185546875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 242251812, + "step": 4325 + }, + { + "epoch": 9.634743875278396, + "grad_norm": 66.29544067382812, + "learning_rate": 1e-06, + "loss": 0.6181, + "num_input_tokens_seen": 242308236, + "step": 4326 + }, + { + "epoch": 9.634743875278396, + "loss": 0.5924375057220459, + "loss_ce": 0.00015232324949465692, + "loss_iou": 0.232421875, + "loss_num": 0.0255126953125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 242308236, + "step": 4326 + }, + { + "epoch": 9.6369710467706, + "grad_norm": 14.150559425354004, + "learning_rate": 1e-06, + "loss": 0.4069, + "num_input_tokens_seen": 242364656, + "step": 4327 + }, + { + "epoch": 9.6369710467706, + "loss": 0.4071110486984253, + "loss_ce": 0.00012862987932749093, + "loss_iou": 0.1826171875, + "loss_num": 0.00848388671875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 242364656, + "step": 4327 + }, + { + "epoch": 9.639198218262806, + "grad_norm": 21.00891876220703, + "learning_rate": 1e-06, + "loss": 0.6717, + "num_input_tokens_seen": 242422764, + "step": 4328 + }, + { + "epoch": 9.639198218262806, + "loss": 0.8691731691360474, + "loss_ce": 0.0029012321028858423, + "loss_iou": 0.345703125, + "loss_num": 0.03466796875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 242422764, + "step": 4328 + }, + { + "epoch": 9.64142538975501, + "grad_norm": 21.65594482421875, + "learning_rate": 1e-06, + "loss": 0.5172, + "num_input_tokens_seen": 242480756, + "step": 4329 + }, + { + "epoch": 9.64142538975501, + "loss": 0.5980610251426697, + "loss_ce": 0.00016064970986917615, + "loss_iou": 0.2451171875, + "loss_num": 0.0218505859375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 242480756, + "step": 4329 + }, + { + "epoch": 9.643652561247215, + "grad_norm": 19.103317260742188, + "learning_rate": 1e-06, + "loss": 0.7951, + "num_input_tokens_seen": 242536328, + "step": 4330 + }, + { + "epoch": 9.643652561247215, + "loss": 0.6043500900268555, + "loss_ce": 0.00022415511193685234, + "loss_iou": 0.25, + "loss_num": 0.0203857421875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 242536328, + "step": 4330 + }, + { + "epoch": 9.64587973273942, + "grad_norm": 18.600261688232422, + "learning_rate": 1e-06, + "loss": 0.6719, + "num_input_tokens_seen": 242594076, + "step": 4331 + }, + { + "epoch": 9.64587973273942, + "loss": 0.7887901663780212, + "loss_ce": 0.00021596970327664167, + "loss_iou": 0.3203125, + "loss_num": 0.0299072265625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 242594076, + "step": 4331 + }, + { + "epoch": 9.648106904231625, + "grad_norm": 23.29888343811035, + "learning_rate": 1e-06, + "loss": 0.5155, + "num_input_tokens_seen": 242647136, + "step": 4332 + }, + { + "epoch": 9.648106904231625, + "loss": 0.44800740480422974, + "loss_ce": 0.00013143845717422664, + "loss_iou": 0.185546875, + "loss_num": 0.015380859375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 242647136, + "step": 4332 + }, + { + "epoch": 9.65033407572383, + "grad_norm": 24.5333194732666, + "learning_rate": 1e-06, + "loss": 0.5333, + "num_input_tokens_seen": 242700824, + "step": 4333 + }, + { + "epoch": 9.65033407572383, + "loss": 0.5556710958480835, + "loss_ce": 0.00012911748490296304, + "loss_iou": 0.25, + "loss_num": 0.0108642578125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 242700824, + "step": 4333 + }, + { + "epoch": 9.652561247216035, + "grad_norm": 17.157747268676758, + "learning_rate": 1e-06, + "loss": 0.7577, + "num_input_tokens_seen": 242759000, + "step": 4334 + }, + { + "epoch": 9.652561247216035, + "loss": 0.7171846628189087, + "loss_ce": 0.00014364052913151681, + "loss_iou": 0.31640625, + "loss_num": 0.0172119140625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 242759000, + "step": 4334 + }, + { + "epoch": 9.654788418708241, + "grad_norm": 26.958030700683594, + "learning_rate": 1e-06, + "loss": 0.5635, + "num_input_tokens_seen": 242814612, + "step": 4335 + }, + { + "epoch": 9.654788418708241, + "loss": 0.517261266708374, + "loss_ce": 0.00017140991985797882, + "loss_iou": 0.234375, + "loss_num": 0.0096435546875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 242814612, + "step": 4335 + }, + { + "epoch": 9.657015590200446, + "grad_norm": 13.9208345413208, + "learning_rate": 1e-06, + "loss": 0.5748, + "num_input_tokens_seen": 242871532, + "step": 4336 + }, + { + "epoch": 9.657015590200446, + "loss": 0.7538334131240845, + "loss_ce": 0.00017131002095993608, + "loss_iou": 0.283203125, + "loss_num": 0.037109375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 242871532, + "step": 4336 + }, + { + "epoch": 9.659242761692651, + "grad_norm": 19.94957733154297, + "learning_rate": 1e-06, + "loss": 0.5622, + "num_input_tokens_seen": 242927132, + "step": 4337 + }, + { + "epoch": 9.659242761692651, + "loss": 0.6652075052261353, + "loss_ce": 0.00016842935292515904, + "loss_iou": 0.2890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 242927132, + "step": 4337 + }, + { + "epoch": 9.661469933184856, + "grad_norm": 18.527286529541016, + "learning_rate": 1e-06, + "loss": 0.7037, + "num_input_tokens_seen": 242980644, + "step": 4338 + }, + { + "epoch": 9.661469933184856, + "loss": 0.798882007598877, + "loss_ce": 0.00017601504805497825, + "loss_iou": 0.341796875, + "loss_num": 0.0234375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 242980644, + "step": 4338 + }, + { + "epoch": 9.66369710467706, + "grad_norm": 24.58388328552246, + "learning_rate": 1e-06, + "loss": 0.623, + "num_input_tokens_seen": 243034252, + "step": 4339 + }, + { + "epoch": 9.66369710467706, + "loss": 0.56207674741745, + "loss_ce": 0.0003091880353167653, + "loss_iou": 0.2578125, + "loss_num": 0.00958251953125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 243034252, + "step": 4339 + }, + { + "epoch": 9.665924276169266, + "grad_norm": 22.670303344726562, + "learning_rate": 1e-06, + "loss": 0.6743, + "num_input_tokens_seen": 243091100, + "step": 4340 + }, + { + "epoch": 9.665924276169266, + "loss": 0.5509505867958069, + "loss_ce": 0.00016935347230173647, + "loss_iou": 0.251953125, + "loss_num": 0.0093994140625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 243091100, + "step": 4340 + }, + { + "epoch": 9.66815144766147, + "grad_norm": 24.056676864624023, + "learning_rate": 1e-06, + "loss": 0.7283, + "num_input_tokens_seen": 243144624, + "step": 4341 + }, + { + "epoch": 9.66815144766147, + "loss": 0.876803994178772, + "loss_ce": 0.0002171122032450512, + "loss_iou": 0.375, + "loss_num": 0.0255126953125, + "loss_xval": 0.875, + "num_input_tokens_seen": 243144624, + "step": 4341 + }, + { + "epoch": 9.670378619153675, + "grad_norm": 24.195653915405273, + "learning_rate": 1e-06, + "loss": 0.5525, + "num_input_tokens_seen": 243202736, + "step": 4342 + }, + { + "epoch": 9.670378619153675, + "loss": 0.5127952098846436, + "loss_ce": 0.00022197004000190645, + "loss_iou": 0.2158203125, + "loss_num": 0.0162353515625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 243202736, + "step": 4342 + }, + { + "epoch": 9.67260579064588, + "grad_norm": 20.252214431762695, + "learning_rate": 1e-06, + "loss": 0.5979, + "num_input_tokens_seen": 243260404, + "step": 4343 + }, + { + "epoch": 9.67260579064588, + "loss": 0.569305956363678, + "loss_ce": 0.00021413374633993953, + "loss_iou": 0.2421875, + "loss_num": 0.01708984375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 243260404, + "step": 4343 + }, + { + "epoch": 9.674832962138085, + "grad_norm": 22.576656341552734, + "learning_rate": 1e-06, + "loss": 0.6199, + "num_input_tokens_seen": 243315828, + "step": 4344 + }, + { + "epoch": 9.674832962138085, + "loss": 0.7008746266365051, + "loss_ce": 0.00019105095998384058, + "loss_iou": 0.302734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 243315828, + "step": 4344 + }, + { + "epoch": 9.67706013363029, + "grad_norm": 23.109046936035156, + "learning_rate": 1e-06, + "loss": 0.6222, + "num_input_tokens_seen": 243371304, + "step": 4345 + }, + { + "epoch": 9.67706013363029, + "loss": 0.6012382507324219, + "loss_ce": 0.0001640522968955338, + "loss_iou": 0.265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 243371304, + "step": 4345 + }, + { + "epoch": 9.679287305122495, + "grad_norm": 17.9921875, + "learning_rate": 1e-06, + "loss": 0.6403, + "num_input_tokens_seen": 243421812, + "step": 4346 + }, + { + "epoch": 9.679287305122495, + "loss": 0.7237908840179443, + "loss_ce": 0.00015808289754204452, + "loss_iou": 0.30859375, + "loss_num": 0.0216064453125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 243421812, + "step": 4346 + }, + { + "epoch": 9.6815144766147, + "grad_norm": 22.943727493286133, + "learning_rate": 1e-06, + "loss": 0.5186, + "num_input_tokens_seen": 243479024, + "step": 4347 + }, + { + "epoch": 9.6815144766147, + "loss": 0.4597442150115967, + "loss_ce": 0.0014922881964594126, + "loss_iou": 0.2021484375, + "loss_num": 0.0107421875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 243479024, + "step": 4347 + }, + { + "epoch": 9.683741648106905, + "grad_norm": 24.374197006225586, + "learning_rate": 1e-06, + "loss": 0.6149, + "num_input_tokens_seen": 243535892, + "step": 4348 + }, + { + "epoch": 9.683741648106905, + "loss": 0.7361659407615662, + "loss_ce": 0.00020402228983584791, + "loss_iou": 0.3125, + "loss_num": 0.0223388671875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 243535892, + "step": 4348 + }, + { + "epoch": 9.68596881959911, + "grad_norm": 22.339231491088867, + "learning_rate": 1e-06, + "loss": 0.735, + "num_input_tokens_seen": 243591976, + "step": 4349 + }, + { + "epoch": 9.68596881959911, + "loss": 0.8295117020606995, + "loss_ce": 0.00016598097863607109, + "loss_iou": 0.373046875, + "loss_num": 0.016845703125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 243591976, + "step": 4349 + }, + { + "epoch": 9.688195991091314, + "grad_norm": 14.869282722473145, + "learning_rate": 1e-06, + "loss": 0.4473, + "num_input_tokens_seen": 243648860, + "step": 4350 + }, + { + "epoch": 9.688195991091314, + "loss": 0.5270088911056519, + "loss_ce": 0.00015341459948103875, + "loss_iou": 0.240234375, + "loss_num": 0.00921630859375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 243648860, + "step": 4350 + }, + { + "epoch": 9.690423162583519, + "grad_norm": 18.157405853271484, + "learning_rate": 1e-06, + "loss": 0.6345, + "num_input_tokens_seen": 243705736, + "step": 4351 + }, + { + "epoch": 9.690423162583519, + "loss": 0.6656748056411743, + "loss_ce": 0.0001474730233894661, + "loss_iou": 0.27734375, + "loss_num": 0.0220947265625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 243705736, + "step": 4351 + }, + { + "epoch": 9.692650334075724, + "grad_norm": 20.70793342590332, + "learning_rate": 1e-06, + "loss": 0.6055, + "num_input_tokens_seen": 243763076, + "step": 4352 + }, + { + "epoch": 9.692650334075724, + "loss": 0.6417899131774902, + "loss_ce": 0.0001883807562990114, + "loss_iou": 0.28125, + "loss_num": 0.015625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 243763076, + "step": 4352 + }, + { + "epoch": 9.694877505567929, + "grad_norm": 16.69988441467285, + "learning_rate": 1e-06, + "loss": 0.4398, + "num_input_tokens_seen": 243819884, + "step": 4353 + }, + { + "epoch": 9.694877505567929, + "loss": 0.4072994291782379, + "loss_ce": 0.00019493894069455564, + "loss_iou": 0.1796875, + "loss_num": 0.009521484375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 243819884, + "step": 4353 + }, + { + "epoch": 9.697104677060134, + "grad_norm": 18.394502639770508, + "learning_rate": 1e-06, + "loss": 0.709, + "num_input_tokens_seen": 243874696, + "step": 4354 + }, + { + "epoch": 9.697104677060134, + "loss": 0.7145628333091736, + "loss_ce": 0.00020736586884595454, + "loss_iou": 0.3125, + "loss_num": 0.017822265625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 243874696, + "step": 4354 + }, + { + "epoch": 9.699331848552339, + "grad_norm": 25.20227813720703, + "learning_rate": 1e-06, + "loss": 0.6424, + "num_input_tokens_seen": 243930484, + "step": 4355 + }, + { + "epoch": 9.699331848552339, + "loss": 0.6150010228157043, + "loss_ce": 0.00013285188470035791, + "loss_iou": 0.267578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 243930484, + "step": 4355 + }, + { + "epoch": 9.701559020044543, + "grad_norm": 21.711627960205078, + "learning_rate": 1e-06, + "loss": 0.6181, + "num_input_tokens_seen": 243988028, + "step": 4356 + }, + { + "epoch": 9.701559020044543, + "loss": 0.7733531594276428, + "loss_ce": 0.00015979193267412484, + "loss_iou": 0.3359375, + "loss_num": 0.0206298828125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 243988028, + "step": 4356 + }, + { + "epoch": 9.703786191536748, + "grad_norm": 23.96913719177246, + "learning_rate": 1e-06, + "loss": 0.5364, + "num_input_tokens_seen": 244042796, + "step": 4357 + }, + { + "epoch": 9.703786191536748, + "loss": 0.43648862838745117, + "loss_ce": 0.00020933088671881706, + "loss_iou": 0.1904296875, + "loss_num": 0.01123046875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 244042796, + "step": 4357 + }, + { + "epoch": 9.706013363028953, + "grad_norm": 16.607881546020508, + "learning_rate": 1e-06, + "loss": 0.7013, + "num_input_tokens_seen": 244100220, + "step": 4358 + }, + { + "epoch": 9.706013363028953, + "loss": 0.651787281036377, + "loss_ce": 0.00017597780970390886, + "loss_iou": 0.302734375, + "loss_num": 0.00897216796875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 244100220, + "step": 4358 + }, + { + "epoch": 9.708240534521158, + "grad_norm": 17.134170532226562, + "learning_rate": 1e-06, + "loss": 0.596, + "num_input_tokens_seen": 244158116, + "step": 4359 + }, + { + "epoch": 9.708240534521158, + "loss": 0.6266348958015442, + "loss_ce": 0.0001700021093711257, + "loss_iou": 0.271484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.625, + "num_input_tokens_seen": 244158116, + "step": 4359 + }, + { + "epoch": 9.710467706013363, + "grad_norm": 18.850582122802734, + "learning_rate": 1e-06, + "loss": 0.4285, + "num_input_tokens_seen": 244214688, + "step": 4360 + }, + { + "epoch": 9.710467706013363, + "loss": 0.4430268406867981, + "loss_ce": 0.0001557298528496176, + "loss_iou": 0.1943359375, + "loss_num": 0.0106201171875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 244214688, + "step": 4360 + }, + { + "epoch": 9.712694877505568, + "grad_norm": 15.182866096496582, + "learning_rate": 1e-06, + "loss": 0.461, + "num_input_tokens_seen": 244272276, + "step": 4361 + }, + { + "epoch": 9.712694877505568, + "loss": 0.4096102714538574, + "loss_ce": 0.00018646058742888272, + "loss_iou": 0.1826171875, + "loss_num": 0.0087890625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 244272276, + "step": 4361 + }, + { + "epoch": 9.714922048997773, + "grad_norm": 21.032060623168945, + "learning_rate": 1e-06, + "loss": 0.5245, + "num_input_tokens_seen": 244326084, + "step": 4362 + }, + { + "epoch": 9.714922048997773, + "loss": 0.5369995832443237, + "loss_ce": 0.00013436358131002635, + "loss_iou": 0.2353515625, + "loss_num": 0.01318359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 244326084, + "step": 4362 + }, + { + "epoch": 9.717149220489977, + "grad_norm": 16.875015258789062, + "learning_rate": 1e-06, + "loss": 0.7523, + "num_input_tokens_seen": 244383708, + "step": 4363 + }, + { + "epoch": 9.717149220489977, + "loss": 0.8245912790298462, + "loss_ce": 0.00025050187832675874, + "loss_iou": 0.3203125, + "loss_num": 0.036865234375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 244383708, + "step": 4363 + }, + { + "epoch": 9.719376391982182, + "grad_norm": 15.091886520385742, + "learning_rate": 1e-06, + "loss": 0.4595, + "num_input_tokens_seen": 244436700, + "step": 4364 + }, + { + "epoch": 9.719376391982182, + "loss": 0.2986956834793091, + "loss_ce": 0.00053892977302894, + "loss_iou": 0.11376953125, + "loss_num": 0.01416015625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 244436700, + "step": 4364 + }, + { + "epoch": 9.721603563474387, + "grad_norm": 36.89336013793945, + "learning_rate": 1e-06, + "loss": 0.6042, + "num_input_tokens_seen": 244492464, + "step": 4365 + }, + { + "epoch": 9.721603563474387, + "loss": 0.7046360373497009, + "loss_ce": 0.0010227648308500648, + "loss_iou": 0.30078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 244492464, + "step": 4365 + }, + { + "epoch": 9.723830734966592, + "grad_norm": 21.095430374145508, + "learning_rate": 1e-06, + "loss": 0.7696, + "num_input_tokens_seen": 244548864, + "step": 4366 + }, + { + "epoch": 9.723830734966592, + "loss": 0.7162793874740601, + "loss_ce": 0.0007032152498140931, + "loss_iou": 0.318359375, + "loss_num": 0.0152587890625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 244548864, + "step": 4366 + }, + { + "epoch": 9.726057906458797, + "grad_norm": 17.61524772644043, + "learning_rate": 1e-06, + "loss": 0.4847, + "num_input_tokens_seen": 244602692, + "step": 4367 + }, + { + "epoch": 9.726057906458797, + "loss": 0.505713939666748, + "loss_ce": 0.00022077930043451488, + "loss_iou": 0.23046875, + "loss_num": 0.00885009765625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 244602692, + "step": 4367 + }, + { + "epoch": 9.728285077951002, + "grad_norm": 23.061426162719727, + "learning_rate": 1e-06, + "loss": 0.519, + "num_input_tokens_seen": 244657716, + "step": 4368 + }, + { + "epoch": 9.728285077951002, + "loss": 0.48245200514793396, + "loss_ce": 0.00015220226487144828, + "loss_iou": 0.208984375, + "loss_num": 0.0128173828125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 244657716, + "step": 4368 + }, + { + "epoch": 9.730512249443207, + "grad_norm": 19.16279411315918, + "learning_rate": 1e-06, + "loss": 0.5937, + "num_input_tokens_seen": 244711224, + "step": 4369 + }, + { + "epoch": 9.730512249443207, + "loss": 0.7714163661003113, + "loss_ce": 0.0001761687162797898, + "loss_iou": 0.333984375, + "loss_num": 0.0203857421875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 244711224, + "step": 4369 + }, + { + "epoch": 9.732739420935411, + "grad_norm": 17.79773712158203, + "learning_rate": 1e-06, + "loss": 0.5996, + "num_input_tokens_seen": 244765972, + "step": 4370 + }, + { + "epoch": 9.732739420935411, + "loss": 0.5670474171638489, + "loss_ce": 0.00015290510782506317, + "loss_iou": 0.244140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 244765972, + "step": 4370 + }, + { + "epoch": 9.734966592427616, + "grad_norm": 16.366491317749023, + "learning_rate": 1e-06, + "loss": 0.6912, + "num_input_tokens_seen": 244822468, + "step": 4371 + }, + { + "epoch": 9.734966592427616, + "loss": 0.6842033863067627, + "loss_ce": 0.0001213748037116602, + "loss_iou": 0.3046875, + "loss_num": 0.01470947265625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 244822468, + "step": 4371 + }, + { + "epoch": 9.737193763919821, + "grad_norm": 21.454408645629883, + "learning_rate": 1e-06, + "loss": 0.4793, + "num_input_tokens_seen": 244880364, + "step": 4372 + }, + { + "epoch": 9.737193763919821, + "loss": 0.38819241523742676, + "loss_ce": 0.00013089305139146745, + "loss_iou": 0.177734375, + "loss_num": 0.006561279296875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 244880364, + "step": 4372 + }, + { + "epoch": 9.739420935412026, + "grad_norm": 52.516292572021484, + "learning_rate": 1e-06, + "loss": 0.4779, + "num_input_tokens_seen": 244935344, + "step": 4373 + }, + { + "epoch": 9.739420935412026, + "loss": 0.4206976294517517, + "loss_ce": 0.0007757844286970794, + "loss_iou": 0.162109375, + "loss_num": 0.0194091796875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 244935344, + "step": 4373 + }, + { + "epoch": 9.74164810690423, + "grad_norm": 12.701075553894043, + "learning_rate": 1e-06, + "loss": 0.5218, + "num_input_tokens_seen": 244991972, + "step": 4374 + }, + { + "epoch": 9.74164810690423, + "loss": 0.381612628698349, + "loss_ce": 0.00014290204853750765, + "loss_iou": 0.17578125, + "loss_num": 0.006072998046875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 244991972, + "step": 4374 + }, + { + "epoch": 9.743875278396436, + "grad_norm": 20.51826286315918, + "learning_rate": 1e-06, + "loss": 0.5475, + "num_input_tokens_seen": 245048956, + "step": 4375 + }, + { + "epoch": 9.743875278396436, + "loss": 0.5812922120094299, + "loss_ce": 0.00029854726744815707, + "loss_iou": 0.248046875, + "loss_num": 0.01708984375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 245048956, + "step": 4375 + }, + { + "epoch": 9.74610244988864, + "grad_norm": 19.916961669921875, + "learning_rate": 1e-06, + "loss": 0.5741, + "num_input_tokens_seen": 245104028, + "step": 4376 + }, + { + "epoch": 9.74610244988864, + "loss": 0.4663507342338562, + "loss_ce": 0.0007135132909752429, + "loss_iou": 0.1845703125, + "loss_num": 0.01904296875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 245104028, + "step": 4376 + }, + { + "epoch": 9.748329621380847, + "grad_norm": 18.798843383789062, + "learning_rate": 1e-06, + "loss": 0.7112, + "num_input_tokens_seen": 245161592, + "step": 4377 + }, + { + "epoch": 9.748329621380847, + "loss": 0.5074524283409119, + "loss_ce": 0.00012819524272345006, + "loss_iou": 0.2255859375, + "loss_num": 0.01129150390625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 245161592, + "step": 4377 + }, + { + "epoch": 9.750556792873052, + "grad_norm": 14.465723991394043, + "learning_rate": 1e-06, + "loss": 0.4476, + "num_input_tokens_seen": 245218120, + "step": 4378 + }, + { + "epoch": 9.750556792873052, + "loss": 0.5026047825813293, + "loss_ce": 0.00016336789121851325, + "loss_iou": 0.19921875, + "loss_num": 0.020751953125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 245218120, + "step": 4378 + }, + { + "epoch": 9.752783964365257, + "grad_norm": 148.81324768066406, + "learning_rate": 1e-06, + "loss": 0.4971, + "num_input_tokens_seen": 245276528, + "step": 4379 + }, + { + "epoch": 9.752783964365257, + "loss": 0.5717120170593262, + "loss_ce": 0.00017881978419609368, + "loss_iou": 0.255859375, + "loss_num": 0.01214599609375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 245276528, + "step": 4379 + }, + { + "epoch": 9.755011135857462, + "grad_norm": 22.322999954223633, + "learning_rate": 1e-06, + "loss": 0.8127, + "num_input_tokens_seen": 245332196, + "step": 4380 + }, + { + "epoch": 9.755011135857462, + "loss": 0.8225026726722717, + "loss_ce": 0.0002370491565670818, + "loss_iou": 0.359375, + "loss_num": 0.0211181640625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 245332196, + "step": 4380 + }, + { + "epoch": 9.757238307349667, + "grad_norm": 15.783463478088379, + "learning_rate": 1e-06, + "loss": 0.5557, + "num_input_tokens_seen": 245386960, + "step": 4381 + }, + { + "epoch": 9.757238307349667, + "loss": 0.670534610748291, + "loss_ce": 0.0001244655722985044, + "loss_iou": 0.2890625, + "loss_num": 0.018798828125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 245386960, + "step": 4381 + }, + { + "epoch": 9.759465478841872, + "grad_norm": 16.41698455810547, + "learning_rate": 1e-06, + "loss": 0.3972, + "num_input_tokens_seen": 245439152, + "step": 4382 + }, + { + "epoch": 9.759465478841872, + "loss": 0.38251131772994995, + "loss_ce": 0.00012604420771822333, + "loss_iou": 0.166015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 245439152, + "step": 4382 + }, + { + "epoch": 9.761692650334076, + "grad_norm": 21.44931983947754, + "learning_rate": 1e-06, + "loss": 0.6739, + "num_input_tokens_seen": 245498164, + "step": 4383 + }, + { + "epoch": 9.761692650334076, + "loss": 0.76970374584198, + "loss_ce": 0.0001725103793432936, + "loss_iou": 0.3203125, + "loss_num": 0.0255126953125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 245498164, + "step": 4383 + }, + { + "epoch": 9.763919821826281, + "grad_norm": 19.587390899658203, + "learning_rate": 1e-06, + "loss": 0.4272, + "num_input_tokens_seen": 245552244, + "step": 4384 + }, + { + "epoch": 9.763919821826281, + "loss": 0.4954010248184204, + "loss_ce": 0.0001617551752133295, + "loss_iou": 0.21484375, + "loss_num": 0.0130615234375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 245552244, + "step": 4384 + }, + { + "epoch": 9.766146993318486, + "grad_norm": 16.423864364624023, + "learning_rate": 1e-06, + "loss": 0.5937, + "num_input_tokens_seen": 245607924, + "step": 4385 + }, + { + "epoch": 9.766146993318486, + "loss": 0.659552276134491, + "loss_ce": 0.0001284128229599446, + "loss_iou": 0.2734375, + "loss_num": 0.0224609375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 245607924, + "step": 4385 + }, + { + "epoch": 9.768374164810691, + "grad_norm": 17.927560806274414, + "learning_rate": 1e-06, + "loss": 0.5699, + "num_input_tokens_seen": 245662504, + "step": 4386 + }, + { + "epoch": 9.768374164810691, + "loss": 0.5620256662368774, + "loss_ce": 0.00013603617844637483, + "loss_iou": 0.2314453125, + "loss_num": 0.01953125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 245662504, + "step": 4386 + }, + { + "epoch": 9.770601336302896, + "grad_norm": 21.027870178222656, + "learning_rate": 1e-06, + "loss": 0.6231, + "num_input_tokens_seen": 245719168, + "step": 4387 + }, + { + "epoch": 9.770601336302896, + "loss": 0.7491781711578369, + "loss_ce": 0.00015475715918000787, + "loss_iou": 0.310546875, + "loss_num": 0.025634765625, + "loss_xval": 0.75, + "num_input_tokens_seen": 245719168, + "step": 4387 + }, + { + "epoch": 9.7728285077951, + "grad_norm": 209.35565185546875, + "learning_rate": 1e-06, + "loss": 0.7288, + "num_input_tokens_seen": 245773520, + "step": 4388 + }, + { + "epoch": 9.7728285077951, + "loss": 0.900350034236908, + "loss_ce": 0.00020351973944343626, + "loss_iou": 0.34375, + "loss_num": 0.042236328125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 245773520, + "step": 4388 + }, + { + "epoch": 9.775055679287306, + "grad_norm": 15.992069244384766, + "learning_rate": 1e-06, + "loss": 0.6249, + "num_input_tokens_seen": 245829068, + "step": 4389 + }, + { + "epoch": 9.775055679287306, + "loss": 0.4481305480003357, + "loss_ce": 0.0002545609895605594, + "loss_iou": 0.1943359375, + "loss_num": 0.01177978515625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 245829068, + "step": 4389 + }, + { + "epoch": 9.77728285077951, + "grad_norm": 65.46475219726562, + "learning_rate": 1e-06, + "loss": 0.5634, + "num_input_tokens_seen": 245885128, + "step": 4390 + }, + { + "epoch": 9.77728285077951, + "loss": 0.5616711378097534, + "loss_ce": 0.00014769005065318197, + "loss_iou": 0.2421875, + "loss_num": 0.01519775390625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 245885128, + "step": 4390 + }, + { + "epoch": 9.779510022271715, + "grad_norm": 23.950044631958008, + "learning_rate": 1e-06, + "loss": 0.5287, + "num_input_tokens_seen": 245939168, + "step": 4391 + }, + { + "epoch": 9.779510022271715, + "loss": 0.6016286611557007, + "loss_ce": 0.0003102673217654228, + "loss_iou": 0.2470703125, + "loss_num": 0.021484375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 245939168, + "step": 4391 + }, + { + "epoch": 9.78173719376392, + "grad_norm": 13.293421745300293, + "learning_rate": 1e-06, + "loss": 0.4483, + "num_input_tokens_seen": 245996116, + "step": 4392 + }, + { + "epoch": 9.78173719376392, + "loss": 0.439850389957428, + "loss_ce": 0.00015313076437450945, + "loss_iou": 0.1806640625, + "loss_num": 0.015869140625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 245996116, + "step": 4392 + }, + { + "epoch": 9.783964365256125, + "grad_norm": 27.452177047729492, + "learning_rate": 1e-06, + "loss": 0.5466, + "num_input_tokens_seen": 246051320, + "step": 4393 + }, + { + "epoch": 9.783964365256125, + "loss": 0.5230746269226074, + "loss_ce": 0.00012541524483822286, + "loss_iou": 0.2373046875, + "loss_num": 0.00958251953125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 246051320, + "step": 4393 + }, + { + "epoch": 9.78619153674833, + "grad_norm": 18.695438385009766, + "learning_rate": 1e-06, + "loss": 0.6525, + "num_input_tokens_seen": 246107564, + "step": 4394 + }, + { + "epoch": 9.78619153674833, + "loss": 0.6207006573677063, + "loss_ce": 0.0002172729728044942, + "loss_iou": 0.263671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 246107564, + "step": 4394 + }, + { + "epoch": 9.788418708240535, + "grad_norm": 16.555150985717773, + "learning_rate": 1e-06, + "loss": 0.6315, + "num_input_tokens_seen": 246166380, + "step": 4395 + }, + { + "epoch": 9.788418708240535, + "loss": 0.6279211044311523, + "loss_ce": 0.00017448890139348805, + "loss_iou": 0.2421875, + "loss_num": 0.02880859375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 246166380, + "step": 4395 + }, + { + "epoch": 9.79064587973274, + "grad_norm": 13.74927043914795, + "learning_rate": 1e-06, + "loss": 0.7028, + "num_input_tokens_seen": 246222136, + "step": 4396 + }, + { + "epoch": 9.79064587973274, + "loss": 0.8214117288589478, + "loss_ce": 0.00036682139034383, + "loss_iou": 0.3359375, + "loss_num": 0.0301513671875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 246222136, + "step": 4396 + }, + { + "epoch": 9.792873051224944, + "grad_norm": 17.71005630493164, + "learning_rate": 1e-06, + "loss": 0.5901, + "num_input_tokens_seen": 246278428, + "step": 4397 + }, + { + "epoch": 9.792873051224944, + "loss": 0.41447368264198303, + "loss_ce": 0.00016704711015336215, + "loss_iou": 0.1884765625, + "loss_num": 0.007537841796875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 246278428, + "step": 4397 + }, + { + "epoch": 9.79510022271715, + "grad_norm": 13.60590934753418, + "learning_rate": 1e-06, + "loss": 0.5328, + "num_input_tokens_seen": 246335200, + "step": 4398 + }, + { + "epoch": 9.79510022271715, + "loss": 0.4858427345752716, + "loss_ce": 0.00012495677219703794, + "loss_iou": 0.212890625, + "loss_num": 0.0118408203125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 246335200, + "step": 4398 + }, + { + "epoch": 9.797327394209354, + "grad_norm": 12.569108009338379, + "learning_rate": 1e-06, + "loss": 0.7097, + "num_input_tokens_seen": 246391172, + "step": 4399 + }, + { + "epoch": 9.797327394209354, + "loss": 0.8952298164367676, + "loss_ce": 0.00021028138871770352, + "loss_iou": 0.337890625, + "loss_num": 0.043701171875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 246391172, + "step": 4399 + }, + { + "epoch": 9.799554565701559, + "grad_norm": 16.045135498046875, + "learning_rate": 1e-06, + "loss": 0.5855, + "num_input_tokens_seen": 246448824, + "step": 4400 + }, + { + "epoch": 9.799554565701559, + "loss": 0.7127890586853027, + "loss_ce": 0.00014254784036893398, + "loss_iou": 0.306640625, + "loss_num": 0.0201416015625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 246448824, + "step": 4400 + }, + { + "epoch": 9.801781737193764, + "grad_norm": 18.482440948486328, + "learning_rate": 1e-06, + "loss": 0.5164, + "num_input_tokens_seen": 246505864, + "step": 4401 + }, + { + "epoch": 9.801781737193764, + "loss": 0.5470033884048462, + "loss_ce": 0.00012841983698308468, + "loss_iou": 0.2392578125, + "loss_num": 0.01385498046875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 246505864, + "step": 4401 + }, + { + "epoch": 9.804008908685969, + "grad_norm": 17.286788940429688, + "learning_rate": 1e-06, + "loss": 0.6259, + "num_input_tokens_seen": 246562828, + "step": 4402 + }, + { + "epoch": 9.804008908685969, + "loss": 0.5885273218154907, + "loss_ce": 0.00014838032075203955, + "loss_iou": 0.2578125, + "loss_num": 0.0146484375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 246562828, + "step": 4402 + }, + { + "epoch": 9.806236080178174, + "grad_norm": 14.533248901367188, + "learning_rate": 1e-06, + "loss": 0.4501, + "num_input_tokens_seen": 246619276, + "step": 4403 + }, + { + "epoch": 9.806236080178174, + "loss": 0.4673030972480774, + "loss_ce": 0.00014000001829117537, + "loss_iou": 0.2080078125, + "loss_num": 0.01025390625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 246619276, + "step": 4403 + }, + { + "epoch": 9.808463251670378, + "grad_norm": 16.228418350219727, + "learning_rate": 1e-06, + "loss": 0.4324, + "num_input_tokens_seen": 246678448, + "step": 4404 + }, + { + "epoch": 9.808463251670378, + "loss": 0.47206440567970276, + "loss_ce": 0.0001405745279043913, + "loss_iou": 0.2109375, + "loss_num": 0.0098876953125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 246678448, + "step": 4404 + }, + { + "epoch": 9.810690423162583, + "grad_norm": 18.946840286254883, + "learning_rate": 1e-06, + "loss": 0.5187, + "num_input_tokens_seen": 246732924, + "step": 4405 + }, + { + "epoch": 9.810690423162583, + "loss": 0.5357993841171265, + "loss_ce": 0.00015488323697354645, + "loss_iou": 0.2294921875, + "loss_num": 0.015380859375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 246732924, + "step": 4405 + }, + { + "epoch": 9.812917594654788, + "grad_norm": 21.7833194732666, + "learning_rate": 1e-06, + "loss": 0.6752, + "num_input_tokens_seen": 246790636, + "step": 4406 + }, + { + "epoch": 9.812917594654788, + "loss": 0.5842469334602356, + "loss_ce": 0.00014047868899069726, + "loss_iou": 0.251953125, + "loss_num": 0.01556396484375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 246790636, + "step": 4406 + }, + { + "epoch": 9.815144766146993, + "grad_norm": 18.096515655517578, + "learning_rate": 1e-06, + "loss": 0.5097, + "num_input_tokens_seen": 246847708, + "step": 4407 + }, + { + "epoch": 9.815144766146993, + "loss": 0.634331226348877, + "loss_ce": 0.0002980299177579582, + "loss_iou": 0.263671875, + "loss_num": 0.021484375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 246847708, + "step": 4407 + }, + { + "epoch": 9.817371937639198, + "grad_norm": 21.39216423034668, + "learning_rate": 1e-06, + "loss": 0.6078, + "num_input_tokens_seen": 246902376, + "step": 4408 + }, + { + "epoch": 9.817371937639198, + "loss": 0.6875587701797485, + "loss_ce": 0.00018085587362293154, + "loss_iou": 0.283203125, + "loss_num": 0.0244140625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 246902376, + "step": 4408 + }, + { + "epoch": 9.819599109131403, + "grad_norm": 29.385353088378906, + "learning_rate": 1e-06, + "loss": 0.6988, + "num_input_tokens_seen": 246956228, + "step": 4409 + }, + { + "epoch": 9.819599109131403, + "loss": 0.6361332535743713, + "loss_ce": 0.00014688669762108475, + "loss_iou": 0.28515625, + "loss_num": 0.01318359375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 246956228, + "step": 4409 + }, + { + "epoch": 9.821826280623608, + "grad_norm": 23.78947639465332, + "learning_rate": 1e-06, + "loss": 0.5749, + "num_input_tokens_seen": 247012124, + "step": 4410 + }, + { + "epoch": 9.821826280623608, + "loss": 0.4960480332374573, + "loss_ce": 0.0001984003756660968, + "loss_iou": 0.2197265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 247012124, + "step": 4410 + }, + { + "epoch": 9.824053452115812, + "grad_norm": 24.32109260559082, + "learning_rate": 1e-06, + "loss": 0.6939, + "num_input_tokens_seen": 247070408, + "step": 4411 + }, + { + "epoch": 9.824053452115812, + "loss": 1.043853521347046, + "loss_ce": 0.00015239304048009217, + "loss_iou": 0.439453125, + "loss_num": 0.032958984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 247070408, + "step": 4411 + }, + { + "epoch": 9.826280623608017, + "grad_norm": 13.603236198425293, + "learning_rate": 1e-06, + "loss": 0.7308, + "num_input_tokens_seen": 247130296, + "step": 4412 + }, + { + "epoch": 9.826280623608017, + "loss": 0.8429387807846069, + "loss_ce": 0.00016536489420104772, + "loss_iou": 0.369140625, + "loss_num": 0.0206298828125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 247130296, + "step": 4412 + }, + { + "epoch": 9.828507795100222, + "grad_norm": 23.041610717773438, + "learning_rate": 1e-06, + "loss": 0.4969, + "num_input_tokens_seen": 247186440, + "step": 4413 + }, + { + "epoch": 9.828507795100222, + "loss": 0.5199660062789917, + "loss_ce": 0.0001905907120089978, + "loss_iou": 0.2236328125, + "loss_num": 0.0145263671875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 247186440, + "step": 4413 + }, + { + "epoch": 9.830734966592427, + "grad_norm": 18.076107025146484, + "learning_rate": 1e-06, + "loss": 0.4157, + "num_input_tokens_seen": 247239284, + "step": 4414 + }, + { + "epoch": 9.830734966592427, + "loss": 0.3731992244720459, + "loss_ce": 0.0001523523242212832, + "loss_iou": 0.173828125, + "loss_num": 0.005157470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 247239284, + "step": 4414 + }, + { + "epoch": 9.832962138084632, + "grad_norm": 13.739655494689941, + "learning_rate": 1e-06, + "loss": 0.4094, + "num_input_tokens_seen": 247295360, + "step": 4415 + }, + { + "epoch": 9.832962138084632, + "loss": 0.3968695104122162, + "loss_ce": 0.00014100654516369104, + "loss_iou": 0.1796875, + "loss_num": 0.007293701171875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 247295360, + "step": 4415 + }, + { + "epoch": 9.835189309576837, + "grad_norm": 22.17353057861328, + "learning_rate": 1e-06, + "loss": 0.4095, + "num_input_tokens_seen": 247353432, + "step": 4416 + }, + { + "epoch": 9.835189309576837, + "loss": 0.38090550899505615, + "loss_ce": 0.00016820061136968434, + "loss_iou": 0.1748046875, + "loss_num": 0.00616455078125, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 247353432, + "step": 4416 + }, + { + "epoch": 9.837416481069042, + "grad_norm": 28.909465789794922, + "learning_rate": 1e-06, + "loss": 0.5909, + "num_input_tokens_seen": 247412760, + "step": 4417 + }, + { + "epoch": 9.837416481069042, + "loss": 0.5692502856254578, + "loss_ce": 0.00015849701594561338, + "loss_iou": 0.251953125, + "loss_num": 0.01300048828125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 247412760, + "step": 4417 + }, + { + "epoch": 9.839643652561247, + "grad_norm": 18.436254501342773, + "learning_rate": 1e-06, + "loss": 0.7414, + "num_input_tokens_seen": 247469036, + "step": 4418 + }, + { + "epoch": 9.839643652561247, + "loss": 0.946731686592102, + "loss_ce": 0.00019852191326208413, + "loss_iou": 0.38671875, + "loss_num": 0.0341796875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 247469036, + "step": 4418 + }, + { + "epoch": 9.841870824053451, + "grad_norm": 17.95979881286621, + "learning_rate": 1e-06, + "loss": 0.5232, + "num_input_tokens_seen": 247525680, + "step": 4419 + }, + { + "epoch": 9.841870824053451, + "loss": 0.5357891917228699, + "loss_ce": 0.00014465830463450402, + "loss_iou": 0.228515625, + "loss_num": 0.015869140625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 247525680, + "step": 4419 + }, + { + "epoch": 9.844097995545656, + "grad_norm": 29.744184494018555, + "learning_rate": 1e-06, + "loss": 0.5656, + "num_input_tokens_seen": 247580136, + "step": 4420 + }, + { + "epoch": 9.844097995545656, + "loss": 0.6758188605308533, + "loss_ce": 0.00015968517982400954, + "loss_iou": 0.291015625, + "loss_num": 0.0184326171875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 247580136, + "step": 4420 + }, + { + "epoch": 9.846325167037861, + "grad_norm": 13.804282188415527, + "learning_rate": 1e-06, + "loss": 0.4793, + "num_input_tokens_seen": 247634072, + "step": 4421 + }, + { + "epoch": 9.846325167037861, + "loss": 0.34506338834762573, + "loss_ce": 0.00033683725632727146, + "loss_iou": 0.1484375, + "loss_num": 0.0093994140625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 247634072, + "step": 4421 + }, + { + "epoch": 9.848552338530066, + "grad_norm": 53.691707611083984, + "learning_rate": 1e-06, + "loss": 0.5384, + "num_input_tokens_seen": 247688788, + "step": 4422 + }, + { + "epoch": 9.848552338530066, + "loss": 0.5472594499588013, + "loss_ce": 0.0001402852067258209, + "loss_iou": 0.24609375, + "loss_num": 0.01116943359375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 247688788, + "step": 4422 + }, + { + "epoch": 9.85077951002227, + "grad_norm": 37.15926742553711, + "learning_rate": 1e-06, + "loss": 0.6395, + "num_input_tokens_seen": 247746856, + "step": 4423 + }, + { + "epoch": 9.85077951002227, + "loss": 0.6605561375617981, + "loss_ce": 0.00015572294068988413, + "loss_iou": 0.287109375, + "loss_num": 0.016845703125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 247746856, + "step": 4423 + }, + { + "epoch": 9.853006681514476, + "grad_norm": 16.28897476196289, + "learning_rate": 1e-06, + "loss": 0.7326, + "num_input_tokens_seen": 247802588, + "step": 4424 + }, + { + "epoch": 9.853006681514476, + "loss": 0.605131208896637, + "loss_ce": 0.00015073509712237865, + "loss_iou": 0.26953125, + "loss_num": 0.012939453125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 247802588, + "step": 4424 + }, + { + "epoch": 9.855233853006682, + "grad_norm": 24.916080474853516, + "learning_rate": 1e-06, + "loss": 0.5261, + "num_input_tokens_seen": 247858736, + "step": 4425 + }, + { + "epoch": 9.855233853006682, + "loss": 0.5880270004272461, + "loss_ce": 0.0006245865952223539, + "loss_iou": 0.267578125, + "loss_num": 0.010498046875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 247858736, + "step": 4425 + }, + { + "epoch": 9.857461024498887, + "grad_norm": 13.907861709594727, + "learning_rate": 1e-06, + "loss": 0.4909, + "num_input_tokens_seen": 247916764, + "step": 4426 + }, + { + "epoch": 9.857461024498887, + "loss": 0.5015993118286133, + "loss_ce": 0.00013441478949971497, + "loss_iou": 0.21875, + "loss_num": 0.012939453125, + "loss_xval": 0.5, + "num_input_tokens_seen": 247916764, + "step": 4426 + }, + { + "epoch": 9.859688195991092, + "grad_norm": 31.806222915649414, + "learning_rate": 1e-06, + "loss": 0.5157, + "num_input_tokens_seen": 247971644, + "step": 4427 + }, + { + "epoch": 9.859688195991092, + "loss": 0.5441058278083801, + "loss_ce": 0.00016053869330789894, + "loss_iou": 0.244140625, + "loss_num": 0.01123046875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 247971644, + "step": 4427 + }, + { + "epoch": 9.861915367483297, + "grad_norm": 13.702202796936035, + "learning_rate": 1e-06, + "loss": 0.4027, + "num_input_tokens_seen": 248027780, + "step": 4428 + }, + { + "epoch": 9.861915367483297, + "loss": 0.3753761351108551, + "loss_ce": 0.00013199455861467868, + "loss_iou": 0.1640625, + "loss_num": 0.00946044921875, + "loss_xval": 0.375, + "num_input_tokens_seen": 248027780, + "step": 4428 + }, + { + "epoch": 9.864142538975502, + "grad_norm": 22.021825790405273, + "learning_rate": 1e-06, + "loss": 0.369, + "num_input_tokens_seen": 248085456, + "step": 4429 + }, + { + "epoch": 9.864142538975502, + "loss": 0.34767791628837585, + "loss_ce": 0.00014372303849086165, + "loss_iou": 0.1572265625, + "loss_num": 0.0064697265625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 248085456, + "step": 4429 + }, + { + "epoch": 9.866369710467707, + "grad_norm": 15.745802879333496, + "learning_rate": 1e-06, + "loss": 0.4453, + "num_input_tokens_seen": 248141132, + "step": 4430 + }, + { + "epoch": 9.866369710467707, + "loss": 0.3866683840751648, + "loss_ce": 0.0001937640190590173, + "loss_iou": 0.15625, + "loss_num": 0.01470947265625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 248141132, + "step": 4430 + }, + { + "epoch": 9.868596881959911, + "grad_norm": 17.44371795654297, + "learning_rate": 1e-06, + "loss": 0.6266, + "num_input_tokens_seen": 248199536, + "step": 4431 + }, + { + "epoch": 9.868596881959911, + "loss": 0.4967302978038788, + "loss_ce": 0.0001482525112805888, + "loss_iou": 0.224609375, + "loss_num": 0.0096435546875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 248199536, + "step": 4431 + }, + { + "epoch": 9.870824053452116, + "grad_norm": 20.9477481842041, + "learning_rate": 1e-06, + "loss": 0.5961, + "num_input_tokens_seen": 248254300, + "step": 4432 + }, + { + "epoch": 9.870824053452116, + "loss": 0.5253418684005737, + "loss_ce": 0.00019535439787432551, + "loss_iou": 0.23828125, + "loss_num": 0.009521484375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 248254300, + "step": 4432 + }, + { + "epoch": 9.873051224944321, + "grad_norm": 32.99359893798828, + "learning_rate": 1e-06, + "loss": 0.6711, + "num_input_tokens_seen": 248311172, + "step": 4433 + }, + { + "epoch": 9.873051224944321, + "loss": 0.6815510988235474, + "loss_ce": 0.00015456396795343608, + "loss_iou": 0.294921875, + "loss_num": 0.0186767578125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 248311172, + "step": 4433 + }, + { + "epoch": 9.875278396436526, + "grad_norm": 31.431236267089844, + "learning_rate": 1e-06, + "loss": 0.4702, + "num_input_tokens_seen": 248366780, + "step": 4434 + }, + { + "epoch": 9.875278396436526, + "loss": 0.4480833113193512, + "loss_ce": 0.0004514938627835363, + "loss_iou": 0.19921875, + "loss_num": 0.00982666015625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 248366780, + "step": 4434 + }, + { + "epoch": 9.877505567928731, + "grad_norm": 14.6542329788208, + "learning_rate": 1e-06, + "loss": 0.6543, + "num_input_tokens_seen": 248422596, + "step": 4435 + }, + { + "epoch": 9.877505567928731, + "loss": 0.7738858461380005, + "loss_ce": 0.00020423725072760135, + "loss_iou": 0.3046875, + "loss_num": 0.032958984375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 248422596, + "step": 4435 + }, + { + "epoch": 9.879732739420936, + "grad_norm": 21.185422897338867, + "learning_rate": 1e-06, + "loss": 0.7841, + "num_input_tokens_seen": 248477628, + "step": 4436 + }, + { + "epoch": 9.879732739420936, + "loss": 0.7956980466842651, + "loss_ce": 0.00016583640535827726, + "loss_iou": 0.330078125, + "loss_num": 0.026611328125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 248477628, + "step": 4436 + }, + { + "epoch": 9.88195991091314, + "grad_norm": 22.979459762573242, + "learning_rate": 1e-06, + "loss": 0.746, + "num_input_tokens_seen": 248531252, + "step": 4437 + }, + { + "epoch": 9.88195991091314, + "loss": 1.0274933576583862, + "loss_ce": 0.00014961442502681166, + "loss_iou": 0.4375, + "loss_num": 0.030517578125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 248531252, + "step": 4437 + }, + { + "epoch": 9.884187082405345, + "grad_norm": 13.26347541809082, + "learning_rate": 1e-06, + "loss": 0.789, + "num_input_tokens_seen": 248587712, + "step": 4438 + }, + { + "epoch": 9.884187082405345, + "loss": 0.8224426507949829, + "loss_ce": 0.0001770372618921101, + "loss_iou": 0.34765625, + "loss_num": 0.025390625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 248587712, + "step": 4438 + }, + { + "epoch": 9.88641425389755, + "grad_norm": 19.138172149658203, + "learning_rate": 1e-06, + "loss": 0.6224, + "num_input_tokens_seen": 248644168, + "step": 4439 + }, + { + "epoch": 9.88641425389755, + "loss": 0.6742221117019653, + "loss_ce": 0.00014987046597525477, + "loss_iou": 0.2734375, + "loss_num": 0.025146484375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 248644168, + "step": 4439 + }, + { + "epoch": 9.888641425389755, + "grad_norm": 14.5908784866333, + "learning_rate": 1e-06, + "loss": 0.448, + "num_input_tokens_seen": 248700288, + "step": 4440 + }, + { + "epoch": 9.888641425389755, + "loss": 0.5669944286346436, + "loss_ce": 0.00022195381461642683, + "loss_iou": 0.2333984375, + "loss_num": 0.020263671875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 248700288, + "step": 4440 + }, + { + "epoch": 9.89086859688196, + "grad_norm": 15.528139114379883, + "learning_rate": 1e-06, + "loss": 0.5742, + "num_input_tokens_seen": 248756128, + "step": 4441 + }, + { + "epoch": 9.89086859688196, + "loss": 0.6929147243499756, + "loss_ce": 0.00016564424731768668, + "loss_iou": 0.283203125, + "loss_num": 0.0255126953125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 248756128, + "step": 4441 + }, + { + "epoch": 9.893095768374165, + "grad_norm": 25.997331619262695, + "learning_rate": 1e-06, + "loss": 0.4781, + "num_input_tokens_seen": 248812652, + "step": 4442 + }, + { + "epoch": 9.893095768374165, + "loss": 0.4952731132507324, + "loss_ce": 0.0001559244265081361, + "loss_iou": 0.23046875, + "loss_num": 0.0069580078125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 248812652, + "step": 4442 + }, + { + "epoch": 9.89532293986637, + "grad_norm": 17.757108688354492, + "learning_rate": 1e-06, + "loss": 0.457, + "num_input_tokens_seen": 248870328, + "step": 4443 + }, + { + "epoch": 9.89532293986637, + "loss": 0.5199582576751709, + "loss_ce": 0.0003048997314181179, + "loss_iou": 0.2294921875, + "loss_num": 0.01220703125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 248870328, + "step": 4443 + }, + { + "epoch": 9.897550111358575, + "grad_norm": 33.18953323364258, + "learning_rate": 1e-06, + "loss": 0.4442, + "num_input_tokens_seen": 248927148, + "step": 4444 + }, + { + "epoch": 9.897550111358575, + "loss": 0.4624471068382263, + "loss_ce": 0.00016684165166225284, + "loss_iou": 0.2060546875, + "loss_num": 0.01007080078125, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 248927148, + "step": 4444 + }, + { + "epoch": 9.89977728285078, + "grad_norm": 16.396703720092773, + "learning_rate": 1e-06, + "loss": 0.695, + "num_input_tokens_seen": 248982876, + "step": 4445 + }, + { + "epoch": 9.89977728285078, + "loss": 0.759984016418457, + "loss_ce": 0.0002184216573368758, + "loss_iou": 0.32421875, + "loss_num": 0.022705078125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 248982876, + "step": 4445 + }, + { + "epoch": 9.902004454342984, + "grad_norm": 355.2792053222656, + "learning_rate": 1e-06, + "loss": 0.6844, + "num_input_tokens_seen": 249036336, + "step": 4446 + }, + { + "epoch": 9.902004454342984, + "loss": 0.8203428983688354, + "loss_ce": 0.0007628132589161396, + "loss_iou": 0.373046875, + "loss_num": 0.01483154296875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 249036336, + "step": 4446 + }, + { + "epoch": 9.90423162583519, + "grad_norm": 20.815412521362305, + "learning_rate": 1e-06, + "loss": 0.493, + "num_input_tokens_seen": 249090512, + "step": 4447 + }, + { + "epoch": 9.90423162583519, + "loss": 0.3990846276283264, + "loss_ce": 0.00015884230379015207, + "loss_iou": 0.173828125, + "loss_num": 0.0103759765625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 249090512, + "step": 4447 + }, + { + "epoch": 9.906458797327394, + "grad_norm": 22.289730072021484, + "learning_rate": 1e-06, + "loss": 0.5334, + "num_input_tokens_seen": 249146280, + "step": 4448 + }, + { + "epoch": 9.906458797327394, + "loss": 0.46716466546058655, + "loss_ce": 0.00012365993461571634, + "loss_iou": 0.205078125, + "loss_num": 0.01153564453125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 249146280, + "step": 4448 + }, + { + "epoch": 9.908685968819599, + "grad_norm": 18.984031677246094, + "learning_rate": 1e-06, + "loss": 0.482, + "num_input_tokens_seen": 249199220, + "step": 4449 + }, + { + "epoch": 9.908685968819599, + "loss": 0.3772040605545044, + "loss_ce": 0.00012887499178759754, + "loss_iou": 0.1669921875, + "loss_num": 0.00848388671875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 249199220, + "step": 4449 + }, + { + "epoch": 9.910913140311804, + "grad_norm": 22.167558670043945, + "learning_rate": 1e-06, + "loss": 0.3869, + "num_input_tokens_seen": 249255288, + "step": 4450 + }, + { + "epoch": 9.910913140311804, + "loss": 0.42713233828544617, + "loss_ce": 0.0001304029137827456, + "loss_iou": 0.1865234375, + "loss_num": 0.01068115234375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 249255288, + "step": 4450 + }, + { + "epoch": 9.913140311804009, + "grad_norm": 16.50093650817871, + "learning_rate": 1e-06, + "loss": 0.4574, + "num_input_tokens_seen": 249309848, + "step": 4451 + }, + { + "epoch": 9.913140311804009, + "loss": 0.3489049971103668, + "loss_ce": 0.0001501307706348598, + "loss_iou": 0.1484375, + "loss_num": 0.0101318359375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 249309848, + "step": 4451 + }, + { + "epoch": 9.915367483296214, + "grad_norm": 26.222118377685547, + "learning_rate": 1e-06, + "loss": 0.6307, + "num_input_tokens_seen": 249365492, + "step": 4452 + }, + { + "epoch": 9.915367483296214, + "loss": 0.5098949670791626, + "loss_ce": 0.00012939177395310253, + "loss_iou": 0.2177734375, + "loss_num": 0.01513671875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 249365492, + "step": 4452 + }, + { + "epoch": 9.917594654788418, + "grad_norm": 26.15787124633789, + "learning_rate": 1e-06, + "loss": 0.4824, + "num_input_tokens_seen": 249420888, + "step": 4453 + }, + { + "epoch": 9.917594654788418, + "loss": 0.34827083349227905, + "loss_ce": 0.0001263211015611887, + "loss_iou": 0.1533203125, + "loss_num": 0.008056640625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 249420888, + "step": 4453 + }, + { + "epoch": 9.919821826280623, + "grad_norm": 21.763919830322266, + "learning_rate": 1e-06, + "loss": 0.5638, + "num_input_tokens_seen": 249474144, + "step": 4454 + }, + { + "epoch": 9.919821826280623, + "loss": 0.48050248622894287, + "loss_ce": 0.00015578939928673208, + "loss_iou": 0.2177734375, + "loss_num": 0.0089111328125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 249474144, + "step": 4454 + }, + { + "epoch": 9.922048997772828, + "grad_norm": 22.63077163696289, + "learning_rate": 1e-06, + "loss": 0.5656, + "num_input_tokens_seen": 249532508, + "step": 4455 + }, + { + "epoch": 9.922048997772828, + "loss": 0.4286133050918579, + "loss_ce": 0.00014652467507403344, + "loss_iou": 0.1943359375, + "loss_num": 0.008056640625, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 249532508, + "step": 4455 + }, + { + "epoch": 9.924276169265033, + "grad_norm": 19.111759185791016, + "learning_rate": 1e-06, + "loss": 0.4905, + "num_input_tokens_seen": 249588256, + "step": 4456 + }, + { + "epoch": 9.924276169265033, + "loss": 0.6744104623794556, + "loss_ce": 0.0005213359836488962, + "loss_iou": 0.279296875, + "loss_num": 0.022705078125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 249588256, + "step": 4456 + }, + { + "epoch": 9.926503340757238, + "grad_norm": 14.64742660522461, + "learning_rate": 1e-06, + "loss": 0.5547, + "num_input_tokens_seen": 249645916, + "step": 4457 + }, + { + "epoch": 9.926503340757238, + "loss": 0.6124250888824463, + "loss_ce": 0.00036456523230299354, + "loss_iou": 0.2578125, + "loss_num": 0.018798828125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 249645916, + "step": 4457 + }, + { + "epoch": 9.928730512249443, + "grad_norm": 30.41196060180664, + "learning_rate": 1e-06, + "loss": 0.6046, + "num_input_tokens_seen": 249699704, + "step": 4458 + }, + { + "epoch": 9.928730512249443, + "loss": 0.43876737356185913, + "loss_ce": 0.00016875306027941406, + "loss_iou": 0.2041015625, + "loss_num": 0.0059814453125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 249699704, + "step": 4458 + }, + { + "epoch": 9.930957683741648, + "grad_norm": 19.998111724853516, + "learning_rate": 1e-06, + "loss": 0.5264, + "num_input_tokens_seen": 249754572, + "step": 4459 + }, + { + "epoch": 9.930957683741648, + "loss": 0.41116297245025635, + "loss_ce": 0.0001522265374660492, + "loss_iou": 0.1806640625, + "loss_num": 0.01007080078125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 249754572, + "step": 4459 + }, + { + "epoch": 9.933184855233852, + "grad_norm": 33.828853607177734, + "learning_rate": 1e-06, + "loss": 0.7319, + "num_input_tokens_seen": 249809712, + "step": 4460 + }, + { + "epoch": 9.933184855233852, + "loss": 0.640634298324585, + "loss_ce": 0.0002534303639549762, + "loss_iou": 0.279296875, + "loss_num": 0.01611328125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 249809712, + "step": 4460 + }, + { + "epoch": 9.935412026726057, + "grad_norm": 540.546875, + "learning_rate": 1e-06, + "loss": 0.5882, + "num_input_tokens_seen": 249864220, + "step": 4461 + }, + { + "epoch": 9.935412026726057, + "loss": 0.4697587788105011, + "loss_ce": 0.00012375140795484185, + "loss_iou": 0.19921875, + "loss_num": 0.01422119140625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 249864220, + "step": 4461 + }, + { + "epoch": 9.937639198218262, + "grad_norm": 36.97883987426758, + "learning_rate": 1e-06, + "loss": 0.6369, + "num_input_tokens_seen": 249917428, + "step": 4462 + }, + { + "epoch": 9.937639198218262, + "loss": 0.5367482900619507, + "loss_ce": 0.00012720399536192417, + "loss_iou": 0.2216796875, + "loss_num": 0.018798828125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 249917428, + "step": 4462 + }, + { + "epoch": 9.939866369710467, + "grad_norm": 16.344486236572266, + "learning_rate": 1e-06, + "loss": 0.5017, + "num_input_tokens_seen": 249975260, + "step": 4463 + }, + { + "epoch": 9.939866369710467, + "loss": 0.4674408435821533, + "loss_ce": 0.0001557128271088004, + "loss_iou": 0.193359375, + "loss_num": 0.0162353515625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 249975260, + "step": 4463 + }, + { + "epoch": 9.942093541202672, + "grad_norm": 20.06435203552246, + "learning_rate": 1e-06, + "loss": 0.4822, + "num_input_tokens_seen": 250028748, + "step": 4464 + }, + { + "epoch": 9.942093541202672, + "loss": 0.562027096748352, + "loss_ce": 0.00013746539480052888, + "loss_iou": 0.2421875, + "loss_num": 0.0152587890625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 250028748, + "step": 4464 + }, + { + "epoch": 9.944320712694877, + "grad_norm": 21.986183166503906, + "learning_rate": 1e-06, + "loss": 0.6695, + "num_input_tokens_seen": 250086348, + "step": 4465 + }, + { + "epoch": 9.944320712694877, + "loss": 0.6947346925735474, + "loss_ce": 0.00015459363930858672, + "loss_iou": 0.28515625, + "loss_num": 0.0245361328125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 250086348, + "step": 4465 + }, + { + "epoch": 9.946547884187082, + "grad_norm": 22.66680335998535, + "learning_rate": 1e-06, + "loss": 0.5404, + "num_input_tokens_seen": 250140012, + "step": 4466 + }, + { + "epoch": 9.946547884187082, + "loss": 0.5031921863555908, + "loss_ce": 0.00014042215480003506, + "loss_iou": 0.22265625, + "loss_num": 0.01165771484375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 250140012, + "step": 4466 + }, + { + "epoch": 9.948775055679288, + "grad_norm": 20.20932388305664, + "learning_rate": 1e-06, + "loss": 0.5721, + "num_input_tokens_seen": 250197260, + "step": 4467 + }, + { + "epoch": 9.948775055679288, + "loss": 0.5846377611160278, + "loss_ce": 0.0001650652993703261, + "loss_iou": 0.25, + "loss_num": 0.016845703125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 250197260, + "step": 4467 + }, + { + "epoch": 9.951002227171493, + "grad_norm": 13.735246658325195, + "learning_rate": 1e-06, + "loss": 0.5371, + "num_input_tokens_seen": 250255264, + "step": 4468 + }, + { + "epoch": 9.951002227171493, + "loss": 0.36416295170783997, + "loss_ce": 0.0001492602750658989, + "loss_iou": 0.16015625, + "loss_num": 0.00872802734375, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 250255264, + "step": 4468 + }, + { + "epoch": 9.953229398663698, + "grad_norm": 31.190519332885742, + "learning_rate": 1e-06, + "loss": 0.6512, + "num_input_tokens_seen": 250309688, + "step": 4469 + }, + { + "epoch": 9.953229398663698, + "loss": 0.8156118392944336, + "loss_ce": 0.00018213686416856945, + "loss_iou": 0.35546875, + "loss_num": 0.0208740234375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 250309688, + "step": 4469 + }, + { + "epoch": 9.955456570155903, + "grad_norm": 20.936838150024414, + "learning_rate": 1e-06, + "loss": 0.6122, + "num_input_tokens_seen": 250364732, + "step": 4470 + }, + { + "epoch": 9.955456570155903, + "loss": 0.6811445951461792, + "loss_ce": 0.0004805437056347728, + "loss_iou": 0.28515625, + "loss_num": 0.0220947265625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 250364732, + "step": 4470 + }, + { + "epoch": 9.957683741648108, + "grad_norm": 20.30494499206543, + "learning_rate": 1e-06, + "loss": 0.5026, + "num_input_tokens_seen": 250421248, + "step": 4471 + }, + { + "epoch": 9.957683741648108, + "loss": 0.4135947823524475, + "loss_ce": 0.00014261619071476161, + "loss_iou": 0.1865234375, + "loss_num": 0.00787353515625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 250421248, + "step": 4471 + }, + { + "epoch": 9.959910913140313, + "grad_norm": 22.17571449279785, + "learning_rate": 1e-06, + "loss": 0.5305, + "num_input_tokens_seen": 250476212, + "step": 4472 + }, + { + "epoch": 9.959910913140313, + "loss": 0.5278682112693787, + "loss_ce": 0.0001582720287842676, + "loss_iou": 0.2275390625, + "loss_num": 0.01446533203125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 250476212, + "step": 4472 + }, + { + "epoch": 9.962138084632517, + "grad_norm": 21.48920249938965, + "learning_rate": 1e-06, + "loss": 0.4718, + "num_input_tokens_seen": 250534156, + "step": 4473 + }, + { + "epoch": 9.962138084632517, + "loss": 0.5655474662780762, + "loss_ce": 0.0002398924989392981, + "loss_iou": 0.265625, + "loss_num": 0.00677490234375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 250534156, + "step": 4473 + }, + { + "epoch": 9.964365256124722, + "grad_norm": 21.838937759399414, + "learning_rate": 1e-06, + "loss": 0.3754, + "num_input_tokens_seen": 250590784, + "step": 4474 + }, + { + "epoch": 9.964365256124722, + "loss": 0.44572579860687256, + "loss_ce": 0.00016913507715798914, + "loss_iou": 0.1875, + "loss_num": 0.0142822265625, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 250590784, + "step": 4474 + }, + { + "epoch": 9.966592427616927, + "grad_norm": 15.84533977508545, + "learning_rate": 1e-06, + "loss": 0.5513, + "num_input_tokens_seen": 250649576, + "step": 4475 + }, + { + "epoch": 9.966592427616927, + "loss": 0.4788917899131775, + "loss_ce": 0.0001320300216320902, + "loss_iou": 0.203125, + "loss_num": 0.0147705078125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 250649576, + "step": 4475 + }, + { + "epoch": 9.968819599109132, + "grad_norm": 15.656432151794434, + "learning_rate": 1e-06, + "loss": 0.4874, + "num_input_tokens_seen": 250705124, + "step": 4476 + }, + { + "epoch": 9.968819599109132, + "loss": 0.49739590287208557, + "loss_ce": 0.00014248676598072052, + "loss_iou": 0.1904296875, + "loss_num": 0.023193359375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 250705124, + "step": 4476 + }, + { + "epoch": 9.971046770601337, + "grad_norm": 26.737167358398438, + "learning_rate": 1e-06, + "loss": 0.6391, + "num_input_tokens_seen": 250761396, + "step": 4477 + }, + { + "epoch": 9.971046770601337, + "loss": 0.6163727045059204, + "loss_ce": 0.00016177864745259285, + "loss_iou": 0.26171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 250761396, + "step": 4477 + }, + { + "epoch": 9.973273942093542, + "grad_norm": 22.787172317504883, + "learning_rate": 1e-06, + "loss": 0.6781, + "num_input_tokens_seen": 250815480, + "step": 4478 + }, + { + "epoch": 9.973273942093542, + "loss": 0.8530862331390381, + "loss_ce": 0.00018101731257047504, + "loss_iou": 0.349609375, + "loss_num": 0.0306396484375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 250815480, + "step": 4478 + }, + { + "epoch": 9.975501113585747, + "grad_norm": 15.702953338623047, + "learning_rate": 1e-06, + "loss": 0.4059, + "num_input_tokens_seen": 250872060, + "step": 4479 + }, + { + "epoch": 9.975501113585747, + "loss": 0.3851672112941742, + "loss_ce": 0.0001574572379468009, + "loss_iou": 0.1640625, + "loss_num": 0.01141357421875, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 250872060, + "step": 4479 + }, + { + "epoch": 9.977728285077951, + "grad_norm": 25.042709350585938, + "learning_rate": 1e-06, + "loss": 0.5131, + "num_input_tokens_seen": 250929860, + "step": 4480 + }, + { + "epoch": 9.977728285077951, + "loss": 0.5049949884414673, + "loss_ce": 0.00011219277803320438, + "loss_iou": 0.2275390625, + "loss_num": 0.01019287109375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 250929860, + "step": 4480 + }, + { + "epoch": 9.979955456570156, + "grad_norm": 15.756171226501465, + "learning_rate": 1e-06, + "loss": 0.5156, + "num_input_tokens_seen": 250984516, + "step": 4481 + }, + { + "epoch": 9.979955456570156, + "loss": 0.5473989248275757, + "loss_ce": 0.00015769051969982684, + "loss_iou": 0.251953125, + "loss_num": 0.0084228515625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 250984516, + "step": 4481 + }, + { + "epoch": 9.982182628062361, + "grad_norm": 16.539581298828125, + "learning_rate": 1e-06, + "loss": 0.6737, + "num_input_tokens_seen": 251041288, + "step": 4482 + }, + { + "epoch": 9.982182628062361, + "loss": 0.6556758880615234, + "loss_ce": 0.00015832131612114608, + "loss_iou": 0.291015625, + "loss_num": 0.01422119140625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 251041288, + "step": 4482 + }, + { + "epoch": 9.984409799554566, + "grad_norm": 19.558635711669922, + "learning_rate": 1e-06, + "loss": 0.5597, + "num_input_tokens_seen": 251098176, + "step": 4483 + }, + { + "epoch": 9.984409799554566, + "loss": 0.43492138385772705, + "loss_ce": 0.00022901118791196495, + "loss_iou": 0.197265625, + "loss_num": 0.0079345703125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 251098176, + "step": 4483 + }, + { + "epoch": 9.98663697104677, + "grad_norm": 14.561528205871582, + "learning_rate": 1e-06, + "loss": 0.5418, + "num_input_tokens_seen": 251154472, + "step": 4484 + }, + { + "epoch": 9.98663697104677, + "loss": 0.41586917638778687, + "loss_ce": 0.0001434582518413663, + "loss_iou": 0.181640625, + "loss_num": 0.010498046875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 251154472, + "step": 4484 + }, + { + "epoch": 9.988864142538976, + "grad_norm": 17.365453720092773, + "learning_rate": 1e-06, + "loss": 0.5246, + "num_input_tokens_seen": 251211700, + "step": 4485 + }, + { + "epoch": 9.988864142538976, + "loss": 0.41728872060775757, + "loss_ce": 0.0001744481414789334, + "loss_iou": 0.1884765625, + "loss_num": 0.00811767578125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 251211700, + "step": 4485 + }, + { + "epoch": 9.99109131403118, + "grad_norm": 21.09174346923828, + "learning_rate": 1e-06, + "loss": 0.586, + "num_input_tokens_seen": 251266340, + "step": 4486 + }, + { + "epoch": 9.99109131403118, + "loss": 0.6495558619499207, + "loss_ce": 0.000141800323035568, + "loss_iou": 0.27734375, + "loss_num": 0.01904296875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 251266340, + "step": 4486 + }, + { + "epoch": 9.993318485523385, + "grad_norm": 13.637633323669434, + "learning_rate": 1e-06, + "loss": 0.5915, + "num_input_tokens_seen": 251323956, + "step": 4487 + }, + { + "epoch": 9.993318485523385, + "loss": 0.6502304077148438, + "loss_ce": 0.00020597720867954195, + "loss_iou": 0.255859375, + "loss_num": 0.0274658203125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 251323956, + "step": 4487 + }, + { + "epoch": 9.99554565701559, + "grad_norm": 21.853805541992188, + "learning_rate": 1e-06, + "loss": 0.3252, + "num_input_tokens_seen": 251383004, + "step": 4488 + }, + { + "epoch": 9.99554565701559, + "loss": 0.4286557734012604, + "loss_ce": 0.0001889891573227942, + "loss_iou": 0.17578125, + "loss_num": 0.0155029296875, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 251383004, + "step": 4488 + }, + { + "epoch": 9.997772828507795, + "grad_norm": 27.582048416137695, + "learning_rate": 1e-06, + "loss": 0.6094, + "num_input_tokens_seen": 251442116, + "step": 4489 + }, + { + "epoch": 9.997772828507795, + "loss": 0.6864469051361084, + "loss_ce": 0.00016763756866566837, + "loss_iou": 0.30078125, + "loss_num": 0.016845703125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 251442116, + "step": 4489 + }, + { + "epoch": 10.0, + "grad_norm": 16.904457092285156, + "learning_rate": 1e-06, + "loss": 0.6229, + "num_input_tokens_seen": 251498292, + "step": 4490 + }, + { + "epoch": 10.0, + "loss": 0.6710529923439026, + "loss_ce": 0.00015453548985533416, + "loss_iou": 0.27734375, + "loss_num": 0.0235595703125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 251498292, + "step": 4490 + }, + { + "epoch": 10.002227171492205, + "grad_norm": 24.753952026367188, + "learning_rate": 1e-06, + "loss": 0.5628, + "num_input_tokens_seen": 251553736, + "step": 4491 + }, + { + "epoch": 10.002227171492205, + "loss": 0.44396665692329407, + "loss_ce": 0.00011899826495209709, + "loss_iou": 0.1953125, + "loss_num": 0.01055908203125, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 251553736, + "step": 4491 + }, + { + "epoch": 10.00445434298441, + "grad_norm": 23.54865074157715, + "learning_rate": 1e-06, + "loss": 0.6842, + "num_input_tokens_seen": 251607144, + "step": 4492 + }, + { + "epoch": 10.00445434298441, + "loss": 0.7414926290512085, + "loss_ce": 0.00015957036521285772, + "loss_iou": 0.298828125, + "loss_num": 0.0284423828125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 251607144, + "step": 4492 + }, + { + "epoch": 10.006681514476615, + "grad_norm": 17.976106643676758, + "learning_rate": 1e-06, + "loss": 0.5584, + "num_input_tokens_seen": 251663048, + "step": 4493 + }, + { + "epoch": 10.006681514476615, + "loss": 0.5196723937988281, + "loss_ce": 0.0001411354896845296, + "loss_iou": 0.201171875, + "loss_num": 0.0234375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 251663048, + "step": 4493 + }, + { + "epoch": 10.00890868596882, + "grad_norm": 14.608015060424805, + "learning_rate": 1e-06, + "loss": 0.4636, + "num_input_tokens_seen": 251719488, + "step": 4494 + }, + { + "epoch": 10.00890868596882, + "loss": 0.5168766975402832, + "loss_ce": 0.00015306880231946707, + "loss_iou": 0.2314453125, + "loss_num": 0.01080322265625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 251719488, + "step": 4494 + }, + { + "epoch": 10.011135857461024, + "grad_norm": 18.117263793945312, + "learning_rate": 1e-06, + "loss": 0.6167, + "num_input_tokens_seen": 251777652, + "step": 4495 + }, + { + "epoch": 10.011135857461024, + "loss": 0.7012206315994263, + "loss_ce": 0.0002929244365077466, + "loss_iou": 0.283203125, + "loss_num": 0.02685546875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 251777652, + "step": 4495 + }, + { + "epoch": 10.01336302895323, + "grad_norm": 23.114547729492188, + "learning_rate": 1e-06, + "loss": 0.6466, + "num_input_tokens_seen": 251834100, + "step": 4496 + }, + { + "epoch": 10.01336302895323, + "loss": 0.4545919597148895, + "loss_ce": 0.00012418595724739134, + "loss_iou": 0.19921875, + "loss_num": 0.010986328125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 251834100, + "step": 4496 + }, + { + "epoch": 10.015590200445434, + "grad_norm": 35.5590705871582, + "learning_rate": 1e-06, + "loss": 0.5079, + "num_input_tokens_seen": 251889480, + "step": 4497 + }, + { + "epoch": 10.015590200445434, + "loss": 0.5074775218963623, + "loss_ce": 0.00015331841132137924, + "loss_iou": 0.220703125, + "loss_num": 0.0133056640625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 251889480, + "step": 4497 + }, + { + "epoch": 10.017817371937639, + "grad_norm": 35.230525970458984, + "learning_rate": 1e-06, + "loss": 0.4227, + "num_input_tokens_seen": 251944744, + "step": 4498 + }, + { + "epoch": 10.017817371937639, + "loss": 0.35149550437927246, + "loss_ce": 0.00011612092202994972, + "loss_iou": 0.1533203125, + "loss_num": 0.0089111328125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 251944744, + "step": 4498 + }, + { + "epoch": 10.020044543429844, + "grad_norm": 17.35042953491211, + "learning_rate": 1e-06, + "loss": 0.5238, + "num_input_tokens_seen": 252001072, + "step": 4499 + }, + { + "epoch": 10.020044543429844, + "loss": 0.5627619624137878, + "loss_ce": 0.0002619452425278723, + "loss_iou": 0.228515625, + "loss_num": 0.02099609375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 252001072, + "step": 4499 + }, + { + "epoch": 10.022271714922049, + "grad_norm": 18.346031188964844, + "learning_rate": 1e-06, + "loss": 0.6645, + "num_input_tokens_seen": 252054980, + "step": 4500 + }, + { + "epoch": 10.022271714922049, + "eval_seeclick_web_CIoU": 0.5782820582389832, + "eval_seeclick_web_GIoU": 0.5778241455554962, + "eval_seeclick_web_IoU": 0.5961449146270752, + "eval_seeclick_web_MAE_all": 0.016307780984789133, + "eval_seeclick_web_MAE_h": 0.008026089053601027, + "eval_seeclick_web_MAE_w": 0.01621978636831045, + "eval_seeclick_web_MAE_x_boxes": 0.009718116372823715, + "eval_seeclick_web_MAE_y_boxes": 0.022209799382835627, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9367778301239014, + "eval_seeclick_web_loss_ce": 0.00020874126494163647, + "eval_seeclick_web_loss_iou": 0.4254150390625, + "eval_seeclick_web_loss_num": 0.013081550598144531, + "eval_seeclick_web_loss_xval": 0.916259765625, + "eval_seeclick_web_runtime": 18.3321, + "eval_seeclick_web_samples_per_second": 2.727, + "eval_seeclick_web_steps_per_second": 0.109, + "num_input_tokens_seen": 252054980, + "step": 4500 + }, + { + "epoch": 10.022271714922049, + "eval_icons_CIoU": 0.2818611115217209, + "eval_icons_GIoU": 0.3074444532394409, + "eval_icons_IoU": 0.3626803755760193, + "eval_icons_MAE_all": 0.06376760825514793, + "eval_icons_MAE_h": 0.038395908661186695, + "eval_icons_MAE_w": 0.06800028681755066, + "eval_icons_MAE_x_boxes": 0.059712398797273636, + "eval_icons_MAE_y_boxes": 0.03969671577215195, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 1.7186964750289917, + "eval_icons_loss_ce": 0.00024977065913844854, + "eval_icons_loss_iou": 0.6683349609375, + "eval_icons_loss_num": 0.06123924255371094, + "eval_icons_loss_xval": 1.6416015625, + "eval_icons_runtime": 19.3941, + "eval_icons_samples_per_second": 2.578, + "eval_icons_steps_per_second": 0.103, + "num_input_tokens_seen": 252054980, + "step": 4500 + }, + { + "epoch": 10.022271714922049, + "eval_screenspot_CIoU": 0.34726441899935406, + "eval_screenspot_GIoU": 0.36475805441538495, + "eval_screenspot_IoU": 0.43036482731501263, + "eval_screenspot_MAE_all": 0.06156049047907194, + "eval_screenspot_MAE_h": 0.0375117938965559, + "eval_screenspot_MAE_w": 0.07428983474771182, + "eval_screenspot_MAE_x_boxes": 0.07907873081664245, + "eval_screenspot_MAE_y_boxes": 0.04031235041717688, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 1.6380661725997925, + "eval_screenspot_loss_ce": 0.00026154937222599983, + "eval_screenspot_loss_iou": 0.6749674479166666, + "eval_screenspot_loss_num": 0.0705553690592448, + "eval_screenspot_loss_xval": 1.7029622395833333, + "eval_screenspot_runtime": 30.7953, + "eval_screenspot_samples_per_second": 2.89, + "eval_screenspot_steps_per_second": 0.097, + "num_input_tokens_seen": 252054980, + "step": 4500 + }, + { + "epoch": 10.022271714922049, + "eval_compot_CIoU": 0.35239382088184357, + "eval_compot_GIoU": 0.36740949749946594, + "eval_compot_IoU": 0.4099326878786087, + "eval_compot_MAE_all": 0.01800244627520442, + "eval_compot_MAE_h": 0.008867041673511267, + "eval_compot_MAE_w": 0.02112545073032379, + "eval_compot_MAE_x_boxes": 0.030241395346820354, + "eval_compot_MAE_y_boxes": 0.0067769435700029135, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3512978553771973, + "eval_compot_loss_ce": 0.0002027387818088755, + "eval_compot_loss_iou": 0.6209716796875, + "eval_compot_loss_num": 0.016811370849609375, + "eval_compot_loss_xval": 1.325439453125, + "eval_compot_runtime": 19.3355, + "eval_compot_samples_per_second": 2.586, + "eval_compot_steps_per_second": 0.103, + "num_input_tokens_seen": 252054980, + "step": 4500 + }, + { + "epoch": 10.022271714922049, + "eval_custom_ui_val_CIoU": 0.46736228962739307, + "eval_custom_ui_val_GIoU": 0.48078184492058224, + "eval_custom_ui_val_IoU": 0.5274362398518456, + "eval_custom_ui_val_MAE_all": 0.03146956084916989, + "eval_custom_ui_val_MAE_h": 0.016498709821866617, + "eval_custom_ui_val_MAE_w": 0.0397563229004542, + "eval_custom_ui_val_MAE_x_boxes": 0.039604716209901705, + "eval_custom_ui_val_MAE_y_boxes": 0.015526494192373421, + "eval_custom_ui_val_inside_bbox": 0.7527006202273898, + "eval_custom_ui_val_loss": 1.2082252502441406, + "eval_custom_ui_val_loss_ce": 0.00023262486178686636, + "eval_custom_ui_val_loss_iou": 0.5123562282986112, + "eval_custom_ui_val_loss_num": 0.02882491217719184, + "eval_custom_ui_val_loss_xval": 1.1688096788194444, + "eval_custom_ui_val_runtime": 55.4656, + "eval_custom_ui_val_samples_per_second": 4.778, + "eval_custom_ui_val_steps_per_second": 0.162, + "num_input_tokens_seen": 252054980, + "step": 4500 + }, + { + "epoch": 10.022271714922049, + "loss": 0.912562906742096, + "loss_ce": 0.00020945594587828964, + "loss_iou": 0.392578125, + "loss_num": 0.0250244140625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 252054980, + "step": 4500 + }, + { + "epoch": 10.024498886414253, + "grad_norm": 17.76951789855957, + "learning_rate": 1e-06, + "loss": 0.4816, + "num_input_tokens_seen": 252110792, + "step": 4501 + }, + { + "epoch": 10.024498886414253, + "loss": 0.40937352180480957, + "loss_ce": 0.00019383057951927185, + "loss_iou": 0.1826171875, + "loss_num": 0.0087890625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 252110792, + "step": 4501 + }, + { + "epoch": 10.026726057906458, + "grad_norm": 13.45481014251709, + "learning_rate": 1e-06, + "loss": 0.6639, + "num_input_tokens_seen": 252168752, + "step": 4502 + }, + { + "epoch": 10.026726057906458, + "loss": 0.7877389788627625, + "loss_ce": 0.0001412917481502518, + "loss_iou": 0.328125, + "loss_num": 0.0264892578125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 252168752, + "step": 4502 + }, + { + "epoch": 10.028953229398663, + "grad_norm": 14.474963188171387, + "learning_rate": 1e-06, + "loss": 0.5939, + "num_input_tokens_seen": 252226176, + "step": 4503 + }, + { + "epoch": 10.028953229398663, + "loss": 0.7263405323028564, + "loss_ce": 0.00014424577238969505, + "loss_iou": 0.279296875, + "loss_num": 0.03369140625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 252226176, + "step": 4503 + }, + { + "epoch": 10.031180400890868, + "grad_norm": 20.8179931640625, + "learning_rate": 1e-06, + "loss": 0.6298, + "num_input_tokens_seen": 252285124, + "step": 4504 + }, + { + "epoch": 10.031180400890868, + "loss": 0.5824363231658936, + "loss_ce": 0.00016091388533823192, + "loss_iou": 0.259765625, + "loss_num": 0.01287841796875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 252285124, + "step": 4504 + }, + { + "epoch": 10.033407572383073, + "grad_norm": 25.60984230041504, + "learning_rate": 1e-06, + "loss": 0.594, + "num_input_tokens_seen": 252339596, + "step": 4505 + }, + { + "epoch": 10.033407572383073, + "loss": 0.47377800941467285, + "loss_ce": 0.00014518320676870644, + "loss_iou": 0.203125, + "loss_num": 0.01336669921875, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 252339596, + "step": 4505 + }, + { + "epoch": 10.035634743875278, + "grad_norm": 16.030750274658203, + "learning_rate": 1e-06, + "loss": 0.6331, + "num_input_tokens_seen": 252395008, + "step": 4506 + }, + { + "epoch": 10.035634743875278, + "loss": 0.6874623894691467, + "loss_ce": 0.0001455222663935274, + "loss_iou": 0.28515625, + "loss_num": 0.0230712890625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 252395008, + "step": 4506 + }, + { + "epoch": 10.037861915367483, + "grad_norm": 24.892925262451172, + "learning_rate": 1e-06, + "loss": 0.6838, + "num_input_tokens_seen": 252449524, + "step": 4507 + }, + { + "epoch": 10.037861915367483, + "loss": 0.7407252788543701, + "loss_ce": 0.00012467037595342845, + "loss_iou": 0.33203125, + "loss_num": 0.01507568359375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 252449524, + "step": 4507 + }, + { + "epoch": 10.040089086859687, + "grad_norm": 20.39448356628418, + "learning_rate": 1e-06, + "loss": 0.4444, + "num_input_tokens_seen": 252507044, + "step": 4508 + }, + { + "epoch": 10.040089086859687, + "loss": 0.5464025139808655, + "loss_ce": 0.0002599266008473933, + "loss_iou": 0.2373046875, + "loss_num": 0.01416015625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 252507044, + "step": 4508 + }, + { + "epoch": 10.042316258351892, + "grad_norm": 28.83409881591797, + "learning_rate": 1e-06, + "loss": 0.7085, + "num_input_tokens_seen": 252561052, + "step": 4509 + }, + { + "epoch": 10.042316258351892, + "loss": 0.6915697455406189, + "loss_ce": 0.00016349003999494016, + "loss_iou": 0.30859375, + "loss_num": 0.0147705078125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 252561052, + "step": 4509 + }, + { + "epoch": 10.044543429844097, + "grad_norm": 22.83576011657715, + "learning_rate": 1e-06, + "loss": 0.4975, + "num_input_tokens_seen": 252613648, + "step": 4510 + }, + { + "epoch": 10.044543429844097, + "loss": 0.3887307047843933, + "loss_ce": 0.0005776156904175878, + "loss_iou": 0.162109375, + "loss_num": 0.012939453125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 252613648, + "step": 4510 + }, + { + "epoch": 10.046770601336302, + "grad_norm": 20.531705856323242, + "learning_rate": 1e-06, + "loss": 0.6052, + "num_input_tokens_seen": 252670492, + "step": 4511 + }, + { + "epoch": 10.046770601336302, + "loss": 0.5842575430870056, + "loss_ce": 0.00015114534471649677, + "loss_iou": 0.25390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 252670492, + "step": 4511 + }, + { + "epoch": 10.048997772828507, + "grad_norm": 19.739933013916016, + "learning_rate": 1e-06, + "loss": 0.9433, + "num_input_tokens_seen": 252725212, + "step": 4512 + }, + { + "epoch": 10.048997772828507, + "loss": 1.3185453414916992, + "loss_ce": 0.0001860101765487343, + "loss_iou": 0.53515625, + "loss_num": 0.049072265625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 252725212, + "step": 4512 + }, + { + "epoch": 10.051224944320714, + "grad_norm": 68.91185760498047, + "learning_rate": 1e-06, + "loss": 0.478, + "num_input_tokens_seen": 252781356, + "step": 4513 + }, + { + "epoch": 10.051224944320714, + "loss": 0.46619629859924316, + "loss_ce": 0.00013182274415157735, + "loss_iou": 0.1982421875, + "loss_num": 0.01397705078125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 252781356, + "step": 4513 + }, + { + "epoch": 10.053452115812918, + "grad_norm": 47.988834381103516, + "learning_rate": 1e-06, + "loss": 0.7151, + "num_input_tokens_seen": 252835956, + "step": 4514 + }, + { + "epoch": 10.053452115812918, + "loss": 0.7893850803375244, + "loss_ce": 0.00032261922024190426, + "loss_iou": 0.3359375, + "loss_num": 0.023681640625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 252835956, + "step": 4514 + }, + { + "epoch": 10.055679287305123, + "grad_norm": 13.075557708740234, + "learning_rate": 1e-06, + "loss": 0.5498, + "num_input_tokens_seen": 252893772, + "step": 4515 + }, + { + "epoch": 10.055679287305123, + "loss": 0.49821239709854126, + "loss_ce": 0.00016552505258005112, + "loss_iou": 0.2109375, + "loss_num": 0.01507568359375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 252893772, + "step": 4515 + }, + { + "epoch": 10.057906458797328, + "grad_norm": 31.43246078491211, + "learning_rate": 1e-06, + "loss": 0.5931, + "num_input_tokens_seen": 252947940, + "step": 4516 + }, + { + "epoch": 10.057906458797328, + "loss": 0.6125627756118774, + "loss_ce": 0.0002580622094683349, + "loss_iou": 0.279296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 252947940, + "step": 4516 + }, + { + "epoch": 10.060133630289533, + "grad_norm": 27.630748748779297, + "learning_rate": 1e-06, + "loss": 0.6048, + "num_input_tokens_seen": 253005340, + "step": 4517 + }, + { + "epoch": 10.060133630289533, + "loss": 0.699859619140625, + "loss_ce": 0.00015256297774612904, + "loss_iou": 0.296875, + "loss_num": 0.021484375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 253005340, + "step": 4517 + }, + { + "epoch": 10.062360801781738, + "grad_norm": 23.77977180480957, + "learning_rate": 1e-06, + "loss": 0.5228, + "num_input_tokens_seen": 253061712, + "step": 4518 + }, + { + "epoch": 10.062360801781738, + "loss": 0.5870422124862671, + "loss_ce": 0.000128203013446182, + "loss_iou": 0.2451171875, + "loss_num": 0.01904296875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 253061712, + "step": 4518 + }, + { + "epoch": 10.064587973273943, + "grad_norm": 20.586177825927734, + "learning_rate": 1e-06, + "loss": 0.581, + "num_input_tokens_seen": 253119552, + "step": 4519 + }, + { + "epoch": 10.064587973273943, + "loss": 0.5409017205238342, + "loss_ce": 0.000130253320094198, + "loss_iou": 0.25390625, + "loss_num": 0.00677490234375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 253119552, + "step": 4519 + }, + { + "epoch": 10.066815144766148, + "grad_norm": 19.27606964111328, + "learning_rate": 1e-06, + "loss": 0.5846, + "num_input_tokens_seen": 253173196, + "step": 4520 + }, + { + "epoch": 10.066815144766148, + "loss": 0.5604423880577087, + "loss_ce": 0.00013963712262921035, + "loss_iou": 0.24609375, + "loss_num": 0.01361083984375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 253173196, + "step": 4520 + }, + { + "epoch": 10.069042316258352, + "grad_norm": 16.18434715270996, + "learning_rate": 1e-06, + "loss": 0.4942, + "num_input_tokens_seen": 253230384, + "step": 4521 + }, + { + "epoch": 10.069042316258352, + "loss": 0.5661949515342712, + "loss_ce": 0.00015493093815166503, + "loss_iou": 0.2421875, + "loss_num": 0.0164794921875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 253230384, + "step": 4521 + }, + { + "epoch": 10.071269487750557, + "grad_norm": 22.953781127929688, + "learning_rate": 1e-06, + "loss": 0.5444, + "num_input_tokens_seen": 253286812, + "step": 4522 + }, + { + "epoch": 10.071269487750557, + "loss": 0.5951232314109802, + "loss_ce": 0.00015253589663188905, + "loss_iou": 0.26953125, + "loss_num": 0.011474609375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 253286812, + "step": 4522 + }, + { + "epoch": 10.073496659242762, + "grad_norm": 18.672510147094727, + "learning_rate": 1e-06, + "loss": 0.5558, + "num_input_tokens_seen": 253343172, + "step": 4523 + }, + { + "epoch": 10.073496659242762, + "loss": 0.5056633949279785, + "loss_ce": 0.00017027268768288195, + "loss_iou": 0.2177734375, + "loss_num": 0.01409912109375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 253343172, + "step": 4523 + }, + { + "epoch": 10.075723830734967, + "grad_norm": 17.472490310668945, + "learning_rate": 1e-06, + "loss": 0.6312, + "num_input_tokens_seen": 253397752, + "step": 4524 + }, + { + "epoch": 10.075723830734967, + "loss": 0.6256510019302368, + "loss_ce": 0.0001627619785722345, + "loss_iou": 0.287109375, + "loss_num": 0.010009765625, + "loss_xval": 0.625, + "num_input_tokens_seen": 253397752, + "step": 4524 + }, + { + "epoch": 10.077951002227172, + "grad_norm": 13.800442695617676, + "learning_rate": 1e-06, + "loss": 0.4006, + "num_input_tokens_seen": 253454584, + "step": 4525 + }, + { + "epoch": 10.077951002227172, + "loss": 0.4295790493488312, + "loss_ce": 0.00013570842565968633, + "loss_iou": 0.1826171875, + "loss_num": 0.0130615234375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 253454584, + "step": 4525 + }, + { + "epoch": 10.080178173719377, + "grad_norm": 83.25904846191406, + "learning_rate": 1e-06, + "loss": 0.5269, + "num_input_tokens_seen": 253512564, + "step": 4526 + }, + { + "epoch": 10.080178173719377, + "loss": 0.45021358132362366, + "loss_ce": 0.00014032571925781667, + "loss_iou": 0.2060546875, + "loss_num": 0.007476806640625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 253512564, + "step": 4526 + }, + { + "epoch": 10.082405345211582, + "grad_norm": 32.7124137878418, + "learning_rate": 1e-06, + "loss": 0.6008, + "num_input_tokens_seen": 253569208, + "step": 4527 + }, + { + "epoch": 10.082405345211582, + "loss": 0.5963279008865356, + "loss_ce": 0.00013654123176820576, + "loss_iou": 0.267578125, + "loss_num": 0.01220703125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 253569208, + "step": 4527 + }, + { + "epoch": 10.084632516703786, + "grad_norm": 18.75583839416504, + "learning_rate": 1e-06, + "loss": 0.436, + "num_input_tokens_seen": 253625684, + "step": 4528 + }, + { + "epoch": 10.084632516703786, + "loss": 0.42995163798332214, + "loss_ce": 0.00014207106141839176, + "loss_iou": 0.1767578125, + "loss_num": 0.01519775390625, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 253625684, + "step": 4528 + }, + { + "epoch": 10.086859688195991, + "grad_norm": 55.7838020324707, + "learning_rate": 1e-06, + "loss": 0.7112, + "num_input_tokens_seen": 253679644, + "step": 4529 + }, + { + "epoch": 10.086859688195991, + "loss": 0.73432457447052, + "loss_ce": 0.00019371899543330073, + "loss_iou": 0.326171875, + "loss_num": 0.0164794921875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 253679644, + "step": 4529 + }, + { + "epoch": 10.089086859688196, + "grad_norm": 15.54879379272461, + "learning_rate": 1e-06, + "loss": 0.5008, + "num_input_tokens_seen": 253738164, + "step": 4530 + }, + { + "epoch": 10.089086859688196, + "loss": 0.4535183012485504, + "loss_ce": 0.00014916164218448102, + "loss_iou": 0.1875, + "loss_num": 0.015625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 253738164, + "step": 4530 + }, + { + "epoch": 10.091314031180401, + "grad_norm": 26.567584991455078, + "learning_rate": 1e-06, + "loss": 0.6987, + "num_input_tokens_seen": 253793512, + "step": 4531 + }, + { + "epoch": 10.091314031180401, + "loss": 0.8244662880897522, + "loss_ce": 0.0002475357032380998, + "loss_iou": 0.34375, + "loss_num": 0.0274658203125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 253793512, + "step": 4531 + }, + { + "epoch": 10.093541202672606, + "grad_norm": 17.407928466796875, + "learning_rate": 1e-06, + "loss": 0.6066, + "num_input_tokens_seen": 253851088, + "step": 4532 + }, + { + "epoch": 10.093541202672606, + "loss": 0.7939009666442871, + "loss_ce": 0.0009321961551904678, + "loss_iou": 0.32421875, + "loss_num": 0.02880859375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 253851088, + "step": 4532 + }, + { + "epoch": 10.09576837416481, + "grad_norm": 15.048906326293945, + "learning_rate": 1e-06, + "loss": 0.4981, + "num_input_tokens_seen": 253906892, + "step": 4533 + }, + { + "epoch": 10.09576837416481, + "loss": 0.5735203623771667, + "loss_ce": 0.00015607741079293191, + "loss_iou": 0.232421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 253906892, + "step": 4533 + }, + { + "epoch": 10.097995545657016, + "grad_norm": 19.791410446166992, + "learning_rate": 1e-06, + "loss": 0.5475, + "num_input_tokens_seen": 253964416, + "step": 4534 + }, + { + "epoch": 10.097995545657016, + "loss": 0.5835007429122925, + "loss_ce": 0.00014201825251802802, + "loss_iou": 0.2490234375, + "loss_num": 0.01708984375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 253964416, + "step": 4534 + }, + { + "epoch": 10.10022271714922, + "grad_norm": 16.358074188232422, + "learning_rate": 1e-06, + "loss": 0.3828, + "num_input_tokens_seen": 254021404, + "step": 4535 + }, + { + "epoch": 10.10022271714922, + "loss": 0.48548585176467896, + "loss_ce": 0.00013428418606054038, + "loss_iou": 0.2099609375, + "loss_num": 0.01300048828125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 254021404, + "step": 4535 + }, + { + "epoch": 10.102449888641425, + "grad_norm": 20.186296463012695, + "learning_rate": 1e-06, + "loss": 0.7477, + "num_input_tokens_seen": 254076892, + "step": 4536 + }, + { + "epoch": 10.102449888641425, + "loss": 0.8367500305175781, + "loss_ce": 0.0001716263359412551, + "loss_iou": 0.328125, + "loss_num": 0.035888671875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 254076892, + "step": 4536 + }, + { + "epoch": 10.10467706013363, + "grad_norm": 23.128829956054688, + "learning_rate": 1e-06, + "loss": 0.6005, + "num_input_tokens_seen": 254132820, + "step": 4537 + }, + { + "epoch": 10.10467706013363, + "loss": 0.7570396661758423, + "loss_ce": 0.00020372896688058972, + "loss_iou": 0.345703125, + "loss_num": 0.01300048828125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 254132820, + "step": 4537 + }, + { + "epoch": 10.106904231625835, + "grad_norm": 19.934675216674805, + "learning_rate": 1e-06, + "loss": 0.5398, + "num_input_tokens_seen": 254190556, + "step": 4538 + }, + { + "epoch": 10.106904231625835, + "loss": 0.46338510513305664, + "loss_ce": 0.0004944695974700153, + "loss_iou": 0.2080078125, + "loss_num": 0.009521484375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 254190556, + "step": 4538 + }, + { + "epoch": 10.10913140311804, + "grad_norm": 24.60442352294922, + "learning_rate": 1e-06, + "loss": 0.6456, + "num_input_tokens_seen": 254248000, + "step": 4539 + }, + { + "epoch": 10.10913140311804, + "loss": 0.7887189984321594, + "loss_ce": 0.0001448030088795349, + "loss_iou": 0.310546875, + "loss_num": 0.033447265625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 254248000, + "step": 4539 + }, + { + "epoch": 10.111358574610245, + "grad_norm": 16.316864013671875, + "learning_rate": 1e-06, + "loss": 0.5443, + "num_input_tokens_seen": 254306248, + "step": 4540 + }, + { + "epoch": 10.111358574610245, + "loss": 0.3741779625415802, + "loss_ce": 0.00015453985542990267, + "loss_iou": 0.15625, + "loss_num": 0.01220703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 254306248, + "step": 4540 + }, + { + "epoch": 10.11358574610245, + "grad_norm": 19.49997901916504, + "learning_rate": 1e-06, + "loss": 0.6895, + "num_input_tokens_seen": 254359852, + "step": 4541 + }, + { + "epoch": 10.11358574610245, + "loss": 0.6783727407455444, + "loss_ce": 0.00015005419845692813, + "loss_iou": 0.27734375, + "loss_num": 0.0250244140625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 254359852, + "step": 4541 + }, + { + "epoch": 10.115812917594655, + "grad_norm": 16.376052856445312, + "learning_rate": 1e-06, + "loss": 0.4816, + "num_input_tokens_seen": 254414044, + "step": 4542 + }, + { + "epoch": 10.115812917594655, + "loss": 0.526771605014801, + "loss_ce": 0.00016028305981308222, + "loss_iou": 0.2294921875, + "loss_num": 0.01385498046875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 254414044, + "step": 4542 + }, + { + "epoch": 10.11804008908686, + "grad_norm": 20.01656723022461, + "learning_rate": 1e-06, + "loss": 0.5995, + "num_input_tokens_seen": 254470696, + "step": 4543 + }, + { + "epoch": 10.11804008908686, + "loss": 0.538500964641571, + "loss_ce": 0.000170878556673415, + "loss_iou": 0.2109375, + "loss_num": 0.02294921875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 254470696, + "step": 4543 + }, + { + "epoch": 10.120267260579064, + "grad_norm": 15.220352172851562, + "learning_rate": 1e-06, + "loss": 0.5189, + "num_input_tokens_seen": 254526240, + "step": 4544 + }, + { + "epoch": 10.120267260579064, + "loss": 0.5009964108467102, + "loss_ce": 0.0002639779122546315, + "loss_iou": 0.2236328125, + "loss_num": 0.0106201171875, + "loss_xval": 0.5, + "num_input_tokens_seen": 254526240, + "step": 4544 + }, + { + "epoch": 10.122494432071269, + "grad_norm": 15.65402889251709, + "learning_rate": 1e-06, + "loss": 0.5134, + "num_input_tokens_seen": 254582940, + "step": 4545 + }, + { + "epoch": 10.122494432071269, + "loss": 0.52699214220047, + "loss_ce": 0.0001366714423056692, + "loss_iou": 0.232421875, + "loss_num": 0.012451171875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 254582940, + "step": 4545 + }, + { + "epoch": 10.124721603563474, + "grad_norm": 21.156890869140625, + "learning_rate": 1e-06, + "loss": 0.5, + "num_input_tokens_seen": 254638184, + "step": 4546 + }, + { + "epoch": 10.124721603563474, + "loss": 0.5189378261566162, + "loss_ce": 0.0001390243851346895, + "loss_iou": 0.236328125, + "loss_num": 0.009033203125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 254638184, + "step": 4546 + }, + { + "epoch": 10.126948775055679, + "grad_norm": 20.83241844177246, + "learning_rate": 1e-06, + "loss": 0.4975, + "num_input_tokens_seen": 254692884, + "step": 4547 + }, + { + "epoch": 10.126948775055679, + "loss": 0.4995710849761963, + "loss_ce": 0.00018145760986953974, + "loss_iou": 0.21484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.5, + "num_input_tokens_seen": 254692884, + "step": 4547 + }, + { + "epoch": 10.129175946547884, + "grad_norm": 146.1394805908203, + "learning_rate": 1e-06, + "loss": 0.5299, + "num_input_tokens_seen": 254749104, + "step": 4548 + }, + { + "epoch": 10.129175946547884, + "loss": 0.6289869546890259, + "loss_ce": 0.001545518171042204, + "loss_iou": 0.240234375, + "loss_num": 0.0294189453125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 254749104, + "step": 4548 + }, + { + "epoch": 10.131403118040089, + "grad_norm": 21.525724411010742, + "learning_rate": 1e-06, + "loss": 0.4352, + "num_input_tokens_seen": 254806856, + "step": 4549 + }, + { + "epoch": 10.131403118040089, + "loss": 0.44396907091140747, + "loss_ce": 0.00012142823834437877, + "loss_iou": 0.203125, + "loss_num": 0.00738525390625, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 254806856, + "step": 4549 + }, + { + "epoch": 10.133630289532293, + "grad_norm": 15.656088829040527, + "learning_rate": 1e-06, + "loss": 0.486, + "num_input_tokens_seen": 254862556, + "step": 4550 + }, + { + "epoch": 10.133630289532293, + "loss": 0.3813803195953369, + "loss_ce": 0.0001547573774587363, + "loss_iou": 0.162109375, + "loss_num": 0.011474609375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 254862556, + "step": 4550 + }, + { + "epoch": 10.135857461024498, + "grad_norm": 18.48883628845215, + "learning_rate": 1e-06, + "loss": 0.4152, + "num_input_tokens_seen": 254921356, + "step": 4551 + }, + { + "epoch": 10.135857461024498, + "loss": 0.4063987135887146, + "loss_ce": 0.00027079382562078536, + "loss_iou": 0.1796875, + "loss_num": 0.00958251953125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 254921356, + "step": 4551 + }, + { + "epoch": 10.138084632516703, + "grad_norm": 14.69189739227295, + "learning_rate": 1e-06, + "loss": 0.576, + "num_input_tokens_seen": 254979752, + "step": 4552 + }, + { + "epoch": 10.138084632516703, + "loss": 0.520897388458252, + "loss_ce": 0.00014549962361343205, + "loss_iou": 0.2216796875, + "loss_num": 0.0152587890625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 254979752, + "step": 4552 + }, + { + "epoch": 10.140311804008908, + "grad_norm": 16.03564453125, + "learning_rate": 1e-06, + "loss": 0.4583, + "num_input_tokens_seen": 255037852, + "step": 4553 + }, + { + "epoch": 10.140311804008908, + "loss": 0.500576376914978, + "loss_ce": 0.0005153242964297533, + "loss_iou": 0.2275390625, + "loss_num": 0.0087890625, + "loss_xval": 0.5, + "num_input_tokens_seen": 255037852, + "step": 4553 + }, + { + "epoch": 10.142538975501113, + "grad_norm": 14.397992134094238, + "learning_rate": 1e-06, + "loss": 0.5558, + "num_input_tokens_seen": 255095480, + "step": 4554 + }, + { + "epoch": 10.142538975501113, + "loss": 0.4591131806373596, + "loss_ce": 0.0001288054045289755, + "loss_iou": 0.2080078125, + "loss_num": 0.0086669921875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 255095480, + "step": 4554 + }, + { + "epoch": 10.144766146993318, + "grad_norm": 16.567859649658203, + "learning_rate": 1e-06, + "loss": 0.3819, + "num_input_tokens_seen": 255154316, + "step": 4555 + }, + { + "epoch": 10.144766146993318, + "loss": 0.3125069737434387, + "loss_ce": 0.0001290409272769466, + "loss_iou": 0.115234375, + "loss_num": 0.0164794921875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 255154316, + "step": 4555 + }, + { + "epoch": 10.146993318485523, + "grad_norm": 13.150337219238281, + "learning_rate": 1e-06, + "loss": 0.4094, + "num_input_tokens_seen": 255211692, + "step": 4556 + }, + { + "epoch": 10.146993318485523, + "loss": 0.3635147213935852, + "loss_ce": 0.00011139338312204927, + "loss_iou": 0.15625, + "loss_num": 0.0101318359375, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 255211692, + "step": 4556 + }, + { + "epoch": 10.14922048997773, + "grad_norm": 18.678987503051758, + "learning_rate": 1e-06, + "loss": 0.2899, + "num_input_tokens_seen": 255265416, + "step": 4557 + }, + { + "epoch": 10.14922048997773, + "loss": 0.3159905672073364, + "loss_ce": 0.0001946770935319364, + "loss_iou": 0.130859375, + "loss_num": 0.01080322265625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 255265416, + "step": 4557 + }, + { + "epoch": 10.151447661469934, + "grad_norm": 15.587052345275879, + "learning_rate": 1e-06, + "loss": 0.5265, + "num_input_tokens_seen": 255319560, + "step": 4558 + }, + { + "epoch": 10.151447661469934, + "loss": 0.3966302275657654, + "loss_ce": 0.0001458691549487412, + "loss_iou": 0.177734375, + "loss_num": 0.00830078125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 255319560, + "step": 4558 + }, + { + "epoch": 10.153674832962139, + "grad_norm": 19.554283142089844, + "learning_rate": 1e-06, + "loss": 0.5922, + "num_input_tokens_seen": 255372816, + "step": 4559 + }, + { + "epoch": 10.153674832962139, + "loss": 0.5624641180038452, + "loss_ce": 0.0002082242863252759, + "loss_iou": 0.2158203125, + "loss_num": 0.0260009765625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 255372816, + "step": 4559 + }, + { + "epoch": 10.155902004454344, + "grad_norm": 27.854368209838867, + "learning_rate": 1e-06, + "loss": 0.5583, + "num_input_tokens_seen": 255426296, + "step": 4560 + }, + { + "epoch": 10.155902004454344, + "loss": 0.5693508386611938, + "loss_ce": 0.00013697383110411465, + "loss_iou": 0.2373046875, + "loss_num": 0.018798828125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 255426296, + "step": 4560 + }, + { + "epoch": 10.158129175946549, + "grad_norm": 22.34395408630371, + "learning_rate": 1e-06, + "loss": 0.4309, + "num_input_tokens_seen": 255479824, + "step": 4561 + }, + { + "epoch": 10.158129175946549, + "loss": 0.3726035952568054, + "loss_ce": 0.0002891222247853875, + "loss_iou": 0.1494140625, + "loss_num": 0.0147705078125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 255479824, + "step": 4561 + }, + { + "epoch": 10.160356347438753, + "grad_norm": 16.66666603088379, + "learning_rate": 1e-06, + "loss": 0.4812, + "num_input_tokens_seen": 255533776, + "step": 4562 + }, + { + "epoch": 10.160356347438753, + "loss": 0.46586695313453674, + "loss_ce": 0.0002907839370891452, + "loss_iou": 0.1748046875, + "loss_num": 0.023193359375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 255533776, + "step": 4562 + }, + { + "epoch": 10.162583518930958, + "grad_norm": 15.96545696258545, + "learning_rate": 1e-06, + "loss": 0.633, + "num_input_tokens_seen": 255592708, + "step": 4563 + }, + { + "epoch": 10.162583518930958, + "loss": 0.7283331751823425, + "loss_ce": 0.00018375377112533897, + "loss_iou": 0.3203125, + "loss_num": 0.017578125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 255592708, + "step": 4563 + }, + { + "epoch": 10.164810690423163, + "grad_norm": 17.769378662109375, + "learning_rate": 1e-06, + "loss": 0.5003, + "num_input_tokens_seen": 255651044, + "step": 4564 + }, + { + "epoch": 10.164810690423163, + "loss": 0.47229546308517456, + "loss_ce": 0.00012747224536724389, + "loss_iou": 0.1962890625, + "loss_num": 0.0159912109375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 255651044, + "step": 4564 + }, + { + "epoch": 10.167037861915368, + "grad_norm": 37.10343933105469, + "learning_rate": 1e-06, + "loss": 0.5678, + "num_input_tokens_seen": 255706820, + "step": 4565 + }, + { + "epoch": 10.167037861915368, + "loss": 0.5193136930465698, + "loss_ce": 0.00027068209601566195, + "loss_iou": 0.2255859375, + "loss_num": 0.01373291015625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 255706820, + "step": 4565 + }, + { + "epoch": 10.169265033407573, + "grad_norm": 16.142242431640625, + "learning_rate": 1e-06, + "loss": 0.5588, + "num_input_tokens_seen": 255764732, + "step": 4566 + }, + { + "epoch": 10.169265033407573, + "loss": 0.6505287885665894, + "loss_ce": 0.00013815713464282453, + "loss_iou": 0.29296875, + "loss_num": 0.0133056640625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 255764732, + "step": 4566 + }, + { + "epoch": 10.171492204899778, + "grad_norm": 26.361278533935547, + "learning_rate": 1e-06, + "loss": 0.5016, + "num_input_tokens_seen": 255821704, + "step": 4567 + }, + { + "epoch": 10.171492204899778, + "loss": 0.5541030168533325, + "loss_ce": 0.0001479470229241997, + "loss_iou": 0.240234375, + "loss_num": 0.0150146484375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 255821704, + "step": 4567 + }, + { + "epoch": 10.173719376391983, + "grad_norm": 14.757318496704102, + "learning_rate": 1e-06, + "loss": 0.5656, + "num_input_tokens_seen": 255880672, + "step": 4568 + }, + { + "epoch": 10.173719376391983, + "loss": 0.6377292275428772, + "loss_ce": 0.00015599204925820231, + "loss_iou": 0.263671875, + "loss_num": 0.0223388671875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 255880672, + "step": 4568 + }, + { + "epoch": 10.175946547884188, + "grad_norm": 20.087034225463867, + "learning_rate": 1e-06, + "loss": 0.5458, + "num_input_tokens_seen": 255938112, + "step": 4569 + }, + { + "epoch": 10.175946547884188, + "loss": 0.5018346905708313, + "loss_ce": 0.002200859831646085, + "loss_iou": 0.2265625, + "loss_num": 0.0093994140625, + "loss_xval": 0.5, + "num_input_tokens_seen": 255938112, + "step": 4569 + }, + { + "epoch": 10.178173719376392, + "grad_norm": 17.099891662597656, + "learning_rate": 1e-06, + "loss": 0.6128, + "num_input_tokens_seen": 255994480, + "step": 4570 + }, + { + "epoch": 10.178173719376392, + "loss": 0.6893445253372192, + "loss_ce": 0.00013553063035942614, + "loss_iou": 0.27734375, + "loss_num": 0.0267333984375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 255994480, + "step": 4570 + }, + { + "epoch": 10.180400890868597, + "grad_norm": 24.685178756713867, + "learning_rate": 1e-06, + "loss": 0.5695, + "num_input_tokens_seen": 256051472, + "step": 4571 + }, + { + "epoch": 10.180400890868597, + "loss": 0.691739022731781, + "loss_ce": 0.0013093581655994058, + "loss_iou": 0.306640625, + "loss_num": 0.015380859375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 256051472, + "step": 4571 + }, + { + "epoch": 10.182628062360802, + "grad_norm": 22.65061378479004, + "learning_rate": 1e-06, + "loss": 0.5629, + "num_input_tokens_seen": 256104576, + "step": 4572 + }, + { + "epoch": 10.182628062360802, + "loss": 0.7198662757873535, + "loss_ce": 0.00013974003377370536, + "loss_iou": 0.306640625, + "loss_num": 0.021240234375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 256104576, + "step": 4572 + }, + { + "epoch": 10.184855233853007, + "grad_norm": 21.435075759887695, + "learning_rate": 1e-06, + "loss": 0.4938, + "num_input_tokens_seen": 256159868, + "step": 4573 + }, + { + "epoch": 10.184855233853007, + "loss": 0.5475330352783203, + "loss_ce": 0.00016974794561974704, + "loss_iou": 0.240234375, + "loss_num": 0.0133056640625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 256159868, + "step": 4573 + }, + { + "epoch": 10.187082405345212, + "grad_norm": 15.654433250427246, + "learning_rate": 1e-06, + "loss": 0.5428, + "num_input_tokens_seen": 256216408, + "step": 4574 + }, + { + "epoch": 10.187082405345212, + "loss": 0.6571834087371826, + "loss_ce": 0.00020100505207665265, + "loss_iou": 0.2734375, + "loss_num": 0.0224609375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 256216408, + "step": 4574 + }, + { + "epoch": 10.189309576837417, + "grad_norm": 20.29172706604004, + "learning_rate": 1e-06, + "loss": 0.4126, + "num_input_tokens_seen": 256272580, + "step": 4575 + }, + { + "epoch": 10.189309576837417, + "loss": 0.3565613627433777, + "loss_ce": 0.00011605105100898072, + "loss_iou": 0.154296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 256272580, + "step": 4575 + }, + { + "epoch": 10.191536748329622, + "grad_norm": 47.00114440917969, + "learning_rate": 1e-06, + "loss": 0.486, + "num_input_tokens_seen": 256327452, + "step": 4576 + }, + { + "epoch": 10.191536748329622, + "loss": 0.6286421418190002, + "loss_ce": 0.00022419106971938163, + "loss_iou": 0.263671875, + "loss_num": 0.0198974609375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 256327452, + "step": 4576 + }, + { + "epoch": 10.193763919821826, + "grad_norm": 15.635924339294434, + "learning_rate": 1e-06, + "loss": 0.547, + "num_input_tokens_seen": 256383876, + "step": 4577 + }, + { + "epoch": 10.193763919821826, + "loss": 0.6068173050880432, + "loss_ce": 0.00012786718434654176, + "loss_iou": 0.267578125, + "loss_num": 0.01409912109375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 256383876, + "step": 4577 + }, + { + "epoch": 10.195991091314031, + "grad_norm": 24.49083137512207, + "learning_rate": 1e-06, + "loss": 0.4668, + "num_input_tokens_seen": 256441820, + "step": 4578 + }, + { + "epoch": 10.195991091314031, + "loss": 0.49414756894111633, + "loss_ce": 0.00012899917783215642, + "loss_iou": 0.21875, + "loss_num": 0.0113525390625, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 256441820, + "step": 4578 + }, + { + "epoch": 10.198218262806236, + "grad_norm": 18.868743896484375, + "learning_rate": 1e-06, + "loss": 0.7535, + "num_input_tokens_seen": 256495464, + "step": 4579 + }, + { + "epoch": 10.198218262806236, + "loss": 0.7945007681846619, + "loss_ce": 0.0001892938744276762, + "loss_iou": 0.30859375, + "loss_num": 0.03564453125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 256495464, + "step": 4579 + }, + { + "epoch": 10.200445434298441, + "grad_norm": 18.00078773498535, + "learning_rate": 1e-06, + "loss": 0.5301, + "num_input_tokens_seen": 256551316, + "step": 4580 + }, + { + "epoch": 10.200445434298441, + "loss": 0.6253817677497864, + "loss_ce": 0.000137610943056643, + "loss_iou": 0.2734375, + "loss_num": 0.01531982421875, + "loss_xval": 0.625, + "num_input_tokens_seen": 256551316, + "step": 4580 + }, + { + "epoch": 10.202672605790646, + "grad_norm": 14.672399520874023, + "learning_rate": 1e-06, + "loss": 0.5689, + "num_input_tokens_seen": 256607460, + "step": 4581 + }, + { + "epoch": 10.202672605790646, + "loss": 0.7001278400421143, + "loss_ce": 0.0001766737550497055, + "loss_iou": 0.283203125, + "loss_num": 0.0267333984375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 256607460, + "step": 4581 + }, + { + "epoch": 10.20489977728285, + "grad_norm": 27.121843338012695, + "learning_rate": 1e-06, + "loss": 0.3811, + "num_input_tokens_seen": 256662608, + "step": 4582 + }, + { + "epoch": 10.20489977728285, + "loss": 0.28326690196990967, + "loss_ce": 0.00012479553697630763, + "loss_iou": 0.107421875, + "loss_num": 0.013671875, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 256662608, + "step": 4582 + }, + { + "epoch": 10.207126948775056, + "grad_norm": 22.3253231048584, + "learning_rate": 1e-06, + "loss": 0.49, + "num_input_tokens_seen": 256719680, + "step": 4583 + }, + { + "epoch": 10.207126948775056, + "loss": 0.5782710313796997, + "loss_ce": 0.0001460201747249812, + "loss_iou": 0.2451171875, + "loss_num": 0.0174560546875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 256719680, + "step": 4583 + }, + { + "epoch": 10.20935412026726, + "grad_norm": 15.034200668334961, + "learning_rate": 1e-06, + "loss": 0.3748, + "num_input_tokens_seen": 256775480, + "step": 4584 + }, + { + "epoch": 10.20935412026726, + "loss": 0.3525688052177429, + "loss_ce": 0.0001518014323664829, + "loss_iou": 0.1484375, + "loss_num": 0.01123046875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 256775480, + "step": 4584 + }, + { + "epoch": 10.211581291759465, + "grad_norm": 20.57643699645996, + "learning_rate": 1e-06, + "loss": 0.4699, + "num_input_tokens_seen": 256830920, + "step": 4585 + }, + { + "epoch": 10.211581291759465, + "loss": 0.6310067772865295, + "loss_ce": 0.00014741039194632322, + "loss_iou": 0.28125, + "loss_num": 0.01348876953125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 256830920, + "step": 4585 + }, + { + "epoch": 10.21380846325167, + "grad_norm": 19.190690994262695, + "learning_rate": 1e-06, + "loss": 0.6572, + "num_input_tokens_seen": 256885536, + "step": 4586 + }, + { + "epoch": 10.21380846325167, + "loss": 0.8363385200500488, + "loss_ce": 0.0001568598672747612, + "loss_iou": 0.36328125, + "loss_num": 0.022216796875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 256885536, + "step": 4586 + }, + { + "epoch": 10.216035634743875, + "grad_norm": 19.48312759399414, + "learning_rate": 1e-06, + "loss": 0.7196, + "num_input_tokens_seen": 256944132, + "step": 4587 + }, + { + "epoch": 10.216035634743875, + "loss": 0.6926649808883667, + "loss_ce": 0.00016006956866476685, + "loss_iou": 0.3046875, + "loss_num": 0.0164794921875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 256944132, + "step": 4587 + }, + { + "epoch": 10.21826280623608, + "grad_norm": 28.042089462280273, + "learning_rate": 1e-06, + "loss": 0.6051, + "num_input_tokens_seen": 256998580, + "step": 4588 + }, + { + "epoch": 10.21826280623608, + "loss": 0.7248637080192566, + "loss_ce": 0.0006205601966939867, + "loss_iou": 0.30078125, + "loss_num": 0.0247802734375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 256998580, + "step": 4588 + }, + { + "epoch": 10.220489977728285, + "grad_norm": 17.185420989990234, + "learning_rate": 1e-06, + "loss": 0.427, + "num_input_tokens_seen": 257053016, + "step": 4589 + }, + { + "epoch": 10.220489977728285, + "loss": 0.4973496198654175, + "loss_ce": 0.0002793156891129911, + "loss_iou": 0.1982421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 257053016, + "step": 4589 + }, + { + "epoch": 10.22271714922049, + "grad_norm": 22.403812408447266, + "learning_rate": 1e-06, + "loss": 0.4526, + "num_input_tokens_seen": 257109612, + "step": 4590 + }, + { + "epoch": 10.22271714922049, + "loss": 0.4690048098564148, + "loss_ce": 0.00013278050755616277, + "loss_iou": 0.2080078125, + "loss_num": 0.010498046875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 257109612, + "step": 4590 + }, + { + "epoch": 10.224944320712694, + "grad_norm": 19.512828826904297, + "learning_rate": 1e-06, + "loss": 0.5368, + "num_input_tokens_seen": 257166788, + "step": 4591 + }, + { + "epoch": 10.224944320712694, + "loss": 0.5036743879318237, + "loss_ce": 0.00013440893962979317, + "loss_iou": 0.2294921875, + "loss_num": 0.0086669921875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 257166788, + "step": 4591 + }, + { + "epoch": 10.2271714922049, + "grad_norm": 29.91023826599121, + "learning_rate": 1e-06, + "loss": 0.6053, + "num_input_tokens_seen": 257223768, + "step": 4592 + }, + { + "epoch": 10.2271714922049, + "loss": 0.4671659767627716, + "loss_ce": 0.00012497020361479372, + "loss_iou": 0.2177734375, + "loss_num": 0.00628662109375, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 257223768, + "step": 4592 + }, + { + "epoch": 10.229398663697104, + "grad_norm": 21.827224731445312, + "learning_rate": 1e-06, + "loss": 0.6082, + "num_input_tokens_seen": 257281108, + "step": 4593 + }, + { + "epoch": 10.229398663697104, + "loss": 0.4103159010410309, + "loss_ce": 0.00015964708290994167, + "loss_iou": 0.185546875, + "loss_num": 0.007781982421875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 257281108, + "step": 4593 + }, + { + "epoch": 10.231625835189309, + "grad_norm": 14.107495307922363, + "learning_rate": 1e-06, + "loss": 0.5821, + "num_input_tokens_seen": 257334832, + "step": 4594 + }, + { + "epoch": 10.231625835189309, + "loss": 0.5812970399856567, + "loss_ce": 0.00012028503260808066, + "loss_iou": 0.255859375, + "loss_num": 0.01348876953125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 257334832, + "step": 4594 + }, + { + "epoch": 10.233853006681514, + "grad_norm": 30.097774505615234, + "learning_rate": 1e-06, + "loss": 0.4502, + "num_input_tokens_seen": 257388612, + "step": 4595 + }, + { + "epoch": 10.233853006681514, + "loss": 0.47809213399887085, + "loss_ce": 0.00018684033420868218, + "loss_iou": 0.2158203125, + "loss_num": 0.00927734375, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 257388612, + "step": 4595 + }, + { + "epoch": 10.236080178173719, + "grad_norm": 287.798095703125, + "learning_rate": 1e-06, + "loss": 0.613, + "num_input_tokens_seen": 257445652, + "step": 4596 + }, + { + "epoch": 10.236080178173719, + "loss": 0.5535175800323486, + "loss_ce": 0.00029491656459867954, + "loss_iou": 0.224609375, + "loss_num": 0.0206298828125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 257445652, + "step": 4596 + }, + { + "epoch": 10.238307349665924, + "grad_norm": 380.41595458984375, + "learning_rate": 1e-06, + "loss": 0.7191, + "num_input_tokens_seen": 257499196, + "step": 4597 + }, + { + "epoch": 10.238307349665924, + "loss": 0.7513624429702759, + "loss_ce": 0.00014180070138536394, + "loss_iou": 0.306640625, + "loss_num": 0.027587890625, + "loss_xval": 0.75, + "num_input_tokens_seen": 257499196, + "step": 4597 + }, + { + "epoch": 10.240534521158128, + "grad_norm": 48.11332321166992, + "learning_rate": 1e-06, + "loss": 0.6278, + "num_input_tokens_seen": 257553432, + "step": 4598 + }, + { + "epoch": 10.240534521158128, + "loss": 0.884353756904602, + "loss_ce": 0.00019845434871967882, + "loss_iou": 0.37890625, + "loss_num": 0.0247802734375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 257553432, + "step": 4598 + }, + { + "epoch": 10.242761692650333, + "grad_norm": 14.302517890930176, + "learning_rate": 1e-06, + "loss": 0.4885, + "num_input_tokens_seen": 257611352, + "step": 4599 + }, + { + "epoch": 10.242761692650333, + "loss": 0.5572800636291504, + "loss_ce": 0.00015118328155949712, + "loss_iou": 0.224609375, + "loss_num": 0.021728515625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 257611352, + "step": 4599 + }, + { + "epoch": 10.244988864142538, + "grad_norm": 15.14469051361084, + "learning_rate": 1e-06, + "loss": 0.5646, + "num_input_tokens_seen": 257663240, + "step": 4600 + }, + { + "epoch": 10.244988864142538, + "loss": 0.3984256088733673, + "loss_ce": 0.000232246849918738, + "loss_iou": 0.154296875, + "loss_num": 0.01806640625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 257663240, + "step": 4600 + }, + { + "epoch": 10.247216035634743, + "grad_norm": 17.56197738647461, + "learning_rate": 1e-06, + "loss": 0.537, + "num_input_tokens_seen": 257719556, + "step": 4601 + }, + { + "epoch": 10.247216035634743, + "loss": 0.42298391461372375, + "loss_ce": 0.00013235666847322136, + "loss_iou": 0.189453125, + "loss_num": 0.008544921875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 257719556, + "step": 4601 + }, + { + "epoch": 10.249443207126948, + "grad_norm": 18.37726593017578, + "learning_rate": 1e-06, + "loss": 0.6735, + "num_input_tokens_seen": 257774564, + "step": 4602 + }, + { + "epoch": 10.249443207126948, + "loss": 0.4393607974052429, + "loss_ce": 0.00015179984620772302, + "loss_iou": 0.1923828125, + "loss_num": 0.0108642578125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 257774564, + "step": 4602 + }, + { + "epoch": 10.251670378619155, + "grad_norm": 13.660320281982422, + "learning_rate": 1e-06, + "loss": 0.4367, + "num_input_tokens_seen": 257830240, + "step": 4603 + }, + { + "epoch": 10.251670378619155, + "loss": 0.31909534335136414, + "loss_ce": 0.0001256193791050464, + "loss_iou": 0.1474609375, + "loss_num": 0.00457763671875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 257830240, + "step": 4603 + }, + { + "epoch": 10.25389755011136, + "grad_norm": 13.991618156433105, + "learning_rate": 1e-06, + "loss": 0.4515, + "num_input_tokens_seen": 257883768, + "step": 4604 + }, + { + "epoch": 10.25389755011136, + "loss": 0.39838650822639465, + "loss_ce": 0.0001321271702181548, + "loss_iou": 0.1806640625, + "loss_num": 0.0074462890625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 257883768, + "step": 4604 + }, + { + "epoch": 10.256124721603564, + "grad_norm": 15.985126495361328, + "learning_rate": 1e-06, + "loss": 0.6116, + "num_input_tokens_seen": 257940856, + "step": 4605 + }, + { + "epoch": 10.256124721603564, + "loss": 0.7079139947891235, + "loss_ce": 0.0001502782106399536, + "loss_iou": 0.296875, + "loss_num": 0.022705078125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 257940856, + "step": 4605 + }, + { + "epoch": 10.25835189309577, + "grad_norm": 17.861207962036133, + "learning_rate": 1e-06, + "loss": 0.6644, + "num_input_tokens_seen": 257997188, + "step": 4606 + }, + { + "epoch": 10.25835189309577, + "loss": 0.5625550150871277, + "loss_ce": 0.00017705293430481106, + "loss_iou": 0.232421875, + "loss_num": 0.019287109375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 257997188, + "step": 4606 + }, + { + "epoch": 10.260579064587974, + "grad_norm": 16.24393653869629, + "learning_rate": 1e-06, + "loss": 0.3853, + "num_input_tokens_seen": 258053824, + "step": 4607 + }, + { + "epoch": 10.260579064587974, + "loss": 0.4628918766975403, + "loss_ce": 0.00015384730068035424, + "loss_iou": 0.201171875, + "loss_num": 0.01214599609375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 258053824, + "step": 4607 + }, + { + "epoch": 10.262806236080179, + "grad_norm": 20.257102966308594, + "learning_rate": 1e-06, + "loss": 0.4479, + "num_input_tokens_seen": 258109152, + "step": 4608 + }, + { + "epoch": 10.262806236080179, + "loss": 0.3674119710922241, + "loss_ce": 0.00010239638504572213, + "loss_iou": 0.1552734375, + "loss_num": 0.0113525390625, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 258109152, + "step": 4608 + }, + { + "epoch": 10.265033407572384, + "grad_norm": 17.029348373413086, + "learning_rate": 1e-06, + "loss": 0.4576, + "num_input_tokens_seen": 258166380, + "step": 4609 + }, + { + "epoch": 10.265033407572384, + "loss": 0.4160352945327759, + "loss_ce": 0.00014175890828482807, + "loss_iou": 0.1904296875, + "loss_num": 0.00714111328125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 258166380, + "step": 4609 + }, + { + "epoch": 10.267260579064589, + "grad_norm": 21.380460739135742, + "learning_rate": 1e-06, + "loss": 0.5284, + "num_input_tokens_seen": 258220732, + "step": 4610 + }, + { + "epoch": 10.267260579064589, + "loss": 0.3585323393344879, + "loss_ce": 0.00013390296953730285, + "loss_iou": 0.1513671875, + "loss_num": 0.0111083984375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 258220732, + "step": 4610 + }, + { + "epoch": 10.269487750556793, + "grad_norm": 14.722599983215332, + "learning_rate": 1e-06, + "loss": 0.58, + "num_input_tokens_seen": 258277352, + "step": 4611 + }, + { + "epoch": 10.269487750556793, + "loss": 0.4370768666267395, + "loss_ce": 0.00018718844512477517, + "loss_iou": 0.1953125, + "loss_num": 0.00909423828125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 258277352, + "step": 4611 + }, + { + "epoch": 10.271714922048998, + "grad_norm": 21.076213836669922, + "learning_rate": 1e-06, + "loss": 0.5657, + "num_input_tokens_seen": 258332080, + "step": 4612 + }, + { + "epoch": 10.271714922048998, + "loss": 0.4650370478630066, + "loss_ce": 0.0001932941668201238, + "loss_iou": 0.2041015625, + "loss_num": 0.01123046875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 258332080, + "step": 4612 + }, + { + "epoch": 10.273942093541203, + "grad_norm": 85.08380126953125, + "learning_rate": 1e-06, + "loss": 0.6145, + "num_input_tokens_seen": 258389684, + "step": 4613 + }, + { + "epoch": 10.273942093541203, + "loss": 0.5986820459365845, + "loss_ce": 0.0001713307574391365, + "loss_iou": 0.251953125, + "loss_num": 0.0186767578125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 258389684, + "step": 4613 + }, + { + "epoch": 10.276169265033408, + "grad_norm": 32.875736236572266, + "learning_rate": 1e-06, + "loss": 0.6115, + "num_input_tokens_seen": 258446596, + "step": 4614 + }, + { + "epoch": 10.276169265033408, + "loss": 0.4154265522956848, + "loss_ce": 0.00014332013961393386, + "loss_iou": 0.1884765625, + "loss_num": 0.007568359375, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 258446596, + "step": 4614 + }, + { + "epoch": 10.278396436525613, + "grad_norm": 25.337644577026367, + "learning_rate": 1e-06, + "loss": 0.6043, + "num_input_tokens_seen": 258503504, + "step": 4615 + }, + { + "epoch": 10.278396436525613, + "loss": 0.5890331268310547, + "loss_ce": 0.00016596817295067012, + "loss_iou": 0.2578125, + "loss_num": 0.01434326171875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 258503504, + "step": 4615 + }, + { + "epoch": 10.280623608017818, + "grad_norm": 22.491819381713867, + "learning_rate": 1e-06, + "loss": 0.4488, + "num_input_tokens_seen": 258561264, + "step": 4616 + }, + { + "epoch": 10.280623608017818, + "loss": 0.42713305354118347, + "loss_ce": 0.00013111413863953203, + "loss_iou": 0.1845703125, + "loss_num": 0.011474609375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 258561264, + "step": 4616 + }, + { + "epoch": 10.282850779510023, + "grad_norm": 16.053213119506836, + "learning_rate": 1e-06, + "loss": 0.6014, + "num_input_tokens_seen": 258615720, + "step": 4617 + }, + { + "epoch": 10.282850779510023, + "loss": 0.6224437952041626, + "loss_ce": 0.00012936524581164122, + "loss_iou": 0.2578125, + "loss_num": 0.0213623046875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 258615720, + "step": 4617 + }, + { + "epoch": 10.285077951002227, + "grad_norm": 21.939491271972656, + "learning_rate": 1e-06, + "loss": 0.447, + "num_input_tokens_seen": 258671132, + "step": 4618 + }, + { + "epoch": 10.285077951002227, + "loss": 0.33654463291168213, + "loss_ce": 0.00011883860861416906, + "loss_iou": 0.1474609375, + "loss_num": 0.00830078125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 258671132, + "step": 4618 + }, + { + "epoch": 10.287305122494432, + "grad_norm": 14.96814250946045, + "learning_rate": 1e-06, + "loss": 0.5002, + "num_input_tokens_seen": 258728584, + "step": 4619 + }, + { + "epoch": 10.287305122494432, + "loss": 0.5067825317382812, + "loss_ce": 0.0002517920802347362, + "loss_iou": 0.21875, + "loss_num": 0.01385498046875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 258728584, + "step": 4619 + }, + { + "epoch": 10.289532293986637, + "grad_norm": 17.478721618652344, + "learning_rate": 1e-06, + "loss": 0.3775, + "num_input_tokens_seen": 258786092, + "step": 4620 + }, + { + "epoch": 10.289532293986637, + "loss": 0.46181732416152954, + "loss_ce": 0.00014738523168489337, + "loss_iou": 0.203125, + "loss_num": 0.01123046875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 258786092, + "step": 4620 + }, + { + "epoch": 10.291759465478842, + "grad_norm": 22.687427520751953, + "learning_rate": 1e-06, + "loss": 0.5753, + "num_input_tokens_seen": 258841324, + "step": 4621 + }, + { + "epoch": 10.291759465478842, + "loss": 0.6752172708511353, + "loss_ce": 0.00016845832578837872, + "loss_iou": 0.3125, + "loss_num": 0.00994873046875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 258841324, + "step": 4621 + }, + { + "epoch": 10.293986636971047, + "grad_norm": 20.015989303588867, + "learning_rate": 1e-06, + "loss": 0.7111, + "num_input_tokens_seen": 258895908, + "step": 4622 + }, + { + "epoch": 10.293986636971047, + "loss": 0.5323803424835205, + "loss_ce": 0.00015376918599940836, + "loss_iou": 0.240234375, + "loss_num": 0.0103759765625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 258895908, + "step": 4622 + }, + { + "epoch": 10.296213808463252, + "grad_norm": 19.902332305908203, + "learning_rate": 1e-06, + "loss": 0.3957, + "num_input_tokens_seen": 258955124, + "step": 4623 + }, + { + "epoch": 10.296213808463252, + "loss": 0.35737037658691406, + "loss_ce": 0.0001926565309986472, + "loss_iou": 0.1572265625, + "loss_num": 0.0086669921875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 258955124, + "step": 4623 + }, + { + "epoch": 10.298440979955457, + "grad_norm": 30.156911849975586, + "learning_rate": 1e-06, + "loss": 0.5562, + "num_input_tokens_seen": 259010176, + "step": 4624 + }, + { + "epoch": 10.298440979955457, + "loss": 0.5099859833717346, + "loss_ce": 0.0002203439362347126, + "loss_iou": 0.228515625, + "loss_num": 0.01055908203125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 259010176, + "step": 4624 + }, + { + "epoch": 10.300668151447661, + "grad_norm": 18.427066802978516, + "learning_rate": 1e-06, + "loss": 0.5903, + "num_input_tokens_seen": 259065656, + "step": 4625 + }, + { + "epoch": 10.300668151447661, + "loss": 0.5748733878135681, + "loss_ce": 0.0001511350565124303, + "loss_iou": 0.251953125, + "loss_num": 0.014404296875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 259065656, + "step": 4625 + }, + { + "epoch": 10.302895322939866, + "grad_norm": 16.115806579589844, + "learning_rate": 1e-06, + "loss": 0.4775, + "num_input_tokens_seen": 259121088, + "step": 4626 + }, + { + "epoch": 10.302895322939866, + "loss": 0.33922505378723145, + "loss_ce": 0.0001137549479608424, + "loss_iou": 0.1484375, + "loss_num": 0.00860595703125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 259121088, + "step": 4626 + }, + { + "epoch": 10.305122494432071, + "grad_norm": 21.94735336303711, + "learning_rate": 1e-06, + "loss": 0.6358, + "num_input_tokens_seen": 259179752, + "step": 4627 + }, + { + "epoch": 10.305122494432071, + "loss": 0.6556517481803894, + "loss_ce": 0.0001341440947726369, + "loss_iou": 0.279296875, + "loss_num": 0.01904296875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 259179752, + "step": 4627 + }, + { + "epoch": 10.307349665924276, + "grad_norm": 18.129470825195312, + "learning_rate": 1e-06, + "loss": 0.5075, + "num_input_tokens_seen": 259231148, + "step": 4628 + }, + { + "epoch": 10.307349665924276, + "loss": 0.5028002858161926, + "loss_ce": 0.00011475315841380507, + "loss_iou": 0.2099609375, + "loss_num": 0.0166015625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 259231148, + "step": 4628 + }, + { + "epoch": 10.309576837416481, + "grad_norm": 16.93099021911621, + "learning_rate": 1e-06, + "loss": 0.5894, + "num_input_tokens_seen": 259289500, + "step": 4629 + }, + { + "epoch": 10.309576837416481, + "loss": 0.4424053430557251, + "loss_ce": 0.00014459306839853525, + "loss_iou": 0.203125, + "loss_num": 0.007049560546875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 259289500, + "step": 4629 + }, + { + "epoch": 10.311804008908686, + "grad_norm": 25.796844482421875, + "learning_rate": 1e-06, + "loss": 0.6209, + "num_input_tokens_seen": 259346848, + "step": 4630 + }, + { + "epoch": 10.311804008908686, + "loss": 0.6188187599182129, + "loss_ce": 0.00016644690185785294, + "loss_iou": 0.275390625, + "loss_num": 0.01348876953125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 259346848, + "step": 4630 + }, + { + "epoch": 10.31403118040089, + "grad_norm": 16.472164154052734, + "learning_rate": 1e-06, + "loss": 0.7976, + "num_input_tokens_seen": 259404428, + "step": 4631 + }, + { + "epoch": 10.31403118040089, + "loss": 0.7173308730125427, + "loss_ce": 0.0002898685052059591, + "loss_iou": 0.27734375, + "loss_num": 0.032470703125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 259404428, + "step": 4631 + }, + { + "epoch": 10.316258351893095, + "grad_norm": 17.40803337097168, + "learning_rate": 1e-06, + "loss": 0.4205, + "num_input_tokens_seen": 259460152, + "step": 4632 + }, + { + "epoch": 10.316258351893095, + "loss": 0.3678438663482666, + "loss_ce": 0.00016810771194286644, + "loss_iou": 0.15625, + "loss_num": 0.01104736328125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 259460152, + "step": 4632 + }, + { + "epoch": 10.3184855233853, + "grad_norm": 17.019670486450195, + "learning_rate": 1e-06, + "loss": 0.6062, + "num_input_tokens_seen": 259512592, + "step": 4633 + }, + { + "epoch": 10.3184855233853, + "loss": 0.5542778968811035, + "loss_ce": 0.00020081247203052044, + "loss_iou": 0.2392578125, + "loss_num": 0.01519775390625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 259512592, + "step": 4633 + }, + { + "epoch": 10.320712694877505, + "grad_norm": 20.420970916748047, + "learning_rate": 1e-06, + "loss": 0.5013, + "num_input_tokens_seen": 259569136, + "step": 4634 + }, + { + "epoch": 10.320712694877505, + "loss": 0.5180292129516602, + "loss_ce": 0.00020692471298389137, + "loss_iou": 0.23046875, + "loss_num": 0.01141357421875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 259569136, + "step": 4634 + }, + { + "epoch": 10.32293986636971, + "grad_norm": 18.733917236328125, + "learning_rate": 1e-06, + "loss": 0.6095, + "num_input_tokens_seen": 259626308, + "step": 4635 + }, + { + "epoch": 10.32293986636971, + "loss": 0.5899757742881775, + "loss_ce": 0.00013202980335336179, + "loss_iou": 0.26171875, + "loss_num": 0.0133056640625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 259626308, + "step": 4635 + }, + { + "epoch": 10.325167037861915, + "grad_norm": 15.864693641662598, + "learning_rate": 1e-06, + "loss": 0.4543, + "num_input_tokens_seen": 259683580, + "step": 4636 + }, + { + "epoch": 10.325167037861915, + "loss": 0.36647483706474304, + "loss_ce": 0.00014181638834998012, + "loss_iou": 0.169921875, + "loss_num": 0.0052490234375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 259683580, + "step": 4636 + }, + { + "epoch": 10.32739420935412, + "grad_norm": 16.95244789123535, + "learning_rate": 1e-06, + "loss": 0.4863, + "num_input_tokens_seen": 259740856, + "step": 4637 + }, + { + "epoch": 10.32739420935412, + "loss": 0.4233607351779938, + "loss_ce": 0.00014292271225713193, + "loss_iou": 0.1826171875, + "loss_num": 0.01165771484375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 259740856, + "step": 4637 + }, + { + "epoch": 10.329621380846325, + "grad_norm": 14.428168296813965, + "learning_rate": 1e-06, + "loss": 0.5452, + "num_input_tokens_seen": 259797892, + "step": 4638 + }, + { + "epoch": 10.329621380846325, + "loss": 0.37976911664009094, + "loss_ce": 0.00013044924708083272, + "loss_iou": 0.173828125, + "loss_num": 0.0064697265625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 259797892, + "step": 4638 + }, + { + "epoch": 10.33184855233853, + "grad_norm": 20.9561767578125, + "learning_rate": 1e-06, + "loss": 0.5536, + "num_input_tokens_seen": 259856484, + "step": 4639 + }, + { + "epoch": 10.33184855233853, + "loss": 0.4375172257423401, + "loss_ce": 0.00013931245484855026, + "loss_iou": 0.19140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 259856484, + "step": 4639 + }, + { + "epoch": 10.334075723830734, + "grad_norm": 15.651261329650879, + "learning_rate": 1e-06, + "loss": 0.4453, + "num_input_tokens_seen": 259916208, + "step": 4640 + }, + { + "epoch": 10.334075723830734, + "loss": 0.548733115196228, + "loss_ce": 0.00014910154277458787, + "loss_iou": 0.2236328125, + "loss_num": 0.0201416015625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 259916208, + "step": 4640 + }, + { + "epoch": 10.33630289532294, + "grad_norm": 13.660500526428223, + "learning_rate": 1e-06, + "loss": 0.4894, + "num_input_tokens_seen": 259971708, + "step": 4641 + }, + { + "epoch": 10.33630289532294, + "loss": 0.45058485865592957, + "loss_ce": 0.000511608668603003, + "loss_iou": 0.15234375, + "loss_num": 0.02880859375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 259971708, + "step": 4641 + }, + { + "epoch": 10.338530066815144, + "grad_norm": 25.716596603393555, + "learning_rate": 1e-06, + "loss": 0.5956, + "num_input_tokens_seen": 260025852, + "step": 4642 + }, + { + "epoch": 10.338530066815144, + "loss": 0.7315921783447266, + "loss_ce": 0.00039098679553717375, + "loss_iou": 0.298828125, + "loss_num": 0.026611328125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 260025852, + "step": 4642 + }, + { + "epoch": 10.340757238307349, + "grad_norm": 21.653146743774414, + "learning_rate": 1e-06, + "loss": 0.5072, + "num_input_tokens_seen": 260079956, + "step": 4643 + }, + { + "epoch": 10.340757238307349, + "loss": 0.434053510427475, + "loss_ce": 0.0002766597317531705, + "loss_iou": 0.1884765625, + "loss_num": 0.0113525390625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 260079956, + "step": 4643 + }, + { + "epoch": 10.342984409799554, + "grad_norm": 14.551214218139648, + "learning_rate": 1e-06, + "loss": 0.4545, + "num_input_tokens_seen": 260136624, + "step": 4644 + }, + { + "epoch": 10.342984409799554, + "loss": 0.5187681913375854, + "loss_ce": 0.00021346815628930926, + "loss_iou": 0.2294921875, + "loss_num": 0.01177978515625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 260136624, + "step": 4644 + }, + { + "epoch": 10.345211581291759, + "grad_norm": 23.34543228149414, + "learning_rate": 1e-06, + "loss": 0.5684, + "num_input_tokens_seen": 260193936, + "step": 4645 + }, + { + "epoch": 10.345211581291759, + "loss": 0.6667625904083252, + "loss_ce": 0.00013660687545780092, + "loss_iou": 0.30078125, + "loss_num": 0.0128173828125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 260193936, + "step": 4645 + }, + { + "epoch": 10.347438752783964, + "grad_norm": 15.129518508911133, + "learning_rate": 1e-06, + "loss": 0.4025, + "num_input_tokens_seen": 260247704, + "step": 4646 + }, + { + "epoch": 10.347438752783964, + "loss": 0.3118034601211548, + "loss_ce": 0.00012743064144160599, + "loss_iou": 0.130859375, + "loss_num": 0.0101318359375, + "loss_xval": 0.3125, + "num_input_tokens_seen": 260247704, + "step": 4646 + }, + { + "epoch": 10.34966592427617, + "grad_norm": 20.0349063873291, + "learning_rate": 1e-06, + "loss": 0.8892, + "num_input_tokens_seen": 260305780, + "step": 4647 + }, + { + "epoch": 10.34966592427617, + "loss": 0.9932963252067566, + "loss_ce": 0.00013225735165178776, + "loss_iou": 0.43359375, + "loss_num": 0.02490234375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 260305780, + "step": 4647 + }, + { + "epoch": 10.351893095768375, + "grad_norm": 24.84522247314453, + "learning_rate": 1e-06, + "loss": 0.6352, + "num_input_tokens_seen": 260360704, + "step": 4648 + }, + { + "epoch": 10.351893095768375, + "loss": 0.5557194352149963, + "loss_ce": 0.0001774309203028679, + "loss_iou": 0.25390625, + "loss_num": 0.00994873046875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 260360704, + "step": 4648 + }, + { + "epoch": 10.35412026726058, + "grad_norm": 16.85805892944336, + "learning_rate": 1e-06, + "loss": 0.5613, + "num_input_tokens_seen": 260419212, + "step": 4649 + }, + { + "epoch": 10.35412026726058, + "loss": 0.43383756279945374, + "loss_ce": 0.00012176090240245685, + "loss_iou": 0.173828125, + "loss_num": 0.0169677734375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 260419212, + "step": 4649 + }, + { + "epoch": 10.356347438752785, + "grad_norm": 19.93196678161621, + "learning_rate": 1e-06, + "loss": 0.5456, + "num_input_tokens_seen": 260475968, + "step": 4650 + }, + { + "epoch": 10.356347438752785, + "loss": 0.5350778698921204, + "loss_ce": 0.00016572429740335792, + "loss_iou": 0.2294921875, + "loss_num": 0.01519775390625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 260475968, + "step": 4650 + }, + { + "epoch": 10.35857461024499, + "grad_norm": 22.55901336669922, + "learning_rate": 1e-06, + "loss": 0.5437, + "num_input_tokens_seen": 260530128, + "step": 4651 + }, + { + "epoch": 10.35857461024499, + "loss": 0.6019728183746338, + "loss_ce": 0.0001661243732087314, + "loss_iou": 0.28125, + "loss_num": 0.00799560546875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 260530128, + "step": 4651 + }, + { + "epoch": 10.360801781737194, + "grad_norm": 24.937402725219727, + "learning_rate": 1e-06, + "loss": 0.5643, + "num_input_tokens_seen": 260587396, + "step": 4652 + }, + { + "epoch": 10.360801781737194, + "loss": 0.4734136760234833, + "loss_ce": 0.00014707804075442255, + "loss_iou": 0.2099609375, + "loss_num": 0.01068115234375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 260587396, + "step": 4652 + }, + { + "epoch": 10.3630289532294, + "grad_norm": 13.982348442077637, + "learning_rate": 1e-06, + "loss": 0.4673, + "num_input_tokens_seen": 260644020, + "step": 4653 + }, + { + "epoch": 10.3630289532294, + "loss": 0.3794419467449188, + "loss_ce": 0.0004746583290398121, + "loss_iou": 0.1689453125, + "loss_num": 0.00823974609375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 260644020, + "step": 4653 + }, + { + "epoch": 10.365256124721604, + "grad_norm": 16.844064712524414, + "learning_rate": 1e-06, + "loss": 0.6486, + "num_input_tokens_seen": 260700716, + "step": 4654 + }, + { + "epoch": 10.365256124721604, + "loss": 0.5358029007911682, + "loss_ce": 0.00015838223043829203, + "loss_iou": 0.244140625, + "loss_num": 0.00946044921875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 260700716, + "step": 4654 + }, + { + "epoch": 10.367483296213809, + "grad_norm": 16.441743850708008, + "learning_rate": 1e-06, + "loss": 0.6257, + "num_input_tokens_seen": 260756304, + "step": 4655 + }, + { + "epoch": 10.367483296213809, + "loss": 0.6969484090805054, + "loss_ce": 0.00017108957399614155, + "loss_iou": 0.29296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 260756304, + "step": 4655 + }, + { + "epoch": 10.369710467706014, + "grad_norm": 16.78958511352539, + "learning_rate": 1e-06, + "loss": 0.6998, + "num_input_tokens_seen": 260810784, + "step": 4656 + }, + { + "epoch": 10.369710467706014, + "loss": 0.6009594798088074, + "loss_ce": 0.00012937135761603713, + "loss_iou": 0.251953125, + "loss_num": 0.01904296875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 260810784, + "step": 4656 + }, + { + "epoch": 10.371937639198219, + "grad_norm": 18.355632781982422, + "learning_rate": 1e-06, + "loss": 0.5985, + "num_input_tokens_seen": 260867540, + "step": 4657 + }, + { + "epoch": 10.371937639198219, + "loss": 0.7768846750259399, + "loss_ce": 0.00015131058171391487, + "loss_iou": 0.333984375, + "loss_num": 0.021728515625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 260867540, + "step": 4657 + }, + { + "epoch": 10.374164810690424, + "grad_norm": 17.681825637817383, + "learning_rate": 1e-06, + "loss": 0.5509, + "num_input_tokens_seen": 260923584, + "step": 4658 + }, + { + "epoch": 10.374164810690424, + "loss": 0.5139190554618835, + "loss_ce": 0.00012508760846685618, + "loss_iou": 0.21875, + "loss_num": 0.01513671875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 260923584, + "step": 4658 + }, + { + "epoch": 10.376391982182628, + "grad_norm": 29.6978759765625, + "learning_rate": 1e-06, + "loss": 0.549, + "num_input_tokens_seen": 260980476, + "step": 4659 + }, + { + "epoch": 10.376391982182628, + "loss": 0.6463862657546997, + "loss_ce": 0.00014600764552596956, + "loss_iou": 0.2734375, + "loss_num": 0.02001953125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 260980476, + "step": 4659 + }, + { + "epoch": 10.378619153674833, + "grad_norm": 20.990150451660156, + "learning_rate": 1e-06, + "loss": 0.4872, + "num_input_tokens_seen": 261038064, + "step": 4660 + }, + { + "epoch": 10.378619153674833, + "loss": 0.5661933422088623, + "loss_ce": 0.00015332447947002947, + "loss_iou": 0.2578125, + "loss_num": 0.00994873046875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 261038064, + "step": 4660 + }, + { + "epoch": 10.380846325167038, + "grad_norm": 20.679819107055664, + "learning_rate": 1e-06, + "loss": 0.4146, + "num_input_tokens_seen": 261093180, + "step": 4661 + }, + { + "epoch": 10.380846325167038, + "loss": 0.479025661945343, + "loss_ce": 0.00014384492533281446, + "loss_iou": 0.2138671875, + "loss_num": 0.010009765625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 261093180, + "step": 4661 + }, + { + "epoch": 10.383073496659243, + "grad_norm": 12.441951751708984, + "learning_rate": 1e-06, + "loss": 0.5305, + "num_input_tokens_seen": 261149604, + "step": 4662 + }, + { + "epoch": 10.383073496659243, + "loss": 0.5029485821723938, + "loss_ce": 0.00014094685320742428, + "loss_iou": 0.2275390625, + "loss_num": 0.0096435546875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 261149604, + "step": 4662 + }, + { + "epoch": 10.385300668151448, + "grad_norm": 39.21706008911133, + "learning_rate": 1e-06, + "loss": 0.5123, + "num_input_tokens_seen": 261207228, + "step": 4663 + }, + { + "epoch": 10.385300668151448, + "loss": 0.44520843029022217, + "loss_ce": 0.00014007699792273343, + "loss_iou": 0.2021484375, + "loss_num": 0.00836181640625, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 261207228, + "step": 4663 + }, + { + "epoch": 10.387527839643653, + "grad_norm": 29.102785110473633, + "learning_rate": 1e-06, + "loss": 0.5844, + "num_input_tokens_seen": 261261652, + "step": 4664 + }, + { + "epoch": 10.387527839643653, + "loss": 0.523324191570282, + "loss_ce": 0.00013084019883535802, + "loss_iou": 0.2294921875, + "loss_num": 0.01300048828125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 261261652, + "step": 4664 + }, + { + "epoch": 10.389755011135858, + "grad_norm": 17.95996856689453, + "learning_rate": 1e-06, + "loss": 0.5083, + "num_input_tokens_seen": 261314420, + "step": 4665 + }, + { + "epoch": 10.389755011135858, + "loss": 0.666547417640686, + "loss_ce": 0.00016560900257900357, + "loss_iou": 0.3046875, + "loss_num": 0.01141357421875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 261314420, + "step": 4665 + }, + { + "epoch": 10.391982182628063, + "grad_norm": 17.621519088745117, + "learning_rate": 1e-06, + "loss": 0.4285, + "num_input_tokens_seen": 261369500, + "step": 4666 + }, + { + "epoch": 10.391982182628063, + "loss": 0.3165377378463745, + "loss_ce": 0.0001314996334258467, + "loss_iou": 0.13671875, + "loss_num": 0.00836181640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 261369500, + "step": 4666 + }, + { + "epoch": 10.394209354120267, + "grad_norm": 28.80778694152832, + "learning_rate": 1e-06, + "loss": 0.6926, + "num_input_tokens_seen": 261424900, + "step": 4667 + }, + { + "epoch": 10.394209354120267, + "loss": 0.6756924390792847, + "loss_ce": 0.00015533142141066492, + "loss_iou": 0.3046875, + "loss_num": 0.01336669921875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 261424900, + "step": 4667 + }, + { + "epoch": 10.396436525612472, + "grad_norm": 16.814796447753906, + "learning_rate": 1e-06, + "loss": 0.6278, + "num_input_tokens_seen": 261481180, + "step": 4668 + }, + { + "epoch": 10.396436525612472, + "loss": 0.6310012340545654, + "loss_ce": 0.00020286736253183335, + "loss_iou": 0.259765625, + "loss_num": 0.0220947265625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 261481180, + "step": 4668 + }, + { + "epoch": 10.398663697104677, + "grad_norm": 28.424175262451172, + "learning_rate": 1e-06, + "loss": 0.5187, + "num_input_tokens_seen": 261537668, + "step": 4669 + }, + { + "epoch": 10.398663697104677, + "loss": 0.7215948104858398, + "loss_ce": 0.00015925764455460012, + "loss_iou": 0.296875, + "loss_num": 0.0252685546875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 261537668, + "step": 4669 + }, + { + "epoch": 10.400890868596882, + "grad_norm": 42.25555419921875, + "learning_rate": 1e-06, + "loss": 0.5724, + "num_input_tokens_seen": 261592388, + "step": 4670 + }, + { + "epoch": 10.400890868596882, + "loss": 0.532261848449707, + "loss_ce": 0.00015735380293335766, + "loss_iou": 0.23046875, + "loss_num": 0.01416015625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 261592388, + "step": 4670 + }, + { + "epoch": 10.403118040089087, + "grad_norm": 15.451391220092773, + "learning_rate": 1e-06, + "loss": 0.6405, + "num_input_tokens_seen": 261649236, + "step": 4671 + }, + { + "epoch": 10.403118040089087, + "loss": 0.8438804149627686, + "loss_ce": 0.000130399945192039, + "loss_iou": 0.337890625, + "loss_num": 0.033203125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 261649236, + "step": 4671 + }, + { + "epoch": 10.405345211581292, + "grad_norm": 18.49277687072754, + "learning_rate": 1e-06, + "loss": 0.7358, + "num_input_tokens_seen": 261705516, + "step": 4672 + }, + { + "epoch": 10.405345211581292, + "loss": 0.5958718061447144, + "loss_ce": 0.0001686769537627697, + "loss_iou": 0.26171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 261705516, + "step": 4672 + }, + { + "epoch": 10.407572383073497, + "grad_norm": 18.64063262939453, + "learning_rate": 1e-06, + "loss": 0.5341, + "num_input_tokens_seen": 261759996, + "step": 4673 + }, + { + "epoch": 10.407572383073497, + "loss": 0.4139430522918701, + "loss_ce": 0.00012470106594264507, + "loss_iou": 0.169921875, + "loss_num": 0.01483154296875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 261759996, + "step": 4673 + }, + { + "epoch": 10.409799554565701, + "grad_norm": 15.402804374694824, + "learning_rate": 1e-06, + "loss": 0.5624, + "num_input_tokens_seen": 261814640, + "step": 4674 + }, + { + "epoch": 10.409799554565701, + "loss": 0.5486302375793457, + "loss_ce": 0.00016830695676617324, + "loss_iou": 0.22265625, + "loss_num": 0.0205078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 261814640, + "step": 4674 + }, + { + "epoch": 10.412026726057906, + "grad_norm": 16.70870590209961, + "learning_rate": 1e-06, + "loss": 0.4344, + "num_input_tokens_seen": 261873280, + "step": 4675 + }, + { + "epoch": 10.412026726057906, + "loss": 0.26441287994384766, + "loss_ce": 0.00013062989455647767, + "loss_iou": 0.1162109375, + "loss_num": 0.00634765625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 261873280, + "step": 4675 + }, + { + "epoch": 10.414253897550111, + "grad_norm": 16.716644287109375, + "learning_rate": 1e-06, + "loss": 0.7616, + "num_input_tokens_seen": 261930832, + "step": 4676 + }, + { + "epoch": 10.414253897550111, + "loss": 0.6276519298553467, + "loss_ce": 0.00021053958334960043, + "loss_iou": 0.263671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 261930832, + "step": 4676 + }, + { + "epoch": 10.416481069042316, + "grad_norm": 24.789369583129883, + "learning_rate": 1e-06, + "loss": 0.5369, + "num_input_tokens_seen": 261985440, + "step": 4677 + }, + { + "epoch": 10.416481069042316, + "loss": 0.5724594593048096, + "loss_ce": 0.00019378411525394768, + "loss_iou": 0.255859375, + "loss_num": 0.01220703125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 261985440, + "step": 4677 + }, + { + "epoch": 10.41870824053452, + "grad_norm": 23.978904724121094, + "learning_rate": 1e-06, + "loss": 0.5775, + "num_input_tokens_seen": 262040176, + "step": 4678 + }, + { + "epoch": 10.41870824053452, + "loss": 0.7208090424537659, + "loss_ce": 0.0001669653574936092, + "loss_iou": 0.291015625, + "loss_num": 0.02783203125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 262040176, + "step": 4678 + }, + { + "epoch": 10.420935412026726, + "grad_norm": 21.936405181884766, + "learning_rate": 1e-06, + "loss": 0.5377, + "num_input_tokens_seen": 262096712, + "step": 4679 + }, + { + "epoch": 10.420935412026726, + "loss": 0.6339254379272461, + "loss_ce": 0.0001363815099466592, + "loss_iou": 0.27734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 262096712, + "step": 4679 + }, + { + "epoch": 10.42316258351893, + "grad_norm": 15.21117115020752, + "learning_rate": 1e-06, + "loss": 0.5304, + "num_input_tokens_seen": 262151328, + "step": 4680 + }, + { + "epoch": 10.42316258351893, + "loss": 0.6026941537857056, + "loss_ce": 0.00015511347737628967, + "loss_iou": 0.2470703125, + "loss_num": 0.021728515625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 262151328, + "step": 4680 + }, + { + "epoch": 10.425389755011135, + "grad_norm": 19.147802352905273, + "learning_rate": 1e-06, + "loss": 0.6639, + "num_input_tokens_seen": 262207744, + "step": 4681 + }, + { + "epoch": 10.425389755011135, + "loss": 0.6351706981658936, + "loss_ce": 0.00016094453167170286, + "loss_iou": 0.28125, + "loss_num": 0.01416015625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 262207744, + "step": 4681 + }, + { + "epoch": 10.42761692650334, + "grad_norm": 20.792253494262695, + "learning_rate": 1e-06, + "loss": 0.5855, + "num_input_tokens_seen": 262266176, + "step": 4682 + }, + { + "epoch": 10.42761692650334, + "loss": 0.7266117930412292, + "loss_ce": 0.00017138014663942158, + "loss_iou": 0.30859375, + "loss_num": 0.0218505859375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 262266176, + "step": 4682 + }, + { + "epoch": 10.429844097995545, + "grad_norm": 17.91155433654785, + "learning_rate": 1e-06, + "loss": 0.4326, + "num_input_tokens_seen": 262322384, + "step": 4683 + }, + { + "epoch": 10.429844097995545, + "loss": 0.4974979758262634, + "loss_ce": 0.00018352872575633228, + "loss_iou": 0.2158203125, + "loss_num": 0.0130615234375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 262322384, + "step": 4683 + }, + { + "epoch": 10.43207126948775, + "grad_norm": 15.88755989074707, + "learning_rate": 1e-06, + "loss": 0.4703, + "num_input_tokens_seen": 262378432, + "step": 4684 + }, + { + "epoch": 10.43207126948775, + "loss": 0.6226903200149536, + "loss_ce": 0.0001317547430517152, + "loss_iou": 0.27734375, + "loss_num": 0.013427734375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 262378432, + "step": 4684 + }, + { + "epoch": 10.434298440979955, + "grad_norm": 27.248088836669922, + "learning_rate": 1e-06, + "loss": 0.6203, + "num_input_tokens_seen": 262433420, + "step": 4685 + }, + { + "epoch": 10.434298440979955, + "loss": 0.8291926980018616, + "loss_ce": 0.000579414947424084, + "loss_iou": 0.3359375, + "loss_num": 0.031494140625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 262433420, + "step": 4685 + }, + { + "epoch": 10.43652561247216, + "grad_norm": 23.795869827270508, + "learning_rate": 1e-06, + "loss": 0.4751, + "num_input_tokens_seen": 262492420, + "step": 4686 + }, + { + "epoch": 10.43652561247216, + "loss": 0.5592020750045776, + "loss_ce": 0.0001200107071781531, + "loss_iou": 0.255859375, + "loss_num": 0.0096435546875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 262492420, + "step": 4686 + }, + { + "epoch": 10.438752783964365, + "grad_norm": 119.42134857177734, + "learning_rate": 1e-06, + "loss": 0.4158, + "num_input_tokens_seen": 262547884, + "step": 4687 + }, + { + "epoch": 10.438752783964365, + "loss": 0.2656348645687103, + "loss_ce": 0.0001319482980761677, + "loss_iou": 0.111328125, + "loss_num": 0.00860595703125, + "loss_xval": 0.265625, + "num_input_tokens_seen": 262547884, + "step": 4687 + }, + { + "epoch": 10.44097995545657, + "grad_norm": 29.43088722229004, + "learning_rate": 1e-06, + "loss": 0.6806, + "num_input_tokens_seen": 262605912, + "step": 4688 + }, + { + "epoch": 10.44097995545657, + "loss": 0.7944400310516357, + "loss_ce": 0.0001285005419049412, + "loss_iou": 0.35546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 262605912, + "step": 4688 + }, + { + "epoch": 10.443207126948774, + "grad_norm": 37.14841842651367, + "learning_rate": 1e-06, + "loss": 0.5946, + "num_input_tokens_seen": 262658560, + "step": 4689 + }, + { + "epoch": 10.443207126948774, + "loss": 0.5586202144622803, + "loss_ce": 0.00014853276661597192, + "loss_iou": 0.2216796875, + "loss_num": 0.02294921875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 262658560, + "step": 4689 + }, + { + "epoch": 10.44543429844098, + "grad_norm": 97.54398345947266, + "learning_rate": 1e-06, + "loss": 0.3942, + "num_input_tokens_seen": 262714528, + "step": 4690 + }, + { + "epoch": 10.44543429844098, + "loss": 0.3732898235321045, + "loss_ce": 0.00012086871720384806, + "loss_iou": 0.1591796875, + "loss_num": 0.010986328125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 262714528, + "step": 4690 + }, + { + "epoch": 10.447661469933184, + "grad_norm": 20.17695426940918, + "learning_rate": 1e-06, + "loss": 0.492, + "num_input_tokens_seen": 262770576, + "step": 4691 + }, + { + "epoch": 10.447661469933184, + "loss": 0.47415345907211304, + "loss_ce": 0.00015445941244252026, + "loss_iou": 0.20703125, + "loss_num": 0.011962890625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 262770576, + "step": 4691 + }, + { + "epoch": 10.449888641425389, + "grad_norm": 21.019207000732422, + "learning_rate": 1e-06, + "loss": 0.4976, + "num_input_tokens_seen": 262827748, + "step": 4692 + }, + { + "epoch": 10.449888641425389, + "loss": 0.44643697142601013, + "loss_ce": 0.00014791959256399423, + "loss_iou": 0.1806640625, + "loss_num": 0.01708984375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 262827748, + "step": 4692 + }, + { + "epoch": 10.452115812917596, + "grad_norm": 17.917194366455078, + "learning_rate": 1e-06, + "loss": 0.5386, + "num_input_tokens_seen": 262884516, + "step": 4693 + }, + { + "epoch": 10.452115812917596, + "loss": 0.40125101804733276, + "loss_ce": 0.00012796126247849315, + "loss_iou": 0.18359375, + "loss_num": 0.00677490234375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 262884516, + "step": 4693 + }, + { + "epoch": 10.4543429844098, + "grad_norm": 25.924360275268555, + "learning_rate": 1e-06, + "loss": 0.5668, + "num_input_tokens_seen": 262939420, + "step": 4694 + }, + { + "epoch": 10.4543429844098, + "loss": 0.43259525299072266, + "loss_ce": 0.00022220781829673797, + "loss_iou": 0.1845703125, + "loss_num": 0.012451171875, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 262939420, + "step": 4694 + }, + { + "epoch": 10.456570155902005, + "grad_norm": 29.25006675720215, + "learning_rate": 1e-06, + "loss": 0.6523, + "num_input_tokens_seen": 262998928, + "step": 4695 + }, + { + "epoch": 10.456570155902005, + "loss": 0.7494416236877441, + "loss_ce": 0.00017405254766345024, + "loss_iou": 0.33203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.75, + "num_input_tokens_seen": 262998928, + "step": 4695 + }, + { + "epoch": 10.45879732739421, + "grad_norm": 14.804512023925781, + "learning_rate": 1e-06, + "loss": 0.5316, + "num_input_tokens_seen": 263055744, + "step": 4696 + }, + { + "epoch": 10.45879732739421, + "loss": 0.5465176105499268, + "loss_ce": 0.0001309234939981252, + "loss_iou": 0.236328125, + "loss_num": 0.014892578125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 263055744, + "step": 4696 + }, + { + "epoch": 10.461024498886415, + "grad_norm": 23.423185348510742, + "learning_rate": 1e-06, + "loss": 0.4285, + "num_input_tokens_seen": 263111136, + "step": 4697 + }, + { + "epoch": 10.461024498886415, + "loss": 0.35573017597198486, + "loss_ce": 0.00013935507740825415, + "loss_iou": 0.162109375, + "loss_num": 0.00616455078125, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 263111136, + "step": 4697 + }, + { + "epoch": 10.46325167037862, + "grad_norm": 15.791742324829102, + "learning_rate": 1e-06, + "loss": 0.5727, + "num_input_tokens_seen": 263168172, + "step": 4698 + }, + { + "epoch": 10.46325167037862, + "loss": 0.7756419777870178, + "loss_ce": 0.00012929517833981663, + "loss_iou": 0.310546875, + "loss_num": 0.0308837890625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 263168172, + "step": 4698 + }, + { + "epoch": 10.465478841870825, + "grad_norm": 16.894933700561523, + "learning_rate": 1e-06, + "loss": 0.5117, + "num_input_tokens_seen": 263224844, + "step": 4699 + }, + { + "epoch": 10.465478841870825, + "loss": 0.4671807885169983, + "loss_ce": 0.0001397810410708189, + "loss_iou": 0.208984375, + "loss_num": 0.0098876953125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 263224844, + "step": 4699 + }, + { + "epoch": 10.46770601336303, + "grad_norm": 21.554258346557617, + "learning_rate": 1e-06, + "loss": 0.5666, + "num_input_tokens_seen": 263278976, + "step": 4700 + }, + { + "epoch": 10.46770601336303, + "loss": 0.5681118965148926, + "loss_ce": 0.00011875165364472196, + "loss_iou": 0.259765625, + "loss_num": 0.00994873046875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 263278976, + "step": 4700 + }, + { + "epoch": 10.469933184855234, + "grad_norm": 17.350711822509766, + "learning_rate": 1e-06, + "loss": 0.5042, + "num_input_tokens_seen": 263336380, + "step": 4701 + }, + { + "epoch": 10.469933184855234, + "loss": 0.4490933418273926, + "loss_ce": 0.0002408274740446359, + "loss_iou": 0.1962890625, + "loss_num": 0.01123046875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 263336380, + "step": 4701 + }, + { + "epoch": 10.47216035634744, + "grad_norm": 20.207218170166016, + "learning_rate": 1e-06, + "loss": 0.5642, + "num_input_tokens_seen": 263390656, + "step": 4702 + }, + { + "epoch": 10.47216035634744, + "loss": 0.5887584090232849, + "loss_ce": 0.0001353670231765136, + "loss_iou": 0.2099609375, + "loss_num": 0.03369140625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 263390656, + "step": 4702 + }, + { + "epoch": 10.474387527839644, + "grad_norm": 29.60268783569336, + "learning_rate": 1e-06, + "loss": 0.6127, + "num_input_tokens_seen": 263448332, + "step": 4703 + }, + { + "epoch": 10.474387527839644, + "loss": 0.6975966691970825, + "loss_ce": 0.00014798392658121884, + "loss_iou": 0.302734375, + "loss_num": 0.0184326171875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 263448332, + "step": 4703 + }, + { + "epoch": 10.476614699331849, + "grad_norm": 14.4660005569458, + "learning_rate": 1e-06, + "loss": 0.4241, + "num_input_tokens_seen": 263503824, + "step": 4704 + }, + { + "epoch": 10.476614699331849, + "loss": 0.3470431864261627, + "loss_ce": 0.00011936050577787682, + "loss_iou": 0.1484375, + "loss_num": 0.010009765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 263503824, + "step": 4704 + }, + { + "epoch": 10.478841870824054, + "grad_norm": 17.75361442565918, + "learning_rate": 1e-06, + "loss": 0.5674, + "num_input_tokens_seen": 263559724, + "step": 4705 + }, + { + "epoch": 10.478841870824054, + "loss": 0.6192811131477356, + "loss_ce": 0.0001404954819008708, + "loss_iou": 0.267578125, + "loss_num": 0.016845703125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 263559724, + "step": 4705 + }, + { + "epoch": 10.481069042316259, + "grad_norm": 16.871896743774414, + "learning_rate": 1e-06, + "loss": 0.5026, + "num_input_tokens_seen": 263615824, + "step": 4706 + }, + { + "epoch": 10.481069042316259, + "loss": 0.42048490047454834, + "loss_ce": 0.00019680103287100792, + "loss_iou": 0.1728515625, + "loss_num": 0.01483154296875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 263615824, + "step": 4706 + }, + { + "epoch": 10.483296213808464, + "grad_norm": 18.86580467224121, + "learning_rate": 1e-06, + "loss": 0.4055, + "num_input_tokens_seen": 263672988, + "step": 4707 + }, + { + "epoch": 10.483296213808464, + "loss": 0.4523187577724457, + "loss_ce": 0.0001703191373962909, + "loss_iou": 0.1982421875, + "loss_num": 0.01116943359375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 263672988, + "step": 4707 + }, + { + "epoch": 10.485523385300668, + "grad_norm": 18.27134132385254, + "learning_rate": 1e-06, + "loss": 0.4455, + "num_input_tokens_seen": 263729920, + "step": 4708 + }, + { + "epoch": 10.485523385300668, + "loss": 0.46423059701919556, + "loss_ce": 0.00011923116107936949, + "loss_iou": 0.2099609375, + "loss_num": 0.00885009765625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 263729920, + "step": 4708 + }, + { + "epoch": 10.487750556792873, + "grad_norm": 16.8643856048584, + "learning_rate": 1e-06, + "loss": 0.4758, + "num_input_tokens_seen": 263786968, + "step": 4709 + }, + { + "epoch": 10.487750556792873, + "loss": 0.41022104024887085, + "loss_ce": 0.00018687370175030082, + "loss_iou": 0.16796875, + "loss_num": 0.01495361328125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 263786968, + "step": 4709 + }, + { + "epoch": 10.489977728285078, + "grad_norm": 22.2263240814209, + "learning_rate": 1e-06, + "loss": 0.4912, + "num_input_tokens_seen": 263842484, + "step": 4710 + }, + { + "epoch": 10.489977728285078, + "loss": 0.5018296241760254, + "loss_ce": 0.00012060045264661312, + "loss_iou": 0.224609375, + "loss_num": 0.0103759765625, + "loss_xval": 0.5, + "num_input_tokens_seen": 263842484, + "step": 4710 + }, + { + "epoch": 10.492204899777283, + "grad_norm": 17.63686180114746, + "learning_rate": 1e-06, + "loss": 0.5927, + "num_input_tokens_seen": 263899136, + "step": 4711 + }, + { + "epoch": 10.492204899777283, + "loss": 0.675079345703125, + "loss_ce": 0.00015263669774867594, + "loss_iou": 0.26171875, + "loss_num": 0.030517578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 263899136, + "step": 4711 + }, + { + "epoch": 10.494432071269488, + "grad_norm": 19.261802673339844, + "learning_rate": 1e-06, + "loss": 0.5165, + "num_input_tokens_seen": 263956272, + "step": 4712 + }, + { + "epoch": 10.494432071269488, + "loss": 0.4028354585170746, + "loss_ce": 0.00012550255632959306, + "loss_iou": 0.16796875, + "loss_num": 0.013427734375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 263956272, + "step": 4712 + }, + { + "epoch": 10.496659242761693, + "grad_norm": 22.613391876220703, + "learning_rate": 1e-06, + "loss": 0.5683, + "num_input_tokens_seen": 264010704, + "step": 4713 + }, + { + "epoch": 10.496659242761693, + "loss": 0.7420130968093872, + "loss_ce": 0.0001918151247082278, + "loss_iou": 0.306640625, + "loss_num": 0.025390625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 264010704, + "step": 4713 + }, + { + "epoch": 10.498886414253898, + "grad_norm": 18.75325584411621, + "learning_rate": 1e-06, + "loss": 0.805, + "num_input_tokens_seen": 264065440, + "step": 4714 + }, + { + "epoch": 10.498886414253898, + "loss": 0.7152308225631714, + "loss_ce": 0.00014293221465777606, + "loss_iou": 0.287109375, + "loss_num": 0.0277099609375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 264065440, + "step": 4714 + }, + { + "epoch": 10.501113585746102, + "grad_norm": 17.041898727416992, + "learning_rate": 1e-06, + "loss": 0.5754, + "num_input_tokens_seen": 264122460, + "step": 4715 + }, + { + "epoch": 10.501113585746102, + "loss": 0.5261731743812561, + "loss_ce": 0.0002942573046311736, + "loss_iou": 0.228515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 264122460, + "step": 4715 + }, + { + "epoch": 10.503340757238307, + "grad_norm": 24.113428115844727, + "learning_rate": 1e-06, + "loss": 0.4715, + "num_input_tokens_seen": 264176608, + "step": 4716 + }, + { + "epoch": 10.503340757238307, + "loss": 0.6706770658493042, + "loss_ce": 0.0001447929535061121, + "loss_iou": 0.287109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 264176608, + "step": 4716 + }, + { + "epoch": 10.505567928730512, + "grad_norm": 25.971769332885742, + "learning_rate": 1e-06, + "loss": 0.6877, + "num_input_tokens_seen": 264232344, + "step": 4717 + }, + { + "epoch": 10.505567928730512, + "loss": 0.8002659678459167, + "loss_ce": 0.00046127024688757956, + "loss_iou": 0.3515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 264232344, + "step": 4717 + }, + { + "epoch": 10.507795100222717, + "grad_norm": 18.61360740661621, + "learning_rate": 1e-06, + "loss": 0.6597, + "num_input_tokens_seen": 264287720, + "step": 4718 + }, + { + "epoch": 10.507795100222717, + "loss": 0.4703478217124939, + "loss_ce": 0.0001329787337454036, + "loss_iou": 0.21484375, + "loss_num": 0.00799560546875, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 264287720, + "step": 4718 + }, + { + "epoch": 10.510022271714922, + "grad_norm": 22.263904571533203, + "learning_rate": 1e-06, + "loss": 0.3736, + "num_input_tokens_seen": 264344596, + "step": 4719 + }, + { + "epoch": 10.510022271714922, + "loss": 0.29570016264915466, + "loss_ce": 0.00016795132250990719, + "loss_iou": 0.1298828125, + "loss_num": 0.007080078125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 264344596, + "step": 4719 + }, + { + "epoch": 10.512249443207127, + "grad_norm": 16.474157333374023, + "learning_rate": 1e-06, + "loss": 0.4543, + "num_input_tokens_seen": 264401628, + "step": 4720 + }, + { + "epoch": 10.512249443207127, + "loss": 0.4919620752334595, + "loss_ce": 0.00014076274237595499, + "loss_iou": 0.21875, + "loss_num": 0.0108642578125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 264401628, + "step": 4720 + }, + { + "epoch": 10.514476614699332, + "grad_norm": 14.566910743713379, + "learning_rate": 1e-06, + "loss": 0.4783, + "num_input_tokens_seen": 264457620, + "step": 4721 + }, + { + "epoch": 10.514476614699332, + "loss": 0.4565121829509735, + "loss_ce": 0.0007016496965661645, + "loss_iou": 0.1962890625, + "loss_num": 0.01263427734375, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 264457620, + "step": 4721 + }, + { + "epoch": 10.516703786191536, + "grad_norm": 11.910482406616211, + "learning_rate": 1e-06, + "loss": 0.584, + "num_input_tokens_seen": 264514528, + "step": 4722 + }, + { + "epoch": 10.516703786191536, + "loss": 0.4461956024169922, + "loss_ce": 0.00015069925575517118, + "loss_iou": 0.1962890625, + "loss_num": 0.0107421875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 264514528, + "step": 4722 + }, + { + "epoch": 10.518930957683741, + "grad_norm": 20.123058319091797, + "learning_rate": 1e-06, + "loss": 0.5795, + "num_input_tokens_seen": 264571452, + "step": 4723 + }, + { + "epoch": 10.518930957683741, + "loss": 0.7175799608230591, + "loss_ce": 0.0001727462949929759, + "loss_iou": 0.314453125, + "loss_num": 0.017822265625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 264571452, + "step": 4723 + }, + { + "epoch": 10.521158129175946, + "grad_norm": 15.674975395202637, + "learning_rate": 1e-06, + "loss": 0.5361, + "num_input_tokens_seen": 264627296, + "step": 4724 + }, + { + "epoch": 10.521158129175946, + "loss": 0.5477256178855896, + "loss_ce": 0.00011817881022579968, + "loss_iou": 0.2265625, + "loss_num": 0.018798828125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 264627296, + "step": 4724 + }, + { + "epoch": 10.523385300668151, + "grad_norm": 22.113855361938477, + "learning_rate": 1e-06, + "loss": 0.5957, + "num_input_tokens_seen": 264681016, + "step": 4725 + }, + { + "epoch": 10.523385300668151, + "loss": 0.6728705167770386, + "loss_ce": 0.0001409871329087764, + "loss_iou": 0.283203125, + "loss_num": 0.02099609375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 264681016, + "step": 4725 + }, + { + "epoch": 10.525612472160356, + "grad_norm": 18.36030387878418, + "learning_rate": 1e-06, + "loss": 0.5032, + "num_input_tokens_seen": 264738488, + "step": 4726 + }, + { + "epoch": 10.525612472160356, + "loss": 0.3978542685508728, + "loss_ce": 0.00014918479428160936, + "loss_iou": 0.166015625, + "loss_num": 0.01300048828125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 264738488, + "step": 4726 + }, + { + "epoch": 10.52783964365256, + "grad_norm": 17.425609588623047, + "learning_rate": 1e-06, + "loss": 0.3512, + "num_input_tokens_seen": 264795500, + "step": 4727 + }, + { + "epoch": 10.52783964365256, + "loss": 0.3651301860809326, + "loss_ce": 0.0001399611501256004, + "loss_iou": 0.150390625, + "loss_num": 0.01300048828125, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 264795500, + "step": 4727 + }, + { + "epoch": 10.530066815144766, + "grad_norm": 27.829883575439453, + "learning_rate": 1e-06, + "loss": 0.472, + "num_input_tokens_seen": 264853260, + "step": 4728 + }, + { + "epoch": 10.530066815144766, + "loss": 0.40601441264152527, + "loss_ce": 0.00025269074831157923, + "loss_iou": 0.1689453125, + "loss_num": 0.0135498046875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 264853260, + "step": 4728 + }, + { + "epoch": 10.53229398663697, + "grad_norm": 10.47251033782959, + "learning_rate": 1e-06, + "loss": 0.5847, + "num_input_tokens_seen": 264907728, + "step": 4729 + }, + { + "epoch": 10.53229398663697, + "loss": 0.5061122179031372, + "loss_ce": 0.00013078347546979785, + "loss_iou": 0.2216796875, + "loss_num": 0.0125732421875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 264907728, + "step": 4729 + }, + { + "epoch": 10.534521158129175, + "grad_norm": 26.059276580810547, + "learning_rate": 1e-06, + "loss": 0.6593, + "num_input_tokens_seen": 264961788, + "step": 4730 + }, + { + "epoch": 10.534521158129175, + "loss": 0.6526414752006531, + "loss_ce": 0.00017564307199791074, + "loss_iou": 0.283203125, + "loss_num": 0.0174560546875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 264961788, + "step": 4730 + }, + { + "epoch": 10.53674832962138, + "grad_norm": 17.807939529418945, + "learning_rate": 1e-06, + "loss": 0.3761, + "num_input_tokens_seen": 265015304, + "step": 4731 + }, + { + "epoch": 10.53674832962138, + "loss": 0.3732970356941223, + "loss_ce": 0.00015859381528571248, + "loss_iou": 0.171875, + "loss_num": 0.005950927734375, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 265015304, + "step": 4731 + }, + { + "epoch": 10.538975501113585, + "grad_norm": 92.26382446289062, + "learning_rate": 1e-06, + "loss": 0.5017, + "num_input_tokens_seen": 265070648, + "step": 4732 + }, + { + "epoch": 10.538975501113585, + "loss": 0.42812007665634155, + "loss_ce": 0.0010571026941761374, + "loss_iou": 0.171875, + "loss_num": 0.0166015625, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 265070648, + "step": 4732 + }, + { + "epoch": 10.54120267260579, + "grad_norm": 18.604368209838867, + "learning_rate": 1e-06, + "loss": 0.5278, + "num_input_tokens_seen": 265126172, + "step": 4733 + }, + { + "epoch": 10.54120267260579, + "loss": 0.5804719924926758, + "loss_ce": 0.00018023110169451684, + "loss_iou": 0.234375, + "loss_num": 0.022216796875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 265126172, + "step": 4733 + }, + { + "epoch": 10.543429844097995, + "grad_norm": 17.7320556640625, + "learning_rate": 1e-06, + "loss": 0.6031, + "num_input_tokens_seen": 265183528, + "step": 4734 + }, + { + "epoch": 10.543429844097995, + "loss": 0.49740028381347656, + "loss_ce": 0.00045207011862657964, + "loss_iou": 0.1884765625, + "loss_num": 0.02392578125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 265183528, + "step": 4734 + }, + { + "epoch": 10.5456570155902, + "grad_norm": 29.795515060424805, + "learning_rate": 1e-06, + "loss": 0.3743, + "num_input_tokens_seen": 265241408, + "step": 4735 + }, + { + "epoch": 10.5456570155902, + "loss": 0.37365561723709106, + "loss_ce": 0.00012047103518852964, + "loss_iou": 0.1640625, + "loss_num": 0.009033203125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 265241408, + "step": 4735 + }, + { + "epoch": 10.547884187082406, + "grad_norm": 60.60297775268555, + "learning_rate": 1e-06, + "loss": 0.5276, + "num_input_tokens_seen": 265296856, + "step": 4736 + }, + { + "epoch": 10.547884187082406, + "loss": 0.3346078097820282, + "loss_ce": 0.0001351345854345709, + "loss_iou": 0.15625, + "loss_num": 0.004180908203125, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 265296856, + "step": 4736 + }, + { + "epoch": 10.550111358574611, + "grad_norm": 28.60586929321289, + "learning_rate": 1e-06, + "loss": 0.5012, + "num_input_tokens_seen": 265351804, + "step": 4737 + }, + { + "epoch": 10.550111358574611, + "loss": 0.5089592933654785, + "loss_ce": 0.00017022958490997553, + "loss_iou": 0.1982421875, + "loss_num": 0.0224609375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 265351804, + "step": 4737 + }, + { + "epoch": 10.552338530066816, + "grad_norm": 16.353670120239258, + "learning_rate": 1e-06, + "loss": 0.6282, + "num_input_tokens_seen": 265406476, + "step": 4738 + }, + { + "epoch": 10.552338530066816, + "loss": 0.6063532829284668, + "loss_ce": 0.00015206160605885088, + "loss_iou": 0.255859375, + "loss_num": 0.018798828125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 265406476, + "step": 4738 + }, + { + "epoch": 10.55456570155902, + "grad_norm": 18.11619758605957, + "learning_rate": 1e-06, + "loss": 0.5011, + "num_input_tokens_seen": 265463764, + "step": 4739 + }, + { + "epoch": 10.55456570155902, + "loss": 0.6158651113510132, + "loss_ce": 0.000386598490877077, + "loss_iou": 0.271484375, + "loss_num": 0.01458740234375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 265463764, + "step": 4739 + }, + { + "epoch": 10.556792873051226, + "grad_norm": 21.920385360717773, + "learning_rate": 1e-06, + "loss": 0.7462, + "num_input_tokens_seen": 265520116, + "step": 4740 + }, + { + "epoch": 10.556792873051226, + "loss": 0.9230633974075317, + "loss_ce": 0.00021183726494200528, + "loss_iou": 0.37890625, + "loss_num": 0.033203125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 265520116, + "step": 4740 + }, + { + "epoch": 10.55902004454343, + "grad_norm": 12.749909400939941, + "learning_rate": 1e-06, + "loss": 0.4596, + "num_input_tokens_seen": 265578596, + "step": 4741 + }, + { + "epoch": 10.55902004454343, + "loss": 0.5135598182678223, + "loss_ce": 0.00013206491712480783, + "loss_iou": 0.2255859375, + "loss_num": 0.01275634765625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 265578596, + "step": 4741 + }, + { + "epoch": 10.561247216035635, + "grad_norm": 23.8961124420166, + "learning_rate": 1e-06, + "loss": 0.669, + "num_input_tokens_seen": 265634096, + "step": 4742 + }, + { + "epoch": 10.561247216035635, + "loss": 0.8774167895317078, + "loss_ce": 0.0029050512239336967, + "loss_iou": 0.34765625, + "loss_num": 0.03564453125, + "loss_xval": 0.875, + "num_input_tokens_seen": 265634096, + "step": 4742 + }, + { + "epoch": 10.56347438752784, + "grad_norm": 27.8192081451416, + "learning_rate": 1e-06, + "loss": 0.6048, + "num_input_tokens_seen": 265687780, + "step": 4743 + }, + { + "epoch": 10.56347438752784, + "loss": 0.5951275825500488, + "loss_ce": 0.00015690061263740063, + "loss_iou": 0.2578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 265687780, + "step": 4743 + }, + { + "epoch": 10.565701559020045, + "grad_norm": 23.720129013061523, + "learning_rate": 1e-06, + "loss": 0.4395, + "num_input_tokens_seen": 265746240, + "step": 4744 + }, + { + "epoch": 10.565701559020045, + "loss": 0.43288612365722656, + "loss_ce": 0.0001468704140279442, + "loss_iou": 0.203125, + "loss_num": 0.005126953125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 265746240, + "step": 4744 + }, + { + "epoch": 10.56792873051225, + "grad_norm": 18.638071060180664, + "learning_rate": 1e-06, + "loss": 0.5585, + "num_input_tokens_seen": 265800708, + "step": 4745 + }, + { + "epoch": 10.56792873051225, + "loss": 0.690610408782959, + "loss_ce": 0.00018075491243507713, + "loss_iou": 0.291015625, + "loss_num": 0.0216064453125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 265800708, + "step": 4745 + }, + { + "epoch": 10.570155902004455, + "grad_norm": 27.448678970336914, + "learning_rate": 1e-06, + "loss": 0.5302, + "num_input_tokens_seen": 265857676, + "step": 4746 + }, + { + "epoch": 10.570155902004455, + "loss": 0.5328932404518127, + "loss_ce": 0.0001783959160093218, + "loss_iou": 0.2333984375, + "loss_num": 0.01312255859375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 265857676, + "step": 4746 + }, + { + "epoch": 10.57238307349666, + "grad_norm": 18.024444580078125, + "learning_rate": 1e-06, + "loss": 0.5738, + "num_input_tokens_seen": 265914688, + "step": 4747 + }, + { + "epoch": 10.57238307349666, + "loss": 0.5035586357116699, + "loss_ce": 0.0001406791852787137, + "loss_iou": 0.2236328125, + "loss_num": 0.0113525390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 265914688, + "step": 4747 + }, + { + "epoch": 10.574610244988865, + "grad_norm": 17.24481773376465, + "learning_rate": 1e-06, + "loss": 0.6003, + "num_input_tokens_seen": 265970292, + "step": 4748 + }, + { + "epoch": 10.574610244988865, + "loss": 0.5748478174209595, + "loss_ce": 0.00014080088294576854, + "loss_iou": 0.255859375, + "loss_num": 0.0126953125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 265970292, + "step": 4748 + }, + { + "epoch": 10.57683741648107, + "grad_norm": 21.86254119873047, + "learning_rate": 1e-06, + "loss": 0.5795, + "num_input_tokens_seen": 266027040, + "step": 4749 + }, + { + "epoch": 10.57683741648107, + "loss": 0.6045183539390564, + "loss_ce": 0.00014822966477368027, + "loss_iou": 0.2578125, + "loss_num": 0.017822265625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 266027040, + "step": 4749 + }, + { + "epoch": 10.579064587973274, + "grad_norm": 15.369706153869629, + "learning_rate": 1e-06, + "loss": 0.5245, + "num_input_tokens_seen": 266085392, + "step": 4750 + }, + { + "epoch": 10.579064587973274, + "eval_seeclick_web_CIoU": 0.5785282254219055, + "eval_seeclick_web_GIoU": 0.5777834057807922, + "eval_seeclick_web_IoU": 0.5963070392608643, + "eval_seeclick_web_MAE_all": 0.016103142872452736, + "eval_seeclick_web_MAE_h": 0.0080435904674232, + "eval_seeclick_web_MAE_w": 0.015959544107317924, + "eval_seeclick_web_MAE_x_boxes": 0.009686239995062351, + "eval_seeclick_web_MAE_y_boxes": 0.021757841110229492, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9105224013328552, + "eval_seeclick_web_loss_ce": 0.0002034704084508121, + "eval_seeclick_web_loss_iou": 0.415283203125, + "eval_seeclick_web_loss_num": 0.012907028198242188, + "eval_seeclick_web_loss_xval": 0.89501953125, + "eval_seeclick_web_runtime": 21.8476, + "eval_seeclick_web_samples_per_second": 2.289, + "eval_seeclick_web_steps_per_second": 0.092, + "num_input_tokens_seen": 266085392, + "step": 4750 + }, + { + "epoch": 10.579064587973274, + "eval_icons_CIoU": 0.2794487178325653, + "eval_icons_GIoU": 0.3004266917705536, + "eval_icons_IoU": 0.3560175597667694, + "eval_icons_MAE_all": 0.060138389468193054, + "eval_icons_MAE_h": 0.03905831277370453, + "eval_icons_MAE_w": 0.05829060822725296, + "eval_icons_MAE_x_boxes": 0.05876126326620579, + "eval_icons_MAE_y_boxes": 0.0388388317078352, + "eval_icons_inside_bbox": 0.6059027910232544, + "eval_icons_loss": 1.7231128215789795, + "eval_icons_loss_ce": 0.00027843897260027006, + "eval_icons_loss_iou": 0.6767578125, + "eval_icons_loss_num": 0.05956840515136719, + "eval_icons_loss_xval": 1.6513671875, + "eval_icons_runtime": 21.0355, + "eval_icons_samples_per_second": 2.377, + "eval_icons_steps_per_second": 0.095, + "num_input_tokens_seen": 266085392, + "step": 4750 + }, + { + "epoch": 10.579064587973274, + "eval_screenspot_CIoU": 0.3542960087458293, + "eval_screenspot_GIoU": 0.373006671667099, + "eval_screenspot_IoU": 0.4333365758260091, + "eval_screenspot_MAE_all": 0.05960427472988764, + "eval_screenspot_MAE_h": 0.03762041280666987, + "eval_screenspot_MAE_w": 0.06836641455690066, + "eval_screenspot_MAE_x_boxes": 0.07390400022268295, + "eval_screenspot_MAE_y_boxes": 0.03899012443919977, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 1.6113767623901367, + "eval_screenspot_loss_ce": 0.0002595222613308579, + "eval_screenspot_loss_iou": 0.6659342447916666, + "eval_screenspot_loss_num": 0.0684064229329427, + "eval_screenspot_loss_xval": 1.6736653645833333, + "eval_screenspot_runtime": 35.1964, + "eval_screenspot_samples_per_second": 2.529, + "eval_screenspot_steps_per_second": 0.085, + "num_input_tokens_seen": 266085392, + "step": 4750 + }, + { + "epoch": 10.579064587973274, + "eval_compot_CIoU": 0.34722261130809784, + "eval_compot_GIoU": 0.35881949961185455, + "eval_compot_IoU": 0.4061947613954544, + "eval_compot_MAE_all": 0.018287954851984978, + "eval_compot_MAE_h": 0.009336336981505156, + "eval_compot_MAE_w": 0.0211613979190588, + "eval_compot_MAE_x_boxes": 0.030585231259465218, + "eval_compot_MAE_y_boxes": 0.006953268777579069, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3919204473495483, + "eval_compot_loss_ce": 0.00020033524197060615, + "eval_compot_loss_iou": 0.6395263671875, + "eval_compot_loss_num": 0.017185211181640625, + "eval_compot_loss_xval": 1.364501953125, + "eval_compot_runtime": 21.5206, + "eval_compot_samples_per_second": 2.323, + "eval_compot_steps_per_second": 0.093, + "num_input_tokens_seen": 266085392, + "step": 4750 + }, + { + "epoch": 10.579064587973274, + "eval_custom_ui_val_CIoU": 0.47197884652349686, + "eval_custom_ui_val_GIoU": 0.48575951324568856, + "eval_custom_ui_val_IoU": 0.5315978427728018, + "eval_custom_ui_val_MAE_all": 0.03096564869499869, + "eval_custom_ui_val_MAE_h": 0.016998413018882275, + "eval_custom_ui_val_MAE_w": 0.039277435590823494, + "eval_custom_ui_val_MAE_x_boxes": 0.038262664857837886, + "eval_custom_ui_val_MAE_y_boxes": 0.01508552656095061, + "eval_custom_ui_val_inside_bbox": 0.7527006202273898, + "eval_custom_ui_val_loss": 1.1994266510009766, + "eval_custom_ui_val_loss_ce": 0.00023388697849845307, + "eval_custom_ui_val_loss_iou": 0.5101725260416666, + "eval_custom_ui_val_loss_num": 0.028283013237847224, + "eval_custom_ui_val_loss_xval": 1.1618109809027777, + "eval_custom_ui_val_runtime": 65.3312, + "eval_custom_ui_val_samples_per_second": 4.056, + "eval_custom_ui_val_steps_per_second": 0.138, + "num_input_tokens_seen": 266085392, + "step": 4750 + }, + { + "epoch": 10.579064587973274, + "loss": 0.9167079925537109, + "loss_ce": 0.00020406048861332238, + "loss_iou": 0.39453125, + "loss_num": 0.0252685546875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 266085392, + "step": 4750 + }, + { + "epoch": 10.58129175946548, + "grad_norm": 22.031713485717773, + "learning_rate": 1e-06, + "loss": 0.5825, + "num_input_tokens_seen": 266140888, + "step": 4751 + }, + { + "epoch": 10.58129175946548, + "loss": 0.5439281463623047, + "loss_ce": 0.00013536959886550903, + "loss_iou": 0.24609375, + "loss_num": 0.0101318359375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 266140888, + "step": 4751 + }, + { + "epoch": 10.583518930957684, + "grad_norm": 28.358179092407227, + "learning_rate": 1e-06, + "loss": 0.5168, + "num_input_tokens_seen": 266195452, + "step": 4752 + }, + { + "epoch": 10.583518930957684, + "loss": 0.4804303050041199, + "loss_ce": 0.0002057160745607689, + "loss_iou": 0.2041015625, + "loss_num": 0.014404296875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 266195452, + "step": 4752 + }, + { + "epoch": 10.585746102449889, + "grad_norm": 20.244579315185547, + "learning_rate": 1e-06, + "loss": 0.6685, + "num_input_tokens_seen": 266253420, + "step": 4753 + }, + { + "epoch": 10.585746102449889, + "loss": 0.6307578086853027, + "loss_ce": 0.00014260906027629972, + "loss_iou": 0.283203125, + "loss_num": 0.01300048828125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 266253420, + "step": 4753 + }, + { + "epoch": 10.587973273942094, + "grad_norm": 23.68046760559082, + "learning_rate": 1e-06, + "loss": 0.5178, + "num_input_tokens_seen": 266309880, + "step": 4754 + }, + { + "epoch": 10.587973273942094, + "loss": 0.590294599533081, + "loss_ce": 0.0002067518071271479, + "loss_iou": 0.255859375, + "loss_num": 0.015380859375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 266309880, + "step": 4754 + }, + { + "epoch": 10.590200445434299, + "grad_norm": 14.198582649230957, + "learning_rate": 1e-06, + "loss": 0.6997, + "num_input_tokens_seen": 266367096, + "step": 4755 + }, + { + "epoch": 10.590200445434299, + "loss": 0.7059162855148315, + "loss_ce": 0.00022783357417210937, + "loss_iou": 0.3046875, + "loss_num": 0.019287109375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 266367096, + "step": 4755 + }, + { + "epoch": 10.592427616926503, + "grad_norm": 22.98945426940918, + "learning_rate": 1e-06, + "loss": 0.569, + "num_input_tokens_seen": 266421524, + "step": 4756 + }, + { + "epoch": 10.592427616926503, + "loss": 0.6033655405044556, + "loss_ce": 0.0001550911256344989, + "loss_iou": 0.267578125, + "loss_num": 0.0137939453125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 266421524, + "step": 4756 + }, + { + "epoch": 10.594654788418708, + "grad_norm": 22.537723541259766, + "learning_rate": 1e-06, + "loss": 0.3968, + "num_input_tokens_seen": 266475392, + "step": 4757 + }, + { + "epoch": 10.594654788418708, + "loss": 0.424811989068985, + "loss_ce": 0.00012936009443365037, + "loss_iou": 0.1875, + "loss_num": 0.010009765625, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 266475392, + "step": 4757 + }, + { + "epoch": 10.596881959910913, + "grad_norm": 21.612504959106445, + "learning_rate": 1e-06, + "loss": 0.5241, + "num_input_tokens_seen": 266531920, + "step": 4758 + }, + { + "epoch": 10.596881959910913, + "loss": 0.5215491056442261, + "loss_ce": 0.0003089057863689959, + "loss_iou": 0.2275390625, + "loss_num": 0.01348876953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 266531920, + "step": 4758 + }, + { + "epoch": 10.599109131403118, + "grad_norm": 14.450573921203613, + "learning_rate": 1e-06, + "loss": 0.5417, + "num_input_tokens_seen": 266585680, + "step": 4759 + }, + { + "epoch": 10.599109131403118, + "loss": 0.6613370180130005, + "loss_ce": 0.00014318549074232578, + "loss_iou": 0.26953125, + "loss_num": 0.0242919921875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 266585680, + "step": 4759 + }, + { + "epoch": 10.601336302895323, + "grad_norm": 24.842741012573242, + "learning_rate": 1e-06, + "loss": 0.7178, + "num_input_tokens_seen": 266643272, + "step": 4760 + }, + { + "epoch": 10.601336302895323, + "loss": 0.8287136554718018, + "loss_ce": 0.00022248990717343986, + "loss_iou": 0.365234375, + "loss_num": 0.0196533203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 266643272, + "step": 4760 + }, + { + "epoch": 10.603563474387528, + "grad_norm": 21.21478271484375, + "learning_rate": 1e-06, + "loss": 0.622, + "num_input_tokens_seen": 266700996, + "step": 4761 + }, + { + "epoch": 10.603563474387528, + "loss": 0.6402872800827026, + "loss_ce": 0.00015057012205943465, + "loss_iou": 0.28515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 266700996, + "step": 4761 + }, + { + "epoch": 10.605790645879733, + "grad_norm": 31.64474868774414, + "learning_rate": 1e-06, + "loss": 0.4276, + "num_input_tokens_seen": 266758212, + "step": 4762 + }, + { + "epoch": 10.605790645879733, + "loss": 0.43672651052474976, + "loss_ce": 0.0002030893083428964, + "loss_iou": 0.19140625, + "loss_num": 0.0108642578125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 266758212, + "step": 4762 + }, + { + "epoch": 10.608017817371937, + "grad_norm": 32.779476165771484, + "learning_rate": 1e-06, + "loss": 0.6393, + "num_input_tokens_seen": 266811900, + "step": 4763 + }, + { + "epoch": 10.608017817371937, + "loss": 0.5560557842254639, + "loss_ce": 0.00014755260781385005, + "loss_iou": 0.2373046875, + "loss_num": 0.016357421875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 266811900, + "step": 4763 + }, + { + "epoch": 10.610244988864142, + "grad_norm": 17.99529266357422, + "learning_rate": 1e-06, + "loss": 0.4536, + "num_input_tokens_seen": 266863520, + "step": 4764 + }, + { + "epoch": 10.610244988864142, + "loss": 0.441336989402771, + "loss_ce": 0.00011382217053323984, + "loss_iou": 0.15625, + "loss_num": 0.02587890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 266863520, + "step": 4764 + }, + { + "epoch": 10.612472160356347, + "grad_norm": 20.494014739990234, + "learning_rate": 1e-06, + "loss": 0.7394, + "num_input_tokens_seen": 266918760, + "step": 4765 + }, + { + "epoch": 10.612472160356347, + "loss": 0.49647536873817444, + "loss_ce": 0.00013749409117735922, + "loss_iou": 0.2216796875, + "loss_num": 0.01055908203125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 266918760, + "step": 4765 + }, + { + "epoch": 10.614699331848552, + "grad_norm": 19.456296920776367, + "learning_rate": 1e-06, + "loss": 0.4065, + "num_input_tokens_seen": 266975336, + "step": 4766 + }, + { + "epoch": 10.614699331848552, + "loss": 0.4348350167274475, + "loss_ce": 0.00014263816410675645, + "loss_iou": 0.1943359375, + "loss_num": 0.009033203125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 266975336, + "step": 4766 + }, + { + "epoch": 10.616926503340757, + "grad_norm": 23.569063186645508, + "learning_rate": 1e-06, + "loss": 0.4614, + "num_input_tokens_seen": 267031440, + "step": 4767 + }, + { + "epoch": 10.616926503340757, + "loss": 0.4862057566642761, + "loss_ce": 0.00012178381439298391, + "loss_iou": 0.2236328125, + "loss_num": 0.0076904296875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 267031440, + "step": 4767 + }, + { + "epoch": 10.619153674832962, + "grad_norm": 20.638471603393555, + "learning_rate": 1e-06, + "loss": 0.4967, + "num_input_tokens_seen": 267086692, + "step": 4768 + }, + { + "epoch": 10.619153674832962, + "loss": 0.5913643836975098, + "loss_ce": 0.00017783006478566676, + "loss_iou": 0.267578125, + "loss_num": 0.0108642578125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 267086692, + "step": 4768 + }, + { + "epoch": 10.621380846325167, + "grad_norm": 17.369722366333008, + "learning_rate": 1e-06, + "loss": 0.5522, + "num_input_tokens_seen": 267143996, + "step": 4769 + }, + { + "epoch": 10.621380846325167, + "loss": 0.6038705706596375, + "loss_ce": 0.0001718288112897426, + "loss_iou": 0.275390625, + "loss_num": 0.01043701171875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 267143996, + "step": 4769 + }, + { + "epoch": 10.623608017817372, + "grad_norm": 15.817121505737305, + "learning_rate": 1e-06, + "loss": 0.6093, + "num_input_tokens_seen": 267202664, + "step": 4770 + }, + { + "epoch": 10.623608017817372, + "loss": 0.608995795249939, + "loss_ce": 0.00010910604032687843, + "loss_iou": 0.26953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 267202664, + "step": 4770 + }, + { + "epoch": 10.625835189309576, + "grad_norm": 19.741697311401367, + "learning_rate": 1e-06, + "loss": 0.6764, + "num_input_tokens_seen": 267259716, + "step": 4771 + }, + { + "epoch": 10.625835189309576, + "loss": 0.5544874668121338, + "loss_ce": 0.00016620635869912803, + "loss_iou": 0.21875, + "loss_num": 0.0233154296875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 267259716, + "step": 4771 + }, + { + "epoch": 10.628062360801781, + "grad_norm": 14.572179794311523, + "learning_rate": 1e-06, + "loss": 0.4699, + "num_input_tokens_seen": 267316276, + "step": 4772 + }, + { + "epoch": 10.628062360801781, + "loss": 0.37722790241241455, + "loss_ce": 0.0001526982377981767, + "loss_iou": 0.169921875, + "loss_num": 0.00738525390625, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 267316276, + "step": 4772 + }, + { + "epoch": 10.630289532293986, + "grad_norm": 35.8219108581543, + "learning_rate": 1e-06, + "loss": 0.7025, + "num_input_tokens_seen": 267370620, + "step": 4773 + }, + { + "epoch": 10.630289532293986, + "loss": 0.7943651080131531, + "loss_ce": 0.00017563029541634023, + "loss_iou": 0.359375, + "loss_num": 0.0147705078125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 267370620, + "step": 4773 + }, + { + "epoch": 10.632516703786191, + "grad_norm": 20.09000015258789, + "learning_rate": 1e-06, + "loss": 0.7104, + "num_input_tokens_seen": 267427184, + "step": 4774 + }, + { + "epoch": 10.632516703786191, + "loss": 0.812397837638855, + "loss_ce": 0.00014197138079907745, + "loss_iou": 0.318359375, + "loss_num": 0.03515625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 267427184, + "step": 4774 + }, + { + "epoch": 10.634743875278396, + "grad_norm": 21.24492835998535, + "learning_rate": 1e-06, + "loss": 0.6861, + "num_input_tokens_seen": 267481704, + "step": 4775 + }, + { + "epoch": 10.634743875278396, + "loss": 0.7231541872024536, + "loss_ce": 0.0001317608985118568, + "loss_iou": 0.306640625, + "loss_num": 0.0223388671875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 267481704, + "step": 4775 + }, + { + "epoch": 10.6369710467706, + "grad_norm": 32.74296951293945, + "learning_rate": 1e-06, + "loss": 0.5468, + "num_input_tokens_seen": 267539444, + "step": 4776 + }, + { + "epoch": 10.6369710467706, + "loss": 0.5010995864868164, + "loss_ce": 0.00030611734837293625, + "loss_iou": 0.2109375, + "loss_num": 0.015869140625, + "loss_xval": 0.5, + "num_input_tokens_seen": 267539444, + "step": 4776 + }, + { + "epoch": 10.639198218262806, + "grad_norm": 21.132326126098633, + "learning_rate": 1e-06, + "loss": 0.6992, + "num_input_tokens_seen": 267595728, + "step": 4777 + }, + { + "epoch": 10.639198218262806, + "loss": 0.6195580959320068, + "loss_ce": 0.00017328912508673966, + "loss_iou": 0.271484375, + "loss_num": 0.0152587890625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 267595728, + "step": 4777 + }, + { + "epoch": 10.64142538975501, + "grad_norm": 13.380131721496582, + "learning_rate": 1e-06, + "loss": 0.3196, + "num_input_tokens_seen": 267650392, + "step": 4778 + }, + { + "epoch": 10.64142538975501, + "loss": 0.3022405207157135, + "loss_ce": 0.00011649330554064363, + "loss_iou": 0.1240234375, + "loss_num": 0.0107421875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 267650392, + "step": 4778 + }, + { + "epoch": 10.643652561247215, + "grad_norm": 16.144832611083984, + "learning_rate": 1e-06, + "loss": 0.5431, + "num_input_tokens_seen": 267707784, + "step": 4779 + }, + { + "epoch": 10.643652561247215, + "loss": 0.7454525828361511, + "loss_ce": 0.00021334037592168897, + "loss_iou": 0.302734375, + "loss_num": 0.02783203125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 267707784, + "step": 4779 + }, + { + "epoch": 10.64587973273942, + "grad_norm": 32.90868377685547, + "learning_rate": 1e-06, + "loss": 0.57, + "num_input_tokens_seen": 267764500, + "step": 4780 + }, + { + "epoch": 10.64587973273942, + "loss": 0.6209944486618042, + "loss_ce": 0.0001448530238121748, + "loss_iou": 0.263671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 267764500, + "step": 4780 + }, + { + "epoch": 10.648106904231625, + "grad_norm": 90.17802429199219, + "learning_rate": 1e-06, + "loss": 0.5177, + "num_input_tokens_seen": 267819128, + "step": 4781 + }, + { + "epoch": 10.648106904231625, + "loss": 0.5430988073348999, + "loss_ce": 0.00013003447384107858, + "loss_iou": 0.21875, + "loss_num": 0.021240234375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 267819128, + "step": 4781 + }, + { + "epoch": 10.65033407572383, + "grad_norm": 20.155319213867188, + "learning_rate": 1e-06, + "loss": 0.6905, + "num_input_tokens_seen": 267876160, + "step": 4782 + }, + { + "epoch": 10.65033407572383, + "loss": 0.7085360288619995, + "loss_ce": 0.00016200730169657618, + "loss_iou": 0.287109375, + "loss_num": 0.026611328125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 267876160, + "step": 4782 + }, + { + "epoch": 10.652561247216035, + "grad_norm": 20.946746826171875, + "learning_rate": 1e-06, + "loss": 0.6077, + "num_input_tokens_seen": 267933324, + "step": 4783 + }, + { + "epoch": 10.652561247216035, + "loss": 0.5577636361122131, + "loss_ce": 0.00014642757014371455, + "loss_iou": 0.24609375, + "loss_num": 0.01287841796875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 267933324, + "step": 4783 + }, + { + "epoch": 10.654788418708241, + "grad_norm": 23.43316650390625, + "learning_rate": 1e-06, + "loss": 0.7215, + "num_input_tokens_seen": 267990524, + "step": 4784 + }, + { + "epoch": 10.654788418708241, + "loss": 0.5793881416320801, + "loss_ce": 0.00016452360432595015, + "loss_iou": 0.2578125, + "loss_num": 0.01263427734375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 267990524, + "step": 4784 + }, + { + "epoch": 10.657015590200446, + "grad_norm": 29.320165634155273, + "learning_rate": 1e-06, + "loss": 0.6149, + "num_input_tokens_seen": 268046904, + "step": 4785 + }, + { + "epoch": 10.657015590200446, + "loss": 0.6889311075210571, + "loss_ce": 0.00021046542678959668, + "loss_iou": 0.30078125, + "loss_num": 0.0177001953125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 268046904, + "step": 4785 + }, + { + "epoch": 10.659242761692651, + "grad_norm": 16.68779182434082, + "learning_rate": 1e-06, + "loss": 0.6267, + "num_input_tokens_seen": 268105252, + "step": 4786 + }, + { + "epoch": 10.659242761692651, + "loss": 0.6637170314788818, + "loss_ce": 0.0001428277901140973, + "loss_iou": 0.283203125, + "loss_num": 0.01904296875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 268105252, + "step": 4786 + }, + { + "epoch": 10.661469933184856, + "grad_norm": 41.306156158447266, + "learning_rate": 1e-06, + "loss": 0.7032, + "num_input_tokens_seen": 268159052, + "step": 4787 + }, + { + "epoch": 10.661469933184856, + "loss": 0.565805196762085, + "loss_ce": 0.00013136808411218226, + "loss_iou": 0.2490234375, + "loss_num": 0.01361083984375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 268159052, + "step": 4787 + }, + { + "epoch": 10.66369710467706, + "grad_norm": 12.712847709655762, + "learning_rate": 1e-06, + "loss": 0.3692, + "num_input_tokens_seen": 268214984, + "step": 4788 + }, + { + "epoch": 10.66369710467706, + "loss": 0.47919151186943054, + "loss_ce": 0.0001876367605291307, + "loss_iou": 0.21875, + "loss_num": 0.00848388671875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 268214984, + "step": 4788 + }, + { + "epoch": 10.665924276169266, + "grad_norm": 15.455141067504883, + "learning_rate": 1e-06, + "loss": 0.5079, + "num_input_tokens_seen": 268269568, + "step": 4789 + }, + { + "epoch": 10.665924276169266, + "loss": 0.4243258833885193, + "loss_ce": 0.00013158305955585092, + "loss_iou": 0.1650390625, + "loss_num": 0.018798828125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 268269568, + "step": 4789 + }, + { + "epoch": 10.66815144766147, + "grad_norm": 21.24195671081543, + "learning_rate": 1e-06, + "loss": 0.5663, + "num_input_tokens_seen": 268325664, + "step": 4790 + }, + { + "epoch": 10.66815144766147, + "loss": 0.4882197380065918, + "loss_ce": 0.000182637624675408, + "loss_iou": 0.2255859375, + "loss_num": 0.007415771484375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 268325664, + "step": 4790 + }, + { + "epoch": 10.670378619153675, + "grad_norm": 14.548911094665527, + "learning_rate": 1e-06, + "loss": 0.7085, + "num_input_tokens_seen": 268378752, + "step": 4791 + }, + { + "epoch": 10.670378619153675, + "loss": 0.6994498372077942, + "loss_ce": 0.00023110417532734573, + "loss_iou": 0.2734375, + "loss_num": 0.030029296875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 268378752, + "step": 4791 + }, + { + "epoch": 10.67260579064588, + "grad_norm": 14.135177612304688, + "learning_rate": 1e-06, + "loss": 0.6148, + "num_input_tokens_seen": 268435256, + "step": 4792 + }, + { + "epoch": 10.67260579064588, + "loss": 0.4595029354095459, + "loss_ce": 0.00015233115118462592, + "loss_iou": 0.2109375, + "loss_num": 0.007232666015625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 268435256, + "step": 4792 + }, + { + "epoch": 10.674832962138085, + "grad_norm": 20.141786575317383, + "learning_rate": 1e-06, + "loss": 0.7621, + "num_input_tokens_seen": 268491724, + "step": 4793 + }, + { + "epoch": 10.674832962138085, + "loss": 0.5482085943222046, + "loss_ce": 0.00011288469249848276, + "loss_iou": 0.236328125, + "loss_num": 0.01507568359375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 268491724, + "step": 4793 + }, + { + "epoch": 10.67706013363029, + "grad_norm": 17.971179962158203, + "learning_rate": 1e-06, + "loss": 0.5321, + "num_input_tokens_seen": 268548580, + "step": 4794 + }, + { + "epoch": 10.67706013363029, + "loss": 0.5524182319641113, + "loss_ce": 0.0001721275766612962, + "loss_iou": 0.2353515625, + "loss_num": 0.01611328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 268548580, + "step": 4794 + }, + { + "epoch": 10.679287305122495, + "grad_norm": 17.928714752197266, + "learning_rate": 1e-06, + "loss": 0.4135, + "num_input_tokens_seen": 268604548, + "step": 4795 + }, + { + "epoch": 10.679287305122495, + "loss": 0.3612457513809204, + "loss_ce": 0.0001617687230464071, + "loss_iou": 0.1669921875, + "loss_num": 0.0054931640625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 268604548, + "step": 4795 + }, + { + "epoch": 10.6815144766147, + "grad_norm": 17.0757999420166, + "learning_rate": 1e-06, + "loss": 0.5741, + "num_input_tokens_seen": 268661340, + "step": 4796 + }, + { + "epoch": 10.6815144766147, + "loss": 0.6327658891677856, + "loss_ce": 0.0001975290069822222, + "loss_iou": 0.255859375, + "loss_num": 0.02392578125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 268661340, + "step": 4796 + }, + { + "epoch": 10.683741648106905, + "grad_norm": 19.690837860107422, + "learning_rate": 1e-06, + "loss": 0.5216, + "num_input_tokens_seen": 268715384, + "step": 4797 + }, + { + "epoch": 10.683741648106905, + "loss": 0.3917364180088043, + "loss_ce": 0.0001348454534308985, + "loss_iou": 0.1806640625, + "loss_num": 0.0059814453125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 268715384, + "step": 4797 + }, + { + "epoch": 10.68596881959911, + "grad_norm": 40.683292388916016, + "learning_rate": 1e-06, + "loss": 0.5789, + "num_input_tokens_seen": 268772412, + "step": 4798 + }, + { + "epoch": 10.68596881959911, + "loss": 0.6195793151855469, + "loss_ce": 0.00019457080634310842, + "loss_iou": 0.267578125, + "loss_num": 0.0164794921875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 268772412, + "step": 4798 + }, + { + "epoch": 10.688195991091314, + "grad_norm": 24.19279670715332, + "learning_rate": 1e-06, + "loss": 0.5381, + "num_input_tokens_seen": 268830036, + "step": 4799 + }, + { + "epoch": 10.688195991091314, + "loss": 0.528715193271637, + "loss_ce": 0.0001507120905444026, + "loss_iou": 0.244140625, + "loss_num": 0.00811767578125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 268830036, + "step": 4799 + }, + { + "epoch": 10.690423162583519, + "grad_norm": 15.297384262084961, + "learning_rate": 1e-06, + "loss": 0.5682, + "num_input_tokens_seen": 268885000, + "step": 4800 + }, + { + "epoch": 10.690423162583519, + "loss": 0.4110143184661865, + "loss_ce": 0.00012564694043248892, + "loss_iou": 0.1865234375, + "loss_num": 0.00750732421875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 268885000, + "step": 4800 + }, + { + "epoch": 10.692650334075724, + "grad_norm": 30.53861427307129, + "learning_rate": 1e-06, + "loss": 0.6299, + "num_input_tokens_seen": 268941972, + "step": 4801 + }, + { + "epoch": 10.692650334075724, + "loss": 0.45105427503585815, + "loss_ce": 0.00012653737212531269, + "loss_iou": 0.201171875, + "loss_num": 0.009521484375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 268941972, + "step": 4801 + }, + { + "epoch": 10.694877505567929, + "grad_norm": 22.213823318481445, + "learning_rate": 1e-06, + "loss": 0.4161, + "num_input_tokens_seen": 268996852, + "step": 4802 + }, + { + "epoch": 10.694877505567929, + "loss": 0.36823076009750366, + "loss_ce": 0.00012772114132530987, + "loss_iou": 0.1533203125, + "loss_num": 0.0123291015625, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 268996852, + "step": 4802 + }, + { + "epoch": 10.697104677060134, + "grad_norm": 15.323545455932617, + "learning_rate": 1e-06, + "loss": 0.7173, + "num_input_tokens_seen": 269052540, + "step": 4803 + }, + { + "epoch": 10.697104677060134, + "loss": 0.6369800567626953, + "loss_ce": 0.0002612703829072416, + "loss_iou": 0.291015625, + "loss_num": 0.0113525390625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 269052540, + "step": 4803 + }, + { + "epoch": 10.699331848552339, + "grad_norm": 22.477542877197266, + "learning_rate": 1e-06, + "loss": 0.4287, + "num_input_tokens_seen": 269107128, + "step": 4804 + }, + { + "epoch": 10.699331848552339, + "loss": 0.31477898359298706, + "loss_ce": 0.00014274932618718594, + "loss_iou": 0.138671875, + "loss_num": 0.007354736328125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 269107128, + "step": 4804 + }, + { + "epoch": 10.701559020044543, + "grad_norm": 15.610451698303223, + "learning_rate": 1e-06, + "loss": 0.4464, + "num_input_tokens_seen": 269162060, + "step": 4805 + }, + { + "epoch": 10.701559020044543, + "loss": 0.3491445779800415, + "loss_ce": 0.00020659035362768918, + "loss_iou": 0.1552734375, + "loss_num": 0.00775146484375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 269162060, + "step": 4805 + }, + { + "epoch": 10.703786191536748, + "grad_norm": 28.197786331176758, + "learning_rate": 1e-06, + "loss": 0.6925, + "num_input_tokens_seen": 269217312, + "step": 4806 + }, + { + "epoch": 10.703786191536748, + "loss": 0.7023131847381592, + "loss_ce": 0.0001647536118980497, + "loss_iou": 0.30859375, + "loss_num": 0.016845703125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 269217312, + "step": 4806 + }, + { + "epoch": 10.706013363028953, + "grad_norm": 15.568071365356445, + "learning_rate": 1e-06, + "loss": 0.3815, + "num_input_tokens_seen": 269275728, + "step": 4807 + }, + { + "epoch": 10.706013363028953, + "loss": 0.4796176552772522, + "loss_ce": 0.00012549404345918447, + "loss_iou": 0.220703125, + "loss_num": 0.0074462890625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 269275728, + "step": 4807 + }, + { + "epoch": 10.708240534521158, + "grad_norm": 17.49445915222168, + "learning_rate": 1e-06, + "loss": 0.4644, + "num_input_tokens_seen": 269332812, + "step": 4808 + }, + { + "epoch": 10.708240534521158, + "loss": 0.5414130687713623, + "loss_ce": 0.0001532989408588037, + "loss_iou": 0.2421875, + "loss_num": 0.01116943359375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 269332812, + "step": 4808 + }, + { + "epoch": 10.710467706013363, + "grad_norm": 28.943540573120117, + "learning_rate": 1e-06, + "loss": 0.5068, + "num_input_tokens_seen": 269388828, + "step": 4809 + }, + { + "epoch": 10.710467706013363, + "loss": 0.441672682762146, + "loss_ce": 0.0001443543005734682, + "loss_iou": 0.1923828125, + "loss_num": 0.01123046875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 269388828, + "step": 4809 + }, + { + "epoch": 10.712694877505568, + "grad_norm": 17.984169006347656, + "learning_rate": 1e-06, + "loss": 0.5561, + "num_input_tokens_seen": 269446184, + "step": 4810 + }, + { + "epoch": 10.712694877505568, + "loss": 0.5116062164306641, + "loss_ce": 0.0001315802219323814, + "loss_iou": 0.23828125, + "loss_num": 0.007110595703125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 269446184, + "step": 4810 + }, + { + "epoch": 10.714922048997773, + "grad_norm": 22.300188064575195, + "learning_rate": 1e-06, + "loss": 0.5555, + "num_input_tokens_seen": 269502464, + "step": 4811 + }, + { + "epoch": 10.714922048997773, + "loss": 0.6414833068847656, + "loss_ce": 0.00012583400530274957, + "loss_iou": 0.26171875, + "loss_num": 0.0238037109375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 269502464, + "step": 4811 + }, + { + "epoch": 10.717149220489977, + "grad_norm": 22.90621566772461, + "learning_rate": 1e-06, + "loss": 0.5469, + "num_input_tokens_seen": 269560400, + "step": 4812 + }, + { + "epoch": 10.717149220489977, + "loss": 0.694524884223938, + "loss_ce": 0.00018889480270445347, + "loss_iou": 0.32421875, + "loss_num": 0.009521484375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 269560400, + "step": 4812 + }, + { + "epoch": 10.719376391982182, + "grad_norm": 20.077648162841797, + "learning_rate": 1e-06, + "loss": 0.665, + "num_input_tokens_seen": 269617692, + "step": 4813 + }, + { + "epoch": 10.719376391982182, + "loss": 0.9352070093154907, + "loss_ce": 0.00014838551578577608, + "loss_iou": 0.408203125, + "loss_num": 0.023681640625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 269617692, + "step": 4813 + }, + { + "epoch": 10.721603563474387, + "grad_norm": 45.49837875366211, + "learning_rate": 1e-06, + "loss": 0.6051, + "num_input_tokens_seen": 269675808, + "step": 4814 + }, + { + "epoch": 10.721603563474387, + "loss": 0.6009942889213562, + "loss_ce": 0.00016420066822320223, + "loss_iou": 0.263671875, + "loss_num": 0.01446533203125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 269675808, + "step": 4814 + }, + { + "epoch": 10.723830734966592, + "grad_norm": 18.847713470458984, + "learning_rate": 1e-06, + "loss": 0.6815, + "num_input_tokens_seen": 269731972, + "step": 4815 + }, + { + "epoch": 10.723830734966592, + "loss": 0.5592037439346313, + "loss_ce": 0.00012171984417364001, + "loss_iou": 0.2392578125, + "loss_num": 0.01611328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 269731972, + "step": 4815 + }, + { + "epoch": 10.726057906458797, + "grad_norm": 21.027870178222656, + "learning_rate": 1e-06, + "loss": 0.6109, + "num_input_tokens_seen": 269785520, + "step": 4816 + }, + { + "epoch": 10.726057906458797, + "loss": 0.5723341107368469, + "loss_ce": 0.0001295319088967517, + "loss_iou": 0.251953125, + "loss_num": 0.01373291015625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 269785520, + "step": 4816 + }, + { + "epoch": 10.728285077951002, + "grad_norm": 22.105512619018555, + "learning_rate": 1e-06, + "loss": 0.4586, + "num_input_tokens_seen": 269843136, + "step": 4817 + }, + { + "epoch": 10.728285077951002, + "loss": 0.5098918676376343, + "loss_ce": 0.0001262695004697889, + "loss_iou": 0.220703125, + "loss_num": 0.01373291015625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 269843136, + "step": 4817 + }, + { + "epoch": 10.730512249443207, + "grad_norm": 20.30752944946289, + "learning_rate": 1e-06, + "loss": 0.6458, + "num_input_tokens_seen": 269897972, + "step": 4818 + }, + { + "epoch": 10.730512249443207, + "loss": 0.8840016722679138, + "loss_ce": 0.00021258596098050475, + "loss_iou": 0.3671875, + "loss_num": 0.029541015625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 269897972, + "step": 4818 + }, + { + "epoch": 10.732739420935411, + "grad_norm": 19.499021530151367, + "learning_rate": 1e-06, + "loss": 0.4868, + "num_input_tokens_seen": 269955164, + "step": 4819 + }, + { + "epoch": 10.732739420935411, + "loss": 0.4875670373439789, + "loss_ce": 0.0001403049536747858, + "loss_iou": 0.1865234375, + "loss_num": 0.022705078125, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 269955164, + "step": 4819 + }, + { + "epoch": 10.734966592427616, + "grad_norm": 22.588951110839844, + "learning_rate": 1e-06, + "loss": 0.5172, + "num_input_tokens_seen": 270014316, + "step": 4820 + }, + { + "epoch": 10.734966592427616, + "loss": 0.3510775864124298, + "loss_ce": 0.00012545072240754962, + "loss_iou": 0.1533203125, + "loss_num": 0.00885009765625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 270014316, + "step": 4820 + }, + { + "epoch": 10.737193763919821, + "grad_norm": 20.028270721435547, + "learning_rate": 1e-06, + "loss": 0.6092, + "num_input_tokens_seen": 270070980, + "step": 4821 + }, + { + "epoch": 10.737193763919821, + "loss": 0.6935439109802246, + "loss_ce": 0.00018454053497407585, + "loss_iou": 0.29296875, + "loss_num": 0.021240234375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 270070980, + "step": 4821 + }, + { + "epoch": 10.739420935412026, + "grad_norm": 14.508070945739746, + "learning_rate": 1e-06, + "loss": 0.5061, + "num_input_tokens_seen": 270128576, + "step": 4822 + }, + { + "epoch": 10.739420935412026, + "loss": 0.6202830672264099, + "loss_ce": 0.00016589835286140442, + "loss_iou": 0.2578125, + "loss_num": 0.0208740234375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 270128576, + "step": 4822 + }, + { + "epoch": 10.74164810690423, + "grad_norm": 17.07376480102539, + "learning_rate": 1e-06, + "loss": 0.7271, + "num_input_tokens_seen": 270184516, + "step": 4823 + }, + { + "epoch": 10.74164810690423, + "loss": 0.64375239610672, + "loss_ce": 0.00019769492791965604, + "loss_iou": 0.298828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 270184516, + "step": 4823 + }, + { + "epoch": 10.743875278396436, + "grad_norm": 20.712217330932617, + "learning_rate": 1e-06, + "loss": 0.5371, + "num_input_tokens_seen": 270240064, + "step": 4824 + }, + { + "epoch": 10.743875278396436, + "loss": 0.5007485747337341, + "loss_ce": 0.0001382490445394069, + "loss_iou": 0.216796875, + "loss_num": 0.01312255859375, + "loss_xval": 0.5, + "num_input_tokens_seen": 270240064, + "step": 4824 + }, + { + "epoch": 10.74610244988864, + "grad_norm": 20.345590591430664, + "learning_rate": 1e-06, + "loss": 0.5216, + "num_input_tokens_seen": 270297036, + "step": 4825 + }, + { + "epoch": 10.74610244988864, + "loss": 0.6117961406707764, + "loss_ce": 0.00022386244381777942, + "loss_iou": 0.267578125, + "loss_num": 0.015625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 270297036, + "step": 4825 + }, + { + "epoch": 10.748329621380847, + "grad_norm": 19.20088768005371, + "learning_rate": 1e-06, + "loss": 0.5529, + "num_input_tokens_seen": 270352788, + "step": 4826 + }, + { + "epoch": 10.748329621380847, + "loss": 0.6521322131156921, + "loss_ce": 0.00015466928016394377, + "loss_iou": 0.30078125, + "loss_num": 0.01025390625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 270352788, + "step": 4826 + }, + { + "epoch": 10.750556792873052, + "grad_norm": 16.478546142578125, + "learning_rate": 1e-06, + "loss": 0.458, + "num_input_tokens_seen": 270407444, + "step": 4827 + }, + { + "epoch": 10.750556792873052, + "loss": 0.5561587810516357, + "loss_ce": 0.00012851686915382743, + "loss_iou": 0.234375, + "loss_num": 0.017578125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 270407444, + "step": 4827 + }, + { + "epoch": 10.752783964365257, + "grad_norm": 30.999256134033203, + "learning_rate": 1e-06, + "loss": 0.4163, + "num_input_tokens_seen": 270465300, + "step": 4828 + }, + { + "epoch": 10.752783964365257, + "loss": 0.41544434428215027, + "loss_ce": 0.00016115896869450808, + "loss_iou": 0.1865234375, + "loss_num": 0.00830078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 270465300, + "step": 4828 + }, + { + "epoch": 10.755011135857462, + "grad_norm": 21.072765350341797, + "learning_rate": 1e-06, + "loss": 0.507, + "num_input_tokens_seen": 270524188, + "step": 4829 + }, + { + "epoch": 10.755011135857462, + "loss": 0.682302713394165, + "loss_ce": 0.0005400052759796381, + "loss_iou": 0.294921875, + "loss_num": 0.018310546875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 270524188, + "step": 4829 + }, + { + "epoch": 10.757238307349667, + "grad_norm": 25.556285858154297, + "learning_rate": 1e-06, + "loss": 0.6146, + "num_input_tokens_seen": 270580156, + "step": 4830 + }, + { + "epoch": 10.757238307349667, + "loss": 0.43665558099746704, + "loss_ce": 0.000132172426674515, + "loss_iou": 0.189453125, + "loss_num": 0.01165771484375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 270580156, + "step": 4830 + }, + { + "epoch": 10.759465478841872, + "grad_norm": 39.73129653930664, + "learning_rate": 1e-06, + "loss": 0.6867, + "num_input_tokens_seen": 270637276, + "step": 4831 + }, + { + "epoch": 10.759465478841872, + "loss": 0.7980098128318787, + "loss_ce": 0.00015826240996830165, + "loss_iou": 0.328125, + "loss_num": 0.02783203125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 270637276, + "step": 4831 + }, + { + "epoch": 10.761692650334076, + "grad_norm": 31.106664657592773, + "learning_rate": 1e-06, + "loss": 0.5331, + "num_input_tokens_seen": 270691744, + "step": 4832 + }, + { + "epoch": 10.761692650334076, + "loss": 0.45221608877182007, + "loss_ce": 0.00012868586054537445, + "loss_iou": 0.1953125, + "loss_num": 0.01226806640625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 270691744, + "step": 4832 + }, + { + "epoch": 10.763919821826281, + "grad_norm": 19.635936737060547, + "learning_rate": 1e-06, + "loss": 0.4175, + "num_input_tokens_seen": 270749204, + "step": 4833 + }, + { + "epoch": 10.763919821826281, + "loss": 0.510043740272522, + "loss_ce": 0.00015601412451360375, + "loss_iou": 0.2197265625, + "loss_num": 0.01397705078125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 270749204, + "step": 4833 + }, + { + "epoch": 10.766146993318486, + "grad_norm": 30.472660064697266, + "learning_rate": 1e-06, + "loss": 0.6393, + "num_input_tokens_seen": 270803020, + "step": 4834 + }, + { + "epoch": 10.766146993318486, + "loss": 0.6473684906959534, + "loss_ce": 0.0001517097553005442, + "loss_iou": 0.28125, + "loss_num": 0.0172119140625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 270803020, + "step": 4834 + }, + { + "epoch": 10.768374164810691, + "grad_norm": 18.92053985595703, + "learning_rate": 1e-06, + "loss": 0.6669, + "num_input_tokens_seen": 270859752, + "step": 4835 + }, + { + "epoch": 10.768374164810691, + "loss": 0.4643715023994446, + "loss_ce": 0.00013810490781906992, + "loss_iou": 0.2138671875, + "loss_num": 0.007476806640625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 270859752, + "step": 4835 + }, + { + "epoch": 10.770601336302896, + "grad_norm": 16.783376693725586, + "learning_rate": 1e-06, + "loss": 0.4961, + "num_input_tokens_seen": 270916024, + "step": 4836 + }, + { + "epoch": 10.770601336302896, + "loss": 0.640272319316864, + "loss_ce": 0.0001356131979264319, + "loss_iou": 0.2734375, + "loss_num": 0.018798828125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 270916024, + "step": 4836 + }, + { + "epoch": 10.7728285077951, + "grad_norm": 28.108156204223633, + "learning_rate": 1e-06, + "loss": 0.4965, + "num_input_tokens_seen": 270972228, + "step": 4837 + }, + { + "epoch": 10.7728285077951, + "loss": 0.6063258051872253, + "loss_ce": 0.00012462104496080428, + "loss_iou": 0.25, + "loss_num": 0.021484375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 270972228, + "step": 4837 + }, + { + "epoch": 10.775055679287306, + "grad_norm": 14.273365020751953, + "learning_rate": 1e-06, + "loss": 0.396, + "num_input_tokens_seen": 271028976, + "step": 4838 + }, + { + "epoch": 10.775055679287306, + "loss": 0.40957385301589966, + "loss_ce": 0.00015002323198132217, + "loss_iou": 0.19140625, + "loss_num": 0.00531005859375, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 271028976, + "step": 4838 + }, + { + "epoch": 10.77728285077951, + "grad_norm": 16.653898239135742, + "learning_rate": 1e-06, + "loss": 0.4034, + "num_input_tokens_seen": 271086780, + "step": 4839 + }, + { + "epoch": 10.77728285077951, + "loss": 0.32855719327926636, + "loss_ce": 0.0001270081556867808, + "loss_iou": 0.1435546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 271086780, + "step": 4839 + }, + { + "epoch": 10.779510022271715, + "grad_norm": 22.08172607421875, + "learning_rate": 1e-06, + "loss": 0.6062, + "num_input_tokens_seen": 271142040, + "step": 4840 + }, + { + "epoch": 10.779510022271715, + "loss": 0.6195139288902283, + "loss_ce": 0.00012916022387798876, + "loss_iou": 0.267578125, + "loss_num": 0.0166015625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 271142040, + "step": 4840 + }, + { + "epoch": 10.78173719376392, + "grad_norm": 31.35270881652832, + "learning_rate": 1e-06, + "loss": 0.4893, + "num_input_tokens_seen": 271198064, + "step": 4841 + }, + { + "epoch": 10.78173719376392, + "loss": 0.6805669069290161, + "loss_ce": 0.00014694841229356825, + "loss_iou": 0.29296875, + "loss_num": 0.019287109375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 271198064, + "step": 4841 + }, + { + "epoch": 10.783964365256125, + "grad_norm": 19.86975860595703, + "learning_rate": 1e-06, + "loss": 0.5828, + "num_input_tokens_seen": 271253592, + "step": 4842 + }, + { + "epoch": 10.783964365256125, + "loss": 0.6472707390785217, + "loss_ce": 0.00020657834829762578, + "loss_iou": 0.2353515625, + "loss_num": 0.035400390625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 271253592, + "step": 4842 + }, + { + "epoch": 10.78619153674833, + "grad_norm": 21.59044075012207, + "learning_rate": 1e-06, + "loss": 0.7014, + "num_input_tokens_seen": 271310024, + "step": 4843 + }, + { + "epoch": 10.78619153674833, + "loss": 0.7749493718147278, + "loss_ce": 0.0002301402564626187, + "loss_iou": 0.306640625, + "loss_num": 0.03271484375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 271310024, + "step": 4843 + }, + { + "epoch": 10.788418708240535, + "grad_norm": 20.93609619140625, + "learning_rate": 1e-06, + "loss": 0.4648, + "num_input_tokens_seen": 271361888, + "step": 4844 + }, + { + "epoch": 10.788418708240535, + "loss": 0.5555073022842407, + "loss_ce": 0.00020940190006513149, + "loss_iou": 0.23046875, + "loss_num": 0.0191650390625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 271361888, + "step": 4844 + }, + { + "epoch": 10.79064587973274, + "grad_norm": 35.37983322143555, + "learning_rate": 1e-06, + "loss": 0.5951, + "num_input_tokens_seen": 271417132, + "step": 4845 + }, + { + "epoch": 10.79064587973274, + "loss": 0.44118914008140564, + "loss_ce": 0.0001490934519097209, + "loss_iou": 0.197265625, + "loss_num": 0.00927734375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 271417132, + "step": 4845 + }, + { + "epoch": 10.792873051224944, + "grad_norm": 19.222061157226562, + "learning_rate": 1e-06, + "loss": 0.5955, + "num_input_tokens_seen": 271471928, + "step": 4846 + }, + { + "epoch": 10.792873051224944, + "loss": 0.6026766300201416, + "loss_ce": 0.00013759825378656387, + "loss_iou": 0.26171875, + "loss_num": 0.0155029296875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 271471928, + "step": 4846 + }, + { + "epoch": 10.79510022271715, + "grad_norm": 17.895984649658203, + "learning_rate": 1e-06, + "loss": 0.5379, + "num_input_tokens_seen": 271526128, + "step": 4847 + }, + { + "epoch": 10.79510022271715, + "loss": 0.4887864291667938, + "loss_ce": 0.00013897617463953793, + "loss_iou": 0.21875, + "loss_num": 0.01019287109375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 271526128, + "step": 4847 + }, + { + "epoch": 10.797327394209354, + "grad_norm": 22.95305061340332, + "learning_rate": 1e-06, + "loss": 0.5451, + "num_input_tokens_seen": 271581608, + "step": 4848 + }, + { + "epoch": 10.797327394209354, + "loss": 0.42127272486686707, + "loss_ce": 0.00013015880540478975, + "loss_iou": 0.1923828125, + "loss_num": 0.007415771484375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 271581608, + "step": 4848 + }, + { + "epoch": 10.799554565701559, + "grad_norm": 28.807262420654297, + "learning_rate": 1e-06, + "loss": 0.5903, + "num_input_tokens_seen": 271638680, + "step": 4849 + }, + { + "epoch": 10.799554565701559, + "loss": 0.8107374310493469, + "loss_ce": 0.00019058278121519834, + "loss_iou": 0.345703125, + "loss_num": 0.024169921875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 271638680, + "step": 4849 + }, + { + "epoch": 10.801781737193764, + "grad_norm": 22.489822387695312, + "learning_rate": 1e-06, + "loss": 0.4126, + "num_input_tokens_seen": 271694712, + "step": 4850 + }, + { + "epoch": 10.801781737193764, + "loss": 0.43495360016822815, + "loss_ce": 0.000139150011818856, + "loss_iou": 0.1904296875, + "loss_num": 0.01080322265625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 271694712, + "step": 4850 + }, + { + "epoch": 10.804008908685969, + "grad_norm": 19.50384521484375, + "learning_rate": 1e-06, + "loss": 0.4934, + "num_input_tokens_seen": 271750692, + "step": 4851 + }, + { + "epoch": 10.804008908685969, + "loss": 0.36242765188217163, + "loss_ce": 0.00012293207691982388, + "loss_iou": 0.1611328125, + "loss_num": 0.00799560546875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 271750692, + "step": 4851 + }, + { + "epoch": 10.806236080178174, + "grad_norm": 20.99281883239746, + "learning_rate": 1e-06, + "loss": 0.5668, + "num_input_tokens_seen": 271807552, + "step": 4852 + }, + { + "epoch": 10.806236080178174, + "loss": 0.4213836193084717, + "loss_ce": 0.00011898807133547962, + "loss_iou": 0.1708984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 271807552, + "step": 4852 + }, + { + "epoch": 10.808463251670378, + "grad_norm": 14.47896957397461, + "learning_rate": 1e-06, + "loss": 0.5013, + "num_input_tokens_seen": 271862320, + "step": 4853 + }, + { + "epoch": 10.808463251670378, + "loss": 0.608984112739563, + "loss_ce": 9.740462701302022e-05, + "loss_iou": 0.21875, + "loss_num": 0.0341796875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 271862320, + "step": 4853 + }, + { + "epoch": 10.810690423162583, + "grad_norm": 25.232343673706055, + "learning_rate": 1e-06, + "loss": 0.7236, + "num_input_tokens_seen": 271916944, + "step": 4854 + }, + { + "epoch": 10.810690423162583, + "loss": 0.6214773654937744, + "loss_ce": 0.00013948752894066274, + "loss_iou": 0.265625, + "loss_num": 0.017578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 271916944, + "step": 4854 + }, + { + "epoch": 10.812917594654788, + "grad_norm": 24.822452545166016, + "learning_rate": 1e-06, + "loss": 0.4163, + "num_input_tokens_seen": 271972016, + "step": 4855 + }, + { + "epoch": 10.812917594654788, + "loss": 0.461017906665802, + "loss_ce": 0.0004466102982405573, + "loss_iou": 0.2080078125, + "loss_num": 0.00897216796875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 271972016, + "step": 4855 + }, + { + "epoch": 10.815144766146993, + "grad_norm": 17.059764862060547, + "learning_rate": 1e-06, + "loss": 0.5627, + "num_input_tokens_seen": 272026808, + "step": 4856 + }, + { + "epoch": 10.815144766146993, + "loss": 0.7428523898124695, + "loss_ce": 0.0001765760825946927, + "loss_iou": 0.3203125, + "loss_num": 0.020263671875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 272026808, + "step": 4856 + }, + { + "epoch": 10.817371937639198, + "grad_norm": 22.709232330322266, + "learning_rate": 1e-06, + "loss": 0.5341, + "num_input_tokens_seen": 272080680, + "step": 4857 + }, + { + "epoch": 10.817371937639198, + "loss": 0.6549853086471558, + "loss_ce": 0.0002001727989409119, + "loss_iou": 0.275390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 272080680, + "step": 4857 + }, + { + "epoch": 10.819599109131403, + "grad_norm": 19.562105178833008, + "learning_rate": 1e-06, + "loss": 0.5892, + "num_input_tokens_seen": 272137808, + "step": 4858 + }, + { + "epoch": 10.819599109131403, + "loss": 0.5621104836463928, + "loss_ce": 0.00022084277588874102, + "loss_iou": 0.2275390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 272137808, + "step": 4858 + }, + { + "epoch": 10.821826280623608, + "grad_norm": 11.724193572998047, + "learning_rate": 1e-06, + "loss": 0.4827, + "num_input_tokens_seen": 272196140, + "step": 4859 + }, + { + "epoch": 10.821826280623608, + "loss": 0.531132161617279, + "loss_ce": 0.00024835762451402843, + "loss_iou": 0.197265625, + "loss_num": 0.0274658203125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 272196140, + "step": 4859 + }, + { + "epoch": 10.824053452115812, + "grad_norm": 26.051990509033203, + "learning_rate": 1e-06, + "loss": 0.7094, + "num_input_tokens_seen": 272252688, + "step": 4860 + }, + { + "epoch": 10.824053452115812, + "loss": 0.38587331771850586, + "loss_ce": 0.0001311138621531427, + "loss_iou": 0.17578125, + "loss_num": 0.00677490234375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 272252688, + "step": 4860 + }, + { + "epoch": 10.826280623608017, + "grad_norm": 19.309463500976562, + "learning_rate": 1e-06, + "loss": 0.4197, + "num_input_tokens_seen": 272307848, + "step": 4861 + }, + { + "epoch": 10.826280623608017, + "loss": 0.42674195766448975, + "loss_ce": 0.00010623160051181912, + "loss_iou": 0.177734375, + "loss_num": 0.01416015625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 272307848, + "step": 4861 + }, + { + "epoch": 10.828507795100222, + "grad_norm": 25.22977066040039, + "learning_rate": 1e-06, + "loss": 0.6507, + "num_input_tokens_seen": 272364068, + "step": 4862 + }, + { + "epoch": 10.828507795100222, + "loss": 0.6106005311012268, + "loss_ce": 0.00024895532988011837, + "loss_iou": 0.251953125, + "loss_num": 0.0211181640625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 272364068, + "step": 4862 + }, + { + "epoch": 10.830734966592427, + "grad_norm": 13.005237579345703, + "learning_rate": 1e-06, + "loss": 0.6308, + "num_input_tokens_seen": 272421856, + "step": 4863 + }, + { + "epoch": 10.830734966592427, + "loss": 0.5312567949295044, + "loss_ce": 0.00012890055950265378, + "loss_iou": 0.208984375, + "loss_num": 0.0225830078125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 272421856, + "step": 4863 + }, + { + "epoch": 10.832962138084632, + "grad_norm": 14.533258438110352, + "learning_rate": 1e-06, + "loss": 0.4557, + "num_input_tokens_seen": 272479860, + "step": 4864 + }, + { + "epoch": 10.832962138084632, + "loss": 0.4337289333343506, + "loss_ce": 0.000135167152620852, + "loss_iou": 0.193359375, + "loss_num": 0.00933837890625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 272479860, + "step": 4864 + }, + { + "epoch": 10.835189309576837, + "grad_norm": 16.71564292907715, + "learning_rate": 1e-06, + "loss": 0.4138, + "num_input_tokens_seen": 272534472, + "step": 4865 + }, + { + "epoch": 10.835189309576837, + "loss": 0.5382235646247864, + "loss_ce": 0.0001376058644382283, + "loss_iou": 0.244140625, + "loss_num": 0.01007080078125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 272534472, + "step": 4865 + }, + { + "epoch": 10.837416481069042, + "grad_norm": 35.24814987182617, + "learning_rate": 1e-06, + "loss": 0.5191, + "num_input_tokens_seen": 272591164, + "step": 4866 + }, + { + "epoch": 10.837416481069042, + "loss": 0.4814545214176178, + "loss_ce": 0.00013124944234732538, + "loss_iou": 0.197265625, + "loss_num": 0.017333984375, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 272591164, + "step": 4866 + }, + { + "epoch": 10.839643652561247, + "grad_norm": 13.28064250946045, + "learning_rate": 1e-06, + "loss": 0.4789, + "num_input_tokens_seen": 272647484, + "step": 4867 + }, + { + "epoch": 10.839643652561247, + "loss": 0.4676069915294647, + "loss_ce": 0.00013870664406567812, + "loss_iou": 0.201171875, + "loss_num": 0.01287841796875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 272647484, + "step": 4867 + }, + { + "epoch": 10.841870824053451, + "grad_norm": 20.322200775146484, + "learning_rate": 1e-06, + "loss": 0.5039, + "num_input_tokens_seen": 272703104, + "step": 4868 + }, + { + "epoch": 10.841870824053451, + "loss": 0.5150094628334045, + "loss_ce": 0.0005441223620437086, + "loss_iou": 0.2138671875, + "loss_num": 0.017333984375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 272703104, + "step": 4868 + }, + { + "epoch": 10.844097995545656, + "grad_norm": 24.41085433959961, + "learning_rate": 1e-06, + "loss": 0.5288, + "num_input_tokens_seen": 272760632, + "step": 4869 + }, + { + "epoch": 10.844097995545656, + "loss": 0.6352982521057129, + "loss_ce": 0.00016645353753119707, + "loss_iou": 0.25390625, + "loss_num": 0.0252685546875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 272760632, + "step": 4869 + }, + { + "epoch": 10.846325167037861, + "grad_norm": 26.662748336791992, + "learning_rate": 1e-06, + "loss": 0.5393, + "num_input_tokens_seen": 272817712, + "step": 4870 + }, + { + "epoch": 10.846325167037861, + "loss": 0.3460940420627594, + "loss_ce": 0.00014678272418677807, + "loss_iou": 0.158203125, + "loss_num": 0.0059814453125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 272817712, + "step": 4870 + }, + { + "epoch": 10.848552338530066, + "grad_norm": 24.583892822265625, + "learning_rate": 1e-06, + "loss": 0.7168, + "num_input_tokens_seen": 272871604, + "step": 4871 + }, + { + "epoch": 10.848552338530066, + "loss": 0.7562465667724609, + "loss_ce": 0.0001430131815141067, + "loss_iou": 0.3125, + "loss_num": 0.0263671875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 272871604, + "step": 4871 + }, + { + "epoch": 10.85077951002227, + "grad_norm": 21.29097557067871, + "learning_rate": 1e-06, + "loss": 0.4865, + "num_input_tokens_seen": 272924804, + "step": 4872 + }, + { + "epoch": 10.85077951002227, + "loss": 0.5399699211120605, + "loss_ce": 0.00017501205729786307, + "loss_iou": 0.2333984375, + "loss_num": 0.014404296875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 272924804, + "step": 4872 + }, + { + "epoch": 10.853006681514476, + "grad_norm": 24.785858154296875, + "learning_rate": 1e-06, + "loss": 0.7053, + "num_input_tokens_seen": 272979376, + "step": 4873 + }, + { + "epoch": 10.853006681514476, + "loss": 0.7309668660163879, + "loss_ce": 0.0002539680281188339, + "loss_iou": 0.322265625, + "loss_num": 0.01708984375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 272979376, + "step": 4873 + }, + { + "epoch": 10.855233853006682, + "grad_norm": 21.17921257019043, + "learning_rate": 1e-06, + "loss": 0.4353, + "num_input_tokens_seen": 273037616, + "step": 4874 + }, + { + "epoch": 10.855233853006682, + "loss": 0.5316353440284729, + "loss_ce": 0.00014121364802122116, + "loss_iou": 0.2255859375, + "loss_num": 0.0162353515625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 273037616, + "step": 4874 + }, + { + "epoch": 10.857461024498887, + "grad_norm": 21.799673080444336, + "learning_rate": 1e-06, + "loss": 0.8208, + "num_input_tokens_seen": 273097464, + "step": 4875 + }, + { + "epoch": 10.857461024498887, + "loss": 0.9345442652702332, + "loss_ce": 0.00021809733880218118, + "loss_iou": 0.341796875, + "loss_num": 0.0498046875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 273097464, + "step": 4875 + }, + { + "epoch": 10.859688195991092, + "grad_norm": 20.746036529541016, + "learning_rate": 1e-06, + "loss": 0.5591, + "num_input_tokens_seen": 273152000, + "step": 4876 + }, + { + "epoch": 10.859688195991092, + "loss": 0.5373209118843079, + "loss_ce": 0.00021149568783584982, + "loss_iou": 0.2431640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 273152000, + "step": 4876 + }, + { + "epoch": 10.861915367483297, + "grad_norm": 16.878854751586914, + "learning_rate": 1e-06, + "loss": 0.5007, + "num_input_tokens_seen": 273209936, + "step": 4877 + }, + { + "epoch": 10.861915367483297, + "loss": 0.5112770795822144, + "loss_ce": 0.0001686862960923463, + "loss_iou": 0.20703125, + "loss_num": 0.019287109375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 273209936, + "step": 4877 + }, + { + "epoch": 10.864142538975502, + "grad_norm": 17.051406860351562, + "learning_rate": 1e-06, + "loss": 0.4679, + "num_input_tokens_seen": 273264832, + "step": 4878 + }, + { + "epoch": 10.864142538975502, + "loss": 0.5240702629089355, + "loss_ce": 0.00014447516878135502, + "loss_iou": 0.220703125, + "loss_num": 0.0166015625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 273264832, + "step": 4878 + }, + { + "epoch": 10.866369710467707, + "grad_norm": 19.403690338134766, + "learning_rate": 1e-06, + "loss": 0.7818, + "num_input_tokens_seen": 273321468, + "step": 4879 + }, + { + "epoch": 10.866369710467707, + "loss": 0.8524916172027588, + "loss_ce": 0.00013568377471528947, + "loss_iou": 0.345703125, + "loss_num": 0.0322265625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 273321468, + "step": 4879 + }, + { + "epoch": 10.868596881959911, + "grad_norm": 17.883703231811523, + "learning_rate": 1e-06, + "loss": 0.589, + "num_input_tokens_seen": 273378420, + "step": 4880 + }, + { + "epoch": 10.868596881959911, + "loss": 0.49145275354385376, + "loss_ce": 0.00024180561013054103, + "loss_iou": 0.2236328125, + "loss_num": 0.0089111328125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 273378420, + "step": 4880 + }, + { + "epoch": 10.870824053452116, + "grad_norm": 29.07217025756836, + "learning_rate": 1e-06, + "loss": 0.4582, + "num_input_tokens_seen": 273433924, + "step": 4881 + }, + { + "epoch": 10.870824053452116, + "loss": 0.4086874723434448, + "loss_ce": 0.00011811024160124362, + "loss_iou": 0.1748046875, + "loss_num": 0.01171875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 273433924, + "step": 4881 + }, + { + "epoch": 10.873051224944321, + "grad_norm": 22.693309783935547, + "learning_rate": 1e-06, + "loss": 0.4058, + "num_input_tokens_seen": 273488764, + "step": 4882 + }, + { + "epoch": 10.873051224944321, + "loss": 0.5119615197181702, + "loss_ce": 0.00012070569937350228, + "loss_iou": 0.21875, + "loss_num": 0.0147705078125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 273488764, + "step": 4882 + }, + { + "epoch": 10.875278396436526, + "grad_norm": 16.45703125, + "learning_rate": 1e-06, + "loss": 0.5703, + "num_input_tokens_seen": 273547644, + "step": 4883 + }, + { + "epoch": 10.875278396436526, + "loss": 0.5592530965805054, + "loss_ce": 0.00017106565064750612, + "loss_iou": 0.234375, + "loss_num": 0.017822265625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 273547644, + "step": 4883 + }, + { + "epoch": 10.877505567928731, + "grad_norm": 16.91400909423828, + "learning_rate": 1e-06, + "loss": 0.4222, + "num_input_tokens_seen": 273602376, + "step": 4884 + }, + { + "epoch": 10.877505567928731, + "loss": 0.5984039306640625, + "loss_ce": 0.00013728870544582605, + "loss_iou": 0.224609375, + "loss_num": 0.0299072265625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 273602376, + "step": 4884 + }, + { + "epoch": 10.879732739420936, + "grad_norm": 15.582435607910156, + "learning_rate": 1e-06, + "loss": 0.736, + "num_input_tokens_seen": 273658164, + "step": 4885 + }, + { + "epoch": 10.879732739420936, + "loss": 0.728879451751709, + "loss_ce": 0.00011967639875365421, + "loss_iou": 0.296875, + "loss_num": 0.02685546875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 273658164, + "step": 4885 + }, + { + "epoch": 10.88195991091314, + "grad_norm": 21.012617111206055, + "learning_rate": 1e-06, + "loss": 0.5643, + "num_input_tokens_seen": 273715456, + "step": 4886 + }, + { + "epoch": 10.88195991091314, + "loss": 0.49945536255836487, + "loss_ce": 0.00018778612138703465, + "loss_iou": 0.208984375, + "loss_num": 0.016357421875, + "loss_xval": 0.5, + "num_input_tokens_seen": 273715456, + "step": 4886 + }, + { + "epoch": 10.884187082405345, + "grad_norm": 18.003925323486328, + "learning_rate": 1e-06, + "loss": 0.4371, + "num_input_tokens_seen": 273766692, + "step": 4887 + }, + { + "epoch": 10.884187082405345, + "loss": 0.45562559366226196, + "loss_ce": 0.00033384724520146847, + "loss_iou": 0.1669921875, + "loss_num": 0.024169921875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 273766692, + "step": 4887 + }, + { + "epoch": 10.88641425389755, + "grad_norm": 26.937620162963867, + "learning_rate": 1e-06, + "loss": 0.5724, + "num_input_tokens_seen": 273822332, + "step": 4888 + }, + { + "epoch": 10.88641425389755, + "loss": 0.5189422369003296, + "loss_ce": 0.00014341062342282385, + "loss_iou": 0.228515625, + "loss_num": 0.01220703125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 273822332, + "step": 4888 + }, + { + "epoch": 10.888641425389755, + "grad_norm": 25.740150451660156, + "learning_rate": 1e-06, + "loss": 0.5291, + "num_input_tokens_seen": 273877776, + "step": 4889 + }, + { + "epoch": 10.888641425389755, + "loss": 0.5222045183181763, + "loss_ce": 0.00023185207101050764, + "loss_iou": 0.2216796875, + "loss_num": 0.015869140625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 273877776, + "step": 4889 + }, + { + "epoch": 10.89086859688196, + "grad_norm": 13.944757461547852, + "learning_rate": 1e-06, + "loss": 0.5313, + "num_input_tokens_seen": 273934432, + "step": 4890 + }, + { + "epoch": 10.89086859688196, + "loss": 0.5552128553390503, + "loss_ce": 0.00015907504712231457, + "loss_iou": 0.2255859375, + "loss_num": 0.020751953125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 273934432, + "step": 4890 + }, + { + "epoch": 10.893095768374165, + "grad_norm": 32.77812194824219, + "learning_rate": 1e-06, + "loss": 0.5006, + "num_input_tokens_seen": 273990888, + "step": 4891 + }, + { + "epoch": 10.893095768374165, + "loss": 0.5784051418304443, + "loss_ce": 0.0001580218377057463, + "loss_iou": 0.2392578125, + "loss_num": 0.02001953125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 273990888, + "step": 4891 + }, + { + "epoch": 10.89532293986637, + "grad_norm": 21.749361038208008, + "learning_rate": 1e-06, + "loss": 0.4988, + "num_input_tokens_seen": 274045808, + "step": 4892 + }, + { + "epoch": 10.89532293986637, + "loss": 0.47173595428466797, + "loss_ce": 0.0017652796814218163, + "loss_iou": 0.2060546875, + "loss_num": 0.011474609375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 274045808, + "step": 4892 + }, + { + "epoch": 10.897550111358575, + "grad_norm": 22.703105926513672, + "learning_rate": 1e-06, + "loss": 0.4889, + "num_input_tokens_seen": 274101860, + "step": 4893 + }, + { + "epoch": 10.897550111358575, + "loss": 0.5470762252807617, + "loss_ce": 0.000323267828207463, + "loss_iou": 0.2021484375, + "loss_num": 0.0281982421875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 274101860, + "step": 4893 + }, + { + "epoch": 10.89977728285078, + "grad_norm": 19.098848342895508, + "learning_rate": 1e-06, + "loss": 0.5863, + "num_input_tokens_seen": 274158588, + "step": 4894 + }, + { + "epoch": 10.89977728285078, + "loss": 0.47424912452697754, + "loss_ce": 0.0001280199212487787, + "loss_iou": 0.20703125, + "loss_num": 0.01190185546875, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 274158588, + "step": 4894 + }, + { + "epoch": 10.902004454342984, + "grad_norm": 25.0705509185791, + "learning_rate": 1e-06, + "loss": 0.5995, + "num_input_tokens_seen": 274214796, + "step": 4895 + }, + { + "epoch": 10.902004454342984, + "loss": 0.7026468515396118, + "loss_ce": 0.00013221558765508235, + "loss_iou": 0.294921875, + "loss_num": 0.0223388671875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 274214796, + "step": 4895 + }, + { + "epoch": 10.90423162583519, + "grad_norm": 16.38619613647461, + "learning_rate": 1e-06, + "loss": 0.4676, + "num_input_tokens_seen": 274270000, + "step": 4896 + }, + { + "epoch": 10.90423162583519, + "loss": 0.5520434379577637, + "loss_ce": 0.0002856444625649601, + "loss_iou": 0.22265625, + "loss_num": 0.0211181640625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 274270000, + "step": 4896 + }, + { + "epoch": 10.906458797327394, + "grad_norm": 28.38006591796875, + "learning_rate": 1e-06, + "loss": 0.4696, + "num_input_tokens_seen": 274324580, + "step": 4897 + }, + { + "epoch": 10.906458797327394, + "loss": 0.513346791267395, + "loss_ce": 0.00016320010763593018, + "loss_iou": 0.2333984375, + "loss_num": 0.009521484375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 274324580, + "step": 4897 + }, + { + "epoch": 10.908685968819599, + "grad_norm": 34.72296142578125, + "learning_rate": 1e-06, + "loss": 0.7088, + "num_input_tokens_seen": 274380456, + "step": 4898 + }, + { + "epoch": 10.908685968819599, + "loss": 0.5927585363388062, + "loss_ce": 0.0002292667340952903, + "loss_iou": 0.251953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 274380456, + "step": 4898 + }, + { + "epoch": 10.910913140311804, + "grad_norm": 17.991031646728516, + "learning_rate": 1e-06, + "loss": 0.7028, + "num_input_tokens_seen": 274436900, + "step": 4899 + }, + { + "epoch": 10.910913140311804, + "loss": 1.07291579246521, + "loss_ce": 0.0001619314862182364, + "loss_iou": 0.451171875, + "loss_num": 0.033935546875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 274436900, + "step": 4899 + }, + { + "epoch": 10.913140311804009, + "grad_norm": 19.890304565429688, + "learning_rate": 1e-06, + "loss": 0.6784, + "num_input_tokens_seen": 274492560, + "step": 4900 + }, + { + "epoch": 10.913140311804009, + "loss": 0.7131062746047974, + "loss_ce": 0.0007039305055513978, + "loss_iou": 0.28515625, + "loss_num": 0.028564453125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 274492560, + "step": 4900 + }, + { + "epoch": 10.915367483296214, + "grad_norm": 19.241336822509766, + "learning_rate": 1e-06, + "loss": 0.4938, + "num_input_tokens_seen": 274549672, + "step": 4901 + }, + { + "epoch": 10.915367483296214, + "loss": 0.4737103581428528, + "loss_ce": 0.00013855646830052137, + "loss_iou": 0.2138671875, + "loss_num": 0.00933837890625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 274549672, + "step": 4901 + }, + { + "epoch": 10.917594654788418, + "grad_norm": 20.94312858581543, + "learning_rate": 1e-06, + "loss": 0.5268, + "num_input_tokens_seen": 274606064, + "step": 4902 + }, + { + "epoch": 10.917594654788418, + "loss": 0.6111065149307251, + "loss_ce": 0.00014459769590757787, + "loss_iou": 0.2451171875, + "loss_num": 0.0244140625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 274606064, + "step": 4902 + }, + { + "epoch": 10.919821826280623, + "grad_norm": 55.303871154785156, + "learning_rate": 1e-06, + "loss": 0.4566, + "num_input_tokens_seen": 274662744, + "step": 4903 + }, + { + "epoch": 10.919821826280623, + "loss": 0.4556983709335327, + "loss_ce": 0.0001320057053817436, + "loss_iou": 0.2060546875, + "loss_num": 0.0086669921875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 274662744, + "step": 4903 + }, + { + "epoch": 10.922048997772828, + "grad_norm": 13.991397857666016, + "learning_rate": 1e-06, + "loss": 0.3496, + "num_input_tokens_seen": 274719288, + "step": 4904 + }, + { + "epoch": 10.922048997772828, + "loss": 0.35973674058914185, + "loss_ce": 0.00011758786422433332, + "loss_iou": 0.1513671875, + "loss_num": 0.0113525390625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 274719288, + "step": 4904 + }, + { + "epoch": 10.924276169265033, + "grad_norm": 18.747949600219727, + "learning_rate": 1e-06, + "loss": 0.4155, + "num_input_tokens_seen": 274774944, + "step": 4905 + }, + { + "epoch": 10.924276169265033, + "loss": 0.44049468636512756, + "loss_ce": 0.0002480950206518173, + "loss_iou": 0.185546875, + "loss_num": 0.013671875, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 274774944, + "step": 4905 + }, + { + "epoch": 10.926503340757238, + "grad_norm": 21.45561408996582, + "learning_rate": 1e-06, + "loss": 0.4906, + "num_input_tokens_seen": 274831664, + "step": 4906 + }, + { + "epoch": 10.926503340757238, + "loss": 0.5997252464294434, + "loss_ce": 0.00011587167682591826, + "loss_iou": 0.27734375, + "loss_num": 0.0086669921875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 274831664, + "step": 4906 + }, + { + "epoch": 10.928730512249443, + "grad_norm": 21.097562789916992, + "learning_rate": 1e-06, + "loss": 0.4488, + "num_input_tokens_seen": 274888812, + "step": 4907 + }, + { + "epoch": 10.928730512249443, + "loss": 0.4747345447540283, + "loss_ce": 0.00012518178846221417, + "loss_iou": 0.220703125, + "loss_num": 0.00640869140625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 274888812, + "step": 4907 + }, + { + "epoch": 10.930957683741648, + "grad_norm": 20.24143409729004, + "learning_rate": 1e-06, + "loss": 0.4771, + "num_input_tokens_seen": 274945504, + "step": 4908 + }, + { + "epoch": 10.930957683741648, + "loss": 0.39001041650772095, + "loss_ce": 0.00011784063826780766, + "loss_iou": 0.177734375, + "loss_num": 0.00677490234375, + "loss_xval": 0.390625, + "num_input_tokens_seen": 274945504, + "step": 4908 + }, + { + "epoch": 10.933184855233852, + "grad_norm": 20.678117752075195, + "learning_rate": 1e-06, + "loss": 0.4148, + "num_input_tokens_seen": 275004552, + "step": 4909 + }, + { + "epoch": 10.933184855233852, + "loss": 0.3625844419002533, + "loss_ce": 0.0001576666400069371, + "loss_iou": 0.1611328125, + "loss_num": 0.00787353515625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 275004552, + "step": 4909 + }, + { + "epoch": 10.935412026726057, + "grad_norm": 20.854366302490234, + "learning_rate": 1e-06, + "loss": 0.5889, + "num_input_tokens_seen": 275062356, + "step": 4910 + }, + { + "epoch": 10.935412026726057, + "loss": 0.4733467102050781, + "loss_ce": 0.0002021462714765221, + "loss_iou": 0.2158203125, + "loss_num": 0.00811767578125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 275062356, + "step": 4910 + }, + { + "epoch": 10.937639198218262, + "grad_norm": 23.64391326904297, + "learning_rate": 1e-06, + "loss": 0.7998, + "num_input_tokens_seen": 275117932, + "step": 4911 + }, + { + "epoch": 10.937639198218262, + "loss": 0.7572309970855713, + "loss_ce": 0.0001508938439656049, + "loss_iou": 0.322265625, + "loss_num": 0.0220947265625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 275117932, + "step": 4911 + }, + { + "epoch": 10.939866369710467, + "grad_norm": 22.301734924316406, + "learning_rate": 1e-06, + "loss": 0.4655, + "num_input_tokens_seen": 275171980, + "step": 4912 + }, + { + "epoch": 10.939866369710467, + "loss": 0.4408171772956848, + "loss_ce": 0.00014336456661112607, + "loss_iou": 0.208984375, + "loss_num": 0.0045166015625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 275171980, + "step": 4912 + }, + { + "epoch": 10.942093541202672, + "grad_norm": 18.19380760192871, + "learning_rate": 1e-06, + "loss": 0.6564, + "num_input_tokens_seen": 275227324, + "step": 4913 + }, + { + "epoch": 10.942093541202672, + "loss": 0.8009915351867676, + "loss_ce": 0.00021027974435128272, + "loss_iou": 0.3515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 275227324, + "step": 4913 + }, + { + "epoch": 10.944320712694877, + "grad_norm": 14.776144981384277, + "learning_rate": 1e-06, + "loss": 0.5706, + "num_input_tokens_seen": 275282508, + "step": 4914 + }, + { + "epoch": 10.944320712694877, + "loss": 0.657973051071167, + "loss_ce": 0.00013618965749628842, + "loss_iou": 0.2734375, + "loss_num": 0.022216796875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 275282508, + "step": 4914 + }, + { + "epoch": 10.946547884187082, + "grad_norm": 23.789960861206055, + "learning_rate": 1e-06, + "loss": 0.4952, + "num_input_tokens_seen": 275338856, + "step": 4915 + }, + { + "epoch": 10.946547884187082, + "loss": 0.48818719387054443, + "loss_ce": 0.00015006719331722707, + "loss_iou": 0.2158203125, + "loss_num": 0.01141357421875, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 275338856, + "step": 4915 + }, + { + "epoch": 10.948775055679288, + "grad_norm": 19.028474807739258, + "learning_rate": 1e-06, + "loss": 0.5003, + "num_input_tokens_seen": 275395740, + "step": 4916 + }, + { + "epoch": 10.948775055679288, + "loss": 0.5374853610992432, + "loss_ce": 0.0001318200957030058, + "loss_iou": 0.2294921875, + "loss_num": 0.01556396484375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 275395740, + "step": 4916 + }, + { + "epoch": 10.951002227171493, + "grad_norm": 20.012557983398438, + "learning_rate": 1e-06, + "loss": 0.4823, + "num_input_tokens_seen": 275454060, + "step": 4917 + }, + { + "epoch": 10.951002227171493, + "loss": 0.466092973947525, + "loss_ce": 0.0001506020489614457, + "loss_iou": 0.2021484375, + "loss_num": 0.01226806640625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 275454060, + "step": 4917 + }, + { + "epoch": 10.953229398663698, + "grad_norm": 16.04680633544922, + "learning_rate": 1e-06, + "loss": 0.4923, + "num_input_tokens_seen": 275511984, + "step": 4918 + }, + { + "epoch": 10.953229398663698, + "loss": 0.4693390130996704, + "loss_ce": 0.0001617702655494213, + "loss_iou": 0.2138671875, + "loss_num": 0.00836181640625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 275511984, + "step": 4918 + }, + { + "epoch": 10.955456570155903, + "grad_norm": 19.825653076171875, + "learning_rate": 1e-06, + "loss": 0.5232, + "num_input_tokens_seen": 275568436, + "step": 4919 + }, + { + "epoch": 10.955456570155903, + "loss": 0.561336100101471, + "loss_ce": 0.00011784351227106526, + "loss_iou": 0.2392578125, + "loss_num": 0.0166015625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 275568436, + "step": 4919 + }, + { + "epoch": 10.957683741648108, + "grad_norm": 22.18250846862793, + "learning_rate": 1e-06, + "loss": 0.4804, + "num_input_tokens_seen": 275625196, + "step": 4920 + }, + { + "epoch": 10.957683741648108, + "loss": 0.43557554483413696, + "loss_ce": 0.00015077181160449982, + "loss_iou": 0.18359375, + "loss_num": 0.0135498046875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 275625196, + "step": 4920 + }, + { + "epoch": 10.959910913140313, + "grad_norm": 23.71367835998535, + "learning_rate": 1e-06, + "loss": 0.5365, + "num_input_tokens_seen": 275681500, + "step": 4921 + }, + { + "epoch": 10.959910913140313, + "loss": 0.5750996470451355, + "loss_ce": 0.0002705350052565336, + "loss_iou": 0.2353515625, + "loss_num": 0.02099609375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 275681500, + "step": 4921 + }, + { + "epoch": 10.962138084632517, + "grad_norm": 36.595542907714844, + "learning_rate": 1e-06, + "loss": 0.4071, + "num_input_tokens_seen": 275736712, + "step": 4922 + }, + { + "epoch": 10.962138084632517, + "loss": 0.35608160495758057, + "loss_ce": 0.0001245439052581787, + "loss_iou": 0.162109375, + "loss_num": 0.0064697265625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 275736712, + "step": 4922 + }, + { + "epoch": 10.964365256124722, + "grad_norm": 22.66077995300293, + "learning_rate": 1e-06, + "loss": 0.6015, + "num_input_tokens_seen": 275792276, + "step": 4923 + }, + { + "epoch": 10.964365256124722, + "loss": 0.39454376697540283, + "loss_ce": 0.00013458277680911124, + "loss_iou": 0.1748046875, + "loss_num": 0.0089111328125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 275792276, + "step": 4923 + }, + { + "epoch": 10.966592427616927, + "grad_norm": 18.849925994873047, + "learning_rate": 1e-06, + "loss": 0.4902, + "num_input_tokens_seen": 275847012, + "step": 4924 + }, + { + "epoch": 10.966592427616927, + "loss": 0.5802684426307678, + "loss_ce": 0.0015330992173403502, + "loss_iou": 0.2412109375, + "loss_num": 0.019287109375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 275847012, + "step": 4924 + }, + { + "epoch": 10.968819599109132, + "grad_norm": 25.976478576660156, + "learning_rate": 1e-06, + "loss": 0.6866, + "num_input_tokens_seen": 275905656, + "step": 4925 + }, + { + "epoch": 10.968819599109132, + "loss": 0.5414575338363647, + "loss_ce": 0.0001978027867153287, + "loss_iou": 0.2236328125, + "loss_num": 0.018798828125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 275905656, + "step": 4925 + }, + { + "epoch": 10.971046770601337, + "grad_norm": 22.996116638183594, + "learning_rate": 1e-06, + "loss": 0.578, + "num_input_tokens_seen": 275961284, + "step": 4926 + }, + { + "epoch": 10.971046770601337, + "loss": 0.4320530593395233, + "loss_ce": 0.00016829956439323723, + "loss_iou": 0.1826171875, + "loss_num": 0.01361083984375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 275961284, + "step": 4926 + }, + { + "epoch": 10.973273942093542, + "grad_norm": 15.009370803833008, + "learning_rate": 1e-06, + "loss": 0.596, + "num_input_tokens_seen": 276017624, + "step": 4927 + }, + { + "epoch": 10.973273942093542, + "loss": 0.6515401601791382, + "loss_ce": 0.0001729477517073974, + "loss_iou": 0.2734375, + "loss_num": 0.0208740234375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 276017624, + "step": 4927 + }, + { + "epoch": 10.975501113585747, + "grad_norm": 17.324737548828125, + "learning_rate": 1e-06, + "loss": 0.4243, + "num_input_tokens_seen": 276075216, + "step": 4928 + }, + { + "epoch": 10.975501113585747, + "loss": 0.45728790760040283, + "loss_ce": 0.00013462905189953744, + "loss_iou": 0.193359375, + "loss_num": 0.01397705078125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 276075216, + "step": 4928 + }, + { + "epoch": 10.977728285077951, + "grad_norm": 77.1220932006836, + "learning_rate": 1e-06, + "loss": 0.4622, + "num_input_tokens_seen": 276132700, + "step": 4929 + }, + { + "epoch": 10.977728285077951, + "loss": 0.4627261161804199, + "loss_ce": 0.000201685048523359, + "loss_iou": 0.2021484375, + "loss_num": 0.0115966796875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 276132700, + "step": 4929 + }, + { + "epoch": 10.979955456570156, + "grad_norm": 21.860414505004883, + "learning_rate": 1e-06, + "loss": 0.4459, + "num_input_tokens_seen": 276192092, + "step": 4930 + }, + { + "epoch": 10.979955456570156, + "loss": 0.5067447423934937, + "loss_ce": 0.00015294540207833052, + "loss_iou": 0.2236328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 276192092, + "step": 4930 + }, + { + "epoch": 10.982182628062361, + "grad_norm": 23.157732009887695, + "learning_rate": 1e-06, + "loss": 0.5643, + "num_input_tokens_seen": 276248164, + "step": 4931 + }, + { + "epoch": 10.982182628062361, + "loss": 0.5332338809967041, + "loss_ce": 0.00013755704276263714, + "loss_iou": 0.2392578125, + "loss_num": 0.0111083984375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 276248164, + "step": 4931 + }, + { + "epoch": 10.984409799554566, + "grad_norm": 18.797258377075195, + "learning_rate": 1e-06, + "loss": 0.5975, + "num_input_tokens_seen": 276304632, + "step": 4932 + }, + { + "epoch": 10.984409799554566, + "loss": 0.6781335473060608, + "loss_ce": 0.00027711730217561126, + "loss_iou": 0.265625, + "loss_num": 0.0296630859375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 276304632, + "step": 4932 + }, + { + "epoch": 10.98663697104677, + "grad_norm": 23.31140899658203, + "learning_rate": 1e-06, + "loss": 0.4345, + "num_input_tokens_seen": 276361908, + "step": 4933 + }, + { + "epoch": 10.98663697104677, + "loss": 0.5331314206123352, + "loss_ce": 0.00011140106653328985, + "loss_iou": 0.2392578125, + "loss_num": 0.010986328125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 276361908, + "step": 4933 + }, + { + "epoch": 10.988864142538976, + "grad_norm": 15.896737098693848, + "learning_rate": 1e-06, + "loss": 0.6221, + "num_input_tokens_seen": 276420060, + "step": 4934 + }, + { + "epoch": 10.988864142538976, + "loss": 0.7978502511978149, + "loss_ce": 0.00012079518637619913, + "loss_iou": 0.3515625, + "loss_num": 0.0186767578125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 276420060, + "step": 4934 + }, + { + "epoch": 10.99109131403118, + "grad_norm": 17.5236759185791, + "learning_rate": 1e-06, + "loss": 0.6223, + "num_input_tokens_seen": 276475372, + "step": 4935 + }, + { + "epoch": 10.99109131403118, + "loss": 0.7966042160987854, + "loss_ce": 0.00021749922598246485, + "loss_iou": 0.30859375, + "loss_num": 0.035888671875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 276475372, + "step": 4935 + }, + { + "epoch": 10.993318485523385, + "grad_norm": 16.806560516357422, + "learning_rate": 1e-06, + "loss": 0.5073, + "num_input_tokens_seen": 276529704, + "step": 4936 + }, + { + "epoch": 10.993318485523385, + "loss": 0.3875495195388794, + "loss_ce": 0.00015937026182655245, + "loss_iou": 0.1669921875, + "loss_num": 0.0107421875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 276529704, + "step": 4936 + }, + { + "epoch": 10.99554565701559, + "grad_norm": 15.568613052368164, + "learning_rate": 1e-06, + "loss": 0.4306, + "num_input_tokens_seen": 276584256, + "step": 4937 + }, + { + "epoch": 10.99554565701559, + "loss": 0.3633689284324646, + "loss_ce": 0.0002707808162085712, + "loss_iou": 0.1279296875, + "loss_num": 0.021728515625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 276584256, + "step": 4937 + }, + { + "epoch": 10.997772828507795, + "grad_norm": 25.16409683227539, + "learning_rate": 1e-06, + "loss": 0.3894, + "num_input_tokens_seen": 276640164, + "step": 4938 + }, + { + "epoch": 10.997772828507795, + "loss": 0.41700875759124756, + "loss_ce": 0.00013862067135050893, + "loss_iou": 0.1865234375, + "loss_num": 0.00860595703125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 276640164, + "step": 4938 + }, + { + "epoch": 11.0, + "grad_norm": 23.693870544433594, + "learning_rate": 1e-06, + "loss": 0.7586, + "num_input_tokens_seen": 276695568, + "step": 4939 + }, + { + "epoch": 11.0, + "loss": 0.8163493275642395, + "loss_ce": 0.00018723538960330188, + "loss_iou": 0.35546875, + "loss_num": 0.020751953125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 276695568, + "step": 4939 + }, + { + "epoch": 11.002227171492205, + "grad_norm": 14.375535011291504, + "learning_rate": 1e-06, + "loss": 0.408, + "num_input_tokens_seen": 276751748, + "step": 4940 + }, + { + "epoch": 11.002227171492205, + "loss": 0.34511420130729675, + "loss_ce": 0.00020451581804081798, + "loss_iou": 0.158203125, + "loss_num": 0.005462646484375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 276751748, + "step": 4940 + }, + { + "epoch": 11.00445434298441, + "grad_norm": 17.11101722717285, + "learning_rate": 1e-06, + "loss": 0.4981, + "num_input_tokens_seen": 276808900, + "step": 4941 + }, + { + "epoch": 11.00445434298441, + "loss": 0.5812661647796631, + "loss_ce": 0.00021148948871996254, + "loss_iou": 0.259765625, + "loss_num": 0.0120849609375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 276808900, + "step": 4941 + }, + { + "epoch": 11.006681514476615, + "grad_norm": 24.729581832885742, + "learning_rate": 1e-06, + "loss": 0.591, + "num_input_tokens_seen": 276865236, + "step": 4942 + }, + { + "epoch": 11.006681514476615, + "loss": 0.5612735748291016, + "loss_ce": 0.00048253341810777783, + "loss_iou": 0.248046875, + "loss_num": 0.0130615234375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 276865236, + "step": 4942 + }, + { + "epoch": 11.00890868596882, + "grad_norm": 18.79891014099121, + "learning_rate": 1e-06, + "loss": 0.3387, + "num_input_tokens_seen": 276923576, + "step": 4943 + }, + { + "epoch": 11.00890868596882, + "loss": 0.26598143577575684, + "loss_ce": 0.00011229849769733846, + "loss_iou": 0.12109375, + "loss_num": 0.004608154296875, + "loss_xval": 0.265625, + "num_input_tokens_seen": 276923576, + "step": 4943 + }, + { + "epoch": 11.011135857461024, + "grad_norm": 19.149585723876953, + "learning_rate": 1e-06, + "loss": 0.3532, + "num_input_tokens_seen": 276980108, + "step": 4944 + }, + { + "epoch": 11.011135857461024, + "loss": 0.377750962972641, + "loss_ce": 0.00012643022637348622, + "loss_iou": 0.17578125, + "loss_num": 0.0052490234375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 276980108, + "step": 4944 + }, + { + "epoch": 11.01336302895323, + "grad_norm": 19.660892486572266, + "learning_rate": 1e-06, + "loss": 0.4988, + "num_input_tokens_seen": 277035084, + "step": 4945 + }, + { + "epoch": 11.01336302895323, + "loss": 0.49671003222465515, + "loss_ce": 0.00012798060197383165, + "loss_iou": 0.21875, + "loss_num": 0.01171875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 277035084, + "step": 4945 + }, + { + "epoch": 11.015590200445434, + "grad_norm": 17.99364471435547, + "learning_rate": 1e-06, + "loss": 0.5213, + "num_input_tokens_seen": 277090600, + "step": 4946 + }, + { + "epoch": 11.015590200445434, + "loss": 0.5547381043434143, + "loss_ce": 0.0002947351022157818, + "loss_iou": 0.2314453125, + "loss_num": 0.018310546875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 277090600, + "step": 4946 + }, + { + "epoch": 11.017817371937639, + "grad_norm": 26.641109466552734, + "learning_rate": 1e-06, + "loss": 0.4299, + "num_input_tokens_seen": 277144644, + "step": 4947 + }, + { + "epoch": 11.017817371937639, + "loss": 0.3615342974662781, + "loss_ce": 0.0001451151620130986, + "loss_iou": 0.1357421875, + "loss_num": 0.0181884765625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 277144644, + "step": 4947 + }, + { + "epoch": 11.020044543429844, + "grad_norm": 25.904264450073242, + "learning_rate": 1e-06, + "loss": 0.8089, + "num_input_tokens_seen": 277198968, + "step": 4948 + }, + { + "epoch": 11.020044543429844, + "loss": 0.8873788118362427, + "loss_ce": 0.0001717885461403057, + "loss_iou": 0.373046875, + "loss_num": 0.028076171875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 277198968, + "step": 4948 + }, + { + "epoch": 11.022271714922049, + "grad_norm": 18.756681442260742, + "learning_rate": 1e-06, + "loss": 0.6074, + "num_input_tokens_seen": 277252344, + "step": 4949 + }, + { + "epoch": 11.022271714922049, + "loss": 0.7791860103607178, + "loss_ce": 0.00037738942774012685, + "loss_iou": 0.302734375, + "loss_num": 0.0341796875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 277252344, + "step": 4949 + }, + { + "epoch": 11.024498886414253, + "grad_norm": 17.9628849029541, + "learning_rate": 1e-06, + "loss": 0.3852, + "num_input_tokens_seen": 277308488, + "step": 4950 + }, + { + "epoch": 11.024498886414253, + "loss": 0.34923800826072693, + "loss_ce": 0.00011689884559018537, + "loss_iou": 0.1513671875, + "loss_num": 0.00927734375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 277308488, + "step": 4950 + }, + { + "epoch": 11.026726057906458, + "grad_norm": 14.993110656738281, + "learning_rate": 1e-06, + "loss": 0.3982, + "num_input_tokens_seen": 277364872, + "step": 4951 + }, + { + "epoch": 11.026726057906458, + "loss": 0.3778972625732422, + "loss_ce": 0.00015067942149471492, + "loss_iou": 0.16796875, + "loss_num": 0.00823974609375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 277364872, + "step": 4951 + }, + { + "epoch": 11.028953229398663, + "grad_norm": 18.500215530395508, + "learning_rate": 1e-06, + "loss": 0.4492, + "num_input_tokens_seen": 277422360, + "step": 4952 + }, + { + "epoch": 11.028953229398663, + "loss": 0.517828643321991, + "loss_ce": 0.00012847439211327583, + "loss_iou": 0.2119140625, + "loss_num": 0.0185546875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 277422360, + "step": 4952 + }, + { + "epoch": 11.031180400890868, + "grad_norm": 28.1491641998291, + "learning_rate": 1e-06, + "loss": 0.6864, + "num_input_tokens_seen": 277479676, + "step": 4953 + }, + { + "epoch": 11.031180400890868, + "loss": 0.7513668537139893, + "loss_ce": 0.00014616544649470598, + "loss_iou": 0.2890625, + "loss_num": 0.03466796875, + "loss_xval": 0.75, + "num_input_tokens_seen": 277479676, + "step": 4953 + }, + { + "epoch": 11.033407572383073, + "grad_norm": 14.530891418457031, + "learning_rate": 1e-06, + "loss": 0.5625, + "num_input_tokens_seen": 277535648, + "step": 4954 + }, + { + "epoch": 11.033407572383073, + "loss": 0.6487134695053101, + "loss_ce": 0.00015391816850751638, + "loss_iou": 0.265625, + "loss_num": 0.023681640625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 277535648, + "step": 4954 + }, + { + "epoch": 11.035634743875278, + "grad_norm": 18.762638092041016, + "learning_rate": 1e-06, + "loss": 0.4931, + "num_input_tokens_seen": 277593048, + "step": 4955 + }, + { + "epoch": 11.035634743875278, + "loss": 0.5269228219985962, + "loss_ce": 0.00012840772978961468, + "loss_iou": 0.21875, + "loss_num": 0.0177001953125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 277593048, + "step": 4955 + }, + { + "epoch": 11.037861915367483, + "grad_norm": 14.296708106994629, + "learning_rate": 1e-06, + "loss": 0.6159, + "num_input_tokens_seen": 277647512, + "step": 4956 + }, + { + "epoch": 11.037861915367483, + "loss": 0.7726843357086182, + "loss_ce": 0.00022338703274726868, + "loss_iou": 0.296875, + "loss_num": 0.03564453125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 277647512, + "step": 4956 + }, + { + "epoch": 11.040089086859687, + "grad_norm": 16.444766998291016, + "learning_rate": 1e-06, + "loss": 0.476, + "num_input_tokens_seen": 277705924, + "step": 4957 + }, + { + "epoch": 11.040089086859687, + "loss": 0.48854783177375793, + "loss_ce": 0.0001445203961338848, + "loss_iou": 0.2119140625, + "loss_num": 0.0126953125, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 277705924, + "step": 4957 + }, + { + "epoch": 11.042316258351892, + "grad_norm": 33.47507095336914, + "learning_rate": 1e-06, + "loss": 0.6862, + "num_input_tokens_seen": 277763404, + "step": 4958 + }, + { + "epoch": 11.042316258351892, + "loss": 0.7765988111495972, + "loss_ce": 0.00010955688776448369, + "loss_iou": 0.34765625, + "loss_num": 0.0166015625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 277763404, + "step": 4958 + }, + { + "epoch": 11.044543429844097, + "grad_norm": 23.200275421142578, + "learning_rate": 1e-06, + "loss": 0.5959, + "num_input_tokens_seen": 277819472, + "step": 4959 + }, + { + "epoch": 11.044543429844097, + "loss": 0.7127882838249207, + "loss_ce": 0.00014180471771396697, + "loss_iou": 0.32421875, + "loss_num": 0.01287841796875, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 277819472, + "step": 4959 + }, + { + "epoch": 11.046770601336302, + "grad_norm": 19.549856185913086, + "learning_rate": 1e-06, + "loss": 0.6735, + "num_input_tokens_seen": 277874920, + "step": 4960 + }, + { + "epoch": 11.046770601336302, + "loss": 0.7458186149597168, + "loss_ce": 0.00021311640739440918, + "loss_iou": 0.294921875, + "loss_num": 0.03125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 277874920, + "step": 4960 + }, + { + "epoch": 11.048997772828507, + "grad_norm": 18.301462173461914, + "learning_rate": 1e-06, + "loss": 0.4467, + "num_input_tokens_seen": 277932412, + "step": 4961 + }, + { + "epoch": 11.048997772828507, + "loss": 0.38100382685661316, + "loss_ce": 0.0001444508379790932, + "loss_iou": 0.16796875, + "loss_num": 0.0091552734375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 277932412, + "step": 4961 + }, + { + "epoch": 11.051224944320714, + "grad_norm": 22.31706428527832, + "learning_rate": 1e-06, + "loss": 0.6524, + "num_input_tokens_seen": 277990888, + "step": 4962 + }, + { + "epoch": 11.051224944320714, + "loss": 0.6529885530471802, + "loss_ce": 0.00015653553418815136, + "loss_iou": 0.265625, + "loss_num": 0.0242919921875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 277990888, + "step": 4962 + }, + { + "epoch": 11.053452115812918, + "grad_norm": 48.24069595336914, + "learning_rate": 1e-06, + "loss": 0.5579, + "num_input_tokens_seen": 278045352, + "step": 4963 + }, + { + "epoch": 11.053452115812918, + "loss": 0.4462721645832062, + "loss_ce": 0.00013570513692684472, + "loss_iou": 0.171875, + "loss_num": 0.0206298828125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 278045352, + "step": 4963 + }, + { + "epoch": 11.055679287305123, + "grad_norm": 14.883129119873047, + "learning_rate": 1e-06, + "loss": 0.4838, + "num_input_tokens_seen": 278100884, + "step": 4964 + }, + { + "epoch": 11.055679287305123, + "loss": 0.5324569344520569, + "loss_ce": 0.00016932294238358736, + "loss_iou": 0.220703125, + "loss_num": 0.01806640625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 278100884, + "step": 4964 + }, + { + "epoch": 11.057906458797328, + "grad_norm": 92.90229797363281, + "learning_rate": 1e-06, + "loss": 0.5606, + "num_input_tokens_seen": 278155508, + "step": 4965 + }, + { + "epoch": 11.057906458797328, + "loss": 0.6233317255973816, + "loss_ce": 0.00016281349235214293, + "loss_iou": 0.287109375, + "loss_num": 0.00994873046875, + "loss_xval": 0.625, + "num_input_tokens_seen": 278155508, + "step": 4965 + }, + { + "epoch": 11.060133630289533, + "grad_norm": 22.694913864135742, + "learning_rate": 1e-06, + "loss": 0.5463, + "num_input_tokens_seen": 278210972, + "step": 4966 + }, + { + "epoch": 11.060133630289533, + "loss": 0.5516934394836426, + "loss_ce": 0.00017979381664190441, + "loss_iou": 0.2294921875, + "loss_num": 0.018310546875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 278210972, + "step": 4966 + }, + { + "epoch": 11.062360801781738, + "grad_norm": 14.845329284667969, + "learning_rate": 1e-06, + "loss": 0.3656, + "num_input_tokens_seen": 278266736, + "step": 4967 + }, + { + "epoch": 11.062360801781738, + "loss": 0.3910008668899536, + "loss_ce": 0.00019277536193840206, + "loss_iou": 0.1767578125, + "loss_num": 0.0074462890625, + "loss_xval": 0.390625, + "num_input_tokens_seen": 278266736, + "step": 4967 + }, + { + "epoch": 11.064587973273943, + "grad_norm": 16.45245361328125, + "learning_rate": 1e-06, + "loss": 0.3967, + "num_input_tokens_seen": 278321340, + "step": 4968 + }, + { + "epoch": 11.064587973273943, + "loss": 0.4062022566795349, + "loss_ce": 0.00013537969789467752, + "loss_iou": 0.177734375, + "loss_num": 0.00994873046875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 278321340, + "step": 4968 + }, + { + "epoch": 11.066815144766148, + "grad_norm": 13.446118354797363, + "learning_rate": 1e-06, + "loss": 0.4845, + "num_input_tokens_seen": 278377700, + "step": 4969 + }, + { + "epoch": 11.066815144766148, + "loss": 0.5192990303039551, + "loss_ce": 0.00013396976282820106, + "loss_iou": 0.2255859375, + "loss_num": 0.01361083984375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 278377700, + "step": 4969 + }, + { + "epoch": 11.069042316258352, + "grad_norm": 31.59632682800293, + "learning_rate": 1e-06, + "loss": 0.5488, + "num_input_tokens_seen": 278431796, + "step": 4970 + }, + { + "epoch": 11.069042316258352, + "loss": 0.5724695920944214, + "loss_ce": 0.0001734097022563219, + "loss_iou": 0.234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 278431796, + "step": 4970 + }, + { + "epoch": 11.071269487750557, + "grad_norm": 21.374279022216797, + "learning_rate": 1e-06, + "loss": 0.5448, + "num_input_tokens_seen": 278486652, + "step": 4971 + }, + { + "epoch": 11.071269487750557, + "loss": 0.5677635073661804, + "loss_ce": 0.00013656642113346606, + "loss_iou": 0.2431640625, + "loss_num": 0.0162353515625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 278486652, + "step": 4971 + }, + { + "epoch": 11.073496659242762, + "grad_norm": 62.18361282348633, + "learning_rate": 1e-06, + "loss": 0.696, + "num_input_tokens_seen": 278543052, + "step": 4972 + }, + { + "epoch": 11.073496659242762, + "loss": 0.5261435508728027, + "loss_ce": 0.00014258406008593738, + "loss_iou": 0.232421875, + "loss_num": 0.01220703125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 278543052, + "step": 4972 + }, + { + "epoch": 11.075723830734967, + "grad_norm": 16.852285385131836, + "learning_rate": 1e-06, + "loss": 0.7198, + "num_input_tokens_seen": 278598500, + "step": 4973 + }, + { + "epoch": 11.075723830734967, + "loss": 0.7613972425460815, + "loss_ce": 0.00016673312347847968, + "loss_iou": 0.333984375, + "loss_num": 0.0189208984375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 278598500, + "step": 4973 + }, + { + "epoch": 11.077951002227172, + "grad_norm": 26.721847534179688, + "learning_rate": 1e-06, + "loss": 0.5459, + "num_input_tokens_seen": 278652932, + "step": 4974 + }, + { + "epoch": 11.077951002227172, + "loss": 0.6054716110229492, + "loss_ce": 0.0001249103806912899, + "loss_iou": 0.2314453125, + "loss_num": 0.0289306640625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 278652932, + "step": 4974 + }, + { + "epoch": 11.080178173719377, + "grad_norm": 28.737173080444336, + "learning_rate": 1e-06, + "loss": 0.5445, + "num_input_tokens_seen": 278709224, + "step": 4975 + }, + { + "epoch": 11.080178173719377, + "loss": 0.5305880904197693, + "loss_ce": 0.00013153886538930237, + "loss_iou": 0.208984375, + "loss_num": 0.0224609375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 278709224, + "step": 4975 + }, + { + "epoch": 11.082405345211582, + "grad_norm": 22.68699836730957, + "learning_rate": 1e-06, + "loss": 0.4661, + "num_input_tokens_seen": 278761808, + "step": 4976 + }, + { + "epoch": 11.082405345211582, + "loss": 0.41276633739471436, + "loss_ce": 0.00016868110105860978, + "loss_iou": 0.181640625, + "loss_num": 0.0096435546875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 278761808, + "step": 4976 + }, + { + "epoch": 11.084632516703786, + "grad_norm": 23.453115463256836, + "learning_rate": 1e-06, + "loss": 0.6005, + "num_input_tokens_seen": 278817304, + "step": 4977 + }, + { + "epoch": 11.084632516703786, + "loss": 0.5315991044044495, + "loss_ce": 0.00010497516632312909, + "loss_iou": 0.220703125, + "loss_num": 0.01806640625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 278817304, + "step": 4977 + }, + { + "epoch": 11.086859688195991, + "grad_norm": 22.387310028076172, + "learning_rate": 1e-06, + "loss": 0.5064, + "num_input_tokens_seen": 278873696, + "step": 4978 + }, + { + "epoch": 11.086859688195991, + "loss": 0.41907966136932373, + "loss_ce": 0.00013435332220979035, + "loss_iou": 0.1748046875, + "loss_num": 0.01385498046875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 278873696, + "step": 4978 + }, + { + "epoch": 11.089086859688196, + "grad_norm": 15.790838241577148, + "learning_rate": 1e-06, + "loss": 0.596, + "num_input_tokens_seen": 278931180, + "step": 4979 + }, + { + "epoch": 11.089086859688196, + "loss": 0.5526787042617798, + "loss_ce": 0.00012747167784254998, + "loss_iou": 0.248046875, + "loss_num": 0.011474609375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 278931180, + "step": 4979 + }, + { + "epoch": 11.091314031180401, + "grad_norm": 15.259247779846191, + "learning_rate": 1e-06, + "loss": 0.6733, + "num_input_tokens_seen": 278988536, + "step": 4980 + }, + { + "epoch": 11.091314031180401, + "loss": 0.7944159507751465, + "loss_ce": 0.0004706614126916975, + "loss_iou": 0.341796875, + "loss_num": 0.0218505859375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 278988536, + "step": 4980 + }, + { + "epoch": 11.093541202672606, + "grad_norm": 22.694530487060547, + "learning_rate": 1e-06, + "loss": 0.5318, + "num_input_tokens_seen": 279045060, + "step": 4981 + }, + { + "epoch": 11.093541202672606, + "loss": 0.5756011605262756, + "loss_ce": 0.0001617103407625109, + "loss_iou": 0.2490234375, + "loss_num": 0.015380859375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 279045060, + "step": 4981 + }, + { + "epoch": 11.09576837416481, + "grad_norm": 20.57561492919922, + "learning_rate": 1e-06, + "loss": 0.5533, + "num_input_tokens_seen": 279101812, + "step": 4982 + }, + { + "epoch": 11.09576837416481, + "loss": 0.5785654783248901, + "loss_ce": 0.00019637157674878836, + "loss_iou": 0.2392578125, + "loss_num": 0.02001953125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 279101812, + "step": 4982 + }, + { + "epoch": 11.097995545657016, + "grad_norm": 20.943668365478516, + "learning_rate": 1e-06, + "loss": 0.4383, + "num_input_tokens_seen": 279156604, + "step": 4983 + }, + { + "epoch": 11.097995545657016, + "loss": 0.4315411448478699, + "loss_ce": 0.0002667338994797319, + "loss_iou": 0.1787109375, + "loss_num": 0.0145263671875, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 279156604, + "step": 4983 + }, + { + "epoch": 11.10022271714922, + "grad_norm": 25.43724822998047, + "learning_rate": 1e-06, + "loss": 0.4991, + "num_input_tokens_seen": 279212128, + "step": 4984 + }, + { + "epoch": 11.10022271714922, + "loss": 0.560702919960022, + "loss_ce": 0.00015607105160597712, + "loss_iou": 0.248046875, + "loss_num": 0.012939453125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 279212128, + "step": 4984 + }, + { + "epoch": 11.102449888641425, + "grad_norm": 30.57325553894043, + "learning_rate": 1e-06, + "loss": 0.6142, + "num_input_tokens_seen": 279267056, + "step": 4985 + }, + { + "epoch": 11.102449888641425, + "loss": 0.5670490860939026, + "loss_ce": 0.00015456389519385993, + "loss_iou": 0.2451171875, + "loss_num": 0.0152587890625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 279267056, + "step": 4985 + }, + { + "epoch": 11.10467706013363, + "grad_norm": 21.70029067993164, + "learning_rate": 1e-06, + "loss": 0.4192, + "num_input_tokens_seen": 279322728, + "step": 4986 + }, + { + "epoch": 11.10467706013363, + "loss": 0.4730498492717743, + "loss_ce": 0.00014946176088415086, + "loss_iou": 0.1982421875, + "loss_num": 0.015380859375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 279322728, + "step": 4986 + }, + { + "epoch": 11.106904231625835, + "grad_norm": 17.88077735900879, + "learning_rate": 1e-06, + "loss": 0.3673, + "num_input_tokens_seen": 279379872, + "step": 4987 + }, + { + "epoch": 11.106904231625835, + "loss": 0.3716070055961609, + "loss_ce": 0.00014704751083627343, + "loss_iou": 0.169921875, + "loss_num": 0.006561279296875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 279379872, + "step": 4987 + }, + { + "epoch": 11.10913140311804, + "grad_norm": 21.7382755279541, + "learning_rate": 1e-06, + "loss": 0.609, + "num_input_tokens_seen": 279435852, + "step": 4988 + }, + { + "epoch": 11.10913140311804, + "loss": 0.6673039197921753, + "loss_ce": 0.00018964617629535496, + "loss_iou": 0.265625, + "loss_num": 0.0272216796875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 279435852, + "step": 4988 + }, + { + "epoch": 11.111358574610245, + "grad_norm": 13.589532852172852, + "learning_rate": 1e-06, + "loss": 0.4054, + "num_input_tokens_seen": 279492372, + "step": 4989 + }, + { + "epoch": 11.111358574610245, + "loss": 0.5108894109725952, + "loss_ce": 0.00014718393504153937, + "loss_iou": 0.2255859375, + "loss_num": 0.01190185546875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 279492372, + "step": 4989 + }, + { + "epoch": 11.11358574610245, + "grad_norm": 30.877511978149414, + "learning_rate": 1e-06, + "loss": 0.4336, + "num_input_tokens_seen": 279548732, + "step": 4990 + }, + { + "epoch": 11.11358574610245, + "loss": 0.36500683426856995, + "loss_ce": 0.0001386810909025371, + "loss_iou": 0.1650390625, + "loss_num": 0.007049560546875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 279548732, + "step": 4990 + }, + { + "epoch": 11.115812917594655, + "grad_norm": 20.038894653320312, + "learning_rate": 1e-06, + "loss": 0.5222, + "num_input_tokens_seen": 279605916, + "step": 4991 + }, + { + "epoch": 11.115812917594655, + "loss": 0.6044121980667114, + "loss_ce": 0.00016413633420597762, + "loss_iou": 0.267578125, + "loss_num": 0.0140380859375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 279605916, + "step": 4991 + }, + { + "epoch": 11.11804008908686, + "grad_norm": 23.694477081298828, + "learning_rate": 1e-06, + "loss": 0.521, + "num_input_tokens_seen": 279659500, + "step": 4992 + }, + { + "epoch": 11.11804008908686, + "loss": 0.4539830684661865, + "loss_ce": 0.00012564370990730822, + "loss_iou": 0.201171875, + "loss_num": 0.010498046875, + "loss_xval": 0.453125, + "num_input_tokens_seen": 279659500, + "step": 4992 + }, + { + "epoch": 11.120267260579064, + "grad_norm": 16.13028907775879, + "learning_rate": 1e-06, + "loss": 0.5406, + "num_input_tokens_seen": 279716076, + "step": 4993 + }, + { + "epoch": 11.120267260579064, + "loss": 0.4950714707374573, + "loss_ce": 0.00013739880523644388, + "loss_iou": 0.2158203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 279716076, + "step": 4993 + }, + { + "epoch": 11.122494432071269, + "grad_norm": 25.81940460205078, + "learning_rate": 1e-06, + "loss": 0.7181, + "num_input_tokens_seen": 279772812, + "step": 4994 + }, + { + "epoch": 11.122494432071269, + "loss": 0.6644489765167236, + "loss_ce": 0.00014238519361242652, + "loss_iou": 0.283203125, + "loss_num": 0.02001953125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 279772812, + "step": 4994 + }, + { + "epoch": 11.124721603563474, + "grad_norm": 21.302309036254883, + "learning_rate": 1e-06, + "loss": 0.5902, + "num_input_tokens_seen": 279828292, + "step": 4995 + }, + { + "epoch": 11.124721603563474, + "loss": 0.5325771570205688, + "loss_ce": 0.00022852000256534666, + "loss_iou": 0.240234375, + "loss_num": 0.0106201171875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 279828292, + "step": 4995 + }, + { + "epoch": 11.126948775055679, + "grad_norm": 16.87586212158203, + "learning_rate": 1e-06, + "loss": 0.472, + "num_input_tokens_seen": 279885472, + "step": 4996 + }, + { + "epoch": 11.126948775055679, + "loss": 0.3971105217933655, + "loss_ce": 0.0001378602028125897, + "loss_iou": 0.166015625, + "loss_num": 0.01312255859375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 279885472, + "step": 4996 + }, + { + "epoch": 11.129175946547884, + "grad_norm": 20.097671508789062, + "learning_rate": 1e-06, + "loss": 0.6366, + "num_input_tokens_seen": 279938956, + "step": 4997 + }, + { + "epoch": 11.129175946547884, + "loss": 0.634530246257782, + "loss_ce": 0.00013083560043014586, + "loss_iou": 0.271484375, + "loss_num": 0.01806640625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 279938956, + "step": 4997 + }, + { + "epoch": 11.131403118040089, + "grad_norm": 16.503360748291016, + "learning_rate": 1e-06, + "loss": 0.5446, + "num_input_tokens_seen": 279994336, + "step": 4998 + }, + { + "epoch": 11.131403118040089, + "loss": 0.38283979892730713, + "loss_ce": 0.0001493898016633466, + "loss_iou": 0.16015625, + "loss_num": 0.0123291015625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 279994336, + "step": 4998 + }, + { + "epoch": 11.133630289532293, + "grad_norm": 13.981780052185059, + "learning_rate": 1e-06, + "loss": 0.606, + "num_input_tokens_seen": 280048108, + "step": 4999 + }, + { + "epoch": 11.133630289532293, + "loss": 0.5781067609786987, + "loss_ce": 0.00010381722677266225, + "loss_iou": 0.205078125, + "loss_num": 0.03369140625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 280048108, + "step": 4999 + }, + { + "epoch": 11.135857461024498, + "grad_norm": 16.743953704833984, + "learning_rate": 1e-06, + "loss": 0.4433, + "num_input_tokens_seen": 280103384, + "step": 5000 + }, + { + "epoch": 11.135857461024498, + "eval_seeclick_web_CIoU": 0.5857278108596802, + "eval_seeclick_web_GIoU": 0.5834327638149261, + "eval_seeclick_web_IoU": 0.6043886542320251, + "eval_seeclick_web_MAE_all": 0.015452081337571144, + "eval_seeclick_web_MAE_h": 0.0073387217707931995, + "eval_seeclick_web_MAE_w": 0.015388870611786842, + "eval_seeclick_web_MAE_x_boxes": 0.009706755401566625, + "eval_seeclick_web_MAE_y_boxes": 0.021358829457312822, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9223926067352295, + "eval_seeclick_web_loss_ce": 0.0001983401962206699, + "eval_seeclick_web_loss_iou": 0.4200439453125, + "eval_seeclick_web_loss_num": 0.01239013671875, + "eval_seeclick_web_loss_xval": 0.90185546875, + "eval_seeclick_web_runtime": 21.9551, + "eval_seeclick_web_samples_per_second": 2.277, + "eval_seeclick_web_steps_per_second": 0.091, + "num_input_tokens_seen": 280103384, + "step": 5000 + }, + { + "epoch": 11.135857461024498, + "eval_icons_CIoU": 0.2814893424510956, + "eval_icons_GIoU": 0.3012331575155258, + "eval_icons_IoU": 0.3595561385154724, + "eval_icons_MAE_all": 0.059540608897805214, + "eval_icons_MAE_h": 0.03891334868967533, + "eval_icons_MAE_w": 0.0580837675370276, + "eval_icons_MAE_x_boxes": 0.05784302018582821, + "eval_icons_MAE_y_boxes": 0.038528745993971825, + "eval_icons_inside_bbox": 0.6059027910232544, + "eval_icons_loss": 1.7233656644821167, + "eval_icons_loss_ce": 0.00022988053387962282, + "eval_icons_loss_iou": 0.678466796875, + "eval_icons_loss_num": 0.058696746826171875, + "eval_icons_loss_xval": 1.6513671875, + "eval_icons_runtime": 20.195, + "eval_icons_samples_per_second": 2.476, + "eval_icons_steps_per_second": 0.099, + "num_input_tokens_seen": 280103384, + "step": 5000 + }, + { + "epoch": 11.135857461024498, + "eval_screenspot_CIoU": 0.3535158932209015, + "eval_screenspot_GIoU": 0.36772539218266803, + "eval_screenspot_IoU": 0.43389413754145306, + "eval_screenspot_MAE_all": 0.05942438915371895, + "eval_screenspot_MAE_h": 0.03856873946885268, + "eval_screenspot_MAE_w": 0.0688897892832756, + "eval_screenspot_MAE_x_boxes": 0.07383330973486106, + "eval_screenspot_MAE_y_boxes": 0.03976897584895293, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 1.6220872402191162, + "eval_screenspot_loss_ce": 0.000261851722219338, + "eval_screenspot_loss_iou": 0.67236328125, + "eval_screenspot_loss_num": 0.06821314493815105, + "eval_screenspot_loss_xval": 1.6868489583333333, + "eval_screenspot_runtime": 33.7501, + "eval_screenspot_samples_per_second": 2.637, + "eval_screenspot_steps_per_second": 0.089, + "num_input_tokens_seen": 280103384, + "step": 5000 + }, + { + "epoch": 11.135857461024498, + "eval_compot_CIoU": 0.3433762341737747, + "eval_compot_GIoU": 0.35395348072052, + "eval_compot_IoU": 0.4032938480377197, + "eval_compot_MAE_all": 0.01813736092299223, + "eval_compot_MAE_h": 0.009242744650691748, + "eval_compot_MAE_w": 0.02075517177581787, + "eval_compot_MAE_x_boxes": 0.030595741234719753, + "eval_compot_MAE_y_boxes": 0.007154007675126195, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3828521966934204, + "eval_compot_loss_ce": 0.000194034306332469, + "eval_compot_loss_iou": 0.6356201171875, + "eval_compot_loss_num": 0.016706466674804688, + "eval_compot_loss_xval": 1.35498046875, + "eval_compot_runtime": 22.2096, + "eval_compot_samples_per_second": 2.251, + "eval_compot_steps_per_second": 0.09, + "num_input_tokens_seen": 280103384, + "step": 5000 + }, + { + "epoch": 11.135857461024498, + "eval_custom_ui_val_CIoU": 0.46921708765957093, + "eval_custom_ui_val_GIoU": 0.48126794232262504, + "eval_custom_ui_val_IoU": 0.5315591428014967, + "eval_custom_ui_val_MAE_all": 0.030911025901635487, + "eval_custom_ui_val_MAE_h": 0.016821589320898056, + "eval_custom_ui_val_MAE_w": 0.0393333797239595, + "eval_custom_ui_val_MAE_x_boxes": 0.03895654301676485, + "eval_custom_ui_val_MAE_y_boxes": 0.015129889025249414, + "eval_custom_ui_val_inside_bbox": 0.7527006202273898, + "eval_custom_ui_val_loss": 1.2006866931915283, + "eval_custom_ui_val_loss_ce": 0.00023336601023200073, + "eval_custom_ui_val_loss_iou": 0.5094129774305556, + "eval_custom_ui_val_loss_num": 0.028292549981011286, + "eval_custom_ui_val_loss_xval": 1.1603190104166667, + "eval_custom_ui_val_runtime": 65.5609, + "eval_custom_ui_val_samples_per_second": 4.042, + "eval_custom_ui_val_steps_per_second": 0.137, + "num_input_tokens_seen": 280103384, + "step": 5000 + }, + { + "epoch": 11.135857461024498, + "loss": 0.8996152877807617, + "loss_ce": 0.00020119547843933105, + "loss_iou": 0.38671875, + "loss_num": 0.02490234375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 280103384, + "step": 5000 + }, + { + "epoch": 11.138084632516703, + "grad_norm": 19.082584381103516, + "learning_rate": 1e-06, + "loss": 0.4929, + "num_input_tokens_seen": 280158504, + "step": 5001 + }, + { + "epoch": 11.138084632516703, + "loss": 0.39870956540107727, + "loss_ce": 0.00015000064740888774, + "loss_iou": 0.1728515625, + "loss_num": 0.010498046875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 280158504, + "step": 5001 + }, + { + "epoch": 11.140311804008908, + "grad_norm": 26.122913360595703, + "learning_rate": 1e-06, + "loss": 0.5198, + "num_input_tokens_seen": 280214744, + "step": 5002 + }, + { + "epoch": 11.140311804008908, + "loss": 0.45124655961990356, + "loss_ce": 0.00013570950250141323, + "loss_iou": 0.19921875, + "loss_num": 0.0107421875, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 280214744, + "step": 5002 + }, + { + "epoch": 11.142538975501113, + "grad_norm": 27.3781681060791, + "learning_rate": 1e-06, + "loss": 0.4616, + "num_input_tokens_seen": 280271032, + "step": 5003 + }, + { + "epoch": 11.142538975501113, + "loss": 0.32605159282684326, + "loss_ce": 0.00012386470916680992, + "loss_iou": 0.125, + "loss_num": 0.0150146484375, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 280271032, + "step": 5003 + }, + { + "epoch": 11.144766146993318, + "grad_norm": 19.879722595214844, + "learning_rate": 1e-06, + "loss": 0.531, + "num_input_tokens_seen": 280327288, + "step": 5004 + }, + { + "epoch": 11.144766146993318, + "loss": 0.43859922885894775, + "loss_ce": 0.00012264544784557074, + "loss_iou": 0.1982421875, + "loss_num": 0.00823974609375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 280327288, + "step": 5004 + }, + { + "epoch": 11.146993318485523, + "grad_norm": 22.407188415527344, + "learning_rate": 1e-06, + "loss": 0.7016, + "num_input_tokens_seen": 280383788, + "step": 5005 + }, + { + "epoch": 11.146993318485523, + "loss": 0.49451902508735657, + "loss_ce": 0.00013426925579551607, + "loss_iou": 0.203125, + "loss_num": 0.0177001953125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 280383788, + "step": 5005 + }, + { + "epoch": 11.14922048997773, + "grad_norm": 17.652860641479492, + "learning_rate": 1e-06, + "loss": 0.4529, + "num_input_tokens_seen": 280438888, + "step": 5006 + }, + { + "epoch": 11.14922048997773, + "loss": 0.38709527254104614, + "loss_ce": 0.00013238785322755575, + "loss_iou": 0.1796875, + "loss_num": 0.00537109375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 280438888, + "step": 5006 + }, + { + "epoch": 11.151447661469934, + "grad_norm": 28.968141555786133, + "learning_rate": 1e-06, + "loss": 0.561, + "num_input_tokens_seen": 280494244, + "step": 5007 + }, + { + "epoch": 11.151447661469934, + "loss": 0.5118998289108276, + "loss_ce": 0.0005473287310451269, + "loss_iou": 0.2158203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 280494244, + "step": 5007 + }, + { + "epoch": 11.153674832962139, + "grad_norm": 22.353212356567383, + "learning_rate": 1e-06, + "loss": 0.5322, + "num_input_tokens_seen": 280547136, + "step": 5008 + }, + { + "epoch": 11.153674832962139, + "loss": 0.6800668239593506, + "loss_ce": 0.00013513598241843283, + "loss_iou": 0.30078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 280547136, + "step": 5008 + }, + { + "epoch": 11.155902004454344, + "grad_norm": 16.07948875427246, + "learning_rate": 1e-06, + "loss": 0.6479, + "num_input_tokens_seen": 280605596, + "step": 5009 + }, + { + "epoch": 11.155902004454344, + "loss": 0.7251046895980835, + "loss_ce": 0.00012912959209643304, + "loss_iou": 0.2890625, + "loss_num": 0.029296875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 280605596, + "step": 5009 + }, + { + "epoch": 11.158129175946549, + "grad_norm": 21.49631690979004, + "learning_rate": 1e-06, + "loss": 0.411, + "num_input_tokens_seen": 280662056, + "step": 5010 + }, + { + "epoch": 11.158129175946549, + "loss": 0.46879857778549194, + "loss_ce": 0.000170662795426324, + "loss_iou": 0.2138671875, + "loss_num": 0.00836181640625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 280662056, + "step": 5010 + }, + { + "epoch": 11.160356347438753, + "grad_norm": 25.566015243530273, + "learning_rate": 1e-06, + "loss": 0.4451, + "num_input_tokens_seen": 280718144, + "step": 5011 + }, + { + "epoch": 11.160356347438753, + "loss": 0.3402805030345917, + "loss_ce": 0.00019259938562754542, + "loss_iou": 0.1533203125, + "loss_num": 0.00677490234375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 280718144, + "step": 5011 + }, + { + "epoch": 11.162583518930958, + "grad_norm": 19.456497192382812, + "learning_rate": 1e-06, + "loss": 0.5608, + "num_input_tokens_seen": 280769956, + "step": 5012 + }, + { + "epoch": 11.162583518930958, + "loss": 0.5276530981063843, + "loss_ce": 0.00018727785209193826, + "loss_iou": 0.228515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 280769956, + "step": 5012 + }, + { + "epoch": 11.164810690423163, + "grad_norm": 19.570919036865234, + "learning_rate": 1e-06, + "loss": 0.4798, + "num_input_tokens_seen": 280824936, + "step": 5013 + }, + { + "epoch": 11.164810690423163, + "loss": 0.3978237807750702, + "loss_ce": 0.00011871426249854267, + "loss_iou": 0.15625, + "loss_num": 0.016845703125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 280824936, + "step": 5013 + }, + { + "epoch": 11.167037861915368, + "grad_norm": 26.35681915283203, + "learning_rate": 1e-06, + "loss": 0.5786, + "num_input_tokens_seen": 280878960, + "step": 5014 + }, + { + "epoch": 11.167037861915368, + "loss": 0.6183251142501831, + "loss_ce": 0.00016105023678392172, + "loss_iou": 0.255859375, + "loss_num": 0.0213623046875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 280878960, + "step": 5014 + }, + { + "epoch": 11.169265033407573, + "grad_norm": 20.287382125854492, + "learning_rate": 1e-06, + "loss": 0.5377, + "num_input_tokens_seen": 280934428, + "step": 5015 + }, + { + "epoch": 11.169265033407573, + "loss": 0.5026686191558838, + "loss_ce": 0.00010515956091694534, + "loss_iou": 0.2060546875, + "loss_num": 0.01806640625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 280934428, + "step": 5015 + }, + { + "epoch": 11.171492204899778, + "grad_norm": 27.965999603271484, + "learning_rate": 1e-06, + "loss": 0.514, + "num_input_tokens_seen": 280992196, + "step": 5016 + }, + { + "epoch": 11.171492204899778, + "loss": 0.521459698677063, + "loss_ce": 0.00021950851078145206, + "loss_iou": 0.2216796875, + "loss_num": 0.015625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 280992196, + "step": 5016 + }, + { + "epoch": 11.173719376391983, + "grad_norm": 16.570053100585938, + "learning_rate": 1e-06, + "loss": 0.4161, + "num_input_tokens_seen": 281050828, + "step": 5017 + }, + { + "epoch": 11.173719376391983, + "loss": 0.4344647526741028, + "loss_ce": 0.00013857701560482383, + "loss_iou": 0.1796875, + "loss_num": 0.01513671875, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 281050828, + "step": 5017 + }, + { + "epoch": 11.175946547884188, + "grad_norm": 16.34454345703125, + "learning_rate": 1e-06, + "loss": 0.6104, + "num_input_tokens_seen": 281106284, + "step": 5018 + }, + { + "epoch": 11.175946547884188, + "loss": 0.7851086854934692, + "loss_ce": 0.00019656957010738552, + "loss_iou": 0.361328125, + "loss_num": 0.01275634765625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 281106284, + "step": 5018 + }, + { + "epoch": 11.178173719376392, + "grad_norm": 15.942395210266113, + "learning_rate": 1e-06, + "loss": 0.4458, + "num_input_tokens_seen": 281164500, + "step": 5019 + }, + { + "epoch": 11.178173719376392, + "loss": 0.3177483081817627, + "loss_ce": 0.00012136220175307244, + "loss_iou": 0.140625, + "loss_num": 0.007080078125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 281164500, + "step": 5019 + }, + { + "epoch": 11.180400890868597, + "grad_norm": 20.594022750854492, + "learning_rate": 1e-06, + "loss": 0.6501, + "num_input_tokens_seen": 281222920, + "step": 5020 + }, + { + "epoch": 11.180400890868597, + "loss": 0.4495760202407837, + "loss_ce": 0.00011313259165035561, + "loss_iou": 0.2041015625, + "loss_num": 0.00836181640625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 281222920, + "step": 5020 + }, + { + "epoch": 11.182628062360802, + "grad_norm": 20.63737678527832, + "learning_rate": 1e-06, + "loss": 0.7283, + "num_input_tokens_seen": 281280520, + "step": 5021 + }, + { + "epoch": 11.182628062360802, + "loss": 0.8178755640983582, + "loss_ce": 0.00012653997691813856, + "loss_iou": 0.330078125, + "loss_num": 0.03173828125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 281280520, + "step": 5021 + }, + { + "epoch": 11.184855233853007, + "grad_norm": 19.35364532470703, + "learning_rate": 1e-06, + "loss": 0.5065, + "num_input_tokens_seen": 281338688, + "step": 5022 + }, + { + "epoch": 11.184855233853007, + "loss": 0.5651026964187622, + "loss_ce": 0.00016126442642416805, + "loss_iou": 0.244140625, + "loss_num": 0.01519775390625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 281338688, + "step": 5022 + }, + { + "epoch": 11.187082405345212, + "grad_norm": 17.587987899780273, + "learning_rate": 1e-06, + "loss": 0.41, + "num_input_tokens_seen": 281394900, + "step": 5023 + }, + { + "epoch": 11.187082405345212, + "loss": 0.2979922890663147, + "loss_ce": 0.00014070735778659582, + "loss_iou": 0.119140625, + "loss_num": 0.01202392578125, + "loss_xval": 0.296875, + "num_input_tokens_seen": 281394900, + "step": 5023 + }, + { + "epoch": 11.189309576837417, + "grad_norm": 42.784358978271484, + "learning_rate": 1e-06, + "loss": 0.5281, + "num_input_tokens_seen": 281448492, + "step": 5024 + }, + { + "epoch": 11.189309576837417, + "loss": 0.5797094106674194, + "loss_ce": 0.00011953900684602559, + "loss_iou": 0.263671875, + "loss_num": 0.01025390625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 281448492, + "step": 5024 + }, + { + "epoch": 11.191536748329622, + "grad_norm": 19.45455551147461, + "learning_rate": 1e-06, + "loss": 0.5473, + "num_input_tokens_seen": 281505840, + "step": 5025 + }, + { + "epoch": 11.191536748329622, + "loss": 0.4985758662223816, + "loss_ce": 0.00016279635019600391, + "loss_iou": 0.2177734375, + "loss_num": 0.012451171875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 281505840, + "step": 5025 + }, + { + "epoch": 11.193763919821826, + "grad_norm": 14.789341926574707, + "learning_rate": 1e-06, + "loss": 0.428, + "num_input_tokens_seen": 281563968, + "step": 5026 + }, + { + "epoch": 11.193763919821826, + "loss": 0.5229382514953613, + "loss_ce": 0.00023318573948927224, + "loss_iou": 0.2099609375, + "loss_num": 0.0206298828125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 281563968, + "step": 5026 + }, + { + "epoch": 11.195991091314031, + "grad_norm": 24.538976669311523, + "learning_rate": 1e-06, + "loss": 0.5299, + "num_input_tokens_seen": 281618908, + "step": 5027 + }, + { + "epoch": 11.195991091314031, + "loss": 0.4837605655193329, + "loss_ce": 0.0001179840401164256, + "loss_iou": 0.20703125, + "loss_num": 0.01397705078125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 281618908, + "step": 5027 + }, + { + "epoch": 11.198218262806236, + "grad_norm": 16.29832649230957, + "learning_rate": 1e-06, + "loss": 0.4994, + "num_input_tokens_seen": 281674612, + "step": 5028 + }, + { + "epoch": 11.198218262806236, + "loss": 0.5437781810760498, + "loss_ce": 0.00013804963964503258, + "loss_iou": 0.2314453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 281674612, + "step": 5028 + }, + { + "epoch": 11.200445434298441, + "grad_norm": 16.095746994018555, + "learning_rate": 1e-06, + "loss": 0.6377, + "num_input_tokens_seen": 281731544, + "step": 5029 + }, + { + "epoch": 11.200445434298441, + "loss": 0.584113359451294, + "loss_ce": 0.0003731203032657504, + "loss_iou": 0.224609375, + "loss_num": 0.027099609375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 281731544, + "step": 5029 + }, + { + "epoch": 11.202672605790646, + "grad_norm": 18.842683792114258, + "learning_rate": 1e-06, + "loss": 0.3468, + "num_input_tokens_seen": 281788084, + "step": 5030 + }, + { + "epoch": 11.202672605790646, + "loss": 0.38637834787368774, + "loss_ce": 0.00014787877444177866, + "loss_iou": 0.1611328125, + "loss_num": 0.0125732421875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 281788084, + "step": 5030 + }, + { + "epoch": 11.20489977728285, + "grad_norm": 17.219839096069336, + "learning_rate": 1e-06, + "loss": 0.655, + "num_input_tokens_seen": 281846932, + "step": 5031 + }, + { + "epoch": 11.20489977728285, + "loss": 0.9407340884208679, + "loss_ce": 0.00018237490439787507, + "loss_iou": 0.36328125, + "loss_num": 0.042236328125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 281846932, + "step": 5031 + }, + { + "epoch": 11.207126948775056, + "grad_norm": 22.804384231567383, + "learning_rate": 1e-06, + "loss": 0.452, + "num_input_tokens_seen": 281905708, + "step": 5032 + }, + { + "epoch": 11.207126948775056, + "loss": 0.5562899112701416, + "loss_ce": 0.00013752697850577533, + "loss_iou": 0.255859375, + "loss_num": 0.00860595703125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 281905708, + "step": 5032 + }, + { + "epoch": 11.20935412026726, + "grad_norm": 23.392213821411133, + "learning_rate": 1e-06, + "loss": 0.5097, + "num_input_tokens_seen": 281963088, + "step": 5033 + }, + { + "epoch": 11.20935412026726, + "loss": 0.4477725028991699, + "loss_ce": 0.00014065910363569856, + "loss_iou": 0.193359375, + "loss_num": 0.01220703125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 281963088, + "step": 5033 + }, + { + "epoch": 11.211581291759465, + "grad_norm": 26.24521255493164, + "learning_rate": 1e-06, + "loss": 0.673, + "num_input_tokens_seen": 282019504, + "step": 5034 + }, + { + "epoch": 11.211581291759465, + "loss": 0.5856134295463562, + "loss_ce": 0.00016419796156696975, + "loss_iou": 0.251953125, + "loss_num": 0.0166015625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 282019504, + "step": 5034 + }, + { + "epoch": 11.21380846325167, + "grad_norm": 13.377967834472656, + "learning_rate": 1e-06, + "loss": 0.5941, + "num_input_tokens_seen": 282077584, + "step": 5035 + }, + { + "epoch": 11.21380846325167, + "loss": 0.5731761455535889, + "loss_ce": 0.00017806813411880285, + "loss_iou": 0.263671875, + "loss_num": 0.0091552734375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 282077584, + "step": 5035 + }, + { + "epoch": 11.216035634743875, + "grad_norm": 23.46733283996582, + "learning_rate": 1e-06, + "loss": 0.555, + "num_input_tokens_seen": 282135040, + "step": 5036 + }, + { + "epoch": 11.216035634743875, + "loss": 0.5774731040000916, + "loss_ce": 0.00020261471217963845, + "loss_iou": 0.251953125, + "loss_num": 0.014892578125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 282135040, + "step": 5036 + }, + { + "epoch": 11.21826280623608, + "grad_norm": 18.02016258239746, + "learning_rate": 1e-06, + "loss": 0.5232, + "num_input_tokens_seen": 282192724, + "step": 5037 + }, + { + "epoch": 11.21826280623608, + "loss": 0.5690996050834656, + "loss_ce": 0.00012990040704607964, + "loss_iou": 0.2158203125, + "loss_num": 0.027587890625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 282192724, + "step": 5037 + }, + { + "epoch": 11.220489977728285, + "grad_norm": 17.373300552368164, + "learning_rate": 1e-06, + "loss": 0.4308, + "num_input_tokens_seen": 282245988, + "step": 5038 + }, + { + "epoch": 11.220489977728285, + "loss": 0.4509298801422119, + "loss_ce": 0.00012421750579960644, + "loss_iou": 0.193359375, + "loss_num": 0.0126953125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 282245988, + "step": 5038 + }, + { + "epoch": 11.22271714922049, + "grad_norm": 16.49465560913086, + "learning_rate": 1e-06, + "loss": 0.6109, + "num_input_tokens_seen": 282301460, + "step": 5039 + }, + { + "epoch": 11.22271714922049, + "loss": 0.5691109895706177, + "loss_ce": 0.00014124812150839716, + "loss_iou": 0.2578125, + "loss_num": 0.010498046875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 282301460, + "step": 5039 + }, + { + "epoch": 11.224944320712694, + "grad_norm": 24.778156280517578, + "learning_rate": 1e-06, + "loss": 0.5473, + "num_input_tokens_seen": 282356420, + "step": 5040 + }, + { + "epoch": 11.224944320712694, + "loss": 0.5384612679481506, + "loss_ce": 0.0001312288804911077, + "loss_iou": 0.23828125, + "loss_num": 0.01220703125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 282356420, + "step": 5040 + }, + { + "epoch": 11.2271714922049, + "grad_norm": 17.579057693481445, + "learning_rate": 1e-06, + "loss": 0.5956, + "num_input_tokens_seen": 282411144, + "step": 5041 + }, + { + "epoch": 11.2271714922049, + "loss": 0.5568938851356506, + "loss_ce": 0.00013120746007189155, + "loss_iou": 0.236328125, + "loss_num": 0.0167236328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 282411144, + "step": 5041 + }, + { + "epoch": 11.229398663697104, + "grad_norm": 20.767990112304688, + "learning_rate": 1e-06, + "loss": 0.5745, + "num_input_tokens_seen": 282466800, + "step": 5042 + }, + { + "epoch": 11.229398663697104, + "loss": 0.35937535762786865, + "loss_ce": 0.00012243175297044218, + "loss_iou": 0.162109375, + "loss_num": 0.006927490234375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 282466800, + "step": 5042 + }, + { + "epoch": 11.231625835189309, + "grad_norm": 18.92509651184082, + "learning_rate": 1e-06, + "loss": 0.4772, + "num_input_tokens_seen": 282524332, + "step": 5043 + }, + { + "epoch": 11.231625835189309, + "loss": 0.47509121894836426, + "loss_ce": 0.00011561952851479873, + "loss_iou": 0.22265625, + "loss_num": 0.006011962890625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 282524332, + "step": 5043 + }, + { + "epoch": 11.233853006681514, + "grad_norm": 22.10957908630371, + "learning_rate": 1e-06, + "loss": 0.5585, + "num_input_tokens_seen": 282580732, + "step": 5044 + }, + { + "epoch": 11.233853006681514, + "loss": 0.7198188304901123, + "loss_ce": 0.00033639208413660526, + "loss_iou": 0.306640625, + "loss_num": 0.0213623046875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 282580732, + "step": 5044 + }, + { + "epoch": 11.236080178173719, + "grad_norm": 20.335216522216797, + "learning_rate": 1e-06, + "loss": 0.3889, + "num_input_tokens_seen": 282638292, + "step": 5045 + }, + { + "epoch": 11.236080178173719, + "loss": 0.44411370158195496, + "loss_ce": 0.00014397443737834692, + "loss_iou": 0.2041015625, + "loss_num": 0.00701904296875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 282638292, + "step": 5045 + }, + { + "epoch": 11.238307349665924, + "grad_norm": 17.235029220581055, + "learning_rate": 1e-06, + "loss": 0.5289, + "num_input_tokens_seen": 282695300, + "step": 5046 + }, + { + "epoch": 11.238307349665924, + "loss": 0.5496923923492432, + "loss_ce": 0.00013185839634388685, + "loss_iou": 0.23828125, + "loss_num": 0.014404296875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 282695300, + "step": 5046 + }, + { + "epoch": 11.240534521158128, + "grad_norm": 15.562467575073242, + "learning_rate": 1e-06, + "loss": 0.7311, + "num_input_tokens_seen": 282751756, + "step": 5047 + }, + { + "epoch": 11.240534521158128, + "loss": 0.8263590335845947, + "loss_ce": 0.00018715820624493062, + "loss_iou": 0.33984375, + "loss_num": 0.029052734375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 282751756, + "step": 5047 + }, + { + "epoch": 11.242761692650333, + "grad_norm": 12.08515453338623, + "learning_rate": 1e-06, + "loss": 0.5536, + "num_input_tokens_seen": 282807936, + "step": 5048 + }, + { + "epoch": 11.242761692650333, + "loss": 0.4205526113510132, + "loss_ce": 0.00014246124192140996, + "loss_iou": 0.1904296875, + "loss_num": 0.0078125, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 282807936, + "step": 5048 + }, + { + "epoch": 11.244988864142538, + "grad_norm": 15.926555633544922, + "learning_rate": 1e-06, + "loss": 0.5036, + "num_input_tokens_seen": 282863292, + "step": 5049 + }, + { + "epoch": 11.244988864142538, + "loss": 0.6954518556594849, + "loss_ce": 0.00013932373258285224, + "loss_iou": 0.298828125, + "loss_num": 0.0194091796875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 282863292, + "step": 5049 + }, + { + "epoch": 11.247216035634743, + "grad_norm": 41.89063262939453, + "learning_rate": 1e-06, + "loss": 0.405, + "num_input_tokens_seen": 282917964, + "step": 5050 + }, + { + "epoch": 11.247216035634743, + "loss": 0.4797305464744568, + "loss_ce": 0.00011627860658336431, + "loss_iou": 0.216796875, + "loss_num": 0.00909423828125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 282917964, + "step": 5050 + }, + { + "epoch": 11.249443207126948, + "grad_norm": 20.080503463745117, + "learning_rate": 1e-06, + "loss": 0.5513, + "num_input_tokens_seen": 282975304, + "step": 5051 + }, + { + "epoch": 11.249443207126948, + "loss": 0.5874269604682922, + "loss_ce": 0.0001466784015065059, + "loss_iou": 0.255859375, + "loss_num": 0.0150146484375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 282975304, + "step": 5051 + }, + { + "epoch": 11.251670378619155, + "grad_norm": 18.951017379760742, + "learning_rate": 1e-06, + "loss": 0.4595, + "num_input_tokens_seen": 283031220, + "step": 5052 + }, + { + "epoch": 11.251670378619155, + "loss": 0.43687593936920166, + "loss_ce": 0.00010837505396921188, + "loss_iou": 0.1796875, + "loss_num": 0.01556396484375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 283031220, + "step": 5052 + }, + { + "epoch": 11.25389755011136, + "grad_norm": 20.08397674560547, + "learning_rate": 1e-06, + "loss": 0.5126, + "num_input_tokens_seen": 283088852, + "step": 5053 + }, + { + "epoch": 11.25389755011136, + "loss": 0.47264423966407776, + "loss_ce": 0.00011004768748534843, + "loss_iou": 0.197265625, + "loss_num": 0.015625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 283088852, + "step": 5053 + }, + { + "epoch": 11.256124721603564, + "grad_norm": 23.235828399658203, + "learning_rate": 1e-06, + "loss": 0.6491, + "num_input_tokens_seen": 283146176, + "step": 5054 + }, + { + "epoch": 11.256124721603564, + "loss": 0.5091769695281982, + "loss_ce": 0.000143796467455104, + "loss_iou": 0.21484375, + "loss_num": 0.015869140625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 283146176, + "step": 5054 + }, + { + "epoch": 11.25835189309577, + "grad_norm": 18.110916137695312, + "learning_rate": 1e-06, + "loss": 0.4254, + "num_input_tokens_seen": 283201764, + "step": 5055 + }, + { + "epoch": 11.25835189309577, + "loss": 0.3657214045524597, + "loss_ce": 0.00012084872287232429, + "loss_iou": 0.1640625, + "loss_num": 0.007568359375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 283201764, + "step": 5055 + }, + { + "epoch": 11.260579064587974, + "grad_norm": 15.130694389343262, + "learning_rate": 1e-06, + "loss": 0.5045, + "num_input_tokens_seen": 283259004, + "step": 5056 + }, + { + "epoch": 11.260579064587974, + "loss": 0.401218056678772, + "loss_ce": 9.503310866421089e-05, + "loss_iou": 0.16796875, + "loss_num": 0.012939453125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 283259004, + "step": 5056 + }, + { + "epoch": 11.262806236080179, + "grad_norm": 20.411800384521484, + "learning_rate": 1e-06, + "loss": 0.6591, + "num_input_tokens_seen": 283318136, + "step": 5057 + }, + { + "epoch": 11.262806236080179, + "loss": 0.8671286702156067, + "loss_ce": 0.00018532315152697265, + "loss_iou": 0.34375, + "loss_num": 0.0361328125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 283318136, + "step": 5057 + }, + { + "epoch": 11.265033407572384, + "grad_norm": 15.510054588317871, + "learning_rate": 1e-06, + "loss": 0.5234, + "num_input_tokens_seen": 283376952, + "step": 5058 + }, + { + "epoch": 11.265033407572384, + "loss": 0.48036888241767883, + "loss_ce": 0.00014426674169953912, + "loss_iou": 0.2109375, + "loss_num": 0.01165771484375, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 283376952, + "step": 5058 + }, + { + "epoch": 11.267260579064589, + "grad_norm": 16.76605224609375, + "learning_rate": 1e-06, + "loss": 0.5593, + "num_input_tokens_seen": 283432408, + "step": 5059 + }, + { + "epoch": 11.267260579064589, + "loss": 0.5044692754745483, + "loss_ce": 0.000135770023916848, + "loss_iou": 0.220703125, + "loss_num": 0.012451171875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 283432408, + "step": 5059 + }, + { + "epoch": 11.269487750556793, + "grad_norm": 21.036209106445312, + "learning_rate": 1e-06, + "loss": 0.5646, + "num_input_tokens_seen": 283490212, + "step": 5060 + }, + { + "epoch": 11.269487750556793, + "loss": 0.7949022054672241, + "loss_ce": 0.00022446672664955258, + "loss_iou": 0.337890625, + "loss_num": 0.0238037109375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 283490212, + "step": 5060 + }, + { + "epoch": 11.271714922048998, + "grad_norm": 14.880892753601074, + "learning_rate": 1e-06, + "loss": 0.5155, + "num_input_tokens_seen": 283547208, + "step": 5061 + }, + { + "epoch": 11.271714922048998, + "loss": 0.34775838255882263, + "loss_ce": 0.00010212791676167399, + "loss_iou": 0.126953125, + "loss_num": 0.0186767578125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 283547208, + "step": 5061 + }, + { + "epoch": 11.273942093541203, + "grad_norm": 11.523423194885254, + "learning_rate": 1e-06, + "loss": 0.4296, + "num_input_tokens_seen": 283605480, + "step": 5062 + }, + { + "epoch": 11.273942093541203, + "loss": 0.2592126131057739, + "loss_ce": 0.00011838733917102218, + "loss_iou": 0.103515625, + "loss_num": 0.01043701171875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 283605480, + "step": 5062 + }, + { + "epoch": 11.276169265033408, + "grad_norm": 20.444461822509766, + "learning_rate": 1e-06, + "loss": 0.4135, + "num_input_tokens_seen": 283660960, + "step": 5063 + }, + { + "epoch": 11.276169265033408, + "loss": 0.3837610185146332, + "loss_ce": 0.0001245247694896534, + "loss_iou": 0.1611328125, + "loss_num": 0.01220703125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 283660960, + "step": 5063 + }, + { + "epoch": 11.278396436525613, + "grad_norm": 16.973899841308594, + "learning_rate": 1e-06, + "loss": 0.5658, + "num_input_tokens_seen": 283716348, + "step": 5064 + }, + { + "epoch": 11.278396436525613, + "loss": 0.47617107629776, + "loss_ce": 0.000585106376092881, + "loss_iou": 0.216796875, + "loss_num": 0.00848388671875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 283716348, + "step": 5064 + }, + { + "epoch": 11.280623608017818, + "grad_norm": 15.647051811218262, + "learning_rate": 1e-06, + "loss": 0.5102, + "num_input_tokens_seen": 283775620, + "step": 5065 + }, + { + "epoch": 11.280623608017818, + "loss": 0.4950053095817566, + "loss_ce": 0.00019331733346916735, + "loss_iou": 0.20703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 283775620, + "step": 5065 + }, + { + "epoch": 11.282850779510023, + "grad_norm": 14.765268325805664, + "learning_rate": 1e-06, + "loss": 0.4677, + "num_input_tokens_seen": 283834152, + "step": 5066 + }, + { + "epoch": 11.282850779510023, + "loss": 0.4379033148288727, + "loss_ce": 0.00012866513861808926, + "loss_iou": 0.193359375, + "loss_num": 0.01007080078125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 283834152, + "step": 5066 + }, + { + "epoch": 11.285077951002227, + "grad_norm": 18.143211364746094, + "learning_rate": 1e-06, + "loss": 0.4485, + "num_input_tokens_seen": 283891084, + "step": 5067 + }, + { + "epoch": 11.285077951002227, + "loss": 0.4209212064743042, + "loss_ce": 0.00014482939150184393, + "loss_iou": 0.185546875, + "loss_num": 0.010009765625, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 283891084, + "step": 5067 + }, + { + "epoch": 11.287305122494432, + "grad_norm": 14.855110168457031, + "learning_rate": 1e-06, + "loss": 0.3737, + "num_input_tokens_seen": 283949732, + "step": 5068 + }, + { + "epoch": 11.287305122494432, + "loss": 0.3620651960372925, + "loss_ce": 0.0001267299521714449, + "loss_iou": 0.166015625, + "loss_num": 0.005828857421875, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 283949732, + "step": 5068 + }, + { + "epoch": 11.289532293986637, + "grad_norm": 43.02797317504883, + "learning_rate": 1e-06, + "loss": 0.5399, + "num_input_tokens_seen": 284006896, + "step": 5069 + }, + { + "epoch": 11.289532293986637, + "loss": 0.5649739503860474, + "loss_ce": 0.00015458805137313902, + "loss_iou": 0.23828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 284006896, + "step": 5069 + }, + { + "epoch": 11.291759465478842, + "grad_norm": 25.057252883911133, + "learning_rate": 1e-06, + "loss": 0.5187, + "num_input_tokens_seen": 284063816, + "step": 5070 + }, + { + "epoch": 11.291759465478842, + "loss": 0.6080396175384521, + "loss_ce": 0.0001294128887820989, + "loss_iou": 0.287109375, + "loss_num": 0.007080078125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 284063816, + "step": 5070 + }, + { + "epoch": 11.293986636971047, + "grad_norm": 18.99828338623047, + "learning_rate": 1e-06, + "loss": 0.4259, + "num_input_tokens_seen": 284120160, + "step": 5071 + }, + { + "epoch": 11.293986636971047, + "loss": 0.5792200565338135, + "loss_ce": 0.00011846120469272137, + "loss_iou": 0.26953125, + "loss_num": 0.00787353515625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 284120160, + "step": 5071 + }, + { + "epoch": 11.296213808463252, + "grad_norm": 11.530062675476074, + "learning_rate": 1e-06, + "loss": 0.4847, + "num_input_tokens_seen": 284178348, + "step": 5072 + }, + { + "epoch": 11.296213808463252, + "loss": 0.4051365852355957, + "loss_ce": 0.0001073086605174467, + "loss_iou": 0.1787109375, + "loss_num": 0.00970458984375, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 284178348, + "step": 5072 + }, + { + "epoch": 11.298440979955457, + "grad_norm": 30.209646224975586, + "learning_rate": 1e-06, + "loss": 0.512, + "num_input_tokens_seen": 284237160, + "step": 5073 + }, + { + "epoch": 11.298440979955457, + "loss": 0.583868682384491, + "loss_ce": 0.00012847641482949257, + "loss_iou": 0.248046875, + "loss_num": 0.017333984375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 284237160, + "step": 5073 + }, + { + "epoch": 11.300668151447661, + "grad_norm": 23.079030990600586, + "learning_rate": 1e-06, + "loss": 0.4586, + "num_input_tokens_seen": 284293580, + "step": 5074 + }, + { + "epoch": 11.300668151447661, + "loss": 0.5660747289657593, + "loss_ce": 0.00015678048657719046, + "loss_iou": 0.25, + "loss_num": 0.01300048828125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 284293580, + "step": 5074 + }, + { + "epoch": 11.302895322939866, + "grad_norm": 18.702524185180664, + "learning_rate": 1e-06, + "loss": 0.4264, + "num_input_tokens_seen": 284350604, + "step": 5075 + }, + { + "epoch": 11.302895322939866, + "loss": 0.38073596358299255, + "loss_ce": 0.00012072990648448467, + "loss_iou": 0.1630859375, + "loss_num": 0.01092529296875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 284350604, + "step": 5075 + }, + { + "epoch": 11.305122494432071, + "grad_norm": 72.22659301757812, + "learning_rate": 1e-06, + "loss": 0.7127, + "num_input_tokens_seen": 284408168, + "step": 5076 + }, + { + "epoch": 11.305122494432071, + "loss": 0.8336408138275146, + "loss_ce": 0.00014475997886620462, + "loss_iou": 0.349609375, + "loss_num": 0.0269775390625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 284408168, + "step": 5076 + }, + { + "epoch": 11.307349665924276, + "grad_norm": 27.653568267822266, + "learning_rate": 1e-06, + "loss": 0.4948, + "num_input_tokens_seen": 284465572, + "step": 5077 + }, + { + "epoch": 11.307349665924276, + "loss": 0.4449566602706909, + "loss_ce": 0.0001324501063209027, + "loss_iou": 0.197265625, + "loss_num": 0.00994873046875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 284465572, + "step": 5077 + }, + { + "epoch": 11.309576837416481, + "grad_norm": 29.11145782470703, + "learning_rate": 1e-06, + "loss": 0.4789, + "num_input_tokens_seen": 284519732, + "step": 5078 + }, + { + "epoch": 11.309576837416481, + "loss": 0.3955211043357849, + "loss_ce": 0.0001353362895315513, + "loss_iou": 0.162109375, + "loss_num": 0.01416015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 284519732, + "step": 5078 + }, + { + "epoch": 11.311804008908686, + "grad_norm": 14.795896530151367, + "learning_rate": 1e-06, + "loss": 0.5233, + "num_input_tokens_seen": 284578024, + "step": 5079 + }, + { + "epoch": 11.311804008908686, + "loss": 0.3413148522377014, + "loss_ce": 0.00012834850349463522, + "loss_iou": 0.1591796875, + "loss_num": 0.00457763671875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 284578024, + "step": 5079 + }, + { + "epoch": 11.31403118040089, + "grad_norm": 20.22231101989746, + "learning_rate": 1e-06, + "loss": 0.3846, + "num_input_tokens_seen": 284636628, + "step": 5080 + }, + { + "epoch": 11.31403118040089, + "loss": 0.40492209792137146, + "loss_ce": 0.0001369206584058702, + "loss_iou": 0.1875, + "loss_num": 0.006103515625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 284636628, + "step": 5080 + }, + { + "epoch": 11.316258351893095, + "grad_norm": 14.39472484588623, + "learning_rate": 1e-06, + "loss": 0.5526, + "num_input_tokens_seen": 284694192, + "step": 5081 + }, + { + "epoch": 11.316258351893095, + "loss": 0.5521363019943237, + "loss_ce": 0.00013434255379252136, + "loss_iou": 0.236328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 284694192, + "step": 5081 + }, + { + "epoch": 11.3184855233853, + "grad_norm": 15.9749755859375, + "learning_rate": 1e-06, + "loss": 0.4355, + "num_input_tokens_seen": 284752428, + "step": 5082 + }, + { + "epoch": 11.3184855233853, + "loss": 0.5515244603157043, + "loss_ce": 0.00013286221656017005, + "loss_iou": 0.236328125, + "loss_num": 0.0157470703125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 284752428, + "step": 5082 + }, + { + "epoch": 11.320712694877505, + "grad_norm": 17.713111877441406, + "learning_rate": 1e-06, + "loss": 0.4379, + "num_input_tokens_seen": 284806252, + "step": 5083 + }, + { + "epoch": 11.320712694877505, + "loss": 0.44035178422927856, + "loss_ce": 0.0001662498980294913, + "loss_iou": 0.2001953125, + "loss_num": 0.008056640625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 284806252, + "step": 5083 + }, + { + "epoch": 11.32293986636971, + "grad_norm": 16.1520938873291, + "learning_rate": 1e-06, + "loss": 0.5477, + "num_input_tokens_seen": 284863332, + "step": 5084 + }, + { + "epoch": 11.32293986636971, + "loss": 0.6391512155532837, + "loss_ce": 0.000357240904122591, + "loss_iou": 0.26953125, + "loss_num": 0.020263671875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 284863332, + "step": 5084 + }, + { + "epoch": 11.325167037861915, + "grad_norm": 19.611209869384766, + "learning_rate": 1e-06, + "loss": 0.4462, + "num_input_tokens_seen": 284919672, + "step": 5085 + }, + { + "epoch": 11.325167037861915, + "loss": 0.37651515007019043, + "loss_ce": 0.00017235177801921964, + "loss_iou": 0.1650390625, + "loss_num": 0.0093994140625, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 284919672, + "step": 5085 + }, + { + "epoch": 11.32739420935412, + "grad_norm": 17.526779174804688, + "learning_rate": 1e-06, + "loss": 0.6155, + "num_input_tokens_seen": 284977908, + "step": 5086 + }, + { + "epoch": 11.32739420935412, + "loss": 0.48550522327423096, + "loss_ce": 0.00015366033767350018, + "loss_iou": 0.220703125, + "loss_num": 0.00885009765625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 284977908, + "step": 5086 + }, + { + "epoch": 11.329621380846325, + "grad_norm": 14.409747123718262, + "learning_rate": 1e-06, + "loss": 0.4557, + "num_input_tokens_seen": 285033440, + "step": 5087 + }, + { + "epoch": 11.329621380846325, + "loss": 0.44422948360443115, + "loss_ce": 0.00013769487850368023, + "loss_iou": 0.177734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 285033440, + "step": 5087 + }, + { + "epoch": 11.33184855233853, + "grad_norm": 30.620845794677734, + "learning_rate": 1e-06, + "loss": 0.55, + "num_input_tokens_seen": 285089588, + "step": 5088 + }, + { + "epoch": 11.33184855233853, + "loss": 0.440792441368103, + "loss_ce": 0.00011861581879202276, + "loss_iou": 0.1865234375, + "loss_num": 0.013427734375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 285089588, + "step": 5088 + }, + { + "epoch": 11.334075723830734, + "grad_norm": 21.544681549072266, + "learning_rate": 1e-06, + "loss": 0.496, + "num_input_tokens_seen": 285144700, + "step": 5089 + }, + { + "epoch": 11.334075723830734, + "loss": 0.5140707492828369, + "loss_ce": 0.00015477146371267736, + "loss_iou": 0.2255859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 285144700, + "step": 5089 + }, + { + "epoch": 11.33630289532294, + "grad_norm": 26.6117000579834, + "learning_rate": 1e-06, + "loss": 0.5348, + "num_input_tokens_seen": 285200876, + "step": 5090 + }, + { + "epoch": 11.33630289532294, + "loss": 0.5355468392372131, + "loss_ce": 0.00014644389739260077, + "loss_iou": 0.2431640625, + "loss_num": 0.0098876953125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 285200876, + "step": 5090 + }, + { + "epoch": 11.338530066815144, + "grad_norm": 17.32594871520996, + "learning_rate": 1e-06, + "loss": 0.6661, + "num_input_tokens_seen": 285258992, + "step": 5091 + }, + { + "epoch": 11.338530066815144, + "loss": 0.8300636410713196, + "loss_ce": 0.00022966302640270442, + "loss_iou": 0.33203125, + "loss_num": 0.032958984375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 285258992, + "step": 5091 + }, + { + "epoch": 11.340757238307349, + "grad_norm": 26.523305892944336, + "learning_rate": 1e-06, + "loss": 0.6046, + "num_input_tokens_seen": 285314720, + "step": 5092 + }, + { + "epoch": 11.340757238307349, + "loss": 0.6371076107025146, + "loss_ce": 0.0001447718241252005, + "loss_iou": 0.2734375, + "loss_num": 0.017822265625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 285314720, + "step": 5092 + }, + { + "epoch": 11.342984409799554, + "grad_norm": 23.49717140197754, + "learning_rate": 1e-06, + "loss": 0.4436, + "num_input_tokens_seen": 285372940, + "step": 5093 + }, + { + "epoch": 11.342984409799554, + "loss": 0.5071090459823608, + "loss_ce": 0.00015102185716386884, + "loss_iou": 0.220703125, + "loss_num": 0.01318359375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 285372940, + "step": 5093 + }, + { + "epoch": 11.345211581291759, + "grad_norm": 21.801851272583008, + "learning_rate": 1e-06, + "loss": 0.5209, + "num_input_tokens_seen": 285429172, + "step": 5094 + }, + { + "epoch": 11.345211581291759, + "loss": 0.526289701461792, + "loss_ce": 0.00016666974988766015, + "loss_iou": 0.2255859375, + "loss_num": 0.01507568359375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 285429172, + "step": 5094 + }, + { + "epoch": 11.347438752783964, + "grad_norm": 16.54652214050293, + "learning_rate": 1e-06, + "loss": 0.6023, + "num_input_tokens_seen": 285484976, + "step": 5095 + }, + { + "epoch": 11.347438752783964, + "loss": 0.5767943263053894, + "loss_ce": 0.00013418751768767834, + "loss_iou": 0.25390625, + "loss_num": 0.01318359375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 285484976, + "step": 5095 + }, + { + "epoch": 11.34966592427617, + "grad_norm": 21.48624610900879, + "learning_rate": 1e-06, + "loss": 0.5497, + "num_input_tokens_seen": 285540752, + "step": 5096 + }, + { + "epoch": 11.34966592427617, + "loss": 0.6203560829162598, + "loss_ce": 0.00023887879797257483, + "loss_iou": 0.2578125, + "loss_num": 0.0211181640625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 285540752, + "step": 5096 + }, + { + "epoch": 11.351893095768375, + "grad_norm": 24.3822021484375, + "learning_rate": 1e-06, + "loss": 0.553, + "num_input_tokens_seen": 285594876, + "step": 5097 + }, + { + "epoch": 11.351893095768375, + "loss": 0.4341278076171875, + "loss_ce": 0.00010682163701858371, + "loss_iou": 0.1748046875, + "loss_num": 0.0166015625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 285594876, + "step": 5097 + }, + { + "epoch": 11.35412026726058, + "grad_norm": 16.19316864013672, + "learning_rate": 1e-06, + "loss": 0.5853, + "num_input_tokens_seen": 285650340, + "step": 5098 + }, + { + "epoch": 11.35412026726058, + "loss": 0.7105726003646851, + "loss_ce": 0.00012335649807937443, + "loss_iou": 0.306640625, + "loss_num": 0.019287109375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 285650340, + "step": 5098 + }, + { + "epoch": 11.356347438752785, + "grad_norm": 17.640634536743164, + "learning_rate": 1e-06, + "loss": 0.516, + "num_input_tokens_seen": 285706040, + "step": 5099 + }, + { + "epoch": 11.356347438752785, + "loss": 0.6544548273086548, + "loss_ce": 0.0001579629024490714, + "loss_iou": 0.287109375, + "loss_num": 0.016357421875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 285706040, + "step": 5099 + }, + { + "epoch": 11.35857461024499, + "grad_norm": 22.37962532043457, + "learning_rate": 1e-06, + "loss": 0.7185, + "num_input_tokens_seen": 285762072, + "step": 5100 + }, + { + "epoch": 11.35857461024499, + "loss": 0.8059933185577393, + "loss_ce": 0.00020718795713037252, + "loss_iou": 0.306640625, + "loss_num": 0.038330078125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 285762072, + "step": 5100 + }, + { + "epoch": 11.360801781737194, + "grad_norm": 14.851436614990234, + "learning_rate": 1e-06, + "loss": 0.4088, + "num_input_tokens_seen": 285818724, + "step": 5101 + }, + { + "epoch": 11.360801781737194, + "loss": 0.40162622928619385, + "loss_ce": 0.0001369684759993106, + "loss_iou": 0.1806640625, + "loss_num": 0.0081787109375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 285818724, + "step": 5101 + }, + { + "epoch": 11.3630289532294, + "grad_norm": 16.314359664916992, + "learning_rate": 1e-06, + "loss": 0.4846, + "num_input_tokens_seen": 285875484, + "step": 5102 + }, + { + "epoch": 11.3630289532294, + "loss": 0.46129605174064636, + "loss_ce": 0.00011440047819633037, + "loss_iou": 0.208984375, + "loss_num": 0.0084228515625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 285875484, + "step": 5102 + }, + { + "epoch": 11.365256124721604, + "grad_norm": 23.65479278564453, + "learning_rate": 1e-06, + "loss": 0.5254, + "num_input_tokens_seen": 285931436, + "step": 5103 + }, + { + "epoch": 11.365256124721604, + "loss": 0.6119776964187622, + "loss_ce": 0.00016128391143865883, + "loss_iou": 0.267578125, + "loss_num": 0.015380859375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 285931436, + "step": 5103 + }, + { + "epoch": 11.367483296213809, + "grad_norm": 19.001941680908203, + "learning_rate": 1e-06, + "loss": 0.522, + "num_input_tokens_seen": 285987400, + "step": 5104 + }, + { + "epoch": 11.367483296213809, + "loss": 0.6559271216392517, + "loss_ce": 0.0001653625804465264, + "loss_iou": 0.28125, + "loss_num": 0.0184326171875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 285987400, + "step": 5104 + }, + { + "epoch": 11.369710467706014, + "grad_norm": 22.59067153930664, + "learning_rate": 1e-06, + "loss": 0.5637, + "num_input_tokens_seen": 286040144, + "step": 5105 + }, + { + "epoch": 11.369710467706014, + "loss": 0.5408531427383423, + "loss_ce": 0.00032578702666796744, + "loss_iou": 0.2470703125, + "loss_num": 0.009033203125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 286040144, + "step": 5105 + }, + { + "epoch": 11.371937639198219, + "grad_norm": 13.022751808166504, + "learning_rate": 1e-06, + "loss": 0.4658, + "num_input_tokens_seen": 286095652, + "step": 5106 + }, + { + "epoch": 11.371937639198219, + "loss": 0.49904271960258484, + "loss_ce": 0.0001413495047017932, + "loss_iou": 0.2099609375, + "loss_num": 0.015869140625, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 286095652, + "step": 5106 + }, + { + "epoch": 11.374164810690424, + "grad_norm": 17.726234436035156, + "learning_rate": 1e-06, + "loss": 0.4394, + "num_input_tokens_seen": 286153252, + "step": 5107 + }, + { + "epoch": 11.374164810690424, + "loss": 0.4727289080619812, + "loss_ce": 0.00019473947759252042, + "loss_iou": 0.189453125, + "loss_num": 0.0186767578125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 286153252, + "step": 5107 + }, + { + "epoch": 11.376391982182628, + "grad_norm": 20.799928665161133, + "learning_rate": 1e-06, + "loss": 0.5046, + "num_input_tokens_seen": 286210432, + "step": 5108 + }, + { + "epoch": 11.376391982182628, + "loss": 0.666250467300415, + "loss_ce": 0.00023488188162446022, + "loss_iou": 0.26953125, + "loss_num": 0.02490234375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 286210432, + "step": 5108 + }, + { + "epoch": 11.378619153674833, + "grad_norm": 21.947765350341797, + "learning_rate": 1e-06, + "loss": 0.4669, + "num_input_tokens_seen": 286266940, + "step": 5109 + }, + { + "epoch": 11.378619153674833, + "loss": 0.5005042552947998, + "loss_ce": 0.00013799341104459018, + "loss_iou": 0.22265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.5, + "num_input_tokens_seen": 286266940, + "step": 5109 + }, + { + "epoch": 11.380846325167038, + "grad_norm": 15.626119613647461, + "learning_rate": 1e-06, + "loss": 0.5814, + "num_input_tokens_seen": 286321024, + "step": 5110 + }, + { + "epoch": 11.380846325167038, + "loss": 0.5123552680015564, + "loss_ce": 0.0001482515363022685, + "loss_iou": 0.228515625, + "loss_num": 0.01129150390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 286321024, + "step": 5110 + }, + { + "epoch": 11.383073496659243, + "grad_norm": 20.23106575012207, + "learning_rate": 1e-06, + "loss": 0.4268, + "num_input_tokens_seen": 286376468, + "step": 5111 + }, + { + "epoch": 11.383073496659243, + "loss": 0.46500349044799805, + "loss_ce": 0.00015975304995663464, + "loss_iou": 0.21875, + "loss_num": 0.005584716796875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 286376468, + "step": 5111 + }, + { + "epoch": 11.385300668151448, + "grad_norm": 16.623905181884766, + "learning_rate": 1e-06, + "loss": 0.5151, + "num_input_tokens_seen": 286429412, + "step": 5112 + }, + { + "epoch": 11.385300668151448, + "loss": 0.46804624795913696, + "loss_ce": 0.00015074793191161007, + "loss_iou": 0.181640625, + "loss_num": 0.0206298828125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 286429412, + "step": 5112 + }, + { + "epoch": 11.387527839643653, + "grad_norm": 20.689804077148438, + "learning_rate": 1e-06, + "loss": 0.3954, + "num_input_tokens_seen": 286486412, + "step": 5113 + }, + { + "epoch": 11.387527839643653, + "loss": 0.3609923720359802, + "loss_ce": 0.00015251885633915663, + "loss_iou": 0.1640625, + "loss_num": 0.006500244140625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 286486412, + "step": 5113 + }, + { + "epoch": 11.389755011135858, + "grad_norm": 28.57643699645996, + "learning_rate": 1e-06, + "loss": 0.5011, + "num_input_tokens_seen": 286541332, + "step": 5114 + }, + { + "epoch": 11.389755011135858, + "loss": 0.4327373206615448, + "loss_ce": 0.00012012844672426581, + "loss_iou": 0.1923828125, + "loss_num": 0.009765625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 286541332, + "step": 5114 + }, + { + "epoch": 11.391982182628063, + "grad_norm": 15.638270378112793, + "learning_rate": 1e-06, + "loss": 0.5359, + "num_input_tokens_seen": 286599148, + "step": 5115 + }, + { + "epoch": 11.391982182628063, + "loss": 0.4875502586364746, + "loss_ce": 0.0001235070376424119, + "loss_iou": 0.2119140625, + "loss_num": 0.01263427734375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 286599148, + "step": 5115 + }, + { + "epoch": 11.394209354120267, + "grad_norm": 22.461973190307617, + "learning_rate": 1e-06, + "loss": 0.5282, + "num_input_tokens_seen": 286656460, + "step": 5116 + }, + { + "epoch": 11.394209354120267, + "loss": 0.7095192074775696, + "loss_ce": 0.00016860665346030146, + "loss_iou": 0.28515625, + "loss_num": 0.0277099609375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 286656460, + "step": 5116 + }, + { + "epoch": 11.396436525612472, + "grad_norm": 18.777915954589844, + "learning_rate": 1e-06, + "loss": 0.5819, + "num_input_tokens_seen": 286711268, + "step": 5117 + }, + { + "epoch": 11.396436525612472, + "loss": 0.5128494501113892, + "loss_ce": 0.00015413996879942715, + "loss_iou": 0.2158203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 286711268, + "step": 5117 + }, + { + "epoch": 11.398663697104677, + "grad_norm": 18.205013275146484, + "learning_rate": 1e-06, + "loss": 0.4723, + "num_input_tokens_seen": 286767632, + "step": 5118 + }, + { + "epoch": 11.398663697104677, + "loss": 0.44845569133758545, + "loss_ce": 0.0001524692343082279, + "loss_iou": 0.2060546875, + "loss_num": 0.007232666015625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 286767632, + "step": 5118 + }, + { + "epoch": 11.400890868596882, + "grad_norm": 14.385238647460938, + "learning_rate": 1e-06, + "loss": 0.5913, + "num_input_tokens_seen": 286824064, + "step": 5119 + }, + { + "epoch": 11.400890868596882, + "loss": 0.6861345767974854, + "loss_ce": 9.943717304849997e-05, + "loss_iou": 0.310546875, + "loss_num": 0.013427734375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 286824064, + "step": 5119 + }, + { + "epoch": 11.403118040089087, + "grad_norm": 15.494654655456543, + "learning_rate": 1e-06, + "loss": 0.3787, + "num_input_tokens_seen": 286881632, + "step": 5120 + }, + { + "epoch": 11.403118040089087, + "loss": 0.36211222410202026, + "loss_ce": 0.00011270066897850484, + "loss_iou": 0.158203125, + "loss_num": 0.0089111328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 286881632, + "step": 5120 + }, + { + "epoch": 11.405345211581292, + "grad_norm": 17.95199203491211, + "learning_rate": 1e-06, + "loss": 0.711, + "num_input_tokens_seen": 286934124, + "step": 5121 + }, + { + "epoch": 11.405345211581292, + "loss": 0.7193175554275513, + "loss_ce": 0.00020134558144491166, + "loss_iou": 0.31640625, + "loss_num": 0.0172119140625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 286934124, + "step": 5121 + }, + { + "epoch": 11.407572383073497, + "grad_norm": 28.730737686157227, + "learning_rate": 1e-06, + "loss": 0.5827, + "num_input_tokens_seen": 286990012, + "step": 5122 + }, + { + "epoch": 11.407572383073497, + "loss": 0.39236217737197876, + "loss_ce": 0.00019602719112299383, + "loss_iou": 0.177734375, + "loss_num": 0.00750732421875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 286990012, + "step": 5122 + }, + { + "epoch": 11.409799554565701, + "grad_norm": 39.97563171386719, + "learning_rate": 1e-06, + "loss": 0.6908, + "num_input_tokens_seen": 287044416, + "step": 5123 + }, + { + "epoch": 11.409799554565701, + "loss": 0.6997367739677429, + "loss_ce": 0.00015180771879386157, + "loss_iou": 0.306640625, + "loss_num": 0.016845703125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 287044416, + "step": 5123 + }, + { + "epoch": 11.412026726057906, + "grad_norm": 21.006521224975586, + "learning_rate": 1e-06, + "loss": 0.5632, + "num_input_tokens_seen": 287100240, + "step": 5124 + }, + { + "epoch": 11.412026726057906, + "loss": 0.6319659352302551, + "loss_ce": 0.00012999521277379245, + "loss_iou": 0.279296875, + "loss_num": 0.01458740234375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 287100240, + "step": 5124 + }, + { + "epoch": 11.414253897550111, + "grad_norm": 17.86157989501953, + "learning_rate": 1e-06, + "loss": 0.5612, + "num_input_tokens_seen": 287157868, + "step": 5125 + }, + { + "epoch": 11.414253897550111, + "loss": 0.3876880407333374, + "loss_ce": 0.00011479659588076174, + "loss_iou": 0.162109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 287157868, + "step": 5125 + }, + { + "epoch": 11.416481069042316, + "grad_norm": 16.256969451904297, + "learning_rate": 1e-06, + "loss": 0.6867, + "num_input_tokens_seen": 287212888, + "step": 5126 + }, + { + "epoch": 11.416481069042316, + "loss": 0.6945188045501709, + "loss_ce": 0.00012188311666250229, + "loss_iou": 0.28515625, + "loss_num": 0.025146484375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 287212888, + "step": 5126 + }, + { + "epoch": 11.41870824053452, + "grad_norm": 36.22967529296875, + "learning_rate": 1e-06, + "loss": 0.6743, + "num_input_tokens_seen": 287268684, + "step": 5127 + }, + { + "epoch": 11.41870824053452, + "loss": 0.8071569204330444, + "loss_ce": 0.00015009319758974016, + "loss_iou": 0.375, + "loss_num": 0.01123046875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 287268684, + "step": 5127 + }, + { + "epoch": 11.420935412026726, + "grad_norm": 18.855321884155273, + "learning_rate": 1e-06, + "loss": 0.6559, + "num_input_tokens_seen": 287326204, + "step": 5128 + }, + { + "epoch": 11.420935412026726, + "loss": 0.6211212873458862, + "loss_ce": 0.00014961831038817763, + "loss_iou": 0.265625, + "loss_num": 0.017578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 287326204, + "step": 5128 + }, + { + "epoch": 11.42316258351893, + "grad_norm": 13.376533508300781, + "learning_rate": 1e-06, + "loss": 0.5103, + "num_input_tokens_seen": 287381800, + "step": 5129 + }, + { + "epoch": 11.42316258351893, + "loss": 0.5299174189567566, + "loss_ce": 0.0001627803430892527, + "loss_iou": 0.2294921875, + "loss_num": 0.0142822265625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 287381800, + "step": 5129 + }, + { + "epoch": 11.425389755011135, + "grad_norm": 22.959978103637695, + "learning_rate": 1e-06, + "loss": 0.5615, + "num_input_tokens_seen": 287435956, + "step": 5130 + }, + { + "epoch": 11.425389755011135, + "loss": 0.45396700501441956, + "loss_ce": 0.0001095635088859126, + "loss_iou": 0.1923828125, + "loss_num": 0.013916015625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 287435956, + "step": 5130 + }, + { + "epoch": 11.42761692650334, + "grad_norm": 25.492908477783203, + "learning_rate": 1e-06, + "loss": 0.5793, + "num_input_tokens_seen": 287491780, + "step": 5131 + }, + { + "epoch": 11.42761692650334, + "loss": 0.6335743069648743, + "loss_ce": 0.0001514378236606717, + "loss_iou": 0.259765625, + "loss_num": 0.0230712890625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 287491780, + "step": 5131 + }, + { + "epoch": 11.429844097995545, + "grad_norm": 24.56211280822754, + "learning_rate": 1e-06, + "loss": 0.6357, + "num_input_tokens_seen": 287548600, + "step": 5132 + }, + { + "epoch": 11.429844097995545, + "loss": 0.28725236654281616, + "loss_ce": 0.000387123815016821, + "loss_iou": 0.1162109375, + "loss_num": 0.0108642578125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 287548600, + "step": 5132 + }, + { + "epoch": 11.43207126948775, + "grad_norm": 19.3801212310791, + "learning_rate": 1e-06, + "loss": 0.6795, + "num_input_tokens_seen": 287603800, + "step": 5133 + }, + { + "epoch": 11.43207126948775, + "loss": 0.7802882194519043, + "loss_ce": 0.00013687220052815974, + "loss_iou": 0.3203125, + "loss_num": 0.0277099609375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 287603800, + "step": 5133 + }, + { + "epoch": 11.434298440979955, + "grad_norm": 32.62427520751953, + "learning_rate": 1e-06, + "loss": 0.658, + "num_input_tokens_seen": 287658344, + "step": 5134 + }, + { + "epoch": 11.434298440979955, + "loss": 0.539149820804596, + "loss_ce": 0.0001483388477936387, + "loss_iou": 0.232421875, + "loss_num": 0.01513671875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 287658344, + "step": 5134 + }, + { + "epoch": 11.43652561247216, + "grad_norm": 36.44560623168945, + "learning_rate": 1e-06, + "loss": 0.6056, + "num_input_tokens_seen": 287713276, + "step": 5135 + }, + { + "epoch": 11.43652561247216, + "loss": 0.49588677287101746, + "loss_ce": 0.0001592589687788859, + "loss_iou": 0.2265625, + "loss_num": 0.00848388671875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 287713276, + "step": 5135 + }, + { + "epoch": 11.438752783964365, + "grad_norm": 29.658565521240234, + "learning_rate": 1e-06, + "loss": 0.3693, + "num_input_tokens_seen": 287770612, + "step": 5136 + }, + { + "epoch": 11.438752783964365, + "loss": 0.3307318091392517, + "loss_ce": 0.000165409262990579, + "loss_iou": 0.1494140625, + "loss_num": 0.0064697265625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 287770612, + "step": 5136 + }, + { + "epoch": 11.44097995545657, + "grad_norm": 16.48049545288086, + "learning_rate": 1e-06, + "loss": 0.508, + "num_input_tokens_seen": 287824620, + "step": 5137 + }, + { + "epoch": 11.44097995545657, + "loss": 0.45837414264678955, + "loss_ce": 0.00012218940537422895, + "loss_iou": 0.1953125, + "loss_num": 0.01336669921875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 287824620, + "step": 5137 + }, + { + "epoch": 11.443207126948774, + "grad_norm": 22.14146614074707, + "learning_rate": 1e-06, + "loss": 0.5379, + "num_input_tokens_seen": 287882580, + "step": 5138 + }, + { + "epoch": 11.443207126948774, + "loss": 0.5207144021987915, + "loss_ce": 0.0001455346355214715, + "loss_iou": 0.216796875, + "loss_num": 0.017333984375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 287882580, + "step": 5138 + }, + { + "epoch": 11.44543429844098, + "grad_norm": 19.23894500732422, + "learning_rate": 1e-06, + "loss": 0.3896, + "num_input_tokens_seen": 287938492, + "step": 5139 + }, + { + "epoch": 11.44543429844098, + "loss": 0.3553389310836792, + "loss_ce": 0.0001143506815424189, + "loss_iou": 0.154296875, + "loss_num": 0.0093994140625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 287938492, + "step": 5139 + }, + { + "epoch": 11.447661469933184, + "grad_norm": 17.079357147216797, + "learning_rate": 1e-06, + "loss": 0.5968, + "num_input_tokens_seen": 287994108, + "step": 5140 + }, + { + "epoch": 11.447661469933184, + "loss": 0.5203468203544617, + "loss_ce": 0.00014417944476008415, + "loss_iou": 0.236328125, + "loss_num": 0.009521484375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 287994108, + "step": 5140 + }, + { + "epoch": 11.449888641425389, + "grad_norm": 14.164678573608398, + "learning_rate": 1e-06, + "loss": 0.5193, + "num_input_tokens_seen": 288049672, + "step": 5141 + }, + { + "epoch": 11.449888641425389, + "loss": 0.4837890863418579, + "loss_ce": 0.0001465213717892766, + "loss_iou": 0.2060546875, + "loss_num": 0.014404296875, + "loss_xval": 0.484375, + "num_input_tokens_seen": 288049672, + "step": 5141 + }, + { + "epoch": 11.452115812917596, + "grad_norm": 20.210201263427734, + "learning_rate": 1e-06, + "loss": 0.7254, + "num_input_tokens_seen": 288106140, + "step": 5142 + }, + { + "epoch": 11.452115812917596, + "loss": 0.6080299615859985, + "loss_ce": 0.00011977371468674392, + "loss_iou": 0.267578125, + "loss_num": 0.01470947265625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 288106140, + "step": 5142 + }, + { + "epoch": 11.4543429844098, + "grad_norm": 20.572181701660156, + "learning_rate": 1e-06, + "loss": 0.4897, + "num_input_tokens_seen": 288163140, + "step": 5143 + }, + { + "epoch": 11.4543429844098, + "loss": 0.49498531222343445, + "loss_ce": 0.00011225108028156683, + "loss_iou": 0.220703125, + "loss_num": 0.01068115234375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 288163140, + "step": 5143 + }, + { + "epoch": 11.456570155902005, + "grad_norm": 24.588138580322266, + "learning_rate": 1e-06, + "loss": 0.5276, + "num_input_tokens_seen": 288219856, + "step": 5144 + }, + { + "epoch": 11.456570155902005, + "loss": 0.5592126846313477, + "loss_ce": 0.0003747770097106695, + "loss_iou": 0.240234375, + "loss_num": 0.015625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 288219856, + "step": 5144 + }, + { + "epoch": 11.45879732739421, + "grad_norm": 16.87946319580078, + "learning_rate": 1e-06, + "loss": 0.3826, + "num_input_tokens_seen": 288276876, + "step": 5145 + }, + { + "epoch": 11.45879732739421, + "loss": 0.4041239619255066, + "loss_ce": 0.0008036848739720881, + "loss_iou": 0.1689453125, + "loss_num": 0.01318359375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 288276876, + "step": 5145 + }, + { + "epoch": 11.461024498886415, + "grad_norm": 34.00703811645508, + "learning_rate": 1e-06, + "loss": 0.5651, + "num_input_tokens_seen": 288334908, + "step": 5146 + }, + { + "epoch": 11.461024498886415, + "loss": 0.4293323755264282, + "loss_ce": 0.0001331909152213484, + "loss_iou": 0.2001953125, + "loss_num": 0.0059814453125, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 288334908, + "step": 5146 + }, + { + "epoch": 11.46325167037862, + "grad_norm": 21.397933959960938, + "learning_rate": 1e-06, + "loss": 0.4555, + "num_input_tokens_seen": 288393052, + "step": 5147 + }, + { + "epoch": 11.46325167037862, + "loss": 0.4838143587112427, + "loss_ce": 0.00017176388064399362, + "loss_iou": 0.203125, + "loss_num": 0.015380859375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 288393052, + "step": 5147 + }, + { + "epoch": 11.465478841870825, + "grad_norm": 26.195497512817383, + "learning_rate": 1e-06, + "loss": 0.4912, + "num_input_tokens_seen": 288445524, + "step": 5148 + }, + { + "epoch": 11.465478841870825, + "loss": 0.5275430679321289, + "loss_ce": 0.0008096360834315419, + "loss_iou": 0.2236328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 288445524, + "step": 5148 + }, + { + "epoch": 11.46770601336303, + "grad_norm": 24.309276580810547, + "learning_rate": 1e-06, + "loss": 0.4972, + "num_input_tokens_seen": 288500740, + "step": 5149 + }, + { + "epoch": 11.46770601336303, + "loss": 0.623292088508606, + "loss_ce": 0.00012314121704548597, + "loss_iou": 0.263671875, + "loss_num": 0.0191650390625, + "loss_xval": 0.625, + "num_input_tokens_seen": 288500740, + "step": 5149 + }, + { + "epoch": 11.469933184855234, + "grad_norm": 16.2718448638916, + "learning_rate": 1e-06, + "loss": 0.6084, + "num_input_tokens_seen": 288553616, + "step": 5150 + }, + { + "epoch": 11.469933184855234, + "loss": 0.4980197846889496, + "loss_ce": 9.498859435552731e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 288553616, + "step": 5150 + }, + { + "epoch": 11.47216035634744, + "grad_norm": 14.356439590454102, + "learning_rate": 1e-06, + "loss": 0.4675, + "num_input_tokens_seen": 288611856, + "step": 5151 + }, + { + "epoch": 11.47216035634744, + "loss": 0.6592938303947449, + "loss_ce": 0.00011415663175284863, + "loss_iou": 0.287109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 288611856, + "step": 5151 + }, + { + "epoch": 11.474387527839644, + "grad_norm": 15.644416809082031, + "learning_rate": 1e-06, + "loss": 0.5468, + "num_input_tokens_seen": 288669496, + "step": 5152 + }, + { + "epoch": 11.474387527839644, + "loss": 0.4559364318847656, + "loss_ce": 0.00012589515245053917, + "loss_iou": 0.189453125, + "loss_num": 0.01531982421875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 288669496, + "step": 5152 + }, + { + "epoch": 11.476614699331849, + "grad_norm": 17.78461265563965, + "learning_rate": 1e-06, + "loss": 0.4928, + "num_input_tokens_seen": 288727056, + "step": 5153 + }, + { + "epoch": 11.476614699331849, + "loss": 0.6607722043991089, + "loss_ce": 0.00012771939509548247, + "loss_iou": 0.2578125, + "loss_num": 0.0289306640625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 288727056, + "step": 5153 + }, + { + "epoch": 11.478841870824054, + "grad_norm": 12.651546478271484, + "learning_rate": 1e-06, + "loss": 0.4647, + "num_input_tokens_seen": 288782852, + "step": 5154 + }, + { + "epoch": 11.478841870824054, + "loss": 0.3253457248210907, + "loss_ce": 0.00015041950973682106, + "loss_iou": 0.1484375, + "loss_num": 0.0054931640625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 288782852, + "step": 5154 + }, + { + "epoch": 11.481069042316259, + "grad_norm": 18.01323890686035, + "learning_rate": 1e-06, + "loss": 0.6355, + "num_input_tokens_seen": 288835892, + "step": 5155 + }, + { + "epoch": 11.481069042316259, + "loss": 0.5539816617965698, + "loss_ce": 0.00014869558799546212, + "loss_iou": 0.240234375, + "loss_num": 0.01458740234375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 288835892, + "step": 5155 + }, + { + "epoch": 11.483296213808464, + "grad_norm": 13.251718521118164, + "learning_rate": 1e-06, + "loss": 0.5962, + "num_input_tokens_seen": 288891508, + "step": 5156 + }, + { + "epoch": 11.483296213808464, + "loss": 0.8043664693832397, + "loss_ce": 0.00016726629110053182, + "loss_iou": 0.337890625, + "loss_num": 0.02587890625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 288891508, + "step": 5156 + }, + { + "epoch": 11.485523385300668, + "grad_norm": 13.777200698852539, + "learning_rate": 1e-06, + "loss": 0.4696, + "num_input_tokens_seen": 288948688, + "step": 5157 + }, + { + "epoch": 11.485523385300668, + "loss": 0.4466667175292969, + "loss_ce": 0.00013350519293453544, + "loss_iou": 0.201171875, + "loss_num": 0.0089111328125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 288948688, + "step": 5157 + }, + { + "epoch": 11.487750556792873, + "grad_norm": 19.02153968811035, + "learning_rate": 1e-06, + "loss": 0.3835, + "num_input_tokens_seen": 289004432, + "step": 5158 + }, + { + "epoch": 11.487750556792873, + "loss": 0.5000653266906738, + "loss_ce": 0.00018739307415671647, + "loss_iou": 0.19921875, + "loss_num": 0.0205078125, + "loss_xval": 0.5, + "num_input_tokens_seen": 289004432, + "step": 5158 + }, + { + "epoch": 11.489977728285078, + "grad_norm": 35.4432258605957, + "learning_rate": 1e-06, + "loss": 0.4898, + "num_input_tokens_seen": 289058552, + "step": 5159 + }, + { + "epoch": 11.489977728285078, + "loss": 0.3960947096347809, + "loss_ce": 9.86138402367942e-05, + "loss_iou": 0.162109375, + "loss_num": 0.014404296875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 289058552, + "step": 5159 + }, + { + "epoch": 11.492204899777283, + "grad_norm": 44.51033401489258, + "learning_rate": 1e-06, + "loss": 0.4918, + "num_input_tokens_seen": 289115320, + "step": 5160 + }, + { + "epoch": 11.492204899777283, + "loss": 0.5892336964607239, + "loss_ce": 0.00012237070768605918, + "loss_iou": 0.283203125, + "loss_num": 0.00482177734375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 289115320, + "step": 5160 + }, + { + "epoch": 11.494432071269488, + "grad_norm": 20.451549530029297, + "learning_rate": 1e-06, + "loss": 0.6798, + "num_input_tokens_seen": 289171932, + "step": 5161 + }, + { + "epoch": 11.494432071269488, + "loss": 0.5714766383171082, + "loss_ce": 0.00018755605560727417, + "loss_iou": 0.2421875, + "loss_num": 0.01708984375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 289171932, + "step": 5161 + }, + { + "epoch": 11.496659242761693, + "grad_norm": 17.223535537719727, + "learning_rate": 1e-06, + "loss": 0.3775, + "num_input_tokens_seen": 289227384, + "step": 5162 + }, + { + "epoch": 11.496659242761693, + "loss": 0.41967087984085083, + "loss_ce": 0.00011523931607371196, + "loss_iou": 0.1845703125, + "loss_num": 0.010009765625, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 289227384, + "step": 5162 + }, + { + "epoch": 11.498886414253898, + "grad_norm": 18.763608932495117, + "learning_rate": 1e-06, + "loss": 0.3766, + "num_input_tokens_seen": 289282336, + "step": 5163 + }, + { + "epoch": 11.498886414253898, + "loss": 0.39795833826065063, + "loss_ce": 0.0001312018430326134, + "loss_iou": 0.17578125, + "loss_num": 0.00909423828125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 289282336, + "step": 5163 + }, + { + "epoch": 11.501113585746102, + "grad_norm": 25.63532829284668, + "learning_rate": 1e-06, + "loss": 0.6739, + "num_input_tokens_seen": 289337816, + "step": 5164 + }, + { + "epoch": 11.501113585746102, + "loss": 0.7168751358985901, + "loss_ce": 0.00026139506371691823, + "loss_iou": 0.271484375, + "loss_num": 0.034912109375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 289337816, + "step": 5164 + }, + { + "epoch": 11.503340757238307, + "grad_norm": 17.458229064941406, + "learning_rate": 1e-06, + "loss": 0.5773, + "num_input_tokens_seen": 289394264, + "step": 5165 + }, + { + "epoch": 11.503340757238307, + "loss": 0.570071816444397, + "loss_ce": 0.00012553209671750665, + "loss_iou": 0.2431640625, + "loss_num": 0.0164794921875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 289394264, + "step": 5165 + }, + { + "epoch": 11.505567928730512, + "grad_norm": 15.031347274780273, + "learning_rate": 1e-06, + "loss": 0.4042, + "num_input_tokens_seen": 289452076, + "step": 5166 + }, + { + "epoch": 11.505567928730512, + "loss": 0.32777801156044006, + "loss_ce": 0.0002633580006659031, + "loss_iou": 0.14453125, + "loss_num": 0.0078125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 289452076, + "step": 5166 + }, + { + "epoch": 11.507795100222717, + "grad_norm": 17.256629943847656, + "learning_rate": 1e-06, + "loss": 0.5362, + "num_input_tokens_seen": 289507452, + "step": 5167 + }, + { + "epoch": 11.507795100222717, + "loss": 0.6130794286727905, + "loss_ce": 0.00016442046035081148, + "loss_iou": 0.25390625, + "loss_num": 0.021484375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 289507452, + "step": 5167 + }, + { + "epoch": 11.510022271714922, + "grad_norm": 23.37602424621582, + "learning_rate": 1e-06, + "loss": 0.5456, + "num_input_tokens_seen": 289562412, + "step": 5168 + }, + { + "epoch": 11.510022271714922, + "loss": 0.6275073289871216, + "loss_ce": 0.0001879626652225852, + "loss_iou": 0.28125, + "loss_num": 0.01318359375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 289562412, + "step": 5168 + }, + { + "epoch": 11.512249443207127, + "grad_norm": 28.662569046020508, + "learning_rate": 1e-06, + "loss": 0.5528, + "num_input_tokens_seen": 289618920, + "step": 5169 + }, + { + "epoch": 11.512249443207127, + "loss": 0.564295768737793, + "loss_ce": 0.00020888610742986202, + "loss_iou": 0.24609375, + "loss_num": 0.01422119140625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 289618920, + "step": 5169 + }, + { + "epoch": 11.514476614699332, + "grad_norm": 28.88167381286621, + "learning_rate": 1e-06, + "loss": 0.5267, + "num_input_tokens_seen": 289675588, + "step": 5170 + }, + { + "epoch": 11.514476614699332, + "loss": 0.46462541818618774, + "loss_ce": 0.0001478660269640386, + "loss_iou": 0.212890625, + "loss_num": 0.00787353515625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 289675588, + "step": 5170 + }, + { + "epoch": 11.516703786191536, + "grad_norm": 12.050206184387207, + "learning_rate": 1e-06, + "loss": 0.4747, + "num_input_tokens_seen": 289733692, + "step": 5171 + }, + { + "epoch": 11.516703786191536, + "loss": 0.41063833236694336, + "loss_ce": 0.00011587166227400303, + "loss_iou": 0.18359375, + "loss_num": 0.00860595703125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 289733692, + "step": 5171 + }, + { + "epoch": 11.518930957683741, + "grad_norm": 13.707185745239258, + "learning_rate": 1e-06, + "loss": 0.5053, + "num_input_tokens_seen": 289791396, + "step": 5172 + }, + { + "epoch": 11.518930957683741, + "loss": 0.5518975257873535, + "loss_ce": 0.00013974419562146068, + "loss_iou": 0.2578125, + "loss_num": 0.00677490234375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 289791396, + "step": 5172 + }, + { + "epoch": 11.521158129175946, + "grad_norm": 17.915735244750977, + "learning_rate": 1e-06, + "loss": 0.6097, + "num_input_tokens_seen": 289848220, + "step": 5173 + }, + { + "epoch": 11.521158129175946, + "loss": 0.711284875869751, + "loss_ce": 0.0003473775868769735, + "loss_iou": 0.28515625, + "loss_num": 0.028076171875, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 289848220, + "step": 5173 + }, + { + "epoch": 11.523385300668151, + "grad_norm": 19.404088973999023, + "learning_rate": 1e-06, + "loss": 0.4896, + "num_input_tokens_seen": 289907036, + "step": 5174 + }, + { + "epoch": 11.523385300668151, + "loss": 0.4599684178829193, + "loss_ce": 0.00012956123100593686, + "loss_iou": 0.203125, + "loss_num": 0.0107421875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 289907036, + "step": 5174 + }, + { + "epoch": 11.525612472160356, + "grad_norm": 14.955151557922363, + "learning_rate": 1e-06, + "loss": 0.4704, + "num_input_tokens_seen": 289963580, + "step": 5175 + }, + { + "epoch": 11.525612472160356, + "loss": 0.6151525974273682, + "loss_ce": 0.0001623880089027807, + "loss_iou": 0.271484375, + "loss_num": 0.01409912109375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 289963580, + "step": 5175 + }, + { + "epoch": 11.52783964365256, + "grad_norm": 30.26787567138672, + "learning_rate": 1e-06, + "loss": 0.4761, + "num_input_tokens_seen": 290016656, + "step": 5176 + }, + { + "epoch": 11.52783964365256, + "loss": 0.3829212784767151, + "loss_ce": 0.00010878632019739598, + "loss_iou": 0.15625, + "loss_num": 0.0140380859375, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 290016656, + "step": 5176 + }, + { + "epoch": 11.530066815144766, + "grad_norm": 16.603086471557617, + "learning_rate": 1e-06, + "loss": 0.447, + "num_input_tokens_seen": 290074272, + "step": 5177 + }, + { + "epoch": 11.530066815144766, + "loss": 0.38126683235168457, + "loss_ce": 0.00016332257655449212, + "loss_iou": 0.16796875, + "loss_num": 0.00909423828125, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 290074272, + "step": 5177 + }, + { + "epoch": 11.53229398663697, + "grad_norm": 13.765169143676758, + "learning_rate": 1e-06, + "loss": 0.6256, + "num_input_tokens_seen": 290129748, + "step": 5178 + }, + { + "epoch": 11.53229398663697, + "loss": 0.6682248115539551, + "loss_ce": 0.00013396795839071274, + "loss_iou": 0.294921875, + "loss_num": 0.015869140625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 290129748, + "step": 5178 + }, + { + "epoch": 11.534521158129175, + "grad_norm": 26.716320037841797, + "learning_rate": 1e-06, + "loss": 0.7209, + "num_input_tokens_seen": 290182560, + "step": 5179 + }, + { + "epoch": 11.534521158129175, + "loss": 0.7032727003097534, + "loss_ce": 0.0001476738107157871, + "loss_iou": 0.3046875, + "loss_num": 0.0181884765625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 290182560, + "step": 5179 + }, + { + "epoch": 11.53674832962138, + "grad_norm": 23.010082244873047, + "learning_rate": 1e-06, + "loss": 0.7079, + "num_input_tokens_seen": 290238272, + "step": 5180 + }, + { + "epoch": 11.53674832962138, + "loss": 0.6293861865997314, + "loss_ce": 0.00023578619584441185, + "loss_iou": 0.28515625, + "loss_num": 0.01171875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 290238272, + "step": 5180 + }, + { + "epoch": 11.538975501113585, + "grad_norm": 12.722779273986816, + "learning_rate": 1e-06, + "loss": 0.4231, + "num_input_tokens_seen": 290294832, + "step": 5181 + }, + { + "epoch": 11.538975501113585, + "loss": 0.5143045783042908, + "loss_ce": 0.00014442717656493187, + "loss_iou": 0.2236328125, + "loss_num": 0.01324462890625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 290294832, + "step": 5181 + }, + { + "epoch": 11.54120267260579, + "grad_norm": 17.865341186523438, + "learning_rate": 1e-06, + "loss": 0.599, + "num_input_tokens_seen": 290351116, + "step": 5182 + }, + { + "epoch": 11.54120267260579, + "loss": 0.6641983985900879, + "loss_ce": 0.0001359161688014865, + "loss_iou": 0.255859375, + "loss_num": 0.03076171875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 290351116, + "step": 5182 + }, + { + "epoch": 11.543429844097995, + "grad_norm": 16.24390983581543, + "learning_rate": 1e-06, + "loss": 0.4797, + "num_input_tokens_seen": 290406636, + "step": 5183 + }, + { + "epoch": 11.543429844097995, + "loss": 0.5366250276565552, + "loss_ce": 0.0001260171557078138, + "loss_iou": 0.2255859375, + "loss_num": 0.01708984375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 290406636, + "step": 5183 + }, + { + "epoch": 11.5456570155902, + "grad_norm": 33.181251525878906, + "learning_rate": 1e-06, + "loss": 0.5539, + "num_input_tokens_seen": 290464236, + "step": 5184 + }, + { + "epoch": 11.5456570155902, + "loss": 0.4681469798088074, + "loss_ce": 0.0001293782697757706, + "loss_iou": 0.2138671875, + "loss_num": 0.008056640625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 290464236, + "step": 5184 + }, + { + "epoch": 11.547884187082406, + "grad_norm": 15.906049728393555, + "learning_rate": 1e-06, + "loss": 0.4499, + "num_input_tokens_seen": 290519360, + "step": 5185 + }, + { + "epoch": 11.547884187082406, + "loss": 0.5545018315315247, + "loss_ce": 0.00018054830434266478, + "loss_iou": 0.2294921875, + "loss_num": 0.0191650390625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 290519360, + "step": 5185 + }, + { + "epoch": 11.550111358574611, + "grad_norm": 25.81562042236328, + "learning_rate": 1e-06, + "loss": 0.5231, + "num_input_tokens_seen": 290572196, + "step": 5186 + }, + { + "epoch": 11.550111358574611, + "loss": 0.5178329944610596, + "loss_ce": 0.00025483942590653896, + "loss_iou": 0.23046875, + "loss_num": 0.01141357421875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 290572196, + "step": 5186 + }, + { + "epoch": 11.552338530066816, + "grad_norm": 17.265830993652344, + "learning_rate": 1e-06, + "loss": 0.4436, + "num_input_tokens_seen": 290629824, + "step": 5187 + }, + { + "epoch": 11.552338530066816, + "loss": 0.4118680953979492, + "loss_ce": 0.0001249448541784659, + "loss_iou": 0.1728515625, + "loss_num": 0.01318359375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 290629824, + "step": 5187 + }, + { + "epoch": 11.55456570155902, + "grad_norm": 13.806538581848145, + "learning_rate": 1e-06, + "loss": 0.465, + "num_input_tokens_seen": 290683676, + "step": 5188 + }, + { + "epoch": 11.55456570155902, + "loss": 0.28974953293800354, + "loss_ce": 0.00010721624130383134, + "loss_iou": 0.1220703125, + "loss_num": 0.00921630859375, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 290683676, + "step": 5188 + }, + { + "epoch": 11.556792873051226, + "grad_norm": 25.621070861816406, + "learning_rate": 1e-06, + "loss": 0.5772, + "num_input_tokens_seen": 290738676, + "step": 5189 + }, + { + "epoch": 11.556792873051226, + "loss": 0.5882642865180969, + "loss_ce": 0.0001295104157179594, + "loss_iou": 0.25, + "loss_num": 0.0174560546875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 290738676, + "step": 5189 + }, + { + "epoch": 11.55902004454343, + "grad_norm": 21.53620719909668, + "learning_rate": 1e-06, + "loss": 0.583, + "num_input_tokens_seen": 290793796, + "step": 5190 + }, + { + "epoch": 11.55902004454343, + "loss": 0.6559303998947144, + "loss_ce": 0.000412814028095454, + "loss_iou": 0.27734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 290793796, + "step": 5190 + }, + { + "epoch": 11.561247216035635, + "grad_norm": 16.96056365966797, + "learning_rate": 1e-06, + "loss": 0.5001, + "num_input_tokens_seen": 290850768, + "step": 5191 + }, + { + "epoch": 11.561247216035635, + "loss": 0.5135844349861145, + "loss_ce": 0.00015668988635297865, + "loss_iou": 0.23046875, + "loss_num": 0.010498046875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 290850768, + "step": 5191 + }, + { + "epoch": 11.56347438752784, + "grad_norm": 19.151756286621094, + "learning_rate": 1e-06, + "loss": 0.5346, + "num_input_tokens_seen": 290905584, + "step": 5192 + }, + { + "epoch": 11.56347438752784, + "loss": 0.3937605321407318, + "loss_ce": 0.00014479970559477806, + "loss_iou": 0.1640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 290905584, + "step": 5192 + }, + { + "epoch": 11.565701559020045, + "grad_norm": 19.19948959350586, + "learning_rate": 1e-06, + "loss": 0.5524, + "num_input_tokens_seen": 290963692, + "step": 5193 + }, + { + "epoch": 11.565701559020045, + "loss": 0.6412444114685059, + "loss_ce": 0.00013114791363477707, + "loss_iou": 0.267578125, + "loss_num": 0.0208740234375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 290963692, + "step": 5193 + }, + { + "epoch": 11.56792873051225, + "grad_norm": 18.386707305908203, + "learning_rate": 1e-06, + "loss": 0.4723, + "num_input_tokens_seen": 291021232, + "step": 5194 + }, + { + "epoch": 11.56792873051225, + "loss": 0.3769450783729553, + "loss_ce": 0.0002360670769121498, + "loss_iou": 0.1533203125, + "loss_num": 0.01397705078125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 291021232, + "step": 5194 + }, + { + "epoch": 11.570155902004455, + "grad_norm": 20.23085594177246, + "learning_rate": 1e-06, + "loss": 0.7242, + "num_input_tokens_seen": 291075260, + "step": 5195 + }, + { + "epoch": 11.570155902004455, + "loss": 0.5950526595115662, + "loss_ce": 0.00014300120528787374, + "loss_iou": 0.2734375, + "loss_num": 0.00982666015625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 291075260, + "step": 5195 + }, + { + "epoch": 11.57238307349666, + "grad_norm": 23.331451416015625, + "learning_rate": 1e-06, + "loss": 0.7261, + "num_input_tokens_seen": 291131208, + "step": 5196 + }, + { + "epoch": 11.57238307349666, + "loss": 0.664703905582428, + "loss_ce": 0.000153136788867414, + "loss_iou": 0.28125, + "loss_num": 0.020263671875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 291131208, + "step": 5196 + }, + { + "epoch": 11.574610244988865, + "grad_norm": 25.03767967224121, + "learning_rate": 1e-06, + "loss": 0.567, + "num_input_tokens_seen": 291187856, + "step": 5197 + }, + { + "epoch": 11.574610244988865, + "loss": 0.38165202736854553, + "loss_ce": 0.0001823049533413723, + "loss_iou": 0.1611328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 291187856, + "step": 5197 + }, + { + "epoch": 11.57683741648107, + "grad_norm": 18.827882766723633, + "learning_rate": 1e-06, + "loss": 0.7002, + "num_input_tokens_seen": 291243456, + "step": 5198 + }, + { + "epoch": 11.57683741648107, + "loss": 0.9664729237556458, + "loss_ce": 0.0004084957472514361, + "loss_iou": 0.44140625, + "loss_num": 0.0164794921875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 291243456, + "step": 5198 + }, + { + "epoch": 11.579064587973274, + "grad_norm": 15.689250946044922, + "learning_rate": 1e-06, + "loss": 0.5917, + "num_input_tokens_seen": 291301168, + "step": 5199 + }, + { + "epoch": 11.579064587973274, + "loss": 0.37210649251937866, + "loss_ce": 0.00015823188005015254, + "loss_iou": 0.1630859375, + "loss_num": 0.009033203125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 291301168, + "step": 5199 + }, + { + "epoch": 11.58129175946548, + "grad_norm": 26.7224178314209, + "learning_rate": 1e-06, + "loss": 0.6347, + "num_input_tokens_seen": 291356144, + "step": 5200 + }, + { + "epoch": 11.58129175946548, + "loss": 0.6205277442932129, + "loss_ce": 0.00016642545233480632, + "loss_iou": 0.2734375, + "loss_num": 0.0147705078125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 291356144, + "step": 5200 + }, + { + "epoch": 11.583518930957684, + "grad_norm": 17.09632110595703, + "learning_rate": 1e-06, + "loss": 0.431, + "num_input_tokens_seen": 291412052, + "step": 5201 + }, + { + "epoch": 11.583518930957684, + "loss": 0.3809802234172821, + "loss_ce": 0.0001208461108035408, + "loss_iou": 0.16796875, + "loss_num": 0.0089111328125, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 291412052, + "step": 5201 + }, + { + "epoch": 11.585746102449889, + "grad_norm": 13.215712547302246, + "learning_rate": 1e-06, + "loss": 0.3926, + "num_input_tokens_seen": 291468324, + "step": 5202 + }, + { + "epoch": 11.585746102449889, + "loss": 0.35443952679634094, + "loss_ce": 0.00013045519881416112, + "loss_iou": 0.1533203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 291468324, + "step": 5202 + }, + { + "epoch": 11.587973273942094, + "grad_norm": 15.096648216247559, + "learning_rate": 1e-06, + "loss": 0.4367, + "num_input_tokens_seen": 291525764, + "step": 5203 + }, + { + "epoch": 11.587973273942094, + "loss": 0.31994152069091797, + "loss_ce": 0.00011730578989954665, + "loss_iou": 0.1455078125, + "loss_num": 0.00579833984375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 291525764, + "step": 5203 + }, + { + "epoch": 11.590200445434299, + "grad_norm": 21.537437438964844, + "learning_rate": 1e-06, + "loss": 0.4525, + "num_input_tokens_seen": 291581816, + "step": 5204 + }, + { + "epoch": 11.590200445434299, + "loss": 0.4407280683517456, + "loss_ce": 0.0001763150212354958, + "loss_iou": 0.181640625, + "loss_num": 0.0155029296875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 291581816, + "step": 5204 + }, + { + "epoch": 11.592427616926503, + "grad_norm": 17.791162490844727, + "learning_rate": 1e-06, + "loss": 0.4189, + "num_input_tokens_seen": 291640008, + "step": 5205 + }, + { + "epoch": 11.592427616926503, + "loss": 0.45544835925102234, + "loss_ce": 0.0001261141151189804, + "loss_iou": 0.20703125, + "loss_num": 0.00836181640625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 291640008, + "step": 5205 + }, + { + "epoch": 11.594654788418708, + "grad_norm": 15.262166976928711, + "learning_rate": 1e-06, + "loss": 0.4908, + "num_input_tokens_seen": 291696576, + "step": 5206 + }, + { + "epoch": 11.594654788418708, + "loss": 0.5726983547210693, + "loss_ce": 0.00012755353236570954, + "loss_iou": 0.2470703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 291696576, + "step": 5206 + }, + { + "epoch": 11.596881959910913, + "grad_norm": 18.53944969177246, + "learning_rate": 1e-06, + "loss": 0.4826, + "num_input_tokens_seen": 291753164, + "step": 5207 + }, + { + "epoch": 11.596881959910913, + "loss": 0.5484709143638611, + "loss_ce": 0.0001310570805799216, + "loss_iou": 0.2236328125, + "loss_num": 0.0205078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 291753164, + "step": 5207 + }, + { + "epoch": 11.599109131403118, + "grad_norm": 20.32893943786621, + "learning_rate": 1e-06, + "loss": 0.5119, + "num_input_tokens_seen": 291807164, + "step": 5208 + }, + { + "epoch": 11.599109131403118, + "loss": 0.6429668664932251, + "loss_ce": 0.00014457208453677595, + "loss_iou": 0.2734375, + "loss_num": 0.019287109375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 291807164, + "step": 5208 + }, + { + "epoch": 11.601336302895323, + "grad_norm": 16.207355499267578, + "learning_rate": 1e-06, + "loss": 0.3707, + "num_input_tokens_seen": 291861832, + "step": 5209 + }, + { + "epoch": 11.601336302895323, + "loss": 0.35583117604255676, + "loss_ce": 0.00011830384755739942, + "loss_iou": 0.1591796875, + "loss_num": 0.007354736328125, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 291861832, + "step": 5209 + }, + { + "epoch": 11.603563474387528, + "grad_norm": 20.643909454345703, + "learning_rate": 1e-06, + "loss": 0.3916, + "num_input_tokens_seen": 291919612, + "step": 5210 + }, + { + "epoch": 11.603563474387528, + "loss": 0.40422919392585754, + "loss_ce": 0.0001764479384291917, + "loss_iou": 0.162109375, + "loss_num": 0.015869140625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 291919612, + "step": 5210 + }, + { + "epoch": 11.605790645879733, + "grad_norm": 18.431650161743164, + "learning_rate": 1e-06, + "loss": 0.5809, + "num_input_tokens_seen": 291976832, + "step": 5211 + }, + { + "epoch": 11.605790645879733, + "loss": 0.5501755475997925, + "loss_ce": 0.00012670463183894753, + "loss_iou": 0.2490234375, + "loss_num": 0.010498046875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 291976832, + "step": 5211 + }, + { + "epoch": 11.608017817371937, + "grad_norm": 24.48961639404297, + "learning_rate": 1e-06, + "loss": 0.5894, + "num_input_tokens_seen": 292033284, + "step": 5212 + }, + { + "epoch": 11.608017817371937, + "loss": 0.565216064453125, + "loss_ce": 0.00015255842299666256, + "loss_iou": 0.248046875, + "loss_num": 0.0135498046875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 292033284, + "step": 5212 + }, + { + "epoch": 11.610244988864142, + "grad_norm": 16.848508834838867, + "learning_rate": 1e-06, + "loss": 0.404, + "num_input_tokens_seen": 292086472, + "step": 5213 + }, + { + "epoch": 11.610244988864142, + "loss": 0.3769487142562866, + "loss_ce": 0.0001176485966425389, + "loss_iou": 0.169921875, + "loss_num": 0.007354736328125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 292086472, + "step": 5213 + }, + { + "epoch": 11.612472160356347, + "grad_norm": 26.47736167907715, + "learning_rate": 1e-06, + "loss": 0.5472, + "num_input_tokens_seen": 292141628, + "step": 5214 + }, + { + "epoch": 11.612472160356347, + "loss": 0.6076180338859558, + "loss_ce": 0.00013512838631868362, + "loss_iou": 0.26953125, + "loss_num": 0.01361083984375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 292141628, + "step": 5214 + }, + { + "epoch": 11.614699331848552, + "grad_norm": 20.124786376953125, + "learning_rate": 1e-06, + "loss": 0.5394, + "num_input_tokens_seen": 292195460, + "step": 5215 + }, + { + "epoch": 11.614699331848552, + "loss": 0.746225118637085, + "loss_ce": 0.00013138932990841568, + "loss_iou": 0.33203125, + "loss_num": 0.0166015625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 292195460, + "step": 5215 + }, + { + "epoch": 11.616926503340757, + "grad_norm": 16.484128952026367, + "learning_rate": 1e-06, + "loss": 0.5526, + "num_input_tokens_seen": 292251864, + "step": 5216 + }, + { + "epoch": 11.616926503340757, + "loss": 0.5467870831489563, + "loss_ce": 0.00015625265950802714, + "loss_iou": 0.22265625, + "loss_num": 0.0201416015625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 292251864, + "step": 5216 + }, + { + "epoch": 11.619153674832962, + "grad_norm": 24.076684951782227, + "learning_rate": 1e-06, + "loss": 0.6972, + "num_input_tokens_seen": 292304412, + "step": 5217 + }, + { + "epoch": 11.619153674832962, + "loss": 0.6573572158813477, + "loss_ce": 0.00013063887308817357, + "loss_iou": 0.2890625, + "loss_num": 0.01611328125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 292304412, + "step": 5217 + }, + { + "epoch": 11.621380846325167, + "grad_norm": 20.732887268066406, + "learning_rate": 1e-06, + "loss": 0.5674, + "num_input_tokens_seen": 292360272, + "step": 5218 + }, + { + "epoch": 11.621380846325167, + "loss": 0.6395326256752014, + "loss_ce": 0.00012831481581088156, + "loss_iou": 0.27734375, + "loss_num": 0.017333984375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 292360272, + "step": 5218 + }, + { + "epoch": 11.623608017817372, + "grad_norm": 19.97538948059082, + "learning_rate": 1e-06, + "loss": 0.4222, + "num_input_tokens_seen": 292415788, + "step": 5219 + }, + { + "epoch": 11.623608017817372, + "loss": 0.3983200788497925, + "loss_ce": 0.00012667715782299638, + "loss_iou": 0.1845703125, + "loss_num": 0.0057373046875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 292415788, + "step": 5219 + }, + { + "epoch": 11.625835189309576, + "grad_norm": 22.856096267700195, + "learning_rate": 1e-06, + "loss": 0.5726, + "num_input_tokens_seen": 292467908, + "step": 5220 + }, + { + "epoch": 11.625835189309576, + "loss": 0.6926459074020386, + "loss_ce": 0.00014099694089964032, + "loss_iou": 0.310546875, + "loss_num": 0.0147705078125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 292467908, + "step": 5220 + }, + { + "epoch": 11.628062360801781, + "grad_norm": 15.5626859664917, + "learning_rate": 1e-06, + "loss": 0.6509, + "num_input_tokens_seen": 292523568, + "step": 5221 + }, + { + "epoch": 11.628062360801781, + "loss": 0.6292951107025146, + "loss_ce": 0.0001447499671485275, + "loss_iou": 0.296875, + "loss_num": 0.00738525390625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 292523568, + "step": 5221 + }, + { + "epoch": 11.630289532293986, + "grad_norm": 38.288299560546875, + "learning_rate": 1e-06, + "loss": 0.5488, + "num_input_tokens_seen": 292577496, + "step": 5222 + }, + { + "epoch": 11.630289532293986, + "loss": 0.6104898452758789, + "loss_ce": 0.00013826916983816773, + "loss_iou": 0.2451171875, + "loss_num": 0.02392578125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 292577496, + "step": 5222 + }, + { + "epoch": 11.632516703786191, + "grad_norm": 25.399972915649414, + "learning_rate": 1e-06, + "loss": 0.482, + "num_input_tokens_seen": 292633528, + "step": 5223 + }, + { + "epoch": 11.632516703786191, + "loss": 0.5387084484100342, + "loss_ce": 0.00013427785597741604, + "loss_iou": 0.2412109375, + "loss_num": 0.010986328125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 292633528, + "step": 5223 + }, + { + "epoch": 11.634743875278396, + "grad_norm": 85.37445831298828, + "learning_rate": 1e-06, + "loss": 0.5576, + "num_input_tokens_seen": 292688036, + "step": 5224 + }, + { + "epoch": 11.634743875278396, + "loss": 0.7559876441955566, + "loss_ce": 0.00012828274338971823, + "loss_iou": 0.298828125, + "loss_num": 0.031494140625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 292688036, + "step": 5224 + }, + { + "epoch": 11.6369710467706, + "grad_norm": 54.62841796875, + "learning_rate": 1e-06, + "loss": 0.4919, + "num_input_tokens_seen": 292745052, + "step": 5225 + }, + { + "epoch": 11.6369710467706, + "loss": 0.5994965434074402, + "loss_ce": 0.00013131627929396927, + "loss_iou": 0.265625, + "loss_num": 0.0140380859375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 292745052, + "step": 5225 + }, + { + "epoch": 11.639198218262806, + "grad_norm": 13.105633735656738, + "learning_rate": 1e-06, + "loss": 0.3966, + "num_input_tokens_seen": 292803352, + "step": 5226 + }, + { + "epoch": 11.639198218262806, + "loss": 0.29151397943496704, + "loss_ce": 0.0001321233285125345, + "loss_iou": 0.134765625, + "loss_num": 0.00457763671875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 292803352, + "step": 5226 + }, + { + "epoch": 11.64142538975501, + "grad_norm": 30.192888259887695, + "learning_rate": 1e-06, + "loss": 0.5925, + "num_input_tokens_seen": 292859900, + "step": 5227 + }, + { + "epoch": 11.64142538975501, + "loss": 0.59175705909729, + "loss_ce": 0.00020430152653716505, + "loss_iou": 0.236328125, + "loss_num": 0.0238037109375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 292859900, + "step": 5227 + }, + { + "epoch": 11.643652561247215, + "grad_norm": 21.47431755065918, + "learning_rate": 1e-06, + "loss": 0.4515, + "num_input_tokens_seen": 292916668, + "step": 5228 + }, + { + "epoch": 11.643652561247215, + "loss": 0.5364910364151001, + "loss_ce": 0.00011407821148168296, + "loss_iou": 0.228515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 292916668, + "step": 5228 + }, + { + "epoch": 11.64587973273942, + "grad_norm": 29.797399520874023, + "learning_rate": 1e-06, + "loss": 0.4839, + "num_input_tokens_seen": 292969696, + "step": 5229 + }, + { + "epoch": 11.64587973273942, + "loss": 0.41816890239715576, + "loss_ce": 0.00010860178736038506, + "loss_iou": 0.1796875, + "loss_num": 0.01177978515625, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 292969696, + "step": 5229 + }, + { + "epoch": 11.648106904231625, + "grad_norm": 11.707711219787598, + "learning_rate": 1e-06, + "loss": 0.456, + "num_input_tokens_seen": 293027520, + "step": 5230 + }, + { + "epoch": 11.648106904231625, + "loss": 0.5190684199333191, + "loss_ce": 0.00014752443530596793, + "loss_iou": 0.21875, + "loss_num": 0.0162353515625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 293027520, + "step": 5230 + }, + { + "epoch": 11.65033407572383, + "grad_norm": 29.20980453491211, + "learning_rate": 1e-06, + "loss": 0.53, + "num_input_tokens_seen": 293085036, + "step": 5231 + }, + { + "epoch": 11.65033407572383, + "loss": 0.42614778876304626, + "loss_ce": 0.00012240000069141388, + "loss_iou": 0.1787109375, + "loss_num": 0.013671875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 293085036, + "step": 5231 + }, + { + "epoch": 11.652561247216035, + "grad_norm": 39.99692916870117, + "learning_rate": 1e-06, + "loss": 0.7307, + "num_input_tokens_seen": 293139624, + "step": 5232 + }, + { + "epoch": 11.652561247216035, + "loss": 0.8740052580833435, + "loss_ce": 0.00022594796610064805, + "loss_iou": 0.361328125, + "loss_num": 0.0301513671875, + "loss_xval": 0.875, + "num_input_tokens_seen": 293139624, + "step": 5232 + }, + { + "epoch": 11.654788418708241, + "grad_norm": 13.025435447692871, + "learning_rate": 1e-06, + "loss": 0.3551, + "num_input_tokens_seen": 293195124, + "step": 5233 + }, + { + "epoch": 11.654788418708241, + "loss": 0.299808144569397, + "loss_ce": 0.00018654628365766257, + "loss_iou": 0.1279296875, + "loss_num": 0.00872802734375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 293195124, + "step": 5233 + }, + { + "epoch": 11.657015590200446, + "grad_norm": 18.018341064453125, + "learning_rate": 1e-06, + "loss": 0.4124, + "num_input_tokens_seen": 293250064, + "step": 5234 + }, + { + "epoch": 11.657015590200446, + "loss": 0.48889029026031494, + "loss_ce": 0.00012073374819010496, + "loss_iou": 0.2216796875, + "loss_num": 0.009033203125, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 293250064, + "step": 5234 + }, + { + "epoch": 11.659242761692651, + "grad_norm": 17.814626693725586, + "learning_rate": 1e-06, + "loss": 0.5089, + "num_input_tokens_seen": 293302392, + "step": 5235 + }, + { + "epoch": 11.659242761692651, + "loss": 0.4006240665912628, + "loss_ce": 0.00011137685942230746, + "loss_iou": 0.162109375, + "loss_num": 0.01507568359375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 293302392, + "step": 5235 + }, + { + "epoch": 11.661469933184856, + "grad_norm": 25.53102684020996, + "learning_rate": 1e-06, + "loss": 0.3704, + "num_input_tokens_seen": 293356928, + "step": 5236 + }, + { + "epoch": 11.661469933184856, + "loss": 0.36693528294563293, + "loss_ce": 0.00011399855429772288, + "loss_iou": 0.146484375, + "loss_num": 0.0146484375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 293356928, + "step": 5236 + }, + { + "epoch": 11.66369710467706, + "grad_norm": 18.298686981201172, + "learning_rate": 1e-06, + "loss": 0.5776, + "num_input_tokens_seen": 293410648, + "step": 5237 + }, + { + "epoch": 11.66369710467706, + "loss": 0.4626512825489044, + "loss_ce": 0.00012686576519627124, + "loss_iou": 0.2021484375, + "loss_num": 0.0115966796875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 293410648, + "step": 5237 + }, + { + "epoch": 11.665924276169266, + "grad_norm": 11.563762664794922, + "learning_rate": 1e-06, + "loss": 0.3882, + "num_input_tokens_seen": 293463284, + "step": 5238 + }, + { + "epoch": 11.665924276169266, + "loss": 0.2963898777961731, + "loss_ce": 0.00012523468467406929, + "loss_iou": 0.11865234375, + "loss_num": 0.01171875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 293463284, + "step": 5238 + }, + { + "epoch": 11.66815144766147, + "grad_norm": 16.8227596282959, + "learning_rate": 1e-06, + "loss": 0.6908, + "num_input_tokens_seen": 293517264, + "step": 5239 + }, + { + "epoch": 11.66815144766147, + "loss": 0.7157348394393921, + "loss_ce": 0.0001587077567819506, + "loss_iou": 0.30078125, + "loss_num": 0.0233154296875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 293517264, + "step": 5239 + }, + { + "epoch": 11.670378619153675, + "grad_norm": 17.42998695373535, + "learning_rate": 1e-06, + "loss": 0.4203, + "num_input_tokens_seen": 293572600, + "step": 5240 + }, + { + "epoch": 11.670378619153675, + "loss": 0.5567784905433655, + "loss_ce": 0.0001378921151626855, + "loss_iou": 0.2353515625, + "loss_num": 0.0172119140625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 293572600, + "step": 5240 + }, + { + "epoch": 11.67260579064588, + "grad_norm": 21.24955177307129, + "learning_rate": 1e-06, + "loss": 0.512, + "num_input_tokens_seen": 293628344, + "step": 5241 + }, + { + "epoch": 11.67260579064588, + "loss": 0.3846355080604553, + "loss_ce": 0.00011400008224882185, + "loss_iou": 0.1494140625, + "loss_num": 0.0172119140625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 293628344, + "step": 5241 + }, + { + "epoch": 11.674832962138085, + "grad_norm": 27.092111587524414, + "learning_rate": 1e-06, + "loss": 0.6355, + "num_input_tokens_seen": 293684796, + "step": 5242 + }, + { + "epoch": 11.674832962138085, + "loss": 0.6805571913719177, + "loss_ce": 0.0001372963743051514, + "loss_iou": 0.306640625, + "loss_num": 0.01373291015625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 293684796, + "step": 5242 + }, + { + "epoch": 11.67706013363029, + "grad_norm": 16.437114715576172, + "learning_rate": 1e-06, + "loss": 0.4618, + "num_input_tokens_seen": 293740992, + "step": 5243 + }, + { + "epoch": 11.67706013363029, + "loss": 0.3265749514102936, + "loss_ce": 0.00012842280557379127, + "loss_iou": 0.14453125, + "loss_num": 0.00732421875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 293740992, + "step": 5243 + }, + { + "epoch": 11.679287305122495, + "grad_norm": 23.96824836730957, + "learning_rate": 1e-06, + "loss": 0.6494, + "num_input_tokens_seen": 293795756, + "step": 5244 + }, + { + "epoch": 11.679287305122495, + "loss": 0.47252053022384644, + "loss_ce": 0.00010840976028703153, + "loss_iou": 0.2001953125, + "loss_num": 0.01416015625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 293795756, + "step": 5244 + }, + { + "epoch": 11.6815144766147, + "grad_norm": 24.439743041992188, + "learning_rate": 1e-06, + "loss": 0.5064, + "num_input_tokens_seen": 293854164, + "step": 5245 + }, + { + "epoch": 11.6815144766147, + "loss": 0.6212034821510315, + "loss_ce": 0.00010969607683364302, + "loss_iou": 0.27734375, + "loss_num": 0.013671875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 293854164, + "step": 5245 + }, + { + "epoch": 11.683741648106905, + "grad_norm": 14.390751838684082, + "learning_rate": 1e-06, + "loss": 0.5511, + "num_input_tokens_seen": 293910360, + "step": 5246 + }, + { + "epoch": 11.683741648106905, + "loss": 0.42605873942375183, + "loss_ce": 0.00027748823049478233, + "loss_iou": 0.2001953125, + "loss_num": 0.005126953125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 293910360, + "step": 5246 + }, + { + "epoch": 11.68596881959911, + "grad_norm": 24.848167419433594, + "learning_rate": 1e-06, + "loss": 0.4161, + "num_input_tokens_seen": 293969128, + "step": 5247 + }, + { + "epoch": 11.68596881959911, + "loss": 0.38915523886680603, + "loss_ce": 0.0001171459662145935, + "loss_iou": 0.171875, + "loss_num": 0.009033203125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 293969128, + "step": 5247 + }, + { + "epoch": 11.688195991091314, + "grad_norm": 18.111801147460938, + "learning_rate": 1e-06, + "loss": 0.4344, + "num_input_tokens_seen": 294024516, + "step": 5248 + }, + { + "epoch": 11.688195991091314, + "loss": 0.5797381401062012, + "loss_ce": 0.0001482985680922866, + "loss_iou": 0.244140625, + "loss_num": 0.01806640625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 294024516, + "step": 5248 + }, + { + "epoch": 11.690423162583519, + "grad_norm": 18.03676414489746, + "learning_rate": 1e-06, + "loss": 0.543, + "num_input_tokens_seen": 294076868, + "step": 5249 + }, + { + "epoch": 11.690423162583519, + "loss": 0.41762179136276245, + "loss_ce": 0.0001413249410688877, + "loss_iou": 0.1845703125, + "loss_num": 0.009521484375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 294076868, + "step": 5249 + }, + { + "epoch": 11.692650334075724, + "grad_norm": 12.850332260131836, + "learning_rate": 1e-06, + "loss": 0.401, + "num_input_tokens_seen": 294132900, + "step": 5250 + }, + { + "epoch": 11.692650334075724, + "eval_seeclick_web_CIoU": 0.5772451758384705, + "eval_seeclick_web_GIoU": 0.5742323994636536, + "eval_seeclick_web_IoU": 0.5963829755783081, + "eval_seeclick_web_MAE_all": 0.015578721649944782, + "eval_seeclick_web_MAE_h": 0.007713136961683631, + "eval_seeclick_web_MAE_w": 0.015674122143536806, + "eval_seeclick_web_MAE_x_boxes": 0.009651401545852423, + "eval_seeclick_web_MAE_y_boxes": 0.021532843122258782, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.929573655128479, + "eval_seeclick_web_loss_ce": 0.00019275367230875418, + "eval_seeclick_web_loss_iou": 0.423828125, + "eval_seeclick_web_loss_num": 0.012483596801757812, + "eval_seeclick_web_loss_xval": 0.909912109375, + "eval_seeclick_web_runtime": 21.994, + "eval_seeclick_web_samples_per_second": 2.273, + "eval_seeclick_web_steps_per_second": 0.091, + "num_input_tokens_seen": 294132900, + "step": 5250 + }, + { + "epoch": 11.692650334075724, + "eval_icons_CIoU": 0.2656567245721817, + "eval_icons_GIoU": 0.2912362068891525, + "eval_icons_IoU": 0.35016924142837524, + "eval_icons_MAE_all": 0.06389489211142063, + "eval_icons_MAE_h": 0.04006360750645399, + "eval_icons_MAE_w": 0.0678694061934948, + "eval_icons_MAE_x_boxes": 0.059559810906648636, + "eval_icons_MAE_y_boxes": 0.03940416965633631, + "eval_icons_inside_bbox": 0.578125, + "eval_icons_loss": 1.754642128944397, + "eval_icons_loss_ce": 0.000254698476055637, + "eval_icons_loss_iou": 0.68310546875, + "eval_icons_loss_num": 0.06172943115234375, + "eval_icons_loss_xval": 1.67333984375, + "eval_icons_runtime": 20.5945, + "eval_icons_samples_per_second": 2.428, + "eval_icons_steps_per_second": 0.097, + "num_input_tokens_seen": 294132900, + "step": 5250 + }, + { + "epoch": 11.692650334075724, + "eval_screenspot_CIoU": 0.3591058651606242, + "eval_screenspot_GIoU": 0.3725058138370514, + "eval_screenspot_IoU": 0.4368898868560791, + "eval_screenspot_MAE_all": 0.05924547587831815, + "eval_screenspot_MAE_h": 0.039297002057234444, + "eval_screenspot_MAE_w": 0.06727503115932147, + "eval_screenspot_MAE_x_boxes": 0.07256409463783105, + "eval_screenspot_MAE_y_boxes": 0.04037608547757069, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 1.6120883226394653, + "eval_screenspot_loss_ce": 0.0002683925946863989, + "eval_screenspot_loss_iou": 0.66650390625, + "eval_screenspot_loss_num": 0.06815338134765625, + "eval_screenspot_loss_xval": 1.6746419270833333, + "eval_screenspot_runtime": 35.2255, + "eval_screenspot_samples_per_second": 2.527, + "eval_screenspot_steps_per_second": 0.085, + "num_input_tokens_seen": 294132900, + "step": 5250 + }, + { + "epoch": 11.692650334075724, + "eval_compot_CIoU": 0.34088848531246185, + "eval_compot_GIoU": 0.3492155075073242, + "eval_compot_IoU": 0.400636687874794, + "eval_compot_MAE_all": 0.01801011897623539, + "eval_compot_MAE_h": 0.009378379676491022, + "eval_compot_MAE_w": 0.021228870376944542, + "eval_compot_MAE_x_boxes": 0.02999929618090391, + "eval_compot_MAE_y_boxes": 0.007140443194657564, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4145660400390625, + "eval_compot_loss_ce": 0.00018989732052432373, + "eval_compot_loss_iou": 0.651611328125, + "eval_compot_loss_num": 0.016811370849609375, + "eval_compot_loss_xval": 1.387451171875, + "eval_compot_runtime": 22.0109, + "eval_compot_samples_per_second": 2.272, + "eval_compot_steps_per_second": 0.091, + "num_input_tokens_seen": 294132900, + "step": 5250 + }, + { + "epoch": 11.692650334075724, + "eval_custom_ui_val_CIoU": 0.4712279670768314, + "eval_custom_ui_val_GIoU": 0.48230206304126316, + "eval_custom_ui_val_IoU": 0.533691535393397, + "eval_custom_ui_val_MAE_all": 0.030553390168481402, + "eval_custom_ui_val_MAE_h": 0.016979538809715047, + "eval_custom_ui_val_MAE_w": 0.039266514798833266, + "eval_custom_ui_val_MAE_x_boxes": 0.03789379137257735, + "eval_custom_ui_val_MAE_y_boxes": 0.015024640881973837, + "eval_custom_ui_val_inside_bbox": 0.7685185207260979, + "eval_custom_ui_val_loss": 1.2054945230484009, + "eval_custom_ui_val_loss_ce": 0.0002350941342431017, + "eval_custom_ui_val_loss_iou": 0.5133327907986112, + "eval_custom_ui_val_loss_num": 0.027909808688693576, + "eval_custom_ui_val_loss_xval": 1.1665581597222223, + "eval_custom_ui_val_runtime": 59.6121, + "eval_custom_ui_val_samples_per_second": 4.445, + "eval_custom_ui_val_steps_per_second": 0.151, + "num_input_tokens_seen": 294132900, + "step": 5250 + }, + { + "epoch": 11.692650334075724, + "loss": 0.9047340154647827, + "loss_ce": 0.0001930339785758406, + "loss_iou": 0.390625, + "loss_num": 0.0242919921875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 294132900, + "step": 5250 + }, + { + "epoch": 11.694877505567929, + "grad_norm": 22.63847541809082, + "learning_rate": 1e-06, + "loss": 0.4242, + "num_input_tokens_seen": 294188952, + "step": 5251 + }, + { + "epoch": 11.694877505567929, + "loss": 0.36535871028900146, + "loss_ce": 0.00012433102529030293, + "loss_iou": 0.1552734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 294188952, + "step": 5251 + }, + { + "epoch": 11.697104677060134, + "grad_norm": 23.704172134399414, + "learning_rate": 1e-06, + "loss": 0.4967, + "num_input_tokens_seen": 294246668, + "step": 5252 + }, + { + "epoch": 11.697104677060134, + "loss": 0.47535350918769836, + "loss_ce": 0.00013378591393120587, + "loss_iou": 0.2197265625, + "loss_num": 0.0069580078125, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 294246668, + "step": 5252 + }, + { + "epoch": 11.699331848552339, + "grad_norm": 64.23724365234375, + "learning_rate": 1e-06, + "loss": 0.5925, + "num_input_tokens_seen": 294303032, + "step": 5253 + }, + { + "epoch": 11.699331848552339, + "loss": 0.4765591025352478, + "loss_ce": 0.00011863937834277749, + "loss_iou": 0.185546875, + "loss_num": 0.02099609375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 294303032, + "step": 5253 + }, + { + "epoch": 11.701559020044543, + "grad_norm": 19.613231658935547, + "learning_rate": 1e-06, + "loss": 0.5125, + "num_input_tokens_seen": 294360592, + "step": 5254 + }, + { + "epoch": 11.701559020044543, + "loss": 0.5249412059783936, + "loss_ce": 0.0001609077153261751, + "loss_iou": 0.2216796875, + "loss_num": 0.0162353515625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 294360592, + "step": 5254 + }, + { + "epoch": 11.703786191536748, + "grad_norm": 18.052928924560547, + "learning_rate": 1e-06, + "loss": 0.4107, + "num_input_tokens_seen": 294416216, + "step": 5255 + }, + { + "epoch": 11.703786191536748, + "loss": 0.3549973666667938, + "loss_ce": 0.00013897998724132776, + "loss_iou": 0.158203125, + "loss_num": 0.007659912109375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 294416216, + "step": 5255 + }, + { + "epoch": 11.706013363028953, + "grad_norm": 20.044328689575195, + "learning_rate": 1e-06, + "loss": 0.6376, + "num_input_tokens_seen": 294473684, + "step": 5256 + }, + { + "epoch": 11.706013363028953, + "loss": 0.5567925572395325, + "loss_ce": 0.00015196386084426194, + "loss_iou": 0.2294921875, + "loss_num": 0.019775390625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 294473684, + "step": 5256 + }, + { + "epoch": 11.708240534521158, + "grad_norm": 32.992637634277344, + "learning_rate": 1e-06, + "loss": 0.4129, + "num_input_tokens_seen": 294531200, + "step": 5257 + }, + { + "epoch": 11.708240534521158, + "loss": 0.41336357593536377, + "loss_ce": 0.00015554996207356453, + "loss_iou": 0.19140625, + "loss_num": 0.00604248046875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 294531200, + "step": 5257 + }, + { + "epoch": 11.710467706013363, + "grad_norm": 17.366310119628906, + "learning_rate": 1e-06, + "loss": 0.3378, + "num_input_tokens_seen": 294587048, + "step": 5258 + }, + { + "epoch": 11.710467706013363, + "loss": 0.3781304359436035, + "loss_ce": 0.00020075507927685976, + "loss_iou": 0.1767578125, + "loss_num": 0.00494384765625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 294587048, + "step": 5258 + }, + { + "epoch": 11.712694877505568, + "grad_norm": 19.407751083374023, + "learning_rate": 1e-06, + "loss": 0.55, + "num_input_tokens_seen": 294645188, + "step": 5259 + }, + { + "epoch": 11.712694877505568, + "loss": 0.6682122349739075, + "loss_ce": 0.00012143873027525842, + "loss_iou": 0.283203125, + "loss_num": 0.0203857421875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 294645188, + "step": 5259 + }, + { + "epoch": 11.714922048997773, + "grad_norm": 27.0356388092041, + "learning_rate": 1e-06, + "loss": 0.5196, + "num_input_tokens_seen": 294702920, + "step": 5260 + }, + { + "epoch": 11.714922048997773, + "loss": 0.5554238557815552, + "loss_ce": 0.00012605905067175627, + "loss_iou": 0.240234375, + "loss_num": 0.01507568359375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 294702920, + "step": 5260 + }, + { + "epoch": 11.717149220489977, + "grad_norm": 25.279115676879883, + "learning_rate": 1e-06, + "loss": 0.5789, + "num_input_tokens_seen": 294760020, + "step": 5261 + }, + { + "epoch": 11.717149220489977, + "loss": 0.6674556732177734, + "loss_ce": 0.0002193464315496385, + "loss_iou": 0.283203125, + "loss_num": 0.0205078125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 294760020, + "step": 5261 + }, + { + "epoch": 11.719376391982182, + "grad_norm": 28.12050437927246, + "learning_rate": 1e-06, + "loss": 0.4775, + "num_input_tokens_seen": 294815096, + "step": 5262 + }, + { + "epoch": 11.719376391982182, + "loss": 0.4449352025985718, + "loss_ce": 0.00011096900561824441, + "loss_iou": 0.2001953125, + "loss_num": 0.0089111328125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 294815096, + "step": 5262 + }, + { + "epoch": 11.721603563474387, + "grad_norm": 15.876343727111816, + "learning_rate": 1e-06, + "loss": 0.6174, + "num_input_tokens_seen": 294872920, + "step": 5263 + }, + { + "epoch": 11.721603563474387, + "loss": 0.6857922077178955, + "loss_ce": 0.0003674498002510518, + "loss_iou": 0.30078125, + "loss_num": 0.0166015625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 294872920, + "step": 5263 + }, + { + "epoch": 11.723830734966592, + "grad_norm": 19.064332962036133, + "learning_rate": 1e-06, + "loss": 0.4169, + "num_input_tokens_seen": 294929388, + "step": 5264 + }, + { + "epoch": 11.723830734966592, + "loss": 0.2714940309524536, + "loss_ce": 0.00013172137551009655, + "loss_iou": 0.11279296875, + "loss_num": 0.00909423828125, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 294929388, + "step": 5264 + }, + { + "epoch": 11.726057906458797, + "grad_norm": 19.456663131713867, + "learning_rate": 1e-06, + "loss": 0.4162, + "num_input_tokens_seen": 294983048, + "step": 5265 + }, + { + "epoch": 11.726057906458797, + "loss": 0.4633742868900299, + "loss_ce": 0.00011746423842851073, + "loss_iou": 0.2021484375, + "loss_num": 0.01165771484375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 294983048, + "step": 5265 + }, + { + "epoch": 11.728285077951002, + "grad_norm": 18.227783203125, + "learning_rate": 1e-06, + "loss": 0.5663, + "num_input_tokens_seen": 295042996, + "step": 5266 + }, + { + "epoch": 11.728285077951002, + "loss": 0.4463956952095032, + "loss_ce": 0.00010662179556675255, + "loss_iou": 0.1748046875, + "loss_num": 0.0191650390625, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 295042996, + "step": 5266 + }, + { + "epoch": 11.730512249443207, + "grad_norm": 16.869722366333008, + "learning_rate": 1e-06, + "loss": 0.5432, + "num_input_tokens_seen": 295096444, + "step": 5267 + }, + { + "epoch": 11.730512249443207, + "loss": 0.6898572444915771, + "loss_ce": 0.00015996204456314445, + "loss_iou": 0.306640625, + "loss_num": 0.01531982421875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 295096444, + "step": 5267 + }, + { + "epoch": 11.732739420935411, + "grad_norm": 20.643428802490234, + "learning_rate": 1e-06, + "loss": 0.5397, + "num_input_tokens_seen": 295151264, + "step": 5268 + }, + { + "epoch": 11.732739420935411, + "loss": 0.45317167043685913, + "loss_ce": 0.00010773130634333938, + "loss_iou": 0.2060546875, + "loss_num": 0.00799560546875, + "loss_xval": 0.453125, + "num_input_tokens_seen": 295151264, + "step": 5268 + }, + { + "epoch": 11.734966592427616, + "grad_norm": 21.514039993286133, + "learning_rate": 1e-06, + "loss": 0.4523, + "num_input_tokens_seen": 295208360, + "step": 5269 + }, + { + "epoch": 11.734966592427616, + "loss": 0.41239604353904724, + "loss_ce": 0.0001646070450078696, + "loss_iou": 0.181640625, + "loss_num": 0.0096435546875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 295208360, + "step": 5269 + }, + { + "epoch": 11.737193763919821, + "grad_norm": 32.98282241821289, + "learning_rate": 1e-06, + "loss": 0.5202, + "num_input_tokens_seen": 295262584, + "step": 5270 + }, + { + "epoch": 11.737193763919821, + "loss": 0.4672822058200836, + "loss_ce": 0.00011912808258784935, + "loss_iou": 0.193359375, + "loss_num": 0.0162353515625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 295262584, + "step": 5270 + }, + { + "epoch": 11.739420935412026, + "grad_norm": 34.023441314697266, + "learning_rate": 1e-06, + "loss": 0.6488, + "num_input_tokens_seen": 295318340, + "step": 5271 + }, + { + "epoch": 11.739420935412026, + "loss": 0.686636209487915, + "loss_ce": 0.00011279142927378416, + "loss_iou": 0.3203125, + "loss_num": 0.00927734375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 295318340, + "step": 5271 + }, + { + "epoch": 11.74164810690423, + "grad_norm": 26.27116584777832, + "learning_rate": 1e-06, + "loss": 0.6936, + "num_input_tokens_seen": 295373900, + "step": 5272 + }, + { + "epoch": 11.74164810690423, + "loss": 0.806282639503479, + "loss_ce": 0.0001302346063312143, + "loss_iou": 0.345703125, + "loss_num": 0.0228271484375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 295373900, + "step": 5272 + }, + { + "epoch": 11.743875278396436, + "grad_norm": 23.026466369628906, + "learning_rate": 1e-06, + "loss": 0.3724, + "num_input_tokens_seen": 295429820, + "step": 5273 + }, + { + "epoch": 11.743875278396436, + "loss": 0.42163270711898804, + "loss_ce": 0.00012393189535941929, + "loss_iou": 0.19921875, + "loss_num": 0.0045166015625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 295429820, + "step": 5273 + }, + { + "epoch": 11.74610244988864, + "grad_norm": 22.444780349731445, + "learning_rate": 1e-06, + "loss": 0.6569, + "num_input_tokens_seen": 295488184, + "step": 5274 + }, + { + "epoch": 11.74610244988864, + "loss": 0.6977794766426086, + "loss_ce": 0.00014764037041459233, + "loss_iou": 0.298828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 295488184, + "step": 5274 + }, + { + "epoch": 11.748329621380847, + "grad_norm": 13.779067993164062, + "learning_rate": 1e-06, + "loss": 0.4753, + "num_input_tokens_seen": 295545832, + "step": 5275 + }, + { + "epoch": 11.748329621380847, + "loss": 0.5852853059768677, + "loss_ce": 0.0003243696701247245, + "loss_iou": 0.26171875, + "loss_num": 0.01275634765625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 295545832, + "step": 5275 + }, + { + "epoch": 11.750556792873052, + "grad_norm": 28.335033416748047, + "learning_rate": 1e-06, + "loss": 0.8353, + "num_input_tokens_seen": 295602708, + "step": 5276 + }, + { + "epoch": 11.750556792873052, + "loss": 0.6632611155509949, + "loss_ce": 0.0001752136304276064, + "loss_iou": 0.283203125, + "loss_num": 0.019287109375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 295602708, + "step": 5276 + }, + { + "epoch": 11.752783964365257, + "grad_norm": 17.9951114654541, + "learning_rate": 1e-06, + "loss": 0.5694, + "num_input_tokens_seen": 295661544, + "step": 5277 + }, + { + "epoch": 11.752783964365257, + "loss": 0.4410516023635864, + "loss_ce": 0.00013365020276978612, + "loss_iou": 0.205078125, + "loss_num": 0.00604248046875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 295661544, + "step": 5277 + }, + { + "epoch": 11.755011135857462, + "grad_norm": 18.01840591430664, + "learning_rate": 1e-06, + "loss": 0.5552, + "num_input_tokens_seen": 295718028, + "step": 5278 + }, + { + "epoch": 11.755011135857462, + "loss": 0.5536874532699585, + "loss_ce": 0.00015963352052494884, + "loss_iou": 0.255859375, + "loss_num": 0.00823974609375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 295718028, + "step": 5278 + }, + { + "epoch": 11.757238307349667, + "grad_norm": 16.344501495361328, + "learning_rate": 1e-06, + "loss": 0.494, + "num_input_tokens_seen": 295774608, + "step": 5279 + }, + { + "epoch": 11.757238307349667, + "loss": 0.4308076500892639, + "loss_ce": 0.000143599376315251, + "loss_iou": 0.1982421875, + "loss_num": 0.00677490234375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 295774608, + "step": 5279 + }, + { + "epoch": 11.759465478841872, + "grad_norm": 16.951433181762695, + "learning_rate": 1e-06, + "loss": 0.4982, + "num_input_tokens_seen": 295830944, + "step": 5280 + }, + { + "epoch": 11.759465478841872, + "loss": 0.5626462697982788, + "loss_ce": 0.00014624299365095794, + "loss_iou": 0.2421875, + "loss_num": 0.0157470703125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 295830944, + "step": 5280 + }, + { + "epoch": 11.761692650334076, + "grad_norm": 16.005573272705078, + "learning_rate": 1e-06, + "loss": 0.5351, + "num_input_tokens_seen": 295887036, + "step": 5281 + }, + { + "epoch": 11.761692650334076, + "loss": 0.3308425545692444, + "loss_ce": 0.00015408273611683398, + "loss_iou": 0.140625, + "loss_num": 0.00982666015625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 295887036, + "step": 5281 + }, + { + "epoch": 11.763919821826281, + "grad_norm": 24.683542251586914, + "learning_rate": 1e-06, + "loss": 0.4432, + "num_input_tokens_seen": 295942760, + "step": 5282 + }, + { + "epoch": 11.763919821826281, + "loss": 0.432136595249176, + "loss_ce": 0.00012974410492461175, + "loss_iou": 0.1943359375, + "loss_num": 0.00860595703125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 295942760, + "step": 5282 + }, + { + "epoch": 11.766146993318486, + "grad_norm": 14.518014907836914, + "learning_rate": 1e-06, + "loss": 0.4037, + "num_input_tokens_seen": 295998632, + "step": 5283 + }, + { + "epoch": 11.766146993318486, + "loss": 0.4797380268573761, + "loss_ce": 0.00012378576502669603, + "loss_iou": 0.20703125, + "loss_num": 0.01300048828125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 295998632, + "step": 5283 + }, + { + "epoch": 11.768374164810691, + "grad_norm": 21.187023162841797, + "learning_rate": 1e-06, + "loss": 0.4499, + "num_input_tokens_seen": 296051864, + "step": 5284 + }, + { + "epoch": 11.768374164810691, + "loss": 0.38384902477264404, + "loss_ce": 0.00018203401123173535, + "loss_iou": 0.17578125, + "loss_num": 0.006195068359375, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 296051864, + "step": 5284 + }, + { + "epoch": 11.770601336302896, + "grad_norm": 34.56988525390625, + "learning_rate": 1e-06, + "loss": 0.4661, + "num_input_tokens_seen": 296106776, + "step": 5285 + }, + { + "epoch": 11.770601336302896, + "loss": 0.44788599014282227, + "loss_ce": 0.00013207312440499663, + "loss_iou": 0.1962890625, + "loss_num": 0.010986328125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 296106776, + "step": 5285 + }, + { + "epoch": 11.7728285077951, + "grad_norm": 15.454012870788574, + "learning_rate": 1e-06, + "loss": 0.7173, + "num_input_tokens_seen": 296162332, + "step": 5286 + }, + { + "epoch": 11.7728285077951, + "loss": 0.5844168066978455, + "loss_ce": 0.0001882594224298373, + "loss_iou": 0.23828125, + "loss_num": 0.021728515625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 296162332, + "step": 5286 + }, + { + "epoch": 11.775055679287306, + "grad_norm": 16.151887893676758, + "learning_rate": 1e-06, + "loss": 0.5455, + "num_input_tokens_seen": 296219860, + "step": 5287 + }, + { + "epoch": 11.775055679287306, + "loss": 0.5150243043899536, + "loss_ce": 0.00013172382023185492, + "loss_iou": 0.234375, + "loss_num": 0.00933837890625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 296219860, + "step": 5287 + }, + { + "epoch": 11.77728285077951, + "grad_norm": 37.95972442626953, + "learning_rate": 1e-06, + "loss": 0.5543, + "num_input_tokens_seen": 296273744, + "step": 5288 + }, + { + "epoch": 11.77728285077951, + "loss": 0.5362709760665894, + "loss_ce": 0.00013816248974762857, + "loss_iou": 0.2333984375, + "loss_num": 0.01385498046875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 296273744, + "step": 5288 + }, + { + "epoch": 11.779510022271715, + "grad_norm": 30.687088012695312, + "learning_rate": 1e-06, + "loss": 0.465, + "num_input_tokens_seen": 296330476, + "step": 5289 + }, + { + "epoch": 11.779510022271715, + "loss": 0.575337290763855, + "loss_ce": 0.0001419962791260332, + "loss_iou": 0.25, + "loss_num": 0.01495361328125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 296330476, + "step": 5289 + }, + { + "epoch": 11.78173719376392, + "grad_norm": 22.51045799255371, + "learning_rate": 1e-06, + "loss": 0.6343, + "num_input_tokens_seen": 296386848, + "step": 5290 + }, + { + "epoch": 11.78173719376392, + "loss": 0.40053892135620117, + "loss_ce": 0.00014829869905952364, + "loss_iou": 0.1728515625, + "loss_num": 0.01092529296875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 296386848, + "step": 5290 + }, + { + "epoch": 11.783964365256125, + "grad_norm": 17.85065269470215, + "learning_rate": 1e-06, + "loss": 0.5055, + "num_input_tokens_seen": 296442688, + "step": 5291 + }, + { + "epoch": 11.783964365256125, + "loss": 0.36511754989624023, + "loss_ce": 0.0001273062516702339, + "loss_iou": 0.1484375, + "loss_num": 0.013427734375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 296442688, + "step": 5291 + }, + { + "epoch": 11.78619153674833, + "grad_norm": 16.581172943115234, + "learning_rate": 1e-06, + "loss": 0.5759, + "num_input_tokens_seen": 296498192, + "step": 5292 + }, + { + "epoch": 11.78619153674833, + "loss": 0.3235490620136261, + "loss_ce": 0.0001237650285474956, + "loss_iou": 0.1416015625, + "loss_num": 0.008056640625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 296498192, + "step": 5292 + }, + { + "epoch": 11.788418708240535, + "grad_norm": 14.886438369750977, + "learning_rate": 1e-06, + "loss": 0.425, + "num_input_tokens_seen": 296555524, + "step": 5293 + }, + { + "epoch": 11.788418708240535, + "loss": 0.44675683975219727, + "loss_ce": 0.00016260198026429862, + "loss_iou": 0.166015625, + "loss_num": 0.0228271484375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 296555524, + "step": 5293 + }, + { + "epoch": 11.79064587973274, + "grad_norm": 15.52322006225586, + "learning_rate": 1e-06, + "loss": 0.4785, + "num_input_tokens_seen": 296613260, + "step": 5294 + }, + { + "epoch": 11.79064587973274, + "loss": 0.3428999185562134, + "loss_ce": 0.00012646152754314244, + "loss_iou": 0.162109375, + "loss_num": 0.0036163330078125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 296613260, + "step": 5294 + }, + { + "epoch": 11.792873051224944, + "grad_norm": 27.934289932250977, + "learning_rate": 1e-06, + "loss": 0.502, + "num_input_tokens_seen": 296667928, + "step": 5295 + }, + { + "epoch": 11.792873051224944, + "loss": 0.5954668521881104, + "loss_ce": 0.00012995509314350784, + "loss_iou": 0.25390625, + "loss_num": 0.017578125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 296667928, + "step": 5295 + }, + { + "epoch": 11.79510022271715, + "grad_norm": 18.869325637817383, + "learning_rate": 1e-06, + "loss": 0.4571, + "num_input_tokens_seen": 296723936, + "step": 5296 + }, + { + "epoch": 11.79510022271715, + "loss": 0.5156769752502441, + "loss_ce": 0.0003877178824041039, + "loss_iou": 0.216796875, + "loss_num": 0.0162353515625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 296723936, + "step": 5296 + }, + { + "epoch": 11.797327394209354, + "grad_norm": 19.03141975402832, + "learning_rate": 1e-06, + "loss": 0.5304, + "num_input_tokens_seen": 296783132, + "step": 5297 + }, + { + "epoch": 11.797327394209354, + "loss": 0.3304428160190582, + "loss_ce": 0.00012054571561748162, + "loss_iou": 0.1474609375, + "loss_num": 0.0069580078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 296783132, + "step": 5297 + }, + { + "epoch": 11.799554565701559, + "grad_norm": 20.347238540649414, + "learning_rate": 1e-06, + "loss": 0.5689, + "num_input_tokens_seen": 296839528, + "step": 5298 + }, + { + "epoch": 11.799554565701559, + "loss": 0.5347905158996582, + "loss_ce": 0.00012258738570380956, + "loss_iou": 0.2421875, + "loss_num": 0.01031494140625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 296839528, + "step": 5298 + }, + { + "epoch": 11.801781737193764, + "grad_norm": 18.98991584777832, + "learning_rate": 1e-06, + "loss": 0.5361, + "num_input_tokens_seen": 296899164, + "step": 5299 + }, + { + "epoch": 11.801781737193764, + "loss": 0.6929968595504761, + "loss_ce": 0.00018683550297282636, + "loss_iou": 0.283203125, + "loss_num": 0.025390625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 296899164, + "step": 5299 + }, + { + "epoch": 11.804008908685969, + "grad_norm": 19.457019805908203, + "learning_rate": 1e-06, + "loss": 0.5461, + "num_input_tokens_seen": 296954940, + "step": 5300 + }, + { + "epoch": 11.804008908685969, + "loss": 0.5115100741386414, + "loss_ce": 0.00015755894128233194, + "loss_iou": 0.2265625, + "loss_num": 0.01141357421875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 296954940, + "step": 5300 + }, + { + "epoch": 11.806236080178174, + "grad_norm": 18.224740982055664, + "learning_rate": 1e-06, + "loss": 0.5393, + "num_input_tokens_seen": 297010916, + "step": 5301 + }, + { + "epoch": 11.806236080178174, + "loss": 0.6895302534103394, + "loss_ce": 0.00019916013116016984, + "loss_iou": 0.3046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 297010916, + "step": 5301 + }, + { + "epoch": 11.808463251670378, + "grad_norm": 22.27739715576172, + "learning_rate": 1e-06, + "loss": 0.5199, + "num_input_tokens_seen": 297066748, + "step": 5302 + }, + { + "epoch": 11.808463251670378, + "loss": 0.4963473081588745, + "loss_ce": 0.00013145770935807377, + "loss_iou": 0.2216796875, + "loss_num": 0.010498046875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 297066748, + "step": 5302 + }, + { + "epoch": 11.810690423162583, + "grad_norm": 13.744184494018555, + "learning_rate": 1e-06, + "loss": 0.3914, + "num_input_tokens_seen": 297123360, + "step": 5303 + }, + { + "epoch": 11.810690423162583, + "loss": 0.32909929752349854, + "loss_ce": 0.00011983991134911776, + "loss_iou": 0.146484375, + "loss_num": 0.00732421875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 297123360, + "step": 5303 + }, + { + "epoch": 11.812917594654788, + "grad_norm": 17.198087692260742, + "learning_rate": 1e-06, + "loss": 0.6204, + "num_input_tokens_seen": 297179212, + "step": 5304 + }, + { + "epoch": 11.812917594654788, + "loss": 0.5738571882247925, + "loss_ce": 0.0001267509942408651, + "loss_iou": 0.22265625, + "loss_num": 0.0257568359375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 297179212, + "step": 5304 + }, + { + "epoch": 11.815144766146993, + "grad_norm": 21.798635482788086, + "learning_rate": 1e-06, + "loss": 0.4382, + "num_input_tokens_seen": 297237840, + "step": 5305 + }, + { + "epoch": 11.815144766146993, + "loss": 0.5149092674255371, + "loss_ce": 0.00013874081196263433, + "loss_iou": 0.2353515625, + "loss_num": 0.0087890625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 297237840, + "step": 5305 + }, + { + "epoch": 11.817371937639198, + "grad_norm": 15.751426696777344, + "learning_rate": 1e-06, + "loss": 0.5304, + "num_input_tokens_seen": 297295812, + "step": 5306 + }, + { + "epoch": 11.817371937639198, + "loss": 0.5815914869308472, + "loss_ce": 0.0001705837930785492, + "loss_iou": 0.240234375, + "loss_num": 0.020263671875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 297295812, + "step": 5306 + }, + { + "epoch": 11.819599109131403, + "grad_norm": 30.267009735107422, + "learning_rate": 1e-06, + "loss": 0.6813, + "num_input_tokens_seen": 297351380, + "step": 5307 + }, + { + "epoch": 11.819599109131403, + "loss": 0.7589607238769531, + "loss_ce": 0.000171684252563864, + "loss_iou": 0.3046875, + "loss_num": 0.0302734375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 297351380, + "step": 5307 + }, + { + "epoch": 11.821826280623608, + "grad_norm": 16.236085891723633, + "learning_rate": 1e-06, + "loss": 0.5397, + "num_input_tokens_seen": 297406340, + "step": 5308 + }, + { + "epoch": 11.821826280623608, + "loss": 0.5382388830184937, + "loss_ce": 0.00015292633906938136, + "loss_iou": 0.2412109375, + "loss_num": 0.01141357421875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 297406340, + "step": 5308 + }, + { + "epoch": 11.824053452115812, + "grad_norm": 15.723315238952637, + "learning_rate": 1e-06, + "loss": 0.5465, + "num_input_tokens_seen": 297464004, + "step": 5309 + }, + { + "epoch": 11.824053452115812, + "loss": 0.6881057024002075, + "loss_ce": 0.00011742223432520404, + "loss_iou": 0.2890625, + "loss_num": 0.021728515625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 297464004, + "step": 5309 + }, + { + "epoch": 11.826280623608017, + "grad_norm": 26.35066795349121, + "learning_rate": 1e-06, + "loss": 0.4958, + "num_input_tokens_seen": 297520084, + "step": 5310 + }, + { + "epoch": 11.826280623608017, + "loss": 0.4250958263874054, + "loss_ce": 0.00016907165991142392, + "loss_iou": 0.1748046875, + "loss_num": 0.01495361328125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 297520084, + "step": 5310 + }, + { + "epoch": 11.828507795100222, + "grad_norm": 17.730464935302734, + "learning_rate": 1e-06, + "loss": 0.6199, + "num_input_tokens_seen": 297575516, + "step": 5311 + }, + { + "epoch": 11.828507795100222, + "loss": 0.7365161180496216, + "loss_ce": 0.0008594049722887576, + "loss_iou": 0.291015625, + "loss_num": 0.0306396484375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 297575516, + "step": 5311 + }, + { + "epoch": 11.830734966592427, + "grad_norm": 23.806507110595703, + "learning_rate": 1e-06, + "loss": 0.5492, + "num_input_tokens_seen": 297631068, + "step": 5312 + }, + { + "epoch": 11.830734966592427, + "loss": 0.6309938430786133, + "loss_ce": 0.00013447852688841522, + "loss_iou": 0.275390625, + "loss_num": 0.016357421875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 297631068, + "step": 5312 + }, + { + "epoch": 11.832962138084632, + "grad_norm": 14.093077659606934, + "learning_rate": 1e-06, + "loss": 0.4271, + "num_input_tokens_seen": 297685616, + "step": 5313 + }, + { + "epoch": 11.832962138084632, + "loss": 0.23368564248085022, + "loss_ce": 0.00019564517424441874, + "loss_iou": 0.10205078125, + "loss_num": 0.00579833984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 297685616, + "step": 5313 + }, + { + "epoch": 11.835189309576837, + "grad_norm": 22.90943717956543, + "learning_rate": 1e-06, + "loss": 0.4905, + "num_input_tokens_seen": 297743540, + "step": 5314 + }, + { + "epoch": 11.835189309576837, + "loss": 0.39391031861305237, + "loss_ce": 0.00011150065256515518, + "loss_iou": 0.166015625, + "loss_num": 0.01239013671875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 297743540, + "step": 5314 + }, + { + "epoch": 11.837416481069042, + "grad_norm": 16.595645904541016, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 297801180, + "step": 5315 + }, + { + "epoch": 11.837416481069042, + "loss": 0.6017987728118896, + "loss_ce": 0.00011419894872233272, + "loss_iou": 0.267578125, + "loss_num": 0.0135498046875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 297801180, + "step": 5315 + }, + { + "epoch": 11.839643652561247, + "grad_norm": 17.16973876953125, + "learning_rate": 1e-06, + "loss": 0.4081, + "num_input_tokens_seen": 297858728, + "step": 5316 + }, + { + "epoch": 11.839643652561247, + "loss": 0.3546229302883148, + "loss_ce": 0.00013074639718979597, + "loss_iou": 0.162109375, + "loss_num": 0.006072998046875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 297858728, + "step": 5316 + }, + { + "epoch": 11.841870824053451, + "grad_norm": 18.856740951538086, + "learning_rate": 1e-06, + "loss": 0.318, + "num_input_tokens_seen": 297915964, + "step": 5317 + }, + { + "epoch": 11.841870824053451, + "loss": 0.23262880742549896, + "loss_ce": 0.0001001132040983066, + "loss_iou": 0.10009765625, + "loss_num": 0.006439208984375, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 297915964, + "step": 5317 + }, + { + "epoch": 11.844097995545656, + "grad_norm": 16.652254104614258, + "learning_rate": 1e-06, + "loss": 0.5737, + "num_input_tokens_seen": 297971460, + "step": 5318 + }, + { + "epoch": 11.844097995545656, + "loss": 0.6619553565979004, + "loss_ce": 0.00033427210291847587, + "loss_iou": 0.27734375, + "loss_num": 0.021484375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 297971460, + "step": 5318 + }, + { + "epoch": 11.846325167037861, + "grad_norm": 27.506261825561523, + "learning_rate": 1e-06, + "loss": 0.4867, + "num_input_tokens_seen": 298027772, + "step": 5319 + }, + { + "epoch": 11.846325167037861, + "loss": 0.42390304803848267, + "loss_ce": 0.00013593377661891282, + "loss_iou": 0.1845703125, + "loss_num": 0.0111083984375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 298027772, + "step": 5319 + }, + { + "epoch": 11.848552338530066, + "grad_norm": 22.7269229888916, + "learning_rate": 1e-06, + "loss": 0.4718, + "num_input_tokens_seen": 298084460, + "step": 5320 + }, + { + "epoch": 11.848552338530066, + "loss": 0.469988614320755, + "loss_ce": 0.0007503442466259003, + "loss_iou": 0.2001953125, + "loss_num": 0.01397705078125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 298084460, + "step": 5320 + }, + { + "epoch": 11.85077951002227, + "grad_norm": 18.758642196655273, + "learning_rate": 1e-06, + "loss": 0.4439, + "num_input_tokens_seen": 298138912, + "step": 5321 + }, + { + "epoch": 11.85077951002227, + "loss": 0.5238094925880432, + "loss_ce": 0.00012785423314198852, + "loss_iou": 0.220703125, + "loss_num": 0.016357421875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 298138912, + "step": 5321 + }, + { + "epoch": 11.853006681514476, + "grad_norm": 20.615978240966797, + "learning_rate": 1e-06, + "loss": 0.5517, + "num_input_tokens_seen": 298193028, + "step": 5322 + }, + { + "epoch": 11.853006681514476, + "loss": 0.5698904991149902, + "loss_ce": 0.00018834380898624659, + "loss_iou": 0.2431640625, + "loss_num": 0.0166015625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 298193028, + "step": 5322 + }, + { + "epoch": 11.855233853006682, + "grad_norm": 16.021848678588867, + "learning_rate": 1e-06, + "loss": 0.342, + "num_input_tokens_seen": 298248720, + "step": 5323 + }, + { + "epoch": 11.855233853006682, + "loss": 0.24690712988376617, + "loss_ce": 0.00014198827557265759, + "loss_iou": 0.10693359375, + "loss_num": 0.00665283203125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 298248720, + "step": 5323 + }, + { + "epoch": 11.857461024498887, + "grad_norm": 23.891801834106445, + "learning_rate": 1e-06, + "loss": 0.561, + "num_input_tokens_seen": 298303808, + "step": 5324 + }, + { + "epoch": 11.857461024498887, + "loss": 0.6983004808425903, + "loss_ce": 0.000302450789604336, + "loss_iou": 0.255859375, + "loss_num": 0.037109375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 298303808, + "step": 5324 + }, + { + "epoch": 11.859688195991092, + "grad_norm": 18.63146209716797, + "learning_rate": 1e-06, + "loss": 0.414, + "num_input_tokens_seen": 298358804, + "step": 5325 + }, + { + "epoch": 11.859688195991092, + "loss": 0.35854852199554443, + "loss_ce": 0.00015006760077085346, + "loss_iou": 0.1572265625, + "loss_num": 0.00897216796875, + "loss_xval": 0.359375, + "num_input_tokens_seen": 298358804, + "step": 5325 + }, + { + "epoch": 11.861915367483297, + "grad_norm": 19.940717697143555, + "learning_rate": 1e-06, + "loss": 0.4274, + "num_input_tokens_seen": 298418072, + "step": 5326 + }, + { + "epoch": 11.861915367483297, + "loss": 0.2874617576599121, + "loss_ce": 0.00010823093907674775, + "loss_iou": 0.130859375, + "loss_num": 0.00494384765625, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 298418072, + "step": 5326 + }, + { + "epoch": 11.864142538975502, + "grad_norm": 23.029714584350586, + "learning_rate": 1e-06, + "loss": 0.4766, + "num_input_tokens_seen": 298476076, + "step": 5327 + }, + { + "epoch": 11.864142538975502, + "loss": 0.5553493499755859, + "loss_ce": 0.00017354739247821271, + "loss_iou": 0.2392578125, + "loss_num": 0.0155029296875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 298476076, + "step": 5327 + }, + { + "epoch": 11.866369710467707, + "grad_norm": 13.412109375, + "learning_rate": 1e-06, + "loss": 0.4057, + "num_input_tokens_seen": 298532080, + "step": 5328 + }, + { + "epoch": 11.866369710467707, + "loss": 0.3781711161136627, + "loss_ce": 0.00011935001384699717, + "loss_iou": 0.1494140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 298532080, + "step": 5328 + }, + { + "epoch": 11.868596881959911, + "grad_norm": 22.732973098754883, + "learning_rate": 1e-06, + "loss": 0.5291, + "num_input_tokens_seen": 298585788, + "step": 5329 + }, + { + "epoch": 11.868596881959911, + "loss": 0.5079842209815979, + "loss_ce": 0.00017171379295177758, + "loss_iou": 0.2314453125, + "loss_num": 0.00885009765625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 298585788, + "step": 5329 + }, + { + "epoch": 11.870824053452116, + "grad_norm": 22.004831314086914, + "learning_rate": 1e-06, + "loss": 0.555, + "num_input_tokens_seen": 298641296, + "step": 5330 + }, + { + "epoch": 11.870824053452116, + "loss": 0.7135499715805054, + "loss_ce": 0.00017109722830355167, + "loss_iou": 0.330078125, + "loss_num": 0.01043701171875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 298641296, + "step": 5330 + }, + { + "epoch": 11.873051224944321, + "grad_norm": 33.82167434692383, + "learning_rate": 1e-06, + "loss": 0.7149, + "num_input_tokens_seen": 298697308, + "step": 5331 + }, + { + "epoch": 11.873051224944321, + "loss": 0.9743344187736511, + "loss_ce": 0.0002132941735908389, + "loss_iou": 0.4296875, + "loss_num": 0.0225830078125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 298697308, + "step": 5331 + }, + { + "epoch": 11.875278396436526, + "grad_norm": 23.670555114746094, + "learning_rate": 1e-06, + "loss": 0.5041, + "num_input_tokens_seen": 298751336, + "step": 5332 + }, + { + "epoch": 11.875278396436526, + "loss": 0.5620195865631104, + "loss_ce": 0.00012994898133911192, + "loss_iou": 0.26171875, + "loss_num": 0.007568359375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 298751336, + "step": 5332 + }, + { + "epoch": 11.877505567928731, + "grad_norm": 19.82380485534668, + "learning_rate": 1e-06, + "loss": 0.6098, + "num_input_tokens_seen": 298808312, + "step": 5333 + }, + { + "epoch": 11.877505567928731, + "loss": 0.33348560333251953, + "loss_ce": 0.00011158882989548147, + "loss_iou": 0.1416015625, + "loss_num": 0.0101318359375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 298808312, + "step": 5333 + }, + { + "epoch": 11.879732739420936, + "grad_norm": 13.22695541381836, + "learning_rate": 1e-06, + "loss": 0.3795, + "num_input_tokens_seen": 298864232, + "step": 5334 + }, + { + "epoch": 11.879732739420936, + "loss": 0.37341421842575073, + "loss_ce": 0.00012321470421738923, + "loss_iou": 0.146484375, + "loss_num": 0.0162353515625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 298864232, + "step": 5334 + }, + { + "epoch": 11.88195991091314, + "grad_norm": 19.371503829956055, + "learning_rate": 1e-06, + "loss": 0.5704, + "num_input_tokens_seen": 298919880, + "step": 5335 + }, + { + "epoch": 11.88195991091314, + "loss": 0.5620713829994202, + "loss_ce": 0.00012070624507032335, + "loss_iou": 0.2392578125, + "loss_num": 0.0166015625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 298919880, + "step": 5335 + }, + { + "epoch": 11.884187082405345, + "grad_norm": 20.38959503173828, + "learning_rate": 1e-06, + "loss": 0.4472, + "num_input_tokens_seen": 298971852, + "step": 5336 + }, + { + "epoch": 11.884187082405345, + "loss": 0.3908699154853821, + "loss_ce": 0.0001228695473400876, + "loss_iou": 0.17578125, + "loss_num": 0.00762939453125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 298971852, + "step": 5336 + }, + { + "epoch": 11.88641425389755, + "grad_norm": 57.669029235839844, + "learning_rate": 1e-06, + "loss": 0.6538, + "num_input_tokens_seen": 299029644, + "step": 5337 + }, + { + "epoch": 11.88641425389755, + "loss": 0.6255764365196228, + "loss_ce": 0.0011867830762639642, + "loss_iou": 0.265625, + "loss_num": 0.018798828125, + "loss_xval": 0.625, + "num_input_tokens_seen": 299029644, + "step": 5337 + }, + { + "epoch": 11.888641425389755, + "grad_norm": 18.04958724975586, + "learning_rate": 1e-06, + "loss": 0.306, + "num_input_tokens_seen": 299085212, + "step": 5338 + }, + { + "epoch": 11.888641425389755, + "loss": 0.3291063904762268, + "loss_ce": 0.00012687721755355597, + "loss_iou": 0.1552734375, + "loss_num": 0.00384521484375, + "loss_xval": 0.328125, + "num_input_tokens_seen": 299085212, + "step": 5338 + }, + { + "epoch": 11.89086859688196, + "grad_norm": 26.414382934570312, + "learning_rate": 1e-06, + "loss": 0.6099, + "num_input_tokens_seen": 299142868, + "step": 5339 + }, + { + "epoch": 11.89086859688196, + "loss": 0.7533276081085205, + "loss_ce": 0.0001537687494419515, + "loss_iou": 0.30078125, + "loss_num": 0.030517578125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 299142868, + "step": 5339 + }, + { + "epoch": 11.893095768374165, + "grad_norm": 53.97526168823242, + "learning_rate": 1e-06, + "loss": 0.6077, + "num_input_tokens_seen": 299200428, + "step": 5340 + }, + { + "epoch": 11.893095768374165, + "loss": 0.7335488796234131, + "loss_ce": 0.0001504583196947351, + "loss_iou": 0.3046875, + "loss_num": 0.024658203125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 299200428, + "step": 5340 + }, + { + "epoch": 11.89532293986637, + "grad_norm": 20.818517684936523, + "learning_rate": 1e-06, + "loss": 0.4193, + "num_input_tokens_seen": 299255160, + "step": 5341 + }, + { + "epoch": 11.89532293986637, + "loss": 0.4669525623321533, + "loss_ce": 0.0001404241193085909, + "loss_iou": 0.20703125, + "loss_num": 0.0103759765625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 299255160, + "step": 5341 + }, + { + "epoch": 11.897550111358575, + "grad_norm": 24.498870849609375, + "learning_rate": 1e-06, + "loss": 0.5564, + "num_input_tokens_seen": 299309776, + "step": 5342 + }, + { + "epoch": 11.897550111358575, + "loss": 0.5995131731033325, + "loss_ce": 0.00014790653949603438, + "loss_iou": 0.26953125, + "loss_num": 0.01202392578125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 299309776, + "step": 5342 + }, + { + "epoch": 11.89977728285078, + "grad_norm": 23.035675048828125, + "learning_rate": 1e-06, + "loss": 0.5478, + "num_input_tokens_seen": 299366904, + "step": 5343 + }, + { + "epoch": 11.89977728285078, + "loss": 0.5480039715766907, + "loss_ce": 0.00015243007510434836, + "loss_iou": 0.228515625, + "loss_num": 0.018310546875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 299366904, + "step": 5343 + }, + { + "epoch": 11.902004454342984, + "grad_norm": 25.75360679626465, + "learning_rate": 1e-06, + "loss": 0.526, + "num_input_tokens_seen": 299421724, + "step": 5344 + }, + { + "epoch": 11.902004454342984, + "loss": 0.39622652530670166, + "loss_ce": 0.00010832876432687044, + "loss_iou": 0.17578125, + "loss_num": 0.00885009765625, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 299421724, + "step": 5344 + }, + { + "epoch": 11.90423162583519, + "grad_norm": 15.133755683898926, + "learning_rate": 1e-06, + "loss": 0.4379, + "num_input_tokens_seen": 299478644, + "step": 5345 + }, + { + "epoch": 11.90423162583519, + "loss": 0.515461802482605, + "loss_ce": 0.00020301563199609518, + "loss_iou": 0.224609375, + "loss_num": 0.0130615234375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 299478644, + "step": 5345 + }, + { + "epoch": 11.906458797327394, + "grad_norm": 14.36051082611084, + "learning_rate": 1e-06, + "loss": 0.4898, + "num_input_tokens_seen": 299534612, + "step": 5346 + }, + { + "epoch": 11.906458797327394, + "loss": 0.5628827810287476, + "loss_ce": 0.00013867080269847065, + "loss_iou": 0.255859375, + "loss_num": 0.01025390625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 299534612, + "step": 5346 + }, + { + "epoch": 11.908685968819599, + "grad_norm": 15.240983009338379, + "learning_rate": 1e-06, + "loss": 0.5047, + "num_input_tokens_seen": 299590416, + "step": 5347 + }, + { + "epoch": 11.908685968819599, + "loss": 0.558215856552124, + "loss_ce": 0.00011038097727578133, + "loss_iou": 0.2490234375, + "loss_num": 0.01202392578125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 299590416, + "step": 5347 + }, + { + "epoch": 11.910913140311804, + "grad_norm": 27.012542724609375, + "learning_rate": 1e-06, + "loss": 0.6381, + "num_input_tokens_seen": 299642864, + "step": 5348 + }, + { + "epoch": 11.910913140311804, + "loss": 0.6602959632873535, + "loss_ce": 0.00013973098248243332, + "loss_iou": 0.28515625, + "loss_num": 0.017822265625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 299642864, + "step": 5348 + }, + { + "epoch": 11.913140311804009, + "grad_norm": 23.465747833251953, + "learning_rate": 1e-06, + "loss": 0.6058, + "num_input_tokens_seen": 299696992, + "step": 5349 + }, + { + "epoch": 11.913140311804009, + "loss": 0.6304618716239929, + "loss_ce": 0.00033489661291241646, + "loss_iou": 0.279296875, + "loss_num": 0.01397705078125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 299696992, + "step": 5349 + }, + { + "epoch": 11.915367483296214, + "grad_norm": 18.98960304260254, + "learning_rate": 1e-06, + "loss": 0.5868, + "num_input_tokens_seen": 299753692, + "step": 5350 + }, + { + "epoch": 11.915367483296214, + "loss": 0.6957433223724365, + "loss_ce": 0.00018664819072000682, + "loss_iou": 0.29296875, + "loss_num": 0.021728515625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 299753692, + "step": 5350 + }, + { + "epoch": 11.917594654788418, + "grad_norm": 18.785682678222656, + "learning_rate": 1e-06, + "loss": 0.5032, + "num_input_tokens_seen": 299807860, + "step": 5351 + }, + { + "epoch": 11.917594654788418, + "loss": 0.48331549763679504, + "loss_ce": 0.00010016474698204547, + "loss_iou": 0.2060546875, + "loss_num": 0.01409912109375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 299807860, + "step": 5351 + }, + { + "epoch": 11.919821826280623, + "grad_norm": 17.613218307495117, + "learning_rate": 1e-06, + "loss": 0.6931, + "num_input_tokens_seen": 299864664, + "step": 5352 + }, + { + "epoch": 11.919821826280623, + "loss": 0.5709285736083984, + "loss_ce": 0.00012783391866832972, + "loss_iou": 0.2392578125, + "loss_num": 0.0184326171875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 299864664, + "step": 5352 + }, + { + "epoch": 11.922048997772828, + "grad_norm": 27.25409698486328, + "learning_rate": 1e-06, + "loss": 0.5266, + "num_input_tokens_seen": 299919748, + "step": 5353 + }, + { + "epoch": 11.922048997772828, + "loss": 0.41632139682769775, + "loss_ce": 0.00030577427241951227, + "loss_iou": 0.1806640625, + "loss_num": 0.0108642578125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 299919748, + "step": 5353 + }, + { + "epoch": 11.924276169265033, + "grad_norm": 23.497005462646484, + "learning_rate": 1e-06, + "loss": 0.4849, + "num_input_tokens_seen": 299977172, + "step": 5354 + }, + { + "epoch": 11.924276169265033, + "loss": 0.5234737396240234, + "loss_ce": 0.00015833397628739476, + "loss_iou": 0.2314453125, + "loss_num": 0.01214599609375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 299977172, + "step": 5354 + }, + { + "epoch": 11.926503340757238, + "grad_norm": 24.323163986206055, + "learning_rate": 1e-06, + "loss": 0.4479, + "num_input_tokens_seen": 300033976, + "step": 5355 + }, + { + "epoch": 11.926503340757238, + "loss": 0.4954897165298462, + "loss_ce": 0.00025045976508408785, + "loss_iou": 0.2216796875, + "loss_num": 0.0103759765625, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 300033976, + "step": 5355 + }, + { + "epoch": 11.928730512249443, + "grad_norm": 12.777032852172852, + "learning_rate": 1e-06, + "loss": 0.3908, + "num_input_tokens_seen": 300089880, + "step": 5356 + }, + { + "epoch": 11.928730512249443, + "loss": 0.38708722591400146, + "loss_ce": 0.00012434215750545263, + "loss_iou": 0.1708984375, + "loss_num": 0.00921630859375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 300089880, + "step": 5356 + }, + { + "epoch": 11.930957683741648, + "grad_norm": 18.82436180114746, + "learning_rate": 1e-06, + "loss": 0.3538, + "num_input_tokens_seen": 300144144, + "step": 5357 + }, + { + "epoch": 11.930957683741648, + "loss": 0.4027044475078583, + "loss_ce": 0.00011656976130325347, + "loss_iou": 0.1796875, + "loss_num": 0.00848388671875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 300144144, + "step": 5357 + }, + { + "epoch": 11.933184855233852, + "grad_norm": 15.833600044250488, + "learning_rate": 1e-06, + "loss": 0.3947, + "num_input_tokens_seen": 300199272, + "step": 5358 + }, + { + "epoch": 11.933184855233852, + "loss": 0.4046657085418701, + "loss_ce": 0.00012470208457671106, + "loss_iou": 0.1767578125, + "loss_num": 0.0103759765625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 300199272, + "step": 5358 + }, + { + "epoch": 11.935412026726057, + "grad_norm": 19.36171531677246, + "learning_rate": 1e-06, + "loss": 0.7569, + "num_input_tokens_seen": 300253388, + "step": 5359 + }, + { + "epoch": 11.935412026726057, + "loss": 0.6293305158615112, + "loss_ce": 0.00018012213695328683, + "loss_iou": 0.28125, + "loss_num": 0.01336669921875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 300253388, + "step": 5359 + }, + { + "epoch": 11.937639198218262, + "grad_norm": 26.180601119995117, + "learning_rate": 1e-06, + "loss": 0.5059, + "num_input_tokens_seen": 300307808, + "step": 5360 + }, + { + "epoch": 11.937639198218262, + "loss": 0.5130758881568909, + "loss_ce": 0.0001364394265692681, + "loss_iou": 0.2373046875, + "loss_num": 0.007659912109375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 300307808, + "step": 5360 + }, + { + "epoch": 11.939866369710467, + "grad_norm": 15.848928451538086, + "learning_rate": 1e-06, + "loss": 0.523, + "num_input_tokens_seen": 300363860, + "step": 5361 + }, + { + "epoch": 11.939866369710467, + "loss": 0.4611893892288208, + "loss_ce": 0.0001298237475566566, + "loss_iou": 0.201171875, + "loss_num": 0.01153564453125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 300363860, + "step": 5361 + }, + { + "epoch": 11.942093541202672, + "grad_norm": 17.287405014038086, + "learning_rate": 1e-06, + "loss": 0.5096, + "num_input_tokens_seen": 300421080, + "step": 5362 + }, + { + "epoch": 11.942093541202672, + "loss": 0.33068329095840454, + "loss_ce": 0.00011686367361107841, + "loss_iou": 0.1376953125, + "loss_num": 0.01092529296875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 300421080, + "step": 5362 + }, + { + "epoch": 11.944320712694877, + "grad_norm": 18.37860870361328, + "learning_rate": 1e-06, + "loss": 0.5593, + "num_input_tokens_seen": 300477264, + "step": 5363 + }, + { + "epoch": 11.944320712694877, + "loss": 0.5681136846542358, + "loss_ce": 0.00024256901815533638, + "loss_iou": 0.25, + "loss_num": 0.01318359375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 300477264, + "step": 5363 + }, + { + "epoch": 11.946547884187082, + "grad_norm": 23.356395721435547, + "learning_rate": 1e-06, + "loss": 0.4799, + "num_input_tokens_seen": 300535224, + "step": 5364 + }, + { + "epoch": 11.946547884187082, + "loss": 0.4557165503501892, + "loss_ce": 0.00015014578821137547, + "loss_iou": 0.19921875, + "loss_num": 0.01165771484375, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 300535224, + "step": 5364 + }, + { + "epoch": 11.948775055679288, + "grad_norm": 23.67485809326172, + "learning_rate": 1e-06, + "loss": 0.7032, + "num_input_tokens_seen": 300589224, + "step": 5365 + }, + { + "epoch": 11.948775055679288, + "loss": 0.7572053670883179, + "loss_ce": 0.00012526212958618999, + "loss_iou": 0.298828125, + "loss_num": 0.031494140625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 300589224, + "step": 5365 + }, + { + "epoch": 11.951002227171493, + "grad_norm": 18.73485565185547, + "learning_rate": 1e-06, + "loss": 0.7542, + "num_input_tokens_seen": 300643164, + "step": 5366 + }, + { + "epoch": 11.951002227171493, + "loss": 0.9482482075691223, + "loss_ce": 0.0002501863054931164, + "loss_iou": 0.357421875, + "loss_num": 0.04638671875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 300643164, + "step": 5366 + }, + { + "epoch": 11.953229398663698, + "grad_norm": 22.971054077148438, + "learning_rate": 1e-06, + "loss": 0.4646, + "num_input_tokens_seen": 300700556, + "step": 5367 + }, + { + "epoch": 11.953229398663698, + "loss": 0.35095998644828796, + "loss_ce": 0.0001299169525736943, + "loss_iou": 0.1630859375, + "loss_num": 0.004913330078125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 300700556, + "step": 5367 + }, + { + "epoch": 11.955456570155903, + "grad_norm": 20.057994842529297, + "learning_rate": 1e-06, + "loss": 0.4387, + "num_input_tokens_seen": 300755556, + "step": 5368 + }, + { + "epoch": 11.955456570155903, + "loss": 0.41150492429733276, + "loss_ce": 0.00012799599790014327, + "loss_iou": 0.18359375, + "loss_num": 0.00885009765625, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 300755556, + "step": 5368 + }, + { + "epoch": 11.957683741648108, + "grad_norm": 17.977746963500977, + "learning_rate": 1e-06, + "loss": 0.5373, + "num_input_tokens_seen": 300809448, + "step": 5369 + }, + { + "epoch": 11.957683741648108, + "loss": 0.5350706577301025, + "loss_ce": 0.00015853876539040357, + "loss_iou": 0.248046875, + "loss_num": 0.007659912109375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 300809448, + "step": 5369 + }, + { + "epoch": 11.959910913140313, + "grad_norm": 31.153818130493164, + "learning_rate": 1e-06, + "loss": 0.4172, + "num_input_tokens_seen": 300863900, + "step": 5370 + }, + { + "epoch": 11.959910913140313, + "loss": 0.4988964796066284, + "loss_ce": 0.00011718348105205223, + "loss_iou": 0.185546875, + "loss_num": 0.025390625, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 300863900, + "step": 5370 + }, + { + "epoch": 11.962138084632517, + "grad_norm": 20.06622314453125, + "learning_rate": 1e-06, + "loss": 0.385, + "num_input_tokens_seen": 300919612, + "step": 5371 + }, + { + "epoch": 11.962138084632517, + "loss": 0.37853488326072693, + "loss_ce": 0.00011691114195855334, + "loss_iou": 0.1728515625, + "loss_num": 0.006683349609375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 300919612, + "step": 5371 + }, + { + "epoch": 11.964365256124722, + "grad_norm": 22.610204696655273, + "learning_rate": 1e-06, + "loss": 0.4837, + "num_input_tokens_seen": 300975088, + "step": 5372 + }, + { + "epoch": 11.964365256124722, + "loss": 0.5133309364318848, + "loss_ce": 0.00014736468438059092, + "loss_iou": 0.2314453125, + "loss_num": 0.01025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 300975088, + "step": 5372 + }, + { + "epoch": 11.966592427616927, + "grad_norm": 23.854276657104492, + "learning_rate": 1e-06, + "loss": 0.5991, + "num_input_tokens_seen": 301032024, + "step": 5373 + }, + { + "epoch": 11.966592427616927, + "loss": 0.6122125387191772, + "loss_ce": 0.00015198803157545626, + "loss_iou": 0.25390625, + "loss_num": 0.0205078125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 301032024, + "step": 5373 + }, + { + "epoch": 11.968819599109132, + "grad_norm": 14.804436683654785, + "learning_rate": 1e-06, + "loss": 0.6603, + "num_input_tokens_seen": 301087604, + "step": 5374 + }, + { + "epoch": 11.968819599109132, + "loss": 0.8352023363113403, + "loss_ce": 0.0001192896525026299, + "loss_iou": 0.345703125, + "loss_num": 0.0289306640625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 301087604, + "step": 5374 + }, + { + "epoch": 11.971046770601337, + "grad_norm": 22.94840431213379, + "learning_rate": 1e-06, + "loss": 0.622, + "num_input_tokens_seen": 301142676, + "step": 5375 + }, + { + "epoch": 11.971046770601337, + "loss": 0.6568686962127686, + "loss_ce": 0.0001304006582358852, + "loss_iou": 0.279296875, + "loss_num": 0.019287109375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 301142676, + "step": 5375 + }, + { + "epoch": 11.973273942093542, + "grad_norm": 12.997676849365234, + "learning_rate": 1e-06, + "loss": 0.4442, + "num_input_tokens_seen": 301201488, + "step": 5376 + }, + { + "epoch": 11.973273942093542, + "loss": 0.3666253089904785, + "loss_ce": 0.00010921184730250388, + "loss_iou": 0.1650390625, + "loss_num": 0.00726318359375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 301201488, + "step": 5376 + }, + { + "epoch": 11.975501113585747, + "grad_norm": 28.56017303466797, + "learning_rate": 1e-06, + "loss": 0.5158, + "num_input_tokens_seen": 301259584, + "step": 5377 + }, + { + "epoch": 11.975501113585747, + "loss": 0.3714648485183716, + "loss_ce": 0.00012695527402684093, + "loss_iou": 0.16796875, + "loss_num": 0.006866455078125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 301259584, + "step": 5377 + }, + { + "epoch": 11.977728285077951, + "grad_norm": 18.66181755065918, + "learning_rate": 1e-06, + "loss": 0.5058, + "num_input_tokens_seen": 301316356, + "step": 5378 + }, + { + "epoch": 11.977728285077951, + "loss": 0.5127632021903992, + "loss_ce": 0.00018997653387486935, + "loss_iou": 0.2353515625, + "loss_num": 0.0084228515625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 301316356, + "step": 5378 + }, + { + "epoch": 11.979955456570156, + "grad_norm": 21.85940170288086, + "learning_rate": 1e-06, + "loss": 0.5486, + "num_input_tokens_seen": 301370076, + "step": 5379 + }, + { + "epoch": 11.979955456570156, + "loss": 0.4443596601486206, + "loss_ce": 0.00014578478294424713, + "loss_iou": 0.2021484375, + "loss_num": 0.00811767578125, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 301370076, + "step": 5379 + }, + { + "epoch": 11.982182628062361, + "grad_norm": 17.551218032836914, + "learning_rate": 1e-06, + "loss": 0.5728, + "num_input_tokens_seen": 301427120, + "step": 5380 + }, + { + "epoch": 11.982182628062361, + "loss": 0.7055622935295105, + "loss_ce": 0.00011793218436650932, + "loss_iou": 0.310546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 301427120, + "step": 5380 + }, + { + "epoch": 11.984409799554566, + "grad_norm": 14.2471284866333, + "learning_rate": 1e-06, + "loss": 0.4307, + "num_input_tokens_seen": 301480952, + "step": 5381 + }, + { + "epoch": 11.984409799554566, + "loss": 0.4769345819950104, + "loss_ce": 0.0001279542047996074, + "loss_iou": 0.20703125, + "loss_num": 0.0126953125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 301480952, + "step": 5381 + }, + { + "epoch": 11.98663697104677, + "grad_norm": 23.872955322265625, + "learning_rate": 1e-06, + "loss": 0.6408, + "num_input_tokens_seen": 301536748, + "step": 5382 + }, + { + "epoch": 11.98663697104677, + "loss": 0.6590736508369446, + "loss_ce": 0.00013808724179398268, + "loss_iou": 0.29296875, + "loss_num": 0.014404296875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 301536748, + "step": 5382 + }, + { + "epoch": 11.988864142538976, + "grad_norm": 27.215002059936523, + "learning_rate": 1e-06, + "loss": 0.6737, + "num_input_tokens_seen": 301591624, + "step": 5383 + }, + { + "epoch": 11.988864142538976, + "loss": 0.7730184197425842, + "loss_ce": 0.0003133205755148083, + "loss_iou": 0.341796875, + "loss_num": 0.01806640625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 301591624, + "step": 5383 + }, + { + "epoch": 11.99109131403118, + "grad_norm": 16.59343910217285, + "learning_rate": 1e-06, + "loss": 0.4352, + "num_input_tokens_seen": 301651368, + "step": 5384 + }, + { + "epoch": 11.99109131403118, + "loss": 0.2988908886909485, + "loss_ce": 0.00024585792561993003, + "loss_iou": 0.1328125, + "loss_num": 0.00677490234375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 301651368, + "step": 5384 + }, + { + "epoch": 11.993318485523385, + "grad_norm": 17.6257266998291, + "learning_rate": 1e-06, + "loss": 0.5236, + "num_input_tokens_seen": 301708848, + "step": 5385 + }, + { + "epoch": 11.993318485523385, + "loss": 0.3951897919178009, + "loss_ce": 0.000170250961673446, + "loss_iou": 0.16796875, + "loss_num": 0.01171875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 301708848, + "step": 5385 + }, + { + "epoch": 11.99554565701559, + "grad_norm": 18.835487365722656, + "learning_rate": 1e-06, + "loss": 0.4832, + "num_input_tokens_seen": 301766832, + "step": 5386 + }, + { + "epoch": 11.99554565701559, + "loss": 0.45329028367996216, + "loss_ce": 0.00016528656124137342, + "loss_iou": 0.2060546875, + "loss_num": 0.0081787109375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 301766832, + "step": 5386 + }, + { + "epoch": 11.997772828507795, + "grad_norm": 23.853605270385742, + "learning_rate": 1e-06, + "loss": 0.6195, + "num_input_tokens_seen": 301822292, + "step": 5387 + }, + { + "epoch": 11.997772828507795, + "loss": 0.6632393598556519, + "loss_ce": 0.00015345893916673958, + "loss_iou": 0.26953125, + "loss_num": 0.0242919921875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 301822292, + "step": 5387 + }, + { + "epoch": 12.0, + "grad_norm": 18.651371002197266, + "learning_rate": 1e-06, + "loss": 0.4679, + "num_input_tokens_seen": 301876956, + "step": 5388 + }, + { + "epoch": 12.0, + "loss": 0.5064892768859863, + "loss_ce": 0.00014158777776174247, + "loss_iou": 0.228515625, + "loss_num": 0.00994873046875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 301876956, + "step": 5388 + }, + { + "epoch": 12.002227171492205, + "grad_norm": 24.909053802490234, + "learning_rate": 1e-06, + "loss": 0.6428, + "num_input_tokens_seen": 301930580, + "step": 5389 + }, + { + "epoch": 12.002227171492205, + "loss": 0.7445248365402222, + "loss_ce": 0.00014005119737703353, + "loss_iou": 0.314453125, + "loss_num": 0.0228271484375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 301930580, + "step": 5389 + }, + { + "epoch": 12.00445434298441, + "grad_norm": 23.323347091674805, + "learning_rate": 1e-06, + "loss": 0.5544, + "num_input_tokens_seen": 301986100, + "step": 5390 + }, + { + "epoch": 12.00445434298441, + "loss": 0.45033586025238037, + "loss_ce": 0.0001405369839631021, + "loss_iou": 0.20703125, + "loss_num": 0.00701904296875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 301986100, + "step": 5390 + }, + { + "epoch": 12.006681514476615, + "grad_norm": 16.831626892089844, + "learning_rate": 1e-06, + "loss": 0.2852, + "num_input_tokens_seen": 302043816, + "step": 5391 + }, + { + "epoch": 12.006681514476615, + "loss": 0.2847111225128174, + "loss_ce": 0.00010420664330013096, + "loss_iou": 0.12158203125, + "loss_num": 0.00823974609375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 302043816, + "step": 5391 + }, + { + "epoch": 12.00890868596882, + "grad_norm": 37.528282165527344, + "learning_rate": 1e-06, + "loss": 0.6277, + "num_input_tokens_seen": 302098732, + "step": 5392 + }, + { + "epoch": 12.00890868596882, + "loss": 0.6959569454193115, + "loss_ce": 0.00015614379663020372, + "loss_iou": 0.30859375, + "loss_num": 0.015869140625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 302098732, + "step": 5392 + }, + { + "epoch": 12.011135857461024, + "grad_norm": 24.19828987121582, + "learning_rate": 1e-06, + "loss": 0.4908, + "num_input_tokens_seen": 302157220, + "step": 5393 + }, + { + "epoch": 12.011135857461024, + "loss": 0.5651944279670715, + "loss_ce": 0.00013094657333567739, + "loss_iou": 0.259765625, + "loss_num": 0.00909423828125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 302157220, + "step": 5393 + }, + { + "epoch": 12.01336302895323, + "grad_norm": 14.71508502960205, + "learning_rate": 1e-06, + "loss": 0.496, + "num_input_tokens_seen": 302214052, + "step": 5394 + }, + { + "epoch": 12.01336302895323, + "loss": 0.5302682518959045, + "loss_ce": 0.00011686344078043476, + "loss_iou": 0.2412109375, + "loss_num": 0.00927734375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 302214052, + "step": 5394 + }, + { + "epoch": 12.015590200445434, + "grad_norm": 18.591922760009766, + "learning_rate": 1e-06, + "loss": 0.491, + "num_input_tokens_seen": 302269552, + "step": 5395 + }, + { + "epoch": 12.015590200445434, + "loss": 0.4772958755493164, + "loss_ce": 0.00024510070215910673, + "loss_iou": 0.2177734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 302269552, + "step": 5395 + }, + { + "epoch": 12.017817371937639, + "grad_norm": 13.601188659667969, + "learning_rate": 1e-06, + "loss": 0.4128, + "num_input_tokens_seen": 302326968, + "step": 5396 + }, + { + "epoch": 12.017817371937639, + "loss": 0.4751969277858734, + "loss_ce": 9.925015910994262e-05, + "loss_iou": 0.197265625, + "loss_num": 0.016357421875, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 302326968, + "step": 5396 + }, + { + "epoch": 12.020044543429844, + "grad_norm": 22.953596115112305, + "learning_rate": 1e-06, + "loss": 0.449, + "num_input_tokens_seen": 302383496, + "step": 5397 + }, + { + "epoch": 12.020044543429844, + "loss": 0.5418699979782104, + "loss_ce": 0.0001219719197251834, + "loss_iou": 0.2431640625, + "loss_num": 0.0111083984375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 302383496, + "step": 5397 + }, + { + "epoch": 12.022271714922049, + "grad_norm": 28.485471725463867, + "learning_rate": 1e-06, + "loss": 0.4552, + "num_input_tokens_seen": 302438028, + "step": 5398 + }, + { + "epoch": 12.022271714922049, + "loss": 0.46738266944885254, + "loss_ce": 9.752875484991819e-05, + "loss_iou": 0.177734375, + "loss_num": 0.0224609375, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 302438028, + "step": 5398 + }, + { + "epoch": 12.024498886414253, + "grad_norm": 16.616348266601562, + "learning_rate": 1e-06, + "loss": 0.5826, + "num_input_tokens_seen": 302494768, + "step": 5399 + }, + { + "epoch": 12.024498886414253, + "loss": 0.5422226190567017, + "loss_ce": 0.0001083296156139113, + "loss_iou": 0.224609375, + "loss_num": 0.0184326171875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 302494768, + "step": 5399 + }, + { + "epoch": 12.026726057906458, + "grad_norm": 13.746217727661133, + "learning_rate": 1e-06, + "loss": 0.4764, + "num_input_tokens_seen": 302550772, + "step": 5400 + }, + { + "epoch": 12.026726057906458, + "loss": 0.4675309658050537, + "loss_ce": 0.0001237354299519211, + "loss_iou": 0.21484375, + "loss_num": 0.0074462890625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 302550772, + "step": 5400 + }, + { + "epoch": 12.028953229398663, + "grad_norm": 17.52446746826172, + "learning_rate": 1e-06, + "loss": 0.5146, + "num_input_tokens_seen": 302608476, + "step": 5401 + }, + { + "epoch": 12.028953229398663, + "loss": 0.5597883462905884, + "loss_ce": 0.0008283640490844846, + "loss_iou": 0.208984375, + "loss_num": 0.0281982421875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 302608476, + "step": 5401 + }, + { + "epoch": 12.031180400890868, + "grad_norm": 21.796451568603516, + "learning_rate": 1e-06, + "loss": 0.4571, + "num_input_tokens_seen": 302663956, + "step": 5402 + }, + { + "epoch": 12.031180400890868, + "loss": 0.5090689659118652, + "loss_ce": 0.00021888897754251957, + "loss_iou": 0.2099609375, + "loss_num": 0.017822265625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 302663956, + "step": 5402 + }, + { + "epoch": 12.033407572383073, + "grad_norm": 24.178424835205078, + "learning_rate": 1e-06, + "loss": 0.45, + "num_input_tokens_seen": 302719060, + "step": 5403 + }, + { + "epoch": 12.033407572383073, + "loss": 0.39098450541496277, + "loss_ce": 0.00011537145473994315, + "loss_iou": 0.18359375, + "loss_num": 0.004669189453125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 302719060, + "step": 5403 + }, + { + "epoch": 12.035634743875278, + "grad_norm": 17.01597785949707, + "learning_rate": 1e-06, + "loss": 0.4611, + "num_input_tokens_seen": 302773636, + "step": 5404 + }, + { + "epoch": 12.035634743875278, + "loss": 0.3315487504005432, + "loss_ce": 0.00012785526632796973, + "loss_iou": 0.1298828125, + "loss_num": 0.01422119140625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 302773636, + "step": 5404 + }, + { + "epoch": 12.037861915367483, + "grad_norm": 14.340649604797363, + "learning_rate": 1e-06, + "loss": 0.4127, + "num_input_tokens_seen": 302831388, + "step": 5405 + }, + { + "epoch": 12.037861915367483, + "loss": 0.3785756528377533, + "loss_ce": 0.00012716675701085478, + "loss_iou": 0.171875, + "loss_num": 0.0068359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 302831388, + "step": 5405 + }, + { + "epoch": 12.040089086859687, + "grad_norm": 25.321979522705078, + "learning_rate": 1e-06, + "loss": 0.5422, + "num_input_tokens_seen": 302888920, + "step": 5406 + }, + { + "epoch": 12.040089086859687, + "loss": 0.36657315492630005, + "loss_ce": 0.00011808329145424068, + "loss_iou": 0.1552734375, + "loss_num": 0.01123046875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 302888920, + "step": 5406 + }, + { + "epoch": 12.042316258351892, + "grad_norm": 18.01930809020996, + "learning_rate": 1e-06, + "loss": 0.4401, + "num_input_tokens_seen": 302942712, + "step": 5407 + }, + { + "epoch": 12.042316258351892, + "loss": 0.49123328924179077, + "loss_ce": 0.00014440924860537052, + "loss_iou": 0.21484375, + "loss_num": 0.01239013671875, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 302942712, + "step": 5407 + }, + { + "epoch": 12.044543429844097, + "grad_norm": 35.77482604980469, + "learning_rate": 1e-06, + "loss": 0.6199, + "num_input_tokens_seen": 302998400, + "step": 5408 + }, + { + "epoch": 12.044543429844097, + "loss": 0.6408650875091553, + "loss_ce": 0.00011807896953541785, + "loss_iou": 0.275390625, + "loss_num": 0.018310546875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 302998400, + "step": 5408 + }, + { + "epoch": 12.046770601336302, + "grad_norm": 40.30015563964844, + "learning_rate": 1e-06, + "loss": 0.4045, + "num_input_tokens_seen": 303052680, + "step": 5409 + }, + { + "epoch": 12.046770601336302, + "loss": 0.36034607887268066, + "loss_ce": 0.00011658002040348947, + "loss_iou": 0.1572265625, + "loss_num": 0.00927734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 303052680, + "step": 5409 + }, + { + "epoch": 12.048997772828507, + "grad_norm": 19.596263885498047, + "learning_rate": 1e-06, + "loss": 0.4598, + "num_input_tokens_seen": 303110848, + "step": 5410 + }, + { + "epoch": 12.048997772828507, + "loss": 0.3800050616264343, + "loss_ce": 0.00012227029947098345, + "loss_iou": 0.177734375, + "loss_num": 0.005035400390625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 303110848, + "step": 5410 + }, + { + "epoch": 12.051224944320714, + "grad_norm": 25.05034637451172, + "learning_rate": 1e-06, + "loss": 0.5697, + "num_input_tokens_seen": 303167852, + "step": 5411 + }, + { + "epoch": 12.051224944320714, + "loss": 0.6123285889625549, + "loss_ce": 0.00014595050015486777, + "loss_iou": 0.271484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 303167852, + "step": 5411 + }, + { + "epoch": 12.053452115812918, + "grad_norm": 15.722366333007812, + "learning_rate": 1e-06, + "loss": 0.4301, + "num_input_tokens_seen": 303226468, + "step": 5412 + }, + { + "epoch": 12.053452115812918, + "loss": 0.44557318091392517, + "loss_ce": 0.00026066991267725825, + "loss_iou": 0.2001953125, + "loss_num": 0.0089111328125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 303226468, + "step": 5412 + }, + { + "epoch": 12.055679287305123, + "grad_norm": 30.098329544067383, + "learning_rate": 1e-06, + "loss": 0.5986, + "num_input_tokens_seen": 303282616, + "step": 5413 + }, + { + "epoch": 12.055679287305123, + "loss": 0.5686604976654053, + "loss_ce": 0.0003011383814737201, + "loss_iou": 0.2353515625, + "loss_num": 0.019287109375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 303282616, + "step": 5413 + }, + { + "epoch": 12.057906458797328, + "grad_norm": 11.368029594421387, + "learning_rate": 1e-06, + "loss": 0.618, + "num_input_tokens_seen": 303340068, + "step": 5414 + }, + { + "epoch": 12.057906458797328, + "loss": 0.6175664663314819, + "loss_ce": 0.00013479686458595097, + "loss_iou": 0.248046875, + "loss_num": 0.0242919921875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 303340068, + "step": 5414 + }, + { + "epoch": 12.060133630289533, + "grad_norm": 18.459564208984375, + "learning_rate": 1e-06, + "loss": 0.4627, + "num_input_tokens_seen": 303396632, + "step": 5415 + }, + { + "epoch": 12.060133630289533, + "loss": 0.5457392930984497, + "loss_ce": 0.00014600652502849698, + "loss_iou": 0.25, + "loss_num": 0.00885009765625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 303396632, + "step": 5415 + }, + { + "epoch": 12.062360801781738, + "grad_norm": 27.545385360717773, + "learning_rate": 1e-06, + "loss": 0.4222, + "num_input_tokens_seen": 303452996, + "step": 5416 + }, + { + "epoch": 12.062360801781738, + "loss": 0.3862226605415344, + "loss_ce": 0.00011425558477640152, + "loss_iou": 0.1796875, + "loss_num": 0.00531005859375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 303452996, + "step": 5416 + }, + { + "epoch": 12.064587973273943, + "grad_norm": 17.189687728881836, + "learning_rate": 1e-06, + "loss": 0.6149, + "num_input_tokens_seen": 303509476, + "step": 5417 + }, + { + "epoch": 12.064587973273943, + "loss": 0.4504542648792267, + "loss_ce": 0.0001368774683214724, + "loss_iou": 0.201171875, + "loss_num": 0.00970458984375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 303509476, + "step": 5417 + }, + { + "epoch": 12.066815144766148, + "grad_norm": 13.223028182983398, + "learning_rate": 1e-06, + "loss": 0.4632, + "num_input_tokens_seen": 303566016, + "step": 5418 + }, + { + "epoch": 12.066815144766148, + "loss": 0.5367510318756104, + "loss_ce": 0.00012994115240871906, + "loss_iou": 0.2041015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 303566016, + "step": 5418 + }, + { + "epoch": 12.069042316258352, + "grad_norm": 22.01005744934082, + "learning_rate": 1e-06, + "loss": 0.5708, + "num_input_tokens_seen": 303621640, + "step": 5419 + }, + { + "epoch": 12.069042316258352, + "loss": 0.33945903182029724, + "loss_ce": 0.00010356900747865438, + "loss_iou": 0.1474609375, + "loss_num": 0.00872802734375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 303621640, + "step": 5419 + }, + { + "epoch": 12.071269487750557, + "grad_norm": 16.25597381591797, + "learning_rate": 1e-06, + "loss": 0.4158, + "num_input_tokens_seen": 303676132, + "step": 5420 + }, + { + "epoch": 12.071269487750557, + "loss": 0.43920350074768066, + "loss_ce": 0.00011659059964586049, + "loss_iou": 0.1875, + "loss_num": 0.0126953125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 303676132, + "step": 5420 + }, + { + "epoch": 12.073496659242762, + "grad_norm": 26.435026168823242, + "learning_rate": 1e-06, + "loss": 0.4823, + "num_input_tokens_seen": 303731772, + "step": 5421 + }, + { + "epoch": 12.073496659242762, + "loss": 0.5533719658851624, + "loss_ce": 0.00014934616046957672, + "loss_iou": 0.2099609375, + "loss_num": 0.0264892578125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 303731772, + "step": 5421 + }, + { + "epoch": 12.075723830734967, + "grad_norm": 317.4898376464844, + "learning_rate": 1e-06, + "loss": 0.6007, + "num_input_tokens_seen": 303788636, + "step": 5422 + }, + { + "epoch": 12.075723830734967, + "loss": 0.8187873363494873, + "loss_ce": 0.00018380230176262558, + "loss_iou": 0.326171875, + "loss_num": 0.033203125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 303788636, + "step": 5422 + }, + { + "epoch": 12.077951002227172, + "grad_norm": 16.596609115600586, + "learning_rate": 1e-06, + "loss": 0.3791, + "num_input_tokens_seen": 303844472, + "step": 5423 + }, + { + "epoch": 12.077951002227172, + "loss": 0.29760003089904785, + "loss_ce": 0.00011467649164842442, + "loss_iou": 0.1259765625, + "loss_num": 0.00921630859375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 303844472, + "step": 5423 + }, + { + "epoch": 12.080178173719377, + "grad_norm": 19.923240661621094, + "learning_rate": 1e-06, + "loss": 0.5823, + "num_input_tokens_seen": 303899116, + "step": 5424 + }, + { + "epoch": 12.080178173719377, + "loss": 0.7797752618789673, + "loss_ce": 0.00011215673293918371, + "loss_iou": 0.318359375, + "loss_num": 0.028564453125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 303899116, + "step": 5424 + }, + { + "epoch": 12.082405345211582, + "grad_norm": 22.035127639770508, + "learning_rate": 1e-06, + "loss": 0.5462, + "num_input_tokens_seen": 303952664, + "step": 5425 + }, + { + "epoch": 12.082405345211582, + "loss": 0.6474908590316772, + "loss_ce": 0.00015202595386654139, + "loss_iou": 0.26953125, + "loss_num": 0.0216064453125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 303952664, + "step": 5425 + }, + { + "epoch": 12.084632516703786, + "grad_norm": 25.01907730102539, + "learning_rate": 1e-06, + "loss": 0.5354, + "num_input_tokens_seen": 304009648, + "step": 5426 + }, + { + "epoch": 12.084632516703786, + "loss": 0.660736083984375, + "loss_ce": 0.00021362912957556546, + "loss_iou": 0.263671875, + "loss_num": 0.0267333984375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 304009648, + "step": 5426 + }, + { + "epoch": 12.086859688195991, + "grad_norm": 14.428705215454102, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 304069088, + "step": 5427 + }, + { + "epoch": 12.086859688195991, + "loss": 0.3368920683860779, + "loss_ce": 0.0001000880729407072, + "loss_iou": 0.1572265625, + "loss_num": 0.0045166015625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 304069088, + "step": 5427 + }, + { + "epoch": 12.089086859688196, + "grad_norm": 23.767419815063477, + "learning_rate": 1e-06, + "loss": 0.5659, + "num_input_tokens_seen": 304125008, + "step": 5428 + }, + { + "epoch": 12.089086859688196, + "loss": 0.5689517259597778, + "loss_ce": 0.00010411132825538516, + "loss_iou": 0.2275390625, + "loss_num": 0.022705078125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 304125008, + "step": 5428 + }, + { + "epoch": 12.091314031180401, + "grad_norm": 25.242143630981445, + "learning_rate": 1e-06, + "loss": 0.3432, + "num_input_tokens_seen": 304182308, + "step": 5429 + }, + { + "epoch": 12.091314031180401, + "loss": 0.38244450092315674, + "loss_ce": 0.00012029155914206058, + "loss_iou": 0.1796875, + "loss_num": 0.00445556640625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 304182308, + "step": 5429 + }, + { + "epoch": 12.093541202672606, + "grad_norm": 13.21595287322998, + "learning_rate": 1e-06, + "loss": 0.5233, + "num_input_tokens_seen": 304239692, + "step": 5430 + }, + { + "epoch": 12.093541202672606, + "loss": 0.6246329545974731, + "loss_ce": 0.00012122253247071058, + "loss_iou": 0.263671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.625, + "num_input_tokens_seen": 304239692, + "step": 5430 + }, + { + "epoch": 12.09576837416481, + "grad_norm": 26.7417049407959, + "learning_rate": 1e-06, + "loss": 0.6778, + "num_input_tokens_seen": 304296608, + "step": 5431 + }, + { + "epoch": 12.09576837416481, + "loss": 0.5760781168937683, + "loss_ce": 0.0001503834209870547, + "loss_iou": 0.26171875, + "loss_num": 0.010498046875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 304296608, + "step": 5431 + }, + { + "epoch": 12.097995545657016, + "grad_norm": 14.140853881835938, + "learning_rate": 1e-06, + "loss": 0.5346, + "num_input_tokens_seen": 304351720, + "step": 5432 + }, + { + "epoch": 12.097995545657016, + "loss": 0.6522471308708191, + "loss_ce": 0.00014752510469406843, + "loss_iou": 0.265625, + "loss_num": 0.0244140625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 304351720, + "step": 5432 + }, + { + "epoch": 12.10022271714922, + "grad_norm": 17.195842742919922, + "learning_rate": 1e-06, + "loss": 0.4608, + "num_input_tokens_seen": 304411140, + "step": 5433 + }, + { + "epoch": 12.10022271714922, + "loss": 0.4692525267601013, + "loss_ce": 0.00013632513582706451, + "loss_iou": 0.212890625, + "loss_num": 0.00848388671875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 304411140, + "step": 5433 + }, + { + "epoch": 12.102449888641425, + "grad_norm": 17.013219833374023, + "learning_rate": 1e-06, + "loss": 0.5863, + "num_input_tokens_seen": 304466832, + "step": 5434 + }, + { + "epoch": 12.102449888641425, + "loss": 0.46361833810806274, + "loss_ce": 0.00011738392640836537, + "loss_iou": 0.205078125, + "loss_num": 0.01043701171875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 304466832, + "step": 5434 + }, + { + "epoch": 12.10467706013363, + "grad_norm": 19.217731475830078, + "learning_rate": 1e-06, + "loss": 0.5909, + "num_input_tokens_seen": 304524596, + "step": 5435 + }, + { + "epoch": 12.10467706013363, + "loss": 0.5402824878692627, + "loss_ce": 0.00012132612755522132, + "loss_iou": 0.240234375, + "loss_num": 0.01220703125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 304524596, + "step": 5435 + }, + { + "epoch": 12.106904231625835, + "grad_norm": 12.651470184326172, + "learning_rate": 1e-06, + "loss": 0.4578, + "num_input_tokens_seen": 304581824, + "step": 5436 + }, + { + "epoch": 12.106904231625835, + "loss": 0.35941869020462036, + "loss_ce": 0.00016576812777202576, + "loss_iou": 0.1435546875, + "loss_num": 0.0142822265625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 304581824, + "step": 5436 + }, + { + "epoch": 12.10913140311804, + "grad_norm": 15.489208221435547, + "learning_rate": 1e-06, + "loss": 0.3276, + "num_input_tokens_seen": 304637832, + "step": 5437 + }, + { + "epoch": 12.10913140311804, + "loss": 0.27845609188079834, + "loss_ce": 0.00013578942161984742, + "loss_iou": 0.1162109375, + "loss_num": 0.00909423828125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 304637832, + "step": 5437 + }, + { + "epoch": 12.111358574610245, + "grad_norm": 31.793598175048828, + "learning_rate": 1e-06, + "loss": 0.4174, + "num_input_tokens_seen": 304695616, + "step": 5438 + }, + { + "epoch": 12.111358574610245, + "loss": 0.40172773599624634, + "loss_ce": 0.00011640525190159678, + "loss_iou": 0.166015625, + "loss_num": 0.01397705078125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 304695616, + "step": 5438 + }, + { + "epoch": 12.11358574610245, + "grad_norm": 23.2586669921875, + "learning_rate": 1e-06, + "loss": 0.3909, + "num_input_tokens_seen": 304751796, + "step": 5439 + }, + { + "epoch": 12.11358574610245, + "loss": 0.3819471001625061, + "loss_ce": 0.00011117332905996591, + "loss_iou": 0.171875, + "loss_num": 0.00762939453125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 304751796, + "step": 5439 + }, + { + "epoch": 12.115812917594655, + "grad_norm": 15.429174423217773, + "learning_rate": 1e-06, + "loss": 0.3506, + "num_input_tokens_seen": 304806936, + "step": 5440 + }, + { + "epoch": 12.115812917594655, + "loss": 0.3502577543258667, + "loss_ce": 0.0001600981195224449, + "loss_iou": 0.16015625, + "loss_num": 0.00616455078125, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 304806936, + "step": 5440 + }, + { + "epoch": 12.11804008908686, + "grad_norm": 16.58045196533203, + "learning_rate": 1e-06, + "loss": 0.408, + "num_input_tokens_seen": 304864392, + "step": 5441 + }, + { + "epoch": 12.11804008908686, + "loss": 0.4236249625682831, + "loss_ce": 0.00016305723693221807, + "loss_iou": 0.173828125, + "loss_num": 0.01513671875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 304864392, + "step": 5441 + }, + { + "epoch": 12.120267260579064, + "grad_norm": 13.274721145629883, + "learning_rate": 1e-06, + "loss": 0.4532, + "num_input_tokens_seen": 304921888, + "step": 5442 + }, + { + "epoch": 12.120267260579064, + "loss": 0.4875772297382355, + "loss_ce": 0.00015048254863359034, + "loss_iou": 0.21484375, + "loss_num": 0.01171875, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 304921888, + "step": 5442 + }, + { + "epoch": 12.122494432071269, + "grad_norm": 17.956384658813477, + "learning_rate": 1e-06, + "loss": 0.6849, + "num_input_tokens_seen": 304977556, + "step": 5443 + }, + { + "epoch": 12.122494432071269, + "loss": 0.7030162811279297, + "loss_ce": 0.00013541628140956163, + "loss_iou": 0.263671875, + "loss_num": 0.03515625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 304977556, + "step": 5443 + }, + { + "epoch": 12.124721603563474, + "grad_norm": 16.645193099975586, + "learning_rate": 1e-06, + "loss": 0.6395, + "num_input_tokens_seen": 305033024, + "step": 5444 + }, + { + "epoch": 12.124721603563474, + "loss": 0.5505350232124329, + "loss_ce": 0.00011998764239251614, + "loss_iou": 0.244140625, + "loss_num": 0.01239013671875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 305033024, + "step": 5444 + }, + { + "epoch": 12.126948775055679, + "grad_norm": 13.85136604309082, + "learning_rate": 1e-06, + "loss": 0.4585, + "num_input_tokens_seen": 305089956, + "step": 5445 + }, + { + "epoch": 12.126948775055679, + "loss": 0.44617876410484314, + "loss_ce": 0.00013385072816163301, + "loss_iou": 0.1650390625, + "loss_num": 0.02294921875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 305089956, + "step": 5445 + }, + { + "epoch": 12.129175946547884, + "grad_norm": 23.356822967529297, + "learning_rate": 1e-06, + "loss": 0.4678, + "num_input_tokens_seen": 305143452, + "step": 5446 + }, + { + "epoch": 12.129175946547884, + "loss": 0.4208873510360718, + "loss_ce": 0.00011096645175712183, + "loss_iou": 0.1904296875, + "loss_num": 0.00811767578125, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 305143452, + "step": 5446 + }, + { + "epoch": 12.131403118040089, + "grad_norm": 24.38487434387207, + "learning_rate": 1e-06, + "loss": 0.488, + "num_input_tokens_seen": 305196260, + "step": 5447 + }, + { + "epoch": 12.131403118040089, + "loss": 0.4843854010105133, + "loss_ce": 0.00013247507740743458, + "loss_iou": 0.2119140625, + "loss_num": 0.01202392578125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 305196260, + "step": 5447 + }, + { + "epoch": 12.133630289532293, + "grad_norm": 15.976398468017578, + "learning_rate": 1e-06, + "loss": 0.4716, + "num_input_tokens_seen": 305253716, + "step": 5448 + }, + { + "epoch": 12.133630289532293, + "loss": 0.5756908655166626, + "loss_ce": 0.00012929181684739888, + "loss_iou": 0.23046875, + "loss_num": 0.02294921875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 305253716, + "step": 5448 + }, + { + "epoch": 12.135857461024498, + "grad_norm": 17.87403678894043, + "learning_rate": 1e-06, + "loss": 0.5232, + "num_input_tokens_seen": 305307148, + "step": 5449 + }, + { + "epoch": 12.135857461024498, + "loss": 0.6177228689193726, + "loss_ce": 0.00016915984451770782, + "loss_iou": 0.26953125, + "loss_num": 0.0159912109375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 305307148, + "step": 5449 + }, + { + "epoch": 12.138084632516703, + "grad_norm": 18.055152893066406, + "learning_rate": 1e-06, + "loss": 0.5738, + "num_input_tokens_seen": 305364540, + "step": 5450 + }, + { + "epoch": 12.138084632516703, + "loss": 0.615594744682312, + "loss_ce": 0.00011621808516792953, + "loss_iou": 0.267578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 305364540, + "step": 5450 + }, + { + "epoch": 12.140311804008908, + "grad_norm": 13.896754264831543, + "learning_rate": 1e-06, + "loss": 0.4093, + "num_input_tokens_seen": 305419432, + "step": 5451 + }, + { + "epoch": 12.140311804008908, + "loss": 0.4336770176887512, + "loss_ce": 0.0009377308306284249, + "loss_iou": 0.1826171875, + "loss_num": 0.013671875, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 305419432, + "step": 5451 + }, + { + "epoch": 12.142538975501113, + "grad_norm": 70.83055114746094, + "learning_rate": 1e-06, + "loss": 0.4653, + "num_input_tokens_seen": 305475840, + "step": 5452 + }, + { + "epoch": 12.142538975501113, + "loss": 0.3804883062839508, + "loss_ce": 0.00011720365728251636, + "loss_iou": 0.166015625, + "loss_num": 0.00970458984375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 305475840, + "step": 5452 + }, + { + "epoch": 12.144766146993318, + "grad_norm": 33.42408752441406, + "learning_rate": 1e-06, + "loss": 0.661, + "num_input_tokens_seen": 305532432, + "step": 5453 + }, + { + "epoch": 12.144766146993318, + "loss": 0.8063072562217712, + "loss_ce": 0.000154937180923298, + "loss_iou": 0.337890625, + "loss_num": 0.025634765625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 305532432, + "step": 5453 + }, + { + "epoch": 12.146993318485523, + "grad_norm": 23.046737670898438, + "learning_rate": 1e-06, + "loss": 0.3551, + "num_input_tokens_seen": 305589804, + "step": 5454 + }, + { + "epoch": 12.146993318485523, + "loss": 0.37097907066345215, + "loss_ce": 0.00012947215873282403, + "loss_iou": 0.166015625, + "loss_num": 0.0076904296875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 305589804, + "step": 5454 + }, + { + "epoch": 12.14922048997773, + "grad_norm": 17.94960594177246, + "learning_rate": 1e-06, + "loss": 0.533, + "num_input_tokens_seen": 305643368, + "step": 5455 + }, + { + "epoch": 12.14922048997773, + "loss": 0.5467525720596313, + "loss_ce": 0.00012171192065579817, + "loss_iou": 0.255859375, + "loss_num": 0.00701904296875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 305643368, + "step": 5455 + }, + { + "epoch": 12.151447661469934, + "grad_norm": 21.4966983795166, + "learning_rate": 1e-06, + "loss": 0.4453, + "num_input_tokens_seen": 305698236, + "step": 5456 + }, + { + "epoch": 12.151447661469934, + "loss": 0.5014622807502747, + "loss_ce": 0.00011951071064686403, + "loss_iou": 0.220703125, + "loss_num": 0.01214599609375, + "loss_xval": 0.5, + "num_input_tokens_seen": 305698236, + "step": 5456 + }, + { + "epoch": 12.153674832962139, + "grad_norm": 32.09659194946289, + "learning_rate": 1e-06, + "loss": 0.5381, + "num_input_tokens_seen": 305755728, + "step": 5457 + }, + { + "epoch": 12.153674832962139, + "loss": 0.6394698023796082, + "loss_ce": 0.0001875544257927686, + "loss_iou": 0.2890625, + "loss_num": 0.01177978515625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 305755728, + "step": 5457 + }, + { + "epoch": 12.155902004454344, + "grad_norm": 28.464859008789062, + "learning_rate": 1e-06, + "loss": 0.4574, + "num_input_tokens_seen": 305811120, + "step": 5458 + }, + { + "epoch": 12.155902004454344, + "loss": 0.34215444326400757, + "loss_ce": 0.00011341302888467908, + "loss_iou": 0.1474609375, + "loss_num": 0.00933837890625, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 305811120, + "step": 5458 + }, + { + "epoch": 12.158129175946549, + "grad_norm": 22.244796752929688, + "learning_rate": 1e-06, + "loss": 0.4736, + "num_input_tokens_seen": 305867104, + "step": 5459 + }, + { + "epoch": 12.158129175946549, + "loss": 0.4112476706504822, + "loss_ce": 0.00011484247806947678, + "loss_iou": 0.1884765625, + "loss_num": 0.006866455078125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 305867104, + "step": 5459 + }, + { + "epoch": 12.160356347438753, + "grad_norm": 29.14021873474121, + "learning_rate": 1e-06, + "loss": 0.5295, + "num_input_tokens_seen": 305921188, + "step": 5460 + }, + { + "epoch": 12.160356347438753, + "loss": 0.6369410753250122, + "loss_ce": 0.0004054922901559621, + "loss_iou": 0.283203125, + "loss_num": 0.0140380859375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 305921188, + "step": 5460 + }, + { + "epoch": 12.162583518930958, + "grad_norm": 16.301149368286133, + "learning_rate": 1e-06, + "loss": 0.4447, + "num_input_tokens_seen": 305978144, + "step": 5461 + }, + { + "epoch": 12.162583518930958, + "loss": 0.5294920802116394, + "loss_ce": 0.0001952020829776302, + "loss_iou": 0.2333984375, + "loss_num": 0.01263427734375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 305978144, + "step": 5461 + }, + { + "epoch": 12.164810690423163, + "grad_norm": 17.781740188598633, + "learning_rate": 1e-06, + "loss": 0.2532, + "num_input_tokens_seen": 306033640, + "step": 5462 + }, + { + "epoch": 12.164810690423163, + "loss": 0.2828177809715271, + "loss_ce": 0.0001639821712160483, + "loss_iou": 0.1259765625, + "loss_num": 0.00604248046875, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 306033640, + "step": 5462 + }, + { + "epoch": 12.167037861915368, + "grad_norm": 33.484806060791016, + "learning_rate": 1e-06, + "loss": 0.4501, + "num_input_tokens_seen": 306091020, + "step": 5463 + }, + { + "epoch": 12.167037861915368, + "loss": 0.46839091181755066, + "loss_ce": 0.00012922003224957734, + "loss_iou": 0.18359375, + "loss_num": 0.020263671875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 306091020, + "step": 5463 + }, + { + "epoch": 12.169265033407573, + "grad_norm": 26.4747257232666, + "learning_rate": 1e-06, + "loss": 0.6015, + "num_input_tokens_seen": 306147840, + "step": 5464 + }, + { + "epoch": 12.169265033407573, + "loss": 0.5650879740715027, + "loss_ce": 0.00014657803694717586, + "loss_iou": 0.25390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 306147840, + "step": 5464 + }, + { + "epoch": 12.171492204899778, + "grad_norm": 24.981590270996094, + "learning_rate": 1e-06, + "loss": 0.6129, + "num_input_tokens_seen": 306203280, + "step": 5465 + }, + { + "epoch": 12.171492204899778, + "loss": 0.8055815100669861, + "loss_ce": 0.00016156808123923838, + "loss_iou": 0.376953125, + "loss_num": 0.010009765625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 306203280, + "step": 5465 + }, + { + "epoch": 12.173719376391983, + "grad_norm": 14.754565238952637, + "learning_rate": 1e-06, + "loss": 0.5721, + "num_input_tokens_seen": 306257676, + "step": 5466 + }, + { + "epoch": 12.173719376391983, + "loss": 0.5629807710647583, + "loss_ce": 0.00011456996435299516, + "loss_iou": 0.25390625, + "loss_num": 0.0108642578125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 306257676, + "step": 5466 + }, + { + "epoch": 12.175946547884188, + "grad_norm": 25.851285934448242, + "learning_rate": 1e-06, + "loss": 0.5937, + "num_input_tokens_seen": 306314104, + "step": 5467 + }, + { + "epoch": 12.175946547884188, + "loss": 0.5030755996704102, + "loss_ce": 0.00020692951511591673, + "loss_iou": 0.2109375, + "loss_num": 0.0159912109375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 306314104, + "step": 5467 + }, + { + "epoch": 12.178173719376392, + "grad_norm": 19.489961624145508, + "learning_rate": 1e-06, + "loss": 0.385, + "num_input_tokens_seen": 306372532, + "step": 5468 + }, + { + "epoch": 12.178173719376392, + "loss": 0.4690125584602356, + "loss_ce": 0.00014049038873054087, + "loss_iou": 0.2001953125, + "loss_num": 0.0135498046875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 306372532, + "step": 5468 + }, + { + "epoch": 12.180400890868597, + "grad_norm": 14.486827850341797, + "learning_rate": 1e-06, + "loss": 0.416, + "num_input_tokens_seen": 306428160, + "step": 5469 + }, + { + "epoch": 12.180400890868597, + "loss": 0.4102684259414673, + "loss_ce": 0.00011218419240321964, + "loss_iou": 0.185546875, + "loss_num": 0.0078125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 306428160, + "step": 5469 + }, + { + "epoch": 12.182628062360802, + "grad_norm": 48.91545867919922, + "learning_rate": 1e-06, + "loss": 0.5029, + "num_input_tokens_seen": 306484716, + "step": 5470 + }, + { + "epoch": 12.182628062360802, + "loss": 0.4544667601585388, + "loss_ce": 0.00012103513290639967, + "loss_iou": 0.1982421875, + "loss_num": 0.01165771484375, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 306484716, + "step": 5470 + }, + { + "epoch": 12.184855233853007, + "grad_norm": 15.201376914978027, + "learning_rate": 1e-06, + "loss": 0.6931, + "num_input_tokens_seen": 306542220, + "step": 5471 + }, + { + "epoch": 12.184855233853007, + "loss": 0.5883694887161255, + "loss_ce": 0.00011262335465289652, + "loss_iou": 0.26953125, + "loss_num": 0.00946044921875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 306542220, + "step": 5471 + }, + { + "epoch": 12.187082405345212, + "grad_norm": 17.70683479309082, + "learning_rate": 1e-06, + "loss": 0.6325, + "num_input_tokens_seen": 306598764, + "step": 5472 + }, + { + "epoch": 12.187082405345212, + "loss": 0.5439872741699219, + "loss_ce": 0.00016404017515014857, + "loss_iou": 0.234375, + "loss_num": 0.01507568359375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 306598764, + "step": 5472 + }, + { + "epoch": 12.189309576837417, + "grad_norm": 15.60269546508789, + "learning_rate": 1e-06, + "loss": 0.4718, + "num_input_tokens_seen": 306655192, + "step": 5473 + }, + { + "epoch": 12.189309576837417, + "loss": 0.5688609480857849, + "loss_ce": 0.000135363225126639, + "loss_iou": 0.2373046875, + "loss_num": 0.0189208984375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 306655192, + "step": 5473 + }, + { + "epoch": 12.191536748329622, + "grad_norm": 23.64163589477539, + "learning_rate": 1e-06, + "loss": 0.5773, + "num_input_tokens_seen": 306711220, + "step": 5474 + }, + { + "epoch": 12.191536748329622, + "loss": 0.4205397963523865, + "loss_ce": 0.00012965156929567456, + "loss_iou": 0.1796875, + "loss_num": 0.0120849609375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 306711220, + "step": 5474 + }, + { + "epoch": 12.193763919821826, + "grad_norm": 19.232250213623047, + "learning_rate": 1e-06, + "loss": 0.6147, + "num_input_tokens_seen": 306765912, + "step": 5475 + }, + { + "epoch": 12.193763919821826, + "loss": 0.6638138294219971, + "loss_ce": 0.0001785356434993446, + "loss_iou": 0.3046875, + "loss_num": 0.01116943359375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 306765912, + "step": 5475 + }, + { + "epoch": 12.195991091314031, + "grad_norm": 24.970657348632812, + "learning_rate": 1e-06, + "loss": 0.4975, + "num_input_tokens_seen": 306823380, + "step": 5476 + }, + { + "epoch": 12.195991091314031, + "loss": 0.5586141347885132, + "loss_ce": 0.000142430275445804, + "loss_iou": 0.255859375, + "loss_num": 0.00927734375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 306823380, + "step": 5476 + }, + { + "epoch": 12.198218262806236, + "grad_norm": 23.488998413085938, + "learning_rate": 1e-06, + "loss": 0.5034, + "num_input_tokens_seen": 306880924, + "step": 5477 + }, + { + "epoch": 12.198218262806236, + "loss": 0.5716678500175476, + "loss_ce": 0.00013462700007949024, + "loss_iou": 0.24609375, + "loss_num": 0.015869140625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 306880924, + "step": 5477 + }, + { + "epoch": 12.200445434298441, + "grad_norm": 13.182589530944824, + "learning_rate": 1e-06, + "loss": 0.4163, + "num_input_tokens_seen": 306937384, + "step": 5478 + }, + { + "epoch": 12.200445434298441, + "loss": 0.4369211792945862, + "loss_ce": 0.00015359185636043549, + "loss_iou": 0.1982421875, + "loss_num": 0.0079345703125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 306937384, + "step": 5478 + }, + { + "epoch": 12.202672605790646, + "grad_norm": 24.044740676879883, + "learning_rate": 1e-06, + "loss": 0.365, + "num_input_tokens_seen": 306990924, + "step": 5479 + }, + { + "epoch": 12.202672605790646, + "loss": 0.3719392418861389, + "loss_ce": 0.00011306589294690639, + "loss_iou": 0.16796875, + "loss_num": 0.00701904296875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 306990924, + "step": 5479 + }, + { + "epoch": 12.20489977728285, + "grad_norm": 29.678030014038086, + "learning_rate": 1e-06, + "loss": 0.5995, + "num_input_tokens_seen": 307047468, + "step": 5480 + }, + { + "epoch": 12.20489977728285, + "loss": 0.4761919677257538, + "loss_ce": 0.00011774588347179815, + "loss_iou": 0.2060546875, + "loss_num": 0.01275634765625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 307047468, + "step": 5480 + }, + { + "epoch": 12.207126948775056, + "grad_norm": 11.851914405822754, + "learning_rate": 1e-06, + "loss": 0.4111, + "num_input_tokens_seen": 307103488, + "step": 5481 + }, + { + "epoch": 12.207126948775056, + "loss": 0.25440388917922974, + "loss_ce": 0.00022297943360172212, + "loss_iou": 0.11328125, + "loss_num": 0.00567626953125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 307103488, + "step": 5481 + }, + { + "epoch": 12.20935412026726, + "grad_norm": 17.849971771240234, + "learning_rate": 1e-06, + "loss": 0.3751, + "num_input_tokens_seen": 307159768, + "step": 5482 + }, + { + "epoch": 12.20935412026726, + "loss": 0.32383567094802856, + "loss_ce": 0.00010521031072130427, + "loss_iou": 0.13671875, + "loss_num": 0.00994873046875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 307159768, + "step": 5482 + }, + { + "epoch": 12.211581291759465, + "grad_norm": 24.031517028808594, + "learning_rate": 1e-06, + "loss": 0.3802, + "num_input_tokens_seen": 307212860, + "step": 5483 + }, + { + "epoch": 12.211581291759465, + "loss": 0.354860782623291, + "loss_ce": 0.00012447501649148762, + "loss_iou": 0.1630859375, + "loss_num": 0.0054931640625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 307212860, + "step": 5483 + }, + { + "epoch": 12.21380846325167, + "grad_norm": 25.485084533691406, + "learning_rate": 1e-06, + "loss": 0.4435, + "num_input_tokens_seen": 307265808, + "step": 5484 + }, + { + "epoch": 12.21380846325167, + "loss": 0.42531439661979675, + "loss_ce": 0.00013584828411694616, + "loss_iou": 0.1943359375, + "loss_num": 0.00714111328125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 307265808, + "step": 5484 + }, + { + "epoch": 12.216035634743875, + "grad_norm": 31.889545440673828, + "learning_rate": 1e-06, + "loss": 0.5601, + "num_input_tokens_seen": 307318728, + "step": 5485 + }, + { + "epoch": 12.216035634743875, + "loss": 0.5997210741043091, + "loss_ce": 0.00011171124060638249, + "loss_iou": 0.28125, + "loss_num": 0.0078125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 307318728, + "step": 5485 + }, + { + "epoch": 12.21826280623608, + "grad_norm": 13.831578254699707, + "learning_rate": 1e-06, + "loss": 0.4215, + "num_input_tokens_seen": 307374860, + "step": 5486 + }, + { + "epoch": 12.21826280623608, + "loss": 0.48658156394958496, + "loss_ce": 0.00013140994997229427, + "loss_iou": 0.2060546875, + "loss_num": 0.014892578125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 307374860, + "step": 5486 + }, + { + "epoch": 12.220489977728285, + "grad_norm": 19.31671714782715, + "learning_rate": 1e-06, + "loss": 0.4264, + "num_input_tokens_seen": 307432404, + "step": 5487 + }, + { + "epoch": 12.220489977728285, + "loss": 0.4271966814994812, + "loss_ce": 0.00013369151565711945, + "loss_iou": 0.1669921875, + "loss_num": 0.018798828125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 307432404, + "step": 5487 + }, + { + "epoch": 12.22271714922049, + "grad_norm": 27.503997802734375, + "learning_rate": 1e-06, + "loss": 0.4349, + "num_input_tokens_seen": 307488920, + "step": 5488 + }, + { + "epoch": 12.22271714922049, + "loss": 0.4608370065689087, + "loss_ce": 0.00014365185052156448, + "loss_iou": 0.1923828125, + "loss_num": 0.01513671875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 307488920, + "step": 5488 + }, + { + "epoch": 12.224944320712694, + "grad_norm": 20.619155883789062, + "learning_rate": 1e-06, + "loss": 0.5518, + "num_input_tokens_seen": 307542936, + "step": 5489 + }, + { + "epoch": 12.224944320712694, + "loss": 0.4091936945915222, + "loss_ce": 0.00013606807624455541, + "loss_iou": 0.1884765625, + "loss_num": 0.006439208984375, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 307542936, + "step": 5489 + }, + { + "epoch": 12.2271714922049, + "grad_norm": 40.87490463256836, + "learning_rate": 1e-06, + "loss": 0.5842, + "num_input_tokens_seen": 307599944, + "step": 5490 + }, + { + "epoch": 12.2271714922049, + "loss": 0.5064796805381775, + "loss_ce": 0.0001320538140134886, + "loss_iou": 0.2216796875, + "loss_num": 0.01263427734375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 307599944, + "step": 5490 + }, + { + "epoch": 12.229398663697104, + "grad_norm": 19.404682159423828, + "learning_rate": 1e-06, + "loss": 0.3539, + "num_input_tokens_seen": 307656088, + "step": 5491 + }, + { + "epoch": 12.229398663697104, + "loss": 0.3526671230792999, + "loss_ce": 0.00012805430742446333, + "loss_iou": 0.158203125, + "loss_num": 0.007232666015625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 307656088, + "step": 5491 + }, + { + "epoch": 12.231625835189309, + "grad_norm": 15.24465560913086, + "learning_rate": 1e-06, + "loss": 0.6326, + "num_input_tokens_seen": 307711560, + "step": 5492 + }, + { + "epoch": 12.231625835189309, + "loss": 0.7728477120399475, + "loss_ce": 0.00014267500955611467, + "loss_iou": 0.306640625, + "loss_num": 0.03173828125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 307711560, + "step": 5492 + }, + { + "epoch": 12.233853006681514, + "grad_norm": 20.780717849731445, + "learning_rate": 1e-06, + "loss": 0.6387, + "num_input_tokens_seen": 307767096, + "step": 5493 + }, + { + "epoch": 12.233853006681514, + "loss": 0.6877992153167725, + "loss_ce": 0.00011609600915107876, + "loss_iou": 0.244140625, + "loss_num": 0.0400390625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 307767096, + "step": 5493 + }, + { + "epoch": 12.236080178173719, + "grad_norm": 38.66435241699219, + "learning_rate": 1e-06, + "loss": 0.4721, + "num_input_tokens_seen": 307824084, + "step": 5494 + }, + { + "epoch": 12.236080178173719, + "loss": 0.4882151782512665, + "loss_ce": 0.0001780918682925403, + "loss_iou": 0.2080078125, + "loss_num": 0.01422119140625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 307824084, + "step": 5494 + }, + { + "epoch": 12.238307349665924, + "grad_norm": 169.19766235351562, + "learning_rate": 1e-06, + "loss": 0.5511, + "num_input_tokens_seen": 307878876, + "step": 5495 + }, + { + "epoch": 12.238307349665924, + "loss": 0.6929959058761597, + "loss_ce": 0.00012478306598495692, + "loss_iou": 0.26171875, + "loss_num": 0.033935546875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 307878876, + "step": 5495 + }, + { + "epoch": 12.240534521158128, + "grad_norm": 18.359050750732422, + "learning_rate": 1e-06, + "loss": 0.4092, + "num_input_tokens_seen": 307936548, + "step": 5496 + }, + { + "epoch": 12.240534521158128, + "loss": 0.40076589584350586, + "loss_ce": 0.00013115604815538973, + "loss_iou": 0.185546875, + "loss_num": 0.005828857421875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 307936548, + "step": 5496 + }, + { + "epoch": 12.242761692650333, + "grad_norm": 16.64657211303711, + "learning_rate": 1e-06, + "loss": 0.5673, + "num_input_tokens_seen": 307993096, + "step": 5497 + }, + { + "epoch": 12.242761692650333, + "loss": 0.6693442463874817, + "loss_ce": 0.0001547938009025529, + "loss_iou": 0.2890625, + "loss_num": 0.01806640625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 307993096, + "step": 5497 + }, + { + "epoch": 12.244988864142538, + "grad_norm": 23.77404022216797, + "learning_rate": 1e-06, + "loss": 0.6996, + "num_input_tokens_seen": 308046960, + "step": 5498 + }, + { + "epoch": 12.244988864142538, + "loss": 0.610221266746521, + "loss_ce": 0.00011382724915165454, + "loss_iou": 0.2578125, + "loss_num": 0.0191650390625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 308046960, + "step": 5498 + }, + { + "epoch": 12.247216035634743, + "grad_norm": 19.345346450805664, + "learning_rate": 1e-06, + "loss": 0.3564, + "num_input_tokens_seen": 308103812, + "step": 5499 + }, + { + "epoch": 12.247216035634743, + "loss": 0.3957526683807373, + "loss_ce": 0.00012281053932383657, + "loss_iou": 0.1806640625, + "loss_num": 0.006805419921875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 308103812, + "step": 5499 + }, + { + "epoch": 12.249443207126948, + "grad_norm": 18.410499572753906, + "learning_rate": 1e-06, + "loss": 0.5145, + "num_input_tokens_seen": 308158408, + "step": 5500 + }, + { + "epoch": 12.249443207126948, + "eval_seeclick_web_CIoU": 0.585688054561615, + "eval_seeclick_web_GIoU": 0.5826278626918793, + "eval_seeclick_web_IoU": 0.6041875779628754, + "eval_seeclick_web_MAE_all": 0.015673364512622356, + "eval_seeclick_web_MAE_h": 0.007765157613903284, + "eval_seeclick_web_MAE_w": 0.015952853485941887, + "eval_seeclick_web_MAE_x_boxes": 0.009551033610478044, + "eval_seeclick_web_MAE_y_boxes": 0.02141634118743241, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9183205962181091, + "eval_seeclick_web_loss_ce": 0.00018939840811071917, + "eval_seeclick_web_loss_iou": 0.4190673828125, + "eval_seeclick_web_loss_num": 0.0124969482421875, + "eval_seeclick_web_loss_xval": 0.900390625, + "eval_seeclick_web_runtime": 24.0722, + "eval_seeclick_web_samples_per_second": 2.077, + "eval_seeclick_web_steps_per_second": 0.083, + "num_input_tokens_seen": 308158408, + "step": 5500 + }, + { + "epoch": 12.249443207126948, + "eval_icons_CIoU": 0.2844693139195442, + "eval_icons_GIoU": 0.29983824491500854, + "eval_icons_IoU": 0.3569711297750473, + "eval_icons_MAE_all": 0.056874074041843414, + "eval_icons_MAE_h": 0.039589228108525276, + "eval_icons_MAE_w": 0.05369975045323372, + "eval_icons_MAE_x_boxes": 0.051283443346619606, + "eval_icons_MAE_y_boxes": 0.03893454186618328, + "eval_icons_inside_bbox": 0.6215277910232544, + "eval_icons_loss": 1.6849582195281982, + "eval_icons_loss_ce": 0.00027994449192192405, + "eval_icons_loss_iou": 0.662353515625, + "eval_icons_loss_num": 0.04971122741699219, + "eval_icons_loss_xval": 1.573486328125, + "eval_icons_runtime": 23.9366, + "eval_icons_samples_per_second": 2.089, + "eval_icons_steps_per_second": 0.084, + "num_input_tokens_seen": 308158408, + "step": 5500 + }, + { + "epoch": 12.249443207126948, + "eval_screenspot_CIoU": 0.35463671882947284, + "eval_screenspot_GIoU": 0.36913161476453143, + "eval_screenspot_IoU": 0.43245549003283185, + "eval_screenspot_MAE_all": 0.058157578110694885, + "eval_screenspot_MAE_h": 0.038887947176893554, + "eval_screenspot_MAE_w": 0.067777914305528, + "eval_screenspot_MAE_x_boxes": 0.07268660329282284, + "eval_screenspot_MAE_y_boxes": 0.037438808319469295, + "eval_screenspot_inside_bbox": 0.6966666579246521, + "eval_screenspot_loss": 1.6075245141983032, + "eval_screenspot_loss_ce": 0.0002708134804076205, + "eval_screenspot_loss_iou": 0.6668294270833334, + "eval_screenspot_loss_num": 0.06592305501302083, + "eval_screenspot_loss_xval": 1.66162109375, + "eval_screenspot_runtime": 38.3047, + "eval_screenspot_samples_per_second": 2.323, + "eval_screenspot_steps_per_second": 0.078, + "num_input_tokens_seen": 308158408, + "step": 5500 + }, + { + "epoch": 12.249443207126948, + "eval_compot_CIoU": 0.34248843789100647, + "eval_compot_GIoU": 0.3517104983329773, + "eval_compot_IoU": 0.40232492983341217, + "eval_compot_MAE_all": 0.017755805049091578, + "eval_compot_MAE_h": 0.009214944671839476, + "eval_compot_MAE_w": 0.02113647386431694, + "eval_compot_MAE_x_boxes": 0.029819749295711517, + "eval_compot_MAE_y_boxes": 0.0068535758182406425, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4059686660766602, + "eval_compot_loss_ce": 0.00018623641517478973, + "eval_compot_loss_iou": 0.65087890625, + "eval_compot_loss_num": 0.016448974609375, + "eval_compot_loss_xval": 1.3837890625, + "eval_compot_runtime": 24.7377, + "eval_compot_samples_per_second": 2.021, + "eval_compot_steps_per_second": 0.081, + "num_input_tokens_seen": 308158408, + "step": 5500 + }, + { + "epoch": 12.249443207126948, + "eval_custom_ui_val_CIoU": 0.47646190888351864, + "eval_custom_ui_val_GIoU": 0.4871201482084062, + "eval_custom_ui_val_IoU": 0.5378114382425944, + "eval_custom_ui_val_MAE_all": 0.02943614311516285, + "eval_custom_ui_val_MAE_h": 0.016217040493049555, + "eval_custom_ui_val_MAE_w": 0.03668393205023474, + "eval_custom_ui_val_MAE_x_boxes": 0.03531148243281576, + "eval_custom_ui_val_MAE_y_boxes": 0.014928720322334103, + "eval_custom_ui_val_inside_bbox": 0.7719907429483202, + "eval_custom_ui_val_loss": 1.2021739482879639, + "eval_custom_ui_val_loss_ce": 0.00023076724998342493, + "eval_custom_ui_val_loss_iou": 0.5134412977430556, + "eval_custom_ui_val_loss_num": 0.027161492241753474, + "eval_custom_ui_val_loss_xval": 1.1628689236111112, + "eval_custom_ui_val_runtime": 76.3883, + "eval_custom_ui_val_samples_per_second": 3.469, + "eval_custom_ui_val_steps_per_second": 0.118, + "num_input_tokens_seen": 308158408, + "step": 5500 + }, + { + "epoch": 12.249443207126948, + "loss": 0.8959341049194336, + "loss_ce": 0.00018216308671981096, + "loss_iou": 0.390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 308158408, + "step": 5500 + }, + { + "epoch": 12.251670378619155, + "grad_norm": 15.896366119384766, + "learning_rate": 1e-06, + "loss": 0.3427, + "num_input_tokens_seen": 308216040, + "step": 5501 + }, + { + "epoch": 12.251670378619155, + "loss": 0.28972911834716797, + "loss_ce": 0.0001172900665551424, + "loss_iou": 0.11962890625, + "loss_num": 0.010009765625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 308216040, + "step": 5501 + }, + { + "epoch": 12.25389755011136, + "grad_norm": 31.605810165405273, + "learning_rate": 1e-06, + "loss": 0.4781, + "num_input_tokens_seen": 308272560, + "step": 5502 + }, + { + "epoch": 12.25389755011136, + "loss": 0.5278470516204834, + "loss_ce": 0.00013708796177525073, + "loss_iou": 0.244140625, + "loss_num": 0.00787353515625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 308272560, + "step": 5502 + }, + { + "epoch": 12.256124721603564, + "grad_norm": 18.19232940673828, + "learning_rate": 1e-06, + "loss": 0.5417, + "num_input_tokens_seen": 308330492, + "step": 5503 + }, + { + "epoch": 12.256124721603564, + "loss": 0.5479559302330017, + "loss_ce": 0.00010438320168759674, + "loss_iou": 0.2421875, + "loss_num": 0.01263427734375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 308330492, + "step": 5503 + }, + { + "epoch": 12.25835189309577, + "grad_norm": 14.01863956451416, + "learning_rate": 1e-06, + "loss": 0.6479, + "num_input_tokens_seen": 308385352, + "step": 5504 + }, + { + "epoch": 12.25835189309577, + "loss": 0.4697185158729553, + "loss_ce": 0.00011405147233745083, + "loss_iou": 0.193359375, + "loss_num": 0.0166015625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 308385352, + "step": 5504 + }, + { + "epoch": 12.260579064587974, + "grad_norm": 21.31361198425293, + "learning_rate": 1e-06, + "loss": 0.7673, + "num_input_tokens_seen": 308443100, + "step": 5505 + }, + { + "epoch": 12.260579064587974, + "loss": 0.7160589694976807, + "loss_ce": 0.00011650074156932533, + "loss_iou": 0.29296875, + "loss_num": 0.025634765625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 308443100, + "step": 5505 + }, + { + "epoch": 12.262806236080179, + "grad_norm": 199.44406127929688, + "learning_rate": 1e-06, + "loss": 0.4544, + "num_input_tokens_seen": 308499652, + "step": 5506 + }, + { + "epoch": 12.262806236080179, + "loss": 0.556039035320282, + "loss_ce": 0.0001308466453338042, + "loss_iou": 0.251953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 308499652, + "step": 5506 + }, + { + "epoch": 12.265033407572384, + "grad_norm": 28.171159744262695, + "learning_rate": 1e-06, + "loss": 0.4748, + "num_input_tokens_seen": 308551468, + "step": 5507 + }, + { + "epoch": 12.265033407572384, + "loss": 0.4873943626880646, + "loss_ce": 0.00021174979337956756, + "loss_iou": 0.212890625, + "loss_num": 0.01220703125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 308551468, + "step": 5507 + }, + { + "epoch": 12.267260579064589, + "grad_norm": 17.694232940673828, + "learning_rate": 1e-06, + "loss": 0.5427, + "num_input_tokens_seen": 308607228, + "step": 5508 + }, + { + "epoch": 12.267260579064589, + "loss": 0.3442959189414978, + "loss_ce": 0.00011866106069646776, + "loss_iou": 0.134765625, + "loss_num": 0.01513671875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 308607228, + "step": 5508 + }, + { + "epoch": 12.269487750556793, + "grad_norm": 17.69139289855957, + "learning_rate": 1e-06, + "loss": 0.5362, + "num_input_tokens_seen": 308664588, + "step": 5509 + }, + { + "epoch": 12.269487750556793, + "loss": 0.4937598705291748, + "loss_ce": 0.00010753308015409857, + "loss_iou": 0.21875, + "loss_num": 0.010986328125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 308664588, + "step": 5509 + }, + { + "epoch": 12.271714922048998, + "grad_norm": 18.234146118164062, + "learning_rate": 1e-06, + "loss": 0.6428, + "num_input_tokens_seen": 308716208, + "step": 5510 + }, + { + "epoch": 12.271714922048998, + "loss": 0.5791558027267456, + "loss_ce": 0.00014576420653611422, + "loss_iou": 0.26171875, + "loss_num": 0.0107421875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 308716208, + "step": 5510 + }, + { + "epoch": 12.273942093541203, + "grad_norm": 18.1691951751709, + "learning_rate": 1e-06, + "loss": 0.5535, + "num_input_tokens_seen": 308772680, + "step": 5511 + }, + { + "epoch": 12.273942093541203, + "loss": 0.5991677045822144, + "loss_ce": 0.00016867450904101133, + "loss_iou": 0.26171875, + "loss_num": 0.0147705078125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 308772680, + "step": 5511 + }, + { + "epoch": 12.276169265033408, + "grad_norm": 15.399046897888184, + "learning_rate": 1e-06, + "loss": 0.4188, + "num_input_tokens_seen": 308829192, + "step": 5512 + }, + { + "epoch": 12.276169265033408, + "loss": 0.5632211565971375, + "loss_ce": 0.0001413593563484028, + "loss_iou": 0.2001953125, + "loss_num": 0.032470703125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 308829192, + "step": 5512 + }, + { + "epoch": 12.278396436525613, + "grad_norm": 15.511956214904785, + "learning_rate": 1e-06, + "loss": 0.5697, + "num_input_tokens_seen": 308886748, + "step": 5513 + }, + { + "epoch": 12.278396436525613, + "loss": 0.5418818593025208, + "loss_ce": 0.00013380208110902458, + "loss_iou": 0.240234375, + "loss_num": 0.01239013671875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 308886748, + "step": 5513 + }, + { + "epoch": 12.280623608017818, + "grad_norm": 14.026480674743652, + "learning_rate": 1e-06, + "loss": 0.6363, + "num_input_tokens_seen": 308942736, + "step": 5514 + }, + { + "epoch": 12.280623608017818, + "loss": 0.7164598703384399, + "loss_ce": 0.0001512969029136002, + "loss_iou": 0.259765625, + "loss_num": 0.0390625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 308942736, + "step": 5514 + }, + { + "epoch": 12.282850779510023, + "grad_norm": 16.546672821044922, + "learning_rate": 1e-06, + "loss": 0.4416, + "num_input_tokens_seen": 308997492, + "step": 5515 + }, + { + "epoch": 12.282850779510023, + "loss": 0.3985058069229126, + "loss_ce": 0.00012933829566463828, + "loss_iou": 0.166015625, + "loss_num": 0.01318359375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 308997492, + "step": 5515 + }, + { + "epoch": 12.285077951002227, + "grad_norm": 16.246435165405273, + "learning_rate": 1e-06, + "loss": 0.517, + "num_input_tokens_seen": 309054160, + "step": 5516 + }, + { + "epoch": 12.285077951002227, + "loss": 0.4222395122051239, + "loss_ce": 0.00012038354179821908, + "loss_iou": 0.1953125, + "loss_num": 0.006256103515625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 309054160, + "step": 5516 + }, + { + "epoch": 12.287305122494432, + "grad_norm": 16.089317321777344, + "learning_rate": 1e-06, + "loss": 0.4671, + "num_input_tokens_seen": 309109496, + "step": 5517 + }, + { + "epoch": 12.287305122494432, + "loss": 0.48379087448120117, + "loss_ce": 0.00014830243890173733, + "loss_iou": 0.2109375, + "loss_num": 0.01214599609375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 309109496, + "step": 5517 + }, + { + "epoch": 12.289532293986637, + "grad_norm": 14.831037521362305, + "learning_rate": 1e-06, + "loss": 0.4507, + "num_input_tokens_seen": 309166044, + "step": 5518 + }, + { + "epoch": 12.289532293986637, + "loss": 0.6069037914276123, + "loss_ce": 0.00015331347822211683, + "loss_iou": 0.275390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 309166044, + "step": 5518 + }, + { + "epoch": 12.291759465478842, + "grad_norm": 16.727554321289062, + "learning_rate": 1e-06, + "loss": 0.5212, + "num_input_tokens_seen": 309224472, + "step": 5519 + }, + { + "epoch": 12.291759465478842, + "loss": 0.37400585412979126, + "loss_ce": 0.00010447671229485422, + "loss_iou": 0.1611328125, + "loss_num": 0.0103759765625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 309224472, + "step": 5519 + }, + { + "epoch": 12.293986636971047, + "grad_norm": 16.9213809967041, + "learning_rate": 1e-06, + "loss": 0.8055, + "num_input_tokens_seen": 309281284, + "step": 5520 + }, + { + "epoch": 12.293986636971047, + "loss": 0.9495736360549927, + "loss_ce": 0.00011074876238126308, + "loss_iou": 0.373046875, + "loss_num": 0.040771484375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 309281284, + "step": 5520 + }, + { + "epoch": 12.296213808463252, + "grad_norm": 24.03369140625, + "learning_rate": 1e-06, + "loss": 0.5665, + "num_input_tokens_seen": 309336800, + "step": 5521 + }, + { + "epoch": 12.296213808463252, + "loss": 0.5887036323547363, + "loss_ce": 0.0003247222339268774, + "loss_iou": 0.232421875, + "loss_num": 0.0250244140625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 309336800, + "step": 5521 + }, + { + "epoch": 12.298440979955457, + "grad_norm": 17.732921600341797, + "learning_rate": 1e-06, + "loss": 0.6271, + "num_input_tokens_seen": 309390848, + "step": 5522 + }, + { + "epoch": 12.298440979955457, + "loss": 0.858605146408081, + "loss_ce": 0.000206666489248164, + "loss_iou": 0.365234375, + "loss_num": 0.025634765625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 309390848, + "step": 5522 + }, + { + "epoch": 12.300668151447661, + "grad_norm": 14.111288070678711, + "learning_rate": 1e-06, + "loss": 0.5704, + "num_input_tokens_seen": 309447824, + "step": 5523 + }, + { + "epoch": 12.300668151447661, + "loss": 0.3774692416191101, + "loss_ce": 0.00014990594354458153, + "loss_iou": 0.154296875, + "loss_num": 0.0135498046875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 309447824, + "step": 5523 + }, + { + "epoch": 12.302895322939866, + "grad_norm": 19.964794158935547, + "learning_rate": 1e-06, + "loss": 0.4378, + "num_input_tokens_seen": 309504964, + "step": 5524 + }, + { + "epoch": 12.302895322939866, + "loss": 0.3729360103607178, + "loss_ce": 0.00013328931527212262, + "loss_iou": 0.1669921875, + "loss_num": 0.0076904296875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 309504964, + "step": 5524 + }, + { + "epoch": 12.305122494432071, + "grad_norm": 14.157723426818848, + "learning_rate": 1e-06, + "loss": 0.4644, + "num_input_tokens_seen": 309560456, + "step": 5525 + }, + { + "epoch": 12.305122494432071, + "loss": 0.5831543207168579, + "loss_ce": 0.00014654998085461557, + "loss_iou": 0.2177734375, + "loss_num": 0.0296630859375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 309560456, + "step": 5525 + }, + { + "epoch": 12.307349665924276, + "grad_norm": 22.95296287536621, + "learning_rate": 1e-06, + "loss": 0.5372, + "num_input_tokens_seen": 309619680, + "step": 5526 + }, + { + "epoch": 12.307349665924276, + "loss": 0.5235176086425781, + "loss_ce": 0.00020221451995894313, + "loss_iou": 0.2041015625, + "loss_num": 0.0230712890625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 309619680, + "step": 5526 + }, + { + "epoch": 12.309576837416481, + "grad_norm": 14.134220123291016, + "learning_rate": 1e-06, + "loss": 0.4565, + "num_input_tokens_seen": 309676328, + "step": 5527 + }, + { + "epoch": 12.309576837416481, + "loss": 0.35591933131217957, + "loss_ce": 0.00020644423784688115, + "loss_iou": 0.1611328125, + "loss_num": 0.00677490234375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 309676328, + "step": 5527 + }, + { + "epoch": 12.311804008908686, + "grad_norm": 26.886777877807617, + "learning_rate": 1e-06, + "loss": 0.5358, + "num_input_tokens_seen": 309727028, + "step": 5528 + }, + { + "epoch": 12.311804008908686, + "loss": 0.5397249460220337, + "loss_ce": 0.0001741335727274418, + "loss_iou": 0.236328125, + "loss_num": 0.0133056640625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 309727028, + "step": 5528 + }, + { + "epoch": 12.31403118040089, + "grad_norm": 17.852651596069336, + "learning_rate": 1e-06, + "loss": 0.5016, + "num_input_tokens_seen": 309783832, + "step": 5529 + }, + { + "epoch": 12.31403118040089, + "loss": 0.4710628390312195, + "loss_ce": 0.00011555143282748759, + "loss_iou": 0.2060546875, + "loss_num": 0.01165771484375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 309783832, + "step": 5529 + }, + { + "epoch": 12.316258351893095, + "grad_norm": 14.421836853027344, + "learning_rate": 1e-06, + "loss": 0.4286, + "num_input_tokens_seen": 309839264, + "step": 5530 + }, + { + "epoch": 12.316258351893095, + "loss": 0.29735976457595825, + "loss_ce": 0.00011855886259581894, + "loss_iou": 0.1298828125, + "loss_num": 0.00738525390625, + "loss_xval": 0.296875, + "num_input_tokens_seen": 309839264, + "step": 5530 + }, + { + "epoch": 12.3184855233853, + "grad_norm": 41.36547088623047, + "learning_rate": 1e-06, + "loss": 0.5509, + "num_input_tokens_seen": 309893232, + "step": 5531 + }, + { + "epoch": 12.3184855233853, + "loss": 0.648200511932373, + "loss_ce": 0.00012922041059937328, + "loss_iou": 0.296875, + "loss_num": 0.0107421875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 309893232, + "step": 5531 + }, + { + "epoch": 12.320712694877505, + "grad_norm": 22.48734474182129, + "learning_rate": 1e-06, + "loss": 0.505, + "num_input_tokens_seen": 309948492, + "step": 5532 + }, + { + "epoch": 12.320712694877505, + "loss": 0.4604489803314209, + "loss_ce": 0.00012181737110950053, + "loss_iou": 0.2060546875, + "loss_num": 0.009521484375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 309948492, + "step": 5532 + }, + { + "epoch": 12.32293986636971, + "grad_norm": 17.8016414642334, + "learning_rate": 1e-06, + "loss": 0.3587, + "num_input_tokens_seen": 310005616, + "step": 5533 + }, + { + "epoch": 12.32293986636971, + "loss": 0.39793771505355835, + "loss_ce": 0.0001105824630940333, + "loss_iou": 0.1806640625, + "loss_num": 0.00726318359375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 310005616, + "step": 5533 + }, + { + "epoch": 12.325167037861915, + "grad_norm": 17.646106719970703, + "learning_rate": 1e-06, + "loss": 0.581, + "num_input_tokens_seen": 310061764, + "step": 5534 + }, + { + "epoch": 12.325167037861915, + "loss": 0.666999101638794, + "loss_ce": 0.00012893178791273385, + "loss_iou": 0.2734375, + "loss_num": 0.024169921875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 310061764, + "step": 5534 + }, + { + "epoch": 12.32739420935412, + "grad_norm": 19.61566925048828, + "learning_rate": 1e-06, + "loss": 0.5461, + "num_input_tokens_seen": 310116556, + "step": 5535 + }, + { + "epoch": 12.32739420935412, + "loss": 0.31176358461380005, + "loss_ce": 0.00011805207759607583, + "loss_iou": 0.1416015625, + "loss_num": 0.005584716796875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 310116556, + "step": 5535 + }, + { + "epoch": 12.329621380846325, + "grad_norm": 14.809009552001953, + "learning_rate": 1e-06, + "loss": 0.4208, + "num_input_tokens_seen": 310173568, + "step": 5536 + }, + { + "epoch": 12.329621380846325, + "loss": 0.40904849767684937, + "loss_ce": 0.00011295877629891038, + "loss_iou": 0.1845703125, + "loss_num": 0.00799560546875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 310173568, + "step": 5536 + }, + { + "epoch": 12.33184855233853, + "grad_norm": 26.909616470336914, + "learning_rate": 1e-06, + "loss": 0.4644, + "num_input_tokens_seen": 310228872, + "step": 5537 + }, + { + "epoch": 12.33184855233853, + "loss": 0.580930233001709, + "loss_ce": 0.00011961960990447551, + "loss_iou": 0.240234375, + "loss_num": 0.0198974609375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 310228872, + "step": 5537 + }, + { + "epoch": 12.334075723830734, + "grad_norm": 18.16213035583496, + "learning_rate": 1e-06, + "loss": 0.6525, + "num_input_tokens_seen": 310284508, + "step": 5538 + }, + { + "epoch": 12.334075723830734, + "loss": 0.5219680070877075, + "loss_ce": 0.00011739489855244756, + "loss_iou": 0.2353515625, + "loss_num": 0.01019287109375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 310284508, + "step": 5538 + }, + { + "epoch": 12.33630289532294, + "grad_norm": 32.21243667602539, + "learning_rate": 1e-06, + "loss": 0.4215, + "num_input_tokens_seen": 310340144, + "step": 5539 + }, + { + "epoch": 12.33630289532294, + "loss": 0.31780463457107544, + "loss_ce": 0.00011664302292047068, + "loss_iou": 0.1318359375, + "loss_num": 0.01092529296875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 310340144, + "step": 5539 + }, + { + "epoch": 12.338530066815144, + "grad_norm": 23.456138610839844, + "learning_rate": 1e-06, + "loss": 0.3759, + "num_input_tokens_seen": 310398476, + "step": 5540 + }, + { + "epoch": 12.338530066815144, + "loss": 0.36424580216407776, + "loss_ce": 0.00011004651605617255, + "loss_iou": 0.158203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 310398476, + "step": 5540 + }, + { + "epoch": 12.340757238307349, + "grad_norm": 18.373727798461914, + "learning_rate": 1e-06, + "loss": 0.4471, + "num_input_tokens_seen": 310456252, + "step": 5541 + }, + { + "epoch": 12.340757238307349, + "loss": 0.4610535502433777, + "loss_ce": 0.0001160675601568073, + "loss_iou": 0.2060546875, + "loss_num": 0.009765625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 310456252, + "step": 5541 + }, + { + "epoch": 12.342984409799554, + "grad_norm": 15.688360214233398, + "learning_rate": 1e-06, + "loss": 0.4345, + "num_input_tokens_seen": 310509648, + "step": 5542 + }, + { + "epoch": 12.342984409799554, + "loss": 0.33905208110809326, + "loss_ce": 0.00012385296577122062, + "loss_iou": 0.1484375, + "loss_num": 0.0081787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 310509648, + "step": 5542 + }, + { + "epoch": 12.345211581291759, + "grad_norm": 24.152406692504883, + "learning_rate": 1e-06, + "loss": 0.4911, + "num_input_tokens_seen": 310565268, + "step": 5543 + }, + { + "epoch": 12.345211581291759, + "loss": 0.45101678371429443, + "loss_ce": 0.00015007876208983362, + "loss_iou": 0.1923828125, + "loss_num": 0.01312255859375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 310565268, + "step": 5543 + }, + { + "epoch": 12.347438752783964, + "grad_norm": 18.96198081970215, + "learning_rate": 1e-06, + "loss": 0.3821, + "num_input_tokens_seen": 310618196, + "step": 5544 + }, + { + "epoch": 12.347438752783964, + "loss": 0.4492288827896118, + "loss_ce": 0.0001322347525274381, + "loss_iou": 0.2060546875, + "loss_num": 0.007476806640625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 310618196, + "step": 5544 + }, + { + "epoch": 12.34966592427617, + "grad_norm": 21.896156311035156, + "learning_rate": 1e-06, + "loss": 0.468, + "num_input_tokens_seen": 310673820, + "step": 5545 + }, + { + "epoch": 12.34966592427617, + "loss": 0.3710940182209015, + "loss_ce": 0.00012234911264386028, + "loss_iou": 0.1640625, + "loss_num": 0.00848388671875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 310673820, + "step": 5545 + }, + { + "epoch": 12.351893095768375, + "grad_norm": 33.179386138916016, + "learning_rate": 1e-06, + "loss": 0.5911, + "num_input_tokens_seen": 310725344, + "step": 5546 + }, + { + "epoch": 12.351893095768375, + "loss": 0.85118567943573, + "loss_ce": 0.0008438542135991156, + "loss_iou": 0.375, + "loss_num": 0.02001953125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 310725344, + "step": 5546 + }, + { + "epoch": 12.35412026726058, + "grad_norm": 24.786273956298828, + "learning_rate": 1e-06, + "loss": 0.5604, + "num_input_tokens_seen": 310780808, + "step": 5547 + }, + { + "epoch": 12.35412026726058, + "loss": 0.37451988458633423, + "loss_ce": 0.00025231053587049246, + "loss_iou": 0.16796875, + "loss_num": 0.007720947265625, + "loss_xval": 0.375, + "num_input_tokens_seen": 310780808, + "step": 5547 + }, + { + "epoch": 12.356347438752785, + "grad_norm": 22.51511573791504, + "learning_rate": 1e-06, + "loss": 0.5251, + "num_input_tokens_seen": 310839704, + "step": 5548 + }, + { + "epoch": 12.356347438752785, + "loss": 0.6323890089988708, + "loss_ce": 0.00012582968338392675, + "loss_iou": 0.25390625, + "loss_num": 0.024658203125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 310839704, + "step": 5548 + }, + { + "epoch": 12.35857461024499, + "grad_norm": 19.136016845703125, + "learning_rate": 1e-06, + "loss": 0.6059, + "num_input_tokens_seen": 310896396, + "step": 5549 + }, + { + "epoch": 12.35857461024499, + "loss": 0.5548136234283447, + "loss_ce": 0.00012610982230398804, + "loss_iou": 0.2412109375, + "loss_num": 0.0142822265625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 310896396, + "step": 5549 + }, + { + "epoch": 12.360801781737194, + "grad_norm": 18.60460090637207, + "learning_rate": 1e-06, + "loss": 0.4583, + "num_input_tokens_seen": 310954004, + "step": 5550 + }, + { + "epoch": 12.360801781737194, + "loss": 0.4283403754234314, + "loss_ce": 0.00011774353333748877, + "loss_iou": 0.1796875, + "loss_num": 0.0137939453125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 310954004, + "step": 5550 + }, + { + "epoch": 12.3630289532294, + "grad_norm": 21.40949249267578, + "learning_rate": 1e-06, + "loss": 0.6223, + "num_input_tokens_seen": 311010500, + "step": 5551 + }, + { + "epoch": 12.3630289532294, + "loss": 0.8038879632949829, + "loss_ce": 0.00017696505528874695, + "loss_iou": 0.314453125, + "loss_num": 0.034912109375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 311010500, + "step": 5551 + }, + { + "epoch": 12.365256124721604, + "grad_norm": 18.762351989746094, + "learning_rate": 1e-06, + "loss": 0.3948, + "num_input_tokens_seen": 311070112, + "step": 5552 + }, + { + "epoch": 12.365256124721604, + "loss": 0.27796292304992676, + "loss_ce": 0.00013091039727441967, + "loss_iou": 0.11865234375, + "loss_num": 0.008056640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 311070112, + "step": 5552 + }, + { + "epoch": 12.367483296213809, + "grad_norm": 22.09250259399414, + "learning_rate": 1e-06, + "loss": 0.7311, + "num_input_tokens_seen": 311125564, + "step": 5553 + }, + { + "epoch": 12.367483296213809, + "loss": 0.7942242622375488, + "loss_ce": 0.00015692379383835942, + "loss_iou": 0.337890625, + "loss_num": 0.023193359375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 311125564, + "step": 5553 + }, + { + "epoch": 12.369710467706014, + "grad_norm": 20.51223373413086, + "learning_rate": 1e-06, + "loss": 0.5099, + "num_input_tokens_seen": 311182804, + "step": 5554 + }, + { + "epoch": 12.369710467706014, + "loss": 0.6075503826141357, + "loss_ce": 0.00012847562902607024, + "loss_iou": 0.265625, + "loss_num": 0.0150146484375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 311182804, + "step": 5554 + }, + { + "epoch": 12.371937639198219, + "grad_norm": 27.149372100830078, + "learning_rate": 1e-06, + "loss": 0.6807, + "num_input_tokens_seen": 311239764, + "step": 5555 + }, + { + "epoch": 12.371937639198219, + "loss": 0.666401743888855, + "loss_ce": 0.00014195009134709835, + "loss_iou": 0.28125, + "loss_num": 0.020751953125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 311239764, + "step": 5555 + }, + { + "epoch": 12.374164810690424, + "grad_norm": 19.294330596923828, + "learning_rate": 1e-06, + "loss": 0.4761, + "num_input_tokens_seen": 311295712, + "step": 5556 + }, + { + "epoch": 12.374164810690424, + "loss": 0.3280073404312134, + "loss_ce": 0.00012647973198909312, + "loss_iou": 0.150390625, + "loss_num": 0.005340576171875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 311295712, + "step": 5556 + }, + { + "epoch": 12.376391982182628, + "grad_norm": 30.496156692504883, + "learning_rate": 1e-06, + "loss": 0.5462, + "num_input_tokens_seen": 311354216, + "step": 5557 + }, + { + "epoch": 12.376391982182628, + "loss": 0.4644961357116699, + "loss_ce": 0.00014063574781175703, + "loss_iou": 0.2060546875, + "loss_num": 0.0106201171875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 311354216, + "step": 5557 + }, + { + "epoch": 12.378619153674833, + "grad_norm": 18.934402465820312, + "learning_rate": 1e-06, + "loss": 0.3934, + "num_input_tokens_seen": 311409028, + "step": 5558 + }, + { + "epoch": 12.378619153674833, + "loss": 0.39881080389022827, + "loss_ce": 0.0001291544467676431, + "loss_iou": 0.166015625, + "loss_num": 0.01324462890625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 311409028, + "step": 5558 + }, + { + "epoch": 12.380846325167038, + "grad_norm": 25.294477462768555, + "learning_rate": 1e-06, + "loss": 0.5882, + "num_input_tokens_seen": 311465116, + "step": 5559 + }, + { + "epoch": 12.380846325167038, + "loss": 0.6579493880271912, + "loss_ce": 0.00011249056842643768, + "loss_iou": 0.27734375, + "loss_num": 0.0208740234375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 311465116, + "step": 5559 + }, + { + "epoch": 12.383073496659243, + "grad_norm": 21.457990646362305, + "learning_rate": 1e-06, + "loss": 0.4843, + "num_input_tokens_seen": 311520028, + "step": 5560 + }, + { + "epoch": 12.383073496659243, + "loss": 0.4148017168045044, + "loss_ce": 0.00012887550110463053, + "loss_iou": 0.1904296875, + "loss_num": 0.00665283203125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 311520028, + "step": 5560 + }, + { + "epoch": 12.385300668151448, + "grad_norm": 22.746858596801758, + "learning_rate": 1e-06, + "loss": 0.4334, + "num_input_tokens_seen": 311572928, + "step": 5561 + }, + { + "epoch": 12.385300668151448, + "loss": 0.2498151659965515, + "loss_ce": 0.00018137965525966138, + "loss_iou": 0.09619140625, + "loss_num": 0.01153564453125, + "loss_xval": 0.25, + "num_input_tokens_seen": 311572928, + "step": 5561 + }, + { + "epoch": 12.387527839643653, + "grad_norm": 28.644351959228516, + "learning_rate": 1e-06, + "loss": 0.586, + "num_input_tokens_seen": 311630476, + "step": 5562 + }, + { + "epoch": 12.387527839643653, + "loss": 0.6954451203346252, + "loss_ce": 0.0001326243655057624, + "loss_iou": 0.310546875, + "loss_num": 0.01531982421875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 311630476, + "step": 5562 + }, + { + "epoch": 12.389755011135858, + "grad_norm": 38.925472259521484, + "learning_rate": 1e-06, + "loss": 0.4525, + "num_input_tokens_seen": 311687972, + "step": 5563 + }, + { + "epoch": 12.389755011135858, + "loss": 0.43446385860443115, + "loss_ce": 0.00013769854558631778, + "loss_iou": 0.201171875, + "loss_num": 0.00634765625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 311687972, + "step": 5563 + }, + { + "epoch": 12.391982182628063, + "grad_norm": 19.39438247680664, + "learning_rate": 1e-06, + "loss": 0.5437, + "num_input_tokens_seen": 311744768, + "step": 5564 + }, + { + "epoch": 12.391982182628063, + "loss": 0.5501693487167358, + "loss_ce": 0.00012056773994117975, + "loss_iou": 0.251953125, + "loss_num": 0.0096435546875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 311744768, + "step": 5564 + }, + { + "epoch": 12.394209354120267, + "grad_norm": 13.309589385986328, + "learning_rate": 1e-06, + "loss": 0.4274, + "num_input_tokens_seen": 311799736, + "step": 5565 + }, + { + "epoch": 12.394209354120267, + "loss": 0.5023265480995178, + "loss_ce": 0.0001292880333494395, + "loss_iou": 0.212890625, + "loss_num": 0.01513671875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 311799736, + "step": 5565 + }, + { + "epoch": 12.396436525612472, + "grad_norm": 15.245659828186035, + "learning_rate": 1e-06, + "loss": 0.4462, + "num_input_tokens_seen": 311856860, + "step": 5566 + }, + { + "epoch": 12.396436525612472, + "loss": 0.5222017765045166, + "loss_ce": 0.00010704126907512546, + "loss_iou": 0.2294921875, + "loss_num": 0.01263427734375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 311856860, + "step": 5566 + }, + { + "epoch": 12.398663697104677, + "grad_norm": 12.112483024597168, + "learning_rate": 1e-06, + "loss": 0.4344, + "num_input_tokens_seen": 311909940, + "step": 5567 + }, + { + "epoch": 12.398663697104677, + "loss": 0.6277226805686951, + "loss_ce": 0.00015922555758152157, + "loss_iou": 0.287109375, + "loss_num": 0.0111083984375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 311909940, + "step": 5567 + }, + { + "epoch": 12.400890868596882, + "grad_norm": 35.967140197753906, + "learning_rate": 1e-06, + "loss": 0.5718, + "num_input_tokens_seen": 311964724, + "step": 5568 + }, + { + "epoch": 12.400890868596882, + "loss": 0.539790153503418, + "loss_ce": 0.0001172933480120264, + "loss_iou": 0.2294921875, + "loss_num": 0.0159912109375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 311964724, + "step": 5568 + }, + { + "epoch": 12.403118040089087, + "grad_norm": 12.642704963684082, + "learning_rate": 1e-06, + "loss": 0.4183, + "num_input_tokens_seen": 312021624, + "step": 5569 + }, + { + "epoch": 12.403118040089087, + "loss": 0.2902145981788635, + "loss_ce": 0.00011449479643488303, + "loss_iou": 0.125, + "loss_num": 0.00787353515625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 312021624, + "step": 5569 + }, + { + "epoch": 12.405345211581292, + "grad_norm": 24.059127807617188, + "learning_rate": 1e-06, + "loss": 0.4588, + "num_input_tokens_seen": 312075924, + "step": 5570 + }, + { + "epoch": 12.405345211581292, + "loss": 0.31721970438957214, + "loss_ce": 0.00011153465311508626, + "loss_iou": 0.1318359375, + "loss_num": 0.0106201171875, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 312075924, + "step": 5570 + }, + { + "epoch": 12.407572383073497, + "grad_norm": 16.74148178100586, + "learning_rate": 1e-06, + "loss": 0.386, + "num_input_tokens_seen": 312132284, + "step": 5571 + }, + { + "epoch": 12.407572383073497, + "loss": 0.43999338150024414, + "loss_ce": 0.0001740275911288336, + "loss_iou": 0.1884765625, + "loss_num": 0.0128173828125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 312132284, + "step": 5571 + }, + { + "epoch": 12.409799554565701, + "grad_norm": 33.77167510986328, + "learning_rate": 1e-06, + "loss": 0.5518, + "num_input_tokens_seen": 312190496, + "step": 5572 + }, + { + "epoch": 12.409799554565701, + "loss": 0.3965058922767639, + "loss_ce": 0.0001435701851733029, + "loss_iou": 0.1826171875, + "loss_num": 0.006378173828125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 312190496, + "step": 5572 + }, + { + "epoch": 12.412026726057906, + "grad_norm": 45.48998260498047, + "learning_rate": 1e-06, + "loss": 0.5871, + "num_input_tokens_seen": 312246352, + "step": 5573 + }, + { + "epoch": 12.412026726057906, + "loss": 0.5653384327888489, + "loss_ce": 0.00015288355643860996, + "loss_iou": 0.240234375, + "loss_num": 0.0169677734375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 312246352, + "step": 5573 + }, + { + "epoch": 12.414253897550111, + "grad_norm": 15.57947063446045, + "learning_rate": 1e-06, + "loss": 0.5422, + "num_input_tokens_seen": 312301816, + "step": 5574 + }, + { + "epoch": 12.414253897550111, + "loss": 0.6265942454338074, + "loss_ce": 0.00012941220484208316, + "loss_iou": 0.259765625, + "loss_num": 0.021240234375, + "loss_xval": 0.625, + "num_input_tokens_seen": 312301816, + "step": 5574 + }, + { + "epoch": 12.416481069042316, + "grad_norm": 13.934830665588379, + "learning_rate": 1e-06, + "loss": 0.3072, + "num_input_tokens_seen": 312361472, + "step": 5575 + }, + { + "epoch": 12.416481069042316, + "loss": 0.23996922373771667, + "loss_ce": 0.00010107570415129885, + "loss_iou": 0.10595703125, + "loss_num": 0.005645751953125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 312361472, + "step": 5575 + }, + { + "epoch": 12.41870824053452, + "grad_norm": 22.51824951171875, + "learning_rate": 1e-06, + "loss": 0.5321, + "num_input_tokens_seen": 312416660, + "step": 5576 + }, + { + "epoch": 12.41870824053452, + "loss": 0.3880695104598999, + "loss_ce": 0.00013007389497943223, + "loss_iou": 0.1796875, + "loss_num": 0.0054931640625, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 312416660, + "step": 5576 + }, + { + "epoch": 12.420935412026726, + "grad_norm": 12.614936828613281, + "learning_rate": 1e-06, + "loss": 0.3598, + "num_input_tokens_seen": 312472660, + "step": 5577 + }, + { + "epoch": 12.420935412026726, + "loss": 0.42401188611984253, + "loss_ce": 0.00018374717910774052, + "loss_iou": 0.1767578125, + "loss_num": 0.0140380859375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 312472660, + "step": 5577 + }, + { + "epoch": 12.42316258351893, + "grad_norm": 19.709209442138672, + "learning_rate": 1e-06, + "loss": 0.515, + "num_input_tokens_seen": 312528764, + "step": 5578 + }, + { + "epoch": 12.42316258351893, + "loss": 0.517235279083252, + "loss_ce": 0.00014545858721248806, + "loss_iou": 0.21875, + "loss_num": 0.0159912109375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 312528764, + "step": 5578 + }, + { + "epoch": 12.425389755011135, + "grad_norm": 15.936797142028809, + "learning_rate": 1e-06, + "loss": 0.5646, + "num_input_tokens_seen": 312586228, + "step": 5579 + }, + { + "epoch": 12.425389755011135, + "loss": 0.44118812680244446, + "loss_ce": 0.00014807441039010882, + "loss_iou": 0.20703125, + "loss_num": 0.005523681640625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 312586228, + "step": 5579 + }, + { + "epoch": 12.42761692650334, + "grad_norm": 16.843250274658203, + "learning_rate": 1e-06, + "loss": 0.3889, + "num_input_tokens_seen": 312640980, + "step": 5580 + }, + { + "epoch": 12.42761692650334, + "loss": 0.3072529435157776, + "loss_ce": 0.00012405663437675685, + "loss_iou": 0.1318359375, + "loss_num": 0.00872802734375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 312640980, + "step": 5580 + }, + { + "epoch": 12.429844097995545, + "grad_norm": 29.384336471557617, + "learning_rate": 1e-06, + "loss": 0.5642, + "num_input_tokens_seen": 312696640, + "step": 5581 + }, + { + "epoch": 12.429844097995545, + "loss": 0.5222534537315369, + "loss_ce": 0.00015875368262641132, + "loss_iou": 0.2275390625, + "loss_num": 0.01324462890625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 312696640, + "step": 5581 + }, + { + "epoch": 12.43207126948775, + "grad_norm": 31.232393264770508, + "learning_rate": 1e-06, + "loss": 0.5349, + "num_input_tokens_seen": 312751632, + "step": 5582 + }, + { + "epoch": 12.43207126948775, + "loss": 0.4201045632362366, + "loss_ce": 0.00012167952081654221, + "loss_iou": 0.19140625, + "loss_num": 0.00750732421875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 312751632, + "step": 5582 + }, + { + "epoch": 12.434298440979955, + "grad_norm": 36.4710578918457, + "learning_rate": 1e-06, + "loss": 0.5033, + "num_input_tokens_seen": 312802876, + "step": 5583 + }, + { + "epoch": 12.434298440979955, + "loss": 0.44115835428237915, + "loss_ce": 0.00011831161828013137, + "loss_iou": 0.203125, + "loss_num": 0.0068359375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 312802876, + "step": 5583 + }, + { + "epoch": 12.43652561247216, + "grad_norm": 17.38640594482422, + "learning_rate": 1e-06, + "loss": 0.4538, + "num_input_tokens_seen": 312861152, + "step": 5584 + }, + { + "epoch": 12.43652561247216, + "loss": 0.40885454416275024, + "loss_ce": 0.0001631533377803862, + "loss_iou": 0.1884765625, + "loss_num": 0.006378173828125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 312861152, + "step": 5584 + }, + { + "epoch": 12.438752783964365, + "grad_norm": 18.26358985900879, + "learning_rate": 1e-06, + "loss": 0.4418, + "num_input_tokens_seen": 312915440, + "step": 5585 + }, + { + "epoch": 12.438752783964365, + "loss": 0.542262613773346, + "loss_ce": 0.0001483731612097472, + "loss_iou": 0.23828125, + "loss_num": 0.0133056640625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 312915440, + "step": 5585 + }, + { + "epoch": 12.44097995545657, + "grad_norm": 19.2590274810791, + "learning_rate": 1e-06, + "loss": 0.5392, + "num_input_tokens_seen": 312969320, + "step": 5586 + }, + { + "epoch": 12.44097995545657, + "loss": 0.5372472405433655, + "loss_ce": 0.00013788053183816373, + "loss_iou": 0.2412109375, + "loss_num": 0.01092529296875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 312969320, + "step": 5586 + }, + { + "epoch": 12.443207126948774, + "grad_norm": 15.812176704406738, + "learning_rate": 1e-06, + "loss": 0.454, + "num_input_tokens_seen": 313026268, + "step": 5587 + }, + { + "epoch": 12.443207126948774, + "loss": 0.504636287689209, + "loss_ce": 0.00011972515494562685, + "loss_iou": 0.2001953125, + "loss_num": 0.0208740234375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 313026268, + "step": 5587 + }, + { + "epoch": 12.44543429844098, + "grad_norm": 15.350566864013672, + "learning_rate": 1e-06, + "loss": 0.618, + "num_input_tokens_seen": 313082880, + "step": 5588 + }, + { + "epoch": 12.44543429844098, + "loss": 0.4841225743293762, + "loss_ce": 0.00011377451301086694, + "loss_iou": 0.1923828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 313082880, + "step": 5588 + }, + { + "epoch": 12.447661469933184, + "grad_norm": 17.000885009765625, + "learning_rate": 1e-06, + "loss": 0.6271, + "num_input_tokens_seen": 313140436, + "step": 5589 + }, + { + "epoch": 12.447661469933184, + "loss": 0.959378182888031, + "loss_ce": 0.0001496242475695908, + "loss_iou": 0.37109375, + "loss_num": 0.04345703125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 313140436, + "step": 5589 + }, + { + "epoch": 12.449888641425389, + "grad_norm": 24.725290298461914, + "learning_rate": 1e-06, + "loss": 0.4681, + "num_input_tokens_seen": 313195976, + "step": 5590 + }, + { + "epoch": 12.449888641425389, + "loss": 0.47022485733032227, + "loss_ce": 0.00013207047595642507, + "loss_iou": 0.2060546875, + "loss_num": 0.01153564453125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 313195976, + "step": 5590 + }, + { + "epoch": 12.452115812917596, + "grad_norm": 17.44597053527832, + "learning_rate": 1e-06, + "loss": 0.376, + "num_input_tokens_seen": 313253104, + "step": 5591 + }, + { + "epoch": 12.452115812917596, + "loss": 0.3353341221809387, + "loss_ce": 0.00012904632603749633, + "loss_iou": 0.1337890625, + "loss_num": 0.01361083984375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 313253104, + "step": 5591 + }, + { + "epoch": 12.4543429844098, + "grad_norm": 22.889427185058594, + "learning_rate": 1e-06, + "loss": 0.5976, + "num_input_tokens_seen": 313309392, + "step": 5592 + }, + { + "epoch": 12.4543429844098, + "loss": 0.6958190202713013, + "loss_ce": 0.0001402627385687083, + "loss_iou": 0.314453125, + "loss_num": 0.01324462890625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 313309392, + "step": 5592 + }, + { + "epoch": 12.456570155902005, + "grad_norm": 23.9648494720459, + "learning_rate": 1e-06, + "loss": 0.3895, + "num_input_tokens_seen": 313365352, + "step": 5593 + }, + { + "epoch": 12.456570155902005, + "loss": 0.4225989580154419, + "loss_ce": 0.00011362304212525487, + "loss_iou": 0.1943359375, + "loss_num": 0.0069580078125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 313365352, + "step": 5593 + }, + { + "epoch": 12.45879732739421, + "grad_norm": 22.63255500793457, + "learning_rate": 1e-06, + "loss": 0.5079, + "num_input_tokens_seen": 313422004, + "step": 5594 + }, + { + "epoch": 12.45879732739421, + "loss": 0.37878933548927307, + "loss_ce": 0.00012723426334559917, + "loss_iou": 0.1728515625, + "loss_num": 0.00665283203125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 313422004, + "step": 5594 + }, + { + "epoch": 12.461024498886415, + "grad_norm": 12.69820785522461, + "learning_rate": 1e-06, + "loss": 0.454, + "num_input_tokens_seen": 313480284, + "step": 5595 + }, + { + "epoch": 12.461024498886415, + "loss": 0.34528595209121704, + "loss_ce": 0.0001321507734246552, + "loss_iou": 0.150390625, + "loss_num": 0.0087890625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 313480284, + "step": 5595 + }, + { + "epoch": 12.46325167037862, + "grad_norm": 155.06365966796875, + "learning_rate": 1e-06, + "loss": 0.4323, + "num_input_tokens_seen": 313537320, + "step": 5596 + }, + { + "epoch": 12.46325167037862, + "loss": 0.5743743181228638, + "loss_ce": 0.0001555891940370202, + "loss_iou": 0.23828125, + "loss_num": 0.019775390625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 313537320, + "step": 5596 + }, + { + "epoch": 12.465478841870825, + "grad_norm": 14.968194007873535, + "learning_rate": 1e-06, + "loss": 0.566, + "num_input_tokens_seen": 313592364, + "step": 5597 + }, + { + "epoch": 12.465478841870825, + "loss": 0.4135543406009674, + "loss_ce": 0.00010219329124083742, + "loss_iou": 0.1748046875, + "loss_num": 0.0128173828125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 313592364, + "step": 5597 + }, + { + "epoch": 12.46770601336303, + "grad_norm": 20.62652587890625, + "learning_rate": 1e-06, + "loss": 0.5161, + "num_input_tokens_seen": 313649912, + "step": 5598 + }, + { + "epoch": 12.46770601336303, + "loss": 0.6557765007019043, + "loss_ce": 0.00025890767574310303, + "loss_iou": 0.28125, + "loss_num": 0.0186767578125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 313649912, + "step": 5598 + }, + { + "epoch": 12.469933184855234, + "grad_norm": 30.149272918701172, + "learning_rate": 1e-06, + "loss": 0.3283, + "num_input_tokens_seen": 313707300, + "step": 5599 + }, + { + "epoch": 12.469933184855234, + "loss": 0.28626585006713867, + "loss_ce": 0.00010252131323795766, + "loss_iou": 0.1220703125, + "loss_num": 0.0084228515625, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 313707300, + "step": 5599 + }, + { + "epoch": 12.47216035634744, + "grad_norm": 23.769775390625, + "learning_rate": 1e-06, + "loss": 0.4423, + "num_input_tokens_seen": 313761436, + "step": 5600 + }, + { + "epoch": 12.47216035634744, + "loss": 0.4040485620498657, + "loss_ce": 0.00011789177369792014, + "loss_iou": 0.1787109375, + "loss_num": 0.00946044921875, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 313761436, + "step": 5600 + }, + { + "epoch": 12.474387527839644, + "grad_norm": 15.069268226623535, + "learning_rate": 1e-06, + "loss": 0.3506, + "num_input_tokens_seen": 313818588, + "step": 5601 + }, + { + "epoch": 12.474387527839644, + "loss": 0.38050034642219543, + "loss_ce": 0.00012924219481647015, + "loss_iou": 0.1708984375, + "loss_num": 0.00762939453125, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 313818588, + "step": 5601 + }, + { + "epoch": 12.476614699331849, + "grad_norm": 23.391334533691406, + "learning_rate": 1e-06, + "loss": 0.6686, + "num_input_tokens_seen": 313875568, + "step": 5602 + }, + { + "epoch": 12.476614699331849, + "loss": 1.0041569471359253, + "loss_ce": 0.002203819341957569, + "loss_iou": 0.35546875, + "loss_num": 0.05810546875, + "loss_xval": 1.0, + "num_input_tokens_seen": 313875568, + "step": 5602 + }, + { + "epoch": 12.478841870824054, + "grad_norm": 27.693687438964844, + "learning_rate": 1e-06, + "loss": 0.4511, + "num_input_tokens_seen": 313932528, + "step": 5603 + }, + { + "epoch": 12.478841870824054, + "loss": 0.4338412880897522, + "loss_ce": 0.00012547013466246426, + "loss_iou": 0.1796875, + "loss_num": 0.0150146484375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 313932528, + "step": 5603 + }, + { + "epoch": 12.481069042316259, + "grad_norm": 14.572260856628418, + "learning_rate": 1e-06, + "loss": 0.5831, + "num_input_tokens_seen": 313990412, + "step": 5604 + }, + { + "epoch": 12.481069042316259, + "loss": 0.47828370332717896, + "loss_ce": 0.00013426817895378917, + "loss_iou": 0.212890625, + "loss_num": 0.0103759765625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 313990412, + "step": 5604 + }, + { + "epoch": 12.483296213808464, + "grad_norm": 41.894989013671875, + "learning_rate": 1e-06, + "loss": 0.3697, + "num_input_tokens_seen": 314045856, + "step": 5605 + }, + { + "epoch": 12.483296213808464, + "loss": 0.3678470551967621, + "loss_ce": 0.00011023304250556976, + "loss_iou": 0.16015625, + "loss_num": 0.00970458984375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 314045856, + "step": 5605 + }, + { + "epoch": 12.485523385300668, + "grad_norm": 23.39073371887207, + "learning_rate": 1e-06, + "loss": 0.377, + "num_input_tokens_seen": 314100568, + "step": 5606 + }, + { + "epoch": 12.485523385300668, + "loss": 0.3292108476161957, + "loss_ce": 0.00010928411938948557, + "loss_iou": 0.1474609375, + "loss_num": 0.00689697265625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 314100568, + "step": 5606 + }, + { + "epoch": 12.487750556792873, + "grad_norm": 26.50432014465332, + "learning_rate": 1e-06, + "loss": 0.7268, + "num_input_tokens_seen": 314154296, + "step": 5607 + }, + { + "epoch": 12.487750556792873, + "loss": 0.7526974081993103, + "loss_ce": 0.00013394109555520117, + "loss_iou": 0.318359375, + "loss_num": 0.0233154296875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 314154296, + "step": 5607 + }, + { + "epoch": 12.489977728285078, + "grad_norm": 16.59043312072754, + "learning_rate": 1e-06, + "loss": 0.4595, + "num_input_tokens_seen": 314211988, + "step": 5608 + }, + { + "epoch": 12.489977728285078, + "loss": 0.5807017683982849, + "loss_ce": 0.00013538115308620036, + "loss_iou": 0.24609375, + "loss_num": 0.017578125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 314211988, + "step": 5608 + }, + { + "epoch": 12.492204899777283, + "grad_norm": 15.27440071105957, + "learning_rate": 1e-06, + "loss": 0.4032, + "num_input_tokens_seen": 314270680, + "step": 5609 + }, + { + "epoch": 12.492204899777283, + "loss": 0.349552184343338, + "loss_ce": 0.00012591719860211015, + "loss_iou": 0.1533203125, + "loss_num": 0.008544921875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 314270680, + "step": 5609 + }, + { + "epoch": 12.494432071269488, + "grad_norm": 27.822227478027344, + "learning_rate": 1e-06, + "loss": 0.3882, + "num_input_tokens_seen": 314326436, + "step": 5610 + }, + { + "epoch": 12.494432071269488, + "loss": 0.28033125400543213, + "loss_ce": 0.00011883871047757566, + "loss_iou": 0.12890625, + "loss_num": 0.004730224609375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 314326436, + "step": 5610 + }, + { + "epoch": 12.496659242761693, + "grad_norm": 18.214462280273438, + "learning_rate": 1e-06, + "loss": 0.4729, + "num_input_tokens_seen": 314383216, + "step": 5611 + }, + { + "epoch": 12.496659242761693, + "loss": 0.5203236937522888, + "loss_ce": 0.00012107704242225736, + "loss_iou": 0.2412109375, + "loss_num": 0.00762939453125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 314383216, + "step": 5611 + }, + { + "epoch": 12.498886414253898, + "grad_norm": 17.27739715576172, + "learning_rate": 1e-06, + "loss": 0.412, + "num_input_tokens_seen": 314438016, + "step": 5612 + }, + { + "epoch": 12.498886414253898, + "loss": 0.4331158995628357, + "loss_ce": 0.0001325118209933862, + "loss_iou": 0.1982421875, + "loss_num": 0.007293701171875, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 314438016, + "step": 5612 + }, + { + "epoch": 12.501113585746102, + "grad_norm": 25.569000244140625, + "learning_rate": 1e-06, + "loss": 0.5059, + "num_input_tokens_seen": 314494216, + "step": 5613 + }, + { + "epoch": 12.501113585746102, + "loss": 0.4582577347755432, + "loss_ce": 0.00012786393926944584, + "loss_iou": 0.1943359375, + "loss_num": 0.0137939453125, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 314494216, + "step": 5613 + }, + { + "epoch": 12.503340757238307, + "grad_norm": 16.633384704589844, + "learning_rate": 1e-06, + "loss": 0.529, + "num_input_tokens_seen": 314550764, + "step": 5614 + }, + { + "epoch": 12.503340757238307, + "loss": 0.5543428659439087, + "loss_ce": 0.00014360195200424641, + "loss_iou": 0.2158203125, + "loss_num": 0.024658203125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 314550764, + "step": 5614 + }, + { + "epoch": 12.505567928730512, + "grad_norm": 39.219783782958984, + "learning_rate": 1e-06, + "loss": 0.6081, + "num_input_tokens_seen": 314608284, + "step": 5615 + }, + { + "epoch": 12.505567928730512, + "loss": 0.4896491467952728, + "loss_ce": 0.00014716846635565162, + "loss_iou": 0.212890625, + "loss_num": 0.0125732421875, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 314608284, + "step": 5615 + }, + { + "epoch": 12.507795100222717, + "grad_norm": 30.928625106811523, + "learning_rate": 1e-06, + "loss": 0.4487, + "num_input_tokens_seen": 314660020, + "step": 5616 + }, + { + "epoch": 12.507795100222717, + "loss": 0.4297997057437897, + "loss_ce": 0.00011219394946238026, + "loss_iou": 0.1943359375, + "loss_num": 0.00836181640625, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 314660020, + "step": 5616 + }, + { + "epoch": 12.510022271714922, + "grad_norm": 20.47149085998535, + "learning_rate": 1e-06, + "loss": 0.4247, + "num_input_tokens_seen": 314715408, + "step": 5617 + }, + { + "epoch": 12.510022271714922, + "loss": 0.3184962868690491, + "loss_ce": 0.0001369007513858378, + "loss_iou": 0.130859375, + "loss_num": 0.01116943359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 314715408, + "step": 5617 + }, + { + "epoch": 12.512249443207127, + "grad_norm": 23.522342681884766, + "learning_rate": 1e-06, + "loss": 0.4706, + "num_input_tokens_seen": 314768072, + "step": 5618 + }, + { + "epoch": 12.512249443207127, + "loss": 0.62452632188797, + "loss_ce": 0.00013670045882463455, + "loss_iou": 0.271484375, + "loss_num": 0.01611328125, + "loss_xval": 0.625, + "num_input_tokens_seen": 314768072, + "step": 5618 + }, + { + "epoch": 12.514476614699332, + "grad_norm": 24.376148223876953, + "learning_rate": 1e-06, + "loss": 0.3809, + "num_input_tokens_seen": 314824920, + "step": 5619 + }, + { + "epoch": 12.514476614699332, + "loss": 0.43566471338272095, + "loss_ce": 0.00011783596710301936, + "loss_iou": 0.17578125, + "loss_num": 0.016845703125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 314824920, + "step": 5619 + }, + { + "epoch": 12.516703786191536, + "grad_norm": 21.62662124633789, + "learning_rate": 1e-06, + "loss": 0.4331, + "num_input_tokens_seen": 314881352, + "step": 5620 + }, + { + "epoch": 12.516703786191536, + "loss": 0.42295846343040466, + "loss_ce": 0.00013743109593633562, + "loss_iou": 0.185546875, + "loss_num": 0.01031494140625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 314881352, + "step": 5620 + }, + { + "epoch": 12.518930957683741, + "grad_norm": 17.14795684814453, + "learning_rate": 1e-06, + "loss": 0.4885, + "num_input_tokens_seen": 314938836, + "step": 5621 + }, + { + "epoch": 12.518930957683741, + "loss": 0.5315926671028137, + "loss_ce": 0.00022057080059312284, + "loss_iou": 0.2451171875, + "loss_num": 0.00830078125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 314938836, + "step": 5621 + }, + { + "epoch": 12.521158129175946, + "grad_norm": 32.41496658325195, + "learning_rate": 1e-06, + "loss": 0.5334, + "num_input_tokens_seen": 314997228, + "step": 5622 + }, + { + "epoch": 12.521158129175946, + "loss": 0.5393984317779541, + "loss_ce": 0.00015282572712749243, + "loss_iou": 0.22265625, + "loss_num": 0.018798828125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 314997228, + "step": 5622 + }, + { + "epoch": 12.523385300668151, + "grad_norm": 21.443031311035156, + "learning_rate": 1e-06, + "loss": 0.5174, + "num_input_tokens_seen": 315052164, + "step": 5623 + }, + { + "epoch": 12.523385300668151, + "loss": 0.49842369556427, + "loss_ce": 0.00013268653128761798, + "loss_iou": 0.2138671875, + "loss_num": 0.0140380859375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 315052164, + "step": 5623 + }, + { + "epoch": 12.525612472160356, + "grad_norm": 21.690479278564453, + "learning_rate": 1e-06, + "loss": 0.5104, + "num_input_tokens_seen": 315108840, + "step": 5624 + }, + { + "epoch": 12.525612472160356, + "loss": 0.3883495032787323, + "loss_ce": 0.00016590597806498408, + "loss_iou": 0.1748046875, + "loss_num": 0.0079345703125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 315108840, + "step": 5624 + }, + { + "epoch": 12.52783964365256, + "grad_norm": 32.13372039794922, + "learning_rate": 1e-06, + "loss": 0.4522, + "num_input_tokens_seen": 315165896, + "step": 5625 + }, + { + "epoch": 12.52783964365256, + "loss": 0.5064772367477417, + "loss_ce": 0.00012959350715391338, + "loss_iou": 0.2333984375, + "loss_num": 0.008056640625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 315165896, + "step": 5625 + }, + { + "epoch": 12.530066815144766, + "grad_norm": 80.0682144165039, + "learning_rate": 1e-06, + "loss": 0.5871, + "num_input_tokens_seen": 315219956, + "step": 5626 + }, + { + "epoch": 12.530066815144766, + "loss": 0.7462646961212158, + "loss_ce": 0.0001709566276986152, + "loss_iou": 0.32421875, + "loss_num": 0.01953125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 315219956, + "step": 5626 + }, + { + "epoch": 12.53229398663697, + "grad_norm": 20.835941314697266, + "learning_rate": 1e-06, + "loss": 0.4619, + "num_input_tokens_seen": 315277464, + "step": 5627 + }, + { + "epoch": 12.53229398663697, + "loss": 0.362972229719162, + "loss_ce": 0.00017925890279002488, + "loss_iou": 0.1494140625, + "loss_num": 0.0128173828125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 315277464, + "step": 5627 + }, + { + "epoch": 12.534521158129175, + "grad_norm": 15.360301971435547, + "learning_rate": 1e-06, + "loss": 0.3773, + "num_input_tokens_seen": 315333928, + "step": 5628 + }, + { + "epoch": 12.534521158129175, + "loss": 0.31863662600517273, + "loss_ce": 0.00015518809959758073, + "loss_iou": 0.1396484375, + "loss_num": 0.007781982421875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 315333928, + "step": 5628 + }, + { + "epoch": 12.53674832962138, + "grad_norm": 18.045223236083984, + "learning_rate": 1e-06, + "loss": 0.444, + "num_input_tokens_seen": 315390648, + "step": 5629 + }, + { + "epoch": 12.53674832962138, + "loss": 0.30956727266311646, + "loss_ce": 0.00011902242840733379, + "loss_iou": 0.1298828125, + "loss_num": 0.0098876953125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 315390648, + "step": 5629 + }, + { + "epoch": 12.538975501113585, + "grad_norm": 36.879791259765625, + "learning_rate": 1e-06, + "loss": 0.4498, + "num_input_tokens_seen": 315447484, + "step": 5630 + }, + { + "epoch": 12.538975501113585, + "loss": 0.33547037839889526, + "loss_ce": 0.00011271548282820731, + "loss_iou": 0.1337890625, + "loss_num": 0.0135498046875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 315447484, + "step": 5630 + }, + { + "epoch": 12.54120267260579, + "grad_norm": 18.903820037841797, + "learning_rate": 1e-06, + "loss": 0.4593, + "num_input_tokens_seen": 315503840, + "step": 5631 + }, + { + "epoch": 12.54120267260579, + "loss": 0.3675609529018402, + "loss_ce": 0.0001293124514631927, + "loss_iou": 0.1650390625, + "loss_num": 0.007659912109375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 315503840, + "step": 5631 + }, + { + "epoch": 12.543429844097995, + "grad_norm": 15.022705078125, + "learning_rate": 1e-06, + "loss": 0.5607, + "num_input_tokens_seen": 315560960, + "step": 5632 + }, + { + "epoch": 12.543429844097995, + "loss": 0.5480488538742065, + "loss_ce": 0.00013630017929244787, + "loss_iou": 0.2353515625, + "loss_num": 0.015380859375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 315560960, + "step": 5632 + }, + { + "epoch": 12.5456570155902, + "grad_norm": 18.663776397705078, + "learning_rate": 1e-06, + "loss": 0.5616, + "num_input_tokens_seen": 315616752, + "step": 5633 + }, + { + "epoch": 12.5456570155902, + "loss": 0.5263513922691345, + "loss_ce": 0.00010624493006616831, + "loss_iou": 0.23046875, + "loss_num": 0.01324462890625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 315616752, + "step": 5633 + }, + { + "epoch": 12.547884187082406, + "grad_norm": 19.984249114990234, + "learning_rate": 1e-06, + "loss": 0.5842, + "num_input_tokens_seen": 315670220, + "step": 5634 + }, + { + "epoch": 12.547884187082406, + "loss": 0.47094982862472534, + "loss_ce": 0.0001246376777999103, + "loss_iou": 0.2177734375, + "loss_num": 0.007110595703125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 315670220, + "step": 5634 + }, + { + "epoch": 12.550111358574611, + "grad_norm": 22.835325241088867, + "learning_rate": 1e-06, + "loss": 0.5918, + "num_input_tokens_seen": 315723144, + "step": 5635 + }, + { + "epoch": 12.550111358574611, + "loss": 0.5138300657272339, + "loss_ce": 0.00015820455155335367, + "loss_iou": 0.2021484375, + "loss_num": 0.02197265625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 315723144, + "step": 5635 + }, + { + "epoch": 12.552338530066816, + "grad_norm": 13.647897720336914, + "learning_rate": 1e-06, + "loss": 0.5345, + "num_input_tokens_seen": 315778516, + "step": 5636 + }, + { + "epoch": 12.552338530066816, + "loss": 0.5501181483268738, + "loss_ce": 0.00013035524170845747, + "loss_iou": 0.2373046875, + "loss_num": 0.01507568359375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 315778516, + "step": 5636 + }, + { + "epoch": 12.55456570155902, + "grad_norm": 17.706628799438477, + "learning_rate": 1e-06, + "loss": 0.8976, + "num_input_tokens_seen": 315835116, + "step": 5637 + }, + { + "epoch": 12.55456570155902, + "loss": 0.998259425163269, + "loss_ce": 0.00015154268476180732, + "loss_iou": 0.419921875, + "loss_num": 0.031494140625, + "loss_xval": 1.0, + "num_input_tokens_seen": 315835116, + "step": 5637 + }, + { + "epoch": 12.556792873051226, + "grad_norm": 16.907272338867188, + "learning_rate": 1e-06, + "loss": 0.5527, + "num_input_tokens_seen": 315890436, + "step": 5638 + }, + { + "epoch": 12.556792873051226, + "loss": 0.652888298034668, + "loss_ce": 0.00011732908751582727, + "loss_iou": 0.28515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 315890436, + "step": 5638 + }, + { + "epoch": 12.55902004454343, + "grad_norm": 14.611957550048828, + "learning_rate": 1e-06, + "loss": 0.7071, + "num_input_tokens_seen": 315944500, + "step": 5639 + }, + { + "epoch": 12.55902004454343, + "loss": 0.6298529505729675, + "loss_ce": 0.0015570501564070582, + "loss_iou": 0.236328125, + "loss_num": 0.03125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 315944500, + "step": 5639 + }, + { + "epoch": 12.561247216035635, + "grad_norm": 39.95273208618164, + "learning_rate": 1e-06, + "loss": 0.7408, + "num_input_tokens_seen": 315998188, + "step": 5640 + }, + { + "epoch": 12.561247216035635, + "loss": 0.7573456168174744, + "loss_ce": 0.0001434848236385733, + "loss_iou": 0.298828125, + "loss_num": 0.03173828125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 315998188, + "step": 5640 + }, + { + "epoch": 12.56347438752784, + "grad_norm": 17.88589859008789, + "learning_rate": 1e-06, + "loss": 0.3115, + "num_input_tokens_seen": 316054048, + "step": 5641 + }, + { + "epoch": 12.56347438752784, + "loss": 0.3503269553184509, + "loss_ce": 0.00010725160245783627, + "loss_iou": 0.1611328125, + "loss_num": 0.005523681640625, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 316054048, + "step": 5641 + }, + { + "epoch": 12.565701559020045, + "grad_norm": 76.38765716552734, + "learning_rate": 1e-06, + "loss": 0.7108, + "num_input_tokens_seen": 316110144, + "step": 5642 + }, + { + "epoch": 12.565701559020045, + "loss": 0.7477288842201233, + "loss_ce": 0.00017026919522322714, + "loss_iou": 0.33203125, + "loss_num": 0.0169677734375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 316110144, + "step": 5642 + }, + { + "epoch": 12.56792873051225, + "grad_norm": 13.18075180053711, + "learning_rate": 1e-06, + "loss": 0.6063, + "num_input_tokens_seen": 316167200, + "step": 5643 + }, + { + "epoch": 12.56792873051225, + "loss": 0.6305142641067505, + "loss_ce": 0.00014317099703475833, + "loss_iou": 0.2431640625, + "loss_num": 0.0289306640625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 316167200, + "step": 5643 + }, + { + "epoch": 12.570155902004455, + "grad_norm": 14.634928703308105, + "learning_rate": 1e-06, + "loss": 0.4603, + "num_input_tokens_seen": 316224676, + "step": 5644 + }, + { + "epoch": 12.570155902004455, + "loss": 0.34102046489715576, + "loss_ce": 0.00020015044719912112, + "loss_iou": 0.1552734375, + "loss_num": 0.006256103515625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 316224676, + "step": 5644 + }, + { + "epoch": 12.57238307349666, + "grad_norm": 21.283550262451172, + "learning_rate": 1e-06, + "loss": 0.5681, + "num_input_tokens_seen": 316282056, + "step": 5645 + }, + { + "epoch": 12.57238307349666, + "loss": 0.6777347326278687, + "loss_ce": 0.0001223925792146474, + "loss_iou": 0.296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 316282056, + "step": 5645 + }, + { + "epoch": 12.574610244988865, + "grad_norm": 21.21981430053711, + "learning_rate": 1e-06, + "loss": 0.604, + "num_input_tokens_seen": 316336632, + "step": 5646 + }, + { + "epoch": 12.574610244988865, + "loss": 0.4394035339355469, + "loss_ce": 0.00028606440173462033, + "loss_iou": 0.1875, + "loss_num": 0.0128173828125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 316336632, + "step": 5646 + }, + { + "epoch": 12.57683741648107, + "grad_norm": 16.894546508789062, + "learning_rate": 1e-06, + "loss": 0.3813, + "num_input_tokens_seen": 316392980, + "step": 5647 + }, + { + "epoch": 12.57683741648107, + "loss": 0.40146303176879883, + "loss_ce": 9.585732186678797e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.00738525390625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 316392980, + "step": 5647 + }, + { + "epoch": 12.579064587973274, + "grad_norm": 14.076600074768066, + "learning_rate": 1e-06, + "loss": 0.4109, + "num_input_tokens_seen": 316450712, + "step": 5648 + }, + { + "epoch": 12.579064587973274, + "loss": 0.4312838315963745, + "loss_ce": 0.00013148068683221936, + "loss_iou": 0.1875, + "loss_num": 0.0111083984375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 316450712, + "step": 5648 + }, + { + "epoch": 12.58129175946548, + "grad_norm": 18.681835174560547, + "learning_rate": 1e-06, + "loss": 0.5381, + "num_input_tokens_seen": 316506272, + "step": 5649 + }, + { + "epoch": 12.58129175946548, + "loss": 0.5017099380493164, + "loss_ce": 0.00012300520029384643, + "loss_iou": 0.212890625, + "loss_num": 0.01519775390625, + "loss_xval": 0.5, + "num_input_tokens_seen": 316506272, + "step": 5649 + }, + { + "epoch": 12.583518930957684, + "grad_norm": 19.394519805908203, + "learning_rate": 1e-06, + "loss": 0.5845, + "num_input_tokens_seen": 316560448, + "step": 5650 + }, + { + "epoch": 12.583518930957684, + "loss": 0.49112582206726074, + "loss_ce": 0.00015903441817499697, + "loss_iou": 0.197265625, + "loss_num": 0.0194091796875, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 316560448, + "step": 5650 + }, + { + "epoch": 12.585746102449889, + "grad_norm": 19.638614654541016, + "learning_rate": 1e-06, + "loss": 0.3961, + "num_input_tokens_seen": 316616300, + "step": 5651 + }, + { + "epoch": 12.585746102449889, + "loss": 0.3637651801109314, + "loss_ce": 0.00011775536404456943, + "loss_iou": 0.1650390625, + "loss_num": 0.00689697265625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 316616300, + "step": 5651 + }, + { + "epoch": 12.587973273942094, + "grad_norm": 20.799516677856445, + "learning_rate": 1e-06, + "loss": 0.7011, + "num_input_tokens_seen": 316672848, + "step": 5652 + }, + { + "epoch": 12.587973273942094, + "loss": 0.555069088935852, + "loss_ce": 0.00013745573232881725, + "loss_iou": 0.255859375, + "loss_num": 0.0084228515625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 316672848, + "step": 5652 + }, + { + "epoch": 12.590200445434299, + "grad_norm": 16.250247955322266, + "learning_rate": 1e-06, + "loss": 0.5712, + "num_input_tokens_seen": 316726660, + "step": 5653 + }, + { + "epoch": 12.590200445434299, + "loss": 0.7183680534362793, + "loss_ce": 0.00018261410878039896, + "loss_iou": 0.271484375, + "loss_num": 0.035400390625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 316726660, + "step": 5653 + }, + { + "epoch": 12.592427616926503, + "grad_norm": 16.84650993347168, + "learning_rate": 1e-06, + "loss": 0.4514, + "num_input_tokens_seen": 316785652, + "step": 5654 + }, + { + "epoch": 12.592427616926503, + "loss": 0.4916442036628723, + "loss_ce": 0.00012808601604774594, + "loss_iou": 0.220703125, + "loss_num": 0.01025390625, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 316785652, + "step": 5654 + }, + { + "epoch": 12.594654788418708, + "grad_norm": 19.085908889770508, + "learning_rate": 1e-06, + "loss": 0.549, + "num_input_tokens_seen": 316838728, + "step": 5655 + }, + { + "epoch": 12.594654788418708, + "loss": 0.45017945766448975, + "loss_ce": 0.0002282563509652391, + "loss_iou": 0.201171875, + "loss_num": 0.00970458984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 316838728, + "step": 5655 + }, + { + "epoch": 12.596881959910913, + "grad_norm": 18.604005813598633, + "learning_rate": 1e-06, + "loss": 0.4854, + "num_input_tokens_seen": 316895116, + "step": 5656 + }, + { + "epoch": 12.596881959910913, + "loss": 0.4594961106777191, + "loss_ce": 0.00014551397180184722, + "loss_iou": 0.201171875, + "loss_num": 0.01116943359375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 316895116, + "step": 5656 + }, + { + "epoch": 12.599109131403118, + "grad_norm": 19.779462814331055, + "learning_rate": 1e-06, + "loss": 0.442, + "num_input_tokens_seen": 316951228, + "step": 5657 + }, + { + "epoch": 12.599109131403118, + "loss": 0.43638354539871216, + "loss_ce": 0.00010424081847304478, + "loss_iou": 0.1884765625, + "loss_num": 0.01190185546875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 316951228, + "step": 5657 + }, + { + "epoch": 12.601336302895323, + "grad_norm": 17.714229583740234, + "learning_rate": 1e-06, + "loss": 0.6931, + "num_input_tokens_seen": 317007992, + "step": 5658 + }, + { + "epoch": 12.601336302895323, + "loss": 0.7979649305343628, + "loss_ce": 0.00011335601448081434, + "loss_iou": 0.337890625, + "loss_num": 0.0242919921875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 317007992, + "step": 5658 + }, + { + "epoch": 12.603563474387528, + "grad_norm": 18.232986450195312, + "learning_rate": 1e-06, + "loss": 0.3883, + "num_input_tokens_seen": 317062396, + "step": 5659 + }, + { + "epoch": 12.603563474387528, + "loss": 0.3348948359489441, + "loss_ce": 0.00011698143498506397, + "loss_iou": 0.140625, + "loss_num": 0.01055908203125, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 317062396, + "step": 5659 + }, + { + "epoch": 12.605790645879733, + "grad_norm": 22.718849182128906, + "learning_rate": 1e-06, + "loss": 0.519, + "num_input_tokens_seen": 317118224, + "step": 5660 + }, + { + "epoch": 12.605790645879733, + "loss": 0.58899986743927, + "loss_ce": 0.00013271119678393006, + "loss_iou": 0.263671875, + "loss_num": 0.01239013671875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 317118224, + "step": 5660 + }, + { + "epoch": 12.608017817371937, + "grad_norm": 29.809764862060547, + "learning_rate": 1e-06, + "loss": 0.5945, + "num_input_tokens_seen": 317172604, + "step": 5661 + }, + { + "epoch": 12.608017817371937, + "loss": 0.6281754374504089, + "loss_ce": 0.0001236619718838483, + "loss_iou": 0.271484375, + "loss_num": 0.0169677734375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 317172604, + "step": 5661 + }, + { + "epoch": 12.610244988864142, + "grad_norm": 19.71194076538086, + "learning_rate": 1e-06, + "loss": 0.4504, + "num_input_tokens_seen": 317229076, + "step": 5662 + }, + { + "epoch": 12.610244988864142, + "loss": 0.3518773019313812, + "loss_ce": 0.00013169541489332914, + "loss_iou": 0.134765625, + "loss_num": 0.0164794921875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 317229076, + "step": 5662 + }, + { + "epoch": 12.612472160356347, + "grad_norm": 24.877761840820312, + "learning_rate": 1e-06, + "loss": 0.5304, + "num_input_tokens_seen": 317284080, + "step": 5663 + }, + { + "epoch": 12.612472160356347, + "loss": 0.5929080247879028, + "loss_ce": 0.0001345914788544178, + "loss_iou": 0.25390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 317284080, + "step": 5663 + }, + { + "epoch": 12.614699331848552, + "grad_norm": 37.88312530517578, + "learning_rate": 1e-06, + "loss": 0.7225, + "num_input_tokens_seen": 317338048, + "step": 5664 + }, + { + "epoch": 12.614699331848552, + "loss": 0.859398365020752, + "loss_ce": 0.0002674940915312618, + "loss_iou": 0.359375, + "loss_num": 0.028564453125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 317338048, + "step": 5664 + }, + { + "epoch": 12.616926503340757, + "grad_norm": 25.082626342773438, + "learning_rate": 1e-06, + "loss": 0.6775, + "num_input_tokens_seen": 317396176, + "step": 5665 + }, + { + "epoch": 12.616926503340757, + "loss": 0.6872255802154541, + "loss_ce": 0.00033595875720493495, + "loss_iou": 0.283203125, + "loss_num": 0.024169921875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 317396176, + "step": 5665 + }, + { + "epoch": 12.619153674832962, + "grad_norm": 16.618837356567383, + "learning_rate": 1e-06, + "loss": 0.6738, + "num_input_tokens_seen": 317453000, + "step": 5666 + }, + { + "epoch": 12.619153674832962, + "loss": 0.7559080123901367, + "loss_ce": 0.0001707018236629665, + "loss_iou": 0.326171875, + "loss_num": 0.020751953125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 317453000, + "step": 5666 + }, + { + "epoch": 12.621380846325167, + "grad_norm": 13.969486236572266, + "learning_rate": 1e-06, + "loss": 0.3752, + "num_input_tokens_seen": 317510472, + "step": 5667 + }, + { + "epoch": 12.621380846325167, + "loss": 0.42377543449401855, + "loss_ce": 0.0001303813187405467, + "loss_iou": 0.193359375, + "loss_num": 0.00732421875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 317510472, + "step": 5667 + }, + { + "epoch": 12.623608017817372, + "grad_norm": 13.138300895690918, + "learning_rate": 1e-06, + "loss": 0.4098, + "num_input_tokens_seen": 317567232, + "step": 5668 + }, + { + "epoch": 12.623608017817372, + "loss": 0.3994289040565491, + "loss_ce": 0.00013694007066078484, + "loss_iou": 0.1904296875, + "loss_num": 0.0035552978515625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 317567232, + "step": 5668 + }, + { + "epoch": 12.625835189309576, + "grad_norm": 19.55143165588379, + "learning_rate": 1e-06, + "loss": 0.378, + "num_input_tokens_seen": 317624192, + "step": 5669 + }, + { + "epoch": 12.625835189309576, + "loss": 0.4045639634132385, + "loss_ce": 0.00014502542035188526, + "loss_iou": 0.1796875, + "loss_num": 0.0089111328125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 317624192, + "step": 5669 + }, + { + "epoch": 12.628062360801781, + "grad_norm": 33.738197326660156, + "learning_rate": 1e-06, + "loss": 0.4112, + "num_input_tokens_seen": 317676972, + "step": 5670 + }, + { + "epoch": 12.628062360801781, + "loss": 0.4077775180339813, + "loss_ce": 0.00012370766489766538, + "loss_iou": 0.1796875, + "loss_num": 0.009765625, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 317676972, + "step": 5670 + }, + { + "epoch": 12.630289532293986, + "grad_norm": 26.519407272338867, + "learning_rate": 1e-06, + "loss": 0.4019, + "num_input_tokens_seen": 317734016, + "step": 5671 + }, + { + "epoch": 12.630289532293986, + "loss": 0.40515443682670593, + "loss_ce": 0.00012512919784057885, + "loss_iou": 0.1650390625, + "loss_num": 0.0147705078125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 317734016, + "step": 5671 + }, + { + "epoch": 12.632516703786191, + "grad_norm": 22.868947982788086, + "learning_rate": 1e-06, + "loss": 0.439, + "num_input_tokens_seen": 317789104, + "step": 5672 + }, + { + "epoch": 12.632516703786191, + "loss": 0.403316855430603, + "loss_ce": 0.00011859952792292461, + "loss_iou": 0.1884765625, + "loss_num": 0.00518798828125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 317789104, + "step": 5672 + }, + { + "epoch": 12.634743875278396, + "grad_norm": 23.70015525817871, + "learning_rate": 1e-06, + "loss": 0.4627, + "num_input_tokens_seen": 317843592, + "step": 5673 + }, + { + "epoch": 12.634743875278396, + "loss": 0.5965588092803955, + "loss_ce": 0.00012326473370194435, + "loss_iou": 0.25390625, + "loss_num": 0.0179443359375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 317843592, + "step": 5673 + }, + { + "epoch": 12.6369710467706, + "grad_norm": 19.277725219726562, + "learning_rate": 1e-06, + "loss": 0.4047, + "num_input_tokens_seen": 317900916, + "step": 5674 + }, + { + "epoch": 12.6369710467706, + "loss": 0.3441046476364136, + "loss_ce": 0.00011049180466216058, + "loss_iou": 0.1484375, + "loss_num": 0.00927734375, + "loss_xval": 0.34375, + "num_input_tokens_seen": 317900916, + "step": 5674 + }, + { + "epoch": 12.639198218262806, + "grad_norm": 28.94078826904297, + "learning_rate": 1e-06, + "loss": 0.4402, + "num_input_tokens_seen": 317957468, + "step": 5675 + }, + { + "epoch": 12.639198218262806, + "loss": 0.3978157639503479, + "loss_ce": 0.00011067395826103166, + "loss_iou": 0.1591796875, + "loss_num": 0.015869140625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 317957468, + "step": 5675 + }, + { + "epoch": 12.64142538975501, + "grad_norm": 19.98812484741211, + "learning_rate": 1e-06, + "loss": 0.6088, + "num_input_tokens_seen": 318014916, + "step": 5676 + }, + { + "epoch": 12.64142538975501, + "loss": 0.5992527008056641, + "loss_ce": 0.0001316650304943323, + "loss_iou": 0.263671875, + "loss_num": 0.0140380859375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 318014916, + "step": 5676 + }, + { + "epoch": 12.643652561247215, + "grad_norm": 29.456689834594727, + "learning_rate": 1e-06, + "loss": 0.4071, + "num_input_tokens_seen": 318069856, + "step": 5677 + }, + { + "epoch": 12.643652561247215, + "loss": 0.29436251521110535, + "loss_ce": 0.00017306354129686952, + "loss_iou": 0.1376953125, + "loss_num": 0.003936767578125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 318069856, + "step": 5677 + }, + { + "epoch": 12.64587973273942, + "grad_norm": 29.443206787109375, + "learning_rate": 1e-06, + "loss": 0.5605, + "num_input_tokens_seen": 318127208, + "step": 5678 + }, + { + "epoch": 12.64587973273942, + "loss": 0.6685495972633362, + "loss_ce": 0.0003367103636264801, + "loss_iou": 0.28125, + "loss_num": 0.021240234375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 318127208, + "step": 5678 + }, + { + "epoch": 12.648106904231625, + "grad_norm": 16.127708435058594, + "learning_rate": 1e-06, + "loss": 0.4754, + "num_input_tokens_seen": 318183468, + "step": 5679 + }, + { + "epoch": 12.648106904231625, + "loss": 0.43421459197998047, + "loss_ce": 0.00013255488011054695, + "loss_iou": 0.1884765625, + "loss_num": 0.01153564453125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 318183468, + "step": 5679 + }, + { + "epoch": 12.65033407572383, + "grad_norm": 36.4363899230957, + "learning_rate": 1e-06, + "loss": 0.5273, + "num_input_tokens_seen": 318237944, + "step": 5680 + }, + { + "epoch": 12.65033407572383, + "loss": 0.3793076276779175, + "loss_ce": 0.00015721011732239276, + "loss_iou": 0.17578125, + "loss_num": 0.00555419921875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 318237944, + "step": 5680 + }, + { + "epoch": 12.652561247216035, + "grad_norm": 15.695813179016113, + "learning_rate": 1e-06, + "loss": 0.5399, + "num_input_tokens_seen": 318297084, + "step": 5681 + }, + { + "epoch": 12.652561247216035, + "loss": 0.46922624111175537, + "loss_ce": 0.00011001349048456177, + "loss_iou": 0.208984375, + "loss_num": 0.0101318359375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 318297084, + "step": 5681 + }, + { + "epoch": 12.654788418708241, + "grad_norm": 24.64415168762207, + "learning_rate": 1e-06, + "loss": 0.4352, + "num_input_tokens_seen": 318350612, + "step": 5682 + }, + { + "epoch": 12.654788418708241, + "loss": 0.5813356041908264, + "loss_ce": 0.00015886628534644842, + "loss_iou": 0.26171875, + "loss_num": 0.01190185546875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 318350612, + "step": 5682 + }, + { + "epoch": 12.657015590200446, + "grad_norm": 14.695323944091797, + "learning_rate": 1e-06, + "loss": 0.5865, + "num_input_tokens_seen": 318406072, + "step": 5683 + }, + { + "epoch": 12.657015590200446, + "loss": 0.4219498932361603, + "loss_ce": 0.000120684111607261, + "loss_iou": 0.19140625, + "loss_num": 0.00762939453125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 318406072, + "step": 5683 + }, + { + "epoch": 12.659242761692651, + "grad_norm": 18.124658584594727, + "learning_rate": 1e-06, + "loss": 0.5347, + "num_input_tokens_seen": 318463084, + "step": 5684 + }, + { + "epoch": 12.659242761692651, + "loss": 0.6086758971214294, + "loss_ce": 0.00015539961168542504, + "loss_iou": 0.23046875, + "loss_num": 0.0296630859375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 318463084, + "step": 5684 + }, + { + "epoch": 12.661469933184856, + "grad_norm": 14.113274574279785, + "learning_rate": 1e-06, + "loss": 0.466, + "num_input_tokens_seen": 318520428, + "step": 5685 + }, + { + "epoch": 12.661469933184856, + "loss": 0.5447894930839539, + "loss_ce": 0.00011178314161952585, + "loss_iou": 0.2265625, + "loss_num": 0.018310546875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 318520428, + "step": 5685 + }, + { + "epoch": 12.66369710467706, + "grad_norm": 12.743885040283203, + "learning_rate": 1e-06, + "loss": 0.7553, + "num_input_tokens_seen": 318576972, + "step": 5686 + }, + { + "epoch": 12.66369710467706, + "loss": 0.9210690259933472, + "loss_ce": 0.0003536554577294737, + "loss_iou": 0.37109375, + "loss_num": 0.035888671875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 318576972, + "step": 5686 + }, + { + "epoch": 12.665924276169266, + "grad_norm": 14.964234352111816, + "learning_rate": 1e-06, + "loss": 0.4453, + "num_input_tokens_seen": 318630160, + "step": 5687 + }, + { + "epoch": 12.665924276169266, + "loss": 0.42542383074760437, + "loss_ce": 0.00013086556282360107, + "loss_iou": 0.1826171875, + "loss_num": 0.01202392578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 318630160, + "step": 5687 + }, + { + "epoch": 12.66815144766147, + "grad_norm": 22.878787994384766, + "learning_rate": 1e-06, + "loss": 0.5036, + "num_input_tokens_seen": 318683880, + "step": 5688 + }, + { + "epoch": 12.66815144766147, + "loss": 0.47283750772476196, + "loss_ce": 0.00012018828419968486, + "loss_iou": 0.2119140625, + "loss_num": 0.00982666015625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 318683880, + "step": 5688 + }, + { + "epoch": 12.670378619153675, + "grad_norm": 16.93242073059082, + "learning_rate": 1e-06, + "loss": 0.4694, + "num_input_tokens_seen": 318737980, + "step": 5689 + }, + { + "epoch": 12.670378619153675, + "loss": 0.4250328838825226, + "loss_ce": 0.00010613650374580175, + "loss_iou": 0.1875, + "loss_num": 0.00982666015625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 318737980, + "step": 5689 + }, + { + "epoch": 12.67260579064588, + "grad_norm": 18.397140502929688, + "learning_rate": 1e-06, + "loss": 0.4985, + "num_input_tokens_seen": 318793520, + "step": 5690 + }, + { + "epoch": 12.67260579064588, + "loss": 0.5429917573928833, + "loss_ce": 0.0001451352145522833, + "loss_iou": 0.2314453125, + "loss_num": 0.015869140625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 318793520, + "step": 5690 + }, + { + "epoch": 12.674832962138085, + "grad_norm": 23.1961669921875, + "learning_rate": 1e-06, + "loss": 0.4391, + "num_input_tokens_seen": 318848756, + "step": 5691 + }, + { + "epoch": 12.674832962138085, + "loss": 0.45615941286087036, + "loss_ce": 0.00010473289148649201, + "loss_iou": 0.2001953125, + "loss_num": 0.01141357421875, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 318848756, + "step": 5691 + }, + { + "epoch": 12.67706013363029, + "grad_norm": 14.611727714538574, + "learning_rate": 1e-06, + "loss": 0.4351, + "num_input_tokens_seen": 318907276, + "step": 5692 + }, + { + "epoch": 12.67706013363029, + "loss": 0.4373611807823181, + "loss_ce": 0.0001053235464496538, + "loss_iou": 0.189453125, + "loss_num": 0.011474609375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 318907276, + "step": 5692 + }, + { + "epoch": 12.679287305122495, + "grad_norm": 16.515594482421875, + "learning_rate": 1e-06, + "loss": 0.4227, + "num_input_tokens_seen": 318962132, + "step": 5693 + }, + { + "epoch": 12.679287305122495, + "loss": 0.4306679964065552, + "loss_ce": 0.00012597074965015054, + "loss_iou": 0.1982421875, + "loss_num": 0.0067138671875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 318962132, + "step": 5693 + }, + { + "epoch": 12.6815144766147, + "grad_norm": 20.435245513916016, + "learning_rate": 1e-06, + "loss": 0.6693, + "num_input_tokens_seen": 319018288, + "step": 5694 + }, + { + "epoch": 12.6815144766147, + "loss": 0.9051839709281921, + "loss_ce": 0.00015468656783923507, + "loss_iou": 0.380859375, + "loss_num": 0.029052734375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 319018288, + "step": 5694 + }, + { + "epoch": 12.683741648106905, + "grad_norm": 17.04014778137207, + "learning_rate": 1e-06, + "loss": 0.3592, + "num_input_tokens_seen": 319073932, + "step": 5695 + }, + { + "epoch": 12.683741648106905, + "loss": 0.45563840866088867, + "loss_ce": 0.00019404885824769735, + "loss_iou": 0.2060546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 319073932, + "step": 5695 + }, + { + "epoch": 12.68596881959911, + "grad_norm": 18.900136947631836, + "learning_rate": 1e-06, + "loss": 0.4399, + "num_input_tokens_seen": 319131752, + "step": 5696 + }, + { + "epoch": 12.68596881959911, + "loss": 0.4523688554763794, + "loss_ce": 9.835186938289553e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.01495361328125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 319131752, + "step": 5696 + }, + { + "epoch": 12.688195991091314, + "grad_norm": 25.752071380615234, + "learning_rate": 1e-06, + "loss": 0.4534, + "num_input_tokens_seen": 319187908, + "step": 5697 + }, + { + "epoch": 12.688195991091314, + "loss": 0.42492321133613586, + "loss_ce": 0.00011853590694954619, + "loss_iou": 0.193359375, + "loss_num": 0.007568359375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 319187908, + "step": 5697 + }, + { + "epoch": 12.690423162583519, + "grad_norm": 19.532682418823242, + "learning_rate": 1e-06, + "loss": 0.538, + "num_input_tokens_seen": 319244400, + "step": 5698 + }, + { + "epoch": 12.690423162583519, + "loss": 0.5101226568222046, + "loss_ce": 0.00011285034997854382, + "loss_iou": 0.228515625, + "loss_num": 0.01068115234375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 319244400, + "step": 5698 + }, + { + "epoch": 12.692650334075724, + "grad_norm": 26.41732406616211, + "learning_rate": 1e-06, + "loss": 0.4874, + "num_input_tokens_seen": 319297184, + "step": 5699 + }, + { + "epoch": 12.692650334075724, + "loss": 0.57191401720047, + "loss_ce": 0.00013665735605172813, + "loss_iou": 0.251953125, + "loss_num": 0.01385498046875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 319297184, + "step": 5699 + }, + { + "epoch": 12.694877505567929, + "grad_norm": 18.265283584594727, + "learning_rate": 1e-06, + "loss": 0.4927, + "num_input_tokens_seen": 319355800, + "step": 5700 + }, + { + "epoch": 12.694877505567929, + "loss": 0.5474244356155396, + "loss_ce": 0.00012223176599945873, + "loss_iou": 0.2314453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 319355800, + "step": 5700 + }, + { + "epoch": 12.697104677060134, + "grad_norm": 26.21422576904297, + "learning_rate": 1e-06, + "loss": 0.7005, + "num_input_tokens_seen": 319413272, + "step": 5701 + }, + { + "epoch": 12.697104677060134, + "loss": 0.9590729475021362, + "loss_ce": 0.00021064060274511576, + "loss_iou": 0.359375, + "loss_num": 0.04833984375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 319413272, + "step": 5701 + }, + { + "epoch": 12.699331848552339, + "grad_norm": 24.68498992919922, + "learning_rate": 1e-06, + "loss": 0.4993, + "num_input_tokens_seen": 319467372, + "step": 5702 + }, + { + "epoch": 12.699331848552339, + "loss": 0.5787380933761597, + "loss_ce": 0.0001248328626388684, + "loss_iou": 0.224609375, + "loss_num": 0.02587890625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 319467372, + "step": 5702 + }, + { + "epoch": 12.701559020044543, + "grad_norm": 20.236909866333008, + "learning_rate": 1e-06, + "loss": 0.5838, + "num_input_tokens_seen": 319524172, + "step": 5703 + }, + { + "epoch": 12.701559020044543, + "loss": 0.6807386875152588, + "loss_ce": 0.0005628582439385355, + "loss_iou": 0.2890625, + "loss_num": 0.0203857421875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 319524172, + "step": 5703 + }, + { + "epoch": 12.703786191536748, + "grad_norm": 27.707962036132812, + "learning_rate": 1e-06, + "loss": 0.6471, + "num_input_tokens_seen": 319578916, + "step": 5704 + }, + { + "epoch": 12.703786191536748, + "loss": 0.8126009702682495, + "loss_ce": 0.00010092551383422688, + "loss_iou": 0.357421875, + "loss_num": 0.019775390625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 319578916, + "step": 5704 + }, + { + "epoch": 12.706013363028953, + "grad_norm": 21.496240615844727, + "learning_rate": 1e-06, + "loss": 0.449, + "num_input_tokens_seen": 319634576, + "step": 5705 + }, + { + "epoch": 12.706013363028953, + "loss": 0.4259480834007263, + "loss_ce": 0.00010578571527730674, + "loss_iou": 0.181640625, + "loss_num": 0.01263427734375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 319634576, + "step": 5705 + }, + { + "epoch": 12.708240534521158, + "grad_norm": 17.127723693847656, + "learning_rate": 1e-06, + "loss": 0.3865, + "num_input_tokens_seen": 319689076, + "step": 5706 + }, + { + "epoch": 12.708240534521158, + "loss": 0.3831561207771301, + "loss_ce": 9.949406376108527e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 319689076, + "step": 5706 + }, + { + "epoch": 12.710467706013363, + "grad_norm": 19.543319702148438, + "learning_rate": 1e-06, + "loss": 0.7191, + "num_input_tokens_seen": 319745764, + "step": 5707 + }, + { + "epoch": 12.710467706013363, + "loss": 0.780562162399292, + "loss_ce": 0.00028868275694549084, + "loss_iou": 0.337890625, + "loss_num": 0.02099609375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 319745764, + "step": 5707 + }, + { + "epoch": 12.712694877505568, + "grad_norm": 20.706933975219727, + "learning_rate": 1e-06, + "loss": 0.3687, + "num_input_tokens_seen": 319803272, + "step": 5708 + }, + { + "epoch": 12.712694877505568, + "loss": 0.45703423023223877, + "loss_ce": 0.00018610812549013644, + "loss_iou": 0.2119140625, + "loss_num": 0.006805419921875, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 319803272, + "step": 5708 + }, + { + "epoch": 12.714922048997773, + "grad_norm": 13.944723129272461, + "learning_rate": 1e-06, + "loss": 0.3891, + "num_input_tokens_seen": 319860864, + "step": 5709 + }, + { + "epoch": 12.714922048997773, + "loss": 0.3766050636768341, + "loss_ce": 0.00014021531387697905, + "loss_iou": 0.16796875, + "loss_num": 0.00830078125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 319860864, + "step": 5709 + }, + { + "epoch": 12.717149220489977, + "grad_norm": 20.481975555419922, + "learning_rate": 1e-06, + "loss": 0.639, + "num_input_tokens_seen": 319916192, + "step": 5710 + }, + { + "epoch": 12.717149220489977, + "loss": 0.6568468809127808, + "loss_ce": 0.0001086572592612356, + "loss_iou": 0.265625, + "loss_num": 0.024658203125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 319916192, + "step": 5710 + }, + { + "epoch": 12.719376391982182, + "grad_norm": 14.734237670898438, + "learning_rate": 1e-06, + "loss": 0.6377, + "num_input_tokens_seen": 319974044, + "step": 5711 + }, + { + "epoch": 12.719376391982182, + "loss": 0.8217337131500244, + "loss_ce": 0.00020050689636263996, + "loss_iou": 0.3515625, + "loss_num": 0.0238037109375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 319974044, + "step": 5711 + }, + { + "epoch": 12.721603563474387, + "grad_norm": 16.80459213256836, + "learning_rate": 1e-06, + "loss": 0.4424, + "num_input_tokens_seen": 320027692, + "step": 5712 + }, + { + "epoch": 12.721603563474387, + "loss": 0.46265923976898193, + "loss_ce": 0.00013479188783094287, + "loss_iou": 0.2109375, + "loss_num": 0.0081787109375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 320027692, + "step": 5712 + }, + { + "epoch": 12.723830734966592, + "grad_norm": 13.117379188537598, + "learning_rate": 1e-06, + "loss": 0.5347, + "num_input_tokens_seen": 320084636, + "step": 5713 + }, + { + "epoch": 12.723830734966592, + "loss": 0.4970431923866272, + "loss_ce": 9.497034625383094e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.0186767578125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 320084636, + "step": 5713 + }, + { + "epoch": 12.726057906458797, + "grad_norm": 15.85624885559082, + "learning_rate": 1e-06, + "loss": 0.5115, + "num_input_tokens_seen": 320141228, + "step": 5714 + }, + { + "epoch": 12.726057906458797, + "loss": 0.5403798222541809, + "loss_ce": 0.0002187040081480518, + "loss_iou": 0.2294921875, + "loss_num": 0.016357421875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 320141228, + "step": 5714 + }, + { + "epoch": 12.728285077951002, + "grad_norm": 15.332324981689453, + "learning_rate": 1e-06, + "loss": 0.3932, + "num_input_tokens_seen": 320196208, + "step": 5715 + }, + { + "epoch": 12.728285077951002, + "loss": 0.4100413918495178, + "loss_ce": 0.00012929215154144913, + "loss_iou": 0.1708984375, + "loss_num": 0.0137939453125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 320196208, + "step": 5715 + }, + { + "epoch": 12.730512249443207, + "grad_norm": 21.221384048461914, + "learning_rate": 1e-06, + "loss": 0.3587, + "num_input_tokens_seen": 320252508, + "step": 5716 + }, + { + "epoch": 12.730512249443207, + "loss": 0.27532318234443665, + "loss_ce": 0.00011566374450922012, + "loss_iou": 0.12109375, + "loss_num": 0.00653076171875, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 320252508, + "step": 5716 + }, + { + "epoch": 12.732739420935411, + "grad_norm": 19.24123191833496, + "learning_rate": 1e-06, + "loss": 0.4894, + "num_input_tokens_seen": 320308308, + "step": 5717 + }, + { + "epoch": 12.732739420935411, + "loss": 0.44945940375328064, + "loss_ce": 0.00011856977653224021, + "loss_iou": 0.1884765625, + "loss_num": 0.0142822265625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 320308308, + "step": 5717 + }, + { + "epoch": 12.734966592427616, + "grad_norm": 18.568979263305664, + "learning_rate": 1e-06, + "loss": 0.5369, + "num_input_tokens_seen": 320363968, + "step": 5718 + }, + { + "epoch": 12.734966592427616, + "loss": 0.5608214139938354, + "loss_ce": 0.00015247752889990807, + "loss_iou": 0.25390625, + "loss_num": 0.01025390625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 320363968, + "step": 5718 + }, + { + "epoch": 12.737193763919821, + "grad_norm": 27.49472427368164, + "learning_rate": 1e-06, + "loss": 0.4842, + "num_input_tokens_seen": 320420160, + "step": 5719 + }, + { + "epoch": 12.737193763919821, + "loss": 0.44296640157699585, + "loss_ce": 9.531481191515923e-05, + "loss_iou": 0.173828125, + "loss_num": 0.0189208984375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 320420160, + "step": 5719 + }, + { + "epoch": 12.739420935412026, + "grad_norm": 35.03486633300781, + "learning_rate": 1e-06, + "loss": 0.6408, + "num_input_tokens_seen": 320476520, + "step": 5720 + }, + { + "epoch": 12.739420935412026, + "loss": 0.6617658734321594, + "loss_ce": 0.000144752484629862, + "loss_iou": 0.283203125, + "loss_num": 0.019287109375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 320476520, + "step": 5720 + }, + { + "epoch": 12.74164810690423, + "grad_norm": 19.232091903686523, + "learning_rate": 1e-06, + "loss": 0.3847, + "num_input_tokens_seen": 320531336, + "step": 5721 + }, + { + "epoch": 12.74164810690423, + "loss": 0.41272228956222534, + "loss_ce": 0.00012462519225664437, + "loss_iou": 0.185546875, + "loss_num": 0.00811767578125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 320531336, + "step": 5721 + }, + { + "epoch": 12.743875278396436, + "grad_norm": 19.980670928955078, + "learning_rate": 1e-06, + "loss": 0.4017, + "num_input_tokens_seen": 320590096, + "step": 5722 + }, + { + "epoch": 12.743875278396436, + "loss": 0.4484846591949463, + "loss_ce": 0.00012038621207466349, + "loss_iou": 0.1884765625, + "loss_num": 0.01422119140625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 320590096, + "step": 5722 + }, + { + "epoch": 12.74610244988864, + "grad_norm": 23.660293579101562, + "learning_rate": 1e-06, + "loss": 0.4983, + "num_input_tokens_seen": 320644036, + "step": 5723 + }, + { + "epoch": 12.74610244988864, + "loss": 0.5904831290245056, + "loss_ce": 0.0001510925212642178, + "loss_iou": 0.24609375, + "loss_num": 0.019775390625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 320644036, + "step": 5723 + }, + { + "epoch": 12.748329621380847, + "grad_norm": 16.64579963684082, + "learning_rate": 1e-06, + "loss": 0.4319, + "num_input_tokens_seen": 320699152, + "step": 5724 + }, + { + "epoch": 12.748329621380847, + "loss": 0.4705604612827301, + "loss_ce": 0.00010146087151952088, + "loss_iou": 0.21484375, + "loss_num": 0.00830078125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 320699152, + "step": 5724 + }, + { + "epoch": 12.750556792873052, + "grad_norm": 24.360746383666992, + "learning_rate": 1e-06, + "loss": 0.6527, + "num_input_tokens_seen": 320751832, + "step": 5725 + }, + { + "epoch": 12.750556792873052, + "loss": 0.7235379219055176, + "loss_ce": 0.00014920823741704226, + "loss_iou": 0.287109375, + "loss_num": 0.0296630859375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 320751832, + "step": 5725 + }, + { + "epoch": 12.752783964365257, + "grad_norm": 18.12118148803711, + "learning_rate": 1e-06, + "loss": 0.5865, + "num_input_tokens_seen": 320807464, + "step": 5726 + }, + { + "epoch": 12.752783964365257, + "loss": 0.45097601413726807, + "loss_ce": 0.00010932252916973084, + "loss_iou": 0.2041015625, + "loss_num": 0.00872802734375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 320807464, + "step": 5726 + }, + { + "epoch": 12.755011135857462, + "grad_norm": 104.34639739990234, + "learning_rate": 1e-06, + "loss": 0.6179, + "num_input_tokens_seen": 320861640, + "step": 5727 + }, + { + "epoch": 12.755011135857462, + "loss": 0.5127081871032715, + "loss_ce": 0.000134985864860937, + "loss_iou": 0.2119140625, + "loss_num": 0.017578125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 320861640, + "step": 5727 + }, + { + "epoch": 12.757238307349667, + "grad_norm": 71.3646011352539, + "learning_rate": 1e-06, + "loss": 0.6442, + "num_input_tokens_seen": 320915204, + "step": 5728 + }, + { + "epoch": 12.757238307349667, + "loss": 0.6957196593284607, + "loss_ce": 0.00016302938456647098, + "loss_iou": 0.306640625, + "loss_num": 0.016357421875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 320915204, + "step": 5728 + }, + { + "epoch": 12.759465478841872, + "grad_norm": 26.151466369628906, + "learning_rate": 1e-06, + "loss": 0.3624, + "num_input_tokens_seen": 320971060, + "step": 5729 + }, + { + "epoch": 12.759465478841872, + "loss": 0.34516897797584534, + "loss_ce": 0.00010673723591025919, + "loss_iou": 0.1416015625, + "loss_num": 0.012451171875, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 320971060, + "step": 5729 + }, + { + "epoch": 12.761692650334076, + "grad_norm": 17.449047088623047, + "learning_rate": 1e-06, + "loss": 0.4778, + "num_input_tokens_seen": 321024964, + "step": 5730 + }, + { + "epoch": 12.761692650334076, + "loss": 0.5893593430519104, + "loss_ce": 0.00012595132284332067, + "loss_iou": 0.27734375, + "loss_num": 0.00726318359375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 321024964, + "step": 5730 + }, + { + "epoch": 12.763919821826281, + "grad_norm": 18.459321975708008, + "learning_rate": 1e-06, + "loss": 0.511, + "num_input_tokens_seen": 321081864, + "step": 5731 + }, + { + "epoch": 12.763919821826281, + "loss": 0.5054360628128052, + "loss_ce": 0.0005532268551178277, + "loss_iou": 0.2236328125, + "loss_num": 0.011474609375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 321081864, + "step": 5731 + }, + { + "epoch": 12.766146993318486, + "grad_norm": 14.343801498413086, + "learning_rate": 1e-06, + "loss": 0.4483, + "num_input_tokens_seen": 321134864, + "step": 5732 + }, + { + "epoch": 12.766146993318486, + "loss": 0.4396534860134125, + "loss_ce": 0.00020037099602632225, + "loss_iou": 0.1875, + "loss_num": 0.0126953125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 321134864, + "step": 5732 + }, + { + "epoch": 12.768374164810691, + "grad_norm": 25.014375686645508, + "learning_rate": 1e-06, + "loss": 0.4792, + "num_input_tokens_seen": 321189340, + "step": 5733 + }, + { + "epoch": 12.768374164810691, + "loss": 0.48544344305992126, + "loss_ce": 9.186341048916802e-05, + "loss_iou": 0.21484375, + "loss_num": 0.0113525390625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 321189340, + "step": 5733 + }, + { + "epoch": 12.770601336302896, + "grad_norm": 21.6157283782959, + "learning_rate": 1e-06, + "loss": 0.6859, + "num_input_tokens_seen": 321243252, + "step": 5734 + }, + { + "epoch": 12.770601336302896, + "loss": 0.938653290271759, + "loss_ce": 0.0001766829373082146, + "loss_iou": 0.396484375, + "loss_num": 0.02880859375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 321243252, + "step": 5734 + }, + { + "epoch": 12.7728285077951, + "grad_norm": 27.44350814819336, + "learning_rate": 1e-06, + "loss": 0.3906, + "num_input_tokens_seen": 321297028, + "step": 5735 + }, + { + "epoch": 12.7728285077951, + "loss": 0.39947906136512756, + "loss_ce": 0.00012604435323737562, + "loss_iou": 0.1708984375, + "loss_num": 0.01153564453125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 321297028, + "step": 5735 + }, + { + "epoch": 12.775055679287306, + "grad_norm": 18.349063873291016, + "learning_rate": 1e-06, + "loss": 0.4358, + "num_input_tokens_seen": 321354732, + "step": 5736 + }, + { + "epoch": 12.775055679287306, + "loss": 0.3706546723842621, + "loss_ce": 0.00011024883133359253, + "loss_iou": 0.1689453125, + "loss_num": 0.006561279296875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 321354732, + "step": 5736 + }, + { + "epoch": 12.77728285077951, + "grad_norm": 34.92224884033203, + "learning_rate": 1e-06, + "loss": 0.6155, + "num_input_tokens_seen": 321408740, + "step": 5737 + }, + { + "epoch": 12.77728285077951, + "loss": 0.7788517475128174, + "loss_ce": 0.00016523349040653557, + "loss_iou": 0.361328125, + "loss_num": 0.0111083984375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 321408740, + "step": 5737 + }, + { + "epoch": 12.779510022271715, + "grad_norm": 19.885868072509766, + "learning_rate": 1e-06, + "loss": 0.4942, + "num_input_tokens_seen": 321465824, + "step": 5738 + }, + { + "epoch": 12.779510022271715, + "loss": 0.49599504470825195, + "loss_ce": 0.0001454516313970089, + "loss_iou": 0.2021484375, + "loss_num": 0.0184326171875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 321465824, + "step": 5738 + }, + { + "epoch": 12.78173719376392, + "grad_norm": 24.478364944458008, + "learning_rate": 1e-06, + "loss": 0.4904, + "num_input_tokens_seen": 321521952, + "step": 5739 + }, + { + "epoch": 12.78173719376392, + "loss": 0.34351837635040283, + "loss_ce": 0.00013458389730658382, + "loss_iou": 0.134765625, + "loss_num": 0.01483154296875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 321521952, + "step": 5739 + }, + { + "epoch": 12.783964365256125, + "grad_norm": 34.84294128417969, + "learning_rate": 1e-06, + "loss": 0.4395, + "num_input_tokens_seen": 321577264, + "step": 5740 + }, + { + "epoch": 12.783964365256125, + "loss": 0.396610826253891, + "loss_ce": 0.00012645949027501047, + "loss_iou": 0.181640625, + "loss_num": 0.006591796875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 321577264, + "step": 5740 + }, + { + "epoch": 12.78619153674833, + "grad_norm": 18.09517478942871, + "learning_rate": 1e-06, + "loss": 0.5365, + "num_input_tokens_seen": 321634008, + "step": 5741 + }, + { + "epoch": 12.78619153674833, + "loss": 0.7020202875137329, + "loss_ce": 0.00011601038568187505, + "loss_iou": 0.31640625, + "loss_num": 0.01385498046875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 321634008, + "step": 5741 + }, + { + "epoch": 12.788418708240535, + "grad_norm": 20.708227157592773, + "learning_rate": 1e-06, + "loss": 0.4856, + "num_input_tokens_seen": 321690796, + "step": 5742 + }, + { + "epoch": 12.788418708240535, + "loss": 0.5002317428588867, + "loss_ce": 0.00010964082321152091, + "loss_iou": 0.20703125, + "loss_num": 0.01708984375, + "loss_xval": 0.5, + "num_input_tokens_seen": 321690796, + "step": 5742 + }, + { + "epoch": 12.79064587973274, + "grad_norm": 22.730310440063477, + "learning_rate": 1e-06, + "loss": 0.676, + "num_input_tokens_seen": 321744736, + "step": 5743 + }, + { + "epoch": 12.79064587973274, + "loss": 0.7551612854003906, + "loss_ce": 0.00015634976443834603, + "loss_iou": 0.3203125, + "loss_num": 0.022705078125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 321744736, + "step": 5743 + }, + { + "epoch": 12.792873051224944, + "grad_norm": 21.29202651977539, + "learning_rate": 1e-06, + "loss": 0.512, + "num_input_tokens_seen": 321801344, + "step": 5744 + }, + { + "epoch": 12.792873051224944, + "loss": 0.39646512269973755, + "loss_ce": 0.00010285238386131823, + "loss_iou": 0.171875, + "loss_num": 0.01055908203125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 321801344, + "step": 5744 + }, + { + "epoch": 12.79510022271715, + "grad_norm": 29.273391723632812, + "learning_rate": 1e-06, + "loss": 0.4766, + "num_input_tokens_seen": 321857176, + "step": 5745 + }, + { + "epoch": 12.79510022271715, + "loss": 0.6940093040466309, + "loss_ce": 0.0001615973305888474, + "loss_iou": 0.30078125, + "loss_num": 0.0184326171875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 321857176, + "step": 5745 + }, + { + "epoch": 12.797327394209354, + "grad_norm": 15.866458892822266, + "learning_rate": 1e-06, + "loss": 0.4327, + "num_input_tokens_seen": 321913700, + "step": 5746 + }, + { + "epoch": 12.797327394209354, + "loss": 0.3660312294960022, + "loss_ce": 0.00012545731442514807, + "loss_iou": 0.162109375, + "loss_num": 0.00836181640625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 321913700, + "step": 5746 + }, + { + "epoch": 12.799554565701559, + "grad_norm": 22.117265701293945, + "learning_rate": 1e-06, + "loss": 0.4373, + "num_input_tokens_seen": 321968160, + "step": 5747 + }, + { + "epoch": 12.799554565701559, + "loss": 0.40611228346824646, + "loss_ce": 0.00010643507994245738, + "loss_iou": 0.177734375, + "loss_num": 0.0101318359375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 321968160, + "step": 5747 + }, + { + "epoch": 12.801781737193764, + "grad_norm": 17.11176300048828, + "learning_rate": 1e-06, + "loss": 0.6027, + "num_input_tokens_seen": 322023584, + "step": 5748 + }, + { + "epoch": 12.801781737193764, + "loss": 0.6447978615760803, + "loss_ce": 0.00014456806820817292, + "loss_iou": 0.2578125, + "loss_num": 0.0252685546875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 322023584, + "step": 5748 + }, + { + "epoch": 12.804008908685969, + "grad_norm": 31.402372360229492, + "learning_rate": 1e-06, + "loss": 0.4811, + "num_input_tokens_seen": 322082440, + "step": 5749 + }, + { + "epoch": 12.804008908685969, + "loss": 0.39622175693511963, + "loss_ce": 0.00010357976134400815, + "loss_iou": 0.1708984375, + "loss_num": 0.010986328125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 322082440, + "step": 5749 + }, + { + "epoch": 12.806236080178174, + "grad_norm": 15.063833236694336, + "learning_rate": 1e-06, + "loss": 0.4572, + "num_input_tokens_seen": 322138808, + "step": 5750 + }, + { + "epoch": 12.806236080178174, + "eval_seeclick_web_CIoU": 0.5856587886810303, + "eval_seeclick_web_GIoU": 0.583136647939682, + "eval_seeclick_web_IoU": 0.6040867269039154, + "eval_seeclick_web_MAE_all": 0.0155873978510499, + "eval_seeclick_web_MAE_h": 0.007719088811427355, + "eval_seeclick_web_MAE_w": 0.015769829973578453, + "eval_seeclick_web_MAE_x_boxes": 0.009643178898841143, + "eval_seeclick_web_MAE_y_boxes": 0.021339073311537504, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9100306630134583, + "eval_seeclick_web_loss_ce": 0.00018124158668797463, + "eval_seeclick_web_loss_iou": 0.417724609375, + "eval_seeclick_web_loss_num": 0.012420654296875, + "eval_seeclick_web_loss_xval": 0.8966064453125, + "eval_seeclick_web_runtime": 22.4296, + "eval_seeclick_web_samples_per_second": 2.229, + "eval_seeclick_web_steps_per_second": 0.089, + "num_input_tokens_seen": 322138808, + "step": 5750 + }, + { + "epoch": 12.806236080178174, + "eval_icons_CIoU": 0.29134996235370636, + "eval_icons_GIoU": 0.31351229548454285, + "eval_icons_IoU": 0.3615979105234146, + "eval_icons_MAE_all": 0.05436762422323227, + "eval_icons_MAE_h": 0.036054016556590796, + "eval_icons_MAE_w": 0.045310807414352894, + "eval_icons_MAE_x_boxes": 0.05722554586827755, + "eval_icons_MAE_y_boxes": 0.03668802231550217, + "eval_icons_inside_bbox": 0.6059027910232544, + "eval_icons_loss": 1.6602177619934082, + "eval_icons_loss_ce": 0.00021313664910849184, + "eval_icons_loss_iou": 0.668701171875, + "eval_icons_loss_num": 0.053501129150390625, + "eval_icons_loss_xval": 1.60546875, + "eval_icons_runtime": 18.5773, + "eval_icons_samples_per_second": 2.691, + "eval_icons_steps_per_second": 0.108, + "num_input_tokens_seen": 322138808, + "step": 5750 + }, + { + "epoch": 12.806236080178174, + "eval_screenspot_CIoU": 0.35812679926554364, + "eval_screenspot_GIoU": 0.37103012204170227, + "eval_screenspot_IoU": 0.43642645080884296, + "eval_screenspot_MAE_all": 0.05802058925231298, + "eval_screenspot_MAE_h": 0.03933714081843694, + "eval_screenspot_MAE_w": 0.06783675154050191, + "eval_screenspot_MAE_x_boxes": 0.07193685260911782, + "eval_screenspot_MAE_y_boxes": 0.03876749984920025, + "eval_screenspot_inside_bbox": 0.6966666579246521, + "eval_screenspot_loss": 1.6010042428970337, + "eval_screenspot_loss_ce": 0.00024160779624556503, + "eval_screenspot_loss_iou": 0.664306640625, + "eval_screenspot_loss_num": 0.06610488891601562, + "eval_screenspot_loss_xval": 1.6591796875, + "eval_screenspot_runtime": 32.6846, + "eval_screenspot_samples_per_second": 2.723, + "eval_screenspot_steps_per_second": 0.092, + "num_input_tokens_seen": 322138808, + "step": 5750 + }, + { + "epoch": 12.806236080178174, + "eval_compot_CIoU": 0.3403613865375519, + "eval_compot_GIoU": 0.3508555740118027, + "eval_compot_IoU": 0.3995762914419174, + "eval_compot_MAE_all": 0.018100187182426453, + "eval_compot_MAE_h": 0.009705259930342436, + "eval_compot_MAE_w": 0.02139892429113388, + "eval_compot_MAE_x_boxes": 0.0300004780292511, + "eval_compot_MAE_y_boxes": 0.007140443893149495, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.413923740386963, + "eval_compot_loss_ce": 0.00017305201618000865, + "eval_compot_loss_iou": 0.6578369140625, + "eval_compot_loss_num": 0.016866683959960938, + "eval_compot_loss_xval": 1.399169921875, + "eval_compot_runtime": 19.9376, + "eval_compot_samples_per_second": 2.508, + "eval_compot_steps_per_second": 0.1, + "num_input_tokens_seen": 322138808, + "step": 5750 + }, + { + "epoch": 12.806236080178174, + "eval_custom_ui_val_CIoU": 0.47460226383474136, + "eval_custom_ui_val_GIoU": 0.4856240898370743, + "eval_custom_ui_val_IoU": 0.5360172192255656, + "eval_custom_ui_val_MAE_all": 0.028572862967848778, + "eval_custom_ui_val_MAE_h": 0.016446984217812616, + "eval_custom_ui_val_MAE_w": 0.03571122243172593, + "eval_custom_ui_val_MAE_x_boxes": 0.03412022265709109, + "eval_custom_ui_val_MAE_y_boxes": 0.014356489810678694, + "eval_custom_ui_val_inside_bbox": 0.7685185207260979, + "eval_custom_ui_val_loss": 1.1905332803726196, + "eval_custom_ui_val_loss_ce": 0.00019961439506409483, + "eval_custom_ui_val_loss_iou": 0.5112169053819444, + "eval_custom_ui_val_loss_num": 0.026023017035590276, + "eval_custom_ui_val_loss_xval": 1.1525065104166667, + "eval_custom_ui_val_runtime": 60.1958, + "eval_custom_ui_val_samples_per_second": 4.402, + "eval_custom_ui_val_steps_per_second": 0.15, + "num_input_tokens_seen": 322138808, + "step": 5750 + }, + { + "epoch": 12.806236080178174, + "loss": 0.9020213484764099, + "loss_ce": 0.00016588816652074456, + "loss_iou": 0.400390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 322138808, + "step": 5750 + }, + { + "epoch": 12.808463251670378, + "grad_norm": 38.99467086791992, + "learning_rate": 1e-06, + "loss": 0.7047, + "num_input_tokens_seen": 322196804, + "step": 5751 + }, + { + "epoch": 12.808463251670378, + "loss": 0.594732940196991, + "loss_ce": 0.0001284430327359587, + "loss_iou": 0.26171875, + "loss_num": 0.013671875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 322196804, + "step": 5751 + }, + { + "epoch": 12.810690423162583, + "grad_norm": 18.36638832092285, + "learning_rate": 1e-06, + "loss": 0.3216, + "num_input_tokens_seen": 322252400, + "step": 5752 + }, + { + "epoch": 12.810690423162583, + "loss": 0.3923182189464569, + "loss_ce": 0.00010630811448208988, + "loss_iou": 0.1708984375, + "loss_num": 0.010009765625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 322252400, + "step": 5752 + }, + { + "epoch": 12.812917594654788, + "grad_norm": 29.22452163696289, + "learning_rate": 1e-06, + "loss": 0.6287, + "num_input_tokens_seen": 322308460, + "step": 5753 + }, + { + "epoch": 12.812917594654788, + "loss": 0.6383031606674194, + "loss_ce": 0.00011960881238337606, + "loss_iou": 0.2578125, + "loss_num": 0.0244140625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 322308460, + "step": 5753 + }, + { + "epoch": 12.815144766146993, + "grad_norm": 21.79752540588379, + "learning_rate": 1e-06, + "loss": 0.4675, + "num_input_tokens_seen": 322362036, + "step": 5754 + }, + { + "epoch": 12.815144766146993, + "loss": 0.5063232183456421, + "loss_ce": 9.764191054273397e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.025146484375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 322362036, + "step": 5754 + }, + { + "epoch": 12.817371937639198, + "grad_norm": 16.744384765625, + "learning_rate": 1e-06, + "loss": 0.6904, + "num_input_tokens_seen": 322418496, + "step": 5755 + }, + { + "epoch": 12.817371937639198, + "loss": 0.6048610806465149, + "loss_ce": 0.000124765865621157, + "loss_iou": 0.251953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 322418496, + "step": 5755 + }, + { + "epoch": 12.819599109131403, + "grad_norm": 17.073532104492188, + "learning_rate": 1e-06, + "loss": 0.5932, + "num_input_tokens_seen": 322473040, + "step": 5756 + }, + { + "epoch": 12.819599109131403, + "loss": 0.733528733253479, + "loss_ce": 0.00013029485126025975, + "loss_iou": 0.3125, + "loss_num": 0.0211181640625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 322473040, + "step": 5756 + }, + { + "epoch": 12.821826280623608, + "grad_norm": 20.584901809692383, + "learning_rate": 1e-06, + "loss": 0.7059, + "num_input_tokens_seen": 322528604, + "step": 5757 + }, + { + "epoch": 12.821826280623608, + "loss": 0.8982992768287659, + "loss_ce": 0.00010590371675789356, + "loss_iou": 0.38671875, + "loss_num": 0.025146484375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 322528604, + "step": 5757 + }, + { + "epoch": 12.824053452115812, + "grad_norm": 20.415708541870117, + "learning_rate": 1e-06, + "loss": 0.5264, + "num_input_tokens_seen": 322583604, + "step": 5758 + }, + { + "epoch": 12.824053452115812, + "loss": 0.4630866050720215, + "loss_ce": 0.00013495869643520564, + "loss_iou": 0.1962890625, + "loss_num": 0.01409912109375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 322583604, + "step": 5758 + }, + { + "epoch": 12.826280623608017, + "grad_norm": 15.553330421447754, + "learning_rate": 1e-06, + "loss": 0.3762, + "num_input_tokens_seen": 322642080, + "step": 5759 + }, + { + "epoch": 12.826280623608017, + "loss": 0.4416653513908386, + "loss_ce": 0.0002591132652014494, + "loss_iou": 0.2041015625, + "loss_num": 0.006622314453125, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 322642080, + "step": 5759 + }, + { + "epoch": 12.828507795100222, + "grad_norm": 14.952034950256348, + "learning_rate": 1e-06, + "loss": 0.5567, + "num_input_tokens_seen": 322697788, + "step": 5760 + }, + { + "epoch": 12.828507795100222, + "loss": 0.548295259475708, + "loss_ce": 0.00019955809693783522, + "loss_iou": 0.2373046875, + "loss_num": 0.0146484375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 322697788, + "step": 5760 + }, + { + "epoch": 12.830734966592427, + "grad_norm": 25.951000213623047, + "learning_rate": 1e-06, + "loss": 0.4949, + "num_input_tokens_seen": 322752844, + "step": 5761 + }, + { + "epoch": 12.830734966592427, + "loss": 0.4571736454963684, + "loss_ce": 0.00014240591553971171, + "loss_iou": 0.20703125, + "loss_num": 0.0087890625, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 322752844, + "step": 5761 + }, + { + "epoch": 12.832962138084632, + "grad_norm": 43.291751861572266, + "learning_rate": 1e-06, + "loss": 0.5937, + "num_input_tokens_seen": 322808240, + "step": 5762 + }, + { + "epoch": 12.832962138084632, + "loss": 0.598831832408905, + "loss_ce": 0.0001380076282657683, + "loss_iou": 0.248046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 322808240, + "step": 5762 + }, + { + "epoch": 12.835189309576837, + "grad_norm": 21.269405364990234, + "learning_rate": 1e-06, + "loss": 0.5252, + "num_input_tokens_seen": 322863788, + "step": 5763 + }, + { + "epoch": 12.835189309576837, + "loss": 0.5071128010749817, + "loss_ce": 0.00015480615547858179, + "loss_iou": 0.2373046875, + "loss_num": 0.006744384765625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 322863788, + "step": 5763 + }, + { + "epoch": 12.837416481069042, + "grad_norm": 25.716829299926758, + "learning_rate": 1e-06, + "loss": 0.5679, + "num_input_tokens_seen": 322917120, + "step": 5764 + }, + { + "epoch": 12.837416481069042, + "loss": 0.6708798408508301, + "loss_ce": 0.0002255703293485567, + "loss_iou": 0.306640625, + "loss_num": 0.01141357421875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 322917120, + "step": 5764 + }, + { + "epoch": 12.839643652561247, + "grad_norm": 26.497522354125977, + "learning_rate": 1e-06, + "loss": 0.7658, + "num_input_tokens_seen": 322972616, + "step": 5765 + }, + { + "epoch": 12.839643652561247, + "loss": 0.7098448872566223, + "loss_ce": 0.0001281315489904955, + "loss_iou": 0.283203125, + "loss_num": 0.0284423828125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 322972616, + "step": 5765 + }, + { + "epoch": 12.841870824053451, + "grad_norm": 76.6500244140625, + "learning_rate": 1e-06, + "loss": 0.6196, + "num_input_tokens_seen": 323026852, + "step": 5766 + }, + { + "epoch": 12.841870824053451, + "loss": 0.8302084803581238, + "loss_ce": 0.00013035557640250772, + "loss_iou": 0.337890625, + "loss_num": 0.03125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 323026852, + "step": 5766 + }, + { + "epoch": 12.844097995545656, + "grad_norm": 62.04082107543945, + "learning_rate": 1e-06, + "loss": 0.6541, + "num_input_tokens_seen": 323081780, + "step": 5767 + }, + { + "epoch": 12.844097995545656, + "loss": 0.5926365256309509, + "loss_ce": 0.00010719904821598902, + "loss_iou": 0.2734375, + "loss_num": 0.00933837890625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 323081780, + "step": 5767 + }, + { + "epoch": 12.846325167037861, + "grad_norm": 15.04085636138916, + "learning_rate": 1e-06, + "loss": 0.3717, + "num_input_tokens_seen": 323138412, + "step": 5768 + }, + { + "epoch": 12.846325167037861, + "loss": 0.3711279630661011, + "loss_ce": 0.00015629868721589446, + "loss_iou": 0.162109375, + "loss_num": 0.00921630859375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 323138412, + "step": 5768 + }, + { + "epoch": 12.848552338530066, + "grad_norm": 16.58966064453125, + "learning_rate": 1e-06, + "loss": 0.468, + "num_input_tokens_seen": 323194440, + "step": 5769 + }, + { + "epoch": 12.848552338530066, + "loss": 0.47240063548088074, + "loss_ce": 0.00011059406824642792, + "loss_iou": 0.201171875, + "loss_num": 0.013916015625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 323194440, + "step": 5769 + }, + { + "epoch": 12.85077951002227, + "grad_norm": 15.409582138061523, + "learning_rate": 1e-06, + "loss": 0.6628, + "num_input_tokens_seen": 323250432, + "step": 5770 + }, + { + "epoch": 12.85077951002227, + "loss": 0.5492154955863953, + "loss_ce": 0.0001432021672371775, + "loss_iou": 0.2470703125, + "loss_num": 0.0108642578125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 323250432, + "step": 5770 + }, + { + "epoch": 12.853006681514476, + "grad_norm": 17.768123626708984, + "learning_rate": 1e-06, + "loss": 0.467, + "num_input_tokens_seen": 323305916, + "step": 5771 + }, + { + "epoch": 12.853006681514476, + "loss": 0.3561987280845642, + "loss_ce": 0.00011960781557718292, + "loss_iou": 0.1611328125, + "loss_num": 0.0067138671875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 323305916, + "step": 5771 + }, + { + "epoch": 12.855233853006682, + "grad_norm": 26.621932983398438, + "learning_rate": 1e-06, + "loss": 0.6905, + "num_input_tokens_seen": 323363612, + "step": 5772 + }, + { + "epoch": 12.855233853006682, + "loss": 0.621519923210144, + "loss_ce": 0.000304146931739524, + "loss_iou": 0.287109375, + "loss_num": 0.0093994140625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 323363612, + "step": 5772 + }, + { + "epoch": 12.857461024498887, + "grad_norm": 36.93680953979492, + "learning_rate": 1e-06, + "loss": 0.4199, + "num_input_tokens_seen": 323418516, + "step": 5773 + }, + { + "epoch": 12.857461024498887, + "loss": 0.4533904790878296, + "loss_ce": 0.0001434021396562457, + "loss_iou": 0.1923828125, + "loss_num": 0.01385498046875, + "loss_xval": 0.453125, + "num_input_tokens_seen": 323418516, + "step": 5773 + }, + { + "epoch": 12.859688195991092, + "grad_norm": 12.92249870300293, + "learning_rate": 1e-06, + "loss": 0.3578, + "num_input_tokens_seen": 323475544, + "step": 5774 + }, + { + "epoch": 12.859688195991092, + "loss": 0.3216310143470764, + "loss_ce": 9.778769162949175e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.005126953125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 323475544, + "step": 5774 + }, + { + "epoch": 12.861915367483297, + "grad_norm": 15.084147453308105, + "learning_rate": 1e-06, + "loss": 0.5663, + "num_input_tokens_seen": 323531332, + "step": 5775 + }, + { + "epoch": 12.861915367483297, + "loss": 0.5575112104415894, + "loss_ce": 0.0001381381880491972, + "loss_iou": 0.255859375, + "loss_num": 0.00933837890625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 323531332, + "step": 5775 + }, + { + "epoch": 12.864142538975502, + "grad_norm": 23.849468231201172, + "learning_rate": 1e-06, + "loss": 0.4872, + "num_input_tokens_seen": 323586272, + "step": 5776 + }, + { + "epoch": 12.864142538975502, + "loss": 0.3724247217178345, + "loss_ce": 0.00011026560969185084, + "loss_iou": 0.16015625, + "loss_num": 0.010498046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 323586272, + "step": 5776 + }, + { + "epoch": 12.866369710467707, + "grad_norm": 16.385759353637695, + "learning_rate": 1e-06, + "loss": 0.4698, + "num_input_tokens_seen": 323642804, + "step": 5777 + }, + { + "epoch": 12.866369710467707, + "loss": 0.6077675819396973, + "loss_ce": 0.00010155436029890552, + "loss_iou": 0.26953125, + "loss_num": 0.01348876953125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 323642804, + "step": 5777 + }, + { + "epoch": 12.868596881959911, + "grad_norm": 13.454336166381836, + "learning_rate": 1e-06, + "loss": 0.3738, + "num_input_tokens_seen": 323699508, + "step": 5778 + }, + { + "epoch": 12.868596881959911, + "loss": 0.3336215317249298, + "loss_ce": 0.00012544200581032783, + "loss_iou": 0.1552734375, + "loss_num": 0.004486083984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 323699508, + "step": 5778 + }, + { + "epoch": 12.870824053452116, + "grad_norm": 32.175724029541016, + "learning_rate": 1e-06, + "loss": 0.3957, + "num_input_tokens_seen": 323753524, + "step": 5779 + }, + { + "epoch": 12.870824053452116, + "loss": 0.38816550374031067, + "loss_ce": 0.00010398947779322043, + "loss_iou": 0.1787109375, + "loss_num": 0.006317138671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 323753524, + "step": 5779 + }, + { + "epoch": 12.873051224944321, + "grad_norm": 16.53754234313965, + "learning_rate": 1e-06, + "loss": 0.3905, + "num_input_tokens_seen": 323807584, + "step": 5780 + }, + { + "epoch": 12.873051224944321, + "loss": 0.460315078496933, + "loss_ce": 0.00011000905215041712, + "loss_iou": 0.189453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 323807584, + "step": 5780 + }, + { + "epoch": 12.875278396436526, + "grad_norm": 33.93476486206055, + "learning_rate": 1e-06, + "loss": 0.5997, + "num_input_tokens_seen": 323865248, + "step": 5781 + }, + { + "epoch": 12.875278396436526, + "loss": 0.8404668569564819, + "loss_ce": 0.00013486588431987911, + "loss_iou": 0.365234375, + "loss_num": 0.021728515625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 323865248, + "step": 5781 + }, + { + "epoch": 12.877505567928731, + "grad_norm": 24.43963623046875, + "learning_rate": 1e-06, + "loss": 0.4762, + "num_input_tokens_seen": 323921064, + "step": 5782 + }, + { + "epoch": 12.877505567928731, + "loss": 0.5513740181922913, + "loss_ce": 0.00010449648834764957, + "loss_iou": 0.2138671875, + "loss_num": 0.0247802734375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 323921064, + "step": 5782 + }, + { + "epoch": 12.879732739420936, + "grad_norm": 19.904743194580078, + "learning_rate": 1e-06, + "loss": 0.4667, + "num_input_tokens_seen": 323979316, + "step": 5783 + }, + { + "epoch": 12.879732739420936, + "loss": 0.4628022313117981, + "loss_ce": 0.00015576645091641694, + "loss_iou": 0.1923828125, + "loss_num": 0.015625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 323979316, + "step": 5783 + }, + { + "epoch": 12.88195991091314, + "grad_norm": 13.64018440246582, + "learning_rate": 1e-06, + "loss": 0.3038, + "num_input_tokens_seen": 324037372, + "step": 5784 + }, + { + "epoch": 12.88195991091314, + "loss": 0.361078143119812, + "loss_ce": 0.00011623941827565432, + "loss_iou": 0.15234375, + "loss_num": 0.01116943359375, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 324037372, + "step": 5784 + }, + { + "epoch": 12.884187082405345, + "grad_norm": 75.058349609375, + "learning_rate": 1e-06, + "loss": 0.6451, + "num_input_tokens_seen": 324095976, + "step": 5785 + }, + { + "epoch": 12.884187082405345, + "loss": 0.6566053628921509, + "loss_ce": 0.0011488739401102066, + "loss_iou": 0.267578125, + "loss_num": 0.023681640625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 324095976, + "step": 5785 + }, + { + "epoch": 12.88641425389755, + "grad_norm": 13.707096099853516, + "learning_rate": 1e-06, + "loss": 0.5832, + "num_input_tokens_seen": 324150696, + "step": 5786 + }, + { + "epoch": 12.88641425389755, + "loss": 0.3526431918144226, + "loss_ce": 0.00010411690163891762, + "loss_iou": 0.154296875, + "loss_num": 0.00860595703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 324150696, + "step": 5786 + }, + { + "epoch": 12.888641425389755, + "grad_norm": 18.37203025817871, + "learning_rate": 1e-06, + "loss": 0.5096, + "num_input_tokens_seen": 324204900, + "step": 5787 + }, + { + "epoch": 12.888641425389755, + "loss": 0.5298916697502136, + "loss_ce": 0.00010648852912709117, + "loss_iou": 0.228515625, + "loss_num": 0.01434326171875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 324204900, + "step": 5787 + }, + { + "epoch": 12.89086859688196, + "grad_norm": 18.70199203491211, + "learning_rate": 1e-06, + "loss": 0.4261, + "num_input_tokens_seen": 324262452, + "step": 5788 + }, + { + "epoch": 12.89086859688196, + "loss": 0.3846321105957031, + "loss_ce": 0.00011065039871027693, + "loss_iou": 0.1787109375, + "loss_num": 0.005279541015625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 324262452, + "step": 5788 + }, + { + "epoch": 12.893095768374165, + "grad_norm": 22.604270935058594, + "learning_rate": 1e-06, + "loss": 0.5116, + "num_input_tokens_seen": 324313472, + "step": 5789 + }, + { + "epoch": 12.893095768374165, + "loss": 0.5472476482391357, + "loss_ce": 0.00012846475874539465, + "loss_iou": 0.240234375, + "loss_num": 0.01336669921875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 324313472, + "step": 5789 + }, + { + "epoch": 12.89532293986637, + "grad_norm": 19.753644943237305, + "learning_rate": 1e-06, + "loss": 0.4183, + "num_input_tokens_seen": 324369044, + "step": 5790 + }, + { + "epoch": 12.89532293986637, + "loss": 0.5045735836029053, + "loss_ce": 0.00011799067578976974, + "loss_iou": 0.21875, + "loss_num": 0.01348876953125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 324369044, + "step": 5790 + }, + { + "epoch": 12.897550111358575, + "grad_norm": 31.045259475708008, + "learning_rate": 1e-06, + "loss": 0.6084, + "num_input_tokens_seen": 324424920, + "step": 5791 + }, + { + "epoch": 12.897550111358575, + "loss": 0.7539187669754028, + "loss_ce": 0.0001345592609141022, + "loss_iou": 0.326171875, + "loss_num": 0.0205078125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 324424920, + "step": 5791 + }, + { + "epoch": 12.89977728285078, + "grad_norm": 23.0539493560791, + "learning_rate": 1e-06, + "loss": 0.5878, + "num_input_tokens_seen": 324481128, + "step": 5792 + }, + { + "epoch": 12.89977728285078, + "loss": 0.44552081823349, + "loss_ce": 0.00014727559755556285, + "loss_iou": 0.185546875, + "loss_num": 0.0147705078125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 324481128, + "step": 5792 + }, + { + "epoch": 12.902004454342984, + "grad_norm": 20.192691802978516, + "learning_rate": 1e-06, + "loss": 0.4366, + "num_input_tokens_seen": 324537512, + "step": 5793 + }, + { + "epoch": 12.902004454342984, + "loss": 0.4317473769187927, + "loss_ce": 0.00010673434007912874, + "loss_iou": 0.1875, + "loss_num": 0.0111083984375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 324537512, + "step": 5793 + }, + { + "epoch": 12.90423162583519, + "grad_norm": 23.165922164916992, + "learning_rate": 1e-06, + "loss": 0.5654, + "num_input_tokens_seen": 324591436, + "step": 5794 + }, + { + "epoch": 12.90423162583519, + "loss": 0.7123044729232788, + "loss_ce": 0.000146272053825669, + "loss_iou": 0.2890625, + "loss_num": 0.026611328125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 324591436, + "step": 5794 + }, + { + "epoch": 12.906458797327394, + "grad_norm": 19.26278305053711, + "learning_rate": 1e-06, + "loss": 0.6626, + "num_input_tokens_seen": 324646868, + "step": 5795 + }, + { + "epoch": 12.906458797327394, + "loss": 0.9051787257194519, + "loss_ce": 0.0001494084281148389, + "loss_iou": 0.373046875, + "loss_num": 0.031494140625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 324646868, + "step": 5795 + }, + { + "epoch": 12.908685968819599, + "grad_norm": 18.52065658569336, + "learning_rate": 1e-06, + "loss": 0.4669, + "num_input_tokens_seen": 324704652, + "step": 5796 + }, + { + "epoch": 12.908685968819599, + "loss": 0.4736798405647278, + "loss_ce": 0.0001691254146862775, + "loss_iou": 0.18359375, + "loss_num": 0.02099609375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 324704652, + "step": 5796 + }, + { + "epoch": 12.910913140311804, + "grad_norm": 22.1650333404541, + "learning_rate": 1e-06, + "loss": 0.5009, + "num_input_tokens_seen": 324762108, + "step": 5797 + }, + { + "epoch": 12.910913140311804, + "loss": 0.44127357006073, + "loss_ce": 0.00011143650044687092, + "loss_iou": 0.1943359375, + "loss_num": 0.010498046875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 324762108, + "step": 5797 + }, + { + "epoch": 12.913140311804009, + "grad_norm": 18.897441864013672, + "learning_rate": 1e-06, + "loss": 0.4502, + "num_input_tokens_seen": 324817916, + "step": 5798 + }, + { + "epoch": 12.913140311804009, + "loss": 0.480578750371933, + "loss_ce": 0.00010997521167155355, + "loss_iou": 0.2158203125, + "loss_num": 0.00958251953125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 324817916, + "step": 5798 + }, + { + "epoch": 12.915367483296214, + "grad_norm": 13.311334609985352, + "learning_rate": 1e-06, + "loss": 0.5007, + "num_input_tokens_seen": 324872960, + "step": 5799 + }, + { + "epoch": 12.915367483296214, + "loss": 0.5006000995635986, + "loss_ce": 0.00011186262418050319, + "loss_iou": 0.2158203125, + "loss_num": 0.013671875, + "loss_xval": 0.5, + "num_input_tokens_seen": 324872960, + "step": 5799 + }, + { + "epoch": 12.917594654788418, + "grad_norm": 12.26301097869873, + "learning_rate": 1e-06, + "loss": 0.4789, + "num_input_tokens_seen": 324927908, + "step": 5800 + }, + { + "epoch": 12.917594654788418, + "loss": 0.47144681215286255, + "loss_ce": 0.00013335124822333455, + "loss_iou": 0.2109375, + "loss_num": 0.0096435546875, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 324927908, + "step": 5800 + }, + { + "epoch": 12.919821826280623, + "grad_norm": 16.52623176574707, + "learning_rate": 1e-06, + "loss": 0.6662, + "num_input_tokens_seen": 324986040, + "step": 5801 + }, + { + "epoch": 12.919821826280623, + "loss": 0.5938632488250732, + "loss_ce": 0.0012118464801460505, + "loss_iou": 0.263671875, + "loss_num": 0.01275634765625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 324986040, + "step": 5801 + }, + { + "epoch": 12.922048997772828, + "grad_norm": 29.222322463989258, + "learning_rate": 1e-06, + "loss": 0.4547, + "num_input_tokens_seen": 325043160, + "step": 5802 + }, + { + "epoch": 12.922048997772828, + "loss": 0.4256219267845154, + "loss_ce": 8.482815610477701e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0135498046875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 325043160, + "step": 5802 + }, + { + "epoch": 12.924276169265033, + "grad_norm": 19.258743286132812, + "learning_rate": 1e-06, + "loss": 0.3797, + "num_input_tokens_seen": 325099556, + "step": 5803 + }, + { + "epoch": 12.924276169265033, + "loss": 0.4342288076877594, + "loss_ce": 0.00014675800048280507, + "loss_iou": 0.1904296875, + "loss_num": 0.01068115234375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 325099556, + "step": 5803 + }, + { + "epoch": 12.926503340757238, + "grad_norm": 18.667795181274414, + "learning_rate": 1e-06, + "loss": 0.5903, + "num_input_tokens_seen": 325154616, + "step": 5804 + }, + { + "epoch": 12.926503340757238, + "loss": 0.5471016764640808, + "loss_ce": 0.00016564295219723135, + "loss_iou": 0.255859375, + "loss_num": 0.007232666015625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 325154616, + "step": 5804 + }, + { + "epoch": 12.928730512249443, + "grad_norm": 19.413196563720703, + "learning_rate": 1e-06, + "loss": 0.6071, + "num_input_tokens_seen": 325208228, + "step": 5805 + }, + { + "epoch": 12.928730512249443, + "loss": 0.6254149079322815, + "loss_ce": 0.00010969527647830546, + "loss_iou": 0.25390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.625, + "num_input_tokens_seen": 325208228, + "step": 5805 + }, + { + "epoch": 12.930957683741648, + "grad_norm": 16.120576858520508, + "learning_rate": 1e-06, + "loss": 0.5249, + "num_input_tokens_seen": 325261784, + "step": 5806 + }, + { + "epoch": 12.930957683741648, + "loss": 0.5421345829963684, + "loss_ce": 0.0001423685171175748, + "loss_iou": 0.234375, + "loss_num": 0.01470947265625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 325261784, + "step": 5806 + }, + { + "epoch": 12.933184855233852, + "grad_norm": 14.40259075164795, + "learning_rate": 1e-06, + "loss": 0.4978, + "num_input_tokens_seen": 325318096, + "step": 5807 + }, + { + "epoch": 12.933184855233852, + "loss": 0.33383339643478394, + "loss_ce": 0.00015419043484143913, + "loss_iou": 0.1494140625, + "loss_num": 0.0069580078125, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 325318096, + "step": 5807 + }, + { + "epoch": 12.935412026726057, + "grad_norm": 29.450130462646484, + "learning_rate": 1e-06, + "loss": 0.5965, + "num_input_tokens_seen": 325372384, + "step": 5808 + }, + { + "epoch": 12.935412026726057, + "loss": 0.4921649694442749, + "loss_ce": 9.954207052942365e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.01190185546875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 325372384, + "step": 5808 + }, + { + "epoch": 12.937639198218262, + "grad_norm": 14.617000579833984, + "learning_rate": 1e-06, + "loss": 0.3659, + "num_input_tokens_seen": 325427388, + "step": 5809 + }, + { + "epoch": 12.937639198218262, + "loss": 0.4301881492137909, + "loss_ce": 0.0001344462507404387, + "loss_iou": 0.1689453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 325427388, + "step": 5809 + }, + { + "epoch": 12.939866369710467, + "grad_norm": 24.749897003173828, + "learning_rate": 1e-06, + "loss": 0.655, + "num_input_tokens_seen": 325481232, + "step": 5810 + }, + { + "epoch": 12.939866369710467, + "loss": 0.7332806587219238, + "loss_ce": 0.0001263371086679399, + "loss_iou": 0.314453125, + "loss_num": 0.0208740234375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 325481232, + "step": 5810 + }, + { + "epoch": 12.942093541202672, + "grad_norm": 17.5683650970459, + "learning_rate": 1e-06, + "loss": 0.4979, + "num_input_tokens_seen": 325535700, + "step": 5811 + }, + { + "epoch": 12.942093541202672, + "loss": 0.6848030090332031, + "loss_ce": 0.00011064937280025333, + "loss_iou": 0.310546875, + "loss_num": 0.01275634765625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 325535700, + "step": 5811 + }, + { + "epoch": 12.944320712694877, + "grad_norm": 25.585346221923828, + "learning_rate": 1e-06, + "loss": 0.4126, + "num_input_tokens_seen": 325594156, + "step": 5812 + }, + { + "epoch": 12.944320712694877, + "loss": 0.37169232964515686, + "loss_ce": 0.00011027594155166298, + "loss_iou": 0.1513671875, + "loss_num": 0.013671875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 325594156, + "step": 5812 + }, + { + "epoch": 12.946547884187082, + "grad_norm": 18.788799285888672, + "learning_rate": 1e-06, + "loss": 0.6043, + "num_input_tokens_seen": 325648944, + "step": 5813 + }, + { + "epoch": 12.946547884187082, + "loss": 0.6141434907913208, + "loss_ce": 0.0001298107672482729, + "loss_iou": 0.25390625, + "loss_num": 0.021240234375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 325648944, + "step": 5813 + }, + { + "epoch": 12.948775055679288, + "grad_norm": 21.269174575805664, + "learning_rate": 1e-06, + "loss": 0.8081, + "num_input_tokens_seen": 325706188, + "step": 5814 + }, + { + "epoch": 12.948775055679288, + "loss": 0.8276509046554565, + "loss_ce": 0.0002583618916105479, + "loss_iou": 0.326171875, + "loss_num": 0.034912109375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 325706188, + "step": 5814 + }, + { + "epoch": 12.951002227171493, + "grad_norm": 26.132108688354492, + "learning_rate": 1e-06, + "loss": 0.5818, + "num_input_tokens_seen": 325759016, + "step": 5815 + }, + { + "epoch": 12.951002227171493, + "loss": 0.7224629521369934, + "loss_ce": 0.0004475700843613595, + "loss_iou": 0.296875, + "loss_num": 0.0257568359375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 325759016, + "step": 5815 + }, + { + "epoch": 12.953229398663698, + "grad_norm": 19.72740364074707, + "learning_rate": 1e-06, + "loss": 0.3361, + "num_input_tokens_seen": 325815532, + "step": 5816 + }, + { + "epoch": 12.953229398663698, + "loss": 0.43498772382736206, + "loss_ce": 0.00011223182082176208, + "loss_iou": 0.166015625, + "loss_num": 0.0205078125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 325815532, + "step": 5816 + }, + { + "epoch": 12.955456570155903, + "grad_norm": 34.82518005371094, + "learning_rate": 1e-06, + "loss": 0.6078, + "num_input_tokens_seen": 325871972, + "step": 5817 + }, + { + "epoch": 12.955456570155903, + "loss": 0.743614912033081, + "loss_ce": 0.00020671662059612572, + "loss_iou": 0.29296875, + "loss_num": 0.031494140625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 325871972, + "step": 5817 + }, + { + "epoch": 12.957683741648108, + "grad_norm": 101.52168273925781, + "learning_rate": 1e-06, + "loss": 0.5968, + "num_input_tokens_seen": 325928992, + "step": 5818 + }, + { + "epoch": 12.957683741648108, + "loss": 0.7953479886054993, + "loss_ce": 0.00012093935220036656, + "loss_iou": 0.330078125, + "loss_num": 0.027099609375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 325928992, + "step": 5818 + }, + { + "epoch": 12.959910913140313, + "grad_norm": 19.320589065551758, + "learning_rate": 1e-06, + "loss": 0.8007, + "num_input_tokens_seen": 325984976, + "step": 5819 + }, + { + "epoch": 12.959910913140313, + "loss": 0.8446329236030579, + "loss_ce": 0.00015049244393594563, + "loss_iou": 0.341796875, + "loss_num": 0.0322265625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 325984976, + "step": 5819 + }, + { + "epoch": 12.962138084632517, + "grad_norm": 17.67026710510254, + "learning_rate": 1e-06, + "loss": 0.4272, + "num_input_tokens_seen": 326041104, + "step": 5820 + }, + { + "epoch": 12.962138084632517, + "loss": 0.3929480314254761, + "loss_ce": 0.0001257585099665448, + "loss_iou": 0.1748046875, + "loss_num": 0.0086669921875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 326041104, + "step": 5820 + }, + { + "epoch": 12.964365256124722, + "grad_norm": 17.830753326416016, + "learning_rate": 1e-06, + "loss": 0.4385, + "num_input_tokens_seen": 326094916, + "step": 5821 + }, + { + "epoch": 12.964365256124722, + "loss": 0.38365253806114197, + "loss_ce": 0.00010762730380520225, + "loss_iou": 0.1640625, + "loss_num": 0.01104736328125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 326094916, + "step": 5821 + }, + { + "epoch": 12.966592427616927, + "grad_norm": 28.51872444152832, + "learning_rate": 1e-06, + "loss": 0.495, + "num_input_tokens_seen": 326151036, + "step": 5822 + }, + { + "epoch": 12.966592427616927, + "loss": 0.4512762427330017, + "loss_ce": 0.0007147122523747385, + "loss_iou": 0.1904296875, + "loss_num": 0.0140380859375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 326151036, + "step": 5822 + }, + { + "epoch": 12.968819599109132, + "grad_norm": 19.198762893676758, + "learning_rate": 1e-06, + "loss": 0.7932, + "num_input_tokens_seen": 326205816, + "step": 5823 + }, + { + "epoch": 12.968819599109132, + "loss": 0.7585012316703796, + "loss_ce": 0.00020045909332111478, + "loss_iou": 0.3125, + "loss_num": 0.02685546875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 326205816, + "step": 5823 + }, + { + "epoch": 12.971046770601337, + "grad_norm": 18.618919372558594, + "learning_rate": 1e-06, + "loss": 0.5058, + "num_input_tokens_seen": 326260604, + "step": 5824 + }, + { + "epoch": 12.971046770601337, + "loss": 0.6246432662010193, + "loss_ce": 0.00013153106556273997, + "loss_iou": 0.279296875, + "loss_num": 0.013427734375, + "loss_xval": 0.625, + "num_input_tokens_seen": 326260604, + "step": 5824 + }, + { + "epoch": 12.973273942093542, + "grad_norm": 14.253881454467773, + "learning_rate": 1e-06, + "loss": 0.4857, + "num_input_tokens_seen": 326318360, + "step": 5825 + }, + { + "epoch": 12.973273942093542, + "loss": 0.4617213010787964, + "loss_ce": 0.00017345792730338871, + "loss_iou": 0.1875, + "loss_num": 0.0172119140625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 326318360, + "step": 5825 + }, + { + "epoch": 12.975501113585747, + "grad_norm": 21.891193389892578, + "learning_rate": 1e-06, + "loss": 0.5982, + "num_input_tokens_seen": 326375376, + "step": 5826 + }, + { + "epoch": 12.975501113585747, + "loss": 0.6038088202476501, + "loss_ce": 0.00017113759531639516, + "loss_iou": 0.236328125, + "loss_num": 0.026123046875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 326375376, + "step": 5826 + }, + { + "epoch": 12.977728285077951, + "grad_norm": 24.07076072692871, + "learning_rate": 1e-06, + "loss": 0.412, + "num_input_tokens_seen": 326430360, + "step": 5827 + }, + { + "epoch": 12.977728285077951, + "loss": 0.5371109247207642, + "loss_ce": 0.00012366127339191735, + "loss_iou": 0.2421875, + "loss_num": 0.01055908203125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 326430360, + "step": 5827 + }, + { + "epoch": 12.979955456570156, + "grad_norm": 24.092050552368164, + "learning_rate": 1e-06, + "loss": 0.5159, + "num_input_tokens_seen": 326485148, + "step": 5828 + }, + { + "epoch": 12.979955456570156, + "loss": 0.6072977781295776, + "loss_ce": 0.00012003826850559562, + "loss_iou": 0.26171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 326485148, + "step": 5828 + }, + { + "epoch": 12.982182628062361, + "grad_norm": 15.061753273010254, + "learning_rate": 1e-06, + "loss": 0.4169, + "num_input_tokens_seen": 326541436, + "step": 5829 + }, + { + "epoch": 12.982182628062361, + "loss": 0.37990421056747437, + "loss_ce": 0.00014345324598252773, + "loss_iou": 0.15625, + "loss_num": 0.01324462890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 326541436, + "step": 5829 + }, + { + "epoch": 12.984409799554566, + "grad_norm": 17.364686965942383, + "learning_rate": 1e-06, + "loss": 0.5024, + "num_input_tokens_seen": 326598392, + "step": 5830 + }, + { + "epoch": 12.984409799554566, + "loss": 0.47910645604133606, + "loss_ce": 0.00010255920642521232, + "loss_iou": 0.2041015625, + "loss_num": 0.01416015625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 326598392, + "step": 5830 + }, + { + "epoch": 12.98663697104677, + "grad_norm": 18.025129318237305, + "learning_rate": 1e-06, + "loss": 0.4841, + "num_input_tokens_seen": 326654788, + "step": 5831 + }, + { + "epoch": 12.98663697104677, + "loss": 0.39598119258880615, + "loss_ce": 0.0001071398874046281, + "loss_iou": 0.1787109375, + "loss_num": 0.007781982421875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 326654788, + "step": 5831 + }, + { + "epoch": 12.988864142538976, + "grad_norm": 43.578460693359375, + "learning_rate": 1e-06, + "loss": 0.5461, + "num_input_tokens_seen": 326710960, + "step": 5832 + }, + { + "epoch": 12.988864142538976, + "loss": 0.588495135307312, + "loss_ce": 0.00011619819269981235, + "loss_iou": 0.2392578125, + "loss_num": 0.0218505859375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 326710960, + "step": 5832 + }, + { + "epoch": 12.99109131403118, + "grad_norm": 16.58260154724121, + "learning_rate": 1e-06, + "loss": 0.5595, + "num_input_tokens_seen": 326767560, + "step": 5833 + }, + { + "epoch": 12.99109131403118, + "loss": 0.6758525371551514, + "loss_ce": 0.00019332932424731553, + "loss_iou": 0.265625, + "loss_num": 0.029296875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 326767560, + "step": 5833 + }, + { + "epoch": 12.993318485523385, + "grad_norm": 100.04027557373047, + "learning_rate": 1e-06, + "loss": 0.4254, + "num_input_tokens_seen": 326824960, + "step": 5834 + }, + { + "epoch": 12.993318485523385, + "loss": 0.4192938804626465, + "loss_ce": 0.00010442556231282651, + "loss_iou": 0.17578125, + "loss_num": 0.01348876953125, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 326824960, + "step": 5834 + }, + { + "epoch": 12.99554565701559, + "grad_norm": 18.884227752685547, + "learning_rate": 1e-06, + "loss": 0.4109, + "num_input_tokens_seen": 326882800, + "step": 5835 + }, + { + "epoch": 12.99554565701559, + "loss": 0.41633379459381104, + "loss_ce": 0.00013507826952263713, + "loss_iou": 0.193359375, + "loss_num": 0.00604248046875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 326882800, + "step": 5835 + }, + { + "epoch": 12.997772828507795, + "grad_norm": 20.262374877929688, + "learning_rate": 1e-06, + "loss": 0.4506, + "num_input_tokens_seen": 326940800, + "step": 5836 + }, + { + "epoch": 12.997772828507795, + "loss": 0.48572030663490295, + "loss_ce": 0.00012461420556064695, + "loss_iou": 0.2099609375, + "loss_num": 0.01300048828125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 326940800, + "step": 5836 + }, + { + "epoch": 13.0, + "grad_norm": 17.950050354003906, + "learning_rate": 1e-06, + "loss": 0.4372, + "num_input_tokens_seen": 326994644, + "step": 5837 + }, + { + "epoch": 13.0, + "loss": 0.47044771909713745, + "loss_ce": 0.00011081169941462576, + "loss_iou": 0.181640625, + "loss_num": 0.021728515625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 326994644, + "step": 5837 + }, + { + "epoch": 13.002227171492205, + "grad_norm": 18.163402557373047, + "learning_rate": 1e-06, + "loss": 0.3952, + "num_input_tokens_seen": 327051472, + "step": 5838 + }, + { + "epoch": 13.002227171492205, + "loss": 0.45926615595817566, + "loss_ce": 0.0001597180962562561, + "loss_iou": 0.18359375, + "loss_num": 0.0185546875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 327051472, + "step": 5838 + }, + { + "epoch": 13.00445434298441, + "grad_norm": 17.97021484375, + "learning_rate": 1e-06, + "loss": 0.4858, + "num_input_tokens_seen": 327107924, + "step": 5839 + }, + { + "epoch": 13.00445434298441, + "loss": 0.6263518929481506, + "loss_ce": 0.00013117733760736883, + "loss_iou": 0.27734375, + "loss_num": 0.014404296875, + "loss_xval": 0.625, + "num_input_tokens_seen": 327107924, + "step": 5839 + }, + { + "epoch": 13.006681514476615, + "grad_norm": 28.137418746948242, + "learning_rate": 1e-06, + "loss": 0.4594, + "num_input_tokens_seen": 327166408, + "step": 5840 + }, + { + "epoch": 13.006681514476615, + "loss": 0.4613088369369507, + "loss_ce": 0.0001271882065339014, + "loss_iou": 0.2138671875, + "loss_num": 0.0067138671875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 327166408, + "step": 5840 + }, + { + "epoch": 13.00890868596882, + "grad_norm": 19.74036979675293, + "learning_rate": 1e-06, + "loss": 0.5521, + "num_input_tokens_seen": 327223808, + "step": 5841 + }, + { + "epoch": 13.00890868596882, + "loss": 0.39747354388237, + "loss_ce": 0.00013468455290421844, + "loss_iou": 0.173828125, + "loss_num": 0.00970458984375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 327223808, + "step": 5841 + }, + { + "epoch": 13.011135857461024, + "grad_norm": 40.84404754638672, + "learning_rate": 1e-06, + "loss": 0.5209, + "num_input_tokens_seen": 327280344, + "step": 5842 + }, + { + "epoch": 13.011135857461024, + "loss": 0.3576486110687256, + "loss_ce": 0.0001046471661538817, + "loss_iou": 0.162109375, + "loss_num": 0.006500244140625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 327280344, + "step": 5842 + }, + { + "epoch": 13.01336302895323, + "grad_norm": 25.01186180114746, + "learning_rate": 1e-06, + "loss": 0.4771, + "num_input_tokens_seen": 327332880, + "step": 5843 + }, + { + "epoch": 13.01336302895323, + "loss": 0.4530574679374695, + "loss_ce": 0.00011555968376342207, + "loss_iou": 0.201171875, + "loss_num": 0.0101318359375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 327332880, + "step": 5843 + }, + { + "epoch": 13.015590200445434, + "grad_norm": 17.630598068237305, + "learning_rate": 1e-06, + "loss": 0.5489, + "num_input_tokens_seen": 327390252, + "step": 5844 + }, + { + "epoch": 13.015590200445434, + "loss": 0.5149204730987549, + "loss_ce": 0.00011946188897127286, + "loss_iou": 0.216796875, + "loss_num": 0.0164794921875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 327390252, + "step": 5844 + }, + { + "epoch": 13.017817371937639, + "grad_norm": 16.314720153808594, + "learning_rate": 1e-06, + "loss": 0.4371, + "num_input_tokens_seen": 327445668, + "step": 5845 + }, + { + "epoch": 13.017817371937639, + "loss": 0.34429049491882324, + "loss_ce": 0.0001132564211729914, + "loss_iou": 0.142578125, + "loss_num": 0.01190185546875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 327445668, + "step": 5845 + }, + { + "epoch": 13.020044543429844, + "grad_norm": 20.126976013183594, + "learning_rate": 1e-06, + "loss": 0.3745, + "num_input_tokens_seen": 327502572, + "step": 5846 + }, + { + "epoch": 13.020044543429844, + "loss": 0.3755548298358917, + "loss_ce": 0.00012758253433275968, + "loss_iou": 0.1787109375, + "loss_num": 0.0035247802734375, + "loss_xval": 0.375, + "num_input_tokens_seen": 327502572, + "step": 5846 + }, + { + "epoch": 13.022271714922049, + "grad_norm": 18.101938247680664, + "learning_rate": 1e-06, + "loss": 0.4179, + "num_input_tokens_seen": 327560524, + "step": 5847 + }, + { + "epoch": 13.022271714922049, + "loss": 0.4644816517829895, + "loss_ce": 0.00012616706953849643, + "loss_iou": 0.208984375, + "loss_num": 0.0093994140625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 327560524, + "step": 5847 + }, + { + "epoch": 13.024498886414253, + "grad_norm": 22.762983322143555, + "learning_rate": 1e-06, + "loss": 0.5214, + "num_input_tokens_seen": 327615932, + "step": 5848 + }, + { + "epoch": 13.024498886414253, + "loss": 0.6657699346542358, + "loss_ce": 0.00012048571079503745, + "loss_iou": 0.2890625, + "loss_num": 0.0174560546875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 327615932, + "step": 5848 + }, + { + "epoch": 13.026726057906458, + "grad_norm": 38.849609375, + "learning_rate": 1e-06, + "loss": 0.5811, + "num_input_tokens_seen": 327669496, + "step": 5849 + }, + { + "epoch": 13.026726057906458, + "loss": 0.3934794068336487, + "loss_ce": 0.00010783905599964783, + "loss_iou": 0.158203125, + "loss_num": 0.0155029296875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 327669496, + "step": 5849 + }, + { + "epoch": 13.028953229398663, + "grad_norm": 13.994296073913574, + "learning_rate": 1e-06, + "loss": 0.573, + "num_input_tokens_seen": 327726764, + "step": 5850 + }, + { + "epoch": 13.028953229398663, + "loss": 0.5137879252433777, + "loss_ce": 0.00011606388579821214, + "loss_iou": 0.22265625, + "loss_num": 0.0137939453125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 327726764, + "step": 5850 + }, + { + "epoch": 13.031180400890868, + "grad_norm": 29.13859748840332, + "learning_rate": 1e-06, + "loss": 0.7501, + "num_input_tokens_seen": 327783880, + "step": 5851 + }, + { + "epoch": 13.031180400890868, + "loss": 0.9725739359855652, + "loss_ce": 0.00016182669787667692, + "loss_iou": 0.3828125, + "loss_num": 0.041748046875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 327783880, + "step": 5851 + }, + { + "epoch": 13.033407572383073, + "grad_norm": 15.132269859313965, + "learning_rate": 1e-06, + "loss": 0.4839, + "num_input_tokens_seen": 327842228, + "step": 5852 + }, + { + "epoch": 13.033407572383073, + "loss": 0.5506539344787598, + "loss_ce": 0.00011678160808514804, + "loss_iou": 0.240234375, + "loss_num": 0.01416015625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 327842228, + "step": 5852 + }, + { + "epoch": 13.035634743875278, + "grad_norm": 21.988449096679688, + "learning_rate": 1e-06, + "loss": 0.4047, + "num_input_tokens_seen": 327899700, + "step": 5853 + }, + { + "epoch": 13.035634743875278, + "loss": 0.37145259976387024, + "loss_ce": 0.00011470088065834716, + "loss_iou": 0.1689453125, + "loss_num": 0.00653076171875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 327899700, + "step": 5853 + }, + { + "epoch": 13.037861915367483, + "grad_norm": 18.117429733276367, + "learning_rate": 1e-06, + "loss": 0.5186, + "num_input_tokens_seen": 327957944, + "step": 5854 + }, + { + "epoch": 13.037861915367483, + "loss": 0.3974757194519043, + "loss_ce": 0.00013685536396224052, + "loss_iou": 0.1767578125, + "loss_num": 0.00897216796875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 327957944, + "step": 5854 + }, + { + "epoch": 13.040089086859687, + "grad_norm": 20.6132869720459, + "learning_rate": 1e-06, + "loss": 0.3864, + "num_input_tokens_seen": 328014804, + "step": 5855 + }, + { + "epoch": 13.040089086859687, + "loss": 0.411630779504776, + "loss_ce": 0.00013175193453207612, + "loss_iou": 0.189453125, + "loss_num": 0.00665283203125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 328014804, + "step": 5855 + }, + { + "epoch": 13.042316258351892, + "grad_norm": 15.854263305664062, + "learning_rate": 1e-06, + "loss": 0.5165, + "num_input_tokens_seen": 328072992, + "step": 5856 + }, + { + "epoch": 13.042316258351892, + "loss": 0.5808364748954773, + "loss_ce": 0.00014802644727751613, + "loss_iou": 0.24609375, + "loss_num": 0.017578125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 328072992, + "step": 5856 + }, + { + "epoch": 13.044543429844097, + "grad_norm": 14.205804824829102, + "learning_rate": 1e-06, + "loss": 0.4901, + "num_input_tokens_seen": 328130500, + "step": 5857 + }, + { + "epoch": 13.044543429844097, + "loss": 0.5825508832931519, + "loss_ce": 0.00015347708540502936, + "loss_iou": 0.2578125, + "loss_num": 0.012939453125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 328130500, + "step": 5857 + }, + { + "epoch": 13.046770601336302, + "grad_norm": 18.12480354309082, + "learning_rate": 1e-06, + "loss": 0.4782, + "num_input_tokens_seen": 328185140, + "step": 5858 + }, + { + "epoch": 13.046770601336302, + "loss": 0.5846756100654602, + "loss_ce": 0.0002029853785643354, + "loss_iou": 0.2470703125, + "loss_num": 0.0179443359375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 328185140, + "step": 5858 + }, + { + "epoch": 13.048997772828507, + "grad_norm": 18.047836303710938, + "learning_rate": 1e-06, + "loss": 0.4255, + "num_input_tokens_seen": 328239304, + "step": 5859 + }, + { + "epoch": 13.048997772828507, + "loss": 0.5025643110275269, + "loss_ce": 0.00012291455641388893, + "loss_iou": 0.224609375, + "loss_num": 0.01068115234375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 328239304, + "step": 5859 + }, + { + "epoch": 13.051224944320714, + "grad_norm": 19.500625610351562, + "learning_rate": 1e-06, + "loss": 0.5162, + "num_input_tokens_seen": 328295948, + "step": 5860 + }, + { + "epoch": 13.051224944320714, + "loss": 0.47790929675102234, + "loss_ce": 0.0001260775316040963, + "loss_iou": 0.212890625, + "loss_num": 0.01031494140625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 328295948, + "step": 5860 + }, + { + "epoch": 13.053452115812918, + "grad_norm": 30.18915557861328, + "learning_rate": 1e-06, + "loss": 0.7097, + "num_input_tokens_seen": 328348972, + "step": 5861 + }, + { + "epoch": 13.053452115812918, + "loss": 0.7575015425682068, + "loss_ce": 0.0001773214025888592, + "loss_iou": 0.330078125, + "loss_num": 0.0194091796875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 328348972, + "step": 5861 + }, + { + "epoch": 13.055679287305123, + "grad_norm": 15.668800354003906, + "learning_rate": 1e-06, + "loss": 0.3318, + "num_input_tokens_seen": 328403404, + "step": 5862 + }, + { + "epoch": 13.055679287305123, + "loss": 0.3245905935764313, + "loss_ce": 0.0001277004776056856, + "loss_iou": 0.138671875, + "loss_num": 0.0093994140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 328403404, + "step": 5862 + }, + { + "epoch": 13.057906458797328, + "grad_norm": 15.234992027282715, + "learning_rate": 1e-06, + "loss": 0.4268, + "num_input_tokens_seen": 328459968, + "step": 5863 + }, + { + "epoch": 13.057906458797328, + "loss": 0.5279508829116821, + "loss_ce": 0.00011885567801073194, + "loss_iou": 0.2265625, + "loss_num": 0.01483154296875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 328459968, + "step": 5863 + }, + { + "epoch": 13.060133630289533, + "grad_norm": 19.412704467773438, + "learning_rate": 1e-06, + "loss": 0.5039, + "num_input_tokens_seen": 328516632, + "step": 5864 + }, + { + "epoch": 13.060133630289533, + "loss": 0.6012612581253052, + "loss_ce": 0.0001870518026407808, + "loss_iou": 0.2333984375, + "loss_num": 0.0267333984375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 328516632, + "step": 5864 + }, + { + "epoch": 13.062360801781738, + "grad_norm": 14.758916854858398, + "learning_rate": 1e-06, + "loss": 0.4313, + "num_input_tokens_seen": 328571604, + "step": 5865 + }, + { + "epoch": 13.062360801781738, + "loss": 0.37966063618659973, + "loss_ce": 0.00014401419321075082, + "loss_iou": 0.1708984375, + "loss_num": 0.0074462890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 328571604, + "step": 5865 + }, + { + "epoch": 13.064587973273943, + "grad_norm": 35.47437286376953, + "learning_rate": 1e-06, + "loss": 0.5209, + "num_input_tokens_seen": 328627528, + "step": 5866 + }, + { + "epoch": 13.064587973273943, + "loss": 0.5552887916564941, + "loss_ce": 0.00011297700984869152, + "loss_iou": 0.2119140625, + "loss_num": 0.0264892578125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 328627528, + "step": 5866 + }, + { + "epoch": 13.066815144766148, + "grad_norm": 24.737062454223633, + "learning_rate": 1e-06, + "loss": 0.3764, + "num_input_tokens_seen": 328684780, + "step": 5867 + }, + { + "epoch": 13.066815144766148, + "loss": 0.33116066455841064, + "loss_ce": 0.00010598442167975008, + "loss_iou": 0.1572265625, + "loss_num": 0.0034942626953125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 328684780, + "step": 5867 + }, + { + "epoch": 13.069042316258352, + "grad_norm": 32.23481750488281, + "learning_rate": 1e-06, + "loss": 0.5629, + "num_input_tokens_seen": 328738920, + "step": 5868 + }, + { + "epoch": 13.069042316258352, + "loss": 0.42393577098846436, + "loss_ce": 0.0001076542102964595, + "loss_iou": 0.193359375, + "loss_num": 0.007232666015625, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 328738920, + "step": 5868 + }, + { + "epoch": 13.071269487750557, + "grad_norm": 15.290202140808105, + "learning_rate": 1e-06, + "loss": 0.3362, + "num_input_tokens_seen": 328796456, + "step": 5869 + }, + { + "epoch": 13.071269487750557, + "loss": 0.33213841915130615, + "loss_ce": 0.00022925705707166344, + "loss_iou": 0.125, + "loss_num": 0.0162353515625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 328796456, + "step": 5869 + }, + { + "epoch": 13.073496659242762, + "grad_norm": 20.73664093017578, + "learning_rate": 1e-06, + "loss": 0.5255, + "num_input_tokens_seen": 328851748, + "step": 5870 + }, + { + "epoch": 13.073496659242762, + "loss": 0.41393500566482544, + "loss_ce": 0.0001166543661383912, + "loss_iou": 0.1796875, + "loss_num": 0.0108642578125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 328851748, + "step": 5870 + }, + { + "epoch": 13.075723830734967, + "grad_norm": 19.62030601501465, + "learning_rate": 1e-06, + "loss": 0.4958, + "num_input_tokens_seen": 328907888, + "step": 5871 + }, + { + "epoch": 13.075723830734967, + "loss": 0.5278200507164001, + "loss_ce": 0.00011009951413143426, + "loss_iou": 0.228515625, + "loss_num": 0.0142822265625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 328907888, + "step": 5871 + }, + { + "epoch": 13.077951002227172, + "grad_norm": 24.643142700195312, + "learning_rate": 1e-06, + "loss": 0.5101, + "num_input_tokens_seen": 328964040, + "step": 5872 + }, + { + "epoch": 13.077951002227172, + "loss": 0.5145541429519653, + "loss_ce": 0.0001498450292274356, + "loss_iou": 0.2294921875, + "loss_num": 0.0111083984375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 328964040, + "step": 5872 + }, + { + "epoch": 13.080178173719377, + "grad_norm": 18.29912567138672, + "learning_rate": 1e-06, + "loss": 0.4219, + "num_input_tokens_seen": 329019624, + "step": 5873 + }, + { + "epoch": 13.080178173719377, + "loss": 0.4937739372253418, + "loss_ce": 0.00012156983575550839, + "loss_iou": 0.21484375, + "loss_num": 0.01263427734375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 329019624, + "step": 5873 + }, + { + "epoch": 13.082405345211582, + "grad_norm": 16.88625717163086, + "learning_rate": 1e-06, + "loss": 0.4574, + "num_input_tokens_seen": 329077484, + "step": 5874 + }, + { + "epoch": 13.082405345211582, + "loss": 0.3620465397834778, + "loss_ce": 0.00010803519398905337, + "loss_iou": 0.1611328125, + "loss_num": 0.008056640625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 329077484, + "step": 5874 + }, + { + "epoch": 13.084632516703786, + "grad_norm": 13.229454040527344, + "learning_rate": 1e-06, + "loss": 0.5683, + "num_input_tokens_seen": 329135212, + "step": 5875 + }, + { + "epoch": 13.084632516703786, + "loss": 0.6875127553939819, + "loss_ce": 0.0001348142686765641, + "loss_iou": 0.271484375, + "loss_num": 0.02880859375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 329135212, + "step": 5875 + }, + { + "epoch": 13.086859688195991, + "grad_norm": 30.31863021850586, + "learning_rate": 1e-06, + "loss": 0.5017, + "num_input_tokens_seen": 329190660, + "step": 5876 + }, + { + "epoch": 13.086859688195991, + "loss": 0.5497363805770874, + "loss_ce": 0.0001758422004058957, + "loss_iou": 0.2353515625, + "loss_num": 0.0157470703125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 329190660, + "step": 5876 + }, + { + "epoch": 13.089086859688196, + "grad_norm": 16.870037078857422, + "learning_rate": 1e-06, + "loss": 0.3866, + "num_input_tokens_seen": 329247060, + "step": 5877 + }, + { + "epoch": 13.089086859688196, + "loss": 0.36009788513183594, + "loss_ce": 0.00011254477431066334, + "loss_iou": 0.162109375, + "loss_num": 0.007293701171875, + "loss_xval": 0.359375, + "num_input_tokens_seen": 329247060, + "step": 5877 + }, + { + "epoch": 13.091314031180401, + "grad_norm": 21.952919006347656, + "learning_rate": 1e-06, + "loss": 0.4696, + "num_input_tokens_seen": 329301984, + "step": 5878 + }, + { + "epoch": 13.091314031180401, + "loss": 0.4436010420322418, + "loss_ce": 0.00011958822869928554, + "loss_iou": 0.2001953125, + "loss_num": 0.0087890625, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 329301984, + "step": 5878 + }, + { + "epoch": 13.093541202672606, + "grad_norm": 18.792381286621094, + "learning_rate": 1e-06, + "loss": 0.4403, + "num_input_tokens_seen": 329360572, + "step": 5879 + }, + { + "epoch": 13.093541202672606, + "loss": 0.5003446340560913, + "loss_ce": 0.00010050551645690575, + "loss_iou": 0.21484375, + "loss_num": 0.01397705078125, + "loss_xval": 0.5, + "num_input_tokens_seen": 329360572, + "step": 5879 + }, + { + "epoch": 13.09576837416481, + "grad_norm": 18.393442153930664, + "learning_rate": 1e-06, + "loss": 0.6429, + "num_input_tokens_seen": 329417288, + "step": 5880 + }, + { + "epoch": 13.09576837416481, + "loss": 0.7582250833511353, + "loss_ce": 0.00016841033357195556, + "loss_iou": 0.314453125, + "loss_num": 0.0257568359375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 329417288, + "step": 5880 + }, + { + "epoch": 13.097995545657016, + "grad_norm": 13.813261985778809, + "learning_rate": 1e-06, + "loss": 0.4882, + "num_input_tokens_seen": 329473344, + "step": 5881 + }, + { + "epoch": 13.097995545657016, + "loss": 0.6629891395568848, + "loss_ce": 0.0001473267620895058, + "loss_iou": 0.271484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 329473344, + "step": 5881 + }, + { + "epoch": 13.10022271714922, + "grad_norm": 15.445968627929688, + "learning_rate": 1e-06, + "loss": 0.4329, + "num_input_tokens_seen": 329531204, + "step": 5882 + }, + { + "epoch": 13.10022271714922, + "loss": 0.43495747447013855, + "loss_ce": 0.00014300504699349403, + "loss_iou": 0.197265625, + "loss_num": 0.008056640625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 329531204, + "step": 5882 + }, + { + "epoch": 13.102449888641425, + "grad_norm": 14.054932594299316, + "learning_rate": 1e-06, + "loss": 0.4633, + "num_input_tokens_seen": 329586744, + "step": 5883 + }, + { + "epoch": 13.102449888641425, + "loss": 0.5273807644844055, + "loss_ce": 0.0005252888076938689, + "loss_iou": 0.2353515625, + "loss_num": 0.01129150390625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 329586744, + "step": 5883 + }, + { + "epoch": 13.10467706013363, + "grad_norm": 16.611223220825195, + "learning_rate": 1e-06, + "loss": 0.397, + "num_input_tokens_seen": 329642216, + "step": 5884 + }, + { + "epoch": 13.10467706013363, + "loss": 0.4468822479248047, + "loss_ce": 0.00010488741099834442, + "loss_iou": 0.19921875, + "loss_num": 0.00946044921875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 329642216, + "step": 5884 + }, + { + "epoch": 13.106904231625835, + "grad_norm": 18.158891677856445, + "learning_rate": 1e-06, + "loss": 0.324, + "num_input_tokens_seen": 329698692, + "step": 5885 + }, + { + "epoch": 13.106904231625835, + "loss": 0.3763335049152374, + "loss_ce": 0.00011280621401965618, + "loss_iou": 0.1630859375, + "loss_num": 0.0101318359375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 329698692, + "step": 5885 + }, + { + "epoch": 13.10913140311804, + "grad_norm": 20.2602481842041, + "learning_rate": 1e-06, + "loss": 0.4628, + "num_input_tokens_seen": 329753408, + "step": 5886 + }, + { + "epoch": 13.10913140311804, + "loss": 0.590545117855072, + "loss_ce": 0.0005792796146124601, + "loss_iou": 0.232421875, + "loss_num": 0.0247802734375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 329753408, + "step": 5886 + }, + { + "epoch": 13.111358574610245, + "grad_norm": 17.374492645263672, + "learning_rate": 1e-06, + "loss": 0.5085, + "num_input_tokens_seen": 329811252, + "step": 5887 + }, + { + "epoch": 13.111358574610245, + "loss": 0.6364624500274658, + "loss_ce": 0.00010987733548972756, + "loss_iou": 0.28125, + "loss_num": 0.0150146484375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 329811252, + "step": 5887 + }, + { + "epoch": 13.11358574610245, + "grad_norm": 24.767221450805664, + "learning_rate": 1e-06, + "loss": 0.6877, + "num_input_tokens_seen": 329864808, + "step": 5888 + }, + { + "epoch": 13.11358574610245, + "loss": 0.9313529133796692, + "loss_ce": 0.0002005502174142748, + "loss_iou": 0.384765625, + "loss_num": 0.0322265625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 329864808, + "step": 5888 + }, + { + "epoch": 13.115812917594655, + "grad_norm": 18.444570541381836, + "learning_rate": 1e-06, + "loss": 0.3468, + "num_input_tokens_seen": 329923220, + "step": 5889 + }, + { + "epoch": 13.115812917594655, + "loss": 0.38743722438812256, + "loss_ce": 0.00010810734238475561, + "loss_iou": 0.1806640625, + "loss_num": 0.0052490234375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 329923220, + "step": 5889 + }, + { + "epoch": 13.11804008908686, + "grad_norm": 14.813982009887695, + "learning_rate": 1e-06, + "loss": 0.5645, + "num_input_tokens_seen": 329982272, + "step": 5890 + }, + { + "epoch": 13.11804008908686, + "loss": 0.7087510824203491, + "loss_ce": 0.00013293002848513424, + "loss_iou": 0.296875, + "loss_num": 0.02294921875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 329982272, + "step": 5890 + }, + { + "epoch": 13.120267260579064, + "grad_norm": 17.98593521118164, + "learning_rate": 1e-06, + "loss": 0.6082, + "num_input_tokens_seen": 330037796, + "step": 5891 + }, + { + "epoch": 13.120267260579064, + "loss": 0.6543379426002502, + "loss_ce": 0.00031572196166962385, + "loss_iou": 0.265625, + "loss_num": 0.0247802734375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 330037796, + "step": 5891 + }, + { + "epoch": 13.122494432071269, + "grad_norm": 16.788930892944336, + "learning_rate": 1e-06, + "loss": 0.4808, + "num_input_tokens_seen": 330092396, + "step": 5892 + }, + { + "epoch": 13.122494432071269, + "loss": 0.5958355665206909, + "loss_ce": 0.00013241068518254906, + "loss_iou": 0.23828125, + "loss_num": 0.0238037109375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 330092396, + "step": 5892 + }, + { + "epoch": 13.124721603563474, + "grad_norm": 17.42109489440918, + "learning_rate": 1e-06, + "loss": 0.405, + "num_input_tokens_seen": 330147804, + "step": 5893 + }, + { + "epoch": 13.124721603563474, + "loss": 0.4320144057273865, + "loss_ce": 0.000129641528474167, + "loss_iou": 0.193359375, + "loss_num": 0.00933837890625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 330147804, + "step": 5893 + }, + { + "epoch": 13.126948775055679, + "grad_norm": 17.75126838684082, + "learning_rate": 1e-06, + "loss": 0.5447, + "num_input_tokens_seen": 330205316, + "step": 5894 + }, + { + "epoch": 13.126948775055679, + "loss": 0.6002463698387146, + "loss_ce": 0.00014872349856887013, + "loss_iou": 0.2470703125, + "loss_num": 0.0213623046875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 330205316, + "step": 5894 + }, + { + "epoch": 13.129175946547884, + "grad_norm": 22.149547576904297, + "learning_rate": 1e-06, + "loss": 0.4957, + "num_input_tokens_seen": 330261848, + "step": 5895 + }, + { + "epoch": 13.129175946547884, + "loss": 0.6235384941101074, + "loss_ce": 0.00012537876318674535, + "loss_iou": 0.275390625, + "loss_num": 0.0146484375, + "loss_xval": 0.625, + "num_input_tokens_seen": 330261848, + "step": 5895 + }, + { + "epoch": 13.131403118040089, + "grad_norm": 18.989896774291992, + "learning_rate": 1e-06, + "loss": 0.5355, + "num_input_tokens_seen": 330318236, + "step": 5896 + }, + { + "epoch": 13.131403118040089, + "loss": 0.4923059344291687, + "loss_ce": 0.00011842740059364587, + "loss_iou": 0.208984375, + "loss_num": 0.01495361328125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 330318236, + "step": 5896 + }, + { + "epoch": 13.133630289532293, + "grad_norm": 15.263077735900879, + "learning_rate": 1e-06, + "loss": 0.486, + "num_input_tokens_seen": 330377552, + "step": 5897 + }, + { + "epoch": 13.133630289532293, + "loss": 0.4600605368614197, + "loss_ce": 9.961408795788884e-05, + "loss_iou": 0.1796875, + "loss_num": 0.0201416015625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 330377552, + "step": 5897 + }, + { + "epoch": 13.135857461024498, + "grad_norm": 17.812576293945312, + "learning_rate": 1e-06, + "loss": 0.5491, + "num_input_tokens_seen": 330433484, + "step": 5898 + }, + { + "epoch": 13.135857461024498, + "loss": 0.5521707534790039, + "loss_ce": 0.00010780214506667107, + "loss_iou": 0.234375, + "loss_num": 0.0164794921875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 330433484, + "step": 5898 + }, + { + "epoch": 13.138084632516703, + "grad_norm": 28.198577880859375, + "learning_rate": 1e-06, + "loss": 0.6077, + "num_input_tokens_seen": 330489352, + "step": 5899 + }, + { + "epoch": 13.138084632516703, + "loss": 0.6835901737213135, + "loss_ce": 0.0001184626089525409, + "loss_iou": 0.2890625, + "loss_num": 0.0205078125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 330489352, + "step": 5899 + }, + { + "epoch": 13.140311804008908, + "grad_norm": 16.04581069946289, + "learning_rate": 1e-06, + "loss": 0.5163, + "num_input_tokens_seen": 330544888, + "step": 5900 + }, + { + "epoch": 13.140311804008908, + "loss": 0.5014405846595764, + "loss_ce": 0.00012069322110619396, + "loss_iou": 0.21875, + "loss_num": 0.01275634765625, + "loss_xval": 0.5, + "num_input_tokens_seen": 330544888, + "step": 5900 + }, + { + "epoch": 13.142538975501113, + "grad_norm": 18.238014221191406, + "learning_rate": 1e-06, + "loss": 0.3431, + "num_input_tokens_seen": 330602552, + "step": 5901 + }, + { + "epoch": 13.142538975501113, + "loss": 0.3762207627296448, + "loss_ce": 0.0001221080165123567, + "loss_iou": 0.1708984375, + "loss_num": 0.006927490234375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 330602552, + "step": 5901 + }, + { + "epoch": 13.144766146993318, + "grad_norm": 32.46430206298828, + "learning_rate": 1e-06, + "loss": 0.5533, + "num_input_tokens_seen": 330657020, + "step": 5902 + }, + { + "epoch": 13.144766146993318, + "loss": 0.6620012521743774, + "loss_ce": 0.00013605058484245092, + "loss_iou": 0.27734375, + "loss_num": 0.0216064453125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 330657020, + "step": 5902 + }, + { + "epoch": 13.146993318485523, + "grad_norm": 30.36663246154785, + "learning_rate": 1e-06, + "loss": 0.4032, + "num_input_tokens_seen": 330713536, + "step": 5903 + }, + { + "epoch": 13.146993318485523, + "loss": 0.36765336990356445, + "loss_ce": 0.0005879499949514866, + "loss_iou": 0.146484375, + "loss_num": 0.0147705078125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 330713536, + "step": 5903 + }, + { + "epoch": 13.14922048997773, + "grad_norm": 15.999008178710938, + "learning_rate": 1e-06, + "loss": 0.4386, + "num_input_tokens_seen": 330767572, + "step": 5904 + }, + { + "epoch": 13.14922048997773, + "loss": 0.5433182716369629, + "loss_ce": 0.00010540579387452453, + "loss_iou": 0.2265625, + "loss_num": 0.0181884765625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 330767572, + "step": 5904 + }, + { + "epoch": 13.151447661469934, + "grad_norm": 17.65342903137207, + "learning_rate": 1e-06, + "loss": 0.3948, + "num_input_tokens_seen": 330825640, + "step": 5905 + }, + { + "epoch": 13.151447661469934, + "loss": 0.3161456882953644, + "loss_ce": 0.000105641498521436, + "loss_iou": 0.1396484375, + "loss_num": 0.0074462890625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 330825640, + "step": 5905 + }, + { + "epoch": 13.153674832962139, + "grad_norm": 31.29542350769043, + "learning_rate": 1e-06, + "loss": 0.7188, + "num_input_tokens_seen": 330884276, + "step": 5906 + }, + { + "epoch": 13.153674832962139, + "loss": 0.7189792990684509, + "loss_ce": 0.00010728358756750822, + "loss_iou": 0.31640625, + "loss_num": 0.0174560546875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 330884276, + "step": 5906 + }, + { + "epoch": 13.155902004454344, + "grad_norm": 18.346426010131836, + "learning_rate": 1e-06, + "loss": 0.5431, + "num_input_tokens_seen": 330938632, + "step": 5907 + }, + { + "epoch": 13.155902004454344, + "loss": 0.4295383095741272, + "loss_ce": 9.494097321294248e-05, + "loss_iou": 0.169921875, + "loss_num": 0.0179443359375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 330938632, + "step": 5907 + }, + { + "epoch": 13.158129175946549, + "grad_norm": 22.875947952270508, + "learning_rate": 1e-06, + "loss": 0.6056, + "num_input_tokens_seen": 330995628, + "step": 5908 + }, + { + "epoch": 13.158129175946549, + "loss": 0.5698830485343933, + "loss_ce": 0.00011983538570348173, + "loss_iou": 0.23046875, + "loss_num": 0.021728515625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 330995628, + "step": 5908 + }, + { + "epoch": 13.160356347438753, + "grad_norm": 15.61497974395752, + "learning_rate": 1e-06, + "loss": 0.4997, + "num_input_tokens_seen": 331049196, + "step": 5909 + }, + { + "epoch": 13.160356347438753, + "loss": 0.37679868936538696, + "loss_ce": 8.96861165529117e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0147705078125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 331049196, + "step": 5909 + }, + { + "epoch": 13.162583518930958, + "grad_norm": 15.534133911132812, + "learning_rate": 1e-06, + "loss": 0.6097, + "num_input_tokens_seen": 331104396, + "step": 5910 + }, + { + "epoch": 13.162583518930958, + "loss": 0.8099589347839355, + "loss_ce": 0.0001444965455448255, + "loss_iou": 0.33984375, + "loss_num": 0.0262451171875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 331104396, + "step": 5910 + }, + { + "epoch": 13.164810690423163, + "grad_norm": 41.32310104370117, + "learning_rate": 1e-06, + "loss": 0.581, + "num_input_tokens_seen": 331159788, + "step": 5911 + }, + { + "epoch": 13.164810690423163, + "loss": 0.4186583161354065, + "loss_ce": 0.00014023938274476677, + "loss_iou": 0.181640625, + "loss_num": 0.010986328125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 331159788, + "step": 5911 + }, + { + "epoch": 13.167037861915368, + "grad_norm": 17.732685089111328, + "learning_rate": 1e-06, + "loss": 0.5119, + "num_input_tokens_seen": 331216032, + "step": 5912 + }, + { + "epoch": 13.167037861915368, + "loss": 0.588236391544342, + "loss_ce": 0.00010163354454562068, + "loss_iou": 0.2578125, + "loss_num": 0.014892578125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 331216032, + "step": 5912 + }, + { + "epoch": 13.169265033407573, + "grad_norm": 20.534914016723633, + "learning_rate": 1e-06, + "loss": 0.3891, + "num_input_tokens_seen": 331273480, + "step": 5913 + }, + { + "epoch": 13.169265033407573, + "loss": 0.3801114559173584, + "loss_ce": 0.00010655220103217289, + "loss_iou": 0.169921875, + "loss_num": 0.0081787109375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 331273480, + "step": 5913 + }, + { + "epoch": 13.171492204899778, + "grad_norm": 45.9957389831543, + "learning_rate": 1e-06, + "loss": 0.5419, + "num_input_tokens_seen": 331328248, + "step": 5914 + }, + { + "epoch": 13.171492204899778, + "loss": 0.45751655101776123, + "loss_ce": 0.00011909787281183526, + "loss_iou": 0.208984375, + "loss_num": 0.0076904296875, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 331328248, + "step": 5914 + }, + { + "epoch": 13.173719376391983, + "grad_norm": 21.617551803588867, + "learning_rate": 1e-06, + "loss": 0.4976, + "num_input_tokens_seen": 331383188, + "step": 5915 + }, + { + "epoch": 13.173719376391983, + "loss": 0.5962991118431091, + "loss_ce": 0.0010842570336535573, + "loss_iou": 0.26171875, + "loss_num": 0.01397705078125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 331383188, + "step": 5915 + }, + { + "epoch": 13.175946547884188, + "grad_norm": 18.615833282470703, + "learning_rate": 1e-06, + "loss": 0.442, + "num_input_tokens_seen": 331439448, + "step": 5916 + }, + { + "epoch": 13.175946547884188, + "loss": 0.6015082001686096, + "loss_ce": 0.00022037216695025563, + "loss_iou": 0.236328125, + "loss_num": 0.02587890625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 331439448, + "step": 5916 + }, + { + "epoch": 13.178173719376392, + "grad_norm": 26.171741485595703, + "learning_rate": 1e-06, + "loss": 0.5615, + "num_input_tokens_seen": 331492752, + "step": 5917 + }, + { + "epoch": 13.178173719376392, + "loss": 0.47891539335250854, + "loss_ce": 0.00015565170906484127, + "loss_iou": 0.224609375, + "loss_num": 0.006011962890625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 331492752, + "step": 5917 + }, + { + "epoch": 13.180400890868597, + "grad_norm": 29.380762100219727, + "learning_rate": 1e-06, + "loss": 0.4608, + "num_input_tokens_seen": 331548328, + "step": 5918 + }, + { + "epoch": 13.180400890868597, + "loss": 0.37973326444625854, + "loss_ce": 9.459797001909465e-05, + "loss_iou": 0.154296875, + "loss_num": 0.01409912109375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 331548328, + "step": 5918 + }, + { + "epoch": 13.182628062360802, + "grad_norm": 19.07701301574707, + "learning_rate": 1e-06, + "loss": 0.5202, + "num_input_tokens_seen": 331603848, + "step": 5919 + }, + { + "epoch": 13.182628062360802, + "loss": 0.5613878965377808, + "loss_ce": 0.00010859225585591048, + "loss_iou": 0.24609375, + "loss_num": 0.013671875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 331603848, + "step": 5919 + }, + { + "epoch": 13.184855233853007, + "grad_norm": 23.228954315185547, + "learning_rate": 1e-06, + "loss": 0.4674, + "num_input_tokens_seen": 331663044, + "step": 5920 + }, + { + "epoch": 13.184855233853007, + "loss": 0.6362162828445435, + "loss_ce": 0.00010789166844915599, + "loss_iou": 0.27734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 331663044, + "step": 5920 + }, + { + "epoch": 13.187082405345212, + "grad_norm": 24.631132125854492, + "learning_rate": 1e-06, + "loss": 0.7198, + "num_input_tokens_seen": 331717048, + "step": 5921 + }, + { + "epoch": 13.187082405345212, + "loss": 0.5893706679344177, + "loss_ce": 0.00013725095777772367, + "loss_iou": 0.2578125, + "loss_num": 0.01470947265625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 331717048, + "step": 5921 + }, + { + "epoch": 13.189309576837417, + "grad_norm": 27.25778579711914, + "learning_rate": 1e-06, + "loss": 0.4561, + "num_input_tokens_seen": 331770924, + "step": 5922 + }, + { + "epoch": 13.189309576837417, + "loss": 0.3740364909172058, + "loss_ce": 0.00013512761506717652, + "loss_iou": 0.1572265625, + "loss_num": 0.0120849609375, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 331770924, + "step": 5922 + }, + { + "epoch": 13.191536748329622, + "grad_norm": 13.549928665161133, + "learning_rate": 1e-06, + "loss": 0.5417, + "num_input_tokens_seen": 331826796, + "step": 5923 + }, + { + "epoch": 13.191536748329622, + "loss": 0.3583794832229614, + "loss_ce": 0.00010311156802345067, + "loss_iou": 0.16015625, + "loss_num": 0.00738525390625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 331826796, + "step": 5923 + }, + { + "epoch": 13.193763919821826, + "grad_norm": 19.371231079101562, + "learning_rate": 1e-06, + "loss": 0.4902, + "num_input_tokens_seen": 331882152, + "step": 5924 + }, + { + "epoch": 13.193763919821826, + "loss": 0.6329140067100525, + "loss_ce": 0.00010151312744710594, + "loss_iou": 0.291015625, + "loss_num": 0.01019287109375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 331882152, + "step": 5924 + }, + { + "epoch": 13.195991091314031, + "grad_norm": 20.2581787109375, + "learning_rate": 1e-06, + "loss": 0.4575, + "num_input_tokens_seen": 331939604, + "step": 5925 + }, + { + "epoch": 13.195991091314031, + "loss": 0.3990633487701416, + "loss_ce": 0.0001375791907776147, + "loss_iou": 0.1708984375, + "loss_num": 0.011474609375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 331939604, + "step": 5925 + }, + { + "epoch": 13.198218262806236, + "grad_norm": 86.76207733154297, + "learning_rate": 1e-06, + "loss": 0.5783, + "num_input_tokens_seen": 331993812, + "step": 5926 + }, + { + "epoch": 13.198218262806236, + "loss": 0.5379533171653748, + "loss_ce": 0.00011149346391903237, + "loss_iou": 0.2392578125, + "loss_num": 0.0118408203125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 331993812, + "step": 5926 + }, + { + "epoch": 13.200445434298441, + "grad_norm": 19.799470901489258, + "learning_rate": 1e-06, + "loss": 0.5504, + "num_input_tokens_seen": 332050244, + "step": 5927 + }, + { + "epoch": 13.200445434298441, + "loss": 0.5327932834625244, + "loss_ce": 0.00020055694039911032, + "loss_iou": 0.23828125, + "loss_num": 0.01129150390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 332050244, + "step": 5927 + }, + { + "epoch": 13.202672605790646, + "grad_norm": 21.825603485107422, + "learning_rate": 1e-06, + "loss": 0.5413, + "num_input_tokens_seen": 332107696, + "step": 5928 + }, + { + "epoch": 13.202672605790646, + "loss": 0.4124048948287964, + "loss_ce": 0.00011244384222663939, + "loss_iou": 0.1845703125, + "loss_num": 0.00885009765625, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 332107696, + "step": 5928 + }, + { + "epoch": 13.20489977728285, + "grad_norm": 22.338329315185547, + "learning_rate": 1e-06, + "loss": 0.4693, + "num_input_tokens_seen": 332164076, + "step": 5929 + }, + { + "epoch": 13.20489977728285, + "loss": 0.2473558485507965, + "loss_ce": 0.00010242719145026058, + "loss_iou": 0.10595703125, + "loss_num": 0.007110595703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 332164076, + "step": 5929 + }, + { + "epoch": 13.207126948775056, + "grad_norm": 13.758119583129883, + "learning_rate": 1e-06, + "loss": 0.3369, + "num_input_tokens_seen": 332222060, + "step": 5930 + }, + { + "epoch": 13.207126948775056, + "loss": 0.3643418550491333, + "loss_ce": 0.00032818166073411703, + "loss_iou": 0.140625, + "loss_num": 0.0167236328125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 332222060, + "step": 5930 + }, + { + "epoch": 13.20935412026726, + "grad_norm": 20.242292404174805, + "learning_rate": 1e-06, + "loss": 0.4062, + "num_input_tokens_seen": 332280184, + "step": 5931 + }, + { + "epoch": 13.20935412026726, + "loss": 0.3549681603908539, + "loss_ce": 0.00023182205040939152, + "loss_iou": 0.16015625, + "loss_num": 0.007080078125, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 332280184, + "step": 5931 + }, + { + "epoch": 13.211581291759465, + "grad_norm": 57.8745002746582, + "learning_rate": 1e-06, + "loss": 0.5825, + "num_input_tokens_seen": 332335648, + "step": 5932 + }, + { + "epoch": 13.211581291759465, + "loss": 0.5452816486358643, + "loss_ce": 0.00011567381443455815, + "loss_iou": 0.216796875, + "loss_num": 0.0224609375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 332335648, + "step": 5932 + }, + { + "epoch": 13.21380846325167, + "grad_norm": 23.37744140625, + "learning_rate": 1e-06, + "loss": 0.5968, + "num_input_tokens_seen": 332391416, + "step": 5933 + }, + { + "epoch": 13.21380846325167, + "loss": 0.5802435278892517, + "loss_ce": 0.00034849648363888264, + "loss_iou": 0.240234375, + "loss_num": 0.0198974609375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 332391416, + "step": 5933 + }, + { + "epoch": 13.216035634743875, + "grad_norm": 24.591421127319336, + "learning_rate": 1e-06, + "loss": 0.4096, + "num_input_tokens_seen": 332448788, + "step": 5934 + }, + { + "epoch": 13.216035634743875, + "loss": 0.5153904557228088, + "loss_ce": 0.00013165900600142777, + "loss_iou": 0.23828125, + "loss_num": 0.00787353515625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 332448788, + "step": 5934 + }, + { + "epoch": 13.21826280623608, + "grad_norm": 23.88517951965332, + "learning_rate": 1e-06, + "loss": 0.4557, + "num_input_tokens_seen": 332505524, + "step": 5935 + }, + { + "epoch": 13.21826280623608, + "loss": 0.4439461827278137, + "loss_ce": 9.8556381999515e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.01556396484375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 332505524, + "step": 5935 + }, + { + "epoch": 13.220489977728285, + "grad_norm": 18.264699935913086, + "learning_rate": 1e-06, + "loss": 0.4795, + "num_input_tokens_seen": 332562732, + "step": 5936 + }, + { + "epoch": 13.220489977728285, + "loss": 0.5457608699798584, + "loss_ce": 0.00010662610293366015, + "loss_iou": 0.228515625, + "loss_num": 0.017822265625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 332562732, + "step": 5936 + }, + { + "epoch": 13.22271714922049, + "grad_norm": 20.514163970947266, + "learning_rate": 1e-06, + "loss": 0.5394, + "num_input_tokens_seen": 332617164, + "step": 5937 + }, + { + "epoch": 13.22271714922049, + "loss": 0.5808427333831787, + "loss_ce": 0.00015426846221089363, + "loss_iou": 0.25390625, + "loss_num": 0.0147705078125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 332617164, + "step": 5937 + }, + { + "epoch": 13.224944320712694, + "grad_norm": 19.999271392822266, + "learning_rate": 1e-06, + "loss": 0.4511, + "num_input_tokens_seen": 332672196, + "step": 5938 + }, + { + "epoch": 13.224944320712694, + "loss": 0.5069501996040344, + "loss_ce": 0.00017530655895825475, + "loss_iou": 0.208984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 332672196, + "step": 5938 + }, + { + "epoch": 13.2271714922049, + "grad_norm": 19.271041870117188, + "learning_rate": 1e-06, + "loss": 0.5313, + "num_input_tokens_seen": 332729132, + "step": 5939 + }, + { + "epoch": 13.2271714922049, + "loss": 0.5557813048362732, + "loss_ce": 0.00011723280476871878, + "loss_iou": 0.255859375, + "loss_num": 0.00860595703125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 332729132, + "step": 5939 + }, + { + "epoch": 13.229398663697104, + "grad_norm": 22.940664291381836, + "learning_rate": 1e-06, + "loss": 0.7251, + "num_input_tokens_seen": 332783480, + "step": 5940 + }, + { + "epoch": 13.229398663697104, + "loss": 0.8985084891319275, + "loss_ce": 0.00019303051522001624, + "loss_iou": 0.330078125, + "loss_num": 0.0478515625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 332783480, + "step": 5940 + }, + { + "epoch": 13.231625835189309, + "grad_norm": 17.761545181274414, + "learning_rate": 1e-06, + "loss": 0.4187, + "num_input_tokens_seen": 332840084, + "step": 5941 + }, + { + "epoch": 13.231625835189309, + "loss": 0.5540998578071594, + "loss_ce": 0.00014480168465524912, + "loss_iou": 0.244140625, + "loss_num": 0.0133056640625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 332840084, + "step": 5941 + }, + { + "epoch": 13.233853006681514, + "grad_norm": 13.681570053100586, + "learning_rate": 1e-06, + "loss": 0.36, + "num_input_tokens_seen": 332896372, + "step": 5942 + }, + { + "epoch": 13.233853006681514, + "loss": 0.34540247917175293, + "loss_ce": 0.00018764848937280476, + "loss_iou": 0.1494140625, + "loss_num": 0.0091552734375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 332896372, + "step": 5942 + }, + { + "epoch": 13.236080178173719, + "grad_norm": 17.690418243408203, + "learning_rate": 1e-06, + "loss": 0.5782, + "num_input_tokens_seen": 332953536, + "step": 5943 + }, + { + "epoch": 13.236080178173719, + "loss": 0.43799030780792236, + "loss_ce": 0.00012410686758812517, + "loss_iou": 0.20703125, + "loss_num": 0.004608154296875, + "loss_xval": 0.4375, + "num_input_tokens_seen": 332953536, + "step": 5943 + }, + { + "epoch": 13.238307349665924, + "grad_norm": 25.94896125793457, + "learning_rate": 1e-06, + "loss": 0.5025, + "num_input_tokens_seen": 333008228, + "step": 5944 + }, + { + "epoch": 13.238307349665924, + "loss": 0.5409551858901978, + "loss_ce": 0.00012263350072316825, + "loss_iou": 0.2314453125, + "loss_num": 0.0157470703125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 333008228, + "step": 5944 + }, + { + "epoch": 13.240534521158128, + "grad_norm": 20.952259063720703, + "learning_rate": 1e-06, + "loss": 0.4657, + "num_input_tokens_seen": 333065924, + "step": 5945 + }, + { + "epoch": 13.240534521158128, + "loss": 0.2911282777786255, + "loss_ce": 0.00011265826469752938, + "loss_iou": 0.1328125, + "loss_num": 0.004913330078125, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 333065924, + "step": 5945 + }, + { + "epoch": 13.242761692650333, + "grad_norm": 117.26244354248047, + "learning_rate": 1e-06, + "loss": 0.6348, + "num_input_tokens_seen": 333117748, + "step": 5946 + }, + { + "epoch": 13.242761692650333, + "loss": 0.9416841864585876, + "loss_ce": 0.00015579743194393814, + "loss_iou": 0.421875, + "loss_num": 0.0196533203125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 333117748, + "step": 5946 + }, + { + "epoch": 13.244988864142538, + "grad_norm": 20.61402130126953, + "learning_rate": 1e-06, + "loss": 0.5212, + "num_input_tokens_seen": 333175660, + "step": 5947 + }, + { + "epoch": 13.244988864142538, + "loss": 0.5732554793357849, + "loss_ce": 0.00025745650054886937, + "loss_iou": 0.2470703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 333175660, + "step": 5947 + }, + { + "epoch": 13.247216035634743, + "grad_norm": 18.954225540161133, + "learning_rate": 1e-06, + "loss": 0.621, + "num_input_tokens_seen": 333231676, + "step": 5948 + }, + { + "epoch": 13.247216035634743, + "loss": 0.6287169456481934, + "loss_ce": 0.0002990136854350567, + "loss_iou": 0.271484375, + "loss_num": 0.016845703125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 333231676, + "step": 5948 + }, + { + "epoch": 13.249443207126948, + "grad_norm": 16.13975715637207, + "learning_rate": 1e-06, + "loss": 0.5298, + "num_input_tokens_seen": 333287440, + "step": 5949 + }, + { + "epoch": 13.249443207126948, + "loss": 0.5134612917900085, + "loss_ce": 0.00015562049520667642, + "loss_iou": 0.224609375, + "loss_num": 0.01275634765625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 333287440, + "step": 5949 + }, + { + "epoch": 13.251670378619155, + "grad_norm": 21.270713806152344, + "learning_rate": 1e-06, + "loss": 0.4151, + "num_input_tokens_seen": 333340480, + "step": 5950 + }, + { + "epoch": 13.251670378619155, + "loss": 0.4000067412853241, + "loss_ce": 0.00010438874596729875, + "loss_iou": 0.1728515625, + "loss_num": 0.01080322265625, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 333340480, + "step": 5950 + }, + { + "epoch": 13.25389755011136, + "grad_norm": 12.74732780456543, + "learning_rate": 1e-06, + "loss": 0.3358, + "num_input_tokens_seen": 333396808, + "step": 5951 + }, + { + "epoch": 13.25389755011136, + "loss": 0.2938316762447357, + "loss_ce": 0.00013049585686530918, + "loss_iou": 0.109375, + "loss_num": 0.01507568359375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 333396808, + "step": 5951 + }, + { + "epoch": 13.256124721603564, + "grad_norm": 16.515743255615234, + "learning_rate": 1e-06, + "loss": 0.4615, + "num_input_tokens_seen": 333450628, + "step": 5952 + }, + { + "epoch": 13.256124721603564, + "loss": 0.48923662304878235, + "loss_ce": 0.00010087803093483672, + "loss_iou": 0.201171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 333450628, + "step": 5952 + }, + { + "epoch": 13.25835189309577, + "grad_norm": 29.306018829345703, + "learning_rate": 1e-06, + "loss": 0.3888, + "num_input_tokens_seen": 333507188, + "step": 5953 + }, + { + "epoch": 13.25835189309577, + "loss": 0.2866702973842621, + "loss_ce": 0.00011024670675396919, + "loss_iou": 0.1259765625, + "loss_num": 0.006805419921875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 333507188, + "step": 5953 + }, + { + "epoch": 13.260579064587974, + "grad_norm": 17.44719696044922, + "learning_rate": 1e-06, + "loss": 0.5703, + "num_input_tokens_seen": 333560676, + "step": 5954 + }, + { + "epoch": 13.260579064587974, + "loss": 0.6639593839645386, + "loss_ce": 0.00014105206355452538, + "loss_iou": 0.302734375, + "loss_num": 0.01153564453125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 333560676, + "step": 5954 + }, + { + "epoch": 13.262806236080179, + "grad_norm": 128.22251892089844, + "learning_rate": 1e-06, + "loss": 0.5567, + "num_input_tokens_seen": 333618720, + "step": 5955 + }, + { + "epoch": 13.262806236080179, + "loss": 0.6405112743377686, + "loss_ce": 0.00013040687190368772, + "loss_iou": 0.263671875, + "loss_num": 0.02294921875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 333618720, + "step": 5955 + }, + { + "epoch": 13.265033407572384, + "grad_norm": 18.05571746826172, + "learning_rate": 1e-06, + "loss": 0.4476, + "num_input_tokens_seen": 333676704, + "step": 5956 + }, + { + "epoch": 13.265033407572384, + "loss": 0.4810963273048401, + "loss_ce": 0.00013927766121923923, + "loss_iou": 0.19921875, + "loss_num": 0.0164794921875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 333676704, + "step": 5956 + }, + { + "epoch": 13.267260579064589, + "grad_norm": 17.209970474243164, + "learning_rate": 1e-06, + "loss": 0.4548, + "num_input_tokens_seen": 333732432, + "step": 5957 + }, + { + "epoch": 13.267260579064589, + "loss": 0.4918234944343567, + "loss_ce": 0.00012429816706571728, + "loss_iou": 0.2041015625, + "loss_num": 0.016845703125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 333732432, + "step": 5957 + }, + { + "epoch": 13.269487750556793, + "grad_norm": 24.414487838745117, + "learning_rate": 1e-06, + "loss": 0.3929, + "num_input_tokens_seen": 333790064, + "step": 5958 + }, + { + "epoch": 13.269487750556793, + "loss": 0.2766364514827728, + "loss_ce": 0.00014718460442963988, + "loss_iou": 0.1171875, + "loss_num": 0.00836181640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 333790064, + "step": 5958 + }, + { + "epoch": 13.271714922048998, + "grad_norm": 24.38360023498535, + "learning_rate": 1e-06, + "loss": 0.3817, + "num_input_tokens_seen": 333845108, + "step": 5959 + }, + { + "epoch": 13.271714922048998, + "loss": 0.34324610233306885, + "loss_ce": 0.00047269114293158054, + "loss_iou": 0.1572265625, + "loss_num": 0.0057373046875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 333845108, + "step": 5959 + }, + { + "epoch": 13.273942093541203, + "grad_norm": 28.868574142456055, + "learning_rate": 1e-06, + "loss": 0.5365, + "num_input_tokens_seen": 333899952, + "step": 5960 + }, + { + "epoch": 13.273942093541203, + "loss": 0.45695704221725464, + "loss_ce": 0.00010890320118051022, + "loss_iou": 0.19140625, + "loss_num": 0.0150146484375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 333899952, + "step": 5960 + }, + { + "epoch": 13.276169265033408, + "grad_norm": 17.488313674926758, + "learning_rate": 1e-06, + "loss": 0.5148, + "num_input_tokens_seen": 333955864, + "step": 5961 + }, + { + "epoch": 13.276169265033408, + "loss": 0.64747554063797, + "loss_ce": 0.00013665850565303117, + "loss_iou": 0.275390625, + "loss_num": 0.019287109375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 333955864, + "step": 5961 + }, + { + "epoch": 13.278396436525613, + "grad_norm": 14.644098281860352, + "learning_rate": 1e-06, + "loss": 0.3249, + "num_input_tokens_seen": 334012288, + "step": 5962 + }, + { + "epoch": 13.278396436525613, + "loss": 0.32973378896713257, + "loss_ce": 0.00014394470781553537, + "loss_iou": 0.14453125, + "loss_num": 0.00811767578125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 334012288, + "step": 5962 + }, + { + "epoch": 13.280623608017818, + "grad_norm": 14.35937213897705, + "learning_rate": 1e-06, + "loss": 0.4478, + "num_input_tokens_seen": 334070048, + "step": 5963 + }, + { + "epoch": 13.280623608017818, + "loss": 0.5133100152015686, + "loss_ce": 0.00012641007197089493, + "loss_iou": 0.2236328125, + "loss_num": 0.01318359375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 334070048, + "step": 5963 + }, + { + "epoch": 13.282850779510023, + "grad_norm": 20.978622436523438, + "learning_rate": 1e-06, + "loss": 0.6486, + "num_input_tokens_seen": 334125576, + "step": 5964 + }, + { + "epoch": 13.282850779510023, + "loss": 0.6014399528503418, + "loss_ce": 0.00012159476318629459, + "loss_iou": 0.26171875, + "loss_num": 0.015869140625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 334125576, + "step": 5964 + }, + { + "epoch": 13.285077951002227, + "grad_norm": 17.045791625976562, + "learning_rate": 1e-06, + "loss": 0.4465, + "num_input_tokens_seen": 334179024, + "step": 5965 + }, + { + "epoch": 13.285077951002227, + "loss": 0.40493443608283997, + "loss_ce": 0.00014927683514542878, + "loss_iou": 0.18359375, + "loss_num": 0.007415771484375, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 334179024, + "step": 5965 + }, + { + "epoch": 13.287305122494432, + "grad_norm": 17.58751678466797, + "learning_rate": 1e-06, + "loss": 0.5238, + "num_input_tokens_seen": 334233464, + "step": 5966 + }, + { + "epoch": 13.287305122494432, + "loss": 0.4632442593574524, + "loss_ce": 0.00010949977149721235, + "loss_iou": 0.2138671875, + "loss_num": 0.007049560546875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 334233464, + "step": 5966 + }, + { + "epoch": 13.289532293986637, + "grad_norm": 17.390804290771484, + "learning_rate": 1e-06, + "loss": 0.5365, + "num_input_tokens_seen": 334288260, + "step": 5967 + }, + { + "epoch": 13.289532293986637, + "loss": 0.6280704140663147, + "loss_ce": 0.00014069479948375374, + "loss_iou": 0.267578125, + "loss_num": 0.018798828125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 334288260, + "step": 5967 + }, + { + "epoch": 13.291759465478842, + "grad_norm": 20.9576358795166, + "learning_rate": 1e-06, + "loss": 0.3652, + "num_input_tokens_seen": 334345852, + "step": 5968 + }, + { + "epoch": 13.291759465478842, + "loss": 0.3794044554233551, + "loss_ce": 0.0001319996954407543, + "loss_iou": 0.1650390625, + "loss_num": 0.0098876953125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 334345852, + "step": 5968 + }, + { + "epoch": 13.293986636971047, + "grad_norm": 20.519437789916992, + "learning_rate": 1e-06, + "loss": 0.6077, + "num_input_tokens_seen": 334402480, + "step": 5969 + }, + { + "epoch": 13.293986636971047, + "loss": 0.8281785249710083, + "loss_ce": 0.00011449880548752844, + "loss_iou": 0.345703125, + "loss_num": 0.02783203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 334402480, + "step": 5969 + }, + { + "epoch": 13.296213808463252, + "grad_norm": 17.099870681762695, + "learning_rate": 1e-06, + "loss": 0.4134, + "num_input_tokens_seen": 334459196, + "step": 5970 + }, + { + "epoch": 13.296213808463252, + "loss": 0.35288766026496887, + "loss_ce": 0.0001044573073158972, + "loss_iou": 0.1533203125, + "loss_num": 0.00921630859375, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 334459196, + "step": 5970 + }, + { + "epoch": 13.298440979955457, + "grad_norm": 16.155277252197266, + "learning_rate": 1e-06, + "loss": 0.4195, + "num_input_tokens_seen": 334514964, + "step": 5971 + }, + { + "epoch": 13.298440979955457, + "loss": 0.4192987084388733, + "loss_ce": 0.00010926800314337015, + "loss_iou": 0.1865234375, + "loss_num": 0.00909423828125, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 334514964, + "step": 5971 + }, + { + "epoch": 13.300668151447661, + "grad_norm": 22.117937088012695, + "learning_rate": 1e-06, + "loss": 0.5437, + "num_input_tokens_seen": 334570392, + "step": 5972 + }, + { + "epoch": 13.300668151447661, + "loss": 0.524844765663147, + "loss_ce": 0.0005832784809172153, + "loss_iou": 0.205078125, + "loss_num": 0.022705078125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 334570392, + "step": 5972 + }, + { + "epoch": 13.302895322939866, + "grad_norm": 52.279701232910156, + "learning_rate": 1e-06, + "loss": 0.588, + "num_input_tokens_seen": 334628400, + "step": 5973 + }, + { + "epoch": 13.302895322939866, + "loss": 0.4728614389896393, + "loss_ce": 0.001250399393029511, + "loss_iou": 0.212890625, + "loss_num": 0.00927734375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 334628400, + "step": 5973 + }, + { + "epoch": 13.305122494432071, + "grad_norm": 42.509918212890625, + "learning_rate": 1e-06, + "loss": 0.7125, + "num_input_tokens_seen": 334682692, + "step": 5974 + }, + { + "epoch": 13.305122494432071, + "loss": 1.0085320472717285, + "loss_ce": 0.0012078466825187206, + "loss_iou": 0.41015625, + "loss_num": 0.037109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 334682692, + "step": 5974 + }, + { + "epoch": 13.307349665924276, + "grad_norm": 23.17171287536621, + "learning_rate": 1e-06, + "loss": 0.5796, + "num_input_tokens_seen": 334738240, + "step": 5975 + }, + { + "epoch": 13.307349665924276, + "loss": 0.5287770628929138, + "loss_ce": 9.056585986400023e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0230712890625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 334738240, + "step": 5975 + }, + { + "epoch": 13.309576837416481, + "grad_norm": 15.882946014404297, + "learning_rate": 1e-06, + "loss": 0.449, + "num_input_tokens_seen": 334795800, + "step": 5976 + }, + { + "epoch": 13.309576837416481, + "loss": 0.51024329662323, + "loss_ce": 0.00011146925680804998, + "loss_iou": 0.224609375, + "loss_num": 0.0123291015625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 334795800, + "step": 5976 + }, + { + "epoch": 13.311804008908686, + "grad_norm": 16.047292709350586, + "learning_rate": 1e-06, + "loss": 0.525, + "num_input_tokens_seen": 334853320, + "step": 5977 + }, + { + "epoch": 13.311804008908686, + "loss": 0.42943665385246277, + "loss_ce": 0.0001153927732957527, + "loss_iou": 0.19140625, + "loss_num": 0.0091552734375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 334853320, + "step": 5977 + }, + { + "epoch": 13.31403118040089, + "grad_norm": 18.631088256835938, + "learning_rate": 1e-06, + "loss": 0.4337, + "num_input_tokens_seen": 334908868, + "step": 5978 + }, + { + "epoch": 13.31403118040089, + "loss": 0.3484327793121338, + "loss_ce": 0.00010514883615542203, + "loss_iou": 0.154296875, + "loss_num": 0.0078125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 334908868, + "step": 5978 + }, + { + "epoch": 13.316258351893095, + "grad_norm": 21.7668514251709, + "learning_rate": 1e-06, + "loss": 0.5039, + "num_input_tokens_seen": 334966060, + "step": 5979 + }, + { + "epoch": 13.316258351893095, + "loss": 0.5759820938110352, + "loss_ce": 0.00011536551028257236, + "loss_iou": 0.265625, + "loss_num": 0.0091552734375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 334966060, + "step": 5979 + }, + { + "epoch": 13.3184855233853, + "grad_norm": 18.8905029296875, + "learning_rate": 1e-06, + "loss": 0.3987, + "num_input_tokens_seen": 335019804, + "step": 5980 + }, + { + "epoch": 13.3184855233853, + "loss": 0.3916875720024109, + "loss_ce": 0.00020808231784030795, + "loss_iou": 0.1630859375, + "loss_num": 0.01318359375, + "loss_xval": 0.390625, + "num_input_tokens_seen": 335019804, + "step": 5980 + }, + { + "epoch": 13.320712694877505, + "grad_norm": 33.502071380615234, + "learning_rate": 1e-06, + "loss": 0.5049, + "num_input_tokens_seen": 335077580, + "step": 5981 + }, + { + "epoch": 13.320712694877505, + "loss": 0.5615602731704712, + "loss_ce": 0.00015890991198830307, + "loss_iou": 0.2265625, + "loss_num": 0.021728515625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 335077580, + "step": 5981 + }, + { + "epoch": 13.32293986636971, + "grad_norm": 48.20892333984375, + "learning_rate": 1e-06, + "loss": 0.4792, + "num_input_tokens_seen": 335131592, + "step": 5982 + }, + { + "epoch": 13.32293986636971, + "loss": 0.5993452668190002, + "loss_ce": 0.00010209815809503198, + "loss_iou": 0.255859375, + "loss_num": 0.01708984375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 335131592, + "step": 5982 + }, + { + "epoch": 13.325167037861915, + "grad_norm": 14.08517837524414, + "learning_rate": 1e-06, + "loss": 0.4242, + "num_input_tokens_seen": 335186972, + "step": 5983 + }, + { + "epoch": 13.325167037861915, + "loss": 0.5378490090370178, + "loss_ce": 0.0010219014948233962, + "loss_iou": 0.203125, + "loss_num": 0.0263671875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 335186972, + "step": 5983 + }, + { + "epoch": 13.32739420935412, + "grad_norm": 15.391449928283691, + "learning_rate": 1e-06, + "loss": 0.6247, + "num_input_tokens_seen": 335240660, + "step": 5984 + }, + { + "epoch": 13.32739420935412, + "loss": 0.46067914366722107, + "loss_ce": 0.00010786119673866779, + "loss_iou": 0.201171875, + "loss_num": 0.01177978515625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 335240660, + "step": 5984 + }, + { + "epoch": 13.329621380846325, + "grad_norm": 25.606029510498047, + "learning_rate": 1e-06, + "loss": 0.4295, + "num_input_tokens_seen": 335297500, + "step": 5985 + }, + { + "epoch": 13.329621380846325, + "loss": 0.4411547780036926, + "loss_ce": 0.00023681171296630055, + "loss_iou": 0.1962890625, + "loss_num": 0.00970458984375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 335297500, + "step": 5985 + }, + { + "epoch": 13.33184855233853, + "grad_norm": 31.844512939453125, + "learning_rate": 1e-06, + "loss": 0.4698, + "num_input_tokens_seen": 335350156, + "step": 5986 + }, + { + "epoch": 13.33184855233853, + "loss": 0.41549986600875854, + "loss_ce": 9.457894338993356e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 335350156, + "step": 5986 + }, + { + "epoch": 13.334075723830734, + "grad_norm": 24.63311195373535, + "learning_rate": 1e-06, + "loss": 0.5548, + "num_input_tokens_seen": 335405788, + "step": 5987 + }, + { + "epoch": 13.334075723830734, + "loss": 0.36372703313827515, + "loss_ce": 0.00014061132969800383, + "loss_iou": 0.1728515625, + "loss_num": 0.0037994384765625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 335405788, + "step": 5987 + }, + { + "epoch": 13.33630289532294, + "grad_norm": 13.776124000549316, + "learning_rate": 1e-06, + "loss": 0.4612, + "num_input_tokens_seen": 335460976, + "step": 5988 + }, + { + "epoch": 13.33630289532294, + "loss": 0.6514391899108887, + "loss_ce": 0.00013307490735314786, + "loss_iou": 0.29296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 335460976, + "step": 5988 + }, + { + "epoch": 13.338530066815144, + "grad_norm": 15.759697914123535, + "learning_rate": 1e-06, + "loss": 0.5365, + "num_input_tokens_seen": 335516704, + "step": 5989 + }, + { + "epoch": 13.338530066815144, + "loss": 0.3756389021873474, + "loss_ce": 0.00015063249156810343, + "loss_iou": 0.138671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.375, + "num_input_tokens_seen": 335516704, + "step": 5989 + }, + { + "epoch": 13.340757238307349, + "grad_norm": 14.048306465148926, + "learning_rate": 1e-06, + "loss": 0.2585, + "num_input_tokens_seen": 335573700, + "step": 5990 + }, + { + "epoch": 13.340757238307349, + "loss": 0.2530948221683502, + "loss_ce": 0.00010410351387690753, + "loss_iou": 0.11181640625, + "loss_num": 0.00579833984375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 335573700, + "step": 5990 + }, + { + "epoch": 13.342984409799554, + "grad_norm": 45.206886291503906, + "learning_rate": 1e-06, + "loss": 0.4845, + "num_input_tokens_seen": 335628184, + "step": 5991 + }, + { + "epoch": 13.342984409799554, + "loss": 0.6019352078437805, + "loss_ce": 0.0001285744656343013, + "loss_iou": 0.267578125, + "loss_num": 0.01324462890625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 335628184, + "step": 5991 + }, + { + "epoch": 13.345211581291759, + "grad_norm": 99.1800765991211, + "learning_rate": 1e-06, + "loss": 0.5862, + "num_input_tokens_seen": 335684580, + "step": 5992 + }, + { + "epoch": 13.345211581291759, + "loss": 0.6407464146614075, + "loss_ce": 0.00012141239130869508, + "loss_iou": 0.2890625, + "loss_num": 0.0123291015625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 335684580, + "step": 5992 + }, + { + "epoch": 13.347438752783964, + "grad_norm": 16.95970916748047, + "learning_rate": 1e-06, + "loss": 0.4526, + "num_input_tokens_seen": 335741296, + "step": 5993 + }, + { + "epoch": 13.347438752783964, + "loss": 0.32951620221138, + "loss_ce": 0.00010945653048111126, + "loss_iou": 0.142578125, + "loss_num": 0.0087890625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 335741296, + "step": 5993 + }, + { + "epoch": 13.34966592427617, + "grad_norm": 40.8525505065918, + "learning_rate": 1e-06, + "loss": 0.4401, + "num_input_tokens_seen": 335797804, + "step": 5994 + }, + { + "epoch": 13.34966592427617, + "loss": 0.46965697407722473, + "loss_ce": 0.00017455026682000607, + "loss_iou": 0.1748046875, + "loss_num": 0.0240478515625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 335797804, + "step": 5994 + }, + { + "epoch": 13.351893095768375, + "grad_norm": 23.179540634155273, + "learning_rate": 1e-06, + "loss": 0.6922, + "num_input_tokens_seen": 335855320, + "step": 5995 + }, + { + "epoch": 13.351893095768375, + "loss": 0.5909324288368225, + "loss_ce": 0.00011209775402676314, + "loss_iou": 0.271484375, + "loss_num": 0.009521484375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 335855320, + "step": 5995 + }, + { + "epoch": 13.35412026726058, + "grad_norm": 28.706586837768555, + "learning_rate": 1e-06, + "loss": 0.4458, + "num_input_tokens_seen": 335911324, + "step": 5996 + }, + { + "epoch": 13.35412026726058, + "loss": 0.4113472104072571, + "loss_ce": 0.00021440400450956076, + "loss_iou": 0.1904296875, + "loss_num": 0.006195068359375, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 335911324, + "step": 5996 + }, + { + "epoch": 13.356347438752785, + "grad_norm": 14.33905029296875, + "learning_rate": 1e-06, + "loss": 0.3891, + "num_input_tokens_seen": 335964808, + "step": 5997 + }, + { + "epoch": 13.356347438752785, + "loss": 0.41698315739631653, + "loss_ce": 0.00011303767678327858, + "loss_iou": 0.1640625, + "loss_num": 0.017578125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 335964808, + "step": 5997 + }, + { + "epoch": 13.35857461024499, + "grad_norm": 15.864407539367676, + "learning_rate": 1e-06, + "loss": 0.3952, + "num_input_tokens_seen": 336021256, + "step": 5998 + }, + { + "epoch": 13.35857461024499, + "loss": 0.4110654890537262, + "loss_ce": 0.00017680224846117198, + "loss_iou": 0.1796875, + "loss_num": 0.01025390625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 336021256, + "step": 5998 + }, + { + "epoch": 13.360801781737194, + "grad_norm": 28.089126586914062, + "learning_rate": 1e-06, + "loss": 0.4279, + "num_input_tokens_seen": 336074836, + "step": 5999 + }, + { + "epoch": 13.360801781737194, + "loss": 0.4389985203742981, + "loss_ce": 0.00017103503341786563, + "loss_iou": 0.1689453125, + "loss_num": 0.02001953125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 336074836, + "step": 5999 + }, + { + "epoch": 13.3630289532294, + "grad_norm": 16.476625442504883, + "learning_rate": 1e-06, + "loss": 0.5683, + "num_input_tokens_seen": 336130500, + "step": 6000 + }, + { + "epoch": 13.3630289532294, + "eval_seeclick_web_CIoU": 0.589046448469162, + "eval_seeclick_web_GIoU": 0.5870300531387329, + "eval_seeclick_web_IoU": 0.6074443459510803, + "eval_seeclick_web_MAE_all": 0.015537765808403492, + "eval_seeclick_web_MAE_h": 0.007694335887208581, + "eval_seeclick_web_MAE_w": 0.015699473209679127, + "eval_seeclick_web_MAE_x_boxes": 0.009341908851638436, + "eval_seeclick_web_MAE_y_boxes": 0.021628314396366477, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.888150691986084, + "eval_seeclick_web_loss_ce": 0.0001769300361047499, + "eval_seeclick_web_loss_iou": 0.40771484375, + "eval_seeclick_web_loss_num": 0.0123291015625, + "eval_seeclick_web_loss_xval": 0.8765869140625, + "eval_seeclick_web_runtime": 19.879, + "eval_seeclick_web_samples_per_second": 2.515, + "eval_seeclick_web_steps_per_second": 0.101, + "num_input_tokens_seen": 336130500, + "step": 6000 + }, + { + "epoch": 13.3630289532294, + "eval_icons_CIoU": 0.2701048105955124, + "eval_icons_GIoU": 0.294067844748497, + "eval_icons_IoU": 0.3528982400894165, + "eval_icons_MAE_all": 0.06405339390039444, + "eval_icons_MAE_h": 0.03851390350610018, + "eval_icons_MAE_w": 0.06734280101954937, + "eval_icons_MAE_x_boxes": 0.06041870452463627, + "eval_icons_MAE_y_boxes": 0.03873829450458288, + "eval_icons_inside_bbox": 0.6059027910232544, + "eval_icons_loss": 1.748345971107483, + "eval_icons_loss_ce": 0.00021618494793074206, + "eval_icons_loss_iou": 0.6826171875, + "eval_icons_loss_num": 0.061618804931640625, + "eval_icons_loss_xval": 1.673828125, + "eval_icons_runtime": 18.4453, + "eval_icons_samples_per_second": 2.711, + "eval_icons_steps_per_second": 0.108, + "num_input_tokens_seen": 336130500, + "step": 6000 + }, + { + "epoch": 13.3630289532294, + "eval_screenspot_CIoU": 0.35929131507873535, + "eval_screenspot_GIoU": 0.3761854072411855, + "eval_screenspot_IoU": 0.43523843089739483, + "eval_screenspot_MAE_all": 0.05693357313672701, + "eval_screenspot_MAE_h": 0.039716811850667, + "eval_screenspot_MAE_w": 0.06415350238482158, + "eval_screenspot_MAE_x_boxes": 0.069387707238396, + "eval_screenspot_MAE_y_boxes": 0.03772336399803559, + "eval_screenspot_inside_bbox": 0.6966666579246521, + "eval_screenspot_loss": 1.5850136280059814, + "eval_screenspot_loss_ce": 0.00022644254689415297, + "eval_screenspot_loss_iou": 0.6593424479166666, + "eval_screenspot_loss_num": 0.0646069844563802, + "eval_screenspot_loss_xval": 1.640625, + "eval_screenspot_runtime": 29.6001, + "eval_screenspot_samples_per_second": 3.007, + "eval_screenspot_steps_per_second": 0.101, + "num_input_tokens_seen": 336130500, + "step": 6000 + }, + { + "epoch": 13.3630289532294, + "eval_compot_CIoU": 0.3427671641111374, + "eval_compot_GIoU": 0.3569800406694412, + "eval_compot_IoU": 0.4003662168979645, + "eval_compot_MAE_all": 0.019289949908852577, + "eval_compot_MAE_h": 0.012674622237682343, + "eval_compot_MAE_w": 0.021172930486500263, + "eval_compot_MAE_x_boxes": 0.02997487783432007, + "eval_compot_MAE_y_boxes": 0.007259466219693422, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4314215183258057, + "eval_compot_loss_ce": 0.00017105540609918535, + "eval_compot_loss_iou": 0.6610107421875, + "eval_compot_loss_num": 0.018083572387695312, + "eval_compot_loss_xval": 1.413330078125, + "eval_compot_runtime": 19.5253, + "eval_compot_samples_per_second": 2.561, + "eval_compot_steps_per_second": 0.102, + "num_input_tokens_seen": 336130500, + "step": 6000 + }, + { + "epoch": 13.3630289532294, + "eval_custom_ui_val_CIoU": 0.48122422645489377, + "eval_custom_ui_val_GIoU": 0.4910411420795653, + "eval_custom_ui_val_IoU": 0.5414565900961558, + "eval_custom_ui_val_MAE_all": 0.027185753505263064, + "eval_custom_ui_val_MAE_h": 0.014914580983006291, + "eval_custom_ui_val_MAE_w": 0.03390147609429227, + "eval_custom_ui_val_MAE_x_boxes": 0.03341104726617535, + "eval_custom_ui_val_MAE_y_boxes": 0.013283828376895852, + "eval_custom_ui_val_inside_bbox": 0.7754629651705424, + "eval_custom_ui_val_loss": 1.1734305620193481, + "eval_custom_ui_val_loss_ce": 0.0001952857144513271, + "eval_custom_ui_val_loss_iou": 0.5054389105902778, + "eval_custom_ui_val_loss_num": 0.024320814344618056, + "eval_custom_ui_val_loss_xval": 1.1326497395833333, + "eval_custom_ui_val_runtime": 56.7538, + "eval_custom_ui_val_samples_per_second": 4.669, + "eval_custom_ui_val_steps_per_second": 0.159, + "num_input_tokens_seen": 336130500, + "step": 6000 + }, + { + "epoch": 13.3630289532294, + "loss": 0.8641664385795593, + "loss_ce": 0.00015277693455573171, + "loss_iou": 0.392578125, + "loss_num": 0.015869140625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 336130500, + "step": 6000 + }, + { + "epoch": 13.365256124721604, + "grad_norm": 16.492589950561523, + "learning_rate": 1e-06, + "loss": 0.3624, + "num_input_tokens_seen": 336187644, + "step": 6001 + }, + { + "epoch": 13.365256124721604, + "loss": 0.38099753856658936, + "loss_ce": 0.00026025049737654626, + "loss_iou": 0.1728515625, + "loss_num": 0.00701904296875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 336187644, + "step": 6001 + }, + { + "epoch": 13.367483296213809, + "grad_norm": 16.520002365112305, + "learning_rate": 1e-06, + "loss": 0.4096, + "num_input_tokens_seen": 336244060, + "step": 6002 + }, + { + "epoch": 13.367483296213809, + "loss": 0.2786784768104553, + "loss_ce": 0.00011401639494579285, + "loss_iou": 0.11181640625, + "loss_num": 0.010986328125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 336244060, + "step": 6002 + }, + { + "epoch": 13.369710467706014, + "grad_norm": 27.00398826599121, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 336298968, + "step": 6003 + }, + { + "epoch": 13.369710467706014, + "loss": 0.3439924120903015, + "loss_ce": 0.00012036073167109862, + "loss_iou": 0.146484375, + "loss_num": 0.0103759765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 336298968, + "step": 6003 + }, + { + "epoch": 13.371937639198219, + "grad_norm": 14.05265998840332, + "learning_rate": 1e-06, + "loss": 0.3699, + "num_input_tokens_seen": 336354760, + "step": 6004 + }, + { + "epoch": 13.371937639198219, + "loss": 0.3934824466705322, + "loss_ce": 0.0001108648139052093, + "loss_iou": 0.1767578125, + "loss_num": 0.0079345703125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 336354760, + "step": 6004 + }, + { + "epoch": 13.374164810690424, + "grad_norm": 24.29564094543457, + "learning_rate": 1e-06, + "loss": 0.5934, + "num_input_tokens_seen": 336412268, + "step": 6005 + }, + { + "epoch": 13.374164810690424, + "loss": 0.372583270072937, + "loss_ce": 0.00011621671728789806, + "loss_iou": 0.166015625, + "loss_num": 0.008056640625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 336412268, + "step": 6005 + }, + { + "epoch": 13.376391982182628, + "grad_norm": 18.128660202026367, + "learning_rate": 1e-06, + "loss": 0.4611, + "num_input_tokens_seen": 336467380, + "step": 6006 + }, + { + "epoch": 13.376391982182628, + "loss": 0.4521748423576355, + "loss_ce": 0.00014848702994640917, + "loss_iou": 0.20703125, + "loss_num": 0.007781982421875, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 336467380, + "step": 6006 + }, + { + "epoch": 13.378619153674833, + "grad_norm": 17.15667152404785, + "learning_rate": 1e-06, + "loss": 0.4673, + "num_input_tokens_seen": 336522912, + "step": 6007 + }, + { + "epoch": 13.378619153674833, + "loss": 0.4950922429561615, + "loss_ce": 9.710421727504581e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.012939453125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 336522912, + "step": 6007 + }, + { + "epoch": 13.380846325167038, + "grad_norm": 36.65664291381836, + "learning_rate": 1e-06, + "loss": 0.4762, + "num_input_tokens_seen": 336578132, + "step": 6008 + }, + { + "epoch": 13.380846325167038, + "loss": 0.4739997982978821, + "loss_ce": 0.0002449120220262557, + "loss_iou": 0.1728515625, + "loss_num": 0.025634765625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 336578132, + "step": 6008 + }, + { + "epoch": 13.383073496659243, + "grad_norm": 17.556377410888672, + "learning_rate": 1e-06, + "loss": 0.4995, + "num_input_tokens_seen": 336636856, + "step": 6009 + }, + { + "epoch": 13.383073496659243, + "loss": 0.5950038433074951, + "loss_ce": 0.0001551837776787579, + "loss_iou": 0.25390625, + "loss_num": 0.017578125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 336636856, + "step": 6009 + }, + { + "epoch": 13.385300668151448, + "grad_norm": 30.719839096069336, + "learning_rate": 1e-06, + "loss": 0.5576, + "num_input_tokens_seen": 336694384, + "step": 6010 + }, + { + "epoch": 13.385300668151448, + "loss": 0.5820613503456116, + "loss_ce": 0.00015218451153486967, + "loss_iou": 0.24609375, + "loss_num": 0.0179443359375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 336694384, + "step": 6010 + }, + { + "epoch": 13.387527839643653, + "grad_norm": 16.480695724487305, + "learning_rate": 1e-06, + "loss": 0.5837, + "num_input_tokens_seen": 336751420, + "step": 6011 + }, + { + "epoch": 13.387527839643653, + "loss": 0.4113236367702484, + "loss_ce": 0.00012982796761207283, + "loss_iou": 0.1796875, + "loss_num": 0.010498046875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 336751420, + "step": 6011 + }, + { + "epoch": 13.389755011135858, + "grad_norm": 27.67957305908203, + "learning_rate": 1e-06, + "loss": 0.4264, + "num_input_tokens_seen": 336804964, + "step": 6012 + }, + { + "epoch": 13.389755011135858, + "loss": 0.5694878101348877, + "loss_ce": 0.00015187214012257755, + "loss_iou": 0.265625, + "loss_num": 0.00732421875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 336804964, + "step": 6012 + }, + { + "epoch": 13.391982182628063, + "grad_norm": 15.864426612854004, + "learning_rate": 1e-06, + "loss": 0.6295, + "num_input_tokens_seen": 336862960, + "step": 6013 + }, + { + "epoch": 13.391982182628063, + "loss": 0.425285279750824, + "loss_ce": 0.00011438117508077994, + "loss_iou": 0.189453125, + "loss_num": 0.00927734375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 336862960, + "step": 6013 + }, + { + "epoch": 13.394209354120267, + "grad_norm": 13.584254264831543, + "learning_rate": 1e-06, + "loss": 0.6117, + "num_input_tokens_seen": 336920076, + "step": 6014 + }, + { + "epoch": 13.394209354120267, + "loss": 0.4187964200973511, + "loss_ce": 9.524912456981838e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.00927734375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 336920076, + "step": 6014 + }, + { + "epoch": 13.396436525612472, + "grad_norm": 15.942584037780762, + "learning_rate": 1e-06, + "loss": 0.4851, + "num_input_tokens_seen": 336975628, + "step": 6015 + }, + { + "epoch": 13.396436525612472, + "loss": 0.5123189687728882, + "loss_ce": 0.00011192913370905444, + "loss_iou": 0.2294921875, + "loss_num": 0.0107421875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 336975628, + "step": 6015 + }, + { + "epoch": 13.398663697104677, + "grad_norm": 29.960575103759766, + "learning_rate": 1e-06, + "loss": 0.4581, + "num_input_tokens_seen": 337031400, + "step": 6016 + }, + { + "epoch": 13.398663697104677, + "loss": 0.4230641722679138, + "loss_ce": 9.053543908521533e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.015625, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 337031400, + "step": 6016 + }, + { + "epoch": 13.400890868596882, + "grad_norm": 17.33017921447754, + "learning_rate": 1e-06, + "loss": 0.3594, + "num_input_tokens_seen": 337086372, + "step": 6017 + }, + { + "epoch": 13.400890868596882, + "loss": 0.46412086486816406, + "loss_ce": 0.00013159040827304125, + "loss_iou": 0.2138671875, + "loss_num": 0.0072021484375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 337086372, + "step": 6017 + }, + { + "epoch": 13.403118040089087, + "grad_norm": 24.617557525634766, + "learning_rate": 1e-06, + "loss": 0.6371, + "num_input_tokens_seen": 337141460, + "step": 6018 + }, + { + "epoch": 13.403118040089087, + "loss": 0.4511684775352478, + "loss_ce": 0.00011867978173540905, + "loss_iou": 0.201171875, + "loss_num": 0.0098876953125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 337141460, + "step": 6018 + }, + { + "epoch": 13.405345211581292, + "grad_norm": 22.865720748901367, + "learning_rate": 1e-06, + "loss": 0.4544, + "num_input_tokens_seen": 337199884, + "step": 6019 + }, + { + "epoch": 13.405345211581292, + "loss": 0.4763756990432739, + "loss_ce": 0.00011837005149573088, + "loss_iou": 0.216796875, + "loss_num": 0.0084228515625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 337199884, + "step": 6019 + }, + { + "epoch": 13.407572383073497, + "grad_norm": 30.050506591796875, + "learning_rate": 1e-06, + "loss": 0.3933, + "num_input_tokens_seen": 337255584, + "step": 6020 + }, + { + "epoch": 13.407572383073497, + "loss": 0.5196518301963806, + "loss_ce": 0.00012056018749717623, + "loss_iou": 0.203125, + "loss_num": 0.022705078125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 337255584, + "step": 6020 + }, + { + "epoch": 13.409799554565701, + "grad_norm": 16.1778564453125, + "learning_rate": 1e-06, + "loss": 0.4133, + "num_input_tokens_seen": 337314328, + "step": 6021 + }, + { + "epoch": 13.409799554565701, + "loss": 0.4304283857345581, + "loss_ce": 0.00013054994633421302, + "loss_iou": 0.19921875, + "loss_num": 0.006317138671875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 337314328, + "step": 6021 + }, + { + "epoch": 13.412026726057906, + "grad_norm": 19.43562126159668, + "learning_rate": 1e-06, + "loss": 0.5453, + "num_input_tokens_seen": 337371160, + "step": 6022 + }, + { + "epoch": 13.412026726057906, + "loss": 0.8006750345230103, + "loss_ce": 0.00013796251732856035, + "loss_iou": 0.3125, + "loss_num": 0.03515625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 337371160, + "step": 6022 + }, + { + "epoch": 13.414253897550111, + "grad_norm": 20.63788414001465, + "learning_rate": 1e-06, + "loss": 0.3787, + "num_input_tokens_seen": 337427584, + "step": 6023 + }, + { + "epoch": 13.414253897550111, + "loss": 0.45666825771331787, + "loss_ce": 0.00012530997628346086, + "loss_iou": 0.2001953125, + "loss_num": 0.0113525390625, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 337427584, + "step": 6023 + }, + { + "epoch": 13.416481069042316, + "grad_norm": 14.40611457824707, + "learning_rate": 1e-06, + "loss": 0.4986, + "num_input_tokens_seen": 337486896, + "step": 6024 + }, + { + "epoch": 13.416481069042316, + "loss": 0.4070853590965271, + "loss_ce": 0.00010296083928551525, + "loss_iou": 0.1875, + "loss_num": 0.00653076171875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 337486896, + "step": 6024 + }, + { + "epoch": 13.41870824053452, + "grad_norm": 17.12459945678711, + "learning_rate": 1e-06, + "loss": 0.3375, + "num_input_tokens_seen": 337544700, + "step": 6025 + }, + { + "epoch": 13.41870824053452, + "loss": 0.28037208318710327, + "loss_ce": 9.865299216471612e-05, + "loss_iou": 0.12109375, + "loss_num": 0.007476806640625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 337544700, + "step": 6025 + }, + { + "epoch": 13.420935412026726, + "grad_norm": 17.389301300048828, + "learning_rate": 1e-06, + "loss": 0.5866, + "num_input_tokens_seen": 337602792, + "step": 6026 + }, + { + "epoch": 13.420935412026726, + "loss": 0.34812402725219727, + "loss_ce": 0.00010155315976589918, + "loss_iou": 0.15234375, + "loss_num": 0.00872802734375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 337602792, + "step": 6026 + }, + { + "epoch": 13.42316258351893, + "grad_norm": 14.135052680969238, + "learning_rate": 1e-06, + "loss": 0.6344, + "num_input_tokens_seen": 337657324, + "step": 6027 + }, + { + "epoch": 13.42316258351893, + "loss": 0.9954932332038879, + "loss_ce": 0.00013188435696065426, + "loss_iou": 0.419921875, + "loss_num": 0.031494140625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 337657324, + "step": 6027 + }, + { + "epoch": 13.425389755011135, + "grad_norm": 17.855140686035156, + "learning_rate": 1e-06, + "loss": 0.4056, + "num_input_tokens_seen": 337713028, + "step": 6028 + }, + { + "epoch": 13.425389755011135, + "loss": 0.30981168150901794, + "loss_ce": 0.00011929747415706515, + "loss_iou": 0.1318359375, + "loss_num": 0.00921630859375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 337713028, + "step": 6028 + }, + { + "epoch": 13.42761692650334, + "grad_norm": 19.777183532714844, + "learning_rate": 1e-06, + "loss": 0.4961, + "num_input_tokens_seen": 337767148, + "step": 6029 + }, + { + "epoch": 13.42761692650334, + "loss": 0.45299914479255676, + "loss_ce": 0.00011829864524770528, + "loss_iou": 0.19921875, + "loss_num": 0.01104736328125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 337767148, + "step": 6029 + }, + { + "epoch": 13.429844097995545, + "grad_norm": 28.941667556762695, + "learning_rate": 1e-06, + "loss": 0.3823, + "num_input_tokens_seen": 337824252, + "step": 6030 + }, + { + "epoch": 13.429844097995545, + "loss": 0.4544570744037628, + "loss_ce": 0.00011135141539853066, + "loss_iou": 0.2001953125, + "loss_num": 0.0108642578125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 337824252, + "step": 6030 + }, + { + "epoch": 13.43207126948775, + "grad_norm": 20.595233917236328, + "learning_rate": 1e-06, + "loss": 0.7976, + "num_input_tokens_seen": 337879876, + "step": 6031 + }, + { + "epoch": 13.43207126948775, + "loss": 0.8424028754234314, + "loss_ce": 0.0001787315122783184, + "loss_iou": 0.34765625, + "loss_num": 0.029296875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 337879876, + "step": 6031 + }, + { + "epoch": 13.434298440979955, + "grad_norm": 37.23560333251953, + "learning_rate": 1e-06, + "loss": 0.5585, + "num_input_tokens_seen": 337936932, + "step": 6032 + }, + { + "epoch": 13.434298440979955, + "loss": 0.4548329710960388, + "loss_ce": 0.00012105993664590642, + "loss_iou": 0.181640625, + "loss_num": 0.018310546875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 337936932, + "step": 6032 + }, + { + "epoch": 13.43652561247216, + "grad_norm": 20.569580078125, + "learning_rate": 1e-06, + "loss": 0.4455, + "num_input_tokens_seen": 337993396, + "step": 6033 + }, + { + "epoch": 13.43652561247216, + "loss": 0.6414711475372314, + "loss_ce": 0.00011370141146471724, + "loss_iou": 0.2294921875, + "loss_num": 0.036376953125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 337993396, + "step": 6033 + }, + { + "epoch": 13.438752783964365, + "grad_norm": 21.35686492919922, + "learning_rate": 1e-06, + "loss": 0.5028, + "num_input_tokens_seen": 338048168, + "step": 6034 + }, + { + "epoch": 13.438752783964365, + "loss": 0.27768754959106445, + "loss_ce": 0.0001606793375685811, + "loss_iou": 0.1279296875, + "loss_num": 0.004180908203125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 338048168, + "step": 6034 + }, + { + "epoch": 13.44097995545657, + "grad_norm": 14.87231159210205, + "learning_rate": 1e-06, + "loss": 0.6545, + "num_input_tokens_seen": 338104776, + "step": 6035 + }, + { + "epoch": 13.44097995545657, + "loss": 0.5546998381614685, + "loss_ce": 0.00013442571798805147, + "loss_iou": 0.2294921875, + "loss_num": 0.0191650390625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 338104776, + "step": 6035 + }, + { + "epoch": 13.443207126948774, + "grad_norm": 17.35746955871582, + "learning_rate": 1e-06, + "loss": 0.3714, + "num_input_tokens_seen": 338160688, + "step": 6036 + }, + { + "epoch": 13.443207126948774, + "loss": 0.2007206380367279, + "loss_ce": 8.280624751932919e-05, + "loss_iou": 0.080078125, + "loss_num": 0.00799560546875, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 338160688, + "step": 6036 + }, + { + "epoch": 13.44543429844098, + "grad_norm": 32.42356872558594, + "learning_rate": 1e-06, + "loss": 0.5975, + "num_input_tokens_seen": 338216708, + "step": 6037 + }, + { + "epoch": 13.44543429844098, + "loss": 0.4356258511543274, + "loss_ce": 0.00020103095448575914, + "loss_iou": 0.171875, + "loss_num": 0.0185546875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 338216708, + "step": 6037 + }, + { + "epoch": 13.447661469933184, + "grad_norm": 16.385221481323242, + "learning_rate": 1e-06, + "loss": 0.3499, + "num_input_tokens_seen": 338273736, + "step": 6038 + }, + { + "epoch": 13.447661469933184, + "loss": 0.3894416391849518, + "loss_ce": 0.00015943063772283494, + "loss_iou": 0.16015625, + "loss_num": 0.01361083984375, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 338273736, + "step": 6038 + }, + { + "epoch": 13.449888641425389, + "grad_norm": 16.43487548828125, + "learning_rate": 1e-06, + "loss": 0.4453, + "num_input_tokens_seen": 338329848, + "step": 6039 + }, + { + "epoch": 13.449888641425389, + "loss": 0.4355180859565735, + "loss_ce": 9.329354361398146e-05, + "loss_iou": 0.18359375, + "loss_num": 0.01373291015625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 338329848, + "step": 6039 + }, + { + "epoch": 13.452115812917596, + "grad_norm": 10.888897895812988, + "learning_rate": 1e-06, + "loss": 0.2805, + "num_input_tokens_seen": 338384060, + "step": 6040 + }, + { + "epoch": 13.452115812917596, + "loss": 0.24022263288497925, + "loss_ce": 0.0001103131435229443, + "loss_iou": 0.09912109375, + "loss_num": 0.0084228515625, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 338384060, + "step": 6040 + }, + { + "epoch": 13.4543429844098, + "grad_norm": 16.131216049194336, + "learning_rate": 1e-06, + "loss": 0.3434, + "num_input_tokens_seen": 338439568, + "step": 6041 + }, + { + "epoch": 13.4543429844098, + "loss": 0.44243180751800537, + "loss_ce": 0.00011001640814356506, + "loss_iou": 0.1884765625, + "loss_num": 0.012939453125, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 338439568, + "step": 6041 + }, + { + "epoch": 13.456570155902005, + "grad_norm": 24.778518676757812, + "learning_rate": 1e-06, + "loss": 0.3777, + "num_input_tokens_seen": 338495172, + "step": 6042 + }, + { + "epoch": 13.456570155902005, + "loss": 0.5041686296463013, + "loss_ce": 0.00035395825398154557, + "loss_iou": 0.2197265625, + "loss_num": 0.01287841796875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 338495172, + "step": 6042 + }, + { + "epoch": 13.45879732739421, + "grad_norm": 25.524629592895508, + "learning_rate": 1e-06, + "loss": 0.5037, + "num_input_tokens_seen": 338550088, + "step": 6043 + }, + { + "epoch": 13.45879732739421, + "loss": 0.5563878417015076, + "loss_ce": 0.00011343901860527694, + "loss_iou": 0.248046875, + "loss_num": 0.01202392578125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 338550088, + "step": 6043 + }, + { + "epoch": 13.461024498886415, + "grad_norm": 19.04424285888672, + "learning_rate": 1e-06, + "loss": 0.583, + "num_input_tokens_seen": 338605712, + "step": 6044 + }, + { + "epoch": 13.461024498886415, + "loss": 0.6645296812057495, + "loss_ce": 0.00022306920436676592, + "loss_iou": 0.2578125, + "loss_num": 0.0301513671875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 338605712, + "step": 6044 + }, + { + "epoch": 13.46325167037862, + "grad_norm": 14.406312942504883, + "learning_rate": 1e-06, + "loss": 0.379, + "num_input_tokens_seen": 338662752, + "step": 6045 + }, + { + "epoch": 13.46325167037862, + "loss": 0.3758947253227234, + "loss_ce": 0.00010126342385774478, + "loss_iou": 0.1533203125, + "loss_num": 0.013671875, + "loss_xval": 0.375, + "num_input_tokens_seen": 338662752, + "step": 6045 + }, + { + "epoch": 13.465478841870825, + "grad_norm": 40.62894821166992, + "learning_rate": 1e-06, + "loss": 0.5142, + "num_input_tokens_seen": 338716672, + "step": 6046 + }, + { + "epoch": 13.465478841870825, + "loss": 0.471059650182724, + "loss_ce": 0.00011239292507525533, + "loss_iou": 0.2099609375, + "loss_num": 0.01025390625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 338716672, + "step": 6046 + }, + { + "epoch": 13.46770601336303, + "grad_norm": 22.999338150024414, + "learning_rate": 1e-06, + "loss": 0.6799, + "num_input_tokens_seen": 338773908, + "step": 6047 + }, + { + "epoch": 13.46770601336303, + "loss": 0.46885955333709717, + "loss_ce": 0.00010957221093121916, + "loss_iou": 0.2119140625, + "loss_num": 0.00885009765625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 338773908, + "step": 6047 + }, + { + "epoch": 13.469933184855234, + "grad_norm": 17.78192138671875, + "learning_rate": 1e-06, + "loss": 0.4976, + "num_input_tokens_seen": 338831676, + "step": 6048 + }, + { + "epoch": 13.469933184855234, + "loss": 0.47100013494491577, + "loss_ce": 0.00017495593056082726, + "loss_iou": 0.1953125, + "loss_num": 0.0159912109375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 338831676, + "step": 6048 + }, + { + "epoch": 13.47216035634744, + "grad_norm": 31.006885528564453, + "learning_rate": 1e-06, + "loss": 0.5294, + "num_input_tokens_seen": 338886208, + "step": 6049 + }, + { + "epoch": 13.47216035634744, + "loss": 0.49864301085472107, + "loss_ce": 0.00010783917969092727, + "loss_iou": 0.2275390625, + "loss_num": 0.00872802734375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 338886208, + "step": 6049 + }, + { + "epoch": 13.474387527839644, + "grad_norm": 17.400957107543945, + "learning_rate": 1e-06, + "loss": 0.5978, + "num_input_tokens_seen": 338941916, + "step": 6050 + }, + { + "epoch": 13.474387527839644, + "loss": 0.5912978649139404, + "loss_ce": 0.00017235292762052268, + "loss_iou": 0.263671875, + "loss_num": 0.01263427734375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 338941916, + "step": 6050 + }, + { + "epoch": 13.476614699331849, + "grad_norm": 17.695465087890625, + "learning_rate": 1e-06, + "loss": 0.4803, + "num_input_tokens_seen": 338998860, + "step": 6051 + }, + { + "epoch": 13.476614699331849, + "loss": 0.4964655339717865, + "loss_ce": 0.0001276445691473782, + "loss_iou": 0.21875, + "loss_num": 0.01190185546875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 338998860, + "step": 6051 + }, + { + "epoch": 13.478841870824054, + "grad_norm": 21.691905975341797, + "learning_rate": 1e-06, + "loss": 0.6199, + "num_input_tokens_seen": 339051660, + "step": 6052 + }, + { + "epoch": 13.478841870824054, + "loss": 0.5180391073226929, + "loss_ce": 9.476025297772139e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.015869140625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 339051660, + "step": 6052 + }, + { + "epoch": 13.481069042316259, + "grad_norm": 17.7266845703125, + "learning_rate": 1e-06, + "loss": 0.5722, + "num_input_tokens_seen": 339107488, + "step": 6053 + }, + { + "epoch": 13.481069042316259, + "loss": 0.4541052579879761, + "loss_ce": 0.00012577215966302902, + "loss_iou": 0.205078125, + "loss_num": 0.0087890625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 339107488, + "step": 6053 + }, + { + "epoch": 13.483296213808464, + "grad_norm": 15.927001953125, + "learning_rate": 1e-06, + "loss": 0.4988, + "num_input_tokens_seen": 339162448, + "step": 6054 + }, + { + "epoch": 13.483296213808464, + "loss": 0.6007080078125, + "loss_ce": 0.00012205771054141223, + "loss_iou": 0.26171875, + "loss_num": 0.0157470703125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 339162448, + "step": 6054 + }, + { + "epoch": 13.485523385300668, + "grad_norm": 23.987932205200195, + "learning_rate": 1e-06, + "loss": 0.6223, + "num_input_tokens_seen": 339219908, + "step": 6055 + }, + { + "epoch": 13.485523385300668, + "loss": 0.4472610354423523, + "loss_ce": 0.00011747775715775788, + "loss_iou": 0.1953125, + "loss_num": 0.011474609375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 339219908, + "step": 6055 + }, + { + "epoch": 13.487750556792873, + "grad_norm": 26.649721145629883, + "learning_rate": 1e-06, + "loss": 0.3933, + "num_input_tokens_seen": 339274728, + "step": 6056 + }, + { + "epoch": 13.487750556792873, + "loss": 0.37924766540527344, + "loss_ce": 9.728018630994484e-05, + "loss_iou": 0.1640625, + "loss_num": 0.0101318359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 339274728, + "step": 6056 + }, + { + "epoch": 13.489977728285078, + "grad_norm": 11.94345760345459, + "learning_rate": 1e-06, + "loss": 0.3739, + "num_input_tokens_seen": 339332392, + "step": 6057 + }, + { + "epoch": 13.489977728285078, + "loss": 0.36925819516181946, + "loss_ce": 0.00011755910963984206, + "loss_iou": 0.1728515625, + "loss_num": 0.004791259765625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 339332392, + "step": 6057 + }, + { + "epoch": 13.492204899777283, + "grad_norm": 19.75101089477539, + "learning_rate": 1e-06, + "loss": 0.5577, + "num_input_tokens_seen": 339389392, + "step": 6058 + }, + { + "epoch": 13.492204899777283, + "loss": 0.7065274715423584, + "loss_ce": 0.00010661823762347922, + "loss_iou": 0.279296875, + "loss_num": 0.0294189453125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 339389392, + "step": 6058 + }, + { + "epoch": 13.494432071269488, + "grad_norm": 15.584753036499023, + "learning_rate": 1e-06, + "loss": 0.3834, + "num_input_tokens_seen": 339446776, + "step": 6059 + }, + { + "epoch": 13.494432071269488, + "loss": 0.4769487977027893, + "loss_ce": 0.00011162673763465136, + "loss_iou": 0.1884765625, + "loss_num": 0.02001953125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 339446776, + "step": 6059 + }, + { + "epoch": 13.496659242761693, + "grad_norm": 17.79234504699707, + "learning_rate": 1e-06, + "loss": 0.504, + "num_input_tokens_seen": 339503364, + "step": 6060 + }, + { + "epoch": 13.496659242761693, + "loss": 0.5225712060928345, + "loss_ce": 0.00011029178131138906, + "loss_iou": 0.2001953125, + "loss_num": 0.0242919921875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 339503364, + "step": 6060 + }, + { + "epoch": 13.498886414253898, + "grad_norm": 16.18769073486328, + "learning_rate": 1e-06, + "loss": 0.4275, + "num_input_tokens_seen": 339561052, + "step": 6061 + }, + { + "epoch": 13.498886414253898, + "loss": 0.40653717517852783, + "loss_ce": 0.00013457259046845138, + "loss_iou": 0.166015625, + "loss_num": 0.0150146484375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 339561052, + "step": 6061 + }, + { + "epoch": 13.501113585746102, + "grad_norm": 47.07794952392578, + "learning_rate": 1e-06, + "loss": 0.4221, + "num_input_tokens_seen": 339615028, + "step": 6062 + }, + { + "epoch": 13.501113585746102, + "loss": 0.5137805342674255, + "loss_ce": 0.00010863743227673694, + "loss_iou": 0.220703125, + "loss_num": 0.0145263671875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 339615028, + "step": 6062 + }, + { + "epoch": 13.503340757238307, + "grad_norm": 23.532276153564453, + "learning_rate": 1e-06, + "loss": 0.5707, + "num_input_tokens_seen": 339671324, + "step": 6063 + }, + { + "epoch": 13.503340757238307, + "loss": 0.5221333503723145, + "loss_ce": 0.00016070085985120386, + "loss_iou": 0.2314453125, + "loss_num": 0.01171875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 339671324, + "step": 6063 + }, + { + "epoch": 13.505567928730512, + "grad_norm": 15.250123977661133, + "learning_rate": 1e-06, + "loss": 0.3295, + "num_input_tokens_seen": 339726244, + "step": 6064 + }, + { + "epoch": 13.505567928730512, + "loss": 0.3152201473712921, + "loss_ce": 9.563060302753001e-05, + "loss_iou": 0.12451171875, + "loss_num": 0.01318359375, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 339726244, + "step": 6064 + }, + { + "epoch": 13.507795100222717, + "grad_norm": 30.90220832824707, + "learning_rate": 1e-06, + "loss": 0.6724, + "num_input_tokens_seen": 339782572, + "step": 6065 + }, + { + "epoch": 13.507795100222717, + "loss": 0.5396183729171753, + "loss_ce": 0.00012867330224253237, + "loss_iou": 0.234375, + "loss_num": 0.0142822265625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 339782572, + "step": 6065 + }, + { + "epoch": 13.510022271714922, + "grad_norm": 23.081607818603516, + "learning_rate": 1e-06, + "loss": 0.4508, + "num_input_tokens_seen": 339836684, + "step": 6066 + }, + { + "epoch": 13.510022271714922, + "loss": 0.37926536798477173, + "loss_ce": 0.00011500762775540352, + "loss_iou": 0.1611328125, + "loss_num": 0.01123046875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 339836684, + "step": 6066 + }, + { + "epoch": 13.512249443207127, + "grad_norm": 16.77637481689453, + "learning_rate": 1e-06, + "loss": 0.4764, + "num_input_tokens_seen": 339895292, + "step": 6067 + }, + { + "epoch": 13.512249443207127, + "loss": 0.5568765997886658, + "loss_ce": 0.00011388568964321166, + "loss_iou": 0.2451171875, + "loss_num": 0.01318359375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 339895292, + "step": 6067 + }, + { + "epoch": 13.514476614699332, + "grad_norm": 14.442283630371094, + "learning_rate": 1e-06, + "loss": 0.3909, + "num_input_tokens_seen": 339952572, + "step": 6068 + }, + { + "epoch": 13.514476614699332, + "loss": 0.33204570412635803, + "loss_ce": 0.000136529139126651, + "loss_iou": 0.13671875, + "loss_num": 0.011474609375, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 339952572, + "step": 6068 + }, + { + "epoch": 13.516703786191536, + "grad_norm": 28.08944320678711, + "learning_rate": 1e-06, + "loss": 0.4879, + "num_input_tokens_seen": 340010084, + "step": 6069 + }, + { + "epoch": 13.516703786191536, + "loss": 0.3379353880882263, + "loss_ce": 0.00010579422814771533, + "loss_iou": 0.142578125, + "loss_num": 0.01055908203125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 340010084, + "step": 6069 + }, + { + "epoch": 13.518930957683741, + "grad_norm": 17.983135223388672, + "learning_rate": 1e-06, + "loss": 0.4794, + "num_input_tokens_seen": 340068108, + "step": 6070 + }, + { + "epoch": 13.518930957683741, + "loss": 0.3784530460834503, + "loss_ce": 9.612538269720972e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.01373291015625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 340068108, + "step": 6070 + }, + { + "epoch": 13.521158129175946, + "grad_norm": 31.96721076965332, + "learning_rate": 1e-06, + "loss": 0.4647, + "num_input_tokens_seen": 340125068, + "step": 6071 + }, + { + "epoch": 13.521158129175946, + "loss": 0.4451667070388794, + "loss_ce": 9.836716344580054e-05, + "loss_iou": 0.1875, + "loss_num": 0.0140380859375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 340125068, + "step": 6071 + }, + { + "epoch": 13.523385300668151, + "grad_norm": 13.225302696228027, + "learning_rate": 1e-06, + "loss": 0.4289, + "num_input_tokens_seen": 340183096, + "step": 6072 + }, + { + "epoch": 13.523385300668151, + "loss": 0.4312567710876465, + "loss_ce": 0.00010440793994348496, + "loss_iou": 0.19140625, + "loss_num": 0.00970458984375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 340183096, + "step": 6072 + }, + { + "epoch": 13.525612472160356, + "grad_norm": 11.266600608825684, + "learning_rate": 1e-06, + "loss": 0.2725, + "num_input_tokens_seen": 340239536, + "step": 6073 + }, + { + "epoch": 13.525612472160356, + "loss": 0.353848934173584, + "loss_ce": 0.00011968802573392168, + "loss_iou": 0.1572265625, + "loss_num": 0.00787353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 340239536, + "step": 6073 + }, + { + "epoch": 13.52783964365256, + "grad_norm": 17.064437866210938, + "learning_rate": 1e-06, + "loss": 0.4341, + "num_input_tokens_seen": 340295956, + "step": 6074 + }, + { + "epoch": 13.52783964365256, + "loss": 0.6365495920181274, + "loss_ce": 0.00013600349484477192, + "loss_iou": 0.25390625, + "loss_num": 0.0255126953125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 340295956, + "step": 6074 + }, + { + "epoch": 13.530066815144766, + "grad_norm": 22.03349494934082, + "learning_rate": 1e-06, + "loss": 0.4328, + "num_input_tokens_seen": 340353864, + "step": 6075 + }, + { + "epoch": 13.530066815144766, + "loss": 0.44873154163360596, + "loss_ce": 0.00012314703781157732, + "loss_iou": 0.2119140625, + "loss_num": 0.005126953125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 340353864, + "step": 6075 + }, + { + "epoch": 13.53229398663697, + "grad_norm": 19.127866744995117, + "learning_rate": 1e-06, + "loss": 0.6748, + "num_input_tokens_seen": 340407388, + "step": 6076 + }, + { + "epoch": 13.53229398663697, + "loss": 0.7257640361785889, + "loss_ce": 0.0001475829049013555, + "loss_iou": 0.26953125, + "loss_num": 0.03759765625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 340407388, + "step": 6076 + }, + { + "epoch": 13.534521158129175, + "grad_norm": 21.562355041503906, + "learning_rate": 1e-06, + "loss": 0.5317, + "num_input_tokens_seen": 340463944, + "step": 6077 + }, + { + "epoch": 13.534521158129175, + "loss": 0.5379794836044312, + "loss_ce": 0.00013768361532129347, + "loss_iou": 0.2255859375, + "loss_num": 0.017333984375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 340463944, + "step": 6077 + }, + { + "epoch": 13.53674832962138, + "grad_norm": 18.547332763671875, + "learning_rate": 1e-06, + "loss": 0.4863, + "num_input_tokens_seen": 340516692, + "step": 6078 + }, + { + "epoch": 13.53674832962138, + "loss": 0.3476347327232361, + "loss_ce": 0.0001005365265882574, + "loss_iou": 0.16015625, + "loss_num": 0.005523681640625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 340516692, + "step": 6078 + }, + { + "epoch": 13.538975501113585, + "grad_norm": 29.104293823242188, + "learning_rate": 1e-06, + "loss": 0.5956, + "num_input_tokens_seen": 340571300, + "step": 6079 + }, + { + "epoch": 13.538975501113585, + "loss": 0.6296223402023315, + "loss_ce": 0.00010570493031991646, + "loss_iou": 0.2578125, + "loss_num": 0.02294921875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 340571300, + "step": 6079 + }, + { + "epoch": 13.54120267260579, + "grad_norm": 21.421527862548828, + "learning_rate": 1e-06, + "loss": 0.4915, + "num_input_tokens_seen": 340628576, + "step": 6080 + }, + { + "epoch": 13.54120267260579, + "loss": 0.39269521832466125, + "loss_ce": 0.00011710192484315485, + "loss_iou": 0.1826171875, + "loss_num": 0.00543212890625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 340628576, + "step": 6080 + }, + { + "epoch": 13.543429844097995, + "grad_norm": 20.787569046020508, + "learning_rate": 1e-06, + "loss": 0.5235, + "num_input_tokens_seen": 340685408, + "step": 6081 + }, + { + "epoch": 13.543429844097995, + "loss": 0.7552393078804016, + "loss_ce": 0.0008448001462966204, + "loss_iou": 0.30859375, + "loss_num": 0.027099609375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 340685408, + "step": 6081 + }, + { + "epoch": 13.5456570155902, + "grad_norm": 18.21969985961914, + "learning_rate": 1e-06, + "loss": 0.5217, + "num_input_tokens_seen": 340741140, + "step": 6082 + }, + { + "epoch": 13.5456570155902, + "loss": 0.579943060874939, + "loss_ce": 0.00010910046694334596, + "loss_iou": 0.25390625, + "loss_num": 0.01416015625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 340741140, + "step": 6082 + }, + { + "epoch": 13.547884187082406, + "grad_norm": 22.53654670715332, + "learning_rate": 1e-06, + "loss": 0.4789, + "num_input_tokens_seen": 340796904, + "step": 6083 + }, + { + "epoch": 13.547884187082406, + "loss": 0.3791908621788025, + "loss_ce": 0.00010148352885153145, + "loss_iou": 0.171875, + "loss_num": 0.006927490234375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 340796904, + "step": 6083 + }, + { + "epoch": 13.550111358574611, + "grad_norm": 25.32193946838379, + "learning_rate": 1e-06, + "loss": 0.3296, + "num_input_tokens_seen": 340852732, + "step": 6084 + }, + { + "epoch": 13.550111358574611, + "loss": 0.39512139558792114, + "loss_ce": 0.00010186532745137811, + "loss_iou": 0.1689453125, + "loss_num": 0.0115966796875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 340852732, + "step": 6084 + }, + { + "epoch": 13.552338530066816, + "grad_norm": 16.27466583251953, + "learning_rate": 1e-06, + "loss": 0.3523, + "num_input_tokens_seen": 340909556, + "step": 6085 + }, + { + "epoch": 13.552338530066816, + "loss": 0.38939177989959717, + "loss_ce": 0.00010956230107694864, + "loss_iou": 0.1806640625, + "loss_num": 0.005706787109375, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 340909556, + "step": 6085 + }, + { + "epoch": 13.55456570155902, + "grad_norm": 82.14557647705078, + "learning_rate": 1e-06, + "loss": 0.5106, + "num_input_tokens_seen": 340967244, + "step": 6086 + }, + { + "epoch": 13.55456570155902, + "loss": 0.40943825244903564, + "loss_ce": 0.00013651512563228607, + "loss_iou": 0.1728515625, + "loss_num": 0.0126953125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 340967244, + "step": 6086 + }, + { + "epoch": 13.556792873051226, + "grad_norm": 24.278966903686523, + "learning_rate": 1e-06, + "loss": 0.3729, + "num_input_tokens_seen": 341022336, + "step": 6087 + }, + { + "epoch": 13.556792873051226, + "loss": 0.3812282681465149, + "loss_ce": 0.00012474997492972761, + "loss_iou": 0.1689453125, + "loss_num": 0.0086669921875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 341022336, + "step": 6087 + }, + { + "epoch": 13.55902004454343, + "grad_norm": 17.74454689025879, + "learning_rate": 1e-06, + "loss": 0.5962, + "num_input_tokens_seen": 341078164, + "step": 6088 + }, + { + "epoch": 13.55902004454343, + "loss": 0.34824979305267334, + "loss_ce": 0.00010525665857130662, + "loss_iou": 0.150390625, + "loss_num": 0.0096435546875, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 341078164, + "step": 6088 + }, + { + "epoch": 13.561247216035635, + "grad_norm": 15.242683410644531, + "learning_rate": 1e-06, + "loss": 0.3475, + "num_input_tokens_seen": 341135916, + "step": 6089 + }, + { + "epoch": 13.561247216035635, + "loss": 0.34324419498443604, + "loss_ce": 0.00010453617142047733, + "loss_iou": 0.158203125, + "loss_num": 0.005462646484375, + "loss_xval": 0.34375, + "num_input_tokens_seen": 341135916, + "step": 6089 + }, + { + "epoch": 13.56347438752784, + "grad_norm": 19.24591827392578, + "learning_rate": 1e-06, + "loss": 0.5041, + "num_input_tokens_seen": 341190420, + "step": 6090 + }, + { + "epoch": 13.56347438752784, + "loss": 0.36112260818481445, + "loss_ce": 0.00016069506818894297, + "loss_iou": 0.1708984375, + "loss_num": 0.003875732421875, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 341190420, + "step": 6090 + }, + { + "epoch": 13.565701559020045, + "grad_norm": 20.001699447631836, + "learning_rate": 1e-06, + "loss": 0.7154, + "num_input_tokens_seen": 341245680, + "step": 6091 + }, + { + "epoch": 13.565701559020045, + "loss": 0.670586109161377, + "loss_ce": 0.00017597324040252715, + "loss_iou": 0.2890625, + "loss_num": 0.0184326171875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 341245680, + "step": 6091 + }, + { + "epoch": 13.56792873051225, + "grad_norm": 21.868173599243164, + "learning_rate": 1e-06, + "loss": 0.4888, + "num_input_tokens_seen": 341299816, + "step": 6092 + }, + { + "epoch": 13.56792873051225, + "loss": 0.49984219670295715, + "loss_ce": 0.00011683723278110847, + "loss_iou": 0.21484375, + "loss_num": 0.01397705078125, + "loss_xval": 0.5, + "num_input_tokens_seen": 341299816, + "step": 6092 + }, + { + "epoch": 13.570155902004455, + "grad_norm": 26.928741455078125, + "learning_rate": 1e-06, + "loss": 0.4256, + "num_input_tokens_seen": 341351004, + "step": 6093 + }, + { + "epoch": 13.570155902004455, + "loss": 0.5890183448791504, + "loss_ce": 0.00015114745474420488, + "loss_iou": 0.2392578125, + "loss_num": 0.0220947265625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 341351004, + "step": 6093 + }, + { + "epoch": 13.57238307349666, + "grad_norm": 15.756858825683594, + "learning_rate": 1e-06, + "loss": 0.4628, + "num_input_tokens_seen": 341406828, + "step": 6094 + }, + { + "epoch": 13.57238307349666, + "loss": 0.608989953994751, + "loss_ce": 0.00010319190187146887, + "loss_iou": 0.259765625, + "loss_num": 0.01806640625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 341406828, + "step": 6094 + }, + { + "epoch": 13.574610244988865, + "grad_norm": 16.31437110900879, + "learning_rate": 1e-06, + "loss": 0.5845, + "num_input_tokens_seen": 341463276, + "step": 6095 + }, + { + "epoch": 13.574610244988865, + "loss": 0.5452686548233032, + "loss_ce": 0.0001026621539494954, + "loss_iou": 0.2412109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 341463276, + "step": 6095 + }, + { + "epoch": 13.57683741648107, + "grad_norm": 22.215185165405273, + "learning_rate": 1e-06, + "loss": 0.4455, + "num_input_tokens_seen": 341520420, + "step": 6096 + }, + { + "epoch": 13.57683741648107, + "loss": 0.4676268398761749, + "loss_ce": 9.754978236742318e-05, + "loss_iou": 0.208984375, + "loss_num": 0.00994873046875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 341520420, + "step": 6096 + }, + { + "epoch": 13.579064587973274, + "grad_norm": 15.833555221557617, + "learning_rate": 1e-06, + "loss": 0.5093, + "num_input_tokens_seen": 341577448, + "step": 6097 + }, + { + "epoch": 13.579064587973274, + "loss": 0.5223226547241211, + "loss_ce": 0.00010583880066405982, + "loss_iou": 0.2314453125, + "loss_num": 0.0118408203125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 341577448, + "step": 6097 + }, + { + "epoch": 13.58129175946548, + "grad_norm": 22.064420700073242, + "learning_rate": 1e-06, + "loss": 0.4894, + "num_input_tokens_seen": 341634480, + "step": 6098 + }, + { + "epoch": 13.58129175946548, + "loss": 0.40562325716018677, + "loss_ce": 0.00010569434380158782, + "loss_iou": 0.1796875, + "loss_num": 0.00909423828125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 341634480, + "step": 6098 + }, + { + "epoch": 13.583518930957684, + "grad_norm": 16.516891479492188, + "learning_rate": 1e-06, + "loss": 0.4845, + "num_input_tokens_seen": 341690908, + "step": 6099 + }, + { + "epoch": 13.583518930957684, + "loss": 0.36192750930786133, + "loss_ce": 0.00011110490595456213, + "loss_iou": 0.1591796875, + "loss_num": 0.00885009765625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 341690908, + "step": 6099 + }, + { + "epoch": 13.585746102449889, + "grad_norm": 30.61893081665039, + "learning_rate": 1e-06, + "loss": 0.4235, + "num_input_tokens_seen": 341744072, + "step": 6100 + }, + { + "epoch": 13.585746102449889, + "loss": 0.38622716069221497, + "loss_ce": 0.0001187745074275881, + "loss_iou": 0.1728515625, + "loss_num": 0.008056640625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 341744072, + "step": 6100 + }, + { + "epoch": 13.587973273942094, + "grad_norm": 14.904489517211914, + "learning_rate": 1e-06, + "loss": 0.3583, + "num_input_tokens_seen": 341802688, + "step": 6101 + }, + { + "epoch": 13.587973273942094, + "loss": 0.3524148464202881, + "loss_ce": 0.00011993409134447575, + "loss_iou": 0.1640625, + "loss_num": 0.004791259765625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 341802688, + "step": 6101 + }, + { + "epoch": 13.590200445434299, + "grad_norm": 24.03605842590332, + "learning_rate": 1e-06, + "loss": 0.4686, + "num_input_tokens_seen": 341857712, + "step": 6102 + }, + { + "epoch": 13.590200445434299, + "loss": 0.44786059856414795, + "loss_ce": 0.00022875834838487208, + "loss_iou": 0.1904296875, + "loss_num": 0.0133056640625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 341857712, + "step": 6102 + }, + { + "epoch": 13.592427616926503, + "grad_norm": 41.318817138671875, + "learning_rate": 1e-06, + "loss": 0.4597, + "num_input_tokens_seen": 341911152, + "step": 6103 + }, + { + "epoch": 13.592427616926503, + "loss": 0.3626706302165985, + "loss_ce": 0.0001218135585077107, + "loss_iou": 0.1650390625, + "loss_num": 0.006591796875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 341911152, + "step": 6103 + }, + { + "epoch": 13.594654788418708, + "grad_norm": 27.05071449279785, + "learning_rate": 1e-06, + "loss": 0.4711, + "num_input_tokens_seen": 341965840, + "step": 6104 + }, + { + "epoch": 13.594654788418708, + "loss": 0.4232081174850464, + "loss_ce": 0.00011239905143156648, + "loss_iou": 0.181640625, + "loss_num": 0.0118408203125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 341965840, + "step": 6104 + }, + { + "epoch": 13.596881959910913, + "grad_norm": 16.308061599731445, + "learning_rate": 1e-06, + "loss": 0.4512, + "num_input_tokens_seen": 342024032, + "step": 6105 + }, + { + "epoch": 13.596881959910913, + "loss": 0.41770923137664795, + "loss_ce": 0.0001066841505235061, + "loss_iou": 0.1845703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 342024032, + "step": 6105 + }, + { + "epoch": 13.599109131403118, + "grad_norm": 15.531981468200684, + "learning_rate": 1e-06, + "loss": 0.3986, + "num_input_tokens_seen": 342078468, + "step": 6106 + }, + { + "epoch": 13.599109131403118, + "loss": 0.4702088534832001, + "loss_ce": 0.00011607841588556767, + "loss_iou": 0.20703125, + "loss_num": 0.01104736328125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 342078468, + "step": 6106 + }, + { + "epoch": 13.601336302895323, + "grad_norm": 21.346141815185547, + "learning_rate": 1e-06, + "loss": 0.4758, + "num_input_tokens_seen": 342134352, + "step": 6107 + }, + { + "epoch": 13.601336302895323, + "loss": 0.43420565128326416, + "loss_ce": 0.00012361952394712716, + "loss_iou": 0.1923828125, + "loss_num": 0.009765625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 342134352, + "step": 6107 + }, + { + "epoch": 13.603563474387528, + "grad_norm": 18.531774520874023, + "learning_rate": 1e-06, + "loss": 0.4012, + "num_input_tokens_seen": 342192804, + "step": 6108 + }, + { + "epoch": 13.603563474387528, + "loss": 0.3070923388004303, + "loss_ce": 8.548818004783243e-05, + "loss_iou": 0.13671875, + "loss_num": 0.00689697265625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 342192804, + "step": 6108 + }, + { + "epoch": 13.605790645879733, + "grad_norm": 26.71335792541504, + "learning_rate": 1e-06, + "loss": 0.5715, + "num_input_tokens_seen": 342249992, + "step": 6109 + }, + { + "epoch": 13.605790645879733, + "loss": 0.5036748051643372, + "loss_ce": 0.00013476383173838258, + "loss_iou": 0.203125, + "loss_num": 0.0194091796875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 342249992, + "step": 6109 + }, + { + "epoch": 13.608017817371937, + "grad_norm": 18.72128677368164, + "learning_rate": 1e-06, + "loss": 0.4366, + "num_input_tokens_seen": 342306096, + "step": 6110 + }, + { + "epoch": 13.608017817371937, + "loss": 0.39169156551361084, + "loss_ce": 0.0001129057418438606, + "loss_iou": 0.1787109375, + "loss_num": 0.00665283203125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 342306096, + "step": 6110 + }, + { + "epoch": 13.610244988864142, + "grad_norm": 19.031667709350586, + "learning_rate": 1e-06, + "loss": 0.4382, + "num_input_tokens_seen": 342360516, + "step": 6111 + }, + { + "epoch": 13.610244988864142, + "loss": 0.417463481426239, + "loss_ce": 0.00010505890531931072, + "loss_iou": 0.181640625, + "loss_num": 0.0108642578125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 342360516, + "step": 6111 + }, + { + "epoch": 13.612472160356347, + "grad_norm": 16.581167221069336, + "learning_rate": 1e-06, + "loss": 0.4032, + "num_input_tokens_seen": 342419136, + "step": 6112 + }, + { + "epoch": 13.612472160356347, + "loss": 0.46885186433792114, + "loss_ce": 0.00010187575389863923, + "loss_iou": 0.19140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 342419136, + "step": 6112 + }, + { + "epoch": 13.614699331848552, + "grad_norm": 21.586606979370117, + "learning_rate": 1e-06, + "loss": 0.4784, + "num_input_tokens_seen": 342474436, + "step": 6113 + }, + { + "epoch": 13.614699331848552, + "loss": 0.6273195743560791, + "loss_ce": 0.00024438908440060914, + "loss_iou": 0.26171875, + "loss_num": 0.02099609375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 342474436, + "step": 6113 + }, + { + "epoch": 13.616926503340757, + "grad_norm": 19.1909122467041, + "learning_rate": 1e-06, + "loss": 0.4076, + "num_input_tokens_seen": 342531496, + "step": 6114 + }, + { + "epoch": 13.616926503340757, + "loss": 0.5540522933006287, + "loss_ce": 9.722121467348188e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.0125732421875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 342531496, + "step": 6114 + }, + { + "epoch": 13.619153674832962, + "grad_norm": 15.85007381439209, + "learning_rate": 1e-06, + "loss": 0.4735, + "num_input_tokens_seen": 342587152, + "step": 6115 + }, + { + "epoch": 13.619153674832962, + "loss": 0.47263315320014954, + "loss_ce": 0.000709333224222064, + "loss_iou": 0.19921875, + "loss_num": 0.014892578125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 342587152, + "step": 6115 + }, + { + "epoch": 13.621380846325167, + "grad_norm": 18.876895904541016, + "learning_rate": 1e-06, + "loss": 0.4915, + "num_input_tokens_seen": 342642656, + "step": 6116 + }, + { + "epoch": 13.621380846325167, + "loss": 0.5616579651832581, + "loss_ce": 0.0001345211494481191, + "loss_iou": 0.2392578125, + "loss_num": 0.016845703125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 342642656, + "step": 6116 + }, + { + "epoch": 13.623608017817372, + "grad_norm": 18.534420013427734, + "learning_rate": 1e-06, + "loss": 0.4426, + "num_input_tokens_seen": 342696908, + "step": 6117 + }, + { + "epoch": 13.623608017817372, + "loss": 0.55345219373703, + "loss_ce": 0.00010746420593932271, + "loss_iou": 0.2421875, + "loss_num": 0.01373291015625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 342696908, + "step": 6117 + }, + { + "epoch": 13.625835189309576, + "grad_norm": 13.372831344604492, + "learning_rate": 1e-06, + "loss": 0.4674, + "num_input_tokens_seen": 342754328, + "step": 6118 + }, + { + "epoch": 13.625835189309576, + "loss": 0.5626158714294434, + "loss_ce": 0.00011584434105316177, + "loss_iou": 0.216796875, + "loss_num": 0.0257568359375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 342754328, + "step": 6118 + }, + { + "epoch": 13.628062360801781, + "grad_norm": 34.681114196777344, + "learning_rate": 1e-06, + "loss": 0.5469, + "num_input_tokens_seen": 342812480, + "step": 6119 + }, + { + "epoch": 13.628062360801781, + "loss": 0.5858219861984253, + "loss_ce": 0.000128652696730569, + "loss_iou": 0.259765625, + "loss_num": 0.01300048828125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 342812480, + "step": 6119 + }, + { + "epoch": 13.630289532293986, + "grad_norm": 20.8580265045166, + "learning_rate": 1e-06, + "loss": 0.5921, + "num_input_tokens_seen": 342868916, + "step": 6120 + }, + { + "epoch": 13.630289532293986, + "loss": 0.49220043420791626, + "loss_ce": 0.00013499979104381055, + "loss_iou": 0.2275390625, + "loss_num": 0.00750732421875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 342868916, + "step": 6120 + }, + { + "epoch": 13.632516703786191, + "grad_norm": 28.065649032592773, + "learning_rate": 1e-06, + "loss": 0.4325, + "num_input_tokens_seen": 342923592, + "step": 6121 + }, + { + "epoch": 13.632516703786191, + "loss": 0.41245564818382263, + "loss_ce": 0.00010210397886112332, + "loss_iou": 0.189453125, + "loss_num": 0.006622314453125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 342923592, + "step": 6121 + }, + { + "epoch": 13.634743875278396, + "grad_norm": 21.294170379638672, + "learning_rate": 1e-06, + "loss": 0.4094, + "num_input_tokens_seen": 342978364, + "step": 6122 + }, + { + "epoch": 13.634743875278396, + "loss": 0.43907153606414795, + "loss_ce": 0.00010670385381672531, + "loss_iou": 0.19921875, + "loss_num": 0.00823974609375, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 342978364, + "step": 6122 + }, + { + "epoch": 13.6369710467706, + "grad_norm": 15.803836822509766, + "learning_rate": 1e-06, + "loss": 0.6057, + "num_input_tokens_seen": 343034688, + "step": 6123 + }, + { + "epoch": 13.6369710467706, + "loss": 0.6668506264686584, + "loss_ce": 0.00010255983943352476, + "loss_iou": 0.302734375, + "loss_num": 0.0120849609375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 343034688, + "step": 6123 + }, + { + "epoch": 13.639198218262806, + "grad_norm": 16.880170822143555, + "learning_rate": 1e-06, + "loss": 0.5102, + "num_input_tokens_seen": 343091224, + "step": 6124 + }, + { + "epoch": 13.639198218262806, + "loss": 0.4283756613731384, + "loss_ce": 0.0001530120789539069, + "loss_iou": 0.19140625, + "loss_num": 0.009033203125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 343091224, + "step": 6124 + }, + { + "epoch": 13.64142538975501, + "grad_norm": 22.758419036865234, + "learning_rate": 1e-06, + "loss": 0.5235, + "num_input_tokens_seen": 343147108, + "step": 6125 + }, + { + "epoch": 13.64142538975501, + "loss": 0.44180482625961304, + "loss_ce": 0.00015441025607287884, + "loss_iou": 0.1845703125, + "loss_num": 0.01446533203125, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 343147108, + "step": 6125 + }, + { + "epoch": 13.643652561247215, + "grad_norm": 24.595998764038086, + "learning_rate": 1e-06, + "loss": 0.5212, + "num_input_tokens_seen": 343202528, + "step": 6126 + }, + { + "epoch": 13.643652561247215, + "loss": 0.6394776701927185, + "loss_ce": 0.0001954361068783328, + "loss_iou": 0.279296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 343202528, + "step": 6126 + }, + { + "epoch": 13.64587973273942, + "grad_norm": 13.369487762451172, + "learning_rate": 1e-06, + "loss": 0.383, + "num_input_tokens_seen": 343259188, + "step": 6127 + }, + { + "epoch": 13.64587973273942, + "loss": 0.451729953289032, + "loss_ce": 0.0004359965678304434, + "loss_iou": 0.1826171875, + "loss_num": 0.01708984375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 343259188, + "step": 6127 + }, + { + "epoch": 13.648106904231625, + "grad_norm": 20.52755355834961, + "learning_rate": 1e-06, + "loss": 0.4196, + "num_input_tokens_seen": 343316188, + "step": 6128 + }, + { + "epoch": 13.648106904231625, + "loss": 0.3270159065723419, + "loss_ce": 0.00011160006397403777, + "loss_iou": 0.154296875, + "loss_num": 0.003692626953125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 343316188, + "step": 6128 + }, + { + "epoch": 13.65033407572383, + "grad_norm": 21.22062873840332, + "learning_rate": 1e-06, + "loss": 0.4893, + "num_input_tokens_seen": 343369164, + "step": 6129 + }, + { + "epoch": 13.65033407572383, + "loss": 0.4859585165977478, + "loss_ce": 0.00011869698209920898, + "loss_iou": 0.2158203125, + "loss_num": 0.01104736328125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 343369164, + "step": 6129 + }, + { + "epoch": 13.652561247216035, + "grad_norm": 21.014507293701172, + "learning_rate": 1e-06, + "loss": 0.5389, + "num_input_tokens_seen": 343425304, + "step": 6130 + }, + { + "epoch": 13.652561247216035, + "loss": 0.4401944875717163, + "loss_ce": 0.00013100114301778376, + "loss_iou": 0.19921875, + "loss_num": 0.0084228515625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 343425304, + "step": 6130 + }, + { + "epoch": 13.654788418708241, + "grad_norm": 25.746784210205078, + "learning_rate": 1e-06, + "loss": 0.5463, + "num_input_tokens_seen": 343483376, + "step": 6131 + }, + { + "epoch": 13.654788418708241, + "loss": 0.7010675668716431, + "loss_ce": 0.00013978403876535594, + "loss_iou": 0.302734375, + "loss_num": 0.0189208984375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 343483376, + "step": 6131 + }, + { + "epoch": 13.657015590200446, + "grad_norm": 13.484420776367188, + "learning_rate": 1e-06, + "loss": 0.4436, + "num_input_tokens_seen": 343540884, + "step": 6132 + }, + { + "epoch": 13.657015590200446, + "loss": 0.44479644298553467, + "loss_ce": 9.430477803107351e-05, + "loss_iou": 0.201171875, + "loss_num": 0.008544921875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 343540884, + "step": 6132 + }, + { + "epoch": 13.659242761692651, + "grad_norm": 27.06049346923828, + "learning_rate": 1e-06, + "loss": 0.4747, + "num_input_tokens_seen": 343592412, + "step": 6133 + }, + { + "epoch": 13.659242761692651, + "loss": 0.40442535281181335, + "loss_ce": 0.00012847778270952404, + "loss_iou": 0.173828125, + "loss_num": 0.0113525390625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 343592412, + "step": 6133 + }, + { + "epoch": 13.661469933184856, + "grad_norm": 17.517444610595703, + "learning_rate": 1e-06, + "loss": 0.5275, + "num_input_tokens_seen": 343646720, + "step": 6134 + }, + { + "epoch": 13.661469933184856, + "loss": 0.44858014583587646, + "loss_ce": 0.00012434981181286275, + "loss_iou": 0.201171875, + "loss_num": 0.009033203125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 343646720, + "step": 6134 + }, + { + "epoch": 13.66369710467706, + "grad_norm": 13.671886444091797, + "learning_rate": 1e-06, + "loss": 0.4997, + "num_input_tokens_seen": 343703044, + "step": 6135 + }, + { + "epoch": 13.66369710467706, + "loss": 0.3881835341453552, + "loss_ce": 0.00015255186008289456, + "loss_iou": 0.173828125, + "loss_num": 0.00799560546875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 343703044, + "step": 6135 + }, + { + "epoch": 13.665924276169266, + "grad_norm": 28.485546112060547, + "learning_rate": 1e-06, + "loss": 0.4675, + "num_input_tokens_seen": 343759188, + "step": 6136 + }, + { + "epoch": 13.665924276169266, + "loss": 0.4605761170387268, + "loss_ce": 0.00012687427806667984, + "loss_iou": 0.1904296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 343759188, + "step": 6136 + }, + { + "epoch": 13.66815144766147, + "grad_norm": 23.070680618286133, + "learning_rate": 1e-06, + "loss": 0.3344, + "num_input_tokens_seen": 343815236, + "step": 6137 + }, + { + "epoch": 13.66815144766147, + "loss": 0.29162734746932983, + "loss_ce": 0.0001234151714015752, + "loss_iou": 0.1103515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 343815236, + "step": 6137 + }, + { + "epoch": 13.670378619153675, + "grad_norm": 19.009531021118164, + "learning_rate": 1e-06, + "loss": 0.6555, + "num_input_tokens_seen": 343872764, + "step": 6138 + }, + { + "epoch": 13.670378619153675, + "loss": 0.7084763050079346, + "loss_ce": 0.00010225173900835216, + "loss_iou": 0.27734375, + "loss_num": 0.0303955078125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 343872764, + "step": 6138 + }, + { + "epoch": 13.67260579064588, + "grad_norm": 15.859116554260254, + "learning_rate": 1e-06, + "loss": 0.5408, + "num_input_tokens_seen": 343930516, + "step": 6139 + }, + { + "epoch": 13.67260579064588, + "loss": 0.6228233575820923, + "loss_ce": 0.0001426705566700548, + "loss_iou": 0.26953125, + "loss_num": 0.0164794921875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 343930516, + "step": 6139 + }, + { + "epoch": 13.674832962138085, + "grad_norm": 16.675167083740234, + "learning_rate": 1e-06, + "loss": 0.6865, + "num_input_tokens_seen": 343987948, + "step": 6140 + }, + { + "epoch": 13.674832962138085, + "loss": 0.7435585856437683, + "loss_ce": 0.0013711238279938698, + "loss_iou": 0.296875, + "loss_num": 0.0296630859375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 343987948, + "step": 6140 + }, + { + "epoch": 13.67706013363029, + "grad_norm": 18.57615852355957, + "learning_rate": 1e-06, + "loss": 0.5157, + "num_input_tokens_seen": 344043668, + "step": 6141 + }, + { + "epoch": 13.67706013363029, + "loss": 0.5919273495674133, + "loss_ce": 0.0001304733450524509, + "loss_iou": 0.2470703125, + "loss_num": 0.01953125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 344043668, + "step": 6141 + }, + { + "epoch": 13.679287305122495, + "grad_norm": 17.68915367126465, + "learning_rate": 1e-06, + "loss": 0.3544, + "num_input_tokens_seen": 344098800, + "step": 6142 + }, + { + "epoch": 13.679287305122495, + "loss": 0.3864895701408386, + "loss_ce": 0.00013704363664146513, + "loss_iou": 0.1748046875, + "loss_num": 0.00738525390625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 344098800, + "step": 6142 + }, + { + "epoch": 13.6815144766147, + "grad_norm": 25.44866371154785, + "learning_rate": 1e-06, + "loss": 0.3647, + "num_input_tokens_seen": 344156784, + "step": 6143 + }, + { + "epoch": 13.6815144766147, + "loss": 0.3855780065059662, + "loss_ce": 0.00011047557200072333, + "loss_iou": 0.171875, + "loss_num": 0.00848388671875, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 344156784, + "step": 6143 + }, + { + "epoch": 13.683741648106905, + "grad_norm": 15.047554969787598, + "learning_rate": 1e-06, + "loss": 0.3801, + "num_input_tokens_seen": 344215212, + "step": 6144 + }, + { + "epoch": 13.683741648106905, + "loss": 0.2978753447532654, + "loss_ce": 8.48290219437331e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.007568359375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 344215212, + "step": 6144 + }, + { + "epoch": 13.68596881959911, + "grad_norm": 48.20288848876953, + "learning_rate": 1e-06, + "loss": 0.4122, + "num_input_tokens_seen": 344271784, + "step": 6145 + }, + { + "epoch": 13.68596881959911, + "loss": 0.40208396315574646, + "loss_ce": 0.00010644205030985177, + "loss_iou": 0.185546875, + "loss_num": 0.006317138671875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 344271784, + "step": 6145 + }, + { + "epoch": 13.688195991091314, + "grad_norm": 71.81108093261719, + "learning_rate": 1e-06, + "loss": 0.576, + "num_input_tokens_seen": 344329204, + "step": 6146 + }, + { + "epoch": 13.688195991091314, + "loss": 0.5167222023010254, + "loss_ce": 0.00012068171781720594, + "loss_iou": 0.2236328125, + "loss_num": 0.0140380859375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 344329204, + "step": 6146 + }, + { + "epoch": 13.690423162583519, + "grad_norm": 12.681405067443848, + "learning_rate": 1e-06, + "loss": 0.4676, + "num_input_tokens_seen": 344385616, + "step": 6147 + }, + { + "epoch": 13.690423162583519, + "loss": 0.5067310929298401, + "loss_ce": 0.0005054936627857387, + "loss_iou": 0.1962890625, + "loss_num": 0.0230712890625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 344385616, + "step": 6147 + }, + { + "epoch": 13.692650334075724, + "grad_norm": 25.468963623046875, + "learning_rate": 1e-06, + "loss": 0.3891, + "num_input_tokens_seen": 344442500, + "step": 6148 + }, + { + "epoch": 13.692650334075724, + "loss": 0.443470299243927, + "loss_ce": 0.0001109380682464689, + "loss_iou": 0.1962890625, + "loss_num": 0.0101318359375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 344442500, + "step": 6148 + }, + { + "epoch": 13.694877505567929, + "grad_norm": 29.40053367614746, + "learning_rate": 1e-06, + "loss": 0.5509, + "num_input_tokens_seen": 344499324, + "step": 6149 + }, + { + "epoch": 13.694877505567929, + "loss": 0.5093941688537598, + "loss_ce": 0.00011685908248182386, + "loss_iou": 0.23046875, + "loss_num": 0.0096435546875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 344499324, + "step": 6149 + }, + { + "epoch": 13.697104677060134, + "grad_norm": 21.182100296020508, + "learning_rate": 1e-06, + "loss": 0.4615, + "num_input_tokens_seen": 344556944, + "step": 6150 + }, + { + "epoch": 13.697104677060134, + "loss": 0.568960428237915, + "loss_ce": 0.00011279522004770115, + "loss_iou": 0.23828125, + "loss_num": 0.0185546875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 344556944, + "step": 6150 + }, + { + "epoch": 13.699331848552339, + "grad_norm": 34.760746002197266, + "learning_rate": 1e-06, + "loss": 0.4418, + "num_input_tokens_seen": 344613508, + "step": 6151 + }, + { + "epoch": 13.699331848552339, + "loss": 0.49267372488975525, + "loss_ce": 0.00012003096344415098, + "loss_iou": 0.2138671875, + "loss_num": 0.01312255859375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 344613508, + "step": 6151 + }, + { + "epoch": 13.701559020044543, + "grad_norm": 34.04802322387695, + "learning_rate": 1e-06, + "loss": 0.4694, + "num_input_tokens_seen": 344669656, + "step": 6152 + }, + { + "epoch": 13.701559020044543, + "loss": 0.38548383116722107, + "loss_ce": 0.00010786794882733375, + "loss_iou": 0.1796875, + "loss_num": 0.00506591796875, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 344669656, + "step": 6152 + }, + { + "epoch": 13.703786191536748, + "grad_norm": 19.769567489624023, + "learning_rate": 1e-06, + "loss": 0.4088, + "num_input_tokens_seen": 344725372, + "step": 6153 + }, + { + "epoch": 13.703786191536748, + "loss": 0.49809765815734863, + "loss_ce": 0.00017283624038100243, + "loss_iou": 0.216796875, + "loss_num": 0.01318359375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 344725372, + "step": 6153 + }, + { + "epoch": 13.706013363028953, + "grad_norm": 23.97338104248047, + "learning_rate": 1e-06, + "loss": 0.7272, + "num_input_tokens_seen": 344781164, + "step": 6154 + }, + { + "epoch": 13.706013363028953, + "loss": 0.5699473023414612, + "loss_ce": 0.0001230589987244457, + "loss_iou": 0.23828125, + "loss_num": 0.0186767578125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 344781164, + "step": 6154 + }, + { + "epoch": 13.708240534521158, + "grad_norm": 39.72063064575195, + "learning_rate": 1e-06, + "loss": 0.4243, + "num_input_tokens_seen": 344836304, + "step": 6155 + }, + { + "epoch": 13.708240534521158, + "loss": 0.37404966354370117, + "loss_ce": 0.00014831856242381036, + "loss_iou": 0.1630859375, + "loss_num": 0.00970458984375, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 344836304, + "step": 6155 + }, + { + "epoch": 13.710467706013363, + "grad_norm": 23.376707077026367, + "learning_rate": 1e-06, + "loss": 0.5827, + "num_input_tokens_seen": 344894388, + "step": 6156 + }, + { + "epoch": 13.710467706013363, + "loss": 0.42637595534324646, + "loss_ce": 0.00010641853441484272, + "loss_iou": 0.189453125, + "loss_num": 0.00933837890625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 344894388, + "step": 6156 + }, + { + "epoch": 13.712694877505568, + "grad_norm": 13.803214073181152, + "learning_rate": 1e-06, + "loss": 0.3646, + "num_input_tokens_seen": 344951236, + "step": 6157 + }, + { + "epoch": 13.712694877505568, + "loss": 0.3642551600933075, + "loss_ce": 0.00011941190314246342, + "loss_iou": 0.1669921875, + "loss_num": 0.006134033203125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 344951236, + "step": 6157 + }, + { + "epoch": 13.714922048997773, + "grad_norm": 22.018762588500977, + "learning_rate": 1e-06, + "loss": 0.6011, + "num_input_tokens_seen": 345007564, + "step": 6158 + }, + { + "epoch": 13.714922048997773, + "loss": 0.6203541159629822, + "loss_ce": 0.00011484955030027777, + "loss_iou": 0.265625, + "loss_num": 0.0179443359375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 345007564, + "step": 6158 + }, + { + "epoch": 13.717149220489977, + "grad_norm": 17.11638069152832, + "learning_rate": 1e-06, + "loss": 0.4277, + "num_input_tokens_seen": 345064000, + "step": 6159 + }, + { + "epoch": 13.717149220489977, + "loss": 0.5495654344558716, + "loss_ce": 0.00012692378368228674, + "loss_iou": 0.2470703125, + "loss_num": 0.01129150390625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 345064000, + "step": 6159 + }, + { + "epoch": 13.719376391982182, + "grad_norm": 20.523475646972656, + "learning_rate": 1e-06, + "loss": 0.3536, + "num_input_tokens_seen": 345120072, + "step": 6160 + }, + { + "epoch": 13.719376391982182, + "loss": 0.37014520168304443, + "loss_ce": 0.00027214642614126205, + "loss_iou": 0.1650390625, + "loss_num": 0.0081787109375, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 345120072, + "step": 6160 + }, + { + "epoch": 13.721603563474387, + "grad_norm": 32.90972900390625, + "learning_rate": 1e-06, + "loss": 0.5949, + "num_input_tokens_seen": 345176544, + "step": 6161 + }, + { + "epoch": 13.721603563474387, + "loss": 0.44534897804260254, + "loss_ce": 9.752592450240627e-05, + "loss_iou": 0.18359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 345176544, + "step": 6161 + }, + { + "epoch": 13.723830734966592, + "grad_norm": 18.18287467956543, + "learning_rate": 1e-06, + "loss": 0.379, + "num_input_tokens_seen": 345233380, + "step": 6162 + }, + { + "epoch": 13.723830734966592, + "loss": 0.2666533589363098, + "loss_ce": 8.995502139441669e-05, + "loss_iou": 0.11279296875, + "loss_num": 0.00830078125, + "loss_xval": 0.265625, + "num_input_tokens_seen": 345233380, + "step": 6162 + }, + { + "epoch": 13.726057906458797, + "grad_norm": 15.033989906311035, + "learning_rate": 1e-06, + "loss": 0.6433, + "num_input_tokens_seen": 345289992, + "step": 6163 + }, + { + "epoch": 13.726057906458797, + "loss": 0.7077064514160156, + "loss_ce": 0.0009193975711241364, + "loss_iou": 0.3046875, + "loss_num": 0.0194091796875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 345289992, + "step": 6163 + }, + { + "epoch": 13.728285077951002, + "grad_norm": 12.950138092041016, + "learning_rate": 1e-06, + "loss": 0.4458, + "num_input_tokens_seen": 345345848, + "step": 6164 + }, + { + "epoch": 13.728285077951002, + "loss": 0.27292484045028687, + "loss_ce": 9.76905066636391e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.00787353515625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 345345848, + "step": 6164 + }, + { + "epoch": 13.730512249443207, + "grad_norm": 17.856874465942383, + "learning_rate": 1e-06, + "loss": 0.4661, + "num_input_tokens_seen": 345402492, + "step": 6165 + }, + { + "epoch": 13.730512249443207, + "loss": 0.5341655015945435, + "loss_ce": 0.00022992276353761554, + "loss_iou": 0.2109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 345402492, + "step": 6165 + }, + { + "epoch": 13.732739420935411, + "grad_norm": 17.676965713500977, + "learning_rate": 1e-06, + "loss": 0.4308, + "num_input_tokens_seen": 345459516, + "step": 6166 + }, + { + "epoch": 13.732739420935411, + "loss": 0.5320949554443359, + "loss_ce": 0.00011253212142037228, + "loss_iou": 0.2373046875, + "loss_num": 0.0113525390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 345459516, + "step": 6166 + }, + { + "epoch": 13.734966592427616, + "grad_norm": 19.01355743408203, + "learning_rate": 1e-06, + "loss": 0.4474, + "num_input_tokens_seen": 345514608, + "step": 6167 + }, + { + "epoch": 13.734966592427616, + "loss": 0.4031931757926941, + "loss_ce": 0.0001169660608866252, + "loss_iou": 0.1845703125, + "loss_num": 0.006866455078125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 345514608, + "step": 6167 + }, + { + "epoch": 13.737193763919821, + "grad_norm": 19.29640769958496, + "learning_rate": 1e-06, + "loss": 0.5236, + "num_input_tokens_seen": 345571524, + "step": 6168 + }, + { + "epoch": 13.737193763919821, + "loss": 0.491804838180542, + "loss_ce": 0.00010560073133092374, + "loss_iou": 0.2177734375, + "loss_num": 0.01123046875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 345571524, + "step": 6168 + }, + { + "epoch": 13.739420935412026, + "grad_norm": 21.25169563293457, + "learning_rate": 1e-06, + "loss": 0.4618, + "num_input_tokens_seen": 345627688, + "step": 6169 + }, + { + "epoch": 13.739420935412026, + "loss": 0.4485178589820862, + "loss_ce": 0.00012308621080592275, + "loss_iou": 0.1923828125, + "loss_num": 0.0126953125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 345627688, + "step": 6169 + }, + { + "epoch": 13.74164810690423, + "grad_norm": 16.865291595458984, + "learning_rate": 1e-06, + "loss": 0.5896, + "num_input_tokens_seen": 345684380, + "step": 6170 + }, + { + "epoch": 13.74164810690423, + "loss": 0.5811623334884644, + "loss_ce": 0.00010766902414616197, + "loss_iou": 0.26171875, + "loss_num": 0.01123046875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 345684380, + "step": 6170 + }, + { + "epoch": 13.743875278396436, + "grad_norm": 20.563676834106445, + "learning_rate": 1e-06, + "loss": 0.5899, + "num_input_tokens_seen": 345740580, + "step": 6171 + }, + { + "epoch": 13.743875278396436, + "loss": 0.44517427682876587, + "loss_ce": 0.00010591248428681865, + "loss_iou": 0.1923828125, + "loss_num": 0.01202392578125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 345740580, + "step": 6171 + }, + { + "epoch": 13.74610244988864, + "grad_norm": 16.448043823242188, + "learning_rate": 1e-06, + "loss": 0.5441, + "num_input_tokens_seen": 345799396, + "step": 6172 + }, + { + "epoch": 13.74610244988864, + "loss": 0.6715624332427979, + "loss_ce": 0.00011470924073364586, + "loss_iou": 0.2890625, + "loss_num": 0.0181884765625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 345799396, + "step": 6172 + }, + { + "epoch": 13.748329621380847, + "grad_norm": 47.44057846069336, + "learning_rate": 1e-06, + "loss": 0.5772, + "num_input_tokens_seen": 345855724, + "step": 6173 + }, + { + "epoch": 13.748329621380847, + "loss": 0.7166056632995605, + "loss_ce": 0.00017500200192444026, + "loss_iou": 0.322265625, + "loss_num": 0.0147705078125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 345855724, + "step": 6173 + }, + { + "epoch": 13.750556792873052, + "grad_norm": 16.256362915039062, + "learning_rate": 1e-06, + "loss": 0.5361, + "num_input_tokens_seen": 345912732, + "step": 6174 + }, + { + "epoch": 13.750556792873052, + "loss": 0.4795916676521301, + "loss_ce": 9.949602099368349e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.01904296875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 345912732, + "step": 6174 + }, + { + "epoch": 13.752783964365257, + "grad_norm": 21.9121036529541, + "learning_rate": 1e-06, + "loss": 0.495, + "num_input_tokens_seen": 345968416, + "step": 6175 + }, + { + "epoch": 13.752783964365257, + "loss": 0.3780425786972046, + "loss_ce": 0.00011291421833448112, + "loss_iou": 0.169921875, + "loss_num": 0.007476806640625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 345968416, + "step": 6175 + }, + { + "epoch": 13.755011135857462, + "grad_norm": 15.832091331481934, + "learning_rate": 1e-06, + "loss": 0.6457, + "num_input_tokens_seen": 346026396, + "step": 6176 + }, + { + "epoch": 13.755011135857462, + "loss": 0.4686300754547119, + "loss_ce": 0.00012421910651028156, + "loss_iou": 0.2138671875, + "loss_num": 0.00830078125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 346026396, + "step": 6176 + }, + { + "epoch": 13.757238307349667, + "grad_norm": 21.13070297241211, + "learning_rate": 1e-06, + "loss": 0.5723, + "num_input_tokens_seen": 346080884, + "step": 6177 + }, + { + "epoch": 13.757238307349667, + "loss": 0.5670015811920166, + "loss_ce": 0.00010699567792471498, + "loss_iou": 0.244140625, + "loss_num": 0.0159912109375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 346080884, + "step": 6177 + }, + { + "epoch": 13.759465478841872, + "grad_norm": 23.448190689086914, + "learning_rate": 1e-06, + "loss": 0.5021, + "num_input_tokens_seen": 346134556, + "step": 6178 + }, + { + "epoch": 13.759465478841872, + "loss": 0.47252950072288513, + "loss_ce": 0.00011740469199139625, + "loss_iou": 0.1923828125, + "loss_num": 0.017333984375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 346134556, + "step": 6178 + }, + { + "epoch": 13.761692650334076, + "grad_norm": 18.187257766723633, + "learning_rate": 1e-06, + "loss": 0.3635, + "num_input_tokens_seen": 346188980, + "step": 6179 + }, + { + "epoch": 13.761692650334076, + "loss": 0.35398566722869873, + "loss_ce": 0.00010384367487858981, + "loss_iou": 0.1630859375, + "loss_num": 0.005615234375, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 346188980, + "step": 6179 + }, + { + "epoch": 13.763919821826281, + "grad_norm": 31.72901153564453, + "learning_rate": 1e-06, + "loss": 0.3924, + "num_input_tokens_seen": 346245600, + "step": 6180 + }, + { + "epoch": 13.763919821826281, + "loss": 0.3832623362541199, + "loss_ce": 8.365388202946633e-05, + "loss_iou": 0.173828125, + "loss_num": 0.007110595703125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 346245600, + "step": 6180 + }, + { + "epoch": 13.766146993318486, + "grad_norm": 21.675914764404297, + "learning_rate": 1e-06, + "loss": 0.4688, + "num_input_tokens_seen": 346299216, + "step": 6181 + }, + { + "epoch": 13.766146993318486, + "loss": 0.5464923977851868, + "loss_ce": 0.0001056903856806457, + "loss_iou": 0.216796875, + "loss_num": 0.0225830078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 346299216, + "step": 6181 + }, + { + "epoch": 13.768374164810691, + "grad_norm": 24.515111923217773, + "learning_rate": 1e-06, + "loss": 0.5284, + "num_input_tokens_seen": 346353404, + "step": 6182 + }, + { + "epoch": 13.768374164810691, + "loss": 0.47796687483787537, + "loss_ce": 0.00018368265591561794, + "loss_iou": 0.197265625, + "loss_num": 0.0164794921875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 346353404, + "step": 6182 + }, + { + "epoch": 13.770601336302896, + "grad_norm": 13.62939453125, + "learning_rate": 1e-06, + "loss": 0.4113, + "num_input_tokens_seen": 346409016, + "step": 6183 + }, + { + "epoch": 13.770601336302896, + "loss": 0.395562082529068, + "loss_ce": 0.00011531692871358246, + "loss_iou": 0.1708984375, + "loss_num": 0.0106201171875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 346409016, + "step": 6183 + }, + { + "epoch": 13.7728285077951, + "grad_norm": 14.132533073425293, + "learning_rate": 1e-06, + "loss": 0.418, + "num_input_tokens_seen": 346464900, + "step": 6184 + }, + { + "epoch": 13.7728285077951, + "loss": 0.416792631149292, + "loss_ce": 0.00016667114687152207, + "loss_iou": 0.1923828125, + "loss_num": 0.006439208984375, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 346464900, + "step": 6184 + }, + { + "epoch": 13.775055679287306, + "grad_norm": 78.39266967773438, + "learning_rate": 1e-06, + "loss": 0.6477, + "num_input_tokens_seen": 346521172, + "step": 6185 + }, + { + "epoch": 13.775055679287306, + "loss": 0.45030081272125244, + "loss_ce": 0.00010552178719080985, + "loss_iou": 0.197265625, + "loss_num": 0.01116943359375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 346521172, + "step": 6185 + }, + { + "epoch": 13.77728285077951, + "grad_norm": 23.97130584716797, + "learning_rate": 1e-06, + "loss": 0.5591, + "num_input_tokens_seen": 346576376, + "step": 6186 + }, + { + "epoch": 13.77728285077951, + "loss": 0.7201170921325684, + "loss_ce": 0.00014645657211076468, + "loss_iou": 0.322265625, + "loss_num": 0.0155029296875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 346576376, + "step": 6186 + }, + { + "epoch": 13.779510022271715, + "grad_norm": 16.92420196533203, + "learning_rate": 1e-06, + "loss": 0.4977, + "num_input_tokens_seen": 346633420, + "step": 6187 + }, + { + "epoch": 13.779510022271715, + "loss": 0.633420467376709, + "loss_ce": 0.00011968903709203005, + "loss_iou": 0.26171875, + "loss_num": 0.022216796875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 346633420, + "step": 6187 + }, + { + "epoch": 13.78173719376392, + "grad_norm": 18.442733764648438, + "learning_rate": 1e-06, + "loss": 0.5796, + "num_input_tokens_seen": 346689692, + "step": 6188 + }, + { + "epoch": 13.78173719376392, + "loss": 0.7934539318084717, + "loss_ce": 0.00011891472968272865, + "loss_iou": 0.345703125, + "loss_num": 0.020263671875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 346689692, + "step": 6188 + }, + { + "epoch": 13.783964365256125, + "grad_norm": 21.14725685119629, + "learning_rate": 1e-06, + "loss": 0.3824, + "num_input_tokens_seen": 346745212, + "step": 6189 + }, + { + "epoch": 13.783964365256125, + "loss": 0.3489500880241394, + "loss_ce": 0.00013415844296105206, + "loss_iou": 0.15625, + "loss_num": 0.007232666015625, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 346745212, + "step": 6189 + }, + { + "epoch": 13.78619153674833, + "grad_norm": 23.13970375061035, + "learning_rate": 1e-06, + "loss": 0.3716, + "num_input_tokens_seen": 346803220, + "step": 6190 + }, + { + "epoch": 13.78619153674833, + "loss": 0.43285852670669556, + "loss_ce": 0.00011927761079277843, + "loss_iou": 0.205078125, + "loss_num": 0.00439453125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 346803220, + "step": 6190 + }, + { + "epoch": 13.788418708240535, + "grad_norm": 17.715566635131836, + "learning_rate": 1e-06, + "loss": 0.49, + "num_input_tokens_seen": 346858040, + "step": 6191 + }, + { + "epoch": 13.788418708240535, + "loss": 0.29737383127212524, + "loss_ce": 0.0001326015335507691, + "loss_iou": 0.1328125, + "loss_num": 0.0062255859375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 346858040, + "step": 6191 + }, + { + "epoch": 13.79064587973274, + "grad_norm": 25.321876525878906, + "learning_rate": 1e-06, + "loss": 0.5248, + "num_input_tokens_seen": 346914080, + "step": 6192 + }, + { + "epoch": 13.79064587973274, + "loss": 0.6016849279403687, + "loss_ce": 0.00010718886915128678, + "loss_iou": 0.25390625, + "loss_num": 0.0185546875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 346914080, + "step": 6192 + }, + { + "epoch": 13.792873051224944, + "grad_norm": 15.98505973815918, + "learning_rate": 1e-06, + "loss": 0.5626, + "num_input_tokens_seen": 346967296, + "step": 6193 + }, + { + "epoch": 13.792873051224944, + "loss": 0.677720844745636, + "loss_ce": 0.0001085417898138985, + "loss_iou": 0.30078125, + "loss_num": 0.01507568359375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 346967296, + "step": 6193 + }, + { + "epoch": 13.79510022271715, + "grad_norm": 16.50602149963379, + "learning_rate": 1e-06, + "loss": 0.6727, + "num_input_tokens_seen": 347023416, + "step": 6194 + }, + { + "epoch": 13.79510022271715, + "loss": 0.8356806039810181, + "loss_ce": 0.00010931311408057809, + "loss_iou": 0.33203125, + "loss_num": 0.033935546875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 347023416, + "step": 6194 + }, + { + "epoch": 13.797327394209354, + "grad_norm": 25.20470428466797, + "learning_rate": 1e-06, + "loss": 0.4481, + "num_input_tokens_seen": 347078308, + "step": 6195 + }, + { + "epoch": 13.797327394209354, + "loss": 0.39737868309020996, + "loss_ce": 0.00016190031601581722, + "loss_iou": 0.1787109375, + "loss_num": 0.00799560546875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 347078308, + "step": 6195 + }, + { + "epoch": 13.799554565701559, + "grad_norm": 19.322734832763672, + "learning_rate": 1e-06, + "loss": 0.5349, + "num_input_tokens_seen": 347134956, + "step": 6196 + }, + { + "epoch": 13.799554565701559, + "loss": 0.510852038860321, + "loss_ce": 0.0007202195120044053, + "loss_iou": 0.2236328125, + "loss_num": 0.01239013671875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 347134956, + "step": 6196 + }, + { + "epoch": 13.801781737193764, + "grad_norm": 25.328693389892578, + "learning_rate": 1e-06, + "loss": 0.7473, + "num_input_tokens_seen": 347193060, + "step": 6197 + }, + { + "epoch": 13.801781737193764, + "loss": 0.9841349720954895, + "loss_ce": 0.00012613809667527676, + "loss_iou": 0.40234375, + "loss_num": 0.03564453125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 347193060, + "step": 6197 + }, + { + "epoch": 13.804008908685969, + "grad_norm": 17.876684188842773, + "learning_rate": 1e-06, + "loss": 0.4347, + "num_input_tokens_seen": 347247788, + "step": 6198 + }, + { + "epoch": 13.804008908685969, + "loss": 0.5318405628204346, + "loss_ce": 0.00010229815234197304, + "loss_iou": 0.2255859375, + "loss_num": 0.01611328125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 347247788, + "step": 6198 + }, + { + "epoch": 13.806236080178174, + "grad_norm": 21.041126251220703, + "learning_rate": 1e-06, + "loss": 0.6075, + "num_input_tokens_seen": 347302380, + "step": 6199 + }, + { + "epoch": 13.806236080178174, + "loss": 0.7548277378082275, + "loss_ce": 0.00025013202684931457, + "loss_iou": 0.314453125, + "loss_num": 0.0257568359375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 347302380, + "step": 6199 + }, + { + "epoch": 13.808463251670378, + "grad_norm": 14.308113098144531, + "learning_rate": 1e-06, + "loss": 0.5091, + "num_input_tokens_seen": 347355180, + "step": 6200 + }, + { + "epoch": 13.808463251670378, + "loss": 0.6481593251228333, + "loss_ce": 0.00014905582065694034, + "loss_iou": 0.255859375, + "loss_num": 0.027099609375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 347355180, + "step": 6200 + }, + { + "epoch": 13.810690423162583, + "grad_norm": 24.352041244506836, + "learning_rate": 1e-06, + "loss": 0.4425, + "num_input_tokens_seen": 347410936, + "step": 6201 + }, + { + "epoch": 13.810690423162583, + "loss": 0.40238019824028015, + "loss_ce": 9.74750000750646e-05, + "loss_iou": 0.17578125, + "loss_num": 0.01025390625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 347410936, + "step": 6201 + }, + { + "epoch": 13.812917594654788, + "grad_norm": 12.734814643859863, + "learning_rate": 1e-06, + "loss": 0.4761, + "num_input_tokens_seen": 347466652, + "step": 6202 + }, + { + "epoch": 13.812917594654788, + "loss": 0.5395181179046631, + "loss_ce": 0.00010463084618095309, + "loss_iou": 0.23046875, + "loss_num": 0.015380859375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 347466652, + "step": 6202 + }, + { + "epoch": 13.815144766146993, + "grad_norm": 19.63954734802246, + "learning_rate": 1e-06, + "loss": 0.5178, + "num_input_tokens_seen": 347523340, + "step": 6203 + }, + { + "epoch": 13.815144766146993, + "loss": 0.6680713295936584, + "loss_ce": 0.00010255551023874432, + "loss_iou": 0.294921875, + "loss_num": 0.015380859375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 347523340, + "step": 6203 + }, + { + "epoch": 13.817371937639198, + "grad_norm": 27.84977149963379, + "learning_rate": 1e-06, + "loss": 0.4513, + "num_input_tokens_seen": 347581752, + "step": 6204 + }, + { + "epoch": 13.817371937639198, + "loss": 0.509273886680603, + "loss_ce": 0.00011864411499118432, + "loss_iou": 0.220703125, + "loss_num": 0.0135498046875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 347581752, + "step": 6204 + }, + { + "epoch": 13.819599109131403, + "grad_norm": 18.612722396850586, + "learning_rate": 1e-06, + "loss": 0.5813, + "num_input_tokens_seen": 347641196, + "step": 6205 + }, + { + "epoch": 13.819599109131403, + "loss": 0.6570059061050415, + "loss_ce": 0.00011501011613290757, + "loss_iou": 0.275390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 347641196, + "step": 6205 + }, + { + "epoch": 13.821826280623608, + "grad_norm": 17.84321403503418, + "learning_rate": 1e-06, + "loss": 0.3995, + "num_input_tokens_seen": 347699640, + "step": 6206 + }, + { + "epoch": 13.821826280623608, + "loss": 0.42600324749946594, + "loss_ce": 9.991762635763735e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.00982666015625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 347699640, + "step": 6206 + }, + { + "epoch": 13.824053452115812, + "grad_norm": 17.020217895507812, + "learning_rate": 1e-06, + "loss": 0.5993, + "num_input_tokens_seen": 347755124, + "step": 6207 + }, + { + "epoch": 13.824053452115812, + "loss": 0.4133188724517822, + "loss_ce": 0.00011088576866313815, + "loss_iou": 0.1748046875, + "loss_num": 0.0126953125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 347755124, + "step": 6207 + }, + { + "epoch": 13.826280623608017, + "grad_norm": 16.579845428466797, + "learning_rate": 1e-06, + "loss": 0.5781, + "num_input_tokens_seen": 347809184, + "step": 6208 + }, + { + "epoch": 13.826280623608017, + "loss": 0.5595046281814575, + "loss_ce": 0.00017845621914602816, + "loss_iou": 0.2265625, + "loss_num": 0.0213623046875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 347809184, + "step": 6208 + }, + { + "epoch": 13.828507795100222, + "grad_norm": 21.396059036254883, + "learning_rate": 1e-06, + "loss": 0.5244, + "num_input_tokens_seen": 347865520, + "step": 6209 + }, + { + "epoch": 13.828507795100222, + "loss": 0.5646365880966187, + "loss_ce": 0.00012240419164299965, + "loss_iou": 0.2255859375, + "loss_num": 0.02294921875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 347865520, + "step": 6209 + }, + { + "epoch": 13.830734966592427, + "grad_norm": 12.559648513793945, + "learning_rate": 1e-06, + "loss": 0.4268, + "num_input_tokens_seen": 347922004, + "step": 6210 + }, + { + "epoch": 13.830734966592427, + "loss": 0.5307496786117554, + "loss_ce": 0.00011002724932041019, + "loss_iou": 0.2314453125, + "loss_num": 0.01348876953125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 347922004, + "step": 6210 + }, + { + "epoch": 13.832962138084632, + "grad_norm": 20.28154945373535, + "learning_rate": 1e-06, + "loss": 0.5987, + "num_input_tokens_seen": 347977316, + "step": 6211 + }, + { + "epoch": 13.832962138084632, + "loss": 0.8992745876312256, + "loss_ce": 0.00010463996295584366, + "loss_iou": 0.357421875, + "loss_num": 0.036865234375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 347977316, + "step": 6211 + }, + { + "epoch": 13.835189309576837, + "grad_norm": 21.434589385986328, + "learning_rate": 1e-06, + "loss": 0.5093, + "num_input_tokens_seen": 348031884, + "step": 6212 + }, + { + "epoch": 13.835189309576837, + "loss": 0.5445865392684937, + "loss_ce": 0.0001529275468783453, + "loss_iou": 0.23046875, + "loss_num": 0.0164794921875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 348031884, + "step": 6212 + }, + { + "epoch": 13.837416481069042, + "grad_norm": 13.820694923400879, + "learning_rate": 1e-06, + "loss": 0.5881, + "num_input_tokens_seen": 348088760, + "step": 6213 + }, + { + "epoch": 13.837416481069042, + "loss": 0.5043725371360779, + "loss_ce": 0.0001000510819721967, + "loss_iou": 0.203125, + "loss_num": 0.0196533203125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 348088760, + "step": 6213 + }, + { + "epoch": 13.839643652561247, + "grad_norm": 16.25212287902832, + "learning_rate": 1e-06, + "loss": 0.4709, + "num_input_tokens_seen": 348145232, + "step": 6214 + }, + { + "epoch": 13.839643652561247, + "loss": 0.5301461219787598, + "loss_ce": 0.0001167980080936104, + "loss_iou": 0.23828125, + "loss_num": 0.0106201171875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 348145232, + "step": 6214 + }, + { + "epoch": 13.841870824053451, + "grad_norm": 13.156241416931152, + "learning_rate": 1e-06, + "loss": 0.6487, + "num_input_tokens_seen": 348203452, + "step": 6215 + }, + { + "epoch": 13.841870824053451, + "loss": 0.9121133089065552, + "loss_ce": 0.00012593530118465424, + "loss_iou": 0.396484375, + "loss_num": 0.02392578125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 348203452, + "step": 6215 + }, + { + "epoch": 13.844097995545656, + "grad_norm": 19.261072158813477, + "learning_rate": 1e-06, + "loss": 0.5774, + "num_input_tokens_seen": 348261088, + "step": 6216 + }, + { + "epoch": 13.844097995545656, + "loss": 0.3862478733062744, + "loss_ce": 7.846013613743708e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.0087890625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 348261088, + "step": 6216 + }, + { + "epoch": 13.846325167037861, + "grad_norm": 14.543188095092773, + "learning_rate": 1e-06, + "loss": 0.3715, + "num_input_tokens_seen": 348316648, + "step": 6217 + }, + { + "epoch": 13.846325167037861, + "loss": 0.398532897233963, + "loss_ce": 9.540202154312283e-05, + "loss_iou": 0.171875, + "loss_num": 0.0108642578125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 348316648, + "step": 6217 + }, + { + "epoch": 13.848552338530066, + "grad_norm": 16.337297439575195, + "learning_rate": 1e-06, + "loss": 0.6434, + "num_input_tokens_seen": 348376184, + "step": 6218 + }, + { + "epoch": 13.848552338530066, + "loss": 0.41022244095802307, + "loss_ce": 0.00012721509847324342, + "loss_iou": 0.177734375, + "loss_num": 0.01080322265625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 348376184, + "step": 6218 + }, + { + "epoch": 13.85077951002227, + "grad_norm": 34.131065368652344, + "learning_rate": 1e-06, + "loss": 0.6145, + "num_input_tokens_seen": 348429572, + "step": 6219 + }, + { + "epoch": 13.85077951002227, + "loss": 0.7063025236129761, + "loss_ce": 0.0001257747644558549, + "loss_iou": 0.306640625, + "loss_num": 0.01904296875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 348429572, + "step": 6219 + }, + { + "epoch": 13.853006681514476, + "grad_norm": 17.740169525146484, + "learning_rate": 1e-06, + "loss": 0.4083, + "num_input_tokens_seen": 348487176, + "step": 6220 + }, + { + "epoch": 13.853006681514476, + "loss": 0.4807821214199066, + "loss_ce": 0.00013025107909925282, + "loss_iou": 0.2041015625, + "loss_num": 0.01458740234375, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 348487176, + "step": 6220 + }, + { + "epoch": 13.855233853006682, + "grad_norm": 25.03499984741211, + "learning_rate": 1e-06, + "loss": 0.5935, + "num_input_tokens_seen": 348542216, + "step": 6221 + }, + { + "epoch": 13.855233853006682, + "loss": 0.7307603359222412, + "loss_ce": 0.0001694800885161385, + "loss_iou": 0.30859375, + "loss_num": 0.0228271484375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 348542216, + "step": 6221 + }, + { + "epoch": 13.857461024498887, + "grad_norm": 22.183853149414062, + "learning_rate": 1e-06, + "loss": 0.4923, + "num_input_tokens_seen": 348599416, + "step": 6222 + }, + { + "epoch": 13.857461024498887, + "loss": 0.5290480852127075, + "loss_ce": 0.00011743127834051847, + "loss_iou": 0.24609375, + "loss_num": 0.0074462890625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 348599416, + "step": 6222 + }, + { + "epoch": 13.859688195991092, + "grad_norm": 16.716617584228516, + "learning_rate": 1e-06, + "loss": 0.4975, + "num_input_tokens_seen": 348657108, + "step": 6223 + }, + { + "epoch": 13.859688195991092, + "loss": 0.5516940355300903, + "loss_ce": 0.00011933541827602312, + "loss_iou": 0.2431640625, + "loss_num": 0.012939453125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 348657108, + "step": 6223 + }, + { + "epoch": 13.861915367483297, + "grad_norm": 24.67982292175293, + "learning_rate": 1e-06, + "loss": 0.4872, + "num_input_tokens_seen": 348711952, + "step": 6224 + }, + { + "epoch": 13.861915367483297, + "loss": 0.36566367745399475, + "loss_ce": 0.0001241198042407632, + "loss_iou": 0.158203125, + "loss_num": 0.009765625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 348711952, + "step": 6224 + }, + { + "epoch": 13.864142538975502, + "grad_norm": 23.012540817260742, + "learning_rate": 1e-06, + "loss": 0.4581, + "num_input_tokens_seen": 348767728, + "step": 6225 + }, + { + "epoch": 13.864142538975502, + "loss": 0.5795168280601501, + "loss_ce": 0.00011010624439222738, + "loss_iou": 0.263671875, + "loss_num": 0.0103759765625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 348767728, + "step": 6225 + }, + { + "epoch": 13.866369710467707, + "grad_norm": 26.270526885986328, + "learning_rate": 1e-06, + "loss": 0.3405, + "num_input_tokens_seen": 348821880, + "step": 6226 + }, + { + "epoch": 13.866369710467707, + "loss": 0.3175492286682129, + "loss_ce": 0.00010535772162256762, + "loss_iou": 0.1259765625, + "loss_num": 0.0130615234375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 348821880, + "step": 6226 + }, + { + "epoch": 13.868596881959911, + "grad_norm": 19.418540954589844, + "learning_rate": 1e-06, + "loss": 0.3874, + "num_input_tokens_seen": 348875740, + "step": 6227 + }, + { + "epoch": 13.868596881959911, + "loss": 0.3931885063648224, + "loss_ce": 0.00012211257126182318, + "loss_iou": 0.173828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 348875740, + "step": 6227 + }, + { + "epoch": 13.870824053452116, + "grad_norm": 19.871246337890625, + "learning_rate": 1e-06, + "loss": 0.6113, + "num_input_tokens_seen": 348931196, + "step": 6228 + }, + { + "epoch": 13.870824053452116, + "loss": 0.4289402961730957, + "loss_ce": 0.000107303996628616, + "loss_iou": 0.1982421875, + "loss_num": 0.00634765625, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 348931196, + "step": 6228 + }, + { + "epoch": 13.873051224944321, + "grad_norm": 37.94038772583008, + "learning_rate": 1e-06, + "loss": 0.4865, + "num_input_tokens_seen": 348988736, + "step": 6229 + }, + { + "epoch": 13.873051224944321, + "loss": 0.48054730892181396, + "loss_ce": 0.00013959786156192422, + "loss_iou": 0.20703125, + "loss_num": 0.01348876953125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 348988736, + "step": 6229 + }, + { + "epoch": 13.875278396436526, + "grad_norm": 17.617862701416016, + "learning_rate": 1e-06, + "loss": 0.3775, + "num_input_tokens_seen": 349044504, + "step": 6230 + }, + { + "epoch": 13.875278396436526, + "loss": 0.3631332814693451, + "loss_ce": 9.617961768526584e-05, + "loss_iou": 0.1484375, + "loss_num": 0.01336669921875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 349044504, + "step": 6230 + }, + { + "epoch": 13.877505567928731, + "grad_norm": 19.312604904174805, + "learning_rate": 1e-06, + "loss": 0.533, + "num_input_tokens_seen": 349101952, + "step": 6231 + }, + { + "epoch": 13.877505567928731, + "loss": 0.4969325065612793, + "loss_ce": 0.00010634450882207602, + "loss_iou": 0.220703125, + "loss_num": 0.01104736328125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 349101952, + "step": 6231 + }, + { + "epoch": 13.879732739420936, + "grad_norm": 13.717912673950195, + "learning_rate": 1e-06, + "loss": 0.3243, + "num_input_tokens_seen": 349155880, + "step": 6232 + }, + { + "epoch": 13.879732739420936, + "loss": 0.3837384581565857, + "loss_ce": 0.0001325202756561339, + "loss_iou": 0.171875, + "loss_num": 0.00787353515625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 349155880, + "step": 6232 + }, + { + "epoch": 13.88195991091314, + "grad_norm": 22.239849090576172, + "learning_rate": 1e-06, + "loss": 0.4274, + "num_input_tokens_seen": 349208756, + "step": 6233 + }, + { + "epoch": 13.88195991091314, + "loss": 0.47713950276374817, + "loss_ce": 8.873137267073616e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.0081787109375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 349208756, + "step": 6233 + }, + { + "epoch": 13.884187082405345, + "grad_norm": 17.712202072143555, + "learning_rate": 1e-06, + "loss": 0.4714, + "num_input_tokens_seen": 349265684, + "step": 6234 + }, + { + "epoch": 13.884187082405345, + "loss": 0.5098845958709717, + "loss_ce": 0.00011895706120412797, + "loss_iou": 0.2265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 349265684, + "step": 6234 + }, + { + "epoch": 13.88641425389755, + "grad_norm": 20.694042205810547, + "learning_rate": 1e-06, + "loss": 0.5065, + "num_input_tokens_seen": 349323304, + "step": 6235 + }, + { + "epoch": 13.88641425389755, + "loss": 0.505508542060852, + "loss_ce": 0.00013747680350206792, + "loss_iou": 0.203125, + "loss_num": 0.0196533203125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 349323304, + "step": 6235 + }, + { + "epoch": 13.888641425389755, + "grad_norm": 17.659744262695312, + "learning_rate": 1e-06, + "loss": 0.6468, + "num_input_tokens_seen": 349378928, + "step": 6236 + }, + { + "epoch": 13.888641425389755, + "loss": 0.7527920603752136, + "loss_ce": 0.00010650817421264946, + "loss_iou": 0.326171875, + "loss_num": 0.0205078125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 349378928, + "step": 6236 + }, + { + "epoch": 13.89086859688196, + "grad_norm": 13.622602462768555, + "learning_rate": 1e-06, + "loss": 0.3588, + "num_input_tokens_seen": 349436664, + "step": 6237 + }, + { + "epoch": 13.89086859688196, + "loss": 0.42641109228134155, + "loss_ce": 0.00014156656106933951, + "loss_iou": 0.169921875, + "loss_num": 0.01708984375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 349436664, + "step": 6237 + }, + { + "epoch": 13.893095768374165, + "grad_norm": 16.968406677246094, + "learning_rate": 1e-06, + "loss": 0.4017, + "num_input_tokens_seen": 349492432, + "step": 6238 + }, + { + "epoch": 13.893095768374165, + "loss": 0.4099974036216736, + "loss_ce": 8.531904313713312e-05, + "loss_iou": 0.181640625, + "loss_num": 0.00946044921875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 349492432, + "step": 6238 + }, + { + "epoch": 13.89532293986637, + "grad_norm": 28.478656768798828, + "learning_rate": 1e-06, + "loss": 0.4145, + "num_input_tokens_seen": 349550060, + "step": 6239 + }, + { + "epoch": 13.89532293986637, + "loss": 0.37417882680892944, + "loss_ce": 0.000277488463325426, + "loss_iou": 0.1630859375, + "loss_num": 0.0093994140625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 349550060, + "step": 6239 + }, + { + "epoch": 13.897550111358575, + "grad_norm": 23.06316375732422, + "learning_rate": 1e-06, + "loss": 0.4668, + "num_input_tokens_seen": 349603676, + "step": 6240 + }, + { + "epoch": 13.897550111358575, + "loss": 0.5238842964172363, + "loss_ce": 0.0001416070736013353, + "loss_iou": 0.2236328125, + "loss_num": 0.0152587890625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 349603676, + "step": 6240 + }, + { + "epoch": 13.89977728285078, + "grad_norm": 15.570908546447754, + "learning_rate": 1e-06, + "loss": 0.6621, + "num_input_tokens_seen": 349655680, + "step": 6241 + }, + { + "epoch": 13.89977728285078, + "loss": 0.6655749678611755, + "loss_ce": 0.00010867592936847359, + "loss_iou": 0.263671875, + "loss_num": 0.0272216796875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 349655680, + "step": 6241 + }, + { + "epoch": 13.902004454342984, + "grad_norm": 20.07220458984375, + "learning_rate": 1e-06, + "loss": 0.4852, + "num_input_tokens_seen": 349710520, + "step": 6242 + }, + { + "epoch": 13.902004454342984, + "loss": 0.48412150144577026, + "loss_ce": 0.00011269759124843404, + "loss_iou": 0.22265625, + "loss_num": 0.00787353515625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 349710520, + "step": 6242 + }, + { + "epoch": 13.90423162583519, + "grad_norm": 19.768949508666992, + "learning_rate": 1e-06, + "loss": 0.4714, + "num_input_tokens_seen": 349766024, + "step": 6243 + }, + { + "epoch": 13.90423162583519, + "loss": 0.5210692882537842, + "loss_ce": 0.0003172849537804723, + "loss_iou": 0.212890625, + "loss_num": 0.0191650390625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 349766024, + "step": 6243 + }, + { + "epoch": 13.906458797327394, + "grad_norm": 22.428667068481445, + "learning_rate": 1e-06, + "loss": 0.5109, + "num_input_tokens_seen": 349821500, + "step": 6244 + }, + { + "epoch": 13.906458797327394, + "loss": 0.5417461395263672, + "loss_ce": 0.0001201537816086784, + "loss_iou": 0.2236328125, + "loss_num": 0.0189208984375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 349821500, + "step": 6244 + }, + { + "epoch": 13.908685968819599, + "grad_norm": 14.843369483947754, + "learning_rate": 1e-06, + "loss": 0.4807, + "num_input_tokens_seen": 349877108, + "step": 6245 + }, + { + "epoch": 13.908685968819599, + "loss": 0.6390482187271118, + "loss_ce": 0.0001321482559433207, + "loss_iou": 0.263671875, + "loss_num": 0.0223388671875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 349877108, + "step": 6245 + }, + { + "epoch": 13.910913140311804, + "grad_norm": 77.71258544921875, + "learning_rate": 1e-06, + "loss": 0.5519, + "num_input_tokens_seen": 349930668, + "step": 6246 + }, + { + "epoch": 13.910913140311804, + "loss": 0.7169502377510071, + "loss_ce": 0.00015332532348111272, + "loss_iou": 0.298828125, + "loss_num": 0.024169921875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 349930668, + "step": 6246 + }, + { + "epoch": 13.913140311804009, + "grad_norm": 16.5616512298584, + "learning_rate": 1e-06, + "loss": 0.4834, + "num_input_tokens_seen": 349988348, + "step": 6247 + }, + { + "epoch": 13.913140311804009, + "loss": 0.39076149463653564, + "loss_ce": 0.00013649219181388617, + "loss_iou": 0.1640625, + "loss_num": 0.0126953125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 349988348, + "step": 6247 + }, + { + "epoch": 13.915367483296214, + "grad_norm": 20.010635375976562, + "learning_rate": 1e-06, + "loss": 0.5611, + "num_input_tokens_seen": 350046812, + "step": 6248 + }, + { + "epoch": 13.915367483296214, + "loss": 0.6575692892074585, + "loss_ce": 9.861437138170004e-05, + "loss_iou": 0.296875, + "loss_num": 0.01251220703125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 350046812, + "step": 6248 + }, + { + "epoch": 13.917594654788418, + "grad_norm": 15.879632949829102, + "learning_rate": 1e-06, + "loss": 0.4344, + "num_input_tokens_seen": 350104192, + "step": 6249 + }, + { + "epoch": 13.917594654788418, + "loss": 0.38620513677597046, + "loss_ce": 9.672513988334686e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 350104192, + "step": 6249 + }, + { + "epoch": 13.919821826280623, + "grad_norm": 13.887146949768066, + "learning_rate": 1e-06, + "loss": 0.4196, + "num_input_tokens_seen": 350161060, + "step": 6250 + }, + { + "epoch": 13.919821826280623, + "eval_seeclick_web_CIoU": 0.5887904167175293, + "eval_seeclick_web_GIoU": 0.5874381363391876, + "eval_seeclick_web_IoU": 0.6068101227283478, + "eval_seeclick_web_MAE_all": 0.015451115556061268, + "eval_seeclick_web_MAE_h": 0.007658603135496378, + "eval_seeclick_web_MAE_w": 0.015746516175568104, + "eval_seeclick_web_MAE_x_boxes": 0.009071735199540854, + "eval_seeclick_web_MAE_y_boxes": 0.02139524114318192, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.8951788544654846, + "eval_seeclick_web_loss_ce": 0.00017347045650240034, + "eval_seeclick_web_loss_iou": 0.41064453125, + "eval_seeclick_web_loss_num": 0.01229095458984375, + "eval_seeclick_web_loss_xval": 0.88232421875, + "eval_seeclick_web_runtime": 21.1304, + "eval_seeclick_web_samples_per_second": 2.366, + "eval_seeclick_web_steps_per_second": 0.095, + "num_input_tokens_seen": 350161060, + "step": 6250 + }, + { + "epoch": 13.919821826280623, + "eval_icons_CIoU": 0.2605983316898346, + "eval_icons_GIoU": 0.29324978590011597, + "eval_icons_IoU": 0.3456629067659378, + "eval_icons_MAE_all": 0.06302645802497864, + "eval_icons_MAE_h": 0.033296503126621246, + "eval_icons_MAE_w": 0.07835287041962147, + "eval_icons_MAE_x_boxes": 0.05176934972405434, + "eval_icons_MAE_y_boxes": 0.03694954700767994, + "eval_icons_inside_bbox": 0.6059027910232544, + "eval_icons_loss": 1.7142837047576904, + "eval_icons_loss_ce": 0.00022611367603531107, + "eval_icons_loss_iou": 0.67041015625, + "eval_icons_loss_num": 0.06027984619140625, + "eval_icons_loss_xval": 1.642822265625, + "eval_icons_runtime": 19.5905, + "eval_icons_samples_per_second": 2.552, + "eval_icons_steps_per_second": 0.102, + "num_input_tokens_seen": 350161060, + "step": 6250 + }, + { + "epoch": 13.919821826280623, + "eval_screenspot_CIoU": 0.38164886832237244, + "eval_screenspot_GIoU": 0.39806386828422546, + "eval_screenspot_IoU": 0.4498288830121358, + "eval_screenspot_MAE_all": 0.05408271153767904, + "eval_screenspot_MAE_h": 0.03916273762782415, + "eval_screenspot_MAE_w": 0.056641269475221634, + "eval_screenspot_MAE_x_boxes": 0.06717804819345474, + "eval_screenspot_MAE_y_boxes": 0.036980644799768925, + "eval_screenspot_inside_bbox": 0.7070833245913187, + "eval_screenspot_loss": 1.5276081562042236, + "eval_screenspot_loss_ce": 0.00023197308473754674, + "eval_screenspot_loss_iou": 0.6381022135416666, + "eval_screenspot_loss_num": 0.061542510986328125, + "eval_screenspot_loss_xval": 1.5841471354166667, + "eval_screenspot_runtime": 34.0345, + "eval_screenspot_samples_per_second": 2.615, + "eval_screenspot_steps_per_second": 0.088, + "num_input_tokens_seen": 350161060, + "step": 6250 + }, + { + "epoch": 13.919821826280623, + "eval_compot_CIoU": 0.3389701098203659, + "eval_compot_GIoU": 0.3561352342367172, + "eval_compot_IoU": 0.39824149012565613, + "eval_compot_MAE_all": 0.019840517081320286, + "eval_compot_MAE_h": 0.013703062664717436, + "eval_compot_MAE_w": 0.021303851157426834, + "eval_compot_MAE_x_boxes": 0.030402760952711105, + "eval_compot_MAE_y_boxes": 0.006665045628324151, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4226690530776978, + "eval_compot_loss_ce": 0.0001683719819993712, + "eval_compot_loss_iou": 0.655029296875, + "eval_compot_loss_num": 0.018640518188476562, + "eval_compot_loss_xval": 1.403564453125, + "eval_compot_runtime": 20.5568, + "eval_compot_samples_per_second": 2.432, + "eval_compot_steps_per_second": 0.097, + "num_input_tokens_seen": 350161060, + "step": 6250 + }, + { + "epoch": 13.919821826280623, + "eval_custom_ui_val_CIoU": 0.4785856149262852, + "eval_custom_ui_val_GIoU": 0.4871194263299306, + "eval_custom_ui_val_IoU": 0.5391448040803274, + "eval_custom_ui_val_MAE_all": 0.027749659959226847, + "eval_custom_ui_val_MAE_h": 0.015134843920047084, + "eval_custom_ui_val_MAE_w": 0.03520229996906386, + "eval_custom_ui_val_MAE_x_boxes": 0.03391206937117709, + "eval_custom_ui_val_MAE_y_boxes": 0.01371948312347134, + "eval_custom_ui_val_inside_bbox": 0.7650462985038757, + "eval_custom_ui_val_loss": 1.1787360906600952, + "eval_custom_ui_val_loss_ce": 0.00019364922385041913, + "eval_custom_ui_val_loss_iou": 0.5048014322916666, + "eval_custom_ui_val_loss_num": 0.024472342597113714, + "eval_custom_ui_val_loss_xval": 1.1316189236111112, + "eval_custom_ui_val_runtime": 61.6566, + "eval_custom_ui_val_samples_per_second": 4.298, + "eval_custom_ui_val_steps_per_second": 0.146, + "num_input_tokens_seen": 350161060, + "step": 6250 + }, + { + "epoch": 13.919821826280623, + "loss": 0.8202172517776489, + "loss_ce": 0.00014893364277668297, + "loss_iou": 0.369140625, + "loss_num": 0.016357421875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 350161060, + "step": 6250 + }, + { + "epoch": 13.922048997772828, + "grad_norm": 18.40821647644043, + "learning_rate": 1e-06, + "loss": 0.5081, + "num_input_tokens_seen": 350218528, + "step": 6251 + }, + { + "epoch": 13.922048997772828, + "loss": 0.4476110637187958, + "loss_ce": 0.00010129214206244797, + "loss_iou": 0.19140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 350218528, + "step": 6251 + }, + { + "epoch": 13.924276169265033, + "grad_norm": 28.36144256591797, + "learning_rate": 1e-06, + "loss": 0.4074, + "num_input_tokens_seen": 350276052, + "step": 6252 + }, + { + "epoch": 13.924276169265033, + "loss": 0.3698493242263794, + "loss_ce": 9.837304969551042e-05, + "loss_iou": 0.171875, + "loss_num": 0.005462646484375, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 350276052, + "step": 6252 + }, + { + "epoch": 13.926503340757238, + "grad_norm": 21.219388961791992, + "learning_rate": 1e-06, + "loss": 0.4852, + "num_input_tokens_seen": 350333528, + "step": 6253 + }, + { + "epoch": 13.926503340757238, + "loss": 0.43346133828163147, + "loss_ce": 0.00011172999802511185, + "loss_iou": 0.197265625, + "loss_num": 0.00775146484375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 350333528, + "step": 6253 + }, + { + "epoch": 13.928730512249443, + "grad_norm": 12.669755935668945, + "learning_rate": 1e-06, + "loss": 0.3754, + "num_input_tokens_seen": 350391076, + "step": 6254 + }, + { + "epoch": 13.928730512249443, + "loss": 0.30187392234802246, + "loss_ce": 0.00011611805530264974, + "loss_iou": 0.140625, + "loss_num": 0.0042724609375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 350391076, + "step": 6254 + }, + { + "epoch": 13.930957683741648, + "grad_norm": 13.667628288269043, + "learning_rate": 1e-06, + "loss": 0.2749, + "num_input_tokens_seen": 350447892, + "step": 6255 + }, + { + "epoch": 13.930957683741648, + "loss": 0.21279628574848175, + "loss_ce": 8.877179789124057e-05, + "loss_iou": 0.0859375, + "loss_num": 0.00823974609375, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 350447892, + "step": 6255 + }, + { + "epoch": 13.933184855233852, + "grad_norm": 28.72197723388672, + "learning_rate": 1e-06, + "loss": 0.47, + "num_input_tokens_seen": 350503796, + "step": 6256 + }, + { + "epoch": 13.933184855233852, + "loss": 0.46128642559051514, + "loss_ce": 0.00010479921184014529, + "loss_iou": 0.212890625, + "loss_num": 0.006866455078125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 350503796, + "step": 6256 + }, + { + "epoch": 13.935412026726057, + "grad_norm": 21.46579933166504, + "learning_rate": 1e-06, + "loss": 0.5377, + "num_input_tokens_seen": 350558404, + "step": 6257 + }, + { + "epoch": 13.935412026726057, + "loss": 0.32118260860443115, + "loss_ce": 0.00010715379903558642, + "loss_iou": 0.1357421875, + "loss_num": 0.010009765625, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 350558404, + "step": 6257 + }, + { + "epoch": 13.937639198218262, + "grad_norm": 21.574478149414062, + "learning_rate": 1e-06, + "loss": 0.5635, + "num_input_tokens_seen": 350617276, + "step": 6258 + }, + { + "epoch": 13.937639198218262, + "loss": 0.4228344261646271, + "loss_ce": 0.00010491947614355013, + "loss_iou": 0.1875, + "loss_num": 0.00958251953125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 350617276, + "step": 6258 + }, + { + "epoch": 13.939866369710467, + "grad_norm": 23.370849609375, + "learning_rate": 1e-06, + "loss": 0.392, + "num_input_tokens_seen": 350674816, + "step": 6259 + }, + { + "epoch": 13.939866369710467, + "loss": 0.5362622737884521, + "loss_ce": 0.0001294276735279709, + "loss_iou": 0.2392578125, + "loss_num": 0.0115966796875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 350674816, + "step": 6259 + }, + { + "epoch": 13.942093541202672, + "grad_norm": 27.103878021240234, + "learning_rate": 1e-06, + "loss": 0.4583, + "num_input_tokens_seen": 350729952, + "step": 6260 + }, + { + "epoch": 13.942093541202672, + "loss": 0.41460520029067993, + "loss_ce": 0.00011545630695763975, + "loss_iou": 0.1884765625, + "loss_num": 0.00750732421875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 350729952, + "step": 6260 + }, + { + "epoch": 13.944320712694877, + "grad_norm": 17.797826766967773, + "learning_rate": 1e-06, + "loss": 0.3609, + "num_input_tokens_seen": 350784792, + "step": 6261 + }, + { + "epoch": 13.944320712694877, + "loss": 0.33117377758026123, + "loss_ce": 0.00011909526074305177, + "loss_iou": 0.1552734375, + "loss_num": 0.00408935546875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 350784792, + "step": 6261 + }, + { + "epoch": 13.946547884187082, + "grad_norm": 19.662729263305664, + "learning_rate": 1e-06, + "loss": 0.5463, + "num_input_tokens_seen": 350838324, + "step": 6262 + }, + { + "epoch": 13.946547884187082, + "loss": 0.5496599674224854, + "loss_ce": 9.940941527020186e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01318359375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 350838324, + "step": 6262 + }, + { + "epoch": 13.948775055679288, + "grad_norm": 15.954992294311523, + "learning_rate": 1e-06, + "loss": 0.3793, + "num_input_tokens_seen": 350897484, + "step": 6263 + }, + { + "epoch": 13.948775055679288, + "loss": 0.46910548210144043, + "loss_ce": 0.00011134070518892258, + "loss_iou": 0.212890625, + "loss_num": 0.00860595703125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 350897484, + "step": 6263 + }, + { + "epoch": 13.951002227171493, + "grad_norm": 28.188735961914062, + "learning_rate": 1e-06, + "loss": 0.606, + "num_input_tokens_seen": 350953472, + "step": 6264 + }, + { + "epoch": 13.951002227171493, + "loss": 0.5297653079032898, + "loss_ce": 0.00010221092088613659, + "loss_iou": 0.203125, + "loss_num": 0.0247802734375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 350953472, + "step": 6264 + }, + { + "epoch": 13.953229398663698, + "grad_norm": 18.34393310546875, + "learning_rate": 1e-06, + "loss": 0.4006, + "num_input_tokens_seen": 351008120, + "step": 6265 + }, + { + "epoch": 13.953229398663698, + "loss": 0.5186876058578491, + "loss_ce": 0.00013288386981002986, + "loss_iou": 0.224609375, + "loss_num": 0.013671875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 351008120, + "step": 6265 + }, + { + "epoch": 13.955456570155903, + "grad_norm": 19.615419387817383, + "learning_rate": 1e-06, + "loss": 0.5483, + "num_input_tokens_seen": 351064248, + "step": 6266 + }, + { + "epoch": 13.955456570155903, + "loss": 0.7774511575698853, + "loss_ce": 0.00010741624282673001, + "loss_iou": 0.328125, + "loss_num": 0.0242919921875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 351064248, + "step": 6266 + }, + { + "epoch": 13.957683741648108, + "grad_norm": 25.06759262084961, + "learning_rate": 1e-06, + "loss": 0.5834, + "num_input_tokens_seen": 351117008, + "step": 6267 + }, + { + "epoch": 13.957683741648108, + "loss": 0.4678989052772522, + "loss_ce": 0.00012549315579235554, + "loss_iou": 0.20703125, + "loss_num": 0.0108642578125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 351117008, + "step": 6267 + }, + { + "epoch": 13.959910913140313, + "grad_norm": 14.953537940979004, + "learning_rate": 1e-06, + "loss": 0.3766, + "num_input_tokens_seen": 351171192, + "step": 6268 + }, + { + "epoch": 13.959910913140313, + "loss": 0.3148655891418457, + "loss_ce": 0.00010729426867328584, + "loss_iou": 0.140625, + "loss_num": 0.006866455078125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 351171192, + "step": 6268 + }, + { + "epoch": 13.962138084632517, + "grad_norm": 17.59319305419922, + "learning_rate": 1e-06, + "loss": 0.4396, + "num_input_tokens_seen": 351224944, + "step": 6269 + }, + { + "epoch": 13.962138084632517, + "loss": 0.5811617374420166, + "loss_ce": 0.00016810203669592738, + "loss_iou": 0.267578125, + "loss_num": 0.00897216796875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 351224944, + "step": 6269 + }, + { + "epoch": 13.964365256124722, + "grad_norm": 12.526680946350098, + "learning_rate": 1e-06, + "loss": 0.4156, + "num_input_tokens_seen": 351278692, + "step": 6270 + }, + { + "epoch": 13.964365256124722, + "loss": 0.4158197343349457, + "loss_ce": 0.0001092795209842734, + "loss_iou": 0.177734375, + "loss_num": 0.01190185546875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 351278692, + "step": 6270 + }, + { + "epoch": 13.966592427616927, + "grad_norm": 15.219315528869629, + "learning_rate": 1e-06, + "loss": 0.4471, + "num_input_tokens_seen": 351330732, + "step": 6271 + }, + { + "epoch": 13.966592427616927, + "loss": 0.4622631371021271, + "loss_ce": 0.0001049246930051595, + "loss_iou": 0.2119140625, + "loss_num": 0.0078125, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 351330732, + "step": 6271 + }, + { + "epoch": 13.968819599109132, + "grad_norm": 16.425418853759766, + "learning_rate": 1e-06, + "loss": 0.6101, + "num_input_tokens_seen": 351389508, + "step": 6272 + }, + { + "epoch": 13.968819599109132, + "loss": 0.4642532467842102, + "loss_ce": 0.0001418821921106428, + "loss_iou": 0.1708984375, + "loss_num": 0.0245361328125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 351389508, + "step": 6272 + }, + { + "epoch": 13.971046770601337, + "grad_norm": 15.943306922912598, + "learning_rate": 1e-06, + "loss": 0.5201, + "num_input_tokens_seen": 351447368, + "step": 6273 + }, + { + "epoch": 13.971046770601337, + "loss": 0.5003509521484375, + "loss_ce": 0.00010678636317607015, + "loss_iou": 0.2177734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.5, + "num_input_tokens_seen": 351447368, + "step": 6273 + }, + { + "epoch": 13.973273942093542, + "grad_norm": 21.748048782348633, + "learning_rate": 1e-06, + "loss": 0.4717, + "num_input_tokens_seen": 351506568, + "step": 6274 + }, + { + "epoch": 13.973273942093542, + "loss": 0.5563892126083374, + "loss_ce": 0.00011485861614346504, + "loss_iou": 0.228515625, + "loss_num": 0.0198974609375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 351506568, + "step": 6274 + }, + { + "epoch": 13.975501113585747, + "grad_norm": 23.033185958862305, + "learning_rate": 1e-06, + "loss": 0.4948, + "num_input_tokens_seen": 351562164, + "step": 6275 + }, + { + "epoch": 13.975501113585747, + "loss": 0.662088930606842, + "loss_ce": 0.00010161636600969359, + "loss_iou": 0.294921875, + "loss_num": 0.014404296875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 351562164, + "step": 6275 + }, + { + "epoch": 13.977728285077951, + "grad_norm": 14.771820068359375, + "learning_rate": 1e-06, + "loss": 0.3768, + "num_input_tokens_seen": 351616064, + "step": 6276 + }, + { + "epoch": 13.977728285077951, + "loss": 0.5282983779907227, + "loss_ce": 0.00022217544028535485, + "loss_iou": 0.2353515625, + "loss_num": 0.01153564453125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 351616064, + "step": 6276 + }, + { + "epoch": 13.979955456570156, + "grad_norm": 15.470437049865723, + "learning_rate": 1e-06, + "loss": 0.4431, + "num_input_tokens_seen": 351671208, + "step": 6277 + }, + { + "epoch": 13.979955456570156, + "loss": 0.4118441939353943, + "loss_ce": 0.00010104169632541016, + "loss_iou": 0.1865234375, + "loss_num": 0.0076904296875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 351671208, + "step": 6277 + }, + { + "epoch": 13.982182628062361, + "grad_norm": 19.21230697631836, + "learning_rate": 1e-06, + "loss": 0.4367, + "num_input_tokens_seen": 351725684, + "step": 6278 + }, + { + "epoch": 13.982182628062361, + "loss": 0.27793729305267334, + "loss_ce": 0.00010527193080633879, + "loss_iou": 0.11669921875, + "loss_num": 0.0089111328125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 351725684, + "step": 6278 + }, + { + "epoch": 13.984409799554566, + "grad_norm": 21.042078018188477, + "learning_rate": 1e-06, + "loss": 0.3497, + "num_input_tokens_seen": 351782280, + "step": 6279 + }, + { + "epoch": 13.984409799554566, + "loss": 0.2832984924316406, + "loss_ce": 9.53472190303728e-05, + "loss_iou": 0.12451171875, + "loss_num": 0.00677490234375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 351782280, + "step": 6279 + }, + { + "epoch": 13.98663697104677, + "grad_norm": 19.898780822753906, + "learning_rate": 1e-06, + "loss": 0.6716, + "num_input_tokens_seen": 351838828, + "step": 6280 + }, + { + "epoch": 13.98663697104677, + "loss": 0.9190574288368225, + "loss_ce": 0.00011211737000849098, + "loss_iou": 0.36328125, + "loss_num": 0.038818359375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 351838828, + "step": 6280 + }, + { + "epoch": 13.988864142538976, + "grad_norm": 20.307798385620117, + "learning_rate": 1e-06, + "loss": 0.4632, + "num_input_tokens_seen": 351894124, + "step": 6281 + }, + { + "epoch": 13.988864142538976, + "loss": 0.3406025767326355, + "loss_ce": 0.00014847918646410108, + "loss_iou": 0.15234375, + "loss_num": 0.006988525390625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 351894124, + "step": 6281 + }, + { + "epoch": 13.99109131403118, + "grad_norm": 16.50027084350586, + "learning_rate": 1e-06, + "loss": 0.4563, + "num_input_tokens_seen": 351949824, + "step": 6282 + }, + { + "epoch": 13.99109131403118, + "loss": 0.38493967056274414, + "loss_ce": 0.0001129888987634331, + "loss_iou": 0.1806640625, + "loss_num": 0.004547119140625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 351949824, + "step": 6282 + }, + { + "epoch": 13.993318485523385, + "grad_norm": 30.10491180419922, + "learning_rate": 1e-06, + "loss": 0.5799, + "num_input_tokens_seen": 352004840, + "step": 6283 + }, + { + "epoch": 13.993318485523385, + "loss": 0.3842771351337433, + "loss_ce": 0.00012185641389805824, + "loss_iou": 0.16796875, + "loss_num": 0.00982666015625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 352004840, + "step": 6283 + }, + { + "epoch": 13.99554565701559, + "grad_norm": 26.573183059692383, + "learning_rate": 1e-06, + "loss": 0.6592, + "num_input_tokens_seen": 352060908, + "step": 6284 + }, + { + "epoch": 13.99554565701559, + "loss": 0.8157883882522583, + "loss_ce": 0.00011452929174993187, + "loss_iou": 0.33203125, + "loss_num": 0.030029296875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 352060908, + "step": 6284 + }, + { + "epoch": 13.997772828507795, + "grad_norm": 21.5324649810791, + "learning_rate": 1e-06, + "loss": 0.5067, + "num_input_tokens_seen": 352115772, + "step": 6285 + }, + { + "epoch": 13.997772828507795, + "loss": 0.6693138480186462, + "loss_ce": 0.00012438424164429307, + "loss_iou": 0.271484375, + "loss_num": 0.0252685546875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 352115772, + "step": 6285 + }, + { + "epoch": 14.0, + "grad_norm": 16.17325210571289, + "learning_rate": 1e-06, + "loss": 0.3972, + "num_input_tokens_seen": 352173692, + "step": 6286 + }, + { + "epoch": 14.0, + "loss": 0.43904364109039307, + "loss_ce": 0.00010934725287370384, + "loss_iou": 0.1845703125, + "loss_num": 0.01409912109375, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 352173692, + "step": 6286 + }, + { + "epoch": 14.002227171492205, + "grad_norm": 22.501575469970703, + "learning_rate": 1e-06, + "loss": 0.4806, + "num_input_tokens_seen": 352229596, + "step": 6287 + }, + { + "epoch": 14.002227171492205, + "loss": 0.4168241024017334, + "loss_ce": 0.00010654539801180363, + "loss_iou": 0.1845703125, + "loss_num": 0.00946044921875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 352229596, + "step": 6287 + }, + { + "epoch": 14.00445434298441, + "grad_norm": 19.57029151916504, + "learning_rate": 1e-06, + "loss": 0.5103, + "num_input_tokens_seen": 352285008, + "step": 6288 + }, + { + "epoch": 14.00445434298441, + "loss": 0.5710501670837402, + "loss_ce": 0.0008597684209235013, + "loss_iou": 0.2470703125, + "loss_num": 0.01519775390625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 352285008, + "step": 6288 + }, + { + "epoch": 14.006681514476615, + "grad_norm": 13.75488567352295, + "learning_rate": 1e-06, + "loss": 0.3481, + "num_input_tokens_seen": 352342884, + "step": 6289 + }, + { + "epoch": 14.006681514476615, + "loss": 0.4669305682182312, + "loss_ce": 0.0001337063149549067, + "loss_iou": 0.2158203125, + "loss_num": 0.0069580078125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 352342884, + "step": 6289 + }, + { + "epoch": 14.00890868596882, + "grad_norm": 19.051847457885742, + "learning_rate": 1e-06, + "loss": 0.4405, + "num_input_tokens_seen": 352397744, + "step": 6290 + }, + { + "epoch": 14.00890868596882, + "loss": 0.5650573968887329, + "loss_ce": 0.00011596856347750872, + "loss_iou": 0.255859375, + "loss_num": 0.01068115234375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 352397744, + "step": 6290 + }, + { + "epoch": 14.011135857461024, + "grad_norm": 15.41951847076416, + "learning_rate": 1e-06, + "loss": 0.4502, + "num_input_tokens_seen": 352454612, + "step": 6291 + }, + { + "epoch": 14.011135857461024, + "loss": 0.5639560222625732, + "loss_ce": 0.00011326879757689312, + "loss_iou": 0.2431640625, + "loss_num": 0.0155029296875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 352454612, + "step": 6291 + }, + { + "epoch": 14.01336302895323, + "grad_norm": 12.051553726196289, + "learning_rate": 1e-06, + "loss": 0.4497, + "num_input_tokens_seen": 352511128, + "step": 6292 + }, + { + "epoch": 14.01336302895323, + "loss": 0.2919390797615051, + "loss_ce": 0.00013000219769310206, + "loss_iou": 0.12060546875, + "loss_num": 0.0101318359375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 352511128, + "step": 6292 + }, + { + "epoch": 14.015590200445434, + "grad_norm": 22.342031478881836, + "learning_rate": 1e-06, + "loss": 0.3669, + "num_input_tokens_seen": 352567572, + "step": 6293 + }, + { + "epoch": 14.015590200445434, + "loss": 0.4516177773475647, + "loss_ce": 0.00014069155440665781, + "loss_iou": 0.1962890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 352567572, + "step": 6293 + }, + { + "epoch": 14.017817371937639, + "grad_norm": 21.999927520751953, + "learning_rate": 1e-06, + "loss": 0.495, + "num_input_tokens_seen": 352623720, + "step": 6294 + }, + { + "epoch": 14.017817371937639, + "loss": 0.3385887145996094, + "loss_ce": 0.00010300398571416736, + "loss_iou": 0.15625, + "loss_num": 0.005340576171875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 352623720, + "step": 6294 + }, + { + "epoch": 14.020044543429844, + "grad_norm": 18.128568649291992, + "learning_rate": 1e-06, + "loss": 0.5133, + "num_input_tokens_seen": 352679200, + "step": 6295 + }, + { + "epoch": 14.020044543429844, + "loss": 0.4763103127479553, + "loss_ce": 0.0001140405802289024, + "loss_iou": 0.216796875, + "loss_num": 0.00848388671875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 352679200, + "step": 6295 + }, + { + "epoch": 14.022271714922049, + "grad_norm": 22.718978881835938, + "learning_rate": 1e-06, + "loss": 0.4883, + "num_input_tokens_seen": 352737092, + "step": 6296 + }, + { + "epoch": 14.022271714922049, + "loss": 0.6851996183395386, + "loss_ce": 0.00014106131857261062, + "loss_iou": 0.30859375, + "loss_num": 0.013427734375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 352737092, + "step": 6296 + }, + { + "epoch": 14.024498886414253, + "grad_norm": 23.775402069091797, + "learning_rate": 1e-06, + "loss": 0.5388, + "num_input_tokens_seen": 352791712, + "step": 6297 + }, + { + "epoch": 14.024498886414253, + "loss": 0.728790819644928, + "loss_ce": 9.21202008612454e-05, + "loss_iou": 0.3203125, + "loss_num": 0.017578125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 352791712, + "step": 6297 + }, + { + "epoch": 14.026726057906458, + "grad_norm": 27.953927993774414, + "learning_rate": 1e-06, + "loss": 0.4414, + "num_input_tokens_seen": 352846396, + "step": 6298 + }, + { + "epoch": 14.026726057906458, + "loss": 0.41728514432907104, + "loss_ce": 0.00010984927939716727, + "loss_iou": 0.1845703125, + "loss_num": 0.0096435546875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 352846396, + "step": 6298 + }, + { + "epoch": 14.028953229398663, + "grad_norm": 14.124860763549805, + "learning_rate": 1e-06, + "loss": 0.573, + "num_input_tokens_seen": 352902036, + "step": 6299 + }, + { + "epoch": 14.028953229398663, + "loss": 0.3383561968803406, + "loss_ce": 9.933360706781968e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0040283203125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 352902036, + "step": 6299 + }, + { + "epoch": 14.031180400890868, + "grad_norm": 22.924598693847656, + "learning_rate": 1e-06, + "loss": 0.4167, + "num_input_tokens_seen": 352956388, + "step": 6300 + }, + { + "epoch": 14.031180400890868, + "loss": 0.41771095991134644, + "loss_ce": 0.00010839892638614401, + "loss_iou": 0.1943359375, + "loss_num": 0.0059814453125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 352956388, + "step": 6300 + }, + { + "epoch": 14.033407572383073, + "grad_norm": 16.806791305541992, + "learning_rate": 1e-06, + "loss": 0.4694, + "num_input_tokens_seen": 353012648, + "step": 6301 + }, + { + "epoch": 14.033407572383073, + "loss": 0.6352797150611877, + "loss_ce": 0.0001478988560847938, + "loss_iou": 0.259765625, + "loss_num": 0.0230712890625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 353012648, + "step": 6301 + }, + { + "epoch": 14.035634743875278, + "grad_norm": 16.72489356994629, + "learning_rate": 1e-06, + "loss": 0.4935, + "num_input_tokens_seen": 353071536, + "step": 6302 + }, + { + "epoch": 14.035634743875278, + "loss": 0.5169607996940613, + "loss_ce": 0.0001150843090726994, + "loss_iou": 0.205078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 353071536, + "step": 6302 + }, + { + "epoch": 14.037861915367483, + "grad_norm": 19.627099990844727, + "learning_rate": 1e-06, + "loss": 0.4313, + "num_input_tokens_seen": 353128896, + "step": 6303 + }, + { + "epoch": 14.037861915367483, + "loss": 0.3792652487754822, + "loss_ce": 0.00011487019946798682, + "loss_iou": 0.1728515625, + "loss_num": 0.006805419921875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 353128896, + "step": 6303 + }, + { + "epoch": 14.040089086859687, + "grad_norm": 18.96303367614746, + "learning_rate": 1e-06, + "loss": 0.5243, + "num_input_tokens_seen": 353185372, + "step": 6304 + }, + { + "epoch": 14.040089086859687, + "loss": 0.5035980939865112, + "loss_ce": 0.00011909975728485733, + "loss_iou": 0.21875, + "loss_num": 0.01324462890625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 353185372, + "step": 6304 + }, + { + "epoch": 14.042316258351892, + "grad_norm": 24.597492218017578, + "learning_rate": 1e-06, + "loss": 0.5827, + "num_input_tokens_seen": 353240952, + "step": 6305 + }, + { + "epoch": 14.042316258351892, + "loss": 0.3702905774116516, + "loss_ce": 0.0002954796073026955, + "loss_iou": 0.1630859375, + "loss_num": 0.0086669921875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 353240952, + "step": 6305 + }, + { + "epoch": 14.044543429844097, + "grad_norm": 25.436721801757812, + "learning_rate": 1e-06, + "loss": 0.6157, + "num_input_tokens_seen": 353297520, + "step": 6306 + }, + { + "epoch": 14.044543429844097, + "loss": 0.5884321928024292, + "loss_ce": 0.0001753760880092159, + "loss_iou": 0.248046875, + "loss_num": 0.0185546875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 353297520, + "step": 6306 + }, + { + "epoch": 14.046770601336302, + "grad_norm": 18.7614688873291, + "learning_rate": 1e-06, + "loss": 0.5636, + "num_input_tokens_seen": 353352068, + "step": 6307 + }, + { + "epoch": 14.046770601336302, + "loss": 0.7366272807121277, + "loss_ce": 0.00011604969040490687, + "loss_iou": 0.28125, + "loss_num": 0.034912109375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 353352068, + "step": 6307 + }, + { + "epoch": 14.048997772828507, + "grad_norm": 15.662527084350586, + "learning_rate": 1e-06, + "loss": 0.4108, + "num_input_tokens_seen": 353409180, + "step": 6308 + }, + { + "epoch": 14.048997772828507, + "loss": 0.4494924545288086, + "loss_ce": 9.055372356669977e-05, + "loss_iou": 0.201171875, + "loss_num": 0.00946044921875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 353409180, + "step": 6308 + }, + { + "epoch": 14.051224944320714, + "grad_norm": 16.739437103271484, + "learning_rate": 1e-06, + "loss": 0.3383, + "num_input_tokens_seen": 353465760, + "step": 6309 + }, + { + "epoch": 14.051224944320714, + "loss": 0.4493185877799988, + "loss_ce": 9.985115320887417e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.01446533203125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 353465760, + "step": 6309 + }, + { + "epoch": 14.053452115812918, + "grad_norm": 15.760394096374512, + "learning_rate": 1e-06, + "loss": 0.3149, + "num_input_tokens_seen": 353519740, + "step": 6310 + }, + { + "epoch": 14.053452115812918, + "loss": 0.28842777013778687, + "loss_ce": 9.770273754838854e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.006500244140625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 353519740, + "step": 6310 + }, + { + "epoch": 14.055679287305123, + "grad_norm": 22.58936882019043, + "learning_rate": 1e-06, + "loss": 0.4747, + "num_input_tokens_seen": 353575876, + "step": 6311 + }, + { + "epoch": 14.055679287305123, + "loss": 0.4683777093887329, + "loss_ce": 0.00011599110439419746, + "loss_iou": 0.220703125, + "loss_num": 0.00555419921875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 353575876, + "step": 6311 + }, + { + "epoch": 14.057906458797328, + "grad_norm": 15.249661445617676, + "learning_rate": 1e-06, + "loss": 0.4432, + "num_input_tokens_seen": 353631208, + "step": 6312 + }, + { + "epoch": 14.057906458797328, + "loss": 0.462121844291687, + "loss_ce": 8.568944758735597e-05, + "loss_iou": 0.205078125, + "loss_num": 0.01025390625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 353631208, + "step": 6312 + }, + { + "epoch": 14.060133630289533, + "grad_norm": 14.119874954223633, + "learning_rate": 1e-06, + "loss": 0.365, + "num_input_tokens_seen": 353687380, + "step": 6313 + }, + { + "epoch": 14.060133630289533, + "loss": 0.3032883107662201, + "loss_ce": 9.618209878681228e-05, + "loss_iou": 0.130859375, + "loss_num": 0.00836181640625, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 353687380, + "step": 6313 + }, + { + "epoch": 14.062360801781738, + "grad_norm": 21.891582489013672, + "learning_rate": 1e-06, + "loss": 0.7123, + "num_input_tokens_seen": 353742548, + "step": 6314 + }, + { + "epoch": 14.062360801781738, + "loss": 0.7177752256393433, + "loss_ce": 0.00012388010509312153, + "loss_iou": 0.318359375, + "loss_num": 0.0162353515625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 353742548, + "step": 6314 + }, + { + "epoch": 14.064587973273943, + "grad_norm": 18.22274398803711, + "learning_rate": 1e-06, + "loss": 0.4084, + "num_input_tokens_seen": 353796396, + "step": 6315 + }, + { + "epoch": 14.064587973273943, + "loss": 0.4653054475784302, + "loss_ce": 9.54567440203391e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.010498046875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 353796396, + "step": 6315 + }, + { + "epoch": 14.066815144766148, + "grad_norm": 14.07210922241211, + "learning_rate": 1e-06, + "loss": 0.3755, + "num_input_tokens_seen": 353852356, + "step": 6316 + }, + { + "epoch": 14.066815144766148, + "loss": 0.3321226239204407, + "loss_ce": 9.135504660662264e-05, + "loss_iou": 0.142578125, + "loss_num": 0.00958251953125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 353852356, + "step": 6316 + }, + { + "epoch": 14.069042316258352, + "grad_norm": 12.254171371459961, + "learning_rate": 1e-06, + "loss": 0.3489, + "num_input_tokens_seen": 353908400, + "step": 6317 + }, + { + "epoch": 14.069042316258352, + "loss": 0.37034958600997925, + "loss_ce": 0.00011033388000214472, + "loss_iou": 0.171875, + "loss_num": 0.00543212890625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 353908400, + "step": 6317 + }, + { + "epoch": 14.071269487750557, + "grad_norm": 19.02251625061035, + "learning_rate": 1e-06, + "loss": 0.3671, + "num_input_tokens_seen": 353963924, + "step": 6318 + }, + { + "epoch": 14.071269487750557, + "loss": 0.32162177562713623, + "loss_ce": 8.858899673214182e-05, + "loss_iou": 0.138671875, + "loss_num": 0.0087890625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 353963924, + "step": 6318 + }, + { + "epoch": 14.073496659242762, + "grad_norm": 21.90892791748047, + "learning_rate": 1e-06, + "loss": 0.5865, + "num_input_tokens_seen": 354021696, + "step": 6319 + }, + { + "epoch": 14.073496659242762, + "loss": 0.7010501623153687, + "loss_ce": 0.00012240943033248186, + "loss_iou": 0.283203125, + "loss_num": 0.027099609375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 354021696, + "step": 6319 + }, + { + "epoch": 14.075723830734967, + "grad_norm": 22.8824520111084, + "learning_rate": 1e-06, + "loss": 0.3748, + "num_input_tokens_seen": 354076836, + "step": 6320 + }, + { + "epoch": 14.075723830734967, + "loss": 0.43087825179100037, + "loss_ce": 9.210931602865458e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.015380859375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 354076836, + "step": 6320 + }, + { + "epoch": 14.077951002227172, + "grad_norm": 18.361591339111328, + "learning_rate": 1e-06, + "loss": 0.6328, + "num_input_tokens_seen": 354131804, + "step": 6321 + }, + { + "epoch": 14.077951002227172, + "loss": 0.9000133275985718, + "loss_ce": 0.00011099971743533388, + "loss_iou": 0.359375, + "loss_num": 0.03662109375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 354131804, + "step": 6321 + }, + { + "epoch": 14.080178173719377, + "grad_norm": 41.84800338745117, + "learning_rate": 1e-06, + "loss": 0.4503, + "num_input_tokens_seen": 354187764, + "step": 6322 + }, + { + "epoch": 14.080178173719377, + "loss": 0.4067497253417969, + "loss_ce": 0.00013353029498830438, + "loss_iou": 0.1650390625, + "loss_num": 0.015380859375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 354187764, + "step": 6322 + }, + { + "epoch": 14.082405345211582, + "grad_norm": 24.2533016204834, + "learning_rate": 1e-06, + "loss": 0.5849, + "num_input_tokens_seen": 354243268, + "step": 6323 + }, + { + "epoch": 14.082405345211582, + "loss": 0.5544114708900452, + "loss_ce": 9.021456935442984e-05, + "loss_iou": 0.23828125, + "loss_num": 0.0152587890625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 354243268, + "step": 6323 + }, + { + "epoch": 14.084632516703786, + "grad_norm": 18.090354919433594, + "learning_rate": 1e-06, + "loss": 0.4398, + "num_input_tokens_seen": 354301500, + "step": 6324 + }, + { + "epoch": 14.084632516703786, + "loss": 0.4318804442882538, + "loss_ce": 0.00011774900485761464, + "loss_iou": 0.189453125, + "loss_num": 0.01055908203125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 354301500, + "step": 6324 + }, + { + "epoch": 14.086859688195991, + "grad_norm": 14.694714546203613, + "learning_rate": 1e-06, + "loss": 0.547, + "num_input_tokens_seen": 354357332, + "step": 6325 + }, + { + "epoch": 14.086859688195991, + "loss": 0.42843663692474365, + "loss_ce": 9.19099838938564e-05, + "loss_iou": 0.189453125, + "loss_num": 0.009765625, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 354357332, + "step": 6325 + }, + { + "epoch": 14.089086859688196, + "grad_norm": 23.925561904907227, + "learning_rate": 1e-06, + "loss": 0.3976, + "num_input_tokens_seen": 354411896, + "step": 6326 + }, + { + "epoch": 14.089086859688196, + "loss": 0.47738510370254517, + "loss_ce": 9.019082790473476e-05, + "loss_iou": 0.1953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 354411896, + "step": 6326 + }, + { + "epoch": 14.091314031180401, + "grad_norm": 17.225788116455078, + "learning_rate": 1e-06, + "loss": 0.5451, + "num_input_tokens_seen": 354468544, + "step": 6327 + }, + { + "epoch": 14.091314031180401, + "loss": 0.47155094146728516, + "loss_ce": 0.00011540517152752727, + "loss_iou": 0.2041015625, + "loss_num": 0.01275634765625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 354468544, + "step": 6327 + }, + { + "epoch": 14.093541202672606, + "grad_norm": 37.0084342956543, + "learning_rate": 1e-06, + "loss": 0.3675, + "num_input_tokens_seen": 354527544, + "step": 6328 + }, + { + "epoch": 14.093541202672606, + "loss": 0.3508872985839844, + "loss_ce": 0.00011826444824691862, + "loss_iou": 0.150390625, + "loss_num": 0.0101318359375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 354527544, + "step": 6328 + }, + { + "epoch": 14.09576837416481, + "grad_norm": 20.719806671142578, + "learning_rate": 1e-06, + "loss": 0.3541, + "num_input_tokens_seen": 354581636, + "step": 6329 + }, + { + "epoch": 14.09576837416481, + "loss": 0.2988535761833191, + "loss_ce": 8.648804941913113e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00592041015625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 354581636, + "step": 6329 + }, + { + "epoch": 14.097995545657016, + "grad_norm": 23.697895050048828, + "learning_rate": 1e-06, + "loss": 0.4439, + "num_input_tokens_seen": 354638068, + "step": 6330 + }, + { + "epoch": 14.097995545657016, + "loss": 0.3387835621833801, + "loss_ce": 9.947673243004829e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.0086669921875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 354638068, + "step": 6330 + }, + { + "epoch": 14.10022271714922, + "grad_norm": 13.052238464355469, + "learning_rate": 1e-06, + "loss": 0.3469, + "num_input_tokens_seen": 354695576, + "step": 6331 + }, + { + "epoch": 14.10022271714922, + "loss": 0.3428958058357239, + "loss_ce": 0.00012233792222104967, + "loss_iou": 0.138671875, + "loss_num": 0.01287841796875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 354695576, + "step": 6331 + }, + { + "epoch": 14.102449888641425, + "grad_norm": 12.137063980102539, + "learning_rate": 1e-06, + "loss": 0.4448, + "num_input_tokens_seen": 354750020, + "step": 6332 + }, + { + "epoch": 14.102449888641425, + "loss": 0.5491877198219299, + "loss_ce": 0.00011541788990143687, + "loss_iou": 0.255859375, + "loss_num": 0.007354736328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 354750020, + "step": 6332 + }, + { + "epoch": 14.10467706013363, + "grad_norm": 18.966337203979492, + "learning_rate": 1e-06, + "loss": 0.3768, + "num_input_tokens_seen": 354808800, + "step": 6333 + }, + { + "epoch": 14.10467706013363, + "loss": 0.30045419931411743, + "loss_ce": 0.00010020330955740064, + "loss_iou": 0.130859375, + "loss_num": 0.007537841796875, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 354808800, + "step": 6333 + }, + { + "epoch": 14.106904231625835, + "grad_norm": 69.14574432373047, + "learning_rate": 1e-06, + "loss": 0.5943, + "num_input_tokens_seen": 354863896, + "step": 6334 + }, + { + "epoch": 14.106904231625835, + "loss": 0.6161885261535645, + "loss_ce": 9.962108015315607e-05, + "loss_iou": 0.248046875, + "loss_num": 0.0240478515625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 354863896, + "step": 6334 + }, + { + "epoch": 14.10913140311804, + "grad_norm": 29.898832321166992, + "learning_rate": 1e-06, + "loss": 0.5497, + "num_input_tokens_seen": 354920516, + "step": 6335 + }, + { + "epoch": 14.10913140311804, + "loss": 0.5754295587539673, + "loss_ce": 0.00011214042024221271, + "loss_iou": 0.271484375, + "loss_num": 0.00689697265625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 354920516, + "step": 6335 + }, + { + "epoch": 14.111358574610245, + "grad_norm": 17.41396141052246, + "learning_rate": 1e-06, + "loss": 0.2817, + "num_input_tokens_seen": 354976696, + "step": 6336 + }, + { + "epoch": 14.111358574610245, + "loss": 0.3508448898792267, + "loss_ce": 0.0001368697703583166, + "loss_iou": 0.1416015625, + "loss_num": 0.0135498046875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 354976696, + "step": 6336 + }, + { + "epoch": 14.11358574610245, + "grad_norm": 41.86484146118164, + "learning_rate": 1e-06, + "loss": 0.5174, + "num_input_tokens_seen": 355034820, + "step": 6337 + }, + { + "epoch": 14.11358574610245, + "loss": 0.38658392429351807, + "loss_ce": 0.00010932501754723489, + "loss_iou": 0.171875, + "loss_num": 0.0084228515625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 355034820, + "step": 6337 + }, + { + "epoch": 14.115812917594655, + "grad_norm": 22.32383918762207, + "learning_rate": 1e-06, + "loss": 0.6725, + "num_input_tokens_seen": 355092456, + "step": 6338 + }, + { + "epoch": 14.115812917594655, + "loss": 0.47444137930870056, + "loss_ce": 0.00013718288391828537, + "loss_iou": 0.19140625, + "loss_num": 0.0184326171875, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 355092456, + "step": 6338 + }, + { + "epoch": 14.11804008908686, + "grad_norm": 15.70544147491455, + "learning_rate": 1e-06, + "loss": 0.3347, + "num_input_tokens_seen": 355146944, + "step": 6339 + }, + { + "epoch": 14.11804008908686, + "loss": 0.33699989318847656, + "loss_ce": 8.58691637404263e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.01171875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 355146944, + "step": 6339 + }, + { + "epoch": 14.120267260579064, + "grad_norm": 16.518766403198242, + "learning_rate": 1e-06, + "loss": 0.4509, + "num_input_tokens_seen": 355204304, + "step": 6340 + }, + { + "epoch": 14.120267260579064, + "loss": 0.626310408115387, + "loss_ce": 8.97101781447418e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0224609375, + "loss_xval": 0.625, + "num_input_tokens_seen": 355204304, + "step": 6340 + }, + { + "epoch": 14.122494432071269, + "grad_norm": 18.224716186523438, + "learning_rate": 1e-06, + "loss": 0.4472, + "num_input_tokens_seen": 355260784, + "step": 6341 + }, + { + "epoch": 14.122494432071269, + "loss": 0.504758894443512, + "loss_ce": 0.00012020649592159316, + "loss_iou": 0.2353515625, + "loss_num": 0.00677490234375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 355260784, + "step": 6341 + }, + { + "epoch": 14.124721603563474, + "grad_norm": 16.273900985717773, + "learning_rate": 1e-06, + "loss": 0.47, + "num_input_tokens_seen": 355315728, + "step": 6342 + }, + { + "epoch": 14.124721603563474, + "loss": 0.4144068956375122, + "loss_ce": 0.0001002842909656465, + "loss_iou": 0.19140625, + "loss_num": 0.006256103515625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 355315728, + "step": 6342 + }, + { + "epoch": 14.126948775055679, + "grad_norm": 13.019576072692871, + "learning_rate": 1e-06, + "loss": 0.3871, + "num_input_tokens_seen": 355369944, + "step": 6343 + }, + { + "epoch": 14.126948775055679, + "loss": 0.40841740369796753, + "loss_ce": 9.22168546821922e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.01275634765625, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 355369944, + "step": 6343 + }, + { + "epoch": 14.129175946547884, + "grad_norm": 20.190988540649414, + "learning_rate": 1e-06, + "loss": 0.4851, + "num_input_tokens_seen": 355423788, + "step": 6344 + }, + { + "epoch": 14.129175946547884, + "loss": 0.4641231596469879, + "loss_ce": 0.00013389563537202775, + "loss_iou": 0.21875, + "loss_num": 0.00518798828125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 355423788, + "step": 6344 + }, + { + "epoch": 14.131403118040089, + "grad_norm": 18.983137130737305, + "learning_rate": 1e-06, + "loss": 0.2741, + "num_input_tokens_seen": 355483244, + "step": 6345 + }, + { + "epoch": 14.131403118040089, + "loss": 0.33998721837997437, + "loss_ce": 0.0001434668229194358, + "loss_iou": 0.1494140625, + "loss_num": 0.0081787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 355483244, + "step": 6345 + }, + { + "epoch": 14.133630289532293, + "grad_norm": 15.164654731750488, + "learning_rate": 1e-06, + "loss": 0.4301, + "num_input_tokens_seen": 355538616, + "step": 6346 + }, + { + "epoch": 14.133630289532293, + "loss": 0.669284462928772, + "loss_ce": 0.00015608461399096996, + "loss_iou": 0.291015625, + "loss_num": 0.0177001953125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 355538616, + "step": 6346 + }, + { + "epoch": 14.135857461024498, + "grad_norm": 35.34605026245117, + "learning_rate": 1e-06, + "loss": 0.5471, + "num_input_tokens_seen": 355594156, + "step": 6347 + }, + { + "epoch": 14.135857461024498, + "loss": 0.6321955323219299, + "loss_ce": 0.00011544384324224666, + "loss_iou": 0.2734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 355594156, + "step": 6347 + }, + { + "epoch": 14.138084632516703, + "grad_norm": 20.306251525878906, + "learning_rate": 1e-06, + "loss": 0.3952, + "num_input_tokens_seen": 355649844, + "step": 6348 + }, + { + "epoch": 14.138084632516703, + "loss": 0.3479854166507721, + "loss_ce": 8.503998105879873e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.00830078125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 355649844, + "step": 6348 + }, + { + "epoch": 14.140311804008908, + "grad_norm": 15.569748878479004, + "learning_rate": 1e-06, + "loss": 0.4208, + "num_input_tokens_seen": 355703872, + "step": 6349 + }, + { + "epoch": 14.140311804008908, + "loss": 0.37485039234161377, + "loss_ce": 9.451658843318e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.375, + "num_input_tokens_seen": 355703872, + "step": 6349 + }, + { + "epoch": 14.142538975501113, + "grad_norm": 26.381547927856445, + "learning_rate": 1e-06, + "loss": 0.561, + "num_input_tokens_seen": 355758976, + "step": 6350 + }, + { + "epoch": 14.142538975501113, + "loss": 0.6393318176269531, + "loss_ce": 0.0001717099512461573, + "loss_iou": 0.28515625, + "loss_num": 0.01361083984375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 355758976, + "step": 6350 + }, + { + "epoch": 14.144766146993318, + "grad_norm": 18.737266540527344, + "learning_rate": 1e-06, + "loss": 0.5725, + "num_input_tokens_seen": 355810544, + "step": 6351 + }, + { + "epoch": 14.144766146993318, + "loss": 0.4250592589378357, + "loss_ce": 0.00013248772302176803, + "loss_iou": 0.1875, + "loss_num": 0.0098876953125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 355810544, + "step": 6351 + }, + { + "epoch": 14.146993318485523, + "grad_norm": 20.644193649291992, + "learning_rate": 1e-06, + "loss": 0.4436, + "num_input_tokens_seen": 355865660, + "step": 6352 + }, + { + "epoch": 14.146993318485523, + "loss": 0.3755820393562317, + "loss_ce": 9.376133675687015e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.0074462890625, + "loss_xval": 0.375, + "num_input_tokens_seen": 355865660, + "step": 6352 + }, + { + "epoch": 14.14922048997773, + "grad_norm": 23.84221839904785, + "learning_rate": 1e-06, + "loss": 0.3666, + "num_input_tokens_seen": 355921244, + "step": 6353 + }, + { + "epoch": 14.14922048997773, + "loss": 0.3702180087566376, + "loss_ce": 0.00010083305824082345, + "loss_iou": 0.1689453125, + "loss_num": 0.006500244140625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 355921244, + "step": 6353 + }, + { + "epoch": 14.151447661469934, + "grad_norm": 28.195049285888672, + "learning_rate": 1e-06, + "loss": 0.4026, + "num_input_tokens_seen": 355978880, + "step": 6354 + }, + { + "epoch": 14.151447661469934, + "loss": 0.48697197437286377, + "loss_ce": 0.00015556240396108478, + "loss_iou": 0.2021484375, + "loss_num": 0.016357421875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 355978880, + "step": 6354 + }, + { + "epoch": 14.153674832962139, + "grad_norm": 19.111854553222656, + "learning_rate": 1e-06, + "loss": 0.529, + "num_input_tokens_seen": 356033656, + "step": 6355 + }, + { + "epoch": 14.153674832962139, + "loss": 0.5453453063964844, + "loss_ce": 0.00011828625429188833, + "loss_iou": 0.240234375, + "loss_num": 0.01324462890625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 356033656, + "step": 6355 + }, + { + "epoch": 14.155902004454344, + "grad_norm": 17.10947036743164, + "learning_rate": 1e-06, + "loss": 0.4607, + "num_input_tokens_seen": 356092412, + "step": 6356 + }, + { + "epoch": 14.155902004454344, + "loss": 0.3630419373512268, + "loss_ce": 0.00012691874871961772, + "loss_iou": 0.1650390625, + "loss_num": 0.0064697265625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 356092412, + "step": 6356 + }, + { + "epoch": 14.158129175946549, + "grad_norm": 21.90672492980957, + "learning_rate": 1e-06, + "loss": 0.3746, + "num_input_tokens_seen": 356149732, + "step": 6357 + }, + { + "epoch": 14.158129175946549, + "loss": 0.35400235652923584, + "loss_ce": 0.0002426167920930311, + "loss_iou": 0.158203125, + "loss_num": 0.007476806640625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 356149732, + "step": 6357 + }, + { + "epoch": 14.160356347438753, + "grad_norm": 31.798358917236328, + "learning_rate": 1e-06, + "loss": 0.4167, + "num_input_tokens_seen": 356204552, + "step": 6358 + }, + { + "epoch": 14.160356347438753, + "loss": 0.5162287950515747, + "loss_ce": 0.00011550244380487129, + "loss_iou": 0.234375, + "loss_num": 0.00933837890625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 356204552, + "step": 6358 + }, + { + "epoch": 14.162583518930958, + "grad_norm": 15.809988021850586, + "learning_rate": 1e-06, + "loss": 0.3727, + "num_input_tokens_seen": 356262836, + "step": 6359 + }, + { + "epoch": 14.162583518930958, + "loss": 0.4119860827922821, + "loss_ce": 0.00012085679190931842, + "loss_iou": 0.1875, + "loss_num": 0.00726318359375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 356262836, + "step": 6359 + }, + { + "epoch": 14.164810690423163, + "grad_norm": 14.865153312683105, + "learning_rate": 1e-06, + "loss": 0.4879, + "num_input_tokens_seen": 356319092, + "step": 6360 + }, + { + "epoch": 14.164810690423163, + "loss": 0.34744900465011597, + "loss_ce": 9.792236960493028e-05, + "loss_iou": 0.16015625, + "loss_num": 0.005279541015625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 356319092, + "step": 6360 + }, + { + "epoch": 14.167037861915368, + "grad_norm": 21.529083251953125, + "learning_rate": 1e-06, + "loss": 0.4694, + "num_input_tokens_seen": 356375520, + "step": 6361 + }, + { + "epoch": 14.167037861915368, + "loss": 0.3479962944984436, + "loss_ce": 9.590189438313246e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.010009765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 356375520, + "step": 6361 + }, + { + "epoch": 14.169265033407573, + "grad_norm": 15.021240234375, + "learning_rate": 1e-06, + "loss": 0.2992, + "num_input_tokens_seen": 356434312, + "step": 6362 + }, + { + "epoch": 14.169265033407573, + "loss": 0.27756401896476746, + "loss_ce": 9.818993567023426e-05, + "loss_iou": 0.11572265625, + "loss_num": 0.00921630859375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 356434312, + "step": 6362 + }, + { + "epoch": 14.171492204899778, + "grad_norm": 51.64773941040039, + "learning_rate": 1e-06, + "loss": 0.3894, + "num_input_tokens_seen": 356489392, + "step": 6363 + }, + { + "epoch": 14.171492204899778, + "loss": 0.5095750689506531, + "loss_ce": 0.0002977301483042538, + "loss_iou": 0.2001953125, + "loss_num": 0.0218505859375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 356489392, + "step": 6363 + }, + { + "epoch": 14.173719376391983, + "grad_norm": 16.491901397705078, + "learning_rate": 1e-06, + "loss": 0.371, + "num_input_tokens_seen": 356548064, + "step": 6364 + }, + { + "epoch": 14.173719376391983, + "loss": 0.3434831500053406, + "loss_ce": 9.938179573509842e-05, + "loss_iou": 0.15234375, + "loss_num": 0.00762939453125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 356548064, + "step": 6364 + }, + { + "epoch": 14.175946547884188, + "grad_norm": 12.178879737854004, + "learning_rate": 1e-06, + "loss": 0.4276, + "num_input_tokens_seen": 356606084, + "step": 6365 + }, + { + "epoch": 14.175946547884188, + "loss": 0.39342576265335083, + "loss_ce": 0.00011522185377543792, + "loss_iou": 0.173828125, + "loss_num": 0.0091552734375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 356606084, + "step": 6365 + }, + { + "epoch": 14.178173719376392, + "grad_norm": 31.509164810180664, + "learning_rate": 1e-06, + "loss": 0.5393, + "num_input_tokens_seen": 356662456, + "step": 6366 + }, + { + "epoch": 14.178173719376392, + "loss": 0.522935152053833, + "loss_ce": 0.00010799485608004034, + "loss_iou": 0.232421875, + "loss_num": 0.011474609375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 356662456, + "step": 6366 + }, + { + "epoch": 14.180400890868597, + "grad_norm": 15.755391120910645, + "learning_rate": 1e-06, + "loss": 0.478, + "num_input_tokens_seen": 356718496, + "step": 6367 + }, + { + "epoch": 14.180400890868597, + "loss": 0.46932005882263184, + "loss_ce": 8.176272240234539e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01513671875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 356718496, + "step": 6367 + }, + { + "epoch": 14.182628062360802, + "grad_norm": 20.124523162841797, + "learning_rate": 1e-06, + "loss": 0.443, + "num_input_tokens_seen": 356772880, + "step": 6368 + }, + { + "epoch": 14.182628062360802, + "loss": 0.4469994902610779, + "loss_ce": 0.00010007787204813212, + "loss_iou": 0.1787109375, + "loss_num": 0.0179443359375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 356772880, + "step": 6368 + }, + { + "epoch": 14.184855233853007, + "grad_norm": 14.184540748596191, + "learning_rate": 1e-06, + "loss": 0.364, + "num_input_tokens_seen": 356829648, + "step": 6369 + }, + { + "epoch": 14.184855233853007, + "loss": 0.49447864294052124, + "loss_ce": 9.389698243467137e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.01171875, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 356829648, + "step": 6369 + }, + { + "epoch": 14.187082405345212, + "grad_norm": 17.749513626098633, + "learning_rate": 1e-06, + "loss": 0.4139, + "num_input_tokens_seen": 356886340, + "step": 6370 + }, + { + "epoch": 14.187082405345212, + "loss": 0.38717857003211975, + "loss_ce": 9.360718831885606e-05, + "loss_iou": 0.171875, + "loss_num": 0.008544921875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 356886340, + "step": 6370 + }, + { + "epoch": 14.189309576837417, + "grad_norm": 49.11314392089844, + "learning_rate": 1e-06, + "loss": 0.6898, + "num_input_tokens_seen": 356942864, + "step": 6371 + }, + { + "epoch": 14.189309576837417, + "loss": 0.91033935546875, + "loss_ce": 0.00018309304141439497, + "loss_iou": 0.3984375, + "loss_num": 0.0225830078125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 356942864, + "step": 6371 + }, + { + "epoch": 14.191536748329622, + "grad_norm": 14.434630393981934, + "learning_rate": 1e-06, + "loss": 0.4182, + "num_input_tokens_seen": 356998272, + "step": 6372 + }, + { + "epoch": 14.191536748329622, + "loss": 0.3537047207355499, + "loss_ce": 0.00012805727601516992, + "loss_iou": 0.1611328125, + "loss_num": 0.006195068359375, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 356998272, + "step": 6372 + }, + { + "epoch": 14.193763919821826, + "grad_norm": 26.62126350402832, + "learning_rate": 1e-06, + "loss": 0.3928, + "num_input_tokens_seen": 357056092, + "step": 6373 + }, + { + "epoch": 14.193763919821826, + "loss": 0.3926212787628174, + "loss_ce": 0.00010417943849461153, + "loss_iou": 0.1640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 357056092, + "step": 6373 + }, + { + "epoch": 14.195991091314031, + "grad_norm": 19.854206085205078, + "learning_rate": 1e-06, + "loss": 0.4684, + "num_input_tokens_seen": 357114944, + "step": 6374 + }, + { + "epoch": 14.195991091314031, + "loss": 0.5363778471946716, + "loss_ce": 0.00012294600310269743, + "loss_iou": 0.2353515625, + "loss_num": 0.01318359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 357114944, + "step": 6374 + }, + { + "epoch": 14.198218262806236, + "grad_norm": 17.966840744018555, + "learning_rate": 1e-06, + "loss": 0.4877, + "num_input_tokens_seen": 357170380, + "step": 6375 + }, + { + "epoch": 14.198218262806236, + "loss": 0.5723801851272583, + "loss_ce": 0.00017562352877575904, + "loss_iou": 0.2333984375, + "loss_num": 0.0208740234375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 357170380, + "step": 6375 + }, + { + "epoch": 14.200445434298441, + "grad_norm": 20.891889572143555, + "learning_rate": 1e-06, + "loss": 0.5619, + "num_input_tokens_seen": 357227228, + "step": 6376 + }, + { + "epoch": 14.200445434298441, + "loss": 0.6446791887283325, + "loss_ce": 0.00011739273031707853, + "loss_iou": 0.267578125, + "loss_num": 0.021728515625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 357227228, + "step": 6376 + }, + { + "epoch": 14.202672605790646, + "grad_norm": 18.76251792907715, + "learning_rate": 1e-06, + "loss": 0.3435, + "num_input_tokens_seen": 357285012, + "step": 6377 + }, + { + "epoch": 14.202672605790646, + "loss": 0.3950054347515106, + "loss_ce": 0.00010797058348543942, + "loss_iou": 0.173828125, + "loss_num": 0.00927734375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 357285012, + "step": 6377 + }, + { + "epoch": 14.20489977728285, + "grad_norm": 13.638835906982422, + "learning_rate": 1e-06, + "loss": 0.3753, + "num_input_tokens_seen": 357341596, + "step": 6378 + }, + { + "epoch": 14.20489977728285, + "loss": 0.4419354498386383, + "loss_ce": 0.00010194515925832093, + "loss_iou": 0.185546875, + "loss_num": 0.01409912109375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 357341596, + "step": 6378 + }, + { + "epoch": 14.207126948775056, + "grad_norm": 17.007076263427734, + "learning_rate": 1e-06, + "loss": 0.5073, + "num_input_tokens_seen": 357397024, + "step": 6379 + }, + { + "epoch": 14.207126948775056, + "loss": 0.2852746248245239, + "loss_ce": 0.00011835141776828095, + "loss_iou": 0.1240234375, + "loss_num": 0.007415771484375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 357397024, + "step": 6379 + }, + { + "epoch": 14.20935412026726, + "grad_norm": 15.55772590637207, + "learning_rate": 1e-06, + "loss": 0.4203, + "num_input_tokens_seen": 357453348, + "step": 6380 + }, + { + "epoch": 14.20935412026726, + "loss": 0.3796178698539734, + "loss_ce": 0.00010125982225872576, + "loss_iou": 0.17578125, + "loss_num": 0.00567626953125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 357453348, + "step": 6380 + }, + { + "epoch": 14.211581291759465, + "grad_norm": 21.045106887817383, + "learning_rate": 1e-06, + "loss": 0.4578, + "num_input_tokens_seen": 357508568, + "step": 6381 + }, + { + "epoch": 14.211581291759465, + "loss": 0.40396052598953247, + "loss_ce": 0.00015192307182587683, + "loss_iou": 0.16796875, + "loss_num": 0.013671875, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 357508568, + "step": 6381 + }, + { + "epoch": 14.21380846325167, + "grad_norm": 27.14858627319336, + "learning_rate": 1e-06, + "loss": 0.4897, + "num_input_tokens_seen": 357561732, + "step": 6382 + }, + { + "epoch": 14.21380846325167, + "loss": 0.5612715482711792, + "loss_ce": 0.00023641872394364327, + "loss_iou": 0.24609375, + "loss_num": 0.01385498046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 357561732, + "step": 6382 + }, + { + "epoch": 14.216035634743875, + "grad_norm": 17.41556167602539, + "learning_rate": 1e-06, + "loss": 0.4793, + "num_input_tokens_seen": 357618236, + "step": 6383 + }, + { + "epoch": 14.216035634743875, + "loss": 0.5158074498176575, + "loss_ce": 0.00012142492778366432, + "loss_iou": 0.2138671875, + "loss_num": 0.0177001953125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 357618236, + "step": 6383 + }, + { + "epoch": 14.21826280623608, + "grad_norm": 15.721260070800781, + "learning_rate": 1e-06, + "loss": 0.3657, + "num_input_tokens_seen": 357676752, + "step": 6384 + }, + { + "epoch": 14.21826280623608, + "loss": 0.4123426675796509, + "loss_ce": 0.0001112363679567352, + "loss_iou": 0.1826171875, + "loss_num": 0.00927734375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 357676752, + "step": 6384 + }, + { + "epoch": 14.220489977728285, + "grad_norm": 26.944326400756836, + "learning_rate": 1e-06, + "loss": 0.4681, + "num_input_tokens_seen": 357734280, + "step": 6385 + }, + { + "epoch": 14.220489977728285, + "loss": 0.46214836835861206, + "loss_ce": 0.00011222571629332379, + "loss_iou": 0.197265625, + "loss_num": 0.013427734375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 357734280, + "step": 6385 + }, + { + "epoch": 14.22271714922049, + "grad_norm": 19.070985794067383, + "learning_rate": 1e-06, + "loss": 0.3808, + "num_input_tokens_seen": 357788536, + "step": 6386 + }, + { + "epoch": 14.22271714922049, + "loss": 0.32470569014549255, + "loss_ce": 0.00012072586105205119, + "loss_iou": 0.1416015625, + "loss_num": 0.00836181640625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 357788536, + "step": 6386 + }, + { + "epoch": 14.224944320712694, + "grad_norm": 12.709535598754883, + "learning_rate": 1e-06, + "loss": 0.5202, + "num_input_tokens_seen": 357844904, + "step": 6387 + }, + { + "epoch": 14.224944320712694, + "loss": 0.35143405199050903, + "loss_ce": 0.00011568302579689771, + "loss_iou": 0.1611328125, + "loss_num": 0.005706787109375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 357844904, + "step": 6387 + }, + { + "epoch": 14.2271714922049, + "grad_norm": 15.428078651428223, + "learning_rate": 1e-06, + "loss": 0.4241, + "num_input_tokens_seen": 357902220, + "step": 6388 + }, + { + "epoch": 14.2271714922049, + "loss": 0.5605834126472473, + "loss_ce": 0.00015861910651437938, + "loss_iou": 0.26171875, + "loss_num": 0.007354736328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 357902220, + "step": 6388 + }, + { + "epoch": 14.229398663697104, + "grad_norm": 25.415952682495117, + "learning_rate": 1e-06, + "loss": 0.4713, + "num_input_tokens_seen": 357958080, + "step": 6389 + }, + { + "epoch": 14.229398663697104, + "loss": 0.47960424423217773, + "loss_ce": 0.00011205507325939834, + "loss_iou": 0.224609375, + "loss_num": 0.00604248046875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 357958080, + "step": 6389 + }, + { + "epoch": 14.231625835189309, + "grad_norm": 14.51382827758789, + "learning_rate": 1e-06, + "loss": 0.4541, + "num_input_tokens_seen": 358015476, + "step": 6390 + }, + { + "epoch": 14.231625835189309, + "loss": 0.3873499631881714, + "loss_ce": 0.00038707145722582936, + "loss_iou": 0.1796875, + "loss_num": 0.00543212890625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 358015476, + "step": 6390 + }, + { + "epoch": 14.233853006681514, + "grad_norm": 20.579307556152344, + "learning_rate": 1e-06, + "loss": 0.592, + "num_input_tokens_seen": 358073920, + "step": 6391 + }, + { + "epoch": 14.233853006681514, + "loss": 0.554311215877533, + "loss_ce": 0.00011194508260814473, + "loss_iou": 0.234375, + "loss_num": 0.016845703125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 358073920, + "step": 6391 + }, + { + "epoch": 14.236080178173719, + "grad_norm": 18.920732498168945, + "learning_rate": 1e-06, + "loss": 0.4462, + "num_input_tokens_seen": 358130704, + "step": 6392 + }, + { + "epoch": 14.236080178173719, + "loss": 0.5425920486450195, + "loss_ce": 0.00011164323223056272, + "loss_iou": 0.2412109375, + "loss_num": 0.01202392578125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 358130704, + "step": 6392 + }, + { + "epoch": 14.238307349665924, + "grad_norm": 16.76801872253418, + "learning_rate": 1e-06, + "loss": 0.3899, + "num_input_tokens_seen": 358189660, + "step": 6393 + }, + { + "epoch": 14.238307349665924, + "loss": 0.345196008682251, + "loss_ce": 0.00010322515299776569, + "loss_iou": 0.150390625, + "loss_num": 0.0087890625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 358189660, + "step": 6393 + }, + { + "epoch": 14.240534521158128, + "grad_norm": 20.114219665527344, + "learning_rate": 1e-06, + "loss": 0.3621, + "num_input_tokens_seen": 358247428, + "step": 6394 + }, + { + "epoch": 14.240534521158128, + "loss": 0.3770490884780884, + "loss_ce": 9.597234020475298e-05, + "loss_iou": 0.17578125, + "loss_num": 0.005096435546875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 358247428, + "step": 6394 + }, + { + "epoch": 14.242761692650333, + "grad_norm": 20.959352493286133, + "learning_rate": 1e-06, + "loss": 0.5468, + "num_input_tokens_seen": 358304388, + "step": 6395 + }, + { + "epoch": 14.242761692650333, + "loss": 0.6914682388305664, + "loss_ce": 0.00030615812283940613, + "loss_iou": 0.298828125, + "loss_num": 0.0185546875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 358304388, + "step": 6395 + }, + { + "epoch": 14.244988864142538, + "grad_norm": 26.55893325805664, + "learning_rate": 1e-06, + "loss": 0.5794, + "num_input_tokens_seen": 358360252, + "step": 6396 + }, + { + "epoch": 14.244988864142538, + "loss": 0.46212613582611084, + "loss_ce": 8.996979158837348e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.01141357421875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 358360252, + "step": 6396 + }, + { + "epoch": 14.247216035634743, + "grad_norm": 18.9809627532959, + "learning_rate": 1e-06, + "loss": 0.4802, + "num_input_tokens_seen": 358418356, + "step": 6397 + }, + { + "epoch": 14.247216035634743, + "loss": 0.3323664665222168, + "loss_ce": 9.106392099056393e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.006561279296875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 358418356, + "step": 6397 + }, + { + "epoch": 14.249443207126948, + "grad_norm": 16.457611083984375, + "learning_rate": 1e-06, + "loss": 0.4579, + "num_input_tokens_seen": 358475404, + "step": 6398 + }, + { + "epoch": 14.249443207126948, + "loss": 0.6108676791191101, + "loss_ce": 0.00014991118223406374, + "loss_iou": 0.232421875, + "loss_num": 0.029296875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 358475404, + "step": 6398 + }, + { + "epoch": 14.251670378619155, + "grad_norm": 19.050983428955078, + "learning_rate": 1e-06, + "loss": 0.4976, + "num_input_tokens_seen": 358528924, + "step": 6399 + }, + { + "epoch": 14.251670378619155, + "loss": 0.6439430713653564, + "loss_ce": 0.0001442273351131007, + "loss_iou": 0.2890625, + "loss_num": 0.01312255859375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 358528924, + "step": 6399 + }, + { + "epoch": 14.25389755011136, + "grad_norm": 14.289558410644531, + "learning_rate": 1e-06, + "loss": 0.46, + "num_input_tokens_seen": 358585368, + "step": 6400 + }, + { + "epoch": 14.25389755011136, + "loss": 0.6162996292114258, + "loss_ce": 8.872315811458975e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 358585368, + "step": 6400 + }, + { + "epoch": 14.256124721603564, + "grad_norm": 22.632549285888672, + "learning_rate": 1e-06, + "loss": 0.4443, + "num_input_tokens_seen": 358642216, + "step": 6401 + }, + { + "epoch": 14.256124721603564, + "loss": 0.39599111676216125, + "loss_ce": 0.00011710106628015637, + "loss_iou": 0.1708984375, + "loss_num": 0.01068115234375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 358642216, + "step": 6401 + }, + { + "epoch": 14.25835189309577, + "grad_norm": 15.929319381713867, + "learning_rate": 1e-06, + "loss": 0.4255, + "num_input_tokens_seen": 358698924, + "step": 6402 + }, + { + "epoch": 14.25835189309577, + "loss": 0.35032692551612854, + "loss_ce": 0.00010718655539676547, + "loss_iou": 0.1533203125, + "loss_num": 0.00860595703125, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 358698924, + "step": 6402 + }, + { + "epoch": 14.260579064587974, + "grad_norm": 13.306114196777344, + "learning_rate": 1e-06, + "loss": 0.3793, + "num_input_tokens_seen": 358755212, + "step": 6403 + }, + { + "epoch": 14.260579064587974, + "loss": 0.2552165389060974, + "loss_ce": 8.956858073361218e-05, + "loss_iou": 0.11181640625, + "loss_num": 0.0062255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 358755212, + "step": 6403 + }, + { + "epoch": 14.262806236080179, + "grad_norm": 17.36546516418457, + "learning_rate": 1e-06, + "loss": 0.4423, + "num_input_tokens_seen": 358814232, + "step": 6404 + }, + { + "epoch": 14.262806236080179, + "loss": 0.4097899794578552, + "loss_ce": 0.00012203957885503769, + "loss_iou": 0.1806640625, + "loss_num": 0.0096435546875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 358814232, + "step": 6404 + }, + { + "epoch": 14.265033407572384, + "grad_norm": 29.2259464263916, + "learning_rate": 1e-06, + "loss": 0.5395, + "num_input_tokens_seen": 358871184, + "step": 6405 + }, + { + "epoch": 14.265033407572384, + "loss": 0.682025671005249, + "loss_ce": 0.00014085797010920942, + "loss_iou": 0.28125, + "loss_num": 0.0235595703125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 358871184, + "step": 6405 + }, + { + "epoch": 14.267260579064589, + "grad_norm": 12.634069442749023, + "learning_rate": 1e-06, + "loss": 0.2714, + "num_input_tokens_seen": 358928648, + "step": 6406 + }, + { + "epoch": 14.267260579064589, + "loss": 0.2787477970123291, + "loss_ce": 9.179921471513808e-05, + "loss_iou": 0.1123046875, + "loss_num": 0.01092529296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 358928648, + "step": 6406 + }, + { + "epoch": 14.269487750556793, + "grad_norm": 25.011402130126953, + "learning_rate": 1e-06, + "loss": 0.5663, + "num_input_tokens_seen": 358987088, + "step": 6407 + }, + { + "epoch": 14.269487750556793, + "loss": 0.4417472183704376, + "loss_ce": 9.685405530035496e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.01470947265625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 358987088, + "step": 6407 + }, + { + "epoch": 14.271714922048998, + "grad_norm": 14.262114524841309, + "learning_rate": 1e-06, + "loss": 0.4572, + "num_input_tokens_seen": 359043584, + "step": 6408 + }, + { + "epoch": 14.271714922048998, + "loss": 0.40514233708381653, + "loss_ce": 0.00011304439249215648, + "loss_iou": 0.1669921875, + "loss_num": 0.01446533203125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 359043584, + "step": 6408 + }, + { + "epoch": 14.273942093541203, + "grad_norm": 19.64780616760254, + "learning_rate": 1e-06, + "loss": 0.3768, + "num_input_tokens_seen": 359097740, + "step": 6409 + }, + { + "epoch": 14.273942093541203, + "loss": 0.39305636286735535, + "loss_ce": 0.00011201576853636652, + "loss_iou": 0.1845703125, + "loss_num": 0.004852294921875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 359097740, + "step": 6409 + }, + { + "epoch": 14.276169265033408, + "grad_norm": 16.076786041259766, + "learning_rate": 1e-06, + "loss": 0.5821, + "num_input_tokens_seen": 359153996, + "step": 6410 + }, + { + "epoch": 14.276169265033408, + "loss": 0.5809260606765747, + "loss_ce": 0.00011550600902410224, + "loss_iou": 0.2470703125, + "loss_num": 0.0174560546875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 359153996, + "step": 6410 + }, + { + "epoch": 14.278396436525613, + "grad_norm": 14.925246238708496, + "learning_rate": 1e-06, + "loss": 0.3981, + "num_input_tokens_seen": 359209560, + "step": 6411 + }, + { + "epoch": 14.278396436525613, + "loss": 0.4395759403705597, + "loss_ce": 0.00012282837997190654, + "loss_iou": 0.19921875, + "loss_num": 0.00830078125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 359209560, + "step": 6411 + }, + { + "epoch": 14.280623608017818, + "grad_norm": 20.60141372680664, + "learning_rate": 1e-06, + "loss": 0.6134, + "num_input_tokens_seen": 359263596, + "step": 6412 + }, + { + "epoch": 14.280623608017818, + "loss": 0.5152615308761597, + "loss_ce": 0.00012483607861213386, + "loss_iou": 0.220703125, + "loss_num": 0.01483154296875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 359263596, + "step": 6412 + }, + { + "epoch": 14.282850779510023, + "grad_norm": 43.90515899658203, + "learning_rate": 1e-06, + "loss": 0.6257, + "num_input_tokens_seen": 359322224, + "step": 6413 + }, + { + "epoch": 14.282850779510023, + "loss": 0.6065540313720703, + "loss_ce": 0.00010870952974073589, + "loss_iou": 0.26953125, + "loss_num": 0.0135498046875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 359322224, + "step": 6413 + }, + { + "epoch": 14.285077951002227, + "grad_norm": 22.319034576416016, + "learning_rate": 1e-06, + "loss": 0.3332, + "num_input_tokens_seen": 359377716, + "step": 6414 + }, + { + "epoch": 14.285077951002227, + "loss": 0.2222072184085846, + "loss_ce": 0.00010028109682025388, + "loss_iou": 0.091796875, + "loss_num": 0.007568359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 359377716, + "step": 6414 + }, + { + "epoch": 14.287305122494432, + "grad_norm": 16.046680450439453, + "learning_rate": 1e-06, + "loss": 0.4248, + "num_input_tokens_seen": 359432444, + "step": 6415 + }, + { + "epoch": 14.287305122494432, + "loss": 0.37368685007095337, + "loss_ce": 9.067119390238076e-05, + "loss_iou": 0.16015625, + "loss_num": 0.01080322265625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 359432444, + "step": 6415 + }, + { + "epoch": 14.289532293986637, + "grad_norm": 14.897786140441895, + "learning_rate": 1e-06, + "loss": 0.3186, + "num_input_tokens_seen": 359489464, + "step": 6416 + }, + { + "epoch": 14.289532293986637, + "loss": 0.36753690242767334, + "loss_ce": 0.00010526920959819108, + "loss_iou": 0.15625, + "loss_num": 0.010986328125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 359489464, + "step": 6416 + }, + { + "epoch": 14.291759465478842, + "grad_norm": 17.839426040649414, + "learning_rate": 1e-06, + "loss": 0.455, + "num_input_tokens_seen": 359543156, + "step": 6417 + }, + { + "epoch": 14.291759465478842, + "loss": 0.5617268085479736, + "loss_ce": 0.0001423186477040872, + "loss_iou": 0.2294921875, + "loss_num": 0.0205078125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 359543156, + "step": 6417 + }, + { + "epoch": 14.293986636971047, + "grad_norm": 18.264699935913086, + "learning_rate": 1e-06, + "loss": 0.5459, + "num_input_tokens_seen": 359597988, + "step": 6418 + }, + { + "epoch": 14.293986636971047, + "loss": 0.5761609077453613, + "loss_ce": 0.00011114442168036476, + "loss_iou": 0.21484375, + "loss_num": 0.0289306640625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 359597988, + "step": 6418 + }, + { + "epoch": 14.296213808463252, + "grad_norm": 110.11897277832031, + "learning_rate": 1e-06, + "loss": 0.4713, + "num_input_tokens_seen": 359655404, + "step": 6419 + }, + { + "epoch": 14.296213808463252, + "loss": 0.3378644287586212, + "loss_ce": 0.00015691184671595693, + "loss_iou": 0.150390625, + "loss_num": 0.007476806640625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 359655404, + "step": 6419 + }, + { + "epoch": 14.298440979955457, + "grad_norm": 15.50516128540039, + "learning_rate": 1e-06, + "loss": 0.3962, + "num_input_tokens_seen": 359712096, + "step": 6420 + }, + { + "epoch": 14.298440979955457, + "loss": 0.3304358124732971, + "loss_ce": 0.00011355809692759067, + "loss_iou": 0.14453125, + "loss_num": 0.00823974609375, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 359712096, + "step": 6420 + }, + { + "epoch": 14.300668151447661, + "grad_norm": 23.723712921142578, + "learning_rate": 1e-06, + "loss": 0.5297, + "num_input_tokens_seen": 359769160, + "step": 6421 + }, + { + "epoch": 14.300668151447661, + "loss": 0.6636959910392761, + "loss_ce": 0.00012177543976576999, + "loss_iou": 0.279296875, + "loss_num": 0.021240234375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 359769160, + "step": 6421 + }, + { + "epoch": 14.302895322939866, + "grad_norm": 13.204316139221191, + "learning_rate": 1e-06, + "loss": 0.7653, + "num_input_tokens_seen": 359824584, + "step": 6422 + }, + { + "epoch": 14.302895322939866, + "loss": 0.5646822452545166, + "loss_ce": 0.00010707967157941312, + "loss_iou": 0.2177734375, + "loss_num": 0.02587890625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 359824584, + "step": 6422 + }, + { + "epoch": 14.305122494432071, + "grad_norm": 25.105751037597656, + "learning_rate": 1e-06, + "loss": 0.5937, + "num_input_tokens_seen": 359877240, + "step": 6423 + }, + { + "epoch": 14.305122494432071, + "loss": 0.6058588027954102, + "loss_ce": 0.00026802660431712866, + "loss_iou": 0.25, + "loss_num": 0.020751953125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 359877240, + "step": 6423 + }, + { + "epoch": 14.307349665924276, + "grad_norm": 20.69352149963379, + "learning_rate": 1e-06, + "loss": 0.579, + "num_input_tokens_seen": 359932336, + "step": 6424 + }, + { + "epoch": 14.307349665924276, + "loss": 0.5356358289718628, + "loss_ce": 0.00011333586007822305, + "loss_iou": 0.248046875, + "loss_num": 0.00811767578125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 359932336, + "step": 6424 + }, + { + "epoch": 14.309576837416481, + "grad_norm": 21.304393768310547, + "learning_rate": 1e-06, + "loss": 0.3508, + "num_input_tokens_seen": 359991588, + "step": 6425 + }, + { + "epoch": 14.309576837416481, + "loss": 0.3918312191963196, + "loss_ce": 0.0001076057887985371, + "loss_iou": 0.173828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 359991588, + "step": 6425 + }, + { + "epoch": 14.311804008908686, + "grad_norm": 16.4875545501709, + "learning_rate": 1e-06, + "loss": 0.3818, + "num_input_tokens_seen": 360046080, + "step": 6426 + }, + { + "epoch": 14.311804008908686, + "loss": 0.42865312099456787, + "loss_ce": 9.47633889154531e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.01165771484375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 360046080, + "step": 6426 + }, + { + "epoch": 14.31403118040089, + "grad_norm": 27.263931274414062, + "learning_rate": 1e-06, + "loss": 0.4138, + "num_input_tokens_seen": 360101588, + "step": 6427 + }, + { + "epoch": 14.31403118040089, + "loss": 0.4932384490966797, + "loss_ce": 0.00013543458771891892, + "loss_iou": 0.212890625, + "loss_num": 0.013427734375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 360101588, + "step": 6427 + }, + { + "epoch": 14.316258351893095, + "grad_norm": 15.546148300170898, + "learning_rate": 1e-06, + "loss": 0.5737, + "num_input_tokens_seen": 360157696, + "step": 6428 + }, + { + "epoch": 14.316258351893095, + "loss": 0.774878740310669, + "loss_ce": 0.00034262199187651277, + "loss_iou": 0.291015625, + "loss_num": 0.0380859375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 360157696, + "step": 6428 + }, + { + "epoch": 14.3184855233853, + "grad_norm": 17.817882537841797, + "learning_rate": 1e-06, + "loss": 0.5475, + "num_input_tokens_seen": 360211328, + "step": 6429 + }, + { + "epoch": 14.3184855233853, + "loss": 0.4766749143600464, + "loss_ce": 0.00011241542233619839, + "loss_iou": 0.205078125, + "loss_num": 0.013427734375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 360211328, + "step": 6429 + }, + { + "epoch": 14.320712694877505, + "grad_norm": 26.077342987060547, + "learning_rate": 1e-06, + "loss": 0.5338, + "num_input_tokens_seen": 360266996, + "step": 6430 + }, + { + "epoch": 14.320712694877505, + "loss": 0.5293970704078674, + "loss_ce": 0.00010019890760304406, + "loss_iou": 0.224609375, + "loss_num": 0.01611328125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 360266996, + "step": 6430 + }, + { + "epoch": 14.32293986636971, + "grad_norm": 21.88715362548828, + "learning_rate": 1e-06, + "loss": 0.5877, + "num_input_tokens_seen": 360324620, + "step": 6431 + }, + { + "epoch": 14.32293986636971, + "loss": 0.5899729132652283, + "loss_ce": 0.00012914868420921266, + "loss_iou": 0.2470703125, + "loss_num": 0.0191650390625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 360324620, + "step": 6431 + }, + { + "epoch": 14.325167037861915, + "grad_norm": 21.561594009399414, + "learning_rate": 1e-06, + "loss": 0.519, + "num_input_tokens_seen": 360381216, + "step": 6432 + }, + { + "epoch": 14.325167037861915, + "loss": 0.6034383773803711, + "loss_ce": 0.00016689574113115668, + "loss_iou": 0.2734375, + "loss_num": 0.01116943359375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 360381216, + "step": 6432 + }, + { + "epoch": 14.32739420935412, + "grad_norm": 34.43752670288086, + "learning_rate": 1e-06, + "loss": 0.8583, + "num_input_tokens_seen": 360434988, + "step": 6433 + }, + { + "epoch": 14.32739420935412, + "loss": 0.6987577080726624, + "loss_ce": 0.00014932786871213466, + "loss_iou": 0.275390625, + "loss_num": 0.030029296875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 360434988, + "step": 6433 + }, + { + "epoch": 14.329621380846325, + "grad_norm": 14.277132034301758, + "learning_rate": 1e-06, + "loss": 0.653, + "num_input_tokens_seen": 360490608, + "step": 6434 + }, + { + "epoch": 14.329621380846325, + "loss": 0.697412371635437, + "loss_ce": 0.0001467484253225848, + "loss_iou": 0.283203125, + "loss_num": 0.026611328125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 360490608, + "step": 6434 + }, + { + "epoch": 14.33184855233853, + "grad_norm": 18.22037696838379, + "learning_rate": 1e-06, + "loss": 0.4797, + "num_input_tokens_seen": 360547056, + "step": 6435 + }, + { + "epoch": 14.33184855233853, + "loss": 0.4767254590988159, + "loss_ce": 0.00010193933121627197, + "loss_iou": 0.2119140625, + "loss_num": 0.0106201171875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 360547056, + "step": 6435 + }, + { + "epoch": 14.334075723830734, + "grad_norm": 16.72195816040039, + "learning_rate": 1e-06, + "loss": 0.3839, + "num_input_tokens_seen": 360604288, + "step": 6436 + }, + { + "epoch": 14.334075723830734, + "loss": 0.38419631123542786, + "loss_ce": 0.00010207582090515643, + "loss_iou": 0.158203125, + "loss_num": 0.01373291015625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 360604288, + "step": 6436 + }, + { + "epoch": 14.33630289532294, + "grad_norm": 16.082204818725586, + "learning_rate": 1e-06, + "loss": 0.5829, + "num_input_tokens_seen": 360662528, + "step": 6437 + }, + { + "epoch": 14.33630289532294, + "loss": 0.446768581867218, + "loss_ce": 0.00011331496352795511, + "loss_iou": 0.19140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 360662528, + "step": 6437 + }, + { + "epoch": 14.338530066815144, + "grad_norm": 19.101377487182617, + "learning_rate": 1e-06, + "loss": 0.3889, + "num_input_tokens_seen": 360717448, + "step": 6438 + }, + { + "epoch": 14.338530066815144, + "loss": 0.5185633897781372, + "loss_ce": 0.00013074232265353203, + "loss_iou": 0.216796875, + "loss_num": 0.016845703125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 360717448, + "step": 6438 + }, + { + "epoch": 14.340757238307349, + "grad_norm": 21.411943435668945, + "learning_rate": 1e-06, + "loss": 0.4566, + "num_input_tokens_seen": 360773220, + "step": 6439 + }, + { + "epoch": 14.340757238307349, + "loss": 0.4258883595466614, + "loss_ce": 0.00010708505578804761, + "loss_iou": 0.197265625, + "loss_num": 0.006378173828125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 360773220, + "step": 6439 + }, + { + "epoch": 14.342984409799554, + "grad_norm": 21.08489418029785, + "learning_rate": 1e-06, + "loss": 0.4344, + "num_input_tokens_seen": 360829380, + "step": 6440 + }, + { + "epoch": 14.342984409799554, + "loss": 0.45614272356033325, + "loss_ce": 8.802305092103779e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0169677734375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 360829380, + "step": 6440 + }, + { + "epoch": 14.345211581291759, + "grad_norm": 19.268587112426758, + "learning_rate": 1e-06, + "loss": 0.4262, + "num_input_tokens_seen": 360885632, + "step": 6441 + }, + { + "epoch": 14.345211581291759, + "loss": 0.561874270439148, + "loss_ce": 0.00010666967136785388, + "loss_iou": 0.220703125, + "loss_num": 0.0240478515625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 360885632, + "step": 6441 + }, + { + "epoch": 14.347438752783964, + "grad_norm": 16.14497947692871, + "learning_rate": 1e-06, + "loss": 0.4857, + "num_input_tokens_seen": 360941352, + "step": 6442 + }, + { + "epoch": 14.347438752783964, + "loss": 0.6506056189537048, + "loss_ce": 9.296346252085641e-05, + "loss_iou": 0.294921875, + "loss_num": 0.01239013671875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 360941352, + "step": 6442 + }, + { + "epoch": 14.34966592427617, + "grad_norm": 16.787885665893555, + "learning_rate": 1e-06, + "loss": 0.3944, + "num_input_tokens_seen": 360996836, + "step": 6443 + }, + { + "epoch": 14.34966592427617, + "loss": 0.44542229175567627, + "loss_ce": 0.00010977737110806629, + "loss_iou": 0.20703125, + "loss_num": 0.005950927734375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 360996836, + "step": 6443 + }, + { + "epoch": 14.351893095768375, + "grad_norm": 18.984052658081055, + "learning_rate": 1e-06, + "loss": 0.7604, + "num_input_tokens_seen": 361050092, + "step": 6444 + }, + { + "epoch": 14.351893095768375, + "loss": 0.9335750341415405, + "loss_ce": 0.00010336375271435827, + "loss_iou": 0.392578125, + "loss_num": 0.0294189453125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 361050092, + "step": 6444 + }, + { + "epoch": 14.35412026726058, + "grad_norm": 16.68486213684082, + "learning_rate": 1e-06, + "loss": 0.3219, + "num_input_tokens_seen": 361106880, + "step": 6445 + }, + { + "epoch": 14.35412026726058, + "loss": 0.3571700155735016, + "loss_ce": 0.00011435095075285062, + "loss_iou": 0.1630859375, + "loss_num": 0.00628662109375, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 361106880, + "step": 6445 + }, + { + "epoch": 14.356347438752785, + "grad_norm": 31.47307014465332, + "learning_rate": 1e-06, + "loss": 0.4125, + "num_input_tokens_seen": 361161316, + "step": 6446 + }, + { + "epoch": 14.356347438752785, + "loss": 0.3598456084728241, + "loss_ce": 0.00010440165351610631, + "loss_iou": 0.1572265625, + "loss_num": 0.00921630859375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 361161316, + "step": 6446 + }, + { + "epoch": 14.35857461024499, + "grad_norm": 20.589462280273438, + "learning_rate": 1e-06, + "loss": 0.425, + "num_input_tokens_seen": 361216012, + "step": 6447 + }, + { + "epoch": 14.35857461024499, + "loss": 0.4665713906288147, + "loss_ce": 0.0001407517702318728, + "loss_iou": 0.201171875, + "loss_num": 0.0126953125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 361216012, + "step": 6447 + }, + { + "epoch": 14.360801781737194, + "grad_norm": 31.11367416381836, + "learning_rate": 1e-06, + "loss": 0.5042, + "num_input_tokens_seen": 361273420, + "step": 6448 + }, + { + "epoch": 14.360801781737194, + "loss": 0.5230991244316101, + "loss_ce": 0.00014988121984060854, + "loss_iou": 0.2373046875, + "loss_num": 0.00958251953125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 361273420, + "step": 6448 + }, + { + "epoch": 14.3630289532294, + "grad_norm": 21.973804473876953, + "learning_rate": 1e-06, + "loss": 0.5315, + "num_input_tokens_seen": 361327552, + "step": 6449 + }, + { + "epoch": 14.3630289532294, + "loss": 0.5520008206367493, + "loss_ce": 0.00012092564429622144, + "loss_iou": 0.2490234375, + "loss_num": 0.0107421875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 361327552, + "step": 6449 + }, + { + "epoch": 14.365256124721604, + "grad_norm": 16.959115982055664, + "learning_rate": 1e-06, + "loss": 0.5215, + "num_input_tokens_seen": 361384308, + "step": 6450 + }, + { + "epoch": 14.365256124721604, + "loss": 0.5186026096343994, + "loss_ce": 0.0001089318175218068, + "loss_iou": 0.2158203125, + "loss_num": 0.0172119140625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 361384308, + "step": 6450 + }, + { + "epoch": 14.367483296213809, + "grad_norm": 18.288415908813477, + "learning_rate": 1e-06, + "loss": 0.3716, + "num_input_tokens_seen": 361441164, + "step": 6451 + }, + { + "epoch": 14.367483296213809, + "loss": 0.2685241103172302, + "loss_ce": 9.149497782345861e-05, + "loss_iou": 0.11181640625, + "loss_num": 0.0089111328125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 361441164, + "step": 6451 + }, + { + "epoch": 14.369710467706014, + "grad_norm": 22.0161190032959, + "learning_rate": 1e-06, + "loss": 0.4447, + "num_input_tokens_seen": 361500032, + "step": 6452 + }, + { + "epoch": 14.369710467706014, + "loss": 0.5349976420402527, + "loss_ce": 0.0001465780078433454, + "loss_iou": 0.2373046875, + "loss_num": 0.01190185546875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 361500032, + "step": 6452 + }, + { + "epoch": 14.371937639198219, + "grad_norm": 22.7355899810791, + "learning_rate": 1e-06, + "loss": 0.4947, + "num_input_tokens_seen": 361555872, + "step": 6453 + }, + { + "epoch": 14.371937639198219, + "loss": 0.46275314688682556, + "loss_ce": 0.00010668374306987971, + "loss_iou": 0.2021484375, + "loss_num": 0.01171875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 361555872, + "step": 6453 + }, + { + "epoch": 14.374164810690424, + "grad_norm": 25.038970947265625, + "learning_rate": 1e-06, + "loss": 0.4937, + "num_input_tokens_seen": 361614468, + "step": 6454 + }, + { + "epoch": 14.374164810690424, + "loss": 0.5680195093154907, + "loss_ce": 8.73721728567034e-05, + "loss_iou": 0.232421875, + "loss_num": 0.0208740234375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 361614468, + "step": 6454 + }, + { + "epoch": 14.376391982182628, + "grad_norm": 18.690160751342773, + "learning_rate": 1e-06, + "loss": 0.6027, + "num_input_tokens_seen": 361670736, + "step": 6455 + }, + { + "epoch": 14.376391982182628, + "loss": 0.7412841320037842, + "loss_ce": 0.00043940101750195026, + "loss_iou": 0.306640625, + "loss_num": 0.0257568359375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 361670736, + "step": 6455 + }, + { + "epoch": 14.378619153674833, + "grad_norm": 13.359357833862305, + "learning_rate": 1e-06, + "loss": 0.4412, + "num_input_tokens_seen": 361725704, + "step": 6456 + }, + { + "epoch": 14.378619153674833, + "loss": 0.3059990406036377, + "loss_ce": 9.084792691282928e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00738525390625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 361725704, + "step": 6456 + }, + { + "epoch": 14.380846325167038, + "grad_norm": 18.930578231811523, + "learning_rate": 1e-06, + "loss": 0.4059, + "num_input_tokens_seen": 361782364, + "step": 6457 + }, + { + "epoch": 14.380846325167038, + "loss": 0.4163687527179718, + "loss_ce": 0.00010898528853431344, + "loss_iou": 0.19140625, + "loss_num": 0.0068359375, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 361782364, + "step": 6457 + }, + { + "epoch": 14.383073496659243, + "grad_norm": 14.719660758972168, + "learning_rate": 1e-06, + "loss": 0.4225, + "num_input_tokens_seen": 361836700, + "step": 6458 + }, + { + "epoch": 14.383073496659243, + "loss": 0.41648274660110474, + "loss_ce": 0.0001009054685709998, + "loss_iou": 0.181640625, + "loss_num": 0.0107421875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 361836700, + "step": 6458 + }, + { + "epoch": 14.385300668151448, + "grad_norm": 17.509685516357422, + "learning_rate": 1e-06, + "loss": 0.4016, + "num_input_tokens_seen": 361894212, + "step": 6459 + }, + { + "epoch": 14.385300668151448, + "loss": 0.42406773567199707, + "loss_ce": 0.0001785791973816231, + "loss_iou": 0.16015625, + "loss_num": 0.0206298828125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 361894212, + "step": 6459 + }, + { + "epoch": 14.387527839643653, + "grad_norm": 21.18560218811035, + "learning_rate": 1e-06, + "loss": 0.6036, + "num_input_tokens_seen": 361949500, + "step": 6460 + }, + { + "epoch": 14.387527839643653, + "loss": 0.5647926926612854, + "loss_ce": 9.54304778133519e-05, + "loss_iou": 0.25390625, + "loss_num": 0.01104736328125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 361949500, + "step": 6460 + }, + { + "epoch": 14.389755011135858, + "grad_norm": 18.161174774169922, + "learning_rate": 1e-06, + "loss": 0.4029, + "num_input_tokens_seen": 362002964, + "step": 6461 + }, + { + "epoch": 14.389755011135858, + "loss": 0.3434833586215973, + "loss_ce": 9.955540008377284e-05, + "loss_iou": 0.138671875, + "loss_num": 0.01336669921875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 362002964, + "step": 6461 + }, + { + "epoch": 14.391982182628063, + "grad_norm": 18.891141891479492, + "learning_rate": 1e-06, + "loss": 0.4224, + "num_input_tokens_seen": 362056564, + "step": 6462 + }, + { + "epoch": 14.391982182628063, + "loss": 0.3922403156757355, + "loss_ce": 0.00015046796761453152, + "loss_iou": 0.16796875, + "loss_num": 0.0111083984375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 362056564, + "step": 6462 + }, + { + "epoch": 14.394209354120267, + "grad_norm": 42.18777847290039, + "learning_rate": 1e-06, + "loss": 0.5473, + "num_input_tokens_seen": 362113504, + "step": 6463 + }, + { + "epoch": 14.394209354120267, + "loss": 0.6558715105056763, + "loss_ce": 0.00010980549268424511, + "loss_iou": 0.30078125, + "loss_num": 0.01080322265625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 362113504, + "step": 6463 + }, + { + "epoch": 14.396436525612472, + "grad_norm": 17.04292869567871, + "learning_rate": 1e-06, + "loss": 0.5568, + "num_input_tokens_seen": 362168292, + "step": 6464 + }, + { + "epoch": 14.396436525612472, + "loss": 0.5546635985374451, + "loss_ce": 9.817900718189776e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.01104736328125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 362168292, + "step": 6464 + }, + { + "epoch": 14.398663697104677, + "grad_norm": 18.7966251373291, + "learning_rate": 1e-06, + "loss": 0.4122, + "num_input_tokens_seen": 362226676, + "step": 6465 + }, + { + "epoch": 14.398663697104677, + "loss": 0.5053236484527588, + "loss_ce": 0.0001357014843961224, + "loss_iou": 0.2099609375, + "loss_num": 0.01708984375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 362226676, + "step": 6465 + }, + { + "epoch": 14.400890868596882, + "grad_norm": 17.50356674194336, + "learning_rate": 1e-06, + "loss": 0.4747, + "num_input_tokens_seen": 362285324, + "step": 6466 + }, + { + "epoch": 14.400890868596882, + "loss": 0.6701542735099792, + "loss_ce": 0.00011035045463358983, + "loss_iou": 0.283203125, + "loss_num": 0.02099609375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 362285324, + "step": 6466 + }, + { + "epoch": 14.403118040089087, + "grad_norm": 18.688034057617188, + "learning_rate": 1e-06, + "loss": 0.4448, + "num_input_tokens_seen": 362343140, + "step": 6467 + }, + { + "epoch": 14.403118040089087, + "loss": 0.6092594861984253, + "loss_ce": 0.00012863794108852744, + "loss_iou": 0.26171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 362343140, + "step": 6467 + }, + { + "epoch": 14.405345211581292, + "grad_norm": 25.48725700378418, + "learning_rate": 1e-06, + "loss": 0.5325, + "num_input_tokens_seen": 362401224, + "step": 6468 + }, + { + "epoch": 14.405345211581292, + "loss": 0.5204082131385803, + "loss_ce": 0.0001445315283490345, + "loss_iou": 0.236328125, + "loss_num": 0.009765625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 362401224, + "step": 6468 + }, + { + "epoch": 14.407572383073497, + "grad_norm": 19.30179786682129, + "learning_rate": 1e-06, + "loss": 0.4949, + "num_input_tokens_seen": 362459744, + "step": 6469 + }, + { + "epoch": 14.407572383073497, + "loss": 0.3543410301208496, + "loss_ce": 9.295364725403488e-05, + "loss_iou": 0.15234375, + "loss_num": 0.00994873046875, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 362459744, + "step": 6469 + }, + { + "epoch": 14.409799554565701, + "grad_norm": 19.212997436523438, + "learning_rate": 1e-06, + "loss": 0.5026, + "num_input_tokens_seen": 362516148, + "step": 6470 + }, + { + "epoch": 14.409799554565701, + "loss": 0.34019219875335693, + "loss_ce": 0.00010428522364236414, + "loss_iou": 0.1494140625, + "loss_num": 0.0084228515625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 362516148, + "step": 6470 + }, + { + "epoch": 14.412026726057906, + "grad_norm": 20.393836975097656, + "learning_rate": 1e-06, + "loss": 0.5051, + "num_input_tokens_seen": 362571256, + "step": 6471 + }, + { + "epoch": 14.412026726057906, + "loss": 0.49153202772140503, + "loss_ce": 0.0003210945869795978, + "loss_iou": 0.1923828125, + "loss_num": 0.021240234375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 362571256, + "step": 6471 + }, + { + "epoch": 14.414253897550111, + "grad_norm": 48.49162292480469, + "learning_rate": 1e-06, + "loss": 0.4618, + "num_input_tokens_seen": 362627180, + "step": 6472 + }, + { + "epoch": 14.414253897550111, + "loss": 0.5423309206962585, + "loss_ce": 9.457490523345768e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0181884765625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 362627180, + "step": 6472 + }, + { + "epoch": 14.416481069042316, + "grad_norm": 38.27447509765625, + "learning_rate": 1e-06, + "loss": 0.6114, + "num_input_tokens_seen": 362681880, + "step": 6473 + }, + { + "epoch": 14.416481069042316, + "loss": 0.6459391117095947, + "loss_ce": 0.00012608422548510134, + "loss_iou": 0.2734375, + "loss_num": 0.020263671875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 362681880, + "step": 6473 + }, + { + "epoch": 14.41870824053452, + "grad_norm": 25.192468643188477, + "learning_rate": 1e-06, + "loss": 0.5313, + "num_input_tokens_seen": 362738364, + "step": 6474 + }, + { + "epoch": 14.41870824053452, + "loss": 0.44662946462631226, + "loss_ce": 9.624061931390315e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.006927490234375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 362738364, + "step": 6474 + }, + { + "epoch": 14.420935412026726, + "grad_norm": 19.721994400024414, + "learning_rate": 1e-06, + "loss": 0.5204, + "num_input_tokens_seen": 362793976, + "step": 6475 + }, + { + "epoch": 14.420935412026726, + "loss": 0.530852198600769, + "loss_ce": 9.049595973920077e-05, + "loss_iou": 0.208984375, + "loss_num": 0.022705078125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 362793976, + "step": 6475 + }, + { + "epoch": 14.42316258351893, + "grad_norm": 25.936969757080078, + "learning_rate": 1e-06, + "loss": 0.5892, + "num_input_tokens_seen": 362849184, + "step": 6476 + }, + { + "epoch": 14.42316258351893, + "loss": 0.6352825164794922, + "loss_ce": 0.00015069330402184278, + "loss_iou": 0.27734375, + "loss_num": 0.016357421875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 362849184, + "step": 6476 + }, + { + "epoch": 14.425389755011135, + "grad_norm": 14.354238510131836, + "learning_rate": 1e-06, + "loss": 0.3588, + "num_input_tokens_seen": 362902516, + "step": 6477 + }, + { + "epoch": 14.425389755011135, + "loss": 0.4610298275947571, + "loss_ce": 9.230234718415886e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.02294921875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 362902516, + "step": 6477 + }, + { + "epoch": 14.42761692650334, + "grad_norm": 16.179262161254883, + "learning_rate": 1e-06, + "loss": 0.4084, + "num_input_tokens_seen": 362954364, + "step": 6478 + }, + { + "epoch": 14.42761692650334, + "loss": 0.2978529930114746, + "loss_ce": 9.299164230469614e-05, + "loss_iou": 0.130859375, + "loss_num": 0.00726318359375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 362954364, + "step": 6478 + }, + { + "epoch": 14.429844097995545, + "grad_norm": 38.88999557495117, + "learning_rate": 1e-06, + "loss": 0.4598, + "num_input_tokens_seen": 363011612, + "step": 6479 + }, + { + "epoch": 14.429844097995545, + "loss": 0.5600452423095703, + "loss_ce": 0.00016974634490907192, + "loss_iou": 0.26171875, + "loss_num": 0.007354736328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 363011612, + "step": 6479 + }, + { + "epoch": 14.43207126948775, + "grad_norm": 34.526512145996094, + "learning_rate": 1e-06, + "loss": 0.4824, + "num_input_tokens_seen": 363069500, + "step": 6480 + }, + { + "epoch": 14.43207126948775, + "loss": 0.4800405502319336, + "loss_ce": 0.00012108102964702994, + "loss_iou": 0.193359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 363069500, + "step": 6480 + }, + { + "epoch": 14.434298440979955, + "grad_norm": 24.97442626953125, + "learning_rate": 1e-06, + "loss": 0.5423, + "num_input_tokens_seen": 363125684, + "step": 6481 + }, + { + "epoch": 14.434298440979955, + "loss": 0.3962355852127075, + "loss_ce": 0.00011745323718059808, + "loss_iou": 0.1728515625, + "loss_num": 0.0103759765625, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 363125684, + "step": 6481 + }, + { + "epoch": 14.43652561247216, + "grad_norm": 13.889195442199707, + "learning_rate": 1e-06, + "loss": 0.4004, + "num_input_tokens_seen": 363181616, + "step": 6482 + }, + { + "epoch": 14.43652561247216, + "loss": 0.4475998878479004, + "loss_ce": 0.0001511801965534687, + "loss_iou": 0.1943359375, + "loss_num": 0.01190185546875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 363181616, + "step": 6482 + }, + { + "epoch": 14.438752783964365, + "grad_norm": 16.7701358795166, + "learning_rate": 1e-06, + "loss": 0.3514, + "num_input_tokens_seen": 363238068, + "step": 6483 + }, + { + "epoch": 14.438752783964365, + "loss": 0.38326865434646606, + "loss_ce": 8.997365512186661e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0118408203125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 363238068, + "step": 6483 + }, + { + "epoch": 14.44097995545657, + "grad_norm": 16.830076217651367, + "learning_rate": 1e-06, + "loss": 0.4811, + "num_input_tokens_seen": 363293032, + "step": 6484 + }, + { + "epoch": 14.44097995545657, + "loss": 0.4167139530181885, + "loss_ce": 8.8007356680464e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.01092529296875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 363293032, + "step": 6484 + }, + { + "epoch": 14.443207126948774, + "grad_norm": 13.648219108581543, + "learning_rate": 1e-06, + "loss": 0.543, + "num_input_tokens_seen": 363349520, + "step": 6485 + }, + { + "epoch": 14.443207126948774, + "loss": 0.47776174545288086, + "loss_ce": 0.0002226911747129634, + "loss_iou": 0.1865234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 363349520, + "step": 6485 + }, + { + "epoch": 14.44543429844098, + "grad_norm": 14.256153106689453, + "learning_rate": 1e-06, + "loss": 0.4144, + "num_input_tokens_seen": 363407380, + "step": 6486 + }, + { + "epoch": 14.44543429844098, + "loss": 0.21311235427856445, + "loss_ce": 9.964955097530037e-05, + "loss_iou": 0.0927734375, + "loss_num": 0.00543212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 363407380, + "step": 6486 + }, + { + "epoch": 14.447661469933184, + "grad_norm": 20.257829666137695, + "learning_rate": 1e-06, + "loss": 0.4631, + "num_input_tokens_seen": 363463888, + "step": 6487 + }, + { + "epoch": 14.447661469933184, + "loss": 0.4675000011920929, + "loss_ce": 9.276315540773794e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01611328125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 363463888, + "step": 6487 + }, + { + "epoch": 14.449888641425389, + "grad_norm": 15.722443580627441, + "learning_rate": 1e-06, + "loss": 0.401, + "num_input_tokens_seen": 363520880, + "step": 6488 + }, + { + "epoch": 14.449888641425389, + "loss": 0.3268738389015198, + "loss_ce": 9.159540786640719e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0084228515625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 363520880, + "step": 6488 + }, + { + "epoch": 14.452115812917596, + "grad_norm": 24.610681533813477, + "learning_rate": 1e-06, + "loss": 0.5481, + "num_input_tokens_seen": 363575452, + "step": 6489 + }, + { + "epoch": 14.452115812917596, + "loss": 0.42991262674331665, + "loss_ce": 0.00010304449824616313, + "loss_iou": 0.19140625, + "loss_num": 0.00933837890625, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 363575452, + "step": 6489 + }, + { + "epoch": 14.4543429844098, + "grad_norm": 15.40800952911377, + "learning_rate": 1e-06, + "loss": 0.32, + "num_input_tokens_seen": 363630124, + "step": 6490 + }, + { + "epoch": 14.4543429844098, + "loss": 0.35164231061935425, + "loss_ce": 9.507540380582213e-05, + "loss_iou": 0.15625, + "loss_num": 0.007659912109375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 363630124, + "step": 6490 + }, + { + "epoch": 14.456570155902005, + "grad_norm": 27.49700927734375, + "learning_rate": 1e-06, + "loss": 0.4365, + "num_input_tokens_seen": 363683792, + "step": 6491 + }, + { + "epoch": 14.456570155902005, + "loss": 0.5234502553939819, + "loss_ce": 0.00013481616042554379, + "loss_iou": 0.2412109375, + "loss_num": 0.00823974609375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 363683792, + "step": 6491 + }, + { + "epoch": 14.45879732739421, + "grad_norm": 16.735675811767578, + "learning_rate": 1e-06, + "loss": 0.359, + "num_input_tokens_seen": 363740708, + "step": 6492 + }, + { + "epoch": 14.45879732739421, + "loss": 0.35196787118911743, + "loss_ce": 0.00010018555622082204, + "loss_iou": 0.13671875, + "loss_num": 0.0157470703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 363740708, + "step": 6492 + }, + { + "epoch": 14.461024498886415, + "grad_norm": 27.95579719543457, + "learning_rate": 1e-06, + "loss": 0.5228, + "num_input_tokens_seen": 363795664, + "step": 6493 + }, + { + "epoch": 14.461024498886415, + "loss": 0.5091565847396851, + "loss_ce": 0.00012336287181824446, + "loss_iou": 0.2333984375, + "loss_num": 0.00836181640625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 363795664, + "step": 6493 + }, + { + "epoch": 14.46325167037862, + "grad_norm": 20.272802352905273, + "learning_rate": 1e-06, + "loss": 0.4311, + "num_input_tokens_seen": 363853956, + "step": 6494 + }, + { + "epoch": 14.46325167037862, + "loss": 0.4811999201774597, + "loss_ce": 0.00012079046427970752, + "loss_iou": 0.220703125, + "loss_num": 0.00799560546875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 363853956, + "step": 6494 + }, + { + "epoch": 14.465478841870825, + "grad_norm": 18.344619750976562, + "learning_rate": 1e-06, + "loss": 0.2613, + "num_input_tokens_seen": 363911696, + "step": 6495 + }, + { + "epoch": 14.465478841870825, + "loss": 0.237228661775589, + "loss_ce": 0.00010707815818022937, + "loss_iou": 0.1044921875, + "loss_num": 0.005523681640625, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 363911696, + "step": 6495 + }, + { + "epoch": 14.46770601336303, + "grad_norm": 35.97431564331055, + "learning_rate": 1e-06, + "loss": 0.8532, + "num_input_tokens_seen": 363966144, + "step": 6496 + }, + { + "epoch": 14.46770601336303, + "loss": 0.8004045486450195, + "loss_ce": 0.00011163462477270514, + "loss_iou": 0.34375, + "loss_num": 0.0223388671875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 363966144, + "step": 6496 + }, + { + "epoch": 14.469933184855234, + "grad_norm": 33.640872955322266, + "learning_rate": 1e-06, + "loss": 0.5273, + "num_input_tokens_seen": 364021896, + "step": 6497 + }, + { + "epoch": 14.469933184855234, + "loss": 0.3864748775959015, + "loss_ce": 0.00012233102461323142, + "loss_iou": 0.1669921875, + "loss_num": 0.01055908203125, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 364021896, + "step": 6497 + }, + { + "epoch": 14.47216035634744, + "grad_norm": 18.709012985229492, + "learning_rate": 1e-06, + "loss": 0.4962, + "num_input_tokens_seen": 364079360, + "step": 6498 + }, + { + "epoch": 14.47216035634744, + "loss": 0.40141546726226807, + "loss_ce": 0.00010935450700344518, + "loss_iou": 0.1708984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 364079360, + "step": 6498 + }, + { + "epoch": 14.474387527839644, + "grad_norm": 18.93471908569336, + "learning_rate": 1e-06, + "loss": 0.3134, + "num_input_tokens_seen": 364136176, + "step": 6499 + }, + { + "epoch": 14.474387527839644, + "loss": 0.36948075890541077, + "loss_ce": 0.00021804316202178597, + "loss_iou": 0.1591796875, + "loss_num": 0.01007080078125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 364136176, + "step": 6499 + }, + { + "epoch": 14.476614699331849, + "grad_norm": 21.971736907958984, + "learning_rate": 1e-06, + "loss": 0.4228, + "num_input_tokens_seen": 364192972, + "step": 6500 + }, + { + "epoch": 14.476614699331849, + "eval_seeclick_web_CIoU": 0.5850892961025238, + "eval_seeclick_web_GIoU": 0.5826087892055511, + "eval_seeclick_web_IoU": 0.6035565435886383, + "eval_seeclick_web_MAE_all": 0.015261294320225716, + "eval_seeclick_web_MAE_h": 0.007648690138012171, + "eval_seeclick_web_MAE_w": 0.015442864038050175, + "eval_seeclick_web_MAE_x_boxes": 0.009242635453119874, + "eval_seeclick_web_MAE_y_boxes": 0.021023853914812207, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9148587584495544, + "eval_seeclick_web_loss_ce": 0.0001663194634602405, + "eval_seeclick_web_loss_iou": 0.4219970703125, + "eval_seeclick_web_loss_num": 0.01227569580078125, + "eval_seeclick_web_loss_xval": 0.9052734375, + "eval_seeclick_web_runtime": 22.0534, + "eval_seeclick_web_samples_per_second": 2.267, + "eval_seeclick_web_steps_per_second": 0.091, + "num_input_tokens_seen": 364192972, + "step": 6500 + }, + { + "epoch": 14.476614699331849, + "eval_icons_CIoU": 0.294839546084404, + "eval_icons_GIoU": 0.3122626394033432, + "eval_icons_IoU": 0.36477692425251007, + "eval_icons_MAE_all": 0.05375087633728981, + "eval_icons_MAE_h": 0.032996498979628086, + "eval_icons_MAE_w": 0.051369220949709415, + "eval_icons_MAE_x_boxes": 0.05044420249760151, + "eval_icons_MAE_y_boxes": 0.03664529975503683, + "eval_icons_inside_bbox": 0.6649305522441864, + "eval_icons_loss": 1.6395889520645142, + "eval_icons_loss_ce": 0.00020623258751584217, + "eval_icons_loss_iou": 0.659423828125, + "eval_icons_loss_num": 0.05293846130371094, + "eval_icons_loss_xval": 1.583984375, + "eval_icons_runtime": 19.0398, + "eval_icons_samples_per_second": 2.626, + "eval_icons_steps_per_second": 0.105, + "num_input_tokens_seen": 364192972, + "step": 6500 + }, + { + "epoch": 14.476614699331849, + "eval_screenspot_CIoU": 0.37580615282058716, + "eval_screenspot_GIoU": 0.3901708126068115, + "eval_screenspot_IoU": 0.44902459780375165, + "eval_screenspot_MAE_all": 0.054527596880992256, + "eval_screenspot_MAE_h": 0.03962646176417669, + "eval_screenspot_MAE_w": 0.061993442475795746, + "eval_screenspot_MAE_x_boxes": 0.07007026796539624, + "eval_screenspot_MAE_y_boxes": 0.037376622669398785, + "eval_screenspot_inside_bbox": 0.7041666706403097, + "eval_screenspot_loss": 1.547979474067688, + "eval_screenspot_loss_ce": 0.00023582103798010698, + "eval_screenspot_loss_iou": 0.646484375, + "eval_screenspot_loss_num": 0.06189727783203125, + "eval_screenspot_loss_xval": 1.60205078125, + "eval_screenspot_runtime": 35.3553, + "eval_screenspot_samples_per_second": 2.517, + "eval_screenspot_steps_per_second": 0.085, + "num_input_tokens_seen": 364192972, + "step": 6500 + }, + { + "epoch": 14.476614699331849, + "eval_compot_CIoU": 0.3404065817594528, + "eval_compot_GIoU": 0.35687774419784546, + "eval_compot_IoU": 0.40112267434597015, + "eval_compot_MAE_all": 0.01965143345296383, + "eval_compot_MAE_h": 0.013854971155524254, + "eval_compot_MAE_w": 0.02131012175232172, + "eval_compot_MAE_x_boxes": 0.030210323631763458, + "eval_compot_MAE_y_boxes": 0.006537551758810878, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4148516654968262, + "eval_compot_loss_ce": 0.00016382582543883473, + "eval_compot_loss_iou": 0.649169921875, + "eval_compot_loss_num": 0.018445968627929688, + "eval_compot_loss_xval": 1.390380859375, + "eval_compot_runtime": 21.0154, + "eval_compot_samples_per_second": 2.379, + "eval_compot_steps_per_second": 0.095, + "num_input_tokens_seen": 364192972, + "step": 6500 + }, + { + "epoch": 14.476614699331849, + "eval_custom_ui_val_CIoU": 0.4791228680147065, + "eval_custom_ui_val_GIoU": 0.4861221942636702, + "eval_custom_ui_val_IoU": 0.5396713250213199, + "eval_custom_ui_val_MAE_all": 0.02759691560640931, + "eval_custom_ui_val_MAE_h": 0.015171076895462142, + "eval_custom_ui_val_MAE_w": 0.03542729518893692, + "eval_custom_ui_val_MAE_x_boxes": 0.03368757442674703, + "eval_custom_ui_val_MAE_y_boxes": 0.013593513725532426, + "eval_custom_ui_val_inside_bbox": 0.7789351873927646, + "eval_custom_ui_val_loss": 1.167110800743103, + "eval_custom_ui_val_loss_ce": 0.00018443458328773786, + "eval_custom_ui_val_loss_iou": 0.5000135633680556, + "eval_custom_ui_val_loss_num": 0.024277369181315105, + "eval_custom_ui_val_loss_xval": 1.1207139756944444, + "eval_custom_ui_val_runtime": 62.376, + "eval_custom_ui_val_samples_per_second": 4.248, + "eval_custom_ui_val_steps_per_second": 0.144, + "num_input_tokens_seen": 364192972, + "step": 6500 + }, + { + "epoch": 14.476614699331849, + "loss": 0.8343714475631714, + "loss_ce": 0.00014291857951320708, + "loss_iou": 0.375, + "loss_num": 0.0164794921875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 364192972, + "step": 6500 + }, + { + "epoch": 14.478841870824054, + "grad_norm": 12.816341400146484, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 364250380, + "step": 6501 + }, + { + "epoch": 14.478841870824054, + "loss": 0.26279568672180176, + "loss_ce": 0.00010036412277258933, + "loss_iou": 0.109375, + "loss_num": 0.00872802734375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 364250380, + "step": 6501 + }, + { + "epoch": 14.481069042316259, + "grad_norm": 12.928550720214844, + "learning_rate": 1e-06, + "loss": 0.4519, + "num_input_tokens_seen": 364306508, + "step": 6502 + }, + { + "epoch": 14.481069042316259, + "loss": 0.47788378596305847, + "loss_ce": 0.00010056734754471108, + "loss_iou": 0.2080078125, + "loss_num": 0.01220703125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 364306508, + "step": 6502 + }, + { + "epoch": 14.483296213808464, + "grad_norm": 17.1734619140625, + "learning_rate": 1e-06, + "loss": 0.4719, + "num_input_tokens_seen": 364362888, + "step": 6503 + }, + { + "epoch": 14.483296213808464, + "loss": 0.48692283034324646, + "loss_ce": 0.00010643218411132693, + "loss_iou": 0.20703125, + "loss_num": 0.014404296875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 364362888, + "step": 6503 + }, + { + "epoch": 14.485523385300668, + "grad_norm": 24.356948852539062, + "learning_rate": 1e-06, + "loss": 0.5933, + "num_input_tokens_seen": 364418476, + "step": 6504 + }, + { + "epoch": 14.485523385300668, + "loss": 0.5144957900047302, + "loss_ce": 9.150611003860831e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.01202392578125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 364418476, + "step": 6504 + }, + { + "epoch": 14.487750556792873, + "grad_norm": 21.26868438720703, + "learning_rate": 1e-06, + "loss": 0.4377, + "num_input_tokens_seen": 364474968, + "step": 6505 + }, + { + "epoch": 14.487750556792873, + "loss": 0.3293471932411194, + "loss_ce": 0.00012356144725345075, + "loss_iou": 0.1376953125, + "loss_num": 0.0106201171875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 364474968, + "step": 6505 + }, + { + "epoch": 14.489977728285078, + "grad_norm": 12.99026107788086, + "learning_rate": 1e-06, + "loss": 0.4491, + "num_input_tokens_seen": 364532716, + "step": 6506 + }, + { + "epoch": 14.489977728285078, + "loss": 0.25777584314346313, + "loss_ce": 8.539756527170539e-05, + "loss_iou": 0.10595703125, + "loss_num": 0.0091552734375, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 364532716, + "step": 6506 + }, + { + "epoch": 14.492204899777283, + "grad_norm": 15.61511516571045, + "learning_rate": 1e-06, + "loss": 0.4898, + "num_input_tokens_seen": 364589292, + "step": 6507 + }, + { + "epoch": 14.492204899777283, + "loss": 0.5754222869873047, + "loss_ce": 0.00010490816930541769, + "loss_iou": 0.2451171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 364589292, + "step": 6507 + }, + { + "epoch": 14.494432071269488, + "grad_norm": 17.655942916870117, + "learning_rate": 1e-06, + "loss": 0.4318, + "num_input_tokens_seen": 364644880, + "step": 6508 + }, + { + "epoch": 14.494432071269488, + "loss": 0.3868103623390198, + "loss_ce": 9.161405614577234e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00994873046875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 364644880, + "step": 6508 + }, + { + "epoch": 14.496659242761693, + "grad_norm": 20.9801082611084, + "learning_rate": 1e-06, + "loss": 0.4926, + "num_input_tokens_seen": 364702920, + "step": 6509 + }, + { + "epoch": 14.496659242761693, + "loss": 0.48010796308517456, + "loss_ce": 0.00012747629079967737, + "loss_iou": 0.2255859375, + "loss_num": 0.005859375, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 364702920, + "step": 6509 + }, + { + "epoch": 14.498886414253898, + "grad_norm": 20.405927658081055, + "learning_rate": 1e-06, + "loss": 0.439, + "num_input_tokens_seen": 364757992, + "step": 6510 + }, + { + "epoch": 14.498886414253898, + "loss": 0.642722487449646, + "loss_ce": 0.00014438082871492952, + "loss_iou": 0.255859375, + "loss_num": 0.0257568359375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 364757992, + "step": 6510 + }, + { + "epoch": 14.501113585746102, + "grad_norm": 22.799161911010742, + "learning_rate": 1e-06, + "loss": 0.3945, + "num_input_tokens_seen": 364815876, + "step": 6511 + }, + { + "epoch": 14.501113585746102, + "loss": 0.5448106527328491, + "loss_ce": 0.00013291001960169524, + "loss_iou": 0.244140625, + "loss_num": 0.01123046875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 364815876, + "step": 6511 + }, + { + "epoch": 14.503340757238307, + "grad_norm": 18.602733612060547, + "learning_rate": 1e-06, + "loss": 0.533, + "num_input_tokens_seen": 364871856, + "step": 6512 + }, + { + "epoch": 14.503340757238307, + "loss": 0.3779085874557495, + "loss_ce": 0.00010096091136801988, + "loss_iou": 0.142578125, + "loss_num": 0.018310546875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 364871856, + "step": 6512 + }, + { + "epoch": 14.505567928730512, + "grad_norm": 35.201637268066406, + "learning_rate": 1e-06, + "loss": 0.6026, + "num_input_tokens_seen": 364927504, + "step": 6513 + }, + { + "epoch": 14.505567928730512, + "loss": 0.4659165143966675, + "loss_ce": 9.620728815207258e-05, + "loss_iou": 0.2109375, + "loss_num": 0.00872802734375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 364927504, + "step": 6513 + }, + { + "epoch": 14.507795100222717, + "grad_norm": 19.808265686035156, + "learning_rate": 1e-06, + "loss": 0.5087, + "num_input_tokens_seen": 364985060, + "step": 6514 + }, + { + "epoch": 14.507795100222717, + "loss": 0.5555931329727173, + "loss_ce": 0.00011220310989301652, + "loss_iou": 0.2314453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 364985060, + "step": 6514 + }, + { + "epoch": 14.510022271714922, + "grad_norm": 14.76258373260498, + "learning_rate": 1e-06, + "loss": 0.3267, + "num_input_tokens_seen": 365042232, + "step": 6515 + }, + { + "epoch": 14.510022271714922, + "loss": 0.3308001160621643, + "loss_ce": 0.00011161710426677018, + "loss_iou": 0.154296875, + "loss_num": 0.00445556640625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 365042232, + "step": 6515 + }, + { + "epoch": 14.512249443207127, + "grad_norm": 15.700284004211426, + "learning_rate": 1e-06, + "loss": 0.5323, + "num_input_tokens_seen": 365094928, + "step": 6516 + }, + { + "epoch": 14.512249443207127, + "loss": 0.48436519503593445, + "loss_ce": 0.00011227864888496697, + "loss_iou": 0.1943359375, + "loss_num": 0.019287109375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 365094928, + "step": 6516 + }, + { + "epoch": 14.514476614699332, + "grad_norm": 12.311739921569824, + "learning_rate": 1e-06, + "loss": 0.3459, + "num_input_tokens_seen": 365152328, + "step": 6517 + }, + { + "epoch": 14.514476614699332, + "loss": 0.41318339109420776, + "loss_ce": 9.744486305862665e-05, + "loss_iou": 0.177734375, + "loss_num": 0.01177978515625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 365152328, + "step": 6517 + }, + { + "epoch": 14.516703786191536, + "grad_norm": 16.08889389038086, + "learning_rate": 1e-06, + "loss": 0.4272, + "num_input_tokens_seen": 365211004, + "step": 6518 + }, + { + "epoch": 14.516703786191536, + "loss": 0.40454918146133423, + "loss_ce": 0.0002522985450923443, + "loss_iou": 0.1669921875, + "loss_num": 0.01416015625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 365211004, + "step": 6518 + }, + { + "epoch": 14.518930957683741, + "grad_norm": 15.88845157623291, + "learning_rate": 1e-06, + "loss": 0.474, + "num_input_tokens_seen": 365267152, + "step": 6519 + }, + { + "epoch": 14.518930957683741, + "loss": 0.4582583010196686, + "loss_ce": 0.0001284035388380289, + "loss_iou": 0.203125, + "loss_num": 0.01043701171875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 365267152, + "step": 6519 + }, + { + "epoch": 14.521158129175946, + "grad_norm": 22.944534301757812, + "learning_rate": 1e-06, + "loss": 0.3661, + "num_input_tokens_seen": 365322816, + "step": 6520 + }, + { + "epoch": 14.521158129175946, + "loss": 0.3829045295715332, + "loss_ce": 0.00021408403699751943, + "loss_iou": 0.166015625, + "loss_num": 0.0101318359375, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 365322816, + "step": 6520 + }, + { + "epoch": 14.523385300668151, + "grad_norm": 15.910293579101562, + "learning_rate": 1e-06, + "loss": 0.5393, + "num_input_tokens_seen": 365377560, + "step": 6521 + }, + { + "epoch": 14.523385300668151, + "loss": 0.6288875937461853, + "loss_ce": 0.0001033980370266363, + "loss_iou": 0.27734375, + "loss_num": 0.014404296875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 365377560, + "step": 6521 + }, + { + "epoch": 14.525612472160356, + "grad_norm": 31.319820404052734, + "learning_rate": 1e-06, + "loss": 0.5413, + "num_input_tokens_seen": 365431876, + "step": 6522 + }, + { + "epoch": 14.525612472160356, + "loss": 0.45200419425964355, + "loss_ce": 9.987234807340428e-05, + "loss_iou": 0.201171875, + "loss_num": 0.010009765625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 365431876, + "step": 6522 + }, + { + "epoch": 14.52783964365256, + "grad_norm": 33.108680725097656, + "learning_rate": 1e-06, + "loss": 0.5389, + "num_input_tokens_seen": 365487292, + "step": 6523 + }, + { + "epoch": 14.52783964365256, + "loss": 0.4871612787246704, + "loss_ce": 0.0001007293612929061, + "loss_iou": 0.2138671875, + "loss_num": 0.01202392578125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 365487292, + "step": 6523 + }, + { + "epoch": 14.530066815144766, + "grad_norm": 26.078786849975586, + "learning_rate": 1e-06, + "loss": 0.3983, + "num_input_tokens_seen": 365545116, + "step": 6524 + }, + { + "epoch": 14.530066815144766, + "loss": 0.5119673013687134, + "loss_ce": 0.00012648927804548293, + "loss_iou": 0.21875, + "loss_num": 0.01495361328125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 365545116, + "step": 6524 + }, + { + "epoch": 14.53229398663697, + "grad_norm": 21.211854934692383, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 365603000, + "step": 6525 + }, + { + "epoch": 14.53229398663697, + "loss": 0.5028167963027954, + "loss_ce": 0.00013121790834702551, + "loss_iou": 0.21484375, + "loss_num": 0.014404296875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 365603000, + "step": 6525 + }, + { + "epoch": 14.534521158129175, + "grad_norm": 13.118301391601562, + "learning_rate": 1e-06, + "loss": 0.3706, + "num_input_tokens_seen": 365658648, + "step": 6526 + }, + { + "epoch": 14.534521158129175, + "loss": 0.37536337971687317, + "loss_ce": 0.00011921751865884289, + "loss_iou": 0.16796875, + "loss_num": 0.0079345703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 365658648, + "step": 6526 + }, + { + "epoch": 14.53674832962138, + "grad_norm": 12.883088111877441, + "learning_rate": 1e-06, + "loss": 0.4133, + "num_input_tokens_seen": 365713996, + "step": 6527 + }, + { + "epoch": 14.53674832962138, + "loss": 0.4605136811733246, + "loss_ce": 0.0004306669579818845, + "loss_iou": 0.2021484375, + "loss_num": 0.01104736328125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 365713996, + "step": 6527 + }, + { + "epoch": 14.538975501113585, + "grad_norm": 21.47534942626953, + "learning_rate": 1e-06, + "loss": 0.3516, + "num_input_tokens_seen": 365769532, + "step": 6528 + }, + { + "epoch": 14.538975501113585, + "loss": 0.3355334401130676, + "loss_ce": 8.420874655712396e-05, + "loss_iou": 0.150390625, + "loss_num": 0.007110595703125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 365769532, + "step": 6528 + }, + { + "epoch": 14.54120267260579, + "grad_norm": 17.586118698120117, + "learning_rate": 1e-06, + "loss": 0.549, + "num_input_tokens_seen": 365826440, + "step": 6529 + }, + { + "epoch": 14.54120267260579, + "loss": 0.5860310792922974, + "loss_ce": 9.35560601647012e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.02294921875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 365826440, + "step": 6529 + }, + { + "epoch": 14.543429844097995, + "grad_norm": 19.080286026000977, + "learning_rate": 1e-06, + "loss": 0.4543, + "num_input_tokens_seen": 365882108, + "step": 6530 + }, + { + "epoch": 14.543429844097995, + "loss": 0.4774046540260315, + "loss_ce": 0.00010971432493533939, + "loss_iou": 0.21484375, + "loss_num": 0.00958251953125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 365882108, + "step": 6530 + }, + { + "epoch": 14.5456570155902, + "grad_norm": 18.724021911621094, + "learning_rate": 1e-06, + "loss": 0.5758, + "num_input_tokens_seen": 365940436, + "step": 6531 + }, + { + "epoch": 14.5456570155902, + "loss": 0.7265654802322388, + "loss_ce": 0.00012504393816925585, + "loss_iou": 0.30078125, + "loss_num": 0.0247802734375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 365940436, + "step": 6531 + }, + { + "epoch": 14.547884187082406, + "grad_norm": 16.194499969482422, + "learning_rate": 1e-06, + "loss": 0.4139, + "num_input_tokens_seen": 365996348, + "step": 6532 + }, + { + "epoch": 14.547884187082406, + "loss": 0.36508145928382874, + "loss_ce": 9.123167546931654e-05, + "loss_iou": 0.169921875, + "loss_num": 0.004791259765625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 365996348, + "step": 6532 + }, + { + "epoch": 14.550111358574611, + "grad_norm": 14.120255470275879, + "learning_rate": 1e-06, + "loss": 0.4066, + "num_input_tokens_seen": 366050788, + "step": 6533 + }, + { + "epoch": 14.550111358574611, + "loss": 0.380734384059906, + "loss_ce": 0.0003022679884452373, + "loss_iou": 0.1669921875, + "loss_num": 0.00921630859375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 366050788, + "step": 6533 + }, + { + "epoch": 14.552338530066816, + "grad_norm": 16.832948684692383, + "learning_rate": 1e-06, + "loss": 0.5344, + "num_input_tokens_seen": 366103204, + "step": 6534 + }, + { + "epoch": 14.552338530066816, + "loss": 0.49608922004699707, + "loss_ce": 0.00011752717546187341, + "loss_iou": 0.2138671875, + "loss_num": 0.0135498046875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 366103204, + "step": 6534 + }, + { + "epoch": 14.55456570155902, + "grad_norm": 20.420133590698242, + "learning_rate": 1e-06, + "loss": 0.4178, + "num_input_tokens_seen": 366161640, + "step": 6535 + }, + { + "epoch": 14.55456570155902, + "loss": 0.4828900098800659, + "loss_ce": 0.00010191020555794239, + "loss_iou": 0.2041015625, + "loss_num": 0.01470947265625, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 366161640, + "step": 6535 + }, + { + "epoch": 14.556792873051226, + "grad_norm": 19.563575744628906, + "learning_rate": 1e-06, + "loss": 0.3978, + "num_input_tokens_seen": 366216184, + "step": 6536 + }, + { + "epoch": 14.556792873051226, + "loss": 0.5175689458847046, + "loss_ce": 0.0001129416050389409, + "loss_iou": 0.23046875, + "loss_num": 0.01141357421875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 366216184, + "step": 6536 + }, + { + "epoch": 14.55902004454343, + "grad_norm": 14.907690048217773, + "learning_rate": 1e-06, + "loss": 0.5119, + "num_input_tokens_seen": 366273176, + "step": 6537 + }, + { + "epoch": 14.55902004454343, + "loss": 0.4903450906276703, + "loss_ce": 0.00011072470806539059, + "loss_iou": 0.212890625, + "loss_num": 0.01318359375, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 366273176, + "step": 6537 + }, + { + "epoch": 14.561247216035635, + "grad_norm": 21.058177947998047, + "learning_rate": 1e-06, + "loss": 0.4864, + "num_input_tokens_seen": 366329652, + "step": 6538 + }, + { + "epoch": 14.561247216035635, + "loss": 0.38425058126449585, + "loss_ce": 9.530353418085724e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.01312255859375, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 366329652, + "step": 6538 + }, + { + "epoch": 14.56347438752784, + "grad_norm": 76.75809478759766, + "learning_rate": 1e-06, + "loss": 0.6636, + "num_input_tokens_seen": 366388276, + "step": 6539 + }, + { + "epoch": 14.56347438752784, + "loss": 0.8201836347579956, + "loss_ce": 0.00011528450704645365, + "loss_iou": 0.330078125, + "loss_num": 0.03173828125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 366388276, + "step": 6539 + }, + { + "epoch": 14.565701559020045, + "grad_norm": 18.305295944213867, + "learning_rate": 1e-06, + "loss": 0.5901, + "num_input_tokens_seen": 366444384, + "step": 6540 + }, + { + "epoch": 14.565701559020045, + "loss": 0.5785954594612122, + "loss_ce": 0.00010423710045870394, + "loss_iou": 0.25390625, + "loss_num": 0.01385498046875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 366444384, + "step": 6540 + }, + { + "epoch": 14.56792873051225, + "grad_norm": 16.77396583557129, + "learning_rate": 1e-06, + "loss": 0.5024, + "num_input_tokens_seen": 366500264, + "step": 6541 + }, + { + "epoch": 14.56792873051225, + "loss": 0.47824224829673767, + "loss_ce": 9.282668179366738e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.025634765625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 366500264, + "step": 6541 + }, + { + "epoch": 14.570155902004455, + "grad_norm": 22.724868774414062, + "learning_rate": 1e-06, + "loss": 0.4041, + "num_input_tokens_seen": 366556940, + "step": 6542 + }, + { + "epoch": 14.570155902004455, + "loss": 0.45686769485473633, + "loss_ce": 8.05618183221668e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0140380859375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 366556940, + "step": 6542 + }, + { + "epoch": 14.57238307349666, + "grad_norm": 20.71229362487793, + "learning_rate": 1e-06, + "loss": 0.5195, + "num_input_tokens_seen": 366610572, + "step": 6543 + }, + { + "epoch": 14.57238307349666, + "loss": 0.5313507318496704, + "loss_ce": 0.0001007560349535197, + "loss_iou": 0.2119140625, + "loss_num": 0.0213623046875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 366610572, + "step": 6543 + }, + { + "epoch": 14.574610244988865, + "grad_norm": 20.400714874267578, + "learning_rate": 1e-06, + "loss": 0.5844, + "num_input_tokens_seen": 366665496, + "step": 6544 + }, + { + "epoch": 14.574610244988865, + "loss": 0.46006056666374207, + "loss_ce": 9.960292663890868e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.0036163330078125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 366665496, + "step": 6544 + }, + { + "epoch": 14.57683741648107, + "grad_norm": 16.26686668395996, + "learning_rate": 1e-06, + "loss": 0.6016, + "num_input_tokens_seen": 366720024, + "step": 6545 + }, + { + "epoch": 14.57683741648107, + "loss": 0.622795820236206, + "loss_ce": 0.00011514931975398213, + "loss_iou": 0.2578125, + "loss_num": 0.021240234375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 366720024, + "step": 6545 + }, + { + "epoch": 14.579064587973274, + "grad_norm": 14.509425163269043, + "learning_rate": 1e-06, + "loss": 0.4275, + "num_input_tokens_seen": 366778568, + "step": 6546 + }, + { + "epoch": 14.579064587973274, + "loss": 0.36485838890075684, + "loss_ce": 0.00011229590745642781, + "loss_iou": 0.169921875, + "loss_num": 0.0050048828125, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 366778568, + "step": 6546 + }, + { + "epoch": 14.58129175946548, + "grad_norm": 26.61733627319336, + "learning_rate": 1e-06, + "loss": 0.5409, + "num_input_tokens_seen": 366834208, + "step": 6547 + }, + { + "epoch": 14.58129175946548, + "loss": 0.6287715435028076, + "loss_ce": 0.0001094555773306638, + "loss_iou": 0.28125, + "loss_num": 0.01336669921875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 366834208, + "step": 6547 + }, + { + "epoch": 14.583518930957684, + "grad_norm": 21.295969009399414, + "learning_rate": 1e-06, + "loss": 0.4005, + "num_input_tokens_seen": 366889968, + "step": 6548 + }, + { + "epoch": 14.583518930957684, + "loss": 0.4716094136238098, + "loss_ce": 0.00011283693311270326, + "loss_iou": 0.21484375, + "loss_num": 0.00836181640625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 366889968, + "step": 6548 + }, + { + "epoch": 14.585746102449889, + "grad_norm": 30.666751861572266, + "learning_rate": 1e-06, + "loss": 0.4064, + "num_input_tokens_seen": 366945348, + "step": 6549 + }, + { + "epoch": 14.585746102449889, + "loss": 0.4583427906036377, + "loss_ce": 9.083442273549736e-05, + "loss_iou": 0.1875, + "loss_num": 0.0166015625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 366945348, + "step": 6549 + }, + { + "epoch": 14.587973273942094, + "grad_norm": 36.7827262878418, + "learning_rate": 1e-06, + "loss": 0.5327, + "num_input_tokens_seen": 367000992, + "step": 6550 + }, + { + "epoch": 14.587973273942094, + "loss": 0.6207618713378906, + "loss_ce": 0.0001563684199936688, + "loss_iou": 0.28125, + "loss_num": 0.01202392578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 367000992, + "step": 6550 + }, + { + "epoch": 14.590200445434299, + "grad_norm": 13.286869049072266, + "learning_rate": 1e-06, + "loss": 0.4203, + "num_input_tokens_seen": 367057428, + "step": 6551 + }, + { + "epoch": 14.590200445434299, + "loss": 0.3902452886104584, + "loss_ce": 0.00010857303277589381, + "loss_iou": 0.171875, + "loss_num": 0.00933837890625, + "loss_xval": 0.390625, + "num_input_tokens_seen": 367057428, + "step": 6551 + }, + { + "epoch": 14.592427616926503, + "grad_norm": 25.443744659423828, + "learning_rate": 1e-06, + "loss": 0.486, + "num_input_tokens_seen": 367110136, + "step": 6552 + }, + { + "epoch": 14.592427616926503, + "loss": 0.612711489200592, + "loss_ce": 0.0016885376535356045, + "loss_iou": 0.267578125, + "loss_num": 0.015625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 367110136, + "step": 6552 + }, + { + "epoch": 14.594654788418708, + "grad_norm": 25.125425338745117, + "learning_rate": 1e-06, + "loss": 0.4382, + "num_input_tokens_seen": 367167548, + "step": 6553 + }, + { + "epoch": 14.594654788418708, + "loss": 0.3994516134262085, + "loss_ce": 9.859049168881029e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.009033203125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 367167548, + "step": 6553 + }, + { + "epoch": 14.596881959910913, + "grad_norm": 16.59556770324707, + "learning_rate": 1e-06, + "loss": 0.4072, + "num_input_tokens_seen": 367223212, + "step": 6554 + }, + { + "epoch": 14.596881959910913, + "loss": 0.3758706748485565, + "loss_ce": 0.00010771742381621152, + "loss_iou": 0.16796875, + "loss_num": 0.008056640625, + "loss_xval": 0.375, + "num_input_tokens_seen": 367223212, + "step": 6554 + }, + { + "epoch": 14.599109131403118, + "grad_norm": 12.429368019104004, + "learning_rate": 1e-06, + "loss": 0.4022, + "num_input_tokens_seen": 367279056, + "step": 6555 + }, + { + "epoch": 14.599109131403118, + "loss": 0.5174868702888489, + "loss_ce": 9.187131217913702e-05, + "loss_iou": 0.236328125, + "loss_num": 0.00909423828125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 367279056, + "step": 6555 + }, + { + "epoch": 14.601336302895323, + "grad_norm": 18.785852432250977, + "learning_rate": 1e-06, + "loss": 0.4512, + "num_input_tokens_seen": 367336108, + "step": 6556 + }, + { + "epoch": 14.601336302895323, + "loss": 0.5070604085922241, + "loss_ce": 0.00010236204252578318, + "loss_iou": 0.21484375, + "loss_num": 0.015380859375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 367336108, + "step": 6556 + }, + { + "epoch": 14.603563474387528, + "grad_norm": 15.28122329711914, + "learning_rate": 1e-06, + "loss": 0.3388, + "num_input_tokens_seen": 367393664, + "step": 6557 + }, + { + "epoch": 14.603563474387528, + "loss": 0.4066086709499359, + "loss_ce": 0.00011453506886027753, + "loss_iou": 0.17578125, + "loss_num": 0.01080322265625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 367393664, + "step": 6557 + }, + { + "epoch": 14.605790645879733, + "grad_norm": 12.419171333312988, + "learning_rate": 1e-06, + "loss": 0.4405, + "num_input_tokens_seen": 367451044, + "step": 6558 + }, + { + "epoch": 14.605790645879733, + "loss": 0.4510771334171295, + "loss_ce": 0.00014940105029381812, + "loss_iou": 0.1923828125, + "loss_num": 0.0133056640625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 367451044, + "step": 6558 + }, + { + "epoch": 14.608017817371937, + "grad_norm": 14.014966011047363, + "learning_rate": 1e-06, + "loss": 0.5277, + "num_input_tokens_seen": 367508844, + "step": 6559 + }, + { + "epoch": 14.608017817371937, + "loss": 0.5670410990715027, + "loss_ce": 0.00014657452993560582, + "loss_iou": 0.21875, + "loss_num": 0.025634765625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 367508844, + "step": 6559 + }, + { + "epoch": 14.610244988864142, + "grad_norm": 15.982457160949707, + "learning_rate": 1e-06, + "loss": 0.3984, + "num_input_tokens_seen": 367566664, + "step": 6560 + }, + { + "epoch": 14.610244988864142, + "loss": 0.400331974029541, + "loss_ce": 0.0001854914880823344, + "loss_iou": 0.1728515625, + "loss_num": 0.0108642578125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 367566664, + "step": 6560 + }, + { + "epoch": 14.612472160356347, + "grad_norm": 20.686161041259766, + "learning_rate": 1e-06, + "loss": 0.4902, + "num_input_tokens_seen": 367619140, + "step": 6561 + }, + { + "epoch": 14.612472160356347, + "loss": 0.6208482384681702, + "loss_ce": 0.00012072353274561465, + "loss_iou": 0.259765625, + "loss_num": 0.02001953125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 367619140, + "step": 6561 + }, + { + "epoch": 14.614699331848552, + "grad_norm": 22.73474884033203, + "learning_rate": 1e-06, + "loss": 0.4558, + "num_input_tokens_seen": 367675996, + "step": 6562 + }, + { + "epoch": 14.614699331848552, + "loss": 0.4799794554710388, + "loss_ce": 0.00012105887435609475, + "loss_iou": 0.1845703125, + "loss_num": 0.0220947265625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 367675996, + "step": 6562 + }, + { + "epoch": 14.616926503340757, + "grad_norm": 19.15255355834961, + "learning_rate": 1e-06, + "loss": 0.4012, + "num_input_tokens_seen": 367733188, + "step": 6563 + }, + { + "epoch": 14.616926503340757, + "loss": 0.38610512018203735, + "loss_ce": 0.00011879668454639614, + "loss_iou": 0.1689453125, + "loss_num": 0.009521484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 367733188, + "step": 6563 + }, + { + "epoch": 14.619153674832962, + "grad_norm": 19.479084014892578, + "learning_rate": 1e-06, + "loss": 0.5865, + "num_input_tokens_seen": 367791252, + "step": 6564 + }, + { + "epoch": 14.619153674832962, + "loss": 0.6352872848510742, + "loss_ce": 0.0001554101036163047, + "loss_iou": 0.28125, + "loss_num": 0.0147705078125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 367791252, + "step": 6564 + }, + { + "epoch": 14.621380846325167, + "grad_norm": 33.37403869628906, + "learning_rate": 1e-06, + "loss": 0.494, + "num_input_tokens_seen": 367845888, + "step": 6565 + }, + { + "epoch": 14.621380846325167, + "loss": 0.3272702097892761, + "loss_ce": 0.00012174884614069015, + "loss_iou": 0.1416015625, + "loss_num": 0.00885009765625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 367845888, + "step": 6565 + }, + { + "epoch": 14.623608017817372, + "grad_norm": 14.532173156738281, + "learning_rate": 1e-06, + "loss": 0.5322, + "num_input_tokens_seen": 367902024, + "step": 6566 + }, + { + "epoch": 14.623608017817372, + "loss": 0.6220463514328003, + "loss_ce": 9.808540198719129e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0208740234375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 367902024, + "step": 6566 + }, + { + "epoch": 14.625835189309576, + "grad_norm": 25.17264747619629, + "learning_rate": 1e-06, + "loss": 0.4745, + "num_input_tokens_seen": 367956996, + "step": 6567 + }, + { + "epoch": 14.625835189309576, + "loss": 0.46531087160110474, + "loss_ce": 0.00010092130105476826, + "loss_iou": 0.193359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 367956996, + "step": 6567 + }, + { + "epoch": 14.628062360801781, + "grad_norm": 21.86960792541504, + "learning_rate": 1e-06, + "loss": 0.3833, + "num_input_tokens_seen": 368011628, + "step": 6568 + }, + { + "epoch": 14.628062360801781, + "loss": 0.41635486483573914, + "loss_ce": 9.509771916782483e-05, + "loss_iou": 0.19140625, + "loss_num": 0.006561279296875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 368011628, + "step": 6568 + }, + { + "epoch": 14.630289532293986, + "grad_norm": 20.430923461914062, + "learning_rate": 1e-06, + "loss": 0.7204, + "num_input_tokens_seen": 368065324, + "step": 6569 + }, + { + "epoch": 14.630289532293986, + "loss": 0.7128270268440247, + "loss_ce": 0.0001805600186344236, + "loss_iou": 0.306640625, + "loss_num": 0.0198974609375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 368065324, + "step": 6569 + }, + { + "epoch": 14.632516703786191, + "grad_norm": 23.498016357421875, + "learning_rate": 1e-06, + "loss": 0.5559, + "num_input_tokens_seen": 368120640, + "step": 6570 + }, + { + "epoch": 14.632516703786191, + "loss": 0.6478054523468018, + "loss_ce": 0.00010038249456556514, + "loss_iou": 0.263671875, + "loss_num": 0.0242919921875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 368120640, + "step": 6570 + }, + { + "epoch": 14.634743875278396, + "grad_norm": 20.486783981323242, + "learning_rate": 1e-06, + "loss": 0.5641, + "num_input_tokens_seen": 368176708, + "step": 6571 + }, + { + "epoch": 14.634743875278396, + "loss": 0.49949222803115845, + "loss_ce": 0.00010254726657876745, + "loss_iou": 0.208984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.5, + "num_input_tokens_seen": 368176708, + "step": 6571 + }, + { + "epoch": 14.6369710467706, + "grad_norm": 18.985647201538086, + "learning_rate": 1e-06, + "loss": 0.5617, + "num_input_tokens_seen": 368231812, + "step": 6572 + }, + { + "epoch": 14.6369710467706, + "loss": 0.6410832405090332, + "loss_ce": 9.200733620673418e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00897216796875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 368231812, + "step": 6572 + }, + { + "epoch": 14.639198218262806, + "grad_norm": 14.87316608428955, + "learning_rate": 1e-06, + "loss": 0.6231, + "num_input_tokens_seen": 368285028, + "step": 6573 + }, + { + "epoch": 14.639198218262806, + "loss": 0.7178068161010742, + "loss_ce": 0.00015544812777079642, + "loss_iou": 0.302734375, + "loss_num": 0.0224609375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 368285028, + "step": 6573 + }, + { + "epoch": 14.64142538975501, + "grad_norm": 18.456830978393555, + "learning_rate": 1e-06, + "loss": 0.4465, + "num_input_tokens_seen": 368341996, + "step": 6574 + }, + { + "epoch": 14.64142538975501, + "loss": 0.5098215937614441, + "loss_ce": 0.0015208279946818948, + "loss_iou": 0.2265625, + "loss_num": 0.01104736328125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 368341996, + "step": 6574 + }, + { + "epoch": 14.643652561247215, + "grad_norm": 15.073392868041992, + "learning_rate": 1e-06, + "loss": 0.5345, + "num_input_tokens_seen": 368397584, + "step": 6575 + }, + { + "epoch": 14.643652561247215, + "loss": 0.49510324001312256, + "loss_ce": 0.00010811796528287232, + "loss_iou": 0.208984375, + "loss_num": 0.01556396484375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 368397584, + "step": 6575 + }, + { + "epoch": 14.64587973273942, + "grad_norm": 22.879812240600586, + "learning_rate": 1e-06, + "loss": 0.4901, + "num_input_tokens_seen": 368452996, + "step": 6576 + }, + { + "epoch": 14.64587973273942, + "loss": 0.6450592279434204, + "loss_ce": 0.00016175321070477366, + "loss_iou": 0.2578125, + "loss_num": 0.0257568359375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 368452996, + "step": 6576 + }, + { + "epoch": 14.648106904231625, + "grad_norm": 36.055259704589844, + "learning_rate": 1e-06, + "loss": 0.4346, + "num_input_tokens_seen": 368511036, + "step": 6577 + }, + { + "epoch": 14.648106904231625, + "loss": 0.457261860370636, + "loss_ce": 0.00010854535503312945, + "loss_iou": 0.1953125, + "loss_num": 0.01318359375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 368511036, + "step": 6577 + }, + { + "epoch": 14.65033407572383, + "grad_norm": 22.31118392944336, + "learning_rate": 1e-06, + "loss": 0.4259, + "num_input_tokens_seen": 368564932, + "step": 6578 + }, + { + "epoch": 14.65033407572383, + "loss": 0.39772558212280273, + "loss_ce": 0.00014260000898502767, + "loss_iou": 0.162109375, + "loss_num": 0.0146484375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 368564932, + "step": 6578 + }, + { + "epoch": 14.652561247216035, + "grad_norm": 17.83094596862793, + "learning_rate": 1e-06, + "loss": 0.5154, + "num_input_tokens_seen": 368621580, + "step": 6579 + }, + { + "epoch": 14.652561247216035, + "loss": 0.5228110551834106, + "loss_ce": 0.0002280529006384313, + "loss_iou": 0.21875, + "loss_num": 0.0172119140625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 368621580, + "step": 6579 + }, + { + "epoch": 14.654788418708241, + "grad_norm": 24.0405216217041, + "learning_rate": 1e-06, + "loss": 0.4541, + "num_input_tokens_seen": 368674988, + "step": 6580 + }, + { + "epoch": 14.654788418708241, + "loss": 0.4888764023780823, + "loss_ce": 0.00010689307237043977, + "loss_iou": 0.2158203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 368674988, + "step": 6580 + }, + { + "epoch": 14.657015590200446, + "grad_norm": 17.233251571655273, + "learning_rate": 1e-06, + "loss": 0.3594, + "num_input_tokens_seen": 368731232, + "step": 6581 + }, + { + "epoch": 14.657015590200446, + "loss": 0.319793164730072, + "loss_ce": 9.099852468352765e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00848388671875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 368731232, + "step": 6581 + }, + { + "epoch": 14.659242761692651, + "grad_norm": 31.06590461730957, + "learning_rate": 1e-06, + "loss": 0.6744, + "num_input_tokens_seen": 368787912, + "step": 6582 + }, + { + "epoch": 14.659242761692651, + "loss": 0.6104459762573242, + "loss_ce": 9.442644659429789e-05, + "loss_iou": 0.23828125, + "loss_num": 0.0269775390625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 368787912, + "step": 6582 + }, + { + "epoch": 14.661469933184856, + "grad_norm": 16.3990535736084, + "learning_rate": 1e-06, + "loss": 0.3871, + "num_input_tokens_seen": 368844564, + "step": 6583 + }, + { + "epoch": 14.661469933184856, + "loss": 0.4364955723285675, + "loss_ce": 9.420434071216732e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00921630859375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 368844564, + "step": 6583 + }, + { + "epoch": 14.66369710467706, + "grad_norm": 16.442039489746094, + "learning_rate": 1e-06, + "loss": 0.5207, + "num_input_tokens_seen": 368903620, + "step": 6584 + }, + { + "epoch": 14.66369710467706, + "loss": 0.8181473612785339, + "loss_ce": 0.00015422608703374863, + "loss_iou": 0.33203125, + "loss_num": 0.0308837890625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 368903620, + "step": 6584 + }, + { + "epoch": 14.665924276169266, + "grad_norm": 18.313331604003906, + "learning_rate": 1e-06, + "loss": 0.5139, + "num_input_tokens_seen": 368961612, + "step": 6585 + }, + { + "epoch": 14.665924276169266, + "loss": 0.6265621185302734, + "loss_ce": 9.728968143463135e-05, + "loss_iou": 0.26953125, + "loss_num": 0.017578125, + "loss_xval": 0.625, + "num_input_tokens_seen": 368961612, + "step": 6585 + }, + { + "epoch": 14.66815144766147, + "grad_norm": 27.344846725463867, + "learning_rate": 1e-06, + "loss": 0.4549, + "num_input_tokens_seen": 369014408, + "step": 6586 + }, + { + "epoch": 14.66815144766147, + "loss": 0.5841131806373596, + "loss_ce": 0.00012881754082627594, + "loss_iou": 0.234375, + "loss_num": 0.023193359375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 369014408, + "step": 6586 + }, + { + "epoch": 14.670378619153675, + "grad_norm": 19.41844367980957, + "learning_rate": 1e-06, + "loss": 0.4142, + "num_input_tokens_seen": 369068716, + "step": 6587 + }, + { + "epoch": 14.670378619153675, + "loss": 0.4551857113838196, + "loss_ce": 0.00010757872951216996, + "loss_iou": 0.2080078125, + "loss_num": 0.0078125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 369068716, + "step": 6587 + }, + { + "epoch": 14.67260579064588, + "grad_norm": 17.168302536010742, + "learning_rate": 1e-06, + "loss": 0.5873, + "num_input_tokens_seen": 369125348, + "step": 6588 + }, + { + "epoch": 14.67260579064588, + "loss": 0.46933096647262573, + "loss_ce": 9.269404108636081e-05, + "loss_iou": 0.20703125, + "loss_num": 0.010986328125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 369125348, + "step": 6588 + }, + { + "epoch": 14.674832962138085, + "grad_norm": 24.008495330810547, + "learning_rate": 1e-06, + "loss": 0.5045, + "num_input_tokens_seen": 369181304, + "step": 6589 + }, + { + "epoch": 14.674832962138085, + "loss": 0.46079450845718384, + "loss_ce": 0.00010113501048181206, + "loss_iou": 0.216796875, + "loss_num": 0.00567626953125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 369181304, + "step": 6589 + }, + { + "epoch": 14.67706013363029, + "grad_norm": 15.958958625793457, + "learning_rate": 1e-06, + "loss": 0.365, + "num_input_tokens_seen": 369237872, + "step": 6590 + }, + { + "epoch": 14.67706013363029, + "loss": 0.4678671360015869, + "loss_ce": 9.370467159897089e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01361083984375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 369237872, + "step": 6590 + }, + { + "epoch": 14.679287305122495, + "grad_norm": 14.867953300476074, + "learning_rate": 1e-06, + "loss": 0.364, + "num_input_tokens_seen": 369294824, + "step": 6591 + }, + { + "epoch": 14.679287305122495, + "loss": 0.4817339777946472, + "loss_ce": 0.0001055731117958203, + "loss_iou": 0.2197265625, + "loss_num": 0.0084228515625, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 369294824, + "step": 6591 + }, + { + "epoch": 14.6815144766147, + "grad_norm": 30.02640724182129, + "learning_rate": 1e-06, + "loss": 0.4666, + "num_input_tokens_seen": 369352304, + "step": 6592 + }, + { + "epoch": 14.6815144766147, + "loss": 0.5130610466003418, + "loss_ce": 0.00012160430196672678, + "loss_iou": 0.21875, + "loss_num": 0.01531982421875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 369352304, + "step": 6592 + }, + { + "epoch": 14.683741648106905, + "grad_norm": 29.63843536376953, + "learning_rate": 1e-06, + "loss": 0.4834, + "num_input_tokens_seen": 369407840, + "step": 6593 + }, + { + "epoch": 14.683741648106905, + "loss": 0.5572299957275391, + "loss_ce": 0.00010112335439771414, + "loss_iou": 0.2197265625, + "loss_num": 0.0234375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 369407840, + "step": 6593 + }, + { + "epoch": 14.68596881959911, + "grad_norm": 21.299360275268555, + "learning_rate": 1e-06, + "loss": 0.5773, + "num_input_tokens_seen": 369461952, + "step": 6594 + }, + { + "epoch": 14.68596881959911, + "loss": 0.6562404632568359, + "loss_ce": 0.0003566770756151527, + "loss_iou": 0.2890625, + "loss_num": 0.0155029296875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 369461952, + "step": 6594 + }, + { + "epoch": 14.688195991091314, + "grad_norm": 14.860562324523926, + "learning_rate": 1e-06, + "loss": 0.6268, + "num_input_tokens_seen": 369517888, + "step": 6595 + }, + { + "epoch": 14.688195991091314, + "loss": 0.745347797870636, + "loss_ce": 0.00010857224697247148, + "loss_iou": 0.333984375, + "loss_num": 0.01556396484375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 369517888, + "step": 6595 + }, + { + "epoch": 14.690423162583519, + "grad_norm": 18.80927085876465, + "learning_rate": 1e-06, + "loss": 0.4626, + "num_input_tokens_seen": 369575536, + "step": 6596 + }, + { + "epoch": 14.690423162583519, + "loss": 0.5963727831840515, + "loss_ce": 0.00012036593398079276, + "loss_iou": 0.263671875, + "loss_num": 0.01397705078125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 369575536, + "step": 6596 + }, + { + "epoch": 14.692650334075724, + "grad_norm": 19.051189422607422, + "learning_rate": 1e-06, + "loss": 0.3497, + "num_input_tokens_seen": 369633808, + "step": 6597 + }, + { + "epoch": 14.692650334075724, + "loss": 0.4147190749645233, + "loss_ce": 0.0002903687418438494, + "loss_iou": 0.1767578125, + "loss_num": 0.0120849609375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 369633808, + "step": 6597 + }, + { + "epoch": 14.694877505567929, + "grad_norm": 18.837825775146484, + "learning_rate": 1e-06, + "loss": 0.4103, + "num_input_tokens_seen": 369691260, + "step": 6598 + }, + { + "epoch": 14.694877505567929, + "loss": 0.49033933877944946, + "loss_ce": 0.00010496500908629969, + "loss_iou": 0.2001953125, + "loss_num": 0.0179443359375, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 369691260, + "step": 6598 + }, + { + "epoch": 14.697104677060134, + "grad_norm": 21.672534942626953, + "learning_rate": 1e-06, + "loss": 0.4892, + "num_input_tokens_seen": 369748488, + "step": 6599 + }, + { + "epoch": 14.697104677060134, + "loss": 0.5086407661437988, + "loss_ce": 9.587158274371177e-05, + "loss_iou": 0.224609375, + "loss_num": 0.01177978515625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 369748488, + "step": 6599 + }, + { + "epoch": 14.699331848552339, + "grad_norm": 17.431072235107422, + "learning_rate": 1e-06, + "loss": 0.369, + "num_input_tokens_seen": 369805384, + "step": 6600 + }, + { + "epoch": 14.699331848552339, + "loss": 0.3214370012283325, + "loss_ce": 8.691436960361898e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.004364013671875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 369805384, + "step": 6600 + }, + { + "epoch": 14.701559020044543, + "grad_norm": 18.672565460205078, + "learning_rate": 1e-06, + "loss": 0.4749, + "num_input_tokens_seen": 369860176, + "step": 6601 + }, + { + "epoch": 14.701559020044543, + "loss": 0.5711730718612671, + "loss_ce": 0.0001281223667319864, + "loss_iou": 0.2392578125, + "loss_num": 0.0185546875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 369860176, + "step": 6601 + }, + { + "epoch": 14.703786191536748, + "grad_norm": 17.336938858032227, + "learning_rate": 1e-06, + "loss": 0.4992, + "num_input_tokens_seen": 369916752, + "step": 6602 + }, + { + "epoch": 14.703786191536748, + "loss": 0.290561705827713, + "loss_ce": 9.540806786390021e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.004669189453125, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 369916752, + "step": 6602 + }, + { + "epoch": 14.706013363028953, + "grad_norm": 19.08681297302246, + "learning_rate": 1e-06, + "loss": 0.417, + "num_input_tokens_seen": 369973148, + "step": 6603 + }, + { + "epoch": 14.706013363028953, + "loss": 0.4227031469345093, + "loss_ce": 9.572567796567455e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.0106201171875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 369973148, + "step": 6603 + }, + { + "epoch": 14.708240534521158, + "grad_norm": 19.571962356567383, + "learning_rate": 1e-06, + "loss": 0.3897, + "num_input_tokens_seen": 370030248, + "step": 6604 + }, + { + "epoch": 14.708240534521158, + "loss": 0.3606414496898651, + "loss_ce": 0.00022884068312123418, + "loss_iou": 0.154296875, + "loss_num": 0.01043701171875, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 370030248, + "step": 6604 + }, + { + "epoch": 14.710467706013363, + "grad_norm": 25.896663665771484, + "learning_rate": 1e-06, + "loss": 0.444, + "num_input_tokens_seen": 370086860, + "step": 6605 + }, + { + "epoch": 14.710467706013363, + "loss": 0.4243563711643219, + "loss_ce": 0.00010100057988893241, + "loss_iou": 0.193359375, + "loss_num": 0.00762939453125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 370086860, + "step": 6605 + }, + { + "epoch": 14.712694877505568, + "grad_norm": 11.470439910888672, + "learning_rate": 1e-06, + "loss": 0.4257, + "num_input_tokens_seen": 370140792, + "step": 6606 + }, + { + "epoch": 14.712694877505568, + "loss": 0.4308934211730957, + "loss_ce": 0.00010726226173574105, + "loss_iou": 0.1865234375, + "loss_num": 0.0115966796875, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 370140792, + "step": 6606 + }, + { + "epoch": 14.714922048997773, + "grad_norm": 37.93767547607422, + "learning_rate": 1e-06, + "loss": 0.4366, + "num_input_tokens_seen": 370198188, + "step": 6607 + }, + { + "epoch": 14.714922048997773, + "loss": 0.3961656987667084, + "loss_ce": 0.000108551379526034, + "loss_iou": 0.15625, + "loss_num": 0.0167236328125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 370198188, + "step": 6607 + }, + { + "epoch": 14.717149220489977, + "grad_norm": 19.189268112182617, + "learning_rate": 1e-06, + "loss": 0.5345, + "num_input_tokens_seen": 370256068, + "step": 6608 + }, + { + "epoch": 14.717149220489977, + "loss": 0.5142604112625122, + "loss_ce": 0.00010023896174971014, + "loss_iou": 0.2373046875, + "loss_num": 0.008056640625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 370256068, + "step": 6608 + }, + { + "epoch": 14.719376391982182, + "grad_norm": 27.949832916259766, + "learning_rate": 1e-06, + "loss": 0.4502, + "num_input_tokens_seen": 370310132, + "step": 6609 + }, + { + "epoch": 14.719376391982182, + "loss": 0.41979140043258667, + "loss_ce": 0.000540890556294471, + "loss_iou": 0.177734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 370310132, + "step": 6609 + }, + { + "epoch": 14.721603563474387, + "grad_norm": 19.57186508178711, + "learning_rate": 1e-06, + "loss": 0.4912, + "num_input_tokens_seen": 370365772, + "step": 6610 + }, + { + "epoch": 14.721603563474387, + "loss": 0.4778703451156616, + "loss_ce": 8.710511610843241e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01129150390625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 370365772, + "step": 6610 + }, + { + "epoch": 14.723830734966592, + "grad_norm": 22.238262176513672, + "learning_rate": 1e-06, + "loss": 0.4615, + "num_input_tokens_seen": 370422604, + "step": 6611 + }, + { + "epoch": 14.723830734966592, + "loss": 0.536961019039154, + "loss_ce": 0.00021785832359455526, + "loss_iou": 0.236328125, + "loss_num": 0.01275634765625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 370422604, + "step": 6611 + }, + { + "epoch": 14.726057906458797, + "grad_norm": 19.965530395507812, + "learning_rate": 1e-06, + "loss": 0.4523, + "num_input_tokens_seen": 370477032, + "step": 6612 + }, + { + "epoch": 14.726057906458797, + "loss": 0.44715508818626404, + "loss_ce": 0.00013360095908865333, + "loss_iou": 0.20703125, + "loss_num": 0.00640869140625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 370477032, + "step": 6612 + }, + { + "epoch": 14.728285077951002, + "grad_norm": 18.242788314819336, + "learning_rate": 1e-06, + "loss": 0.5108, + "num_input_tokens_seen": 370532400, + "step": 6613 + }, + { + "epoch": 14.728285077951002, + "loss": 0.606315553188324, + "loss_ce": 0.00011437687498982996, + "loss_iou": 0.265625, + "loss_num": 0.01507568359375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 370532400, + "step": 6613 + }, + { + "epoch": 14.730512249443207, + "grad_norm": 14.153312683105469, + "learning_rate": 1e-06, + "loss": 0.484, + "num_input_tokens_seen": 370589516, + "step": 6614 + }, + { + "epoch": 14.730512249443207, + "loss": 0.6295005083084106, + "loss_ce": 0.0001059654459822923, + "loss_iou": 0.251953125, + "loss_num": 0.0252685546875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 370589516, + "step": 6614 + }, + { + "epoch": 14.732739420935411, + "grad_norm": 19.10258674621582, + "learning_rate": 1e-06, + "loss": 0.465, + "num_input_tokens_seen": 370647412, + "step": 6615 + }, + { + "epoch": 14.732739420935411, + "loss": 0.6935760974884033, + "loss_ce": 9.462784510105848e-05, + "loss_iou": 0.30859375, + "loss_num": 0.014892578125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 370647412, + "step": 6615 + }, + { + "epoch": 14.734966592427616, + "grad_norm": 14.81308650970459, + "learning_rate": 1e-06, + "loss": 0.4206, + "num_input_tokens_seen": 370706064, + "step": 6616 + }, + { + "epoch": 14.734966592427616, + "loss": 0.5853002071380615, + "loss_ce": 9.509964729659259e-05, + "loss_iou": 0.240234375, + "loss_num": 0.0208740234375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 370706064, + "step": 6616 + }, + { + "epoch": 14.737193763919821, + "grad_norm": 44.200828552246094, + "learning_rate": 1e-06, + "loss": 0.4139, + "num_input_tokens_seen": 370762332, + "step": 6617 + }, + { + "epoch": 14.737193763919821, + "loss": 0.5105452537536621, + "loss_ce": 0.00016923280782066286, + "loss_iou": 0.2197265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 370762332, + "step": 6617 + }, + { + "epoch": 14.739420935412026, + "grad_norm": 15.740882873535156, + "learning_rate": 1e-06, + "loss": 0.3973, + "num_input_tokens_seen": 370817860, + "step": 6618 + }, + { + "epoch": 14.739420935412026, + "loss": 0.43589556217193604, + "loss_ce": 0.00010457850294187665, + "loss_iou": 0.18359375, + "loss_num": 0.01373291015625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 370817860, + "step": 6618 + }, + { + "epoch": 14.74164810690423, + "grad_norm": 18.04197120666504, + "learning_rate": 1e-06, + "loss": 0.4308, + "num_input_tokens_seen": 370872024, + "step": 6619 + }, + { + "epoch": 14.74164810690423, + "loss": 0.29858967661857605, + "loss_ce": 9.725069685373455e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.00921630859375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 370872024, + "step": 6619 + }, + { + "epoch": 14.743875278396436, + "grad_norm": 17.093969345092773, + "learning_rate": 1e-06, + "loss": 0.4089, + "num_input_tokens_seen": 370928616, + "step": 6620 + }, + { + "epoch": 14.743875278396436, + "loss": 0.37071430683135986, + "loss_ce": 0.00010881570779019967, + "loss_iou": 0.1728515625, + "loss_num": 0.005035400390625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 370928616, + "step": 6620 + }, + { + "epoch": 14.74610244988864, + "grad_norm": 19.94365882873535, + "learning_rate": 1e-06, + "loss": 0.5362, + "num_input_tokens_seen": 370984480, + "step": 6621 + }, + { + "epoch": 14.74610244988864, + "loss": 0.7782934904098511, + "loss_ce": 9.5245341071859e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01385498046875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 370984480, + "step": 6621 + }, + { + "epoch": 14.748329621380847, + "grad_norm": 19.63141632080078, + "learning_rate": 1e-06, + "loss": 0.451, + "num_input_tokens_seen": 371042128, + "step": 6622 + }, + { + "epoch": 14.748329621380847, + "loss": 0.46288466453552246, + "loss_ce": 0.00011610893852775916, + "loss_iou": 0.1962890625, + "loss_num": 0.01416015625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 371042128, + "step": 6622 + }, + { + "epoch": 14.750556792873052, + "grad_norm": 23.052513122558594, + "learning_rate": 1e-06, + "loss": 0.4397, + "num_input_tokens_seen": 371099580, + "step": 6623 + }, + { + "epoch": 14.750556792873052, + "loss": 0.5583330392837524, + "loss_ce": 0.00010552619642112404, + "loss_iou": 0.255859375, + "loss_num": 0.00970458984375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 371099580, + "step": 6623 + }, + { + "epoch": 14.752783964365257, + "grad_norm": 12.574701309204102, + "learning_rate": 1e-06, + "loss": 0.2786, + "num_input_tokens_seen": 371154964, + "step": 6624 + }, + { + "epoch": 14.752783964365257, + "loss": 0.29406648874282837, + "loss_ce": 9.068727376870811e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.00689697265625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 371154964, + "step": 6624 + }, + { + "epoch": 14.755011135857462, + "grad_norm": 16.99627113342285, + "learning_rate": 1e-06, + "loss": 0.5441, + "num_input_tokens_seen": 371211736, + "step": 6625 + }, + { + "epoch": 14.755011135857462, + "loss": 0.5980323553085327, + "loss_ce": 0.00013196782674640417, + "loss_iou": 0.2373046875, + "loss_num": 0.0245361328125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 371211736, + "step": 6625 + }, + { + "epoch": 14.757238307349667, + "grad_norm": 25.345653533935547, + "learning_rate": 1e-06, + "loss": 0.6329, + "num_input_tokens_seen": 371267888, + "step": 6626 + }, + { + "epoch": 14.757238307349667, + "loss": 0.5716487765312195, + "loss_ce": 0.00011558398546185344, + "loss_iou": 0.2578125, + "loss_num": 0.0113525390625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 371267888, + "step": 6626 + }, + { + "epoch": 14.759465478841872, + "grad_norm": 35.48616027832031, + "learning_rate": 1e-06, + "loss": 0.5655, + "num_input_tokens_seen": 371324636, + "step": 6627 + }, + { + "epoch": 14.759465478841872, + "loss": 0.6881007552146912, + "loss_ce": 0.00011247480870224535, + "loss_iou": 0.279296875, + "loss_num": 0.025634765625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 371324636, + "step": 6627 + }, + { + "epoch": 14.761692650334076, + "grad_norm": 17.21990394592285, + "learning_rate": 1e-06, + "loss": 0.7006, + "num_input_tokens_seen": 371381504, + "step": 6628 + }, + { + "epoch": 14.761692650334076, + "loss": 0.7638993263244629, + "loss_ce": 0.00010532997839618474, + "loss_iou": 0.302734375, + "loss_num": 0.03125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 371381504, + "step": 6628 + }, + { + "epoch": 14.763919821826281, + "grad_norm": 14.376852035522461, + "learning_rate": 1e-06, + "loss": 0.3699, + "num_input_tokens_seen": 371435912, + "step": 6629 + }, + { + "epoch": 14.763919821826281, + "loss": 0.43231716752052307, + "loss_ce": 0.00012721640814561397, + "loss_iou": 0.1767578125, + "loss_num": 0.0155029296875, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 371435912, + "step": 6629 + }, + { + "epoch": 14.766146993318486, + "grad_norm": 18.436325073242188, + "learning_rate": 1e-06, + "loss": 0.3864, + "num_input_tokens_seen": 371491144, + "step": 6630 + }, + { + "epoch": 14.766146993318486, + "loss": 0.47628775238990784, + "loss_ce": 9.145887452177703e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.009765625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 371491144, + "step": 6630 + }, + { + "epoch": 14.768374164810691, + "grad_norm": 19.47183609008789, + "learning_rate": 1e-06, + "loss": 0.4151, + "num_input_tokens_seen": 371548080, + "step": 6631 + }, + { + "epoch": 14.768374164810691, + "loss": 0.4063809812068939, + "loss_ce": 0.0001309568469878286, + "loss_iou": 0.17578125, + "loss_num": 0.01092529296875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 371548080, + "step": 6631 + }, + { + "epoch": 14.770601336302896, + "grad_norm": 14.947174072265625, + "learning_rate": 1e-06, + "loss": 0.5903, + "num_input_tokens_seen": 371605028, + "step": 6632 + }, + { + "epoch": 14.770601336302896, + "loss": 0.6663827896118164, + "loss_ce": 0.00012304184201639146, + "loss_iou": 0.27734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 371605028, + "step": 6632 + }, + { + "epoch": 14.7728285077951, + "grad_norm": 13.995532989501953, + "learning_rate": 1e-06, + "loss": 0.387, + "num_input_tokens_seen": 371660708, + "step": 6633 + }, + { + "epoch": 14.7728285077951, + "loss": 0.29860347509384155, + "loss_ce": 8.051018812693655e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.00927734375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 371660708, + "step": 6633 + }, + { + "epoch": 14.775055679287306, + "grad_norm": 20.504301071166992, + "learning_rate": 1e-06, + "loss": 0.5561, + "num_input_tokens_seen": 371717676, + "step": 6634 + }, + { + "epoch": 14.775055679287306, + "loss": 0.6223191022872925, + "loss_ce": 0.0002488004101905972, + "loss_iou": 0.271484375, + "loss_num": 0.015625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 371717676, + "step": 6634 + }, + { + "epoch": 14.77728285077951, + "grad_norm": 14.648569107055664, + "learning_rate": 1e-06, + "loss": 0.3508, + "num_input_tokens_seen": 371772420, + "step": 6635 + }, + { + "epoch": 14.77728285077951, + "loss": 0.4088403880596161, + "loss_ce": 0.000148980543599464, + "loss_iou": 0.166015625, + "loss_num": 0.01556396484375, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 371772420, + "step": 6635 + }, + { + "epoch": 14.779510022271715, + "grad_norm": 17.389978408813477, + "learning_rate": 1e-06, + "loss": 0.6203, + "num_input_tokens_seen": 371827592, + "step": 6636 + }, + { + "epoch": 14.779510022271715, + "loss": 0.5807891488075256, + "loss_ce": 0.00010066662798635662, + "loss_iou": 0.2578125, + "loss_num": 0.0130615234375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 371827592, + "step": 6636 + }, + { + "epoch": 14.78173719376392, + "grad_norm": 15.41741943359375, + "learning_rate": 1e-06, + "loss": 0.3496, + "num_input_tokens_seen": 371883112, + "step": 6637 + }, + { + "epoch": 14.78173719376392, + "loss": 0.36627358198165894, + "loss_ce": 0.0001541778037790209, + "loss_iou": 0.1513671875, + "loss_num": 0.0125732421875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 371883112, + "step": 6637 + }, + { + "epoch": 14.783964365256125, + "grad_norm": 11.838160514831543, + "learning_rate": 1e-06, + "loss": 0.4474, + "num_input_tokens_seen": 371939696, + "step": 6638 + }, + { + "epoch": 14.783964365256125, + "loss": 0.49210840463638306, + "loss_ce": 0.0001039962880895473, + "loss_iou": 0.203125, + "loss_num": 0.0174560546875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 371939696, + "step": 6638 + }, + { + "epoch": 14.78619153674833, + "grad_norm": 20.356592178344727, + "learning_rate": 1e-06, + "loss": 0.3329, + "num_input_tokens_seen": 371998200, + "step": 6639 + }, + { + "epoch": 14.78619153674833, + "loss": 0.32651287317276, + "loss_ce": 9.684590622782707e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.00616455078125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 371998200, + "step": 6639 + }, + { + "epoch": 14.788418708240535, + "grad_norm": 16.34804344177246, + "learning_rate": 1e-06, + "loss": 0.4732, + "num_input_tokens_seen": 372054520, + "step": 6640 + }, + { + "epoch": 14.788418708240535, + "loss": 0.386675626039505, + "loss_ce": 7.893913425505161e-05, + "loss_iou": 0.158203125, + "loss_num": 0.01409912109375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 372054520, + "step": 6640 + }, + { + "epoch": 14.79064587973274, + "grad_norm": 31.840667724609375, + "learning_rate": 1e-06, + "loss": 0.5544, + "num_input_tokens_seen": 372109948, + "step": 6641 + }, + { + "epoch": 14.79064587973274, + "loss": 0.5972850322723389, + "loss_ce": 0.00011702888878062367, + "loss_iou": 0.25, + "loss_num": 0.019287109375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 372109948, + "step": 6641 + }, + { + "epoch": 14.792873051224944, + "grad_norm": 16.975744247436523, + "learning_rate": 1e-06, + "loss": 0.4895, + "num_input_tokens_seen": 372165020, + "step": 6642 + }, + { + "epoch": 14.792873051224944, + "loss": 0.4539491832256317, + "loss_ce": 9.175058221444488e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.0091552734375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 372165020, + "step": 6642 + }, + { + "epoch": 14.79510022271715, + "grad_norm": 17.83343505859375, + "learning_rate": 1e-06, + "loss": 0.7496, + "num_input_tokens_seen": 372220692, + "step": 6643 + }, + { + "epoch": 14.79510022271715, + "loss": 0.9950196743011475, + "loss_ce": 0.0001466473040636629, + "loss_iou": 0.412109375, + "loss_num": 0.034423828125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 372220692, + "step": 6643 + }, + { + "epoch": 14.797327394209354, + "grad_norm": 13.327498435974121, + "learning_rate": 1e-06, + "loss": 0.4146, + "num_input_tokens_seen": 372275868, + "step": 6644 + }, + { + "epoch": 14.797327394209354, + "loss": 0.3324364423751831, + "loss_ce": 0.00010003681381931528, + "loss_iou": 0.1533203125, + "loss_num": 0.005035400390625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 372275868, + "step": 6644 + }, + { + "epoch": 14.799554565701559, + "grad_norm": 20.737871170043945, + "learning_rate": 1e-06, + "loss": 0.5179, + "num_input_tokens_seen": 372327744, + "step": 6645 + }, + { + "epoch": 14.799554565701559, + "loss": 0.3846180737018585, + "loss_ce": 9.659097850089893e-05, + "loss_iou": 0.171875, + "loss_num": 0.008056640625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 372327744, + "step": 6645 + }, + { + "epoch": 14.801781737193764, + "grad_norm": 19.62179946899414, + "learning_rate": 1e-06, + "loss": 0.5129, + "num_input_tokens_seen": 372384104, + "step": 6646 + }, + { + "epoch": 14.801781737193764, + "loss": 0.31222450733184814, + "loss_ce": 9.073612454812974e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0157470703125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 372384104, + "step": 6646 + }, + { + "epoch": 14.804008908685969, + "grad_norm": 19.51956558227539, + "learning_rate": 1e-06, + "loss": 0.4317, + "num_input_tokens_seen": 372440792, + "step": 6647 + }, + { + "epoch": 14.804008908685969, + "loss": 0.40453433990478516, + "loss_ce": 0.00011538183025550097, + "loss_iou": 0.1806640625, + "loss_num": 0.0084228515625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 372440792, + "step": 6647 + }, + { + "epoch": 14.806236080178174, + "grad_norm": 18.005043029785156, + "learning_rate": 1e-06, + "loss": 0.5158, + "num_input_tokens_seen": 372496276, + "step": 6648 + }, + { + "epoch": 14.806236080178174, + "loss": 0.7478663921356201, + "loss_ce": 0.00030782315297983587, + "loss_iou": 0.333984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 372496276, + "step": 6648 + }, + { + "epoch": 14.808463251670378, + "grad_norm": 15.16850757598877, + "learning_rate": 1e-06, + "loss": 0.6658, + "num_input_tokens_seen": 372554104, + "step": 6649 + }, + { + "epoch": 14.808463251670378, + "loss": 0.7384415864944458, + "loss_ce": 0.00016030135157052428, + "loss_iou": 0.26171875, + "loss_num": 0.043212890625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 372554104, + "step": 6649 + }, + { + "epoch": 14.810690423162583, + "grad_norm": 15.439560890197754, + "learning_rate": 1e-06, + "loss": 0.4836, + "num_input_tokens_seen": 372609652, + "step": 6650 + }, + { + "epoch": 14.810690423162583, + "loss": 0.499762624502182, + "loss_ce": 0.0002508952165953815, + "loss_iou": 0.201171875, + "loss_num": 0.0191650390625, + "loss_xval": 0.5, + "num_input_tokens_seen": 372609652, + "step": 6650 + }, + { + "epoch": 14.812917594654788, + "grad_norm": 17.865459442138672, + "learning_rate": 1e-06, + "loss": 0.5121, + "num_input_tokens_seen": 372665168, + "step": 6651 + }, + { + "epoch": 14.812917594654788, + "loss": 0.5034142136573792, + "loss_ce": 0.00011833346798084676, + "loss_iou": 0.2099609375, + "loss_num": 0.016845703125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 372665168, + "step": 6651 + }, + { + "epoch": 14.815144766146993, + "grad_norm": 24.064001083374023, + "learning_rate": 1e-06, + "loss": 0.4675, + "num_input_tokens_seen": 372719608, + "step": 6652 + }, + { + "epoch": 14.815144766146993, + "loss": 0.550445020198822, + "loss_ce": 0.00015209712728392333, + "loss_iou": 0.255859375, + "loss_num": 0.00732421875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 372719608, + "step": 6652 + }, + { + "epoch": 14.817371937639198, + "grad_norm": 19.275604248046875, + "learning_rate": 1e-06, + "loss": 0.3122, + "num_input_tokens_seen": 372777240, + "step": 6653 + }, + { + "epoch": 14.817371937639198, + "loss": 0.3434959650039673, + "loss_ce": 0.00011215943231945857, + "loss_iou": 0.140625, + "loss_num": 0.0125732421875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 372777240, + "step": 6653 + }, + { + "epoch": 14.819599109131403, + "grad_norm": 24.118017196655273, + "learning_rate": 1e-06, + "loss": 0.5053, + "num_input_tokens_seen": 372828860, + "step": 6654 + }, + { + "epoch": 14.819599109131403, + "loss": 0.48031556606292725, + "loss_ce": 9.092504478758201e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0201416015625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 372828860, + "step": 6654 + }, + { + "epoch": 14.821826280623608, + "grad_norm": 15.35105037689209, + "learning_rate": 1e-06, + "loss": 0.4081, + "num_input_tokens_seen": 372886876, + "step": 6655 + }, + { + "epoch": 14.821826280623608, + "loss": 0.3949987590312958, + "loss_ce": 0.00010130574082722887, + "loss_iou": 0.1650390625, + "loss_num": 0.01287841796875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 372886876, + "step": 6655 + }, + { + "epoch": 14.824053452115812, + "grad_norm": 22.87786293029785, + "learning_rate": 1e-06, + "loss": 0.5704, + "num_input_tokens_seen": 372943852, + "step": 6656 + }, + { + "epoch": 14.824053452115812, + "loss": 0.5047581195831299, + "loss_ce": 0.00011940376134589314, + "loss_iou": 0.203125, + "loss_num": 0.01953125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 372943852, + "step": 6656 + }, + { + "epoch": 14.826280623608017, + "grad_norm": 14.802397727966309, + "learning_rate": 1e-06, + "loss": 0.2924, + "num_input_tokens_seen": 373000588, + "step": 6657 + }, + { + "epoch": 14.826280623608017, + "loss": 0.2866537570953369, + "loss_ce": 9.368563769385219e-05, + "loss_iou": 0.12890625, + "loss_num": 0.00555419921875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 373000588, + "step": 6657 + }, + { + "epoch": 14.828507795100222, + "grad_norm": 24.41092872619629, + "learning_rate": 1e-06, + "loss": 0.4099, + "num_input_tokens_seen": 373058656, + "step": 6658 + }, + { + "epoch": 14.828507795100222, + "loss": 0.4437221884727478, + "loss_ce": 0.00011863959662150592, + "loss_iou": 0.1923828125, + "loss_num": 0.01190185546875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 373058656, + "step": 6658 + }, + { + "epoch": 14.830734966592427, + "grad_norm": 30.71437644958496, + "learning_rate": 1e-06, + "loss": 0.4298, + "num_input_tokens_seen": 373112252, + "step": 6659 + }, + { + "epoch": 14.830734966592427, + "loss": 0.3821251690387726, + "loss_ce": 0.00010613477934384719, + "loss_iou": 0.166015625, + "loss_num": 0.010009765625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 373112252, + "step": 6659 + }, + { + "epoch": 14.832962138084632, + "grad_norm": 14.227256774902344, + "learning_rate": 1e-06, + "loss": 0.4838, + "num_input_tokens_seen": 373169236, + "step": 6660 + }, + { + "epoch": 14.832962138084632, + "loss": 0.4358832836151123, + "loss_ce": 9.228321141563356e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00909423828125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 373169236, + "step": 6660 + }, + { + "epoch": 14.835189309576837, + "grad_norm": 17.108369827270508, + "learning_rate": 1e-06, + "loss": 0.6556, + "num_input_tokens_seen": 373227608, + "step": 6661 + }, + { + "epoch": 14.835189309576837, + "loss": 0.8442061543464661, + "loss_ce": 0.00015093988622538745, + "loss_iou": 0.333984375, + "loss_num": 0.034912109375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 373227608, + "step": 6661 + }, + { + "epoch": 14.837416481069042, + "grad_norm": 15.365765571594238, + "learning_rate": 1e-06, + "loss": 0.5749, + "num_input_tokens_seen": 373280432, + "step": 6662 + }, + { + "epoch": 14.837416481069042, + "loss": 0.6416932940483093, + "loss_ce": 9.174088336294517e-05, + "loss_iou": 0.265625, + "loss_num": 0.021728515625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 373280432, + "step": 6662 + }, + { + "epoch": 14.839643652561247, + "grad_norm": 20.231231689453125, + "learning_rate": 1e-06, + "loss": 0.5829, + "num_input_tokens_seen": 373335292, + "step": 6663 + }, + { + "epoch": 14.839643652561247, + "loss": 0.5822228193283081, + "loss_ce": 0.00013056171883363277, + "loss_iou": 0.248046875, + "loss_num": 0.01708984375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 373335292, + "step": 6663 + }, + { + "epoch": 14.841870824053451, + "grad_norm": 14.344491004943848, + "learning_rate": 1e-06, + "loss": 0.3425, + "num_input_tokens_seen": 373391224, + "step": 6664 + }, + { + "epoch": 14.841870824053451, + "loss": 0.4869232773780823, + "loss_ce": 0.00010687689064070582, + "loss_iou": 0.201171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 373391224, + "step": 6664 + }, + { + "epoch": 14.844097995545656, + "grad_norm": 18.94646644592285, + "learning_rate": 1e-06, + "loss": 0.4103, + "num_input_tokens_seen": 373448636, + "step": 6665 + }, + { + "epoch": 14.844097995545656, + "loss": 0.34946849942207336, + "loss_ce": 0.00010326325718779117, + "loss_iou": 0.1640625, + "loss_num": 0.004180908203125, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 373448636, + "step": 6665 + }, + { + "epoch": 14.846325167037861, + "grad_norm": 26.247119903564453, + "learning_rate": 1e-06, + "loss": 0.4845, + "num_input_tokens_seen": 373505896, + "step": 6666 + }, + { + "epoch": 14.846325167037861, + "loss": 0.44346243143081665, + "loss_ce": 0.0001030644925776869, + "loss_iou": 0.18359375, + "loss_num": 0.01513671875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 373505896, + "step": 6666 + }, + { + "epoch": 14.848552338530066, + "grad_norm": 16.330686569213867, + "learning_rate": 1e-06, + "loss": 0.3428, + "num_input_tokens_seen": 373562748, + "step": 6667 + }, + { + "epoch": 14.848552338530066, + "loss": 0.3223639130592346, + "loss_ce": 9.82833735179156e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.01080322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 373562748, + "step": 6667 + }, + { + "epoch": 14.85077951002227, + "grad_norm": 17.0013484954834, + "learning_rate": 1e-06, + "loss": 0.4217, + "num_input_tokens_seen": 373620644, + "step": 6668 + }, + { + "epoch": 14.85077951002227, + "loss": 0.4391821026802063, + "loss_ce": 9.516206046100706e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.00927734375, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 373620644, + "step": 6668 + }, + { + "epoch": 14.853006681514476, + "grad_norm": 18.03797721862793, + "learning_rate": 1e-06, + "loss": 0.5505, + "num_input_tokens_seen": 373675496, + "step": 6669 + }, + { + "epoch": 14.853006681514476, + "loss": 0.6275442242622375, + "loss_ce": 0.00010283813753630966, + "loss_iou": 0.279296875, + "loss_num": 0.013427734375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 373675496, + "step": 6669 + }, + { + "epoch": 14.855233853006682, + "grad_norm": 18.613082885742188, + "learning_rate": 1e-06, + "loss": 0.4396, + "num_input_tokens_seen": 373729180, + "step": 6670 + }, + { + "epoch": 14.855233853006682, + "loss": 0.28043514490127563, + "loss_ce": 0.00010065691458294168, + "loss_iou": 0.119140625, + "loss_num": 0.00830078125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 373729180, + "step": 6670 + }, + { + "epoch": 14.857461024498887, + "grad_norm": 15.190927505493164, + "learning_rate": 1e-06, + "loss": 0.632, + "num_input_tokens_seen": 373785848, + "step": 6671 + }, + { + "epoch": 14.857461024498887, + "loss": 0.4553018808364868, + "loss_ce": 0.00010172194743063301, + "loss_iou": 0.1884765625, + "loss_num": 0.01556396484375, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 373785848, + "step": 6671 + }, + { + "epoch": 14.859688195991092, + "grad_norm": 14.331643104553223, + "learning_rate": 1e-06, + "loss": 0.5239, + "num_input_tokens_seen": 373840668, + "step": 6672 + }, + { + "epoch": 14.859688195991092, + "loss": 0.386011004447937, + "loss_ce": 8.569139026803896e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.017333984375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 373840668, + "step": 6672 + }, + { + "epoch": 14.861915367483297, + "grad_norm": 14.409646987915039, + "learning_rate": 1e-06, + "loss": 0.5363, + "num_input_tokens_seen": 373895844, + "step": 6673 + }, + { + "epoch": 14.861915367483297, + "loss": 0.6987284421920776, + "loss_ce": 0.00012002349831163883, + "loss_iou": 0.3203125, + "loss_num": 0.0118408203125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 373895844, + "step": 6673 + }, + { + "epoch": 14.864142538975502, + "grad_norm": 18.301753997802734, + "learning_rate": 1e-06, + "loss": 0.4477, + "num_input_tokens_seen": 373949004, + "step": 6674 + }, + { + "epoch": 14.864142538975502, + "loss": 0.45737865567207336, + "loss_ce": 0.00010326381016056985, + "loss_iou": 0.1982421875, + "loss_num": 0.01220703125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 373949004, + "step": 6674 + }, + { + "epoch": 14.866369710467707, + "grad_norm": 12.914517402648926, + "learning_rate": 1e-06, + "loss": 0.4054, + "num_input_tokens_seen": 374005364, + "step": 6675 + }, + { + "epoch": 14.866369710467707, + "loss": 0.3800061047077179, + "loss_ce": 0.0001232977374456823, + "loss_iou": 0.1748046875, + "loss_num": 0.006072998046875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 374005364, + "step": 6675 + }, + { + "epoch": 14.868596881959911, + "grad_norm": 17.27826690673828, + "learning_rate": 1e-06, + "loss": 0.3994, + "num_input_tokens_seen": 374060220, + "step": 6676 + }, + { + "epoch": 14.868596881959911, + "loss": 0.3738711476325989, + "loss_ce": 9.184792725136504e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.01025390625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 374060220, + "step": 6676 + }, + { + "epoch": 14.870824053452116, + "grad_norm": 19.53494644165039, + "learning_rate": 1e-06, + "loss": 0.2948, + "num_input_tokens_seen": 374117248, + "step": 6677 + }, + { + "epoch": 14.870824053452116, + "loss": 0.31820207834243774, + "loss_ce": 8.682158659212291e-05, + "loss_iou": 0.134765625, + "loss_num": 0.009521484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 374117248, + "step": 6677 + }, + { + "epoch": 14.873051224944321, + "grad_norm": 25.144716262817383, + "learning_rate": 1e-06, + "loss": 0.5595, + "num_input_tokens_seen": 374170888, + "step": 6678 + }, + { + "epoch": 14.873051224944321, + "loss": 0.7696267366409302, + "loss_ce": 9.549126116326079e-05, + "loss_iou": 0.287109375, + "loss_num": 0.038818359375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 374170888, + "step": 6678 + }, + { + "epoch": 14.875278396436526, + "grad_norm": 26.434158325195312, + "learning_rate": 1e-06, + "loss": 0.3463, + "num_input_tokens_seen": 374229660, + "step": 6679 + }, + { + "epoch": 14.875278396436526, + "loss": 0.5142629146575928, + "loss_ce": 0.00010273464431520551, + "loss_iou": 0.236328125, + "loss_num": 0.00823974609375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 374229660, + "step": 6679 + }, + { + "epoch": 14.877505567928731, + "grad_norm": 18.97011947631836, + "learning_rate": 1e-06, + "loss": 0.6984, + "num_input_tokens_seen": 374285568, + "step": 6680 + }, + { + "epoch": 14.877505567928731, + "loss": 0.852816641330719, + "loss_ce": 0.00015551211254205555, + "loss_iou": 0.33984375, + "loss_num": 0.0341796875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 374285568, + "step": 6680 + }, + { + "epoch": 14.879732739420936, + "grad_norm": 23.420482635498047, + "learning_rate": 1e-06, + "loss": 0.5408, + "num_input_tokens_seen": 374343228, + "step": 6681 + }, + { + "epoch": 14.879732739420936, + "loss": 0.7148338556289673, + "loss_ce": 0.0004784108605235815, + "loss_iou": 0.296875, + "loss_num": 0.02392578125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 374343228, + "step": 6681 + }, + { + "epoch": 14.88195991091314, + "grad_norm": 19.06096076965332, + "learning_rate": 1e-06, + "loss": 0.4187, + "num_input_tokens_seen": 374399024, + "step": 6682 + }, + { + "epoch": 14.88195991091314, + "loss": 0.29825353622436523, + "loss_ce": 9.67753876466304e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.0059814453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 374399024, + "step": 6682 + }, + { + "epoch": 14.884187082405345, + "grad_norm": 25.65113639831543, + "learning_rate": 1e-06, + "loss": 0.5878, + "num_input_tokens_seen": 374456632, + "step": 6683 + }, + { + "epoch": 14.884187082405345, + "loss": 0.6034575700759888, + "loss_ce": 9.450462675886229e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01287841796875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 374456632, + "step": 6683 + }, + { + "epoch": 14.88641425389755, + "grad_norm": 18.01433753967285, + "learning_rate": 1e-06, + "loss": 0.4982, + "num_input_tokens_seen": 374511092, + "step": 6684 + }, + { + "epoch": 14.88641425389755, + "loss": 0.5666952133178711, + "loss_ce": 0.00010584105621092021, + "loss_iou": 0.232421875, + "loss_num": 0.020263671875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 374511092, + "step": 6684 + }, + { + "epoch": 14.888641425389755, + "grad_norm": 19.60915756225586, + "learning_rate": 1e-06, + "loss": 0.4411, + "num_input_tokens_seen": 374566352, + "step": 6685 + }, + { + "epoch": 14.888641425389755, + "loss": 0.5250272154808044, + "loss_ce": 0.00012483444879762828, + "loss_iou": 0.240234375, + "loss_num": 0.009033203125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 374566352, + "step": 6685 + }, + { + "epoch": 14.89086859688196, + "grad_norm": 22.95705795288086, + "learning_rate": 1e-06, + "loss": 0.454, + "num_input_tokens_seen": 374624096, + "step": 6686 + }, + { + "epoch": 14.89086859688196, + "loss": 0.3750418722629547, + "loss_ce": 0.00010290060890838504, + "loss_iou": 0.15625, + "loss_num": 0.01251220703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 374624096, + "step": 6686 + }, + { + "epoch": 14.893095768374165, + "grad_norm": 21.94954490661621, + "learning_rate": 1e-06, + "loss": 0.5847, + "num_input_tokens_seen": 374678944, + "step": 6687 + }, + { + "epoch": 14.893095768374165, + "loss": 0.6209622621536255, + "loss_ce": 0.0001126552015193738, + "loss_iou": 0.263671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 374678944, + "step": 6687 + }, + { + "epoch": 14.89532293986637, + "grad_norm": 21.50327491760254, + "learning_rate": 1e-06, + "loss": 0.5005, + "num_input_tokens_seen": 374736156, + "step": 6688 + }, + { + "epoch": 14.89532293986637, + "loss": 0.420645534992218, + "loss_ce": 0.00011328914115438238, + "loss_iou": 0.201171875, + "loss_num": 0.0038299560546875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 374736156, + "step": 6688 + }, + { + "epoch": 14.897550111358575, + "grad_norm": 25.155494689941406, + "learning_rate": 1e-06, + "loss": 0.4771, + "num_input_tokens_seen": 374792696, + "step": 6689 + }, + { + "epoch": 14.897550111358575, + "loss": 0.4023246765136719, + "loss_ce": 0.00010299000859959051, + "loss_iou": 0.173828125, + "loss_num": 0.01116943359375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 374792696, + "step": 6689 + }, + { + "epoch": 14.89977728285078, + "grad_norm": 15.196331024169922, + "learning_rate": 1e-06, + "loss": 0.2636, + "num_input_tokens_seen": 374849908, + "step": 6690 + }, + { + "epoch": 14.89977728285078, + "loss": 0.24606722593307495, + "loss_ce": 0.0009500437881797552, + "loss_iou": 0.1123046875, + "loss_num": 0.003997802734375, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 374849908, + "step": 6690 + }, + { + "epoch": 14.902004454342984, + "grad_norm": 26.573570251464844, + "learning_rate": 1e-06, + "loss": 0.4118, + "num_input_tokens_seen": 374904924, + "step": 6691 + }, + { + "epoch": 14.902004454342984, + "loss": 0.3819239139556885, + "loss_ce": 8.79746803548187e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.0118408203125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 374904924, + "step": 6691 + }, + { + "epoch": 14.90423162583519, + "grad_norm": 29.359655380249023, + "learning_rate": 1e-06, + "loss": 0.4543, + "num_input_tokens_seen": 374959508, + "step": 6692 + }, + { + "epoch": 14.90423162583519, + "loss": 0.28562378883361816, + "loss_ce": 0.00010133428440894932, + "loss_iou": 0.11474609375, + "loss_num": 0.01116943359375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 374959508, + "step": 6692 + }, + { + "epoch": 14.906458797327394, + "grad_norm": 16.882619857788086, + "learning_rate": 1e-06, + "loss": 0.5222, + "num_input_tokens_seen": 375016108, + "step": 6693 + }, + { + "epoch": 14.906458797327394, + "loss": 0.4062420725822449, + "loss_ce": 0.00011414792970754206, + "loss_iou": 0.18359375, + "loss_num": 0.007781982421875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 375016108, + "step": 6693 + }, + { + "epoch": 14.908685968819599, + "grad_norm": 15.009581565856934, + "learning_rate": 1e-06, + "loss": 0.4768, + "num_input_tokens_seen": 375073380, + "step": 6694 + }, + { + "epoch": 14.908685968819599, + "loss": 0.5022239685058594, + "loss_ce": 0.00014879369700793177, + "loss_iou": 0.2255859375, + "loss_num": 0.01025390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 375073380, + "step": 6694 + }, + { + "epoch": 14.910913140311804, + "grad_norm": 16.225059509277344, + "learning_rate": 1e-06, + "loss": 0.3288, + "num_input_tokens_seen": 375129260, + "step": 6695 + }, + { + "epoch": 14.910913140311804, + "loss": 0.23318275809288025, + "loss_ce": 8.948949835030362e-05, + "loss_iou": 0.10009765625, + "loss_num": 0.006500244140625, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 375129260, + "step": 6695 + }, + { + "epoch": 14.913140311804009, + "grad_norm": 18.727340698242188, + "learning_rate": 1e-06, + "loss": 0.4627, + "num_input_tokens_seen": 375184036, + "step": 6696 + }, + { + "epoch": 14.913140311804009, + "loss": 0.5411036610603333, + "loss_ce": 8.803869422990829e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.0218505859375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 375184036, + "step": 6696 + }, + { + "epoch": 14.915367483296214, + "grad_norm": 14.84618854522705, + "learning_rate": 1e-06, + "loss": 0.3687, + "num_input_tokens_seen": 375240860, + "step": 6697 + }, + { + "epoch": 14.915367483296214, + "loss": 0.3764420747756958, + "loss_ce": 9.930084343068302e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.00543212890625, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 375240860, + "step": 6697 + }, + { + "epoch": 14.917594654788418, + "grad_norm": 14.689440727233887, + "learning_rate": 1e-06, + "loss": 0.5437, + "num_input_tokens_seen": 375297676, + "step": 6698 + }, + { + "epoch": 14.917594654788418, + "loss": 0.5835915803909302, + "loss_ce": 9.553229756420478e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0133056640625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 375297676, + "step": 6698 + }, + { + "epoch": 14.919821826280623, + "grad_norm": 17.56962013244629, + "learning_rate": 1e-06, + "loss": 0.5139, + "num_input_tokens_seen": 375353512, + "step": 6699 + }, + { + "epoch": 14.919821826280623, + "loss": 0.5415977239608765, + "loss_ce": 9.384811710333452e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.021240234375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 375353512, + "step": 6699 + }, + { + "epoch": 14.922048997772828, + "grad_norm": 17.380096435546875, + "learning_rate": 1e-06, + "loss": 0.329, + "num_input_tokens_seen": 375408140, + "step": 6700 + }, + { + "epoch": 14.922048997772828, + "loss": 0.3042938709259033, + "loss_ce": 9.468305506743491e-05, + "loss_iou": 0.126953125, + "loss_num": 0.00994873046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 375408140, + "step": 6700 + }, + { + "epoch": 14.924276169265033, + "grad_norm": 19.394245147705078, + "learning_rate": 1e-06, + "loss": 0.3533, + "num_input_tokens_seen": 375462496, + "step": 6701 + }, + { + "epoch": 14.924276169265033, + "loss": 0.3009084463119507, + "loss_ce": 9.669142309576273e-05, + "loss_iou": 0.12890625, + "loss_num": 0.00836181640625, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 375462496, + "step": 6701 + }, + { + "epoch": 14.926503340757238, + "grad_norm": 46.145076751708984, + "learning_rate": 1e-06, + "loss": 0.5536, + "num_input_tokens_seen": 375515212, + "step": 6702 + }, + { + "epoch": 14.926503340757238, + "loss": 0.44150978326797485, + "loss_ce": 0.00010355835547670722, + "loss_iou": 0.1826171875, + "loss_num": 0.01519775390625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 375515212, + "step": 6702 + }, + { + "epoch": 14.928730512249443, + "grad_norm": 17.22191619873047, + "learning_rate": 1e-06, + "loss": 0.6082, + "num_input_tokens_seen": 375573600, + "step": 6703 + }, + { + "epoch": 14.928730512249443, + "loss": 0.49972373247146606, + "loss_ce": 8.993092342279851e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.00738525390625, + "loss_xval": 0.5, + "num_input_tokens_seen": 375573600, + "step": 6703 + }, + { + "epoch": 14.930957683741648, + "grad_norm": 17.4095458984375, + "learning_rate": 1e-06, + "loss": 0.4707, + "num_input_tokens_seen": 375627936, + "step": 6704 + }, + { + "epoch": 14.930957683741648, + "loss": 0.35287582874298096, + "loss_ce": 9.260718070436269e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.0115966796875, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 375627936, + "step": 6704 + }, + { + "epoch": 14.933184855233852, + "grad_norm": 14.413276672363281, + "learning_rate": 1e-06, + "loss": 0.502, + "num_input_tokens_seen": 375684652, + "step": 6705 + }, + { + "epoch": 14.933184855233852, + "loss": 0.6157550811767578, + "loss_ce": 9.349231550004333e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 375684652, + "step": 6705 + }, + { + "epoch": 14.935412026726057, + "grad_norm": 28.362884521484375, + "learning_rate": 1e-06, + "loss": 0.4897, + "num_input_tokens_seen": 375741700, + "step": 6706 + }, + { + "epoch": 14.935412026726057, + "loss": 0.3510599732398987, + "loss_ce": 0.00010782700701383874, + "loss_iou": 0.15625, + "loss_num": 0.00787353515625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 375741700, + "step": 6706 + }, + { + "epoch": 14.937639198218262, + "grad_norm": 24.491363525390625, + "learning_rate": 1e-06, + "loss": 0.4178, + "num_input_tokens_seen": 375797580, + "step": 6707 + }, + { + "epoch": 14.937639198218262, + "loss": 0.2931881546974182, + "loss_ce": 9.731938189361244e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.006805419921875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 375797580, + "step": 6707 + }, + { + "epoch": 14.939866369710467, + "grad_norm": 18.060317993164062, + "learning_rate": 1e-06, + "loss": 0.5446, + "num_input_tokens_seen": 375851716, + "step": 6708 + }, + { + "epoch": 14.939866369710467, + "loss": 0.40395689010620117, + "loss_ce": 0.00014827345148660243, + "loss_iou": 0.185546875, + "loss_num": 0.006622314453125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 375851716, + "step": 6708 + }, + { + "epoch": 14.942093541202672, + "grad_norm": 29.22158432006836, + "learning_rate": 1e-06, + "loss": 0.4788, + "num_input_tokens_seen": 375907252, + "step": 6709 + }, + { + "epoch": 14.942093541202672, + "loss": 0.43601399660110474, + "loss_ce": 0.00010091814328916371, + "loss_iou": 0.19921875, + "loss_num": 0.007659912109375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 375907252, + "step": 6709 + }, + { + "epoch": 14.944320712694877, + "grad_norm": 23.212482452392578, + "learning_rate": 1e-06, + "loss": 0.5383, + "num_input_tokens_seen": 375962688, + "step": 6710 + }, + { + "epoch": 14.944320712694877, + "loss": 0.6079156398773193, + "loss_ce": 0.0001275775139220059, + "loss_iou": 0.263671875, + "loss_num": 0.0162353515625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 375962688, + "step": 6710 + }, + { + "epoch": 14.946547884187082, + "grad_norm": 17.981061935424805, + "learning_rate": 1e-06, + "loss": 0.3841, + "num_input_tokens_seen": 376018616, + "step": 6711 + }, + { + "epoch": 14.946547884187082, + "loss": 0.2717045545578003, + "loss_ce": 9.8096948931925e-05, + "loss_iou": 0.11572265625, + "loss_num": 0.00799560546875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 376018616, + "step": 6711 + }, + { + "epoch": 14.948775055679288, + "grad_norm": 19.030765533447266, + "learning_rate": 1e-06, + "loss": 0.3394, + "num_input_tokens_seen": 376074472, + "step": 6712 + }, + { + "epoch": 14.948775055679288, + "loss": 0.31912869215011597, + "loss_ce": 9.793009667191654e-05, + "loss_iou": 0.1484375, + "loss_num": 0.004241943359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 376074472, + "step": 6712 + }, + { + "epoch": 14.951002227171493, + "grad_norm": 18.102794647216797, + "learning_rate": 1e-06, + "loss": 0.5757, + "num_input_tokens_seen": 376129896, + "step": 6713 + }, + { + "epoch": 14.951002227171493, + "loss": 0.7676899433135986, + "loss_ce": 0.00011178333807038143, + "loss_iou": 0.294921875, + "loss_num": 0.03515625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 376129896, + "step": 6713 + }, + { + "epoch": 14.953229398663698, + "grad_norm": 28.493032455444336, + "learning_rate": 1e-06, + "loss": 0.5445, + "num_input_tokens_seen": 376188248, + "step": 6714 + }, + { + "epoch": 14.953229398663698, + "loss": 0.6533517837524414, + "loss_ce": 0.00015353792696259916, + "loss_iou": 0.267578125, + "loss_num": 0.023681640625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 376188248, + "step": 6714 + }, + { + "epoch": 14.955456570155903, + "grad_norm": 36.27149200439453, + "learning_rate": 1e-06, + "loss": 0.5764, + "num_input_tokens_seen": 376241592, + "step": 6715 + }, + { + "epoch": 14.955456570155903, + "loss": 0.3418195843696594, + "loss_ce": 0.00014476115757133812, + "loss_iou": 0.1484375, + "loss_num": 0.00909423828125, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 376241592, + "step": 6715 + }, + { + "epoch": 14.957683741648108, + "grad_norm": 16.67228889465332, + "learning_rate": 1e-06, + "loss": 0.4621, + "num_input_tokens_seen": 376298924, + "step": 6716 + }, + { + "epoch": 14.957683741648108, + "loss": 0.6529318690299988, + "loss_ce": 9.987308294512331e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0233154296875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 376298924, + "step": 6716 + }, + { + "epoch": 14.959910913140313, + "grad_norm": 14.509023666381836, + "learning_rate": 1e-06, + "loss": 0.4821, + "num_input_tokens_seen": 376356404, + "step": 6717 + }, + { + "epoch": 14.959910913140313, + "loss": 0.4666660726070404, + "loss_ce": 0.00011332825670251623, + "loss_iou": 0.208984375, + "loss_num": 0.0096435546875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 376356404, + "step": 6717 + }, + { + "epoch": 14.962138084632517, + "grad_norm": 18.535402297973633, + "learning_rate": 1e-06, + "loss": 0.5056, + "num_input_tokens_seen": 376413404, + "step": 6718 + }, + { + "epoch": 14.962138084632517, + "loss": 0.4525294899940491, + "loss_ce": 0.0013728843769058585, + "loss_iou": 0.171875, + "loss_num": 0.0213623046875, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 376413404, + "step": 6718 + }, + { + "epoch": 14.964365256124722, + "grad_norm": 23.715373992919922, + "learning_rate": 1e-06, + "loss": 0.3654, + "num_input_tokens_seen": 376471520, + "step": 6719 + }, + { + "epoch": 14.964365256124722, + "loss": 0.3640136122703552, + "loss_ce": 0.00012200840865261853, + "loss_iou": 0.16015625, + "loss_num": 0.0087890625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 376471520, + "step": 6719 + }, + { + "epoch": 14.966592427616927, + "grad_norm": 22.593597412109375, + "learning_rate": 1e-06, + "loss": 0.5501, + "num_input_tokens_seen": 376527260, + "step": 6720 + }, + { + "epoch": 14.966592427616927, + "loss": 0.5731148719787598, + "loss_ce": 0.00011685446224873886, + "loss_iou": 0.2294921875, + "loss_num": 0.022705078125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 376527260, + "step": 6720 + }, + { + "epoch": 14.968819599109132, + "grad_norm": 14.375116348266602, + "learning_rate": 1e-06, + "loss": 0.5558, + "num_input_tokens_seen": 376585680, + "step": 6721 + }, + { + "epoch": 14.968819599109132, + "loss": 0.45606058835983276, + "loss_ce": 0.0006162732024677098, + "loss_iou": 0.1982421875, + "loss_num": 0.01165771484375, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 376585680, + "step": 6721 + }, + { + "epoch": 14.971046770601337, + "grad_norm": 15.256864547729492, + "learning_rate": 1e-06, + "loss": 0.4053, + "num_input_tokens_seen": 376641292, + "step": 6722 + }, + { + "epoch": 14.971046770601337, + "loss": 0.44297972321510315, + "loss_ce": 0.00010863250645343214, + "loss_iou": 0.181640625, + "loss_num": 0.0159912109375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 376641292, + "step": 6722 + }, + { + "epoch": 14.973273942093542, + "grad_norm": 20.12420082092285, + "learning_rate": 1e-06, + "loss": 0.5, + "num_input_tokens_seen": 376696188, + "step": 6723 + }, + { + "epoch": 14.973273942093542, + "loss": 0.42197349667549133, + "loss_ce": 9.847040200838819e-05, + "loss_iou": 0.171875, + "loss_num": 0.0157470703125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 376696188, + "step": 6723 + }, + { + "epoch": 14.975501113585747, + "grad_norm": 21.817781448364258, + "learning_rate": 1e-06, + "loss": 0.5414, + "num_input_tokens_seen": 376752988, + "step": 6724 + }, + { + "epoch": 14.975501113585747, + "loss": 0.5460734367370605, + "loss_ce": 0.00017501445836387575, + "loss_iou": 0.22265625, + "loss_num": 0.0205078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 376752988, + "step": 6724 + }, + { + "epoch": 14.977728285077951, + "grad_norm": 16.294130325317383, + "learning_rate": 1e-06, + "loss": 0.3279, + "num_input_tokens_seen": 376810460, + "step": 6725 + }, + { + "epoch": 14.977728285077951, + "loss": 0.28637370467185974, + "loss_ce": 0.00011881387035828084, + "loss_iou": 0.109375, + "loss_num": 0.01348876953125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 376810460, + "step": 6725 + }, + { + "epoch": 14.979955456570156, + "grad_norm": 17.604066848754883, + "learning_rate": 1e-06, + "loss": 0.4209, + "num_input_tokens_seen": 376866340, + "step": 6726 + }, + { + "epoch": 14.979955456570156, + "loss": 0.4271067678928375, + "loss_ce": 0.00010479907359695062, + "loss_iou": 0.193359375, + "loss_num": 0.00823974609375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 376866340, + "step": 6726 + }, + { + "epoch": 14.982182628062361, + "grad_norm": 18.081811904907227, + "learning_rate": 1e-06, + "loss": 0.4397, + "num_input_tokens_seen": 376921200, + "step": 6727 + }, + { + "epoch": 14.982182628062361, + "loss": 0.43722379207611084, + "loss_ce": 9.000849240692332e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 376921200, + "step": 6727 + }, + { + "epoch": 14.984409799554566, + "grad_norm": 18.38675308227539, + "learning_rate": 1e-06, + "loss": 0.3883, + "num_input_tokens_seen": 376979648, + "step": 6728 + }, + { + "epoch": 14.984409799554566, + "loss": 0.36479222774505615, + "loss_ce": 0.00010718008707044646, + "loss_iou": 0.146484375, + "loss_num": 0.0145263671875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 376979648, + "step": 6728 + }, + { + "epoch": 14.98663697104677, + "grad_norm": 24.232728958129883, + "learning_rate": 1e-06, + "loss": 0.521, + "num_input_tokens_seen": 377035696, + "step": 6729 + }, + { + "epoch": 14.98663697104677, + "loss": 0.47929060459136963, + "loss_ce": 0.00016459994367323816, + "loss_iou": 0.1923828125, + "loss_num": 0.01904296875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 377035696, + "step": 6729 + }, + { + "epoch": 14.988864142538976, + "grad_norm": 16.893199920654297, + "learning_rate": 1e-06, + "loss": 0.4381, + "num_input_tokens_seen": 377094444, + "step": 6730 + }, + { + "epoch": 14.988864142538976, + "loss": 0.32279184460639954, + "loss_ce": 0.00016000941104721278, + "loss_iou": 0.150390625, + "loss_num": 0.00457763671875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 377094444, + "step": 6730 + }, + { + "epoch": 14.99109131403118, + "grad_norm": 19.161134719848633, + "learning_rate": 1e-06, + "loss": 0.3922, + "num_input_tokens_seen": 377154056, + "step": 6731 + }, + { + "epoch": 14.99109131403118, + "loss": 0.3056414723396301, + "loss_ce": 9.948984370566905e-05, + "loss_iou": 0.140625, + "loss_num": 0.0048828125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 377154056, + "step": 6731 + }, + { + "epoch": 14.993318485523385, + "grad_norm": 19.205812454223633, + "learning_rate": 1e-06, + "loss": 0.3231, + "num_input_tokens_seen": 377211084, + "step": 6732 + }, + { + "epoch": 14.993318485523385, + "loss": 0.34961456060409546, + "loss_ce": 0.00012724896077997983, + "loss_iou": 0.150390625, + "loss_num": 0.0096435546875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 377211084, + "step": 6732 + }, + { + "epoch": 14.99554565701559, + "grad_norm": 19.172298431396484, + "learning_rate": 1e-06, + "loss": 0.5644, + "num_input_tokens_seen": 377269112, + "step": 6733 + }, + { + "epoch": 14.99554565701559, + "loss": 0.5044995546340942, + "loss_ce": 0.00010500279313419014, + "loss_iou": 0.21875, + "loss_num": 0.0133056640625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 377269112, + "step": 6733 + }, + { + "epoch": 14.997772828507795, + "grad_norm": 18.66171646118164, + "learning_rate": 1e-06, + "loss": 0.4576, + "num_input_tokens_seen": 377325276, + "step": 6734 + }, + { + "epoch": 14.997772828507795, + "loss": 0.29831087589263916, + "loss_ce": 9.311985195381567e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.01080322265625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 377325276, + "step": 6734 + }, + { + "epoch": 15.0, + "grad_norm": 23.2895565032959, + "learning_rate": 1e-06, + "loss": 0.3809, + "num_input_tokens_seen": 377382228, + "step": 6735 + }, + { + "epoch": 15.0, + "loss": 0.4358910620212555, + "loss_ce": 0.00010004001524066553, + "loss_iou": 0.1953125, + "loss_num": 0.0089111328125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 377382228, + "step": 6735 + }, + { + "epoch": 15.002227171492205, + "grad_norm": 15.336739540100098, + "learning_rate": 1e-06, + "loss": 0.4672, + "num_input_tokens_seen": 377438264, + "step": 6736 + }, + { + "epoch": 15.002227171492205, + "loss": 0.25816991925239563, + "loss_ce": 0.00011327323591103777, + "loss_iou": 0.11474609375, + "loss_num": 0.00579833984375, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 377438264, + "step": 6736 + }, + { + "epoch": 15.00445434298441, + "grad_norm": 16.66543197631836, + "learning_rate": 1e-06, + "loss": 0.5834, + "num_input_tokens_seen": 377490272, + "step": 6737 + }, + { + "epoch": 15.00445434298441, + "loss": 0.7062318325042725, + "loss_ce": 0.0001771746901795268, + "loss_iou": 0.3046875, + "loss_num": 0.019775390625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 377490272, + "step": 6737 + }, + { + "epoch": 15.006681514476615, + "grad_norm": 16.386180877685547, + "learning_rate": 1e-06, + "loss": 0.3941, + "num_input_tokens_seen": 377547544, + "step": 6738 + }, + { + "epoch": 15.006681514476615, + "loss": 0.4180727005004883, + "loss_ce": 8.869110752129927e-05, + "loss_iou": 0.169921875, + "loss_num": 0.015625, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 377547544, + "step": 6738 + }, + { + "epoch": 15.00890868596882, + "grad_norm": 13.663156509399414, + "learning_rate": 1e-06, + "loss": 0.3605, + "num_input_tokens_seen": 377605960, + "step": 6739 + }, + { + "epoch": 15.00890868596882, + "loss": 0.29490476846694946, + "loss_ce": 0.00010495680180611089, + "loss_iou": 0.130859375, + "loss_num": 0.00653076171875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 377605960, + "step": 6739 + }, + { + "epoch": 15.011135857461024, + "grad_norm": 20.333576202392578, + "learning_rate": 1e-06, + "loss": 0.3858, + "num_input_tokens_seen": 377663176, + "step": 6740 + }, + { + "epoch": 15.011135857461024, + "loss": 0.34868597984313965, + "loss_ce": 0.00011420654482208192, + "loss_iou": 0.15625, + "loss_num": 0.00726318359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 377663176, + "step": 6740 + }, + { + "epoch": 15.01336302895323, + "grad_norm": 13.948286056518555, + "learning_rate": 1e-06, + "loss": 0.3959, + "num_input_tokens_seen": 377717832, + "step": 6741 + }, + { + "epoch": 15.01336302895323, + "loss": 0.42416131496429443, + "loss_ce": 8.905168215278536e-05, + "loss_iou": 0.173828125, + "loss_num": 0.0155029296875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 377717832, + "step": 6741 + }, + { + "epoch": 15.015590200445434, + "grad_norm": 19.52324867248535, + "learning_rate": 1e-06, + "loss": 0.3894, + "num_input_tokens_seen": 377775816, + "step": 6742 + }, + { + "epoch": 15.015590200445434, + "loss": 0.43786442279815674, + "loss_ce": 0.00012025667092530057, + "loss_iou": 0.177734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.4375, + "num_input_tokens_seen": 377775816, + "step": 6742 + }, + { + "epoch": 15.017817371937639, + "grad_norm": 26.31039047241211, + "learning_rate": 1e-06, + "loss": 0.4081, + "num_input_tokens_seen": 377834092, + "step": 6743 + }, + { + "epoch": 15.017817371937639, + "loss": 0.4304569363594055, + "loss_ce": 9.805826266529039e-05, + "loss_iou": 0.189453125, + "loss_num": 0.01025390625, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 377834092, + "step": 6743 + }, + { + "epoch": 15.020044543429844, + "grad_norm": 13.118270874023438, + "learning_rate": 1e-06, + "loss": 0.6506, + "num_input_tokens_seen": 377890548, + "step": 6744 + }, + { + "epoch": 15.020044543429844, + "loss": 0.8540452718734741, + "loss_ce": 0.0001634818036109209, + "loss_iou": 0.349609375, + "loss_num": 0.03125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 377890548, + "step": 6744 + }, + { + "epoch": 15.022271714922049, + "grad_norm": 16.432491302490234, + "learning_rate": 1e-06, + "loss": 0.394, + "num_input_tokens_seen": 377948476, + "step": 6745 + }, + { + "epoch": 15.022271714922049, + "loss": 0.39914602041244507, + "loss_ce": 9.820661216508597e-05, + "loss_iou": 0.181640625, + "loss_num": 0.00738525390625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 377948476, + "step": 6745 + }, + { + "epoch": 15.024498886414253, + "grad_norm": 17.178516387939453, + "learning_rate": 1e-06, + "loss": 0.435, + "num_input_tokens_seen": 378003020, + "step": 6746 + }, + { + "epoch": 15.024498886414253, + "loss": 0.5235254168510437, + "loss_ce": 0.0004541404196061194, + "loss_iou": 0.232421875, + "loss_num": 0.01171875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 378003020, + "step": 6746 + }, + { + "epoch": 15.026726057906458, + "grad_norm": 28.950069427490234, + "learning_rate": 1e-06, + "loss": 0.327, + "num_input_tokens_seen": 378060880, + "step": 6747 + }, + { + "epoch": 15.026726057906458, + "loss": 0.35339534282684326, + "loss_ce": 0.0001238558324985206, + "loss_iou": 0.16015625, + "loss_num": 0.006622314453125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 378060880, + "step": 6747 + }, + { + "epoch": 15.028953229398663, + "grad_norm": 30.91240882873535, + "learning_rate": 1e-06, + "loss": 0.6332, + "num_input_tokens_seen": 378113388, + "step": 6748 + }, + { + "epoch": 15.028953229398663, + "loss": 0.6445518136024475, + "loss_ce": 0.0016074487939476967, + "loss_iou": 0.283203125, + "loss_num": 0.0150146484375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 378113388, + "step": 6748 + }, + { + "epoch": 15.031180400890868, + "grad_norm": 110.00398254394531, + "learning_rate": 1e-06, + "loss": 0.3879, + "num_input_tokens_seen": 378168524, + "step": 6749 + }, + { + "epoch": 15.031180400890868, + "loss": 0.41330039501190186, + "loss_ce": 9.242070518666878e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.006500244140625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 378168524, + "step": 6749 + }, + { + "epoch": 15.033407572383073, + "grad_norm": 36.06410217285156, + "learning_rate": 1e-06, + "loss": 0.6565, + "num_input_tokens_seen": 378223964, + "step": 6750 + }, + { + "epoch": 15.033407572383073, + "eval_seeclick_web_CIoU": 0.5852765142917633, + "eval_seeclick_web_GIoU": 0.5835680663585663, + "eval_seeclick_web_IoU": 0.6037326455116272, + "eval_seeclick_web_MAE_all": 0.015391120221465826, + "eval_seeclick_web_MAE_h": 0.007827216759324074, + "eval_seeclick_web_MAE_w": 0.015470336191356182, + "eval_seeclick_web_MAE_x_boxes": 0.008828274440020323, + "eval_seeclick_web_MAE_y_boxes": 0.021092181792482734, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9029772877693176, + "eval_seeclick_web_loss_ce": 0.00016041052003856748, + "eval_seeclick_web_loss_iou": 0.4168701171875, + "eval_seeclick_web_loss_num": 0.012361526489257812, + "eval_seeclick_web_loss_xval": 0.895263671875, + "eval_seeclick_web_runtime": 25.9811, + "eval_seeclick_web_samples_per_second": 1.924, + "eval_seeclick_web_steps_per_second": 0.077, + "num_input_tokens_seen": 378223964, + "step": 6750 + }, + { + "epoch": 15.033407572383073, + "eval_icons_CIoU": 0.28028304874897003, + "eval_icons_GIoU": 0.30072087049484253, + "eval_icons_IoU": 0.3571899086236954, + "eval_icons_MAE_all": 0.057889632880687714, + "eval_icons_MAE_h": 0.03522232733666897, + "eval_icons_MAE_w": 0.05322147347033024, + "eval_icons_MAE_x_boxes": 0.05888655222952366, + "eval_icons_MAE_y_boxes": 0.03719859570264816, + "eval_icons_inside_bbox": 0.6371527910232544, + "eval_icons_loss": 1.6711426973342896, + "eval_icons_loss_ce": 0.00020551962370518595, + "eval_icons_loss_iou": 0.650146484375, + "eval_icons_loss_num": 0.04911231994628906, + "eval_icons_loss_xval": 1.546142578125, + "eval_icons_runtime": 25.2226, + "eval_icons_samples_per_second": 1.982, + "eval_icons_steps_per_second": 0.079, + "num_input_tokens_seen": 378223964, + "step": 6750 + }, + { + "epoch": 15.033407572383073, + "eval_screenspot_CIoU": 0.37860554456710815, + "eval_screenspot_GIoU": 0.39325101176897687, + "eval_screenspot_IoU": 0.4482241968313853, + "eval_screenspot_MAE_all": 0.05375574777523676, + "eval_screenspot_MAE_h": 0.03931415639817715, + "eval_screenspot_MAE_w": 0.05822847535212835, + "eval_screenspot_MAE_x_boxes": 0.06611844276388486, + "eval_screenspot_MAE_y_boxes": 0.036580439967413746, + "eval_screenspot_inside_bbox": 0.7041666706403097, + "eval_screenspot_loss": 1.5432301759719849, + "eval_screenspot_loss_ce": 0.00022206837699438134, + "eval_screenspot_loss_iou": 0.644775390625, + "eval_screenspot_loss_num": 0.061370849609375, + "eval_screenspot_loss_xval": 1.5965169270833333, + "eval_screenspot_runtime": 42.0263, + "eval_screenspot_samples_per_second": 2.118, + "eval_screenspot_steps_per_second": 0.071, + "num_input_tokens_seen": 378223964, + "step": 6750 + }, + { + "epoch": 15.033407572383073, + "eval_compot_CIoU": 0.34727177023887634, + "eval_compot_GIoU": 0.3595842719078064, + "eval_compot_IoU": 0.4042260944843292, + "eval_compot_MAE_all": 0.018605078104883432, + "eval_compot_MAE_h": 0.011235271580517292, + "eval_compot_MAE_w": 0.020991048775613308, + "eval_compot_MAE_x_boxes": 0.030081474222242832, + "eval_compot_MAE_y_boxes": 0.006614114856347442, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4067184925079346, + "eval_compot_loss_ce": 0.0001557795621920377, + "eval_compot_loss_iou": 0.650146484375, + "eval_compot_loss_num": 0.0174102783203125, + "eval_compot_loss_xval": 1.3876953125, + "eval_compot_runtime": 25.4621, + "eval_compot_samples_per_second": 1.964, + "eval_compot_steps_per_second": 0.079, + "num_input_tokens_seen": 378223964, + "step": 6750 + }, + { + "epoch": 15.033407572383073, + "eval_custom_ui_val_CIoU": 0.48026987661918, + "eval_custom_ui_val_GIoU": 0.48731139302253723, + "eval_custom_ui_val_IoU": 0.5409456855720944, + "eval_custom_ui_val_MAE_all": 0.027578827666325703, + "eval_custom_ui_val_MAE_h": 0.015306917821160622, + "eval_custom_ui_val_MAE_w": 0.03556989893938104, + "eval_custom_ui_val_MAE_x_boxes": 0.033483781981178455, + "eval_custom_ui_val_MAE_y_boxes": 0.013460160000249743, + "eval_custom_ui_val_inside_bbox": 0.7719907429483202, + "eval_custom_ui_val_loss": 1.1606297492980957, + "eval_custom_ui_val_loss_ce": 0.0001816005743522611, + "eval_custom_ui_val_loss_iou": 0.4960530598958333, + "eval_custom_ui_val_loss_num": 0.02418337927924262, + "eval_custom_ui_val_loss_xval": 1.1128472222222223, + "eval_custom_ui_val_runtime": 78.0431, + "eval_custom_ui_val_samples_per_second": 3.396, + "eval_custom_ui_val_steps_per_second": 0.115, + "num_input_tokens_seen": 378223964, + "step": 6750 + }, + { + "epoch": 15.033407572383073, + "loss": 0.8121492862701416, + "loss_ce": 0.00013749845675192773, + "loss_iou": 0.365234375, + "loss_num": 0.0166015625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 378223964, + "step": 6750 + }, + { + "epoch": 15.035634743875278, + "grad_norm": 23.943416595458984, + "learning_rate": 1e-06, + "loss": 0.3742, + "num_input_tokens_seen": 378281124, + "step": 6751 + }, + { + "epoch": 15.035634743875278, + "loss": 0.33616477251052856, + "loss_ce": 0.00010521589138079435, + "loss_iou": 0.1474609375, + "loss_num": 0.00811767578125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 378281124, + "step": 6751 + }, + { + "epoch": 15.037861915367483, + "grad_norm": 16.794620513916016, + "learning_rate": 1e-06, + "loss": 0.5123, + "num_input_tokens_seen": 378335736, + "step": 6752 + }, + { + "epoch": 15.037861915367483, + "loss": 0.42880141735076904, + "loss_ce": 9.046800551004708e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 378335736, + "step": 6752 + }, + { + "epoch": 15.040089086859687, + "grad_norm": 23.620935440063477, + "learning_rate": 1e-06, + "loss": 0.6279, + "num_input_tokens_seen": 378391368, + "step": 6753 + }, + { + "epoch": 15.040089086859687, + "loss": 0.624180793762207, + "loss_ce": 0.00015735081979073584, + "loss_iou": 0.279296875, + "loss_num": 0.01312255859375, + "loss_xval": 0.625, + "num_input_tokens_seen": 378391368, + "step": 6753 + }, + { + "epoch": 15.042316258351892, + "grad_norm": 16.139259338378906, + "learning_rate": 1e-06, + "loss": 0.4466, + "num_input_tokens_seen": 378446784, + "step": 6754 + }, + { + "epoch": 15.042316258351892, + "loss": 0.5067760348320007, + "loss_ce": 9.269756264984608e-05, + "loss_iou": 0.20703125, + "loss_num": 0.018310546875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 378446784, + "step": 6754 + }, + { + "epoch": 15.044543429844097, + "grad_norm": 17.31475257873535, + "learning_rate": 1e-06, + "loss": 0.3974, + "num_input_tokens_seen": 378504372, + "step": 6755 + }, + { + "epoch": 15.044543429844097, + "loss": 0.44474291801452637, + "loss_ce": 0.00022385823831427842, + "loss_iou": 0.177734375, + "loss_num": 0.0177001953125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 378504372, + "step": 6755 + }, + { + "epoch": 15.046770601336302, + "grad_norm": 22.26161766052246, + "learning_rate": 1e-06, + "loss": 0.6522, + "num_input_tokens_seen": 378560752, + "step": 6756 + }, + { + "epoch": 15.046770601336302, + "loss": 0.6529574990272522, + "loss_ce": 0.00036961075966246426, + "loss_iou": 0.28515625, + "loss_num": 0.0162353515625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 378560752, + "step": 6756 + }, + { + "epoch": 15.048997772828507, + "grad_norm": 18.25050163269043, + "learning_rate": 1e-06, + "loss": 0.5268, + "num_input_tokens_seen": 378619176, + "step": 6757 + }, + { + "epoch": 15.048997772828507, + "loss": 0.43017441034317017, + "loss_ce": 0.00012070426600985229, + "loss_iou": 0.18359375, + "loss_num": 0.0125732421875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 378619176, + "step": 6757 + }, + { + "epoch": 15.051224944320714, + "grad_norm": 16.298351287841797, + "learning_rate": 1e-06, + "loss": 0.3852, + "num_input_tokens_seen": 378673764, + "step": 6758 + }, + { + "epoch": 15.051224944320714, + "loss": 0.3430963158607483, + "loss_ce": 0.00012450873327907175, + "loss_iou": 0.1376953125, + "loss_num": 0.0137939453125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 378673764, + "step": 6758 + }, + { + "epoch": 15.053452115812918, + "grad_norm": 15.573384284973145, + "learning_rate": 1e-06, + "loss": 0.3511, + "num_input_tokens_seen": 378729620, + "step": 6759 + }, + { + "epoch": 15.053452115812918, + "loss": 0.36082327365875244, + "loss_ce": 0.00010551903687883168, + "loss_iou": 0.166015625, + "loss_num": 0.00579833984375, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 378729620, + "step": 6759 + }, + { + "epoch": 15.055679287305123, + "grad_norm": 16.886507034301758, + "learning_rate": 1e-06, + "loss": 0.5778, + "num_input_tokens_seen": 378784440, + "step": 6760 + }, + { + "epoch": 15.055679287305123, + "loss": 0.5877401828765869, + "loss_ce": 9.368160681333393e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.0264892578125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 378784440, + "step": 6760 + }, + { + "epoch": 15.057906458797328, + "grad_norm": 18.894807815551758, + "learning_rate": 1e-06, + "loss": 0.4008, + "num_input_tokens_seen": 378839964, + "step": 6761 + }, + { + "epoch": 15.057906458797328, + "loss": 0.3778371512889862, + "loss_ce": 9.055679402081296e-05, + "loss_iou": 0.169921875, + "loss_num": 0.007598876953125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 378839964, + "step": 6761 + }, + { + "epoch": 15.060133630289533, + "grad_norm": 22.449024200439453, + "learning_rate": 1e-06, + "loss": 0.5987, + "num_input_tokens_seen": 378897408, + "step": 6762 + }, + { + "epoch": 15.060133630289533, + "loss": 0.6189790368080139, + "loss_ce": 8.254876593127847e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0216064453125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 378897408, + "step": 6762 + }, + { + "epoch": 15.062360801781738, + "grad_norm": 21.661102294921875, + "learning_rate": 1e-06, + "loss": 0.4368, + "num_input_tokens_seen": 378951352, + "step": 6763 + }, + { + "epoch": 15.062360801781738, + "loss": 0.39707666635513306, + "loss_ce": 0.00010398250014986843, + "loss_iou": 0.1826171875, + "loss_num": 0.006439208984375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 378951352, + "step": 6763 + }, + { + "epoch": 15.064587973273943, + "grad_norm": 14.383728981018066, + "learning_rate": 1e-06, + "loss": 0.5996, + "num_input_tokens_seen": 379007704, + "step": 6764 + }, + { + "epoch": 15.064587973273943, + "loss": 0.6301254034042358, + "loss_ce": 0.00012049202632624656, + "loss_iou": 0.265625, + "loss_num": 0.0194091796875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 379007704, + "step": 6764 + }, + { + "epoch": 15.066815144766148, + "grad_norm": 16.69063377380371, + "learning_rate": 1e-06, + "loss": 0.5299, + "num_input_tokens_seen": 379060260, + "step": 6765 + }, + { + "epoch": 15.066815144766148, + "loss": 0.49555858969688416, + "loss_ce": 0.00019726283790078014, + "loss_iou": 0.1953125, + "loss_num": 0.0211181640625, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 379060260, + "step": 6765 + }, + { + "epoch": 15.069042316258352, + "grad_norm": 18.389291763305664, + "learning_rate": 1e-06, + "loss": 0.6429, + "num_input_tokens_seen": 379115936, + "step": 6766 + }, + { + "epoch": 15.069042316258352, + "loss": 0.9386324882507324, + "loss_ce": 0.00015596779121551663, + "loss_iou": 0.41015625, + "loss_num": 0.0240478515625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 379115936, + "step": 6766 + }, + { + "epoch": 15.071269487750557, + "grad_norm": 33.009193420410156, + "learning_rate": 1e-06, + "loss": 0.4666, + "num_input_tokens_seen": 379171576, + "step": 6767 + }, + { + "epoch": 15.071269487750557, + "loss": 0.5097314119338989, + "loss_ce": 8.78519203979522e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.0108642578125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 379171576, + "step": 6767 + }, + { + "epoch": 15.073496659242762, + "grad_norm": 20.735618591308594, + "learning_rate": 1e-06, + "loss": 0.3335, + "num_input_tokens_seen": 379231148, + "step": 6768 + }, + { + "epoch": 15.073496659242762, + "loss": 0.4110013246536255, + "loss_ce": 0.00011265121429460123, + "loss_iou": 0.1962890625, + "loss_num": 0.0038299560546875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 379231148, + "step": 6768 + }, + { + "epoch": 15.075723830734967, + "grad_norm": 13.399028778076172, + "learning_rate": 1e-06, + "loss": 0.4072, + "num_input_tokens_seen": 379288424, + "step": 6769 + }, + { + "epoch": 15.075723830734967, + "loss": 0.2551080882549286, + "loss_ce": 0.00010320404544472694, + "loss_iou": 0.11474609375, + "loss_num": 0.00506591796875, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 379288424, + "step": 6769 + }, + { + "epoch": 15.077951002227172, + "grad_norm": 15.024955749511719, + "learning_rate": 1e-06, + "loss": 0.5097, + "num_input_tokens_seen": 379341648, + "step": 6770 + }, + { + "epoch": 15.077951002227172, + "loss": 0.5870286226272583, + "loss_ce": 0.00011455308413133025, + "loss_iou": 0.25, + "loss_num": 0.0172119140625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 379341648, + "step": 6770 + }, + { + "epoch": 15.080178173719377, + "grad_norm": 57.792747497558594, + "learning_rate": 1e-06, + "loss": 0.5396, + "num_input_tokens_seen": 379396880, + "step": 6771 + }, + { + "epoch": 15.080178173719377, + "loss": 0.6382882595062256, + "loss_ce": 0.00010471623681951314, + "loss_iou": 0.251953125, + "loss_num": 0.02685546875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 379396880, + "step": 6771 + }, + { + "epoch": 15.082405345211582, + "grad_norm": 17.591594696044922, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 379451464, + "step": 6772 + }, + { + "epoch": 15.082405345211582, + "loss": 0.41464707255363464, + "loss_ce": 9.628412954043597e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.00762939453125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 379451464, + "step": 6772 + }, + { + "epoch": 15.084632516703786, + "grad_norm": 19.806339263916016, + "learning_rate": 1e-06, + "loss": 0.3308, + "num_input_tokens_seen": 379507000, + "step": 6773 + }, + { + "epoch": 15.084632516703786, + "loss": 0.32612937688827515, + "loss_ce": 7.959181675687432e-05, + "loss_iou": 0.12890625, + "loss_num": 0.01348876953125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 379507000, + "step": 6773 + }, + { + "epoch": 15.086859688195991, + "grad_norm": 17.055814743041992, + "learning_rate": 1e-06, + "loss": 0.3506, + "num_input_tokens_seen": 379562540, + "step": 6774 + }, + { + "epoch": 15.086859688195991, + "loss": 0.31467652320861816, + "loss_ce": 0.00010132987517863512, + "loss_iou": 0.13671875, + "loss_num": 0.00799560546875, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 379562540, + "step": 6774 + }, + { + "epoch": 15.089086859688196, + "grad_norm": 24.606420516967773, + "learning_rate": 1e-06, + "loss": 0.401, + "num_input_tokens_seen": 379620760, + "step": 6775 + }, + { + "epoch": 15.089086859688196, + "loss": 0.3719760477542877, + "loss_ce": 8.884338603820652e-05, + "loss_iou": 0.169921875, + "loss_num": 0.006256103515625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 379620760, + "step": 6775 + }, + { + "epoch": 15.091314031180401, + "grad_norm": 20.017690658569336, + "learning_rate": 1e-06, + "loss": 0.4856, + "num_input_tokens_seen": 379677336, + "step": 6776 + }, + { + "epoch": 15.091314031180401, + "loss": 0.45812952518463135, + "loss_ce": 0.00012173528375569731, + "loss_iou": 0.18359375, + "loss_num": 0.018310546875, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 379677336, + "step": 6776 + }, + { + "epoch": 15.093541202672606, + "grad_norm": 16.497709274291992, + "learning_rate": 1e-06, + "loss": 0.4482, + "num_input_tokens_seen": 379734720, + "step": 6777 + }, + { + "epoch": 15.093541202672606, + "loss": 0.34024685621261597, + "loss_ce": 9.794162178877741e-05, + "loss_iou": 0.15234375, + "loss_num": 0.007080078125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 379734720, + "step": 6777 + }, + { + "epoch": 15.09576837416481, + "grad_norm": 15.530109405517578, + "learning_rate": 1e-06, + "loss": 0.4104, + "num_input_tokens_seen": 379790532, + "step": 6778 + }, + { + "epoch": 15.09576837416481, + "loss": 0.30709517002105713, + "loss_ce": 8.832212188281119e-05, + "loss_iou": 0.13671875, + "loss_num": 0.006805419921875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 379790532, + "step": 6778 + }, + { + "epoch": 15.097995545657016, + "grad_norm": 16.189306259155273, + "learning_rate": 1e-06, + "loss": 0.4835, + "num_input_tokens_seen": 379847060, + "step": 6779 + }, + { + "epoch": 15.097995545657016, + "loss": 0.5428781509399414, + "loss_ce": 0.0001535568735562265, + "loss_iou": 0.224609375, + "loss_num": 0.0186767578125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 379847060, + "step": 6779 + }, + { + "epoch": 15.10022271714922, + "grad_norm": 15.896251678466797, + "learning_rate": 1e-06, + "loss": 0.4458, + "num_input_tokens_seen": 379902864, + "step": 6780 + }, + { + "epoch": 15.10022271714922, + "loss": 0.38522452116012573, + "loss_ce": 9.271127782994881e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.0069580078125, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 379902864, + "step": 6780 + }, + { + "epoch": 15.102449888641425, + "grad_norm": 52.78928756713867, + "learning_rate": 1e-06, + "loss": 0.3965, + "num_input_tokens_seen": 379959096, + "step": 6781 + }, + { + "epoch": 15.102449888641425, + "loss": 0.4485187530517578, + "loss_ce": 0.0002765837707556784, + "loss_iou": 0.19140625, + "loss_num": 0.01318359375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 379959096, + "step": 6781 + }, + { + "epoch": 15.10467706013363, + "grad_norm": 25.405174255371094, + "learning_rate": 1e-06, + "loss": 0.362, + "num_input_tokens_seen": 380015168, + "step": 6782 + }, + { + "epoch": 15.10467706013363, + "loss": 0.42562615871429443, + "loss_ce": 8.907825394999236e-05, + "loss_iou": 0.185546875, + "loss_num": 0.01104736328125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 380015168, + "step": 6782 + }, + { + "epoch": 15.106904231625835, + "grad_norm": 11.471501350402832, + "learning_rate": 1e-06, + "loss": 0.4449, + "num_input_tokens_seen": 380071884, + "step": 6783 + }, + { + "epoch": 15.106904231625835, + "loss": 0.4200718402862549, + "loss_ce": 9.654099267208949e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00592041015625, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 380071884, + "step": 6783 + }, + { + "epoch": 15.10913140311804, + "grad_norm": 15.025222778320312, + "learning_rate": 1e-06, + "loss": 0.4441, + "num_input_tokens_seen": 380129560, + "step": 6784 + }, + { + "epoch": 15.10913140311804, + "loss": 0.4893530011177063, + "loss_ce": 9.517707803752273e-05, + "loss_iou": 0.203125, + "loss_num": 0.0166015625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 380129560, + "step": 6784 + }, + { + "epoch": 15.111358574610245, + "grad_norm": 29.484661102294922, + "learning_rate": 1e-06, + "loss": 0.4307, + "num_input_tokens_seen": 380185012, + "step": 6785 + }, + { + "epoch": 15.111358574610245, + "loss": 0.314787894487381, + "loss_ce": 9.060885349754244e-05, + "loss_iou": 0.140625, + "loss_num": 0.00677490234375, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 380185012, + "step": 6785 + }, + { + "epoch": 15.11358574610245, + "grad_norm": 14.522575378417969, + "learning_rate": 1e-06, + "loss": 0.5448, + "num_input_tokens_seen": 380241104, + "step": 6786 + }, + { + "epoch": 15.11358574610245, + "loss": 0.576589822769165, + "loss_ce": 0.00011273389827692881, + "loss_iou": 0.2490234375, + "loss_num": 0.015625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 380241104, + "step": 6786 + }, + { + "epoch": 15.115812917594655, + "grad_norm": 13.87673568725586, + "learning_rate": 1e-06, + "loss": 0.5374, + "num_input_tokens_seen": 380297004, + "step": 6787 + }, + { + "epoch": 15.115812917594655, + "loss": 0.30384352803230286, + "loss_ce": 0.0001325808116234839, + "loss_iou": 0.1376953125, + "loss_num": 0.005615234375, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 380297004, + "step": 6787 + }, + { + "epoch": 15.11804008908686, + "grad_norm": 23.66053581237793, + "learning_rate": 1e-06, + "loss": 0.5607, + "num_input_tokens_seen": 380352836, + "step": 6788 + }, + { + "epoch": 15.11804008908686, + "loss": 0.36867833137512207, + "loss_ce": 8.703065395820886e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00738525390625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 380352836, + "step": 6788 + }, + { + "epoch": 15.120267260579064, + "grad_norm": 17.455827713012695, + "learning_rate": 1e-06, + "loss": 0.3954, + "num_input_tokens_seen": 380406124, + "step": 6789 + }, + { + "epoch": 15.120267260579064, + "loss": 0.3701331913471222, + "loss_ce": 7.704219751758501e-05, + "loss_iou": 0.15234375, + "loss_num": 0.01312255859375, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 380406124, + "step": 6789 + }, + { + "epoch": 15.122494432071269, + "grad_norm": 33.90388870239258, + "learning_rate": 1e-06, + "loss": 0.5768, + "num_input_tokens_seen": 380459684, + "step": 6790 + }, + { + "epoch": 15.122494432071269, + "loss": 0.4020087718963623, + "loss_ce": 9.228185808751732e-05, + "loss_iou": 0.166015625, + "loss_num": 0.01416015625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 380459684, + "step": 6790 + }, + { + "epoch": 15.124721603563474, + "grad_norm": 16.799598693847656, + "learning_rate": 1e-06, + "loss": 0.4023, + "num_input_tokens_seen": 380516820, + "step": 6791 + }, + { + "epoch": 15.124721603563474, + "loss": 0.44541066884994507, + "loss_ce": 9.819894330576062e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.0113525390625, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 380516820, + "step": 6791 + }, + { + "epoch": 15.126948775055679, + "grad_norm": 15.393778800964355, + "learning_rate": 1e-06, + "loss": 0.6106, + "num_input_tokens_seen": 380575020, + "step": 6792 + }, + { + "epoch": 15.126948775055679, + "loss": 0.702007532119751, + "loss_ce": 0.00010328319331165403, + "loss_iou": 0.2890625, + "loss_num": 0.0250244140625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 380575020, + "step": 6792 + }, + { + "epoch": 15.129175946547884, + "grad_norm": 12.515711784362793, + "learning_rate": 1e-06, + "loss": 0.6247, + "num_input_tokens_seen": 380631440, + "step": 6793 + }, + { + "epoch": 15.129175946547884, + "loss": 0.8331481218338013, + "loss_ce": 0.00014029248268343508, + "loss_iou": 0.31640625, + "loss_num": 0.0400390625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 380631440, + "step": 6793 + }, + { + "epoch": 15.131403118040089, + "grad_norm": 14.25368595123291, + "learning_rate": 1e-06, + "loss": 0.2824, + "num_input_tokens_seen": 380690028, + "step": 6794 + }, + { + "epoch": 15.131403118040089, + "loss": 0.3566052317619324, + "loss_ce": 0.00015993951819837093, + "loss_iou": 0.1552734375, + "loss_num": 0.00927734375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 380690028, + "step": 6794 + }, + { + "epoch": 15.133630289532293, + "grad_norm": 12.242840766906738, + "learning_rate": 1e-06, + "loss": 0.4051, + "num_input_tokens_seen": 380747684, + "step": 6795 + }, + { + "epoch": 15.133630289532293, + "loss": 0.33505433797836304, + "loss_ce": 9.341866825707257e-05, + "loss_iou": 0.158203125, + "loss_num": 0.00372314453125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 380747684, + "step": 6795 + }, + { + "epoch": 15.135857461024498, + "grad_norm": 15.275101661682129, + "learning_rate": 1e-06, + "loss": 0.3874, + "num_input_tokens_seen": 380803156, + "step": 6796 + }, + { + "epoch": 15.135857461024498, + "loss": 0.4023858308792114, + "loss_ce": 0.0001641570997890085, + "loss_iou": 0.17578125, + "loss_num": 0.0103759765625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 380803156, + "step": 6796 + }, + { + "epoch": 15.138084632516703, + "grad_norm": 16.61083221435547, + "learning_rate": 1e-06, + "loss": 0.5824, + "num_input_tokens_seen": 380860000, + "step": 6797 + }, + { + "epoch": 15.138084632516703, + "loss": 0.5763627290725708, + "loss_ce": 0.00012979336315765977, + "loss_iou": 0.251953125, + "loss_num": 0.014404296875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 380860000, + "step": 6797 + }, + { + "epoch": 15.140311804008908, + "grad_norm": 17.85312271118164, + "learning_rate": 1e-06, + "loss": 0.3495, + "num_input_tokens_seen": 380914548, + "step": 6798 + }, + { + "epoch": 15.140311804008908, + "loss": 0.2694869935512543, + "loss_ce": 7.780264422763139e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.00714111328125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 380914548, + "step": 6798 + }, + { + "epoch": 15.142538975501113, + "grad_norm": 26.615489959716797, + "learning_rate": 1e-06, + "loss": 0.4413, + "num_input_tokens_seen": 380972508, + "step": 6799 + }, + { + "epoch": 15.142538975501113, + "loss": 0.4014506936073303, + "loss_ce": 8.351253200089559e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01531982421875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 380972508, + "step": 6799 + }, + { + "epoch": 15.144766146993318, + "grad_norm": 15.07624626159668, + "learning_rate": 1e-06, + "loss": 0.3529, + "num_input_tokens_seen": 381027844, + "step": 6800 + }, + { + "epoch": 15.144766146993318, + "loss": 0.41293323040008545, + "loss_ce": 9.14369520614855e-05, + "loss_iou": 0.1875, + "loss_num": 0.00775146484375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 381027844, + "step": 6800 + }, + { + "epoch": 15.146993318485523, + "grad_norm": 18.466833114624023, + "learning_rate": 1e-06, + "loss": 0.489, + "num_input_tokens_seen": 381084744, + "step": 6801 + }, + { + "epoch": 15.146993318485523, + "loss": 0.37544137239456177, + "loss_ce": 7.517053745687008e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.007598876953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 381084744, + "step": 6801 + }, + { + "epoch": 15.14922048997773, + "grad_norm": 18.854787826538086, + "learning_rate": 1e-06, + "loss": 0.3958, + "num_input_tokens_seen": 381139260, + "step": 6802 + }, + { + "epoch": 15.14922048997773, + "loss": 0.386452853679657, + "loss_ce": 0.00010031522833742201, + "loss_iou": 0.1689453125, + "loss_num": 0.009521484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 381139260, + "step": 6802 + }, + { + "epoch": 15.151447661469934, + "grad_norm": 28.44632339477539, + "learning_rate": 1e-06, + "loss": 0.3858, + "num_input_tokens_seen": 381197604, + "step": 6803 + }, + { + "epoch": 15.151447661469934, + "loss": 0.4399263262748718, + "loss_ce": 0.0002290659467689693, + "loss_iou": 0.1962890625, + "loss_num": 0.0096435546875, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 381197604, + "step": 6803 + }, + { + "epoch": 15.153674832962139, + "grad_norm": 36.481109619140625, + "learning_rate": 1e-06, + "loss": 0.4761, + "num_input_tokens_seen": 381253820, + "step": 6804 + }, + { + "epoch": 15.153674832962139, + "loss": 0.38608303666114807, + "loss_ce": 9.673433669377118e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00982666015625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 381253820, + "step": 6804 + }, + { + "epoch": 15.155902004454344, + "grad_norm": 98.84033203125, + "learning_rate": 1e-06, + "loss": 0.3979, + "num_input_tokens_seen": 381309320, + "step": 6805 + }, + { + "epoch": 15.155902004454344, + "loss": 0.35694801807403564, + "loss_ce": 0.00016701644926797599, + "loss_iou": 0.1552734375, + "loss_num": 0.00921630859375, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 381309320, + "step": 6805 + }, + { + "epoch": 15.158129175946549, + "grad_norm": 18.540637969970703, + "learning_rate": 1e-06, + "loss": 0.3822, + "num_input_tokens_seen": 381365092, + "step": 6806 + }, + { + "epoch": 15.158129175946549, + "loss": 0.40906059741973877, + "loss_ce": 0.00012505166523624212, + "loss_iou": 0.181640625, + "loss_num": 0.00909423828125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 381365092, + "step": 6806 + }, + { + "epoch": 15.160356347438753, + "grad_norm": 25.038162231445312, + "learning_rate": 1e-06, + "loss": 0.6632, + "num_input_tokens_seen": 381421476, + "step": 6807 + }, + { + "epoch": 15.160356347438753, + "loss": 0.6734679937362671, + "loss_ce": 0.00012814889487344772, + "loss_iou": 0.30078125, + "loss_num": 0.014404296875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 381421476, + "step": 6807 + }, + { + "epoch": 15.162583518930958, + "grad_norm": 19.82026481628418, + "learning_rate": 1e-06, + "loss": 0.538, + "num_input_tokens_seen": 381479976, + "step": 6808 + }, + { + "epoch": 15.162583518930958, + "loss": 0.40109795331954956, + "loss_ce": 9.697194036561996e-05, + "loss_iou": 0.1875, + "loss_num": 0.0052490234375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 381479976, + "step": 6808 + }, + { + "epoch": 15.164810690423163, + "grad_norm": 23.628137588500977, + "learning_rate": 1e-06, + "loss": 0.448, + "num_input_tokens_seen": 381534776, + "step": 6809 + }, + { + "epoch": 15.164810690423163, + "loss": 0.5230967402458191, + "loss_ce": 0.0001475035387557, + "loss_iou": 0.216796875, + "loss_num": 0.01806640625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 381534776, + "step": 6809 + }, + { + "epoch": 15.167037861915368, + "grad_norm": 21.607646942138672, + "learning_rate": 1e-06, + "loss": 0.504, + "num_input_tokens_seen": 381592192, + "step": 6810 + }, + { + "epoch": 15.167037861915368, + "loss": 0.5715093612670898, + "loss_ce": 9.827417670749128e-05, + "loss_iou": 0.22265625, + "loss_num": 0.0255126953125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 381592192, + "step": 6810 + }, + { + "epoch": 15.169265033407573, + "grad_norm": 17.767423629760742, + "learning_rate": 1e-06, + "loss": 0.4062, + "num_input_tokens_seen": 381650596, + "step": 6811 + }, + { + "epoch": 15.169265033407573, + "loss": 0.3915339708328247, + "loss_ce": 0.00011550119234016165, + "loss_iou": 0.1787109375, + "loss_num": 0.006805419921875, + "loss_xval": 0.390625, + "num_input_tokens_seen": 381650596, + "step": 6811 + }, + { + "epoch": 15.171492204899778, + "grad_norm": 13.659879684448242, + "learning_rate": 1e-06, + "loss": 0.4005, + "num_input_tokens_seen": 381706044, + "step": 6812 + }, + { + "epoch": 15.171492204899778, + "loss": 0.37923288345336914, + "loss_ce": 8.25146198621951e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0128173828125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 381706044, + "step": 6812 + }, + { + "epoch": 15.173719376391983, + "grad_norm": 14.41109561920166, + "learning_rate": 1e-06, + "loss": 0.4001, + "num_input_tokens_seen": 381759412, + "step": 6813 + }, + { + "epoch": 15.173719376391983, + "loss": 0.38645416498184204, + "loss_ce": 0.00010162763646803796, + "loss_iou": 0.166015625, + "loss_num": 0.01080322265625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 381759412, + "step": 6813 + }, + { + "epoch": 15.175946547884188, + "grad_norm": 15.983702659606934, + "learning_rate": 1e-06, + "loss": 0.4464, + "num_input_tokens_seen": 381817464, + "step": 6814 + }, + { + "epoch": 15.175946547884188, + "loss": 0.45959311723709106, + "loss_ce": 0.00012045353651046753, + "loss_iou": 0.20703125, + "loss_num": 0.00921630859375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 381817464, + "step": 6814 + }, + { + "epoch": 15.178173719376392, + "grad_norm": 22.976959228515625, + "learning_rate": 1e-06, + "loss": 0.5359, + "num_input_tokens_seen": 381870696, + "step": 6815 + }, + { + "epoch": 15.178173719376392, + "loss": 0.3837730288505554, + "loss_ce": 0.0001670640049269423, + "loss_iou": 0.16015625, + "loss_num": 0.01275634765625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 381870696, + "step": 6815 + }, + { + "epoch": 15.180400890868597, + "grad_norm": 23.866098403930664, + "learning_rate": 1e-06, + "loss": 0.5447, + "num_input_tokens_seen": 381925464, + "step": 6816 + }, + { + "epoch": 15.180400890868597, + "loss": 0.3768770694732666, + "loss_ce": 0.0001070558573701419, + "loss_iou": 0.1767578125, + "loss_num": 0.0047607421875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 381925464, + "step": 6816 + }, + { + "epoch": 15.182628062360802, + "grad_norm": 21.5544490814209, + "learning_rate": 1e-06, + "loss": 0.5746, + "num_input_tokens_seen": 381979292, + "step": 6817 + }, + { + "epoch": 15.182628062360802, + "loss": 0.6533318758010864, + "loss_ce": 0.0001337053836323321, + "loss_iou": 0.283203125, + "loss_num": 0.017822265625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 381979292, + "step": 6817 + }, + { + "epoch": 15.184855233853007, + "grad_norm": 21.73903465270996, + "learning_rate": 1e-06, + "loss": 0.4286, + "num_input_tokens_seen": 382034532, + "step": 6818 + }, + { + "epoch": 15.184855233853007, + "loss": 0.4159244894981384, + "loss_ce": 0.00015300727682188153, + "loss_iou": 0.1806640625, + "loss_num": 0.0111083984375, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 382034532, + "step": 6818 + }, + { + "epoch": 15.187082405345212, + "grad_norm": 14.082677841186523, + "learning_rate": 1e-06, + "loss": 0.345, + "num_input_tokens_seen": 382089388, + "step": 6819 + }, + { + "epoch": 15.187082405345212, + "loss": 0.3454124331474304, + "loss_ce": 7.550232112407684e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0111083984375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 382089388, + "step": 6819 + }, + { + "epoch": 15.189309576837417, + "grad_norm": 13.859386444091797, + "learning_rate": 1e-06, + "loss": 0.3842, + "num_input_tokens_seen": 382148896, + "step": 6820 + }, + { + "epoch": 15.189309576837417, + "loss": 0.33224329352378845, + "loss_ce": 8.99658989510499e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0069580078125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 382148896, + "step": 6820 + }, + { + "epoch": 15.191536748329622, + "grad_norm": 21.40119171142578, + "learning_rate": 1e-06, + "loss": 0.5285, + "num_input_tokens_seen": 382204260, + "step": 6821 + }, + { + "epoch": 15.191536748329622, + "loss": 0.38690587878227234, + "loss_ce": 9.558402234688401e-05, + "loss_iou": 0.171875, + "loss_num": 0.00848388671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 382204260, + "step": 6821 + }, + { + "epoch": 15.193763919821826, + "grad_norm": 23.956090927124023, + "learning_rate": 1e-06, + "loss": 0.2846, + "num_input_tokens_seen": 382261104, + "step": 6822 + }, + { + "epoch": 15.193763919821826, + "loss": 0.33018404245376587, + "loss_ce": 0.0001059087153407745, + "loss_iou": 0.1533203125, + "loss_num": 0.00469970703125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 382261104, + "step": 6822 + }, + { + "epoch": 15.195991091314031, + "grad_norm": 29.63949203491211, + "learning_rate": 1e-06, + "loss": 0.4216, + "num_input_tokens_seen": 382316244, + "step": 6823 + }, + { + "epoch": 15.195991091314031, + "loss": 0.27267590165138245, + "loss_ce": 9.288682485930622e-05, + "loss_iou": 0.125, + "loss_num": 0.00439453125, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 382316244, + "step": 6823 + }, + { + "epoch": 15.198218262806236, + "grad_norm": 25.926671981811523, + "learning_rate": 1e-06, + "loss": 0.5995, + "num_input_tokens_seen": 382367780, + "step": 6824 + }, + { + "epoch": 15.198218262806236, + "loss": 0.7526465654373169, + "loss_ce": 0.0001441580825485289, + "loss_iou": 0.3203125, + "loss_num": 0.0225830078125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 382367780, + "step": 6824 + }, + { + "epoch": 15.200445434298441, + "grad_norm": 32.10568618774414, + "learning_rate": 1e-06, + "loss": 0.5171, + "num_input_tokens_seen": 382423572, + "step": 6825 + }, + { + "epoch": 15.200445434298441, + "loss": 0.5426996946334839, + "loss_ce": 9.716699423734099e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.0128173828125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 382423572, + "step": 6825 + }, + { + "epoch": 15.202672605790646, + "grad_norm": 30.0792293548584, + "learning_rate": 1e-06, + "loss": 0.4723, + "num_input_tokens_seen": 382476324, + "step": 6826 + }, + { + "epoch": 15.202672605790646, + "loss": 0.47158297896385193, + "loss_ce": 0.0001474320306442678, + "loss_iou": 0.2060546875, + "loss_num": 0.01165771484375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 382476324, + "step": 6826 + }, + { + "epoch": 15.20489977728285, + "grad_norm": 25.993913650512695, + "learning_rate": 1e-06, + "loss": 0.3837, + "num_input_tokens_seen": 382531860, + "step": 6827 + }, + { + "epoch": 15.20489977728285, + "loss": 0.37444430589675903, + "loss_ce": 0.00011569406342459843, + "loss_iou": 0.15625, + "loss_num": 0.012451171875, + "loss_xval": 0.375, + "num_input_tokens_seen": 382531860, + "step": 6827 + }, + { + "epoch": 15.207126948775056, + "grad_norm": 20.49637794494629, + "learning_rate": 1e-06, + "loss": 0.5971, + "num_input_tokens_seen": 382587412, + "step": 6828 + }, + { + "epoch": 15.207126948775056, + "loss": 0.459941565990448, + "loss_ce": 0.00010270022903569043, + "loss_iou": 0.2041015625, + "loss_num": 0.010498046875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 382587412, + "step": 6828 + }, + { + "epoch": 15.20935412026726, + "grad_norm": 16.51508331298828, + "learning_rate": 1e-06, + "loss": 0.4748, + "num_input_tokens_seen": 382642572, + "step": 6829 + }, + { + "epoch": 15.20935412026726, + "loss": 0.5448847413063049, + "loss_ce": 8.491726475767791e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.01556396484375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 382642572, + "step": 6829 + }, + { + "epoch": 15.211581291759465, + "grad_norm": 17.06146240234375, + "learning_rate": 1e-06, + "loss": 0.4023, + "num_input_tokens_seen": 382699556, + "step": 6830 + }, + { + "epoch": 15.211581291759465, + "loss": 0.424904465675354, + "loss_ce": 0.00022184928820934147, + "loss_iou": 0.1904296875, + "loss_num": 0.0086669921875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 382699556, + "step": 6830 + }, + { + "epoch": 15.21380846325167, + "grad_norm": 18.930295944213867, + "learning_rate": 1e-06, + "loss": 0.4568, + "num_input_tokens_seen": 382757356, + "step": 6831 + }, + { + "epoch": 15.21380846325167, + "loss": 0.5095734596252441, + "loss_ce": 0.0004182119155302644, + "loss_iou": 0.201171875, + "loss_num": 0.0211181640625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 382757356, + "step": 6831 + }, + { + "epoch": 15.216035634743875, + "grad_norm": 21.91002655029297, + "learning_rate": 1e-06, + "loss": 0.375, + "num_input_tokens_seen": 382811564, + "step": 6832 + }, + { + "epoch": 15.216035634743875, + "loss": 0.40269356966018677, + "loss_ce": 0.00010566625132923946, + "loss_iou": 0.1826171875, + "loss_num": 0.0076904296875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 382811564, + "step": 6832 + }, + { + "epoch": 15.21826280623608, + "grad_norm": 26.341665267944336, + "learning_rate": 1e-06, + "loss": 0.4079, + "num_input_tokens_seen": 382865496, + "step": 6833 + }, + { + "epoch": 15.21826280623608, + "loss": 0.4928325414657593, + "loss_ce": 9.574595605954528e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.02001953125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 382865496, + "step": 6833 + }, + { + "epoch": 15.220489977728285, + "grad_norm": 21.50320816040039, + "learning_rate": 1e-06, + "loss": 0.355, + "num_input_tokens_seen": 382922332, + "step": 6834 + }, + { + "epoch": 15.220489977728285, + "loss": 0.38630521297454834, + "loss_ce": 0.0007308792555704713, + "loss_iou": 0.162109375, + "loss_num": 0.01226806640625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 382922332, + "step": 6834 + }, + { + "epoch": 15.22271714922049, + "grad_norm": 22.518569946289062, + "learning_rate": 1e-06, + "loss": 0.4254, + "num_input_tokens_seen": 382980300, + "step": 6835 + }, + { + "epoch": 15.22271714922049, + "loss": 0.3738847076892853, + "loss_ce": 0.00010540773655520752, + "loss_iou": 0.16796875, + "loss_num": 0.00750732421875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 382980300, + "step": 6835 + }, + { + "epoch": 15.224944320712694, + "grad_norm": 20.747737884521484, + "learning_rate": 1e-06, + "loss": 0.4773, + "num_input_tokens_seen": 383038560, + "step": 6836 + }, + { + "epoch": 15.224944320712694, + "loss": 0.6503742337226868, + "loss_ce": 0.000105653190985322, + "loss_iou": 0.267578125, + "loss_num": 0.0228271484375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 383038560, + "step": 6836 + }, + { + "epoch": 15.2271714922049, + "grad_norm": 31.612144470214844, + "learning_rate": 1e-06, + "loss": 0.3559, + "num_input_tokens_seen": 383093332, + "step": 6837 + }, + { + "epoch": 15.2271714922049, + "loss": 0.3483448326587677, + "loss_ce": 7.823983469279483e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.010009765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 383093332, + "step": 6837 + }, + { + "epoch": 15.229398663697104, + "grad_norm": 27.15004539489746, + "learning_rate": 1e-06, + "loss": 0.3923, + "num_input_tokens_seen": 383150088, + "step": 6838 + }, + { + "epoch": 15.229398663697104, + "loss": 0.3999464511871338, + "loss_ce": 0.00028826179914176464, + "loss_iou": 0.1787109375, + "loss_num": 0.00836181640625, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 383150088, + "step": 6838 + }, + { + "epoch": 15.231625835189309, + "grad_norm": 20.663650512695312, + "learning_rate": 1e-06, + "loss": 0.5478, + "num_input_tokens_seen": 383208636, + "step": 6839 + }, + { + "epoch": 15.231625835189309, + "loss": 0.5868840217590332, + "loss_ce": 9.200301428791136e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.021728515625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 383208636, + "step": 6839 + }, + { + "epoch": 15.233853006681514, + "grad_norm": 15.867166519165039, + "learning_rate": 1e-06, + "loss": 0.4572, + "num_input_tokens_seen": 383262456, + "step": 6840 + }, + { + "epoch": 15.233853006681514, + "loss": 0.5546283721923828, + "loss_ce": 0.0018329141894355416, + "loss_iou": 0.244140625, + "loss_num": 0.01251220703125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 383262456, + "step": 6840 + }, + { + "epoch": 15.236080178173719, + "grad_norm": 22.131988525390625, + "learning_rate": 1e-06, + "loss": 0.3841, + "num_input_tokens_seen": 383319500, + "step": 6841 + }, + { + "epoch": 15.236080178173719, + "loss": 0.3393481373786926, + "loss_ce": 0.00011472964251879603, + "loss_iou": 0.1396484375, + "loss_num": 0.0120849609375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 383319500, + "step": 6841 + }, + { + "epoch": 15.238307349665924, + "grad_norm": 17.503231048583984, + "learning_rate": 1e-06, + "loss": 0.5277, + "num_input_tokens_seen": 383373476, + "step": 6842 + }, + { + "epoch": 15.238307349665924, + "loss": 0.6524415612220764, + "loss_ce": 9.780684194993228e-05, + "loss_iou": 0.267578125, + "loss_num": 0.023681640625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 383373476, + "step": 6842 + }, + { + "epoch": 15.240534521158128, + "grad_norm": 27.421920776367188, + "learning_rate": 1e-06, + "loss": 0.3908, + "num_input_tokens_seen": 383430396, + "step": 6843 + }, + { + "epoch": 15.240534521158128, + "loss": 0.2873254120349884, + "loss_ce": 9.397394023835659e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.0064697265625, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 383430396, + "step": 6843 + }, + { + "epoch": 15.242761692650333, + "grad_norm": 18.538227081298828, + "learning_rate": 1e-06, + "loss": 0.8737, + "num_input_tokens_seen": 383486732, + "step": 6844 + }, + { + "epoch": 15.242761692650333, + "loss": 0.9400348663330078, + "loss_ce": 9.340968244941905e-05, + "loss_iou": 0.3984375, + "loss_num": 0.028564453125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 383486732, + "step": 6844 + }, + { + "epoch": 15.244988864142538, + "grad_norm": 18.570964813232422, + "learning_rate": 1e-06, + "loss": 0.4564, + "num_input_tokens_seen": 383544740, + "step": 6845 + }, + { + "epoch": 15.244988864142538, + "loss": 0.507795512676239, + "loss_ce": 0.00010509089042898268, + "loss_iou": 0.1962890625, + "loss_num": 0.0230712890625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 383544740, + "step": 6845 + }, + { + "epoch": 15.247216035634743, + "grad_norm": 25.994709014892578, + "learning_rate": 1e-06, + "loss": 0.3808, + "num_input_tokens_seen": 383601352, + "step": 6846 + }, + { + "epoch": 15.247216035634743, + "loss": 0.4001128673553467, + "loss_ce": 8.847533172229305e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.0076904296875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 383601352, + "step": 6846 + }, + { + "epoch": 15.249443207126948, + "grad_norm": 30.125736236572266, + "learning_rate": 1e-06, + "loss": 0.3879, + "num_input_tokens_seen": 383656492, + "step": 6847 + }, + { + "epoch": 15.249443207126948, + "loss": 0.33361074328422546, + "loss_ce": 0.00011466215801192448, + "loss_iou": 0.1494140625, + "loss_num": 0.00677490234375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 383656492, + "step": 6847 + }, + { + "epoch": 15.251670378619155, + "grad_norm": 16.290237426757812, + "learning_rate": 1e-06, + "loss": 0.5262, + "num_input_tokens_seen": 383713968, + "step": 6848 + }, + { + "epoch": 15.251670378619155, + "loss": 0.4467582702636719, + "loss_ce": 0.00010298557754140347, + "loss_iou": 0.1904296875, + "loss_num": 0.01300048828125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 383713968, + "step": 6848 + }, + { + "epoch": 15.25389755011136, + "grad_norm": 32.09817886352539, + "learning_rate": 1e-06, + "loss": 0.5307, + "num_input_tokens_seen": 383769468, + "step": 6849 + }, + { + "epoch": 15.25389755011136, + "loss": 0.5073274970054626, + "loss_ce": 0.0001253713999176398, + "loss_iou": 0.2021484375, + "loss_num": 0.0203857421875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 383769468, + "step": 6849 + }, + { + "epoch": 15.256124721603564, + "grad_norm": 19.521888732910156, + "learning_rate": 1e-06, + "loss": 0.4386, + "num_input_tokens_seen": 383826068, + "step": 6850 + }, + { + "epoch": 15.256124721603564, + "loss": 0.3947462737560272, + "loss_ce": 9.295322524849325e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.0191650390625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 383826068, + "step": 6850 + }, + { + "epoch": 15.25835189309577, + "grad_norm": 18.782377243041992, + "learning_rate": 1e-06, + "loss": 0.2575, + "num_input_tokens_seen": 383883012, + "step": 6851 + }, + { + "epoch": 15.25835189309577, + "loss": 0.2647382318973541, + "loss_ce": 8.9788663899526e-05, + "loss_iou": 0.109375, + "loss_num": 0.009033203125, + "loss_xval": 0.265625, + "num_input_tokens_seen": 383883012, + "step": 6851 + }, + { + "epoch": 15.260579064587974, + "grad_norm": 21.034971237182617, + "learning_rate": 1e-06, + "loss": 0.4045, + "num_input_tokens_seen": 383936956, + "step": 6852 + }, + { + "epoch": 15.260579064587974, + "loss": 0.4515724182128906, + "loss_ce": 9.537780715618283e-05, + "loss_iou": 0.185546875, + "loss_num": 0.015869140625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 383936956, + "step": 6852 + }, + { + "epoch": 15.262806236080179, + "grad_norm": 32.299102783203125, + "learning_rate": 1e-06, + "loss": 0.3529, + "num_input_tokens_seen": 383993404, + "step": 6853 + }, + { + "epoch": 15.262806236080179, + "loss": 0.31591275334358215, + "loss_ce": 0.00011685363278957084, + "loss_iou": 0.142578125, + "loss_num": 0.00628662109375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 383993404, + "step": 6853 + }, + { + "epoch": 15.265033407572384, + "grad_norm": 17.330768585205078, + "learning_rate": 1e-06, + "loss": 0.485, + "num_input_tokens_seen": 384048856, + "step": 6854 + }, + { + "epoch": 15.265033407572384, + "loss": 0.5928666591644287, + "loss_ce": 0.00021531574020627886, + "loss_iou": 0.2255859375, + "loss_num": 0.0281982421875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 384048856, + "step": 6854 + }, + { + "epoch": 15.267260579064589, + "grad_norm": 19.941410064697266, + "learning_rate": 1e-06, + "loss": 0.4413, + "num_input_tokens_seen": 384104048, + "step": 6855 + }, + { + "epoch": 15.267260579064589, + "loss": 0.40443968772888184, + "loss_ce": 8.178211282938719e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.0111083984375, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 384104048, + "step": 6855 + }, + { + "epoch": 15.269487750556793, + "grad_norm": 19.013338088989258, + "learning_rate": 1e-06, + "loss": 0.52, + "num_input_tokens_seen": 384155284, + "step": 6856 + }, + { + "epoch": 15.269487750556793, + "loss": 0.3980572819709778, + "loss_ce": 0.00010806175851030275, + "loss_iou": 0.169921875, + "loss_num": 0.01177978515625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 384155284, + "step": 6856 + }, + { + "epoch": 15.271714922048998, + "grad_norm": 17.63640594482422, + "learning_rate": 1e-06, + "loss": 0.4304, + "num_input_tokens_seen": 384212084, + "step": 6857 + }, + { + "epoch": 15.271714922048998, + "loss": 0.46271538734436035, + "loss_ce": 0.00025199330411851406, + "loss_iou": 0.1845703125, + "loss_num": 0.0184326171875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 384212084, + "step": 6857 + }, + { + "epoch": 15.273942093541203, + "grad_norm": 17.39361572265625, + "learning_rate": 1e-06, + "loss": 0.5069, + "num_input_tokens_seen": 384266828, + "step": 6858 + }, + { + "epoch": 15.273942093541203, + "loss": 0.4388231039047241, + "loss_ce": 0.0001023877048282884, + "loss_iou": 0.18359375, + "loss_num": 0.01422119140625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 384266828, + "step": 6858 + }, + { + "epoch": 15.276169265033408, + "grad_norm": 20.657392501831055, + "learning_rate": 1e-06, + "loss": 0.3784, + "num_input_tokens_seen": 384320760, + "step": 6859 + }, + { + "epoch": 15.276169265033408, + "loss": 0.3058285713195801, + "loss_ce": 0.00010348795331083238, + "loss_iou": 0.140625, + "loss_num": 0.004791259765625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 384320760, + "step": 6859 + }, + { + "epoch": 15.278396436525613, + "grad_norm": 17.122583389282227, + "learning_rate": 1e-06, + "loss": 0.3612, + "num_input_tokens_seen": 384377020, + "step": 6860 + }, + { + "epoch": 15.278396436525613, + "loss": 0.33143696188926697, + "loss_ce": 0.0001076041953638196, + "loss_iou": 0.138671875, + "loss_num": 0.0107421875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 384377020, + "step": 6860 + }, + { + "epoch": 15.280623608017818, + "grad_norm": 24.28658676147461, + "learning_rate": 1e-06, + "loss": 0.4031, + "num_input_tokens_seen": 384431080, + "step": 6861 + }, + { + "epoch": 15.280623608017818, + "loss": 0.518648624420166, + "loss_ce": 9.391328057972714e-05, + "loss_iou": 0.21875, + "loss_num": 0.0162353515625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 384431080, + "step": 6861 + }, + { + "epoch": 15.282850779510023, + "grad_norm": 11.215580940246582, + "learning_rate": 1e-06, + "loss": 0.304, + "num_input_tokens_seen": 384487584, + "step": 6862 + }, + { + "epoch": 15.282850779510023, + "loss": 0.31601682305336, + "loss_ce": 9.885276085697114e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 384487584, + "step": 6862 + }, + { + "epoch": 15.285077951002227, + "grad_norm": 22.595476150512695, + "learning_rate": 1e-06, + "loss": 0.3841, + "num_input_tokens_seen": 384544196, + "step": 6863 + }, + { + "epoch": 15.285077951002227, + "loss": 0.49600616097450256, + "loss_ce": 0.00015653966693207622, + "loss_iou": 0.2021484375, + "loss_num": 0.018310546875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 384544196, + "step": 6863 + }, + { + "epoch": 15.287305122494432, + "grad_norm": 65.94528198242188, + "learning_rate": 1e-06, + "loss": 0.4559, + "num_input_tokens_seen": 384598184, + "step": 6864 + }, + { + "epoch": 15.287305122494432, + "loss": 0.5419918894767761, + "loss_ce": 0.00012176702875876799, + "loss_iou": 0.248046875, + "loss_num": 0.009033203125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 384598184, + "step": 6864 + }, + { + "epoch": 15.289532293986637, + "grad_norm": 18.129741668701172, + "learning_rate": 1e-06, + "loss": 0.4641, + "num_input_tokens_seen": 384655212, + "step": 6865 + }, + { + "epoch": 15.289532293986637, + "loss": 0.5154638290405273, + "loss_ce": 8.296032319776714e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0177001953125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 384655212, + "step": 6865 + }, + { + "epoch": 15.291759465478842, + "grad_norm": 33.346805572509766, + "learning_rate": 1e-06, + "loss": 0.4791, + "num_input_tokens_seen": 384711224, + "step": 6866 + }, + { + "epoch": 15.291759465478842, + "loss": 0.5016932487487793, + "loss_ce": 0.00010639546962920576, + "loss_iou": 0.2294921875, + "loss_num": 0.0086669921875, + "loss_xval": 0.5, + "num_input_tokens_seen": 384711224, + "step": 6866 + }, + { + "epoch": 15.293986636971047, + "grad_norm": 17.753189086914062, + "learning_rate": 1e-06, + "loss": 0.507, + "num_input_tokens_seen": 384770584, + "step": 6867 + }, + { + "epoch": 15.293986636971047, + "loss": 0.47503072023391724, + "loss_ce": 0.00011617955169640481, + "loss_iou": 0.1953125, + "loss_num": 0.0169677734375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 384770584, + "step": 6867 + }, + { + "epoch": 15.296213808463252, + "grad_norm": 21.835477828979492, + "learning_rate": 1e-06, + "loss": 0.3904, + "num_input_tokens_seen": 384827512, + "step": 6868 + }, + { + "epoch": 15.296213808463252, + "loss": 0.3441203534603119, + "loss_ce": 8.04224400781095e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0118408203125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 384827512, + "step": 6868 + }, + { + "epoch": 15.298440979955457, + "grad_norm": 11.53410816192627, + "learning_rate": 1e-06, + "loss": 0.3595, + "num_input_tokens_seen": 384884920, + "step": 6869 + }, + { + "epoch": 15.298440979955457, + "loss": 0.42830324172973633, + "loss_ce": 8.056573278736323e-05, + "loss_iou": 0.189453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 384884920, + "step": 6869 + }, + { + "epoch": 15.300668151447661, + "grad_norm": 24.92241668701172, + "learning_rate": 1e-06, + "loss": 0.4735, + "num_input_tokens_seen": 384940092, + "step": 6870 + }, + { + "epoch": 15.300668151447661, + "loss": 0.30489498376846313, + "loss_ce": 8.541756687918678e-05, + "loss_iou": 0.1328125, + "loss_num": 0.0078125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 384940092, + "step": 6870 + }, + { + "epoch": 15.302895322939866, + "grad_norm": 24.968305587768555, + "learning_rate": 1e-06, + "loss": 0.48, + "num_input_tokens_seen": 384996776, + "step": 6871 + }, + { + "epoch": 15.302895322939866, + "loss": 0.4336788058280945, + "loss_ce": 8.504305151291192e-05, + "loss_iou": 0.181640625, + "loss_num": 0.01385498046875, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 384996776, + "step": 6871 + }, + { + "epoch": 15.305122494432071, + "grad_norm": 15.246042251586914, + "learning_rate": 1e-06, + "loss": 0.4207, + "num_input_tokens_seen": 385056728, + "step": 6872 + }, + { + "epoch": 15.305122494432071, + "loss": 0.35528939962387085, + "loss_ce": 9.531193063594401e-05, + "loss_iou": 0.150390625, + "loss_num": 0.01080322265625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 385056728, + "step": 6872 + }, + { + "epoch": 15.307349665924276, + "grad_norm": 18.476036071777344, + "learning_rate": 1e-06, + "loss": 0.3138, + "num_input_tokens_seen": 385113780, + "step": 6873 + }, + { + "epoch": 15.307349665924276, + "loss": 0.3595203459262848, + "loss_ce": 0.00011481612455099821, + "loss_iou": 0.1640625, + "loss_num": 0.006439208984375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 385113780, + "step": 6873 + }, + { + "epoch": 15.309576837416481, + "grad_norm": 17.357139587402344, + "learning_rate": 1e-06, + "loss": 0.3486, + "num_input_tokens_seen": 385169428, + "step": 6874 + }, + { + "epoch": 15.309576837416481, + "loss": 0.45047512650489807, + "loss_ce": 9.672968735685572e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0079345703125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 385169428, + "step": 6874 + }, + { + "epoch": 15.311804008908686, + "grad_norm": 16.00808334350586, + "learning_rate": 1e-06, + "loss": 0.4884, + "num_input_tokens_seen": 385226272, + "step": 6875 + }, + { + "epoch": 15.311804008908686, + "loss": 0.48656389117240906, + "loss_ce": 0.00011366438411641866, + "loss_iou": 0.2109375, + "loss_num": 0.01300048828125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 385226272, + "step": 6875 + }, + { + "epoch": 15.31403118040089, + "grad_norm": 22.285694122314453, + "learning_rate": 1e-06, + "loss": 0.3616, + "num_input_tokens_seen": 385281308, + "step": 6876 + }, + { + "epoch": 15.31403118040089, + "loss": 0.30913233757019043, + "loss_ce": 8.081403211690485e-05, + "loss_iou": 0.11767578125, + "loss_num": 0.01470947265625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 385281308, + "step": 6876 + }, + { + "epoch": 15.316258351893095, + "grad_norm": 406.5437927246094, + "learning_rate": 1e-06, + "loss": 0.4264, + "num_input_tokens_seen": 385338752, + "step": 6877 + }, + { + "epoch": 15.316258351893095, + "loss": 0.3918250799179077, + "loss_ce": 0.00010145184933207929, + "loss_iou": 0.17578125, + "loss_num": 0.007781982421875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 385338752, + "step": 6877 + }, + { + "epoch": 15.3184855233853, + "grad_norm": 17.710416793823242, + "learning_rate": 1e-06, + "loss": 0.3374, + "num_input_tokens_seen": 385396576, + "step": 6878 + }, + { + "epoch": 15.3184855233853, + "loss": 0.35166865587234497, + "loss_ce": 0.00010614388156682253, + "loss_iou": 0.1552734375, + "loss_num": 0.00836181640625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 385396576, + "step": 6878 + }, + { + "epoch": 15.320712694877505, + "grad_norm": 16.165138244628906, + "learning_rate": 1e-06, + "loss": 0.4187, + "num_input_tokens_seen": 385454284, + "step": 6879 + }, + { + "epoch": 15.320712694877505, + "loss": 0.4211636483669281, + "loss_ce": 0.00014315356384031475, + "loss_iou": 0.185546875, + "loss_num": 0.0098876953125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 385454284, + "step": 6879 + }, + { + "epoch": 15.32293986636971, + "grad_norm": 19.196983337402344, + "learning_rate": 1e-06, + "loss": 0.4114, + "num_input_tokens_seen": 385511284, + "step": 6880 + }, + { + "epoch": 15.32293986636971, + "loss": 0.42685467004776, + "loss_ce": 9.687192505225539e-05, + "loss_iou": 0.181640625, + "loss_num": 0.01263427734375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 385511284, + "step": 6880 + }, + { + "epoch": 15.325167037861915, + "grad_norm": 25.625106811523438, + "learning_rate": 1e-06, + "loss": 0.3899, + "num_input_tokens_seen": 385565236, + "step": 6881 + }, + { + "epoch": 15.325167037861915, + "loss": 0.3682980537414551, + "loss_ce": 0.00013401404430624098, + "loss_iou": 0.1640625, + "loss_num": 0.0078125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 385565236, + "step": 6881 + }, + { + "epoch": 15.32739420935412, + "grad_norm": 21.3586368560791, + "learning_rate": 1e-06, + "loss": 0.4805, + "num_input_tokens_seen": 385619852, + "step": 6882 + }, + { + "epoch": 15.32739420935412, + "loss": 0.5712690353393555, + "loss_ce": 0.00010209472384303808, + "loss_iou": 0.26953125, + "loss_num": 0.006378173828125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 385619852, + "step": 6882 + }, + { + "epoch": 15.329621380846325, + "grad_norm": 20.074663162231445, + "learning_rate": 1e-06, + "loss": 0.4422, + "num_input_tokens_seen": 385675444, + "step": 6883 + }, + { + "epoch": 15.329621380846325, + "loss": 0.44748926162719727, + "loss_ce": 0.00010156280768569559, + "loss_iou": 0.1943359375, + "loss_num": 0.0115966796875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 385675444, + "step": 6883 + }, + { + "epoch": 15.33184855233853, + "grad_norm": 19.188108444213867, + "learning_rate": 1e-06, + "loss": 0.5017, + "num_input_tokens_seen": 385731136, + "step": 6884 + }, + { + "epoch": 15.33184855233853, + "loss": 0.269458532333374, + "loss_ce": 0.0001714447425911203, + "loss_iou": 0.123046875, + "loss_num": 0.004638671875, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 385731136, + "step": 6884 + }, + { + "epoch": 15.334075723830734, + "grad_norm": 16.712444305419922, + "learning_rate": 1e-06, + "loss": 0.4189, + "num_input_tokens_seen": 385788012, + "step": 6885 + }, + { + "epoch": 15.334075723830734, + "loss": 0.4538283050060272, + "loss_ce": 9.298422082792968e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.00982666015625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 385788012, + "step": 6885 + }, + { + "epoch": 15.33630289532294, + "grad_norm": 20.65199089050293, + "learning_rate": 1e-06, + "loss": 0.5626, + "num_input_tokens_seen": 385846652, + "step": 6886 + }, + { + "epoch": 15.33630289532294, + "loss": 0.44176751375198364, + "loss_ce": 0.00011710204125847667, + "loss_iou": 0.2001953125, + "loss_num": 0.00823974609375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 385846652, + "step": 6886 + }, + { + "epoch": 15.338530066815144, + "grad_norm": 36.08283996582031, + "learning_rate": 1e-06, + "loss": 0.4439, + "num_input_tokens_seen": 385903884, + "step": 6887 + }, + { + "epoch": 15.338530066815144, + "loss": 0.3505267798900604, + "loss_ce": 0.00018498269491828978, + "loss_iou": 0.1513671875, + "loss_num": 0.0093994140625, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 385903884, + "step": 6887 + }, + { + "epoch": 15.340757238307349, + "grad_norm": 27.132768630981445, + "learning_rate": 1e-06, + "loss": 0.4734, + "num_input_tokens_seen": 385958580, + "step": 6888 + }, + { + "epoch": 15.340757238307349, + "loss": 0.42540186643600464, + "loss_ce": 0.00010891577403526753, + "loss_iou": 0.19140625, + "loss_num": 0.00848388671875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 385958580, + "step": 6888 + }, + { + "epoch": 15.342984409799554, + "grad_norm": 18.235618591308594, + "learning_rate": 1e-06, + "loss": 0.4809, + "num_input_tokens_seen": 386016000, + "step": 6889 + }, + { + "epoch": 15.342984409799554, + "loss": 0.4169900715351105, + "loss_ce": 0.00011995389650110155, + "loss_iou": 0.1982421875, + "loss_num": 0.004058837890625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 386016000, + "step": 6889 + }, + { + "epoch": 15.345211581291759, + "grad_norm": 19.752094268798828, + "learning_rate": 1e-06, + "loss": 0.6642, + "num_input_tokens_seen": 386071424, + "step": 6890 + }, + { + "epoch": 15.345211581291759, + "loss": 0.9156758189201355, + "loss_ce": 0.00014846479461994022, + "loss_iou": 0.380859375, + "loss_num": 0.03076171875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 386071424, + "step": 6890 + }, + { + "epoch": 15.347438752783964, + "grad_norm": 20.173500061035156, + "learning_rate": 1e-06, + "loss": 0.5695, + "num_input_tokens_seen": 386121956, + "step": 6891 + }, + { + "epoch": 15.347438752783964, + "loss": 0.2851613163948059, + "loss_ce": 9.661800868343562e-05, + "loss_iou": 0.126953125, + "loss_num": 0.006134033203125, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 386121956, + "step": 6891 + }, + { + "epoch": 15.34966592427617, + "grad_norm": 42.235107421875, + "learning_rate": 1e-06, + "loss": 0.5524, + "num_input_tokens_seen": 386174436, + "step": 6892 + }, + { + "epoch": 15.34966592427617, + "loss": 0.5490976572036743, + "loss_ce": 0.00014746160013601184, + "loss_iou": 0.236328125, + "loss_num": 0.01519775390625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 386174436, + "step": 6892 + }, + { + "epoch": 15.351893095768375, + "grad_norm": 62.30870056152344, + "learning_rate": 1e-06, + "loss": 0.4912, + "num_input_tokens_seen": 386229148, + "step": 6893 + }, + { + "epoch": 15.351893095768375, + "loss": 0.4774854779243469, + "loss_ce": 0.00012952039833180606, + "loss_iou": 0.203125, + "loss_num": 0.014404296875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 386229148, + "step": 6893 + }, + { + "epoch": 15.35412026726058, + "grad_norm": 16.55988883972168, + "learning_rate": 1e-06, + "loss": 0.3568, + "num_input_tokens_seen": 386284964, + "step": 6894 + }, + { + "epoch": 15.35412026726058, + "loss": 0.4656769633293152, + "loss_ce": 9.315234638052061e-05, + "loss_iou": 0.185546875, + "loss_num": 0.018798828125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 386284964, + "step": 6894 + }, + { + "epoch": 15.356347438752785, + "grad_norm": 13.955078125, + "learning_rate": 1e-06, + "loss": 0.3558, + "num_input_tokens_seen": 386342464, + "step": 6895 + }, + { + "epoch": 15.356347438752785, + "loss": 0.3424103856086731, + "loss_ce": 0.00012523065379355103, + "loss_iou": 0.154296875, + "loss_num": 0.00665283203125, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 386342464, + "step": 6895 + }, + { + "epoch": 15.35857461024499, + "grad_norm": 17.421804428100586, + "learning_rate": 1e-06, + "loss": 0.707, + "num_input_tokens_seen": 386396108, + "step": 6896 + }, + { + "epoch": 15.35857461024499, + "loss": 0.33755582571029663, + "loss_ce": 9.246024274034426e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00732421875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 386396108, + "step": 6896 + }, + { + "epoch": 15.360801781737194, + "grad_norm": 20.638612747192383, + "learning_rate": 1e-06, + "loss": 0.3819, + "num_input_tokens_seen": 386452760, + "step": 6897 + }, + { + "epoch": 15.360801781737194, + "loss": 0.267736554145813, + "loss_ce": 9.742352995090187e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.00567626953125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 386452760, + "step": 6897 + }, + { + "epoch": 15.3630289532294, + "grad_norm": 17.11157989501953, + "learning_rate": 1e-06, + "loss": 0.4, + "num_input_tokens_seen": 386511592, + "step": 6898 + }, + { + "epoch": 15.3630289532294, + "loss": 0.3749087154865265, + "loss_ce": 9.182207577396184e-05, + "loss_iou": 0.154296875, + "loss_num": 0.0130615234375, + "loss_xval": 0.375, + "num_input_tokens_seen": 386511592, + "step": 6898 + }, + { + "epoch": 15.365256124721604, + "grad_norm": 12.453452110290527, + "learning_rate": 1e-06, + "loss": 0.3186, + "num_input_tokens_seen": 386568560, + "step": 6899 + }, + { + "epoch": 15.365256124721604, + "loss": 0.2840937376022339, + "loss_ce": 9.713226609164849e-05, + "loss_iou": 0.130859375, + "loss_num": 0.00445556640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 386568560, + "step": 6899 + }, + { + "epoch": 15.367483296213809, + "grad_norm": 19.97893524169922, + "learning_rate": 1e-06, + "loss": 0.3577, + "num_input_tokens_seen": 386623992, + "step": 6900 + }, + { + "epoch": 15.367483296213809, + "loss": 0.3777737617492676, + "loss_ce": 8.821256778901443e-05, + "loss_iou": 0.16796875, + "loss_num": 0.00799560546875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 386623992, + "step": 6900 + }, + { + "epoch": 15.369710467706014, + "grad_norm": 22.45269775390625, + "learning_rate": 1e-06, + "loss": 0.3661, + "num_input_tokens_seen": 386681452, + "step": 6901 + }, + { + "epoch": 15.369710467706014, + "loss": 0.2848760187625885, + "loss_ce": 8.596775296609849e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.00726318359375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 386681452, + "step": 6901 + }, + { + "epoch": 15.371937639198219, + "grad_norm": 14.801541328430176, + "learning_rate": 1e-06, + "loss": 0.4713, + "num_input_tokens_seen": 386737060, + "step": 6902 + }, + { + "epoch": 15.371937639198219, + "loss": 0.5325612425804138, + "loss_ce": 9.053810936165974e-05, + "loss_iou": 0.232421875, + "loss_num": 0.0133056640625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 386737060, + "step": 6902 + }, + { + "epoch": 15.374164810690424, + "grad_norm": 41.93655776977539, + "learning_rate": 1e-06, + "loss": 0.3774, + "num_input_tokens_seen": 386792788, + "step": 6903 + }, + { + "epoch": 15.374164810690424, + "loss": 0.4628770351409912, + "loss_ce": 0.00010845393990166485, + "loss_iou": 0.2177734375, + "loss_num": 0.00543212890625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 386792788, + "step": 6903 + }, + { + "epoch": 15.376391982182628, + "grad_norm": 16.000699996948242, + "learning_rate": 1e-06, + "loss": 0.3095, + "num_input_tokens_seen": 386851464, + "step": 6904 + }, + { + "epoch": 15.376391982182628, + "loss": 0.2996518611907959, + "loss_ce": 9.130668331636116e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.006317138671875, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 386851464, + "step": 6904 + }, + { + "epoch": 15.378619153674833, + "grad_norm": 19.54730987548828, + "learning_rate": 1e-06, + "loss": 0.36, + "num_input_tokens_seen": 386903512, + "step": 6905 + }, + { + "epoch": 15.378619153674833, + "loss": 0.5413779020309448, + "loss_ce": 0.0004843665228690952, + "loss_iou": 0.203125, + "loss_num": 0.0269775390625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 386903512, + "step": 6905 + }, + { + "epoch": 15.380846325167038, + "grad_norm": 14.333488464355469, + "learning_rate": 1e-06, + "loss": 0.3866, + "num_input_tokens_seen": 386958488, + "step": 6906 + }, + { + "epoch": 15.380846325167038, + "loss": 0.4710484445095062, + "loss_ce": 0.0001011955610010773, + "loss_iou": 0.197265625, + "loss_num": 0.01519775390625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 386958488, + "step": 6906 + }, + { + "epoch": 15.383073496659243, + "grad_norm": 32.25919723510742, + "learning_rate": 1e-06, + "loss": 0.3692, + "num_input_tokens_seen": 387014204, + "step": 6907 + }, + { + "epoch": 15.383073496659243, + "loss": 0.31186944246292114, + "loss_ce": 0.0001018614784698002, + "loss_iou": 0.140625, + "loss_num": 0.00616455078125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 387014204, + "step": 6907 + }, + { + "epoch": 15.385300668151448, + "grad_norm": 22.024864196777344, + "learning_rate": 1e-06, + "loss": 0.307, + "num_input_tokens_seen": 387067160, + "step": 6908 + }, + { + "epoch": 15.385300668151448, + "loss": 0.329712450504303, + "loss_ce": 9.209779818775132e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.004852294921875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 387067160, + "step": 6908 + }, + { + "epoch": 15.387527839643653, + "grad_norm": 19.91106414794922, + "learning_rate": 1e-06, + "loss": 0.4446, + "num_input_tokens_seen": 387124480, + "step": 6909 + }, + { + "epoch": 15.387527839643653, + "loss": 0.3715466856956482, + "loss_ce": 8.674498531036079e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.005767822265625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 387124480, + "step": 6909 + }, + { + "epoch": 15.389755011135858, + "grad_norm": 23.40995979309082, + "learning_rate": 1e-06, + "loss": 0.349, + "num_input_tokens_seen": 387183660, + "step": 6910 + }, + { + "epoch": 15.389755011135858, + "loss": 0.41477489471435547, + "loss_ce": 0.00010205684520769864, + "loss_iou": 0.1943359375, + "loss_num": 0.00518798828125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 387183660, + "step": 6910 + }, + { + "epoch": 15.391982182628063, + "grad_norm": 20.379980087280273, + "learning_rate": 1e-06, + "loss": 0.4204, + "num_input_tokens_seen": 387243208, + "step": 6911 + }, + { + "epoch": 15.391982182628063, + "loss": 0.4162381589412689, + "loss_ce": 0.0001004426449071616, + "loss_iou": 0.1953125, + "loss_num": 0.00494384765625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 387243208, + "step": 6911 + }, + { + "epoch": 15.394209354120267, + "grad_norm": 13.906294822692871, + "learning_rate": 1e-06, + "loss": 0.3458, + "num_input_tokens_seen": 387298652, + "step": 6912 + }, + { + "epoch": 15.394209354120267, + "loss": 0.3168086111545563, + "loss_ce": 9.718221554066986e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.004180908203125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 387298652, + "step": 6912 + }, + { + "epoch": 15.396436525612472, + "grad_norm": 18.725072860717773, + "learning_rate": 1e-06, + "loss": 0.5747, + "num_input_tokens_seen": 387353036, + "step": 6913 + }, + { + "epoch": 15.396436525612472, + "loss": 0.6614874601364136, + "loss_ce": 0.00011045205610571429, + "loss_iou": 0.28515625, + "loss_num": 0.01806640625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 387353036, + "step": 6913 + }, + { + "epoch": 15.398663697104677, + "grad_norm": 17.18587875366211, + "learning_rate": 1e-06, + "loss": 0.3727, + "num_input_tokens_seen": 387408840, + "step": 6914 + }, + { + "epoch": 15.398663697104677, + "loss": 0.35583341121673584, + "loss_ce": 0.00012054783292114735, + "loss_iou": 0.16796875, + "loss_num": 0.00390625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 387408840, + "step": 6914 + }, + { + "epoch": 15.400890868596882, + "grad_norm": 33.79663848876953, + "learning_rate": 1e-06, + "loss": 0.4239, + "num_input_tokens_seen": 387464996, + "step": 6915 + }, + { + "epoch": 15.400890868596882, + "loss": 0.4479781687259674, + "loss_ce": 0.00010222237324342132, + "loss_iou": 0.2041015625, + "loss_num": 0.007781982421875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 387464996, + "step": 6915 + }, + { + "epoch": 15.403118040089087, + "grad_norm": 28.106069564819336, + "learning_rate": 1e-06, + "loss": 0.6411, + "num_input_tokens_seen": 387520852, + "step": 6916 + }, + { + "epoch": 15.403118040089087, + "loss": 0.5481947660446167, + "loss_ce": 9.903394675347954e-05, + "loss_iou": 0.234375, + "loss_num": 0.015869140625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 387520852, + "step": 6916 + }, + { + "epoch": 15.405345211581292, + "grad_norm": 14.857207298278809, + "learning_rate": 1e-06, + "loss": 0.4853, + "num_input_tokens_seen": 387573816, + "step": 6917 + }, + { + "epoch": 15.405345211581292, + "loss": 0.49757710099220276, + "loss_ce": 0.00011006787826772779, + "loss_iou": 0.2138671875, + "loss_num": 0.01385498046875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 387573816, + "step": 6917 + }, + { + "epoch": 15.407572383073497, + "grad_norm": 25.532848358154297, + "learning_rate": 1e-06, + "loss": 0.3972, + "num_input_tokens_seen": 387631328, + "step": 6918 + }, + { + "epoch": 15.407572383073497, + "loss": 0.32285118103027344, + "loss_ce": 9.72603156697005e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00604248046875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 387631328, + "step": 6918 + }, + { + "epoch": 15.409799554565701, + "grad_norm": 32.86465072631836, + "learning_rate": 1e-06, + "loss": 0.5478, + "num_input_tokens_seen": 387688500, + "step": 6919 + }, + { + "epoch": 15.409799554565701, + "loss": 0.7613215446472168, + "loss_ce": 9.104014316108078e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0262451171875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 387688500, + "step": 6919 + }, + { + "epoch": 15.412026726057906, + "grad_norm": 30.64493179321289, + "learning_rate": 1e-06, + "loss": 0.4451, + "num_input_tokens_seen": 387743892, + "step": 6920 + }, + { + "epoch": 15.412026726057906, + "loss": 0.4576044976711273, + "loss_ce": 8.494246867485344e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.015380859375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 387743892, + "step": 6920 + }, + { + "epoch": 15.414253897550111, + "grad_norm": 21.50493621826172, + "learning_rate": 1e-06, + "loss": 0.6023, + "num_input_tokens_seen": 387800900, + "step": 6921 + }, + { + "epoch": 15.414253897550111, + "loss": 0.6438992023468018, + "loss_ce": 0.00010035550076281652, + "loss_iou": 0.28125, + "loss_num": 0.0166015625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 387800900, + "step": 6921 + }, + { + "epoch": 15.416481069042316, + "grad_norm": 20.963998794555664, + "learning_rate": 1e-06, + "loss": 0.3538, + "num_input_tokens_seen": 387858872, + "step": 6922 + }, + { + "epoch": 15.416481069042316, + "loss": 0.4122142493724823, + "loss_ce": 0.00010487253894098103, + "loss_iou": 0.1796875, + "loss_num": 0.01068115234375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 387858872, + "step": 6922 + }, + { + "epoch": 15.41870824053452, + "grad_norm": 20.77920913696289, + "learning_rate": 1e-06, + "loss": 0.4124, + "num_input_tokens_seen": 387913556, + "step": 6923 + }, + { + "epoch": 15.41870824053452, + "loss": 0.2547425329685211, + "loss_ce": 0.00022592084133066237, + "loss_iou": 0.11181640625, + "loss_num": 0.006134033203125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 387913556, + "step": 6923 + }, + { + "epoch": 15.420935412026726, + "grad_norm": 14.842677116394043, + "learning_rate": 1e-06, + "loss": 0.3154, + "num_input_tokens_seen": 387967992, + "step": 6924 + }, + { + "epoch": 15.420935412026726, + "loss": 0.35966259241104126, + "loss_ce": 0.00010450358968228102, + "loss_iou": 0.16015625, + "loss_num": 0.0078125, + "loss_xval": 0.359375, + "num_input_tokens_seen": 387967992, + "step": 6924 + }, + { + "epoch": 15.42316258351893, + "grad_norm": 19.513763427734375, + "learning_rate": 1e-06, + "loss": 0.449, + "num_input_tokens_seen": 388025440, + "step": 6925 + }, + { + "epoch": 15.42316258351893, + "loss": 0.4793444275856018, + "loss_ce": 9.635625610826537e-05, + "loss_iou": 0.203125, + "loss_num": 0.0147705078125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 388025440, + "step": 6925 + }, + { + "epoch": 15.425389755011135, + "grad_norm": 14.279714584350586, + "learning_rate": 1e-06, + "loss": 0.4925, + "num_input_tokens_seen": 388082980, + "step": 6926 + }, + { + "epoch": 15.425389755011135, + "loss": 0.5041942596435547, + "loss_ce": 0.00041012922883965075, + "loss_iou": 0.2041015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 388082980, + "step": 6926 + }, + { + "epoch": 15.42761692650334, + "grad_norm": 44.215213775634766, + "learning_rate": 1e-06, + "loss": 0.4468, + "num_input_tokens_seen": 388138776, + "step": 6927 + }, + { + "epoch": 15.42761692650334, + "loss": 0.4352813959121704, + "loss_ce": 0.00010072036093333736, + "loss_iou": 0.1884765625, + "loss_num": 0.01153564453125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 388138776, + "step": 6927 + }, + { + "epoch": 15.429844097995545, + "grad_norm": 14.542059898376465, + "learning_rate": 1e-06, + "loss": 0.428, + "num_input_tokens_seen": 388193644, + "step": 6928 + }, + { + "epoch": 15.429844097995545, + "loss": 0.5707679986953735, + "loss_ce": 8.928313036449254e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01708984375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 388193644, + "step": 6928 + }, + { + "epoch": 15.43207126948775, + "grad_norm": 21.284255981445312, + "learning_rate": 1e-06, + "loss": 0.5289, + "num_input_tokens_seen": 388250780, + "step": 6929 + }, + { + "epoch": 15.43207126948775, + "loss": 0.6608084440231323, + "loss_ce": 0.0001029052073135972, + "loss_iou": 0.287109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 388250780, + "step": 6929 + }, + { + "epoch": 15.434298440979955, + "grad_norm": 11.952805519104004, + "learning_rate": 1e-06, + "loss": 0.3343, + "num_input_tokens_seen": 388305896, + "step": 6930 + }, + { + "epoch": 15.434298440979955, + "loss": 0.4233115315437317, + "loss_ce": 9.375169611303136e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.018310546875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 388305896, + "step": 6930 + }, + { + "epoch": 15.43652561247216, + "grad_norm": 21.607667922973633, + "learning_rate": 1e-06, + "loss": 0.4612, + "num_input_tokens_seen": 388362324, + "step": 6931 + }, + { + "epoch": 15.43652561247216, + "loss": 0.3633829951286316, + "loss_ce": 0.0001628020836506039, + "loss_iou": 0.1650390625, + "loss_num": 0.006500244140625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 388362324, + "step": 6931 + }, + { + "epoch": 15.438752783964365, + "grad_norm": 26.418800354003906, + "learning_rate": 1e-06, + "loss": 0.4414, + "num_input_tokens_seen": 388417860, + "step": 6932 + }, + { + "epoch": 15.438752783964365, + "loss": 0.38741356134414673, + "loss_ce": 8.447450818493962e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.00848388671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 388417860, + "step": 6932 + }, + { + "epoch": 15.44097995545657, + "grad_norm": 21.265588760375977, + "learning_rate": 1e-06, + "loss": 0.4136, + "num_input_tokens_seen": 388472732, + "step": 6933 + }, + { + "epoch": 15.44097995545657, + "loss": 0.42595887184143066, + "loss_ce": 0.00011659698066068813, + "loss_iou": 0.201171875, + "loss_num": 0.004638671875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 388472732, + "step": 6933 + }, + { + "epoch": 15.443207126948774, + "grad_norm": 18.719799041748047, + "learning_rate": 1e-06, + "loss": 0.4502, + "num_input_tokens_seen": 388531568, + "step": 6934 + }, + { + "epoch": 15.443207126948774, + "loss": 0.4275781214237213, + "loss_ce": 8.788651030045003e-05, + "loss_iou": 0.181640625, + "loss_num": 0.01263427734375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 388531568, + "step": 6934 + }, + { + "epoch": 15.44543429844098, + "grad_norm": 13.221081733703613, + "learning_rate": 1e-06, + "loss": 0.4555, + "num_input_tokens_seen": 388587640, + "step": 6935 + }, + { + "epoch": 15.44543429844098, + "loss": 0.4827903211116791, + "loss_ce": 0.0001243370061274618, + "loss_iou": 0.2119140625, + "loss_num": 0.011962890625, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 388587640, + "step": 6935 + }, + { + "epoch": 15.447661469933184, + "grad_norm": 15.271649360656738, + "learning_rate": 1e-06, + "loss": 0.3251, + "num_input_tokens_seen": 388645528, + "step": 6936 + }, + { + "epoch": 15.447661469933184, + "loss": 0.35192036628723145, + "loss_ce": 8.32095684017986e-05, + "loss_iou": 0.154296875, + "loss_num": 0.0087890625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 388645528, + "step": 6936 + }, + { + "epoch": 15.449888641425389, + "grad_norm": 18.44422149658203, + "learning_rate": 1e-06, + "loss": 0.5482, + "num_input_tokens_seen": 388698432, + "step": 6937 + }, + { + "epoch": 15.449888641425389, + "loss": 0.569794774055481, + "loss_ce": 9.262157982448116e-05, + "loss_iou": 0.2578125, + "loss_num": 0.010986328125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 388698432, + "step": 6937 + }, + { + "epoch": 15.452115812917596, + "grad_norm": 23.565710067749023, + "learning_rate": 1e-06, + "loss": 0.3753, + "num_input_tokens_seen": 388757224, + "step": 6938 + }, + { + "epoch": 15.452115812917596, + "loss": 0.30654728412628174, + "loss_ce": 8.977011020760983e-05, + "loss_iou": 0.140625, + "loss_num": 0.00518798828125, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 388757224, + "step": 6938 + }, + { + "epoch": 15.4543429844098, + "grad_norm": 13.924105644226074, + "learning_rate": 1e-06, + "loss": 0.3849, + "num_input_tokens_seen": 388812656, + "step": 6939 + }, + { + "epoch": 15.4543429844098, + "loss": 0.21841883659362793, + "loss_ce": 9.608666005078703e-05, + "loss_iou": 0.09619140625, + "loss_num": 0.005157470703125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 388812656, + "step": 6939 + }, + { + "epoch": 15.456570155902005, + "grad_norm": 21.45891571044922, + "learning_rate": 1e-06, + "loss": 0.432, + "num_input_tokens_seen": 388867552, + "step": 6940 + }, + { + "epoch": 15.456570155902005, + "loss": 0.4227088689804077, + "loss_ce": 0.00010145263513550162, + "loss_iou": 0.1767578125, + "loss_num": 0.013671875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 388867552, + "step": 6940 + }, + { + "epoch": 15.45879732739421, + "grad_norm": 19.371652603149414, + "learning_rate": 1e-06, + "loss": 0.631, + "num_input_tokens_seen": 388923044, + "step": 6941 + }, + { + "epoch": 15.45879732739421, + "loss": 0.6530581712722778, + "loss_ce": 0.00010402440966572613, + "loss_iou": 0.29296875, + "loss_num": 0.01324462890625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 388923044, + "step": 6941 + }, + { + "epoch": 15.461024498886415, + "grad_norm": 24.440471649169922, + "learning_rate": 1e-06, + "loss": 0.425, + "num_input_tokens_seen": 388976632, + "step": 6942 + }, + { + "epoch": 15.461024498886415, + "loss": 0.4324702322483063, + "loss_ce": 9.719458466861397e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.010498046875, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 388976632, + "step": 6942 + }, + { + "epoch": 15.46325167037862, + "grad_norm": 18.36454963684082, + "learning_rate": 1e-06, + "loss": 0.3942, + "num_input_tokens_seen": 389031268, + "step": 6943 + }, + { + "epoch": 15.46325167037862, + "loss": 0.39659687876701355, + "loss_ce": 0.00011250589159317315, + "loss_iou": 0.177734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 389031268, + "step": 6943 + }, + { + "epoch": 15.465478841870825, + "grad_norm": 17.621511459350586, + "learning_rate": 1e-06, + "loss": 0.4569, + "num_input_tokens_seen": 389089160, + "step": 6944 + }, + { + "epoch": 15.465478841870825, + "loss": 0.33285778760910034, + "loss_ce": 9.410581697011366e-05, + "loss_iou": 0.15625, + "loss_num": 0.00421142578125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 389089160, + "step": 6944 + }, + { + "epoch": 15.46770601336303, + "grad_norm": 16.278667449951172, + "learning_rate": 1e-06, + "loss": 0.3584, + "num_input_tokens_seen": 389144868, + "step": 6945 + }, + { + "epoch": 15.46770601336303, + "loss": 0.3341924548149109, + "loss_ce": 8.60134678077884e-05, + "loss_iou": 0.14453125, + "loss_num": 0.009033203125, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 389144868, + "step": 6945 + }, + { + "epoch": 15.469933184855234, + "grad_norm": 14.054006576538086, + "learning_rate": 1e-06, + "loss": 0.4937, + "num_input_tokens_seen": 389202724, + "step": 6946 + }, + { + "epoch": 15.469933184855234, + "loss": 0.4199259281158447, + "loss_ce": 0.00012615637388080359, + "loss_iou": 0.1689453125, + "loss_num": 0.016357421875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 389202724, + "step": 6946 + }, + { + "epoch": 15.47216035634744, + "grad_norm": 16.913305282592773, + "learning_rate": 1e-06, + "loss": 0.5997, + "num_input_tokens_seen": 389260720, + "step": 6947 + }, + { + "epoch": 15.47216035634744, + "loss": 0.46518251299858093, + "loss_ce": 9.461917215958238e-05, + "loss_iou": 0.203125, + "loss_num": 0.0115966796875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 389260720, + "step": 6947 + }, + { + "epoch": 15.474387527839644, + "grad_norm": 18.17692756652832, + "learning_rate": 1e-06, + "loss": 0.5238, + "num_input_tokens_seen": 389315804, + "step": 6948 + }, + { + "epoch": 15.474387527839644, + "loss": 0.5708150863647461, + "loss_ce": 7.536027260357514e-05, + "loss_iou": 0.244140625, + "loss_num": 0.0166015625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 389315804, + "step": 6948 + }, + { + "epoch": 15.476614699331849, + "grad_norm": 30.645221710205078, + "learning_rate": 1e-06, + "loss": 0.393, + "num_input_tokens_seen": 389373028, + "step": 6949 + }, + { + "epoch": 15.476614699331849, + "loss": 0.37821051478385925, + "loss_ce": 9.771598706720397e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01080322265625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 389373028, + "step": 6949 + }, + { + "epoch": 15.478841870824054, + "grad_norm": 23.542387008666992, + "learning_rate": 1e-06, + "loss": 0.5772, + "num_input_tokens_seen": 389429224, + "step": 6950 + }, + { + "epoch": 15.478841870824054, + "loss": 0.7504802942276001, + "loss_ce": 0.001334758591838181, + "loss_iou": 0.306640625, + "loss_num": 0.0272216796875, + "loss_xval": 0.75, + "num_input_tokens_seen": 389429224, + "step": 6950 + }, + { + "epoch": 15.481069042316259, + "grad_norm": 27.925491333007812, + "learning_rate": 1e-06, + "loss": 0.3852, + "num_input_tokens_seen": 389486864, + "step": 6951 + }, + { + "epoch": 15.481069042316259, + "loss": 0.48157191276550293, + "loss_ce": 0.00012661112123169005, + "loss_iou": 0.166015625, + "loss_num": 0.029541015625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 389486864, + "step": 6951 + }, + { + "epoch": 15.483296213808464, + "grad_norm": 15.1088285446167, + "learning_rate": 1e-06, + "loss": 0.3681, + "num_input_tokens_seen": 389542096, + "step": 6952 + }, + { + "epoch": 15.483296213808464, + "loss": 0.41208964586257935, + "loss_ce": 0.00010235629451926798, + "loss_iou": 0.1943359375, + "loss_num": 0.004608154296875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 389542096, + "step": 6952 + }, + { + "epoch": 15.485523385300668, + "grad_norm": 20.11035919189453, + "learning_rate": 1e-06, + "loss": 0.5138, + "num_input_tokens_seen": 389596724, + "step": 6953 + }, + { + "epoch": 15.485523385300668, + "loss": 0.37680959701538086, + "loss_ce": 0.00010061028297059238, + "loss_iou": 0.1669921875, + "loss_num": 0.0086669921875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 389596724, + "step": 6953 + }, + { + "epoch": 15.487750556792873, + "grad_norm": 13.750123977661133, + "learning_rate": 1e-06, + "loss": 0.5883, + "num_input_tokens_seen": 389652700, + "step": 6954 + }, + { + "epoch": 15.487750556792873, + "loss": 0.6634094715118408, + "loss_ce": 0.0001404019130859524, + "loss_iou": 0.26171875, + "loss_num": 0.028076171875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 389652700, + "step": 6954 + }, + { + "epoch": 15.489977728285078, + "grad_norm": 13.668987274169922, + "learning_rate": 1e-06, + "loss": 0.3403, + "num_input_tokens_seen": 389710304, + "step": 6955 + }, + { + "epoch": 15.489977728285078, + "loss": 0.33821815252304077, + "loss_ce": 8.338829502463341e-05, + "loss_iou": 0.1484375, + "loss_num": 0.00811767578125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 389710304, + "step": 6955 + }, + { + "epoch": 15.492204899777283, + "grad_norm": 20.135528564453125, + "learning_rate": 1e-06, + "loss": 0.4608, + "num_input_tokens_seen": 389764472, + "step": 6956 + }, + { + "epoch": 15.492204899777283, + "loss": 0.46713533997535706, + "loss_ce": 9.433674858883023e-05, + "loss_iou": 0.208984375, + "loss_num": 0.01007080078125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 389764472, + "step": 6956 + }, + { + "epoch": 15.494432071269488, + "grad_norm": 19.5330867767334, + "learning_rate": 1e-06, + "loss": 0.3886, + "num_input_tokens_seen": 389820184, + "step": 6957 + }, + { + "epoch": 15.494432071269488, + "loss": 0.46810320019721985, + "loss_ce": 8.56061524245888e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.00958251953125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 389820184, + "step": 6957 + }, + { + "epoch": 15.496659242761693, + "grad_norm": 15.649205207824707, + "learning_rate": 1e-06, + "loss": 0.3468, + "num_input_tokens_seen": 389875816, + "step": 6958 + }, + { + "epoch": 15.496659242761693, + "loss": 0.3824237287044525, + "loss_ce": 9.9498953204602e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.005157470703125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 389875816, + "step": 6958 + }, + { + "epoch": 15.498886414253898, + "grad_norm": 17.48607063293457, + "learning_rate": 1e-06, + "loss": 0.4428, + "num_input_tokens_seen": 389934296, + "step": 6959 + }, + { + "epoch": 15.498886414253898, + "loss": 0.45505648851394653, + "loss_ce": 0.00010044968803413212, + "loss_iou": 0.1953125, + "loss_num": 0.0126953125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 389934296, + "step": 6959 + }, + { + "epoch": 15.501113585746102, + "grad_norm": 23.723533630371094, + "learning_rate": 1e-06, + "loss": 0.3997, + "num_input_tokens_seen": 389988784, + "step": 6960 + }, + { + "epoch": 15.501113585746102, + "loss": 0.5000880360603333, + "loss_ce": 8.804388926364481e-05, + "loss_iou": 0.224609375, + "loss_num": 0.01025390625, + "loss_xval": 0.5, + "num_input_tokens_seen": 389988784, + "step": 6960 + }, + { + "epoch": 15.503340757238307, + "grad_norm": 24.47084617614746, + "learning_rate": 1e-06, + "loss": 0.3908, + "num_input_tokens_seen": 390042176, + "step": 6961 + }, + { + "epoch": 15.503340757238307, + "loss": 0.3538658022880554, + "loss_ce": 0.00010604046110529453, + "loss_iou": 0.1650390625, + "loss_num": 0.00494384765625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 390042176, + "step": 6961 + }, + { + "epoch": 15.505567928730512, + "grad_norm": 19.05753517150879, + "learning_rate": 1e-06, + "loss": 0.3267, + "num_input_tokens_seen": 390098484, + "step": 6962 + }, + { + "epoch": 15.505567928730512, + "loss": 0.3786473274230957, + "loss_ce": 0.00010729986388469115, + "loss_iou": 0.166015625, + "loss_num": 0.00921630859375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 390098484, + "step": 6962 + }, + { + "epoch": 15.507795100222717, + "grad_norm": 10.61585807800293, + "learning_rate": 1e-06, + "loss": 0.3648, + "num_input_tokens_seen": 390156300, + "step": 6963 + }, + { + "epoch": 15.507795100222717, + "loss": 0.378567099571228, + "loss_ce": 8.807641279418021e-05, + "loss_iou": 0.16015625, + "loss_num": 0.01165771484375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 390156300, + "step": 6963 + }, + { + "epoch": 15.510022271714922, + "grad_norm": 25.818944931030273, + "learning_rate": 1e-06, + "loss": 0.3777, + "num_input_tokens_seen": 390210808, + "step": 6964 + }, + { + "epoch": 15.510022271714922, + "loss": 0.42782527208328247, + "loss_ce": 9.0899906354025e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.017333984375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 390210808, + "step": 6964 + }, + { + "epoch": 15.512249443207127, + "grad_norm": 17.772178649902344, + "learning_rate": 1e-06, + "loss": 0.3301, + "num_input_tokens_seen": 390266472, + "step": 6965 + }, + { + "epoch": 15.512249443207127, + "loss": 0.40292441844940186, + "loss_ce": 9.23790066735819e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.006011962890625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 390266472, + "step": 6965 + }, + { + "epoch": 15.514476614699332, + "grad_norm": 29.70581817626953, + "learning_rate": 1e-06, + "loss": 0.4923, + "num_input_tokens_seen": 390323712, + "step": 6966 + }, + { + "epoch": 15.514476614699332, + "loss": 0.4616038203239441, + "loss_ce": 0.00011701375478878617, + "loss_iou": 0.208984375, + "loss_num": 0.008544921875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 390323712, + "step": 6966 + }, + { + "epoch": 15.516703786191536, + "grad_norm": 50.45325469970703, + "learning_rate": 1e-06, + "loss": 0.3747, + "num_input_tokens_seen": 390380440, + "step": 6967 + }, + { + "epoch": 15.516703786191536, + "loss": 0.3414144814014435, + "loss_ce": 0.00010588267468847334, + "loss_iou": 0.150390625, + "loss_num": 0.00811767578125, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 390380440, + "step": 6967 + }, + { + "epoch": 15.518930957683741, + "grad_norm": 18.55625343322754, + "learning_rate": 1e-06, + "loss": 0.4821, + "num_input_tokens_seen": 390437412, + "step": 6968 + }, + { + "epoch": 15.518930957683741, + "loss": 0.48657822608947754, + "loss_ce": 0.00012801533739548177, + "loss_iou": 0.2265625, + "loss_num": 0.006378173828125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 390437412, + "step": 6968 + }, + { + "epoch": 15.521158129175946, + "grad_norm": 25.324066162109375, + "learning_rate": 1e-06, + "loss": 0.4971, + "num_input_tokens_seen": 390491840, + "step": 6969 + }, + { + "epoch": 15.521158129175946, + "loss": 0.536230206489563, + "loss_ce": 9.739406232256442e-05, + "loss_iou": 0.23046875, + "loss_num": 0.01483154296875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 390491840, + "step": 6969 + }, + { + "epoch": 15.523385300668151, + "grad_norm": 18.16643714904785, + "learning_rate": 1e-06, + "loss": 0.4849, + "num_input_tokens_seen": 390546924, + "step": 6970 + }, + { + "epoch": 15.523385300668151, + "loss": 0.54595947265625, + "loss_ce": 9.153402788797393e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.01153564453125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 390546924, + "step": 6970 + }, + { + "epoch": 15.525612472160356, + "grad_norm": 15.818894386291504, + "learning_rate": 1e-06, + "loss": 0.5592, + "num_input_tokens_seen": 390603308, + "step": 6971 + }, + { + "epoch": 15.525612472160356, + "loss": 0.597541093826294, + "loss_ce": 0.00012892311497125775, + "loss_iou": 0.265625, + "loss_num": 0.0135498046875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 390603308, + "step": 6971 + }, + { + "epoch": 15.52783964365256, + "grad_norm": 30.140596389770508, + "learning_rate": 1e-06, + "loss": 0.5127, + "num_input_tokens_seen": 390657280, + "step": 6972 + }, + { + "epoch": 15.52783964365256, + "loss": 0.6534337401390076, + "loss_ce": 0.00011343492224114016, + "loss_iou": 0.294921875, + "loss_num": 0.01251220703125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 390657280, + "step": 6972 + }, + { + "epoch": 15.530066815144766, + "grad_norm": 19.85612678527832, + "learning_rate": 1e-06, + "loss": 0.5059, + "num_input_tokens_seen": 390714956, + "step": 6973 + }, + { + "epoch": 15.530066815144766, + "loss": 0.764450192451477, + "loss_ce": 0.00010690266208257526, + "loss_iou": 0.34765625, + "loss_num": 0.01385498046875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 390714956, + "step": 6973 + }, + { + "epoch": 15.53229398663697, + "grad_norm": 12.298739433288574, + "learning_rate": 1e-06, + "loss": 0.5395, + "num_input_tokens_seen": 390772920, + "step": 6974 + }, + { + "epoch": 15.53229398663697, + "loss": 0.7317103147506714, + "loss_ce": 0.00026501319371163845, + "loss_iou": 0.296875, + "loss_num": 0.0277099609375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 390772920, + "step": 6974 + }, + { + "epoch": 15.534521158129175, + "grad_norm": 22.31592559814453, + "learning_rate": 1e-06, + "loss": 0.4543, + "num_input_tokens_seen": 390826668, + "step": 6975 + }, + { + "epoch": 15.534521158129175, + "loss": 0.5942932367324829, + "loss_ce": 0.00017704666242934763, + "loss_iou": 0.267578125, + "loss_num": 0.01165771484375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 390826668, + "step": 6975 + }, + { + "epoch": 15.53674832962138, + "grad_norm": 18.439233779907227, + "learning_rate": 1e-06, + "loss": 0.4188, + "num_input_tokens_seen": 390882348, + "step": 6976 + }, + { + "epoch": 15.53674832962138, + "loss": 0.2939029932022095, + "loss_ce": 9.499942825641483e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.006103515625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 390882348, + "step": 6976 + }, + { + "epoch": 15.538975501113585, + "grad_norm": 16.87866973876953, + "learning_rate": 1e-06, + "loss": 0.3277, + "num_input_tokens_seen": 390939420, + "step": 6977 + }, + { + "epoch": 15.538975501113585, + "loss": 0.36519938707351685, + "loss_ce": 8.707845699973404e-05, + "loss_iou": 0.15234375, + "loss_num": 0.01214599609375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 390939420, + "step": 6977 + }, + { + "epoch": 15.54120267260579, + "grad_norm": 14.497117042541504, + "learning_rate": 1e-06, + "loss": 0.3234, + "num_input_tokens_seen": 390992964, + "step": 6978 + }, + { + "epoch": 15.54120267260579, + "loss": 0.36044928431510925, + "loss_ce": 9.772789780981839e-05, + "loss_iou": 0.171875, + "loss_num": 0.00347900390625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 390992964, + "step": 6978 + }, + { + "epoch": 15.543429844097995, + "grad_norm": 18.7347412109375, + "learning_rate": 1e-06, + "loss": 0.3939, + "num_input_tokens_seen": 391049216, + "step": 6979 + }, + { + "epoch": 15.543429844097995, + "loss": 0.4501742124557495, + "loss_ce": 0.00010095632751472294, + "loss_iou": 0.19921875, + "loss_num": 0.01043701171875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 391049216, + "step": 6979 + }, + { + "epoch": 15.5456570155902, + "grad_norm": 18.551483154296875, + "learning_rate": 1e-06, + "loss": 0.4831, + "num_input_tokens_seen": 391105972, + "step": 6980 + }, + { + "epoch": 15.5456570155902, + "loss": 0.5650371313095093, + "loss_ce": 9.57687952904962e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0106201171875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 391105972, + "step": 6980 + }, + { + "epoch": 15.547884187082406, + "grad_norm": 18.95163345336914, + "learning_rate": 1e-06, + "loss": 0.4954, + "num_input_tokens_seen": 391159136, + "step": 6981 + }, + { + "epoch": 15.547884187082406, + "loss": 0.5984832048416138, + "loss_ce": 9.453899110667408e-05, + "loss_iou": 0.2734375, + "loss_num": 0.01025390625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 391159136, + "step": 6981 + }, + { + "epoch": 15.550111358574611, + "grad_norm": 15.434839248657227, + "learning_rate": 1e-06, + "loss": 0.3884, + "num_input_tokens_seen": 391215408, + "step": 6982 + }, + { + "epoch": 15.550111358574611, + "loss": 0.46562230587005615, + "loss_ce": 0.00010716939868871123, + "loss_iou": 0.208984375, + "loss_num": 0.00946044921875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 391215408, + "step": 6982 + }, + { + "epoch": 15.552338530066816, + "grad_norm": 56.02677536010742, + "learning_rate": 1e-06, + "loss": 0.5136, + "num_input_tokens_seen": 391270184, + "step": 6983 + }, + { + "epoch": 15.552338530066816, + "loss": 0.5048550367355347, + "loss_ce": 9.426505130250007e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.00921630859375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 391270184, + "step": 6983 + }, + { + "epoch": 15.55456570155902, + "grad_norm": 37.09487533569336, + "learning_rate": 1e-06, + "loss": 0.4122, + "num_input_tokens_seen": 391323132, + "step": 6984 + }, + { + "epoch": 15.55456570155902, + "loss": 0.43672704696655273, + "loss_ce": 8.155644172802567e-05, + "loss_iou": 0.19140625, + "loss_num": 0.01055908203125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 391323132, + "step": 6984 + }, + { + "epoch": 15.556792873051226, + "grad_norm": 29.41912078857422, + "learning_rate": 1e-06, + "loss": 0.4368, + "num_input_tokens_seen": 391378804, + "step": 6985 + }, + { + "epoch": 15.556792873051226, + "loss": 0.45505693554878235, + "loss_ce": 0.00010087570990435779, + "loss_iou": 0.2021484375, + "loss_num": 0.00994873046875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 391378804, + "step": 6985 + }, + { + "epoch": 15.55902004454343, + "grad_norm": 22.739253997802734, + "learning_rate": 1e-06, + "loss": 0.5131, + "num_input_tokens_seen": 391434260, + "step": 6986 + }, + { + "epoch": 15.55902004454343, + "loss": 0.5328000783920288, + "loss_ce": 8.528250327799469e-05, + "loss_iou": 0.232421875, + "loss_num": 0.013671875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 391434260, + "step": 6986 + }, + { + "epoch": 15.561247216035635, + "grad_norm": 15.653422355651855, + "learning_rate": 1e-06, + "loss": 0.5016, + "num_input_tokens_seen": 391493312, + "step": 6987 + }, + { + "epoch": 15.561247216035635, + "loss": 0.4914650619029999, + "loss_ce": 0.0001320439187111333, + "loss_iou": 0.22265625, + "loss_num": 0.0093994140625, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 391493312, + "step": 6987 + }, + { + "epoch": 15.56347438752784, + "grad_norm": 21.306421279907227, + "learning_rate": 1e-06, + "loss": 0.5295, + "num_input_tokens_seen": 391548072, + "step": 6988 + }, + { + "epoch": 15.56347438752784, + "loss": 0.6857566833496094, + "loss_ce": 8.777156472206116e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 391548072, + "step": 6988 + }, + { + "epoch": 15.565701559020045, + "grad_norm": 18.79554557800293, + "learning_rate": 1e-06, + "loss": 0.5781, + "num_input_tokens_seen": 391602092, + "step": 6989 + }, + { + "epoch": 15.565701559020045, + "loss": 0.6327000856399536, + "loss_ce": 0.00013169870362617075, + "loss_iou": 0.279296875, + "loss_num": 0.01470947265625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 391602092, + "step": 6989 + }, + { + "epoch": 15.56792873051225, + "grad_norm": 19.208559036254883, + "learning_rate": 1e-06, + "loss": 0.4444, + "num_input_tokens_seen": 391659220, + "step": 6990 + }, + { + "epoch": 15.56792873051225, + "loss": 0.40756696462631226, + "loss_ce": 9.625191160012037e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.01092529296875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 391659220, + "step": 6990 + }, + { + "epoch": 15.570155902004455, + "grad_norm": 25.114105224609375, + "learning_rate": 1e-06, + "loss": 0.5278, + "num_input_tokens_seen": 391713976, + "step": 6991 + }, + { + "epoch": 15.570155902004455, + "loss": 0.760166347026825, + "loss_ce": 0.0001565346319694072, + "loss_iou": 0.29296875, + "loss_num": 0.034912109375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 391713976, + "step": 6991 + }, + { + "epoch": 15.57238307349666, + "grad_norm": 19.96893310546875, + "learning_rate": 1e-06, + "loss": 0.4927, + "num_input_tokens_seen": 391768912, + "step": 6992 + }, + { + "epoch": 15.57238307349666, + "loss": 0.5373374223709106, + "loss_ce": 0.00010592768376227468, + "loss_iou": 0.244140625, + "loss_num": 0.00994873046875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 391768912, + "step": 6992 + }, + { + "epoch": 15.574610244988865, + "grad_norm": 16.1031494140625, + "learning_rate": 1e-06, + "loss": 0.6124, + "num_input_tokens_seen": 391825536, + "step": 6993 + }, + { + "epoch": 15.574610244988865, + "loss": 0.762206494808197, + "loss_ce": 0.00012153427815064788, + "loss_iou": 0.296875, + "loss_num": 0.0341796875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 391825536, + "step": 6993 + }, + { + "epoch": 15.57683741648107, + "grad_norm": 20.991905212402344, + "learning_rate": 1e-06, + "loss": 0.6052, + "num_input_tokens_seen": 391879656, + "step": 6994 + }, + { + "epoch": 15.57683741648107, + "loss": 0.7651537656784058, + "loss_ce": 0.00020015102927573025, + "loss_iou": 0.283203125, + "loss_num": 0.039794921875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 391879656, + "step": 6994 + }, + { + "epoch": 15.579064587973274, + "grad_norm": 16.341785430908203, + "learning_rate": 1e-06, + "loss": 0.4781, + "num_input_tokens_seen": 391935488, + "step": 6995 + }, + { + "epoch": 15.579064587973274, + "loss": 0.5291576385498047, + "loss_ce": 0.00010491380817256868, + "loss_iou": 0.220703125, + "loss_num": 0.0174560546875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 391935488, + "step": 6995 + }, + { + "epoch": 15.58129175946548, + "grad_norm": 19.71142578125, + "learning_rate": 1e-06, + "loss": 0.5139, + "num_input_tokens_seen": 391991892, + "step": 6996 + }, + { + "epoch": 15.58129175946548, + "loss": 0.44407549500465393, + "loss_ce": 0.00010574980115052313, + "loss_iou": 0.203125, + "loss_num": 0.00750732421875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 391991892, + "step": 6996 + }, + { + "epoch": 15.583518930957684, + "grad_norm": 22.070716857910156, + "learning_rate": 1e-06, + "loss": 0.3774, + "num_input_tokens_seen": 392046924, + "step": 6997 + }, + { + "epoch": 15.583518930957684, + "loss": 0.2757876515388489, + "loss_ce": 9.184512600768358e-05, + "loss_iou": 0.1162109375, + "loss_num": 0.0086669921875, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 392046924, + "step": 6997 + }, + { + "epoch": 15.585746102449889, + "grad_norm": 20.057531356811523, + "learning_rate": 1e-06, + "loss": 0.4721, + "num_input_tokens_seen": 392104208, + "step": 6998 + }, + { + "epoch": 15.585746102449889, + "loss": 0.5038794279098511, + "loss_ce": 9.52341069933027e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0150146484375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 392104208, + "step": 6998 + }, + { + "epoch": 15.587973273942094, + "grad_norm": 25.237815856933594, + "learning_rate": 1e-06, + "loss": 0.5194, + "num_input_tokens_seen": 392161208, + "step": 6999 + }, + { + "epoch": 15.587973273942094, + "loss": 0.46581047773361206, + "loss_ce": 0.00011221040040254593, + "loss_iou": 0.2060546875, + "loss_num": 0.01080322265625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 392161208, + "step": 6999 + }, + { + "epoch": 15.590200445434299, + "grad_norm": 15.100346565246582, + "learning_rate": 1e-06, + "loss": 0.4739, + "num_input_tokens_seen": 392214772, + "step": 7000 + }, + { + "epoch": 15.590200445434299, + "eval_seeclick_web_CIoU": 0.5901551246643066, + "eval_seeclick_web_GIoU": 0.5888436436653137, + "eval_seeclick_web_IoU": 0.6081466972827911, + "eval_seeclick_web_MAE_all": 0.015473631210625172, + "eval_seeclick_web_MAE_h": 0.0076336238998919725, + "eval_seeclick_web_MAE_w": 0.015680983662605286, + "eval_seeclick_web_MAE_x_boxes": 0.008724939078092575, + "eval_seeclick_web_MAE_y_boxes": 0.021282089641317725, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.907034695148468, + "eval_seeclick_web_loss_ce": 0.00015554412675555795, + "eval_seeclick_web_loss_iou": 0.4166259765625, + "eval_seeclick_web_loss_num": 0.01241302490234375, + "eval_seeclick_web_loss_xval": 0.895263671875, + "eval_seeclick_web_runtime": 24.1637, + "eval_seeclick_web_samples_per_second": 2.069, + "eval_seeclick_web_steps_per_second": 0.083, + "num_input_tokens_seen": 392214772, + "step": 7000 + }, + { + "epoch": 15.590200445434299, + "eval_icons_CIoU": 0.2777545750141144, + "eval_icons_GIoU": 0.30234697461128235, + "eval_icons_IoU": 0.3519442528486252, + "eval_icons_MAE_all": 0.05883444473147392, + "eval_icons_MAE_h": 0.031461546663194895, + "eval_icons_MAE_w": 0.058418434113264084, + "eval_icons_MAE_x_boxes": 0.06006164848804474, + "eval_icons_MAE_y_boxes": 0.03738272096961737, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7067599296569824, + "eval_icons_loss_ce": 0.00019056371820624918, + "eval_icons_loss_iou": 0.668701171875, + "eval_icons_loss_num": 0.05805206298828125, + "eval_icons_loss_xval": 1.628662109375, + "eval_icons_runtime": 19.373, + "eval_icons_samples_per_second": 2.581, + "eval_icons_steps_per_second": 0.103, + "num_input_tokens_seen": 392214772, + "step": 7000 + }, + { + "epoch": 15.590200445434299, + "eval_screenspot_CIoU": 0.371360719203949, + "eval_screenspot_GIoU": 0.3886072834332784, + "eval_screenspot_IoU": 0.4512639542420705, + "eval_screenspot_MAE_all": 0.055713951587677, + "eval_screenspot_MAE_h": 0.039317984133958817, + "eval_screenspot_MAE_w": 0.06650510802865028, + "eval_screenspot_MAE_x_boxes": 0.07384055045743783, + "eval_screenspot_MAE_y_boxes": 0.03706817328929901, + "eval_screenspot_inside_bbox": 0.7145833373069763, + "eval_screenspot_loss": 1.5692362785339355, + "eval_screenspot_loss_ce": 0.00024274400008531907, + "eval_screenspot_loss_iou": 0.6532389322916666, + "eval_screenspot_loss_num": 0.06329091389973958, + "eval_screenspot_loss_xval": 1.6232096354166667, + "eval_screenspot_runtime": 37.4679, + "eval_screenspot_samples_per_second": 2.375, + "eval_screenspot_steps_per_second": 0.08, + "num_input_tokens_seen": 392214772, + "step": 7000 + }, + { + "epoch": 15.590200445434299, + "eval_compot_CIoU": 0.3413173258304596, + "eval_compot_GIoU": 0.35517509281635284, + "eval_compot_IoU": 0.401169016957283, + "eval_compot_MAE_all": 0.019693247973918915, + "eval_compot_MAE_h": 0.013937031384557486, + "eval_compot_MAE_w": 0.021615090779960155, + "eval_compot_MAE_x_boxes": 0.02992706559598446, + "eval_compot_MAE_y_boxes": 0.0066331722773611546, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4276500940322876, + "eval_compot_loss_ce": 0.00015015306416898966, + "eval_compot_loss_iou": 0.6561279296875, + "eval_compot_loss_num": 0.018407821655273438, + "eval_compot_loss_xval": 1.404296875, + "eval_compot_runtime": 24.1014, + "eval_compot_samples_per_second": 2.075, + "eval_compot_steps_per_second": 0.083, + "num_input_tokens_seen": 392214772, + "step": 7000 + }, + { + "epoch": 15.590200445434299, + "eval_custom_ui_val_CIoU": 0.4745243142048518, + "eval_custom_ui_val_GIoU": 0.4812171955903371, + "eval_custom_ui_val_IoU": 0.535526971022288, + "eval_custom_ui_val_MAE_all": 0.027974836269600525, + "eval_custom_ui_val_MAE_h": 0.015499611799087789, + "eval_custom_ui_val_MAE_w": 0.03617638742758168, + "eval_custom_ui_val_MAE_x_boxes": 0.033723721021993294, + "eval_custom_ui_val_MAE_y_boxes": 0.013699741387325857, + "eval_custom_ui_val_inside_bbox": 0.7685185207260979, + "eval_custom_ui_val_loss": 1.1788626909255981, + "eval_custom_ui_val_loss_ce": 0.00017328551944552196, + "eval_custom_ui_val_loss_iou": 0.5056423611111112, + "eval_custom_ui_val_loss_num": 0.02455192142062717, + "eval_custom_ui_val_loss_xval": 1.1339518229166667, + "eval_custom_ui_val_runtime": 72.2545, + "eval_custom_ui_val_samples_per_second": 3.668, + "eval_custom_ui_val_steps_per_second": 0.125, + "num_input_tokens_seen": 392214772, + "step": 7000 + }, + { + "epoch": 15.590200445434299, + "loss": 0.8509615063667297, + "loss_ce": 0.0001314119144808501, + "loss_iou": 0.3828125, + "loss_num": 0.01708984375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 392214772, + "step": 7000 + }, + { + "epoch": 15.592427616926503, + "grad_norm": 15.370309829711914, + "learning_rate": 1e-06, + "loss": 0.5216, + "num_input_tokens_seen": 392269336, + "step": 7001 + }, + { + "epoch": 15.592427616926503, + "loss": 0.47645553946495056, + "loss_ce": 0.00013718298578169197, + "loss_iou": 0.2099609375, + "loss_num": 0.01123046875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 392269336, + "step": 7001 + }, + { + "epoch": 15.594654788418708, + "grad_norm": 14.073122024536133, + "learning_rate": 1e-06, + "loss": 0.4764, + "num_input_tokens_seen": 392324300, + "step": 7002 + }, + { + "epoch": 15.594654788418708, + "loss": 0.3416048288345337, + "loss_ce": 0.00011310819536447525, + "loss_iou": 0.1552734375, + "loss_num": 0.006195068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 392324300, + "step": 7002 + }, + { + "epoch": 15.596881959910913, + "grad_norm": 23.41605567932129, + "learning_rate": 1e-06, + "loss": 0.4436, + "num_input_tokens_seen": 392381052, + "step": 7003 + }, + { + "epoch": 15.596881959910913, + "loss": 0.4482382535934448, + "loss_ce": 0.00036228023236617446, + "loss_iou": 0.19921875, + "loss_num": 0.0098876953125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 392381052, + "step": 7003 + }, + { + "epoch": 15.599109131403118, + "grad_norm": 18.33258628845215, + "learning_rate": 1e-06, + "loss": 0.3708, + "num_input_tokens_seen": 392438500, + "step": 7004 + }, + { + "epoch": 15.599109131403118, + "loss": 0.49753135442733765, + "loss_ce": 9.481675806455314e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.019287109375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 392438500, + "step": 7004 + }, + { + "epoch": 15.601336302895323, + "grad_norm": 40.55979537963867, + "learning_rate": 1e-06, + "loss": 0.4622, + "num_input_tokens_seen": 392491684, + "step": 7005 + }, + { + "epoch": 15.601336302895323, + "loss": 0.2954270541667938, + "loss_ce": 0.00013897480675950646, + "loss_iou": 0.1318359375, + "loss_num": 0.006195068359375, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 392491684, + "step": 7005 + }, + { + "epoch": 15.603563474387528, + "grad_norm": 10.46609115600586, + "learning_rate": 1e-06, + "loss": 0.4256, + "num_input_tokens_seen": 392550124, + "step": 7006 + }, + { + "epoch": 15.603563474387528, + "loss": 0.4542039632797241, + "loss_ce": 0.00010239450784865767, + "loss_iou": 0.2001953125, + "loss_num": 0.01080322265625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 392550124, + "step": 7006 + }, + { + "epoch": 15.605790645879733, + "grad_norm": 15.158596992492676, + "learning_rate": 1e-06, + "loss": 0.3997, + "num_input_tokens_seen": 392606256, + "step": 7007 + }, + { + "epoch": 15.605790645879733, + "loss": 0.473002552986145, + "loss_ce": 0.00010216711962129921, + "loss_iou": 0.1865234375, + "loss_num": 0.02001953125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 392606256, + "step": 7007 + }, + { + "epoch": 15.608017817371937, + "grad_norm": 23.731393814086914, + "learning_rate": 1e-06, + "loss": 0.4986, + "num_input_tokens_seen": 392661192, + "step": 7008 + }, + { + "epoch": 15.608017817371937, + "loss": 0.4049649238586426, + "loss_ce": 8.820135553833097e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.00732421875, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 392661192, + "step": 7008 + }, + { + "epoch": 15.610244988864142, + "grad_norm": 15.854848861694336, + "learning_rate": 1e-06, + "loss": 0.4375, + "num_input_tokens_seen": 392716600, + "step": 7009 + }, + { + "epoch": 15.610244988864142, + "loss": 0.5096275806427002, + "loss_ce": 0.00010606721480144188, + "loss_iou": 0.20703125, + "loss_num": 0.01904296875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 392716600, + "step": 7009 + }, + { + "epoch": 15.612472160356347, + "grad_norm": 13.901849746704102, + "learning_rate": 1e-06, + "loss": 0.4194, + "num_input_tokens_seen": 392774900, + "step": 7010 + }, + { + "epoch": 15.612472160356347, + "loss": 0.4252671003341675, + "loss_ce": 9.621331264497712e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0084228515625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 392774900, + "step": 7010 + }, + { + "epoch": 15.614699331848552, + "grad_norm": 13.334481239318848, + "learning_rate": 1e-06, + "loss": 0.4398, + "num_input_tokens_seen": 392830348, + "step": 7011 + }, + { + "epoch": 15.614699331848552, + "loss": 0.6181551218032837, + "loss_ce": 0.00011313124559819698, + "loss_iou": 0.244140625, + "loss_num": 0.02587890625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 392830348, + "step": 7011 + }, + { + "epoch": 15.616926503340757, + "grad_norm": 17.49168586730957, + "learning_rate": 1e-06, + "loss": 0.6052, + "num_input_tokens_seen": 392887200, + "step": 7012 + }, + { + "epoch": 15.616926503340757, + "loss": 0.7159167528152466, + "loss_ce": 9.638856136007234e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 392887200, + "step": 7012 + }, + { + "epoch": 15.619153674832962, + "grad_norm": 18.257720947265625, + "learning_rate": 1e-06, + "loss": 0.3736, + "num_input_tokens_seen": 392945796, + "step": 7013 + }, + { + "epoch": 15.619153674832962, + "loss": 0.4207872748374939, + "loss_ce": 0.000132970220874995, + "loss_iou": 0.1796875, + "loss_num": 0.012451171875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 392945796, + "step": 7013 + }, + { + "epoch": 15.621380846325167, + "grad_norm": 21.57740020751953, + "learning_rate": 1e-06, + "loss": 0.3268, + "num_input_tokens_seen": 393004392, + "step": 7014 + }, + { + "epoch": 15.621380846325167, + "loss": 0.26535099744796753, + "loss_ce": 9.219862113241106e-05, + "loss_iou": 0.12109375, + "loss_num": 0.00457763671875, + "loss_xval": 0.265625, + "num_input_tokens_seen": 393004392, + "step": 7014 + }, + { + "epoch": 15.623608017817372, + "grad_norm": 17.83563232421875, + "learning_rate": 1e-06, + "loss": 0.3994, + "num_input_tokens_seen": 393060176, + "step": 7015 + }, + { + "epoch": 15.623608017817372, + "loss": 0.3122119903564453, + "loss_ce": 7.820721657481045e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.00799560546875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 393060176, + "step": 7015 + }, + { + "epoch": 15.625835189309576, + "grad_norm": 29.299760818481445, + "learning_rate": 1e-06, + "loss": 0.5213, + "num_input_tokens_seen": 393118056, + "step": 7016 + }, + { + "epoch": 15.625835189309576, + "loss": 0.37060514092445374, + "loss_ce": 0.00012174376752227545, + "loss_iou": 0.16796875, + "loss_num": 0.006927490234375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 393118056, + "step": 7016 + }, + { + "epoch": 15.628062360801781, + "grad_norm": 12.648885726928711, + "learning_rate": 1e-06, + "loss": 0.5216, + "num_input_tokens_seen": 393177148, + "step": 7017 + }, + { + "epoch": 15.628062360801781, + "loss": 0.40695855021476746, + "loss_ce": 9.820509876590222e-05, + "loss_iou": 0.189453125, + "loss_num": 0.00543212890625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 393177148, + "step": 7017 + }, + { + "epoch": 15.630289532293986, + "grad_norm": 24.881315231323242, + "learning_rate": 1e-06, + "loss": 0.3718, + "num_input_tokens_seen": 393231232, + "step": 7018 + }, + { + "epoch": 15.630289532293986, + "loss": 0.41211479902267456, + "loss_ce": 0.00012749881716445088, + "loss_iou": 0.1689453125, + "loss_num": 0.0147705078125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 393231232, + "step": 7018 + }, + { + "epoch": 15.632516703786191, + "grad_norm": 21.462350845336914, + "learning_rate": 1e-06, + "loss": 0.5006, + "num_input_tokens_seen": 393285416, + "step": 7019 + }, + { + "epoch": 15.632516703786191, + "loss": 0.383044958114624, + "loss_ce": 0.00023245607735589147, + "loss_iou": 0.1767578125, + "loss_num": 0.005767822265625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 393285416, + "step": 7019 + }, + { + "epoch": 15.634743875278396, + "grad_norm": 18.3259220123291, + "learning_rate": 1e-06, + "loss": 0.5313, + "num_input_tokens_seen": 393342488, + "step": 7020 + }, + { + "epoch": 15.634743875278396, + "loss": 0.6249858140945435, + "loss_ce": 0.00016893941210582852, + "loss_iou": 0.259765625, + "loss_num": 0.02099609375, + "loss_xval": 0.625, + "num_input_tokens_seen": 393342488, + "step": 7020 + }, + { + "epoch": 15.6369710467706, + "grad_norm": 33.112945556640625, + "learning_rate": 1e-06, + "loss": 0.5863, + "num_input_tokens_seen": 393400080, + "step": 7021 + }, + { + "epoch": 15.6369710467706, + "loss": 0.4414959251880646, + "loss_ce": 8.966858149506152e-05, + "loss_iou": 0.1953125, + "loss_num": 0.01025390625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 393400080, + "step": 7021 + }, + { + "epoch": 15.639198218262806, + "grad_norm": 16.2619571685791, + "learning_rate": 1e-06, + "loss": 0.3958, + "num_input_tokens_seen": 393456744, + "step": 7022 + }, + { + "epoch": 15.639198218262806, + "loss": 0.37460190057754517, + "loss_ce": 9.020163997774944e-05, + "loss_iou": 0.16796875, + "loss_num": 0.007720947265625, + "loss_xval": 0.375, + "num_input_tokens_seen": 393456744, + "step": 7022 + }, + { + "epoch": 15.64142538975501, + "grad_norm": 20.06168556213379, + "learning_rate": 1e-06, + "loss": 0.3711, + "num_input_tokens_seen": 393511708, + "step": 7023 + }, + { + "epoch": 15.64142538975501, + "loss": 0.4522230625152588, + "loss_ce": 7.460695633199066e-05, + "loss_iou": 0.1875, + "loss_num": 0.0155029296875, + "loss_xval": 0.453125, + "num_input_tokens_seen": 393511708, + "step": 7023 + }, + { + "epoch": 15.643652561247215, + "grad_norm": 13.850775718688965, + "learning_rate": 1e-06, + "loss": 0.4639, + "num_input_tokens_seen": 393569980, + "step": 7024 + }, + { + "epoch": 15.643652561247215, + "loss": 0.4485822916030884, + "loss_ce": 9.598617180017754e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.01348876953125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 393569980, + "step": 7024 + }, + { + "epoch": 15.64587973273942, + "grad_norm": 24.194141387939453, + "learning_rate": 1e-06, + "loss": 0.6029, + "num_input_tokens_seen": 393623688, + "step": 7025 + }, + { + "epoch": 15.64587973273942, + "loss": 0.7536381483078003, + "loss_ce": 9.815217345021665e-05, + "loss_iou": 0.283203125, + "loss_num": 0.037353515625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 393623688, + "step": 7025 + }, + { + "epoch": 15.648106904231625, + "grad_norm": 149.88299560546875, + "learning_rate": 1e-06, + "loss": 0.4324, + "num_input_tokens_seen": 393678112, + "step": 7026 + }, + { + "epoch": 15.648106904231625, + "loss": 0.4286256432533264, + "loss_ce": 9.779322135727853e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.0157470703125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 393678112, + "step": 7026 + }, + { + "epoch": 15.65033407572383, + "grad_norm": 18.4648494720459, + "learning_rate": 1e-06, + "loss": 0.5376, + "num_input_tokens_seen": 393735444, + "step": 7027 + }, + { + "epoch": 15.65033407572383, + "loss": 0.5531010627746582, + "loss_ce": 0.00012253447494003922, + "loss_iou": 0.2158203125, + "loss_num": 0.0245361328125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 393735444, + "step": 7027 + }, + { + "epoch": 15.652561247216035, + "grad_norm": 23.74418830871582, + "learning_rate": 1e-06, + "loss": 0.4233, + "num_input_tokens_seen": 393789508, + "step": 7028 + }, + { + "epoch": 15.652561247216035, + "loss": 0.3642292618751526, + "loss_ce": 9.35211282921955e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.01080322265625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 393789508, + "step": 7028 + }, + { + "epoch": 15.654788418708241, + "grad_norm": 18.24901580810547, + "learning_rate": 1e-06, + "loss": 0.4294, + "num_input_tokens_seen": 393845576, + "step": 7029 + }, + { + "epoch": 15.654788418708241, + "loss": 0.337370365858078, + "loss_ce": 9.009480709210038e-05, + "loss_iou": 0.150390625, + "loss_num": 0.007293701171875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 393845576, + "step": 7029 + }, + { + "epoch": 15.657015590200446, + "grad_norm": 18.35692596435547, + "learning_rate": 1e-06, + "loss": 0.3768, + "num_input_tokens_seen": 393901840, + "step": 7030 + }, + { + "epoch": 15.657015590200446, + "loss": 0.3451303243637085, + "loss_ce": 0.00015963747864589095, + "loss_iou": 0.158203125, + "loss_num": 0.005462646484375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 393901840, + "step": 7030 + }, + { + "epoch": 15.659242761692651, + "grad_norm": 19.633066177368164, + "learning_rate": 1e-06, + "loss": 0.3587, + "num_input_tokens_seen": 393956624, + "step": 7031 + }, + { + "epoch": 15.659242761692651, + "loss": 0.23722299933433533, + "loss_ce": 0.00010140843369299546, + "loss_iou": 0.1083984375, + "loss_num": 0.004058837890625, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 393956624, + "step": 7031 + }, + { + "epoch": 15.661469933184856, + "grad_norm": 13.26457691192627, + "learning_rate": 1e-06, + "loss": 0.4878, + "num_input_tokens_seen": 394015648, + "step": 7032 + }, + { + "epoch": 15.661469933184856, + "loss": 0.5062072277069092, + "loss_ce": 0.0003479141159914434, + "loss_iou": 0.2041015625, + "loss_num": 0.01953125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 394015648, + "step": 7032 + }, + { + "epoch": 15.66369710467706, + "grad_norm": 15.56800651550293, + "learning_rate": 1e-06, + "loss": 0.4825, + "num_input_tokens_seen": 394068676, + "step": 7033 + }, + { + "epoch": 15.66369710467706, + "loss": 0.38187873363494873, + "loss_ce": 0.0001038167392835021, + "loss_iou": 0.1552734375, + "loss_num": 0.01409912109375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 394068676, + "step": 7033 + }, + { + "epoch": 15.665924276169266, + "grad_norm": 13.747230529785156, + "learning_rate": 1e-06, + "loss": 0.502, + "num_input_tokens_seen": 394125676, + "step": 7034 + }, + { + "epoch": 15.665924276169266, + "loss": 0.47450119256973267, + "loss_ce": 0.0001359964517178014, + "loss_iou": 0.208984375, + "loss_num": 0.01129150390625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 394125676, + "step": 7034 + }, + { + "epoch": 15.66815144766147, + "grad_norm": 13.67151165008545, + "learning_rate": 1e-06, + "loss": 0.3227, + "num_input_tokens_seen": 394182648, + "step": 7035 + }, + { + "epoch": 15.66815144766147, + "loss": 0.3714517056941986, + "loss_ce": 0.0001138186635216698, + "loss_iou": 0.1689453125, + "loss_num": 0.006683349609375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 394182648, + "step": 7035 + }, + { + "epoch": 15.670378619153675, + "grad_norm": 27.722789764404297, + "learning_rate": 1e-06, + "loss": 0.4209, + "num_input_tokens_seen": 394236828, + "step": 7036 + }, + { + "epoch": 15.670378619153675, + "loss": 0.4359535276889801, + "loss_ce": 0.00010151089372811839, + "loss_iou": 0.189453125, + "loss_num": 0.011474609375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 394236828, + "step": 7036 + }, + { + "epoch": 15.67260579064588, + "grad_norm": 18.144771575927734, + "learning_rate": 1e-06, + "loss": 0.4676, + "num_input_tokens_seen": 394291552, + "step": 7037 + }, + { + "epoch": 15.67260579064588, + "loss": 0.4619390070438385, + "loss_ce": 8.598440763307735e-05, + "loss_iou": 0.201171875, + "loss_num": 0.011962890625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 394291552, + "step": 7037 + }, + { + "epoch": 15.674832962138085, + "grad_norm": 15.883332252502441, + "learning_rate": 1e-06, + "loss": 0.3619, + "num_input_tokens_seen": 394349988, + "step": 7038 + }, + { + "epoch": 15.674832962138085, + "loss": 0.3775450587272644, + "loss_ce": 0.00010363116598455235, + "loss_iou": 0.166015625, + "loss_num": 0.00897216796875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 394349988, + "step": 7038 + }, + { + "epoch": 15.67706013363029, + "grad_norm": 18.735139846801758, + "learning_rate": 1e-06, + "loss": 0.3232, + "num_input_tokens_seen": 394405816, + "step": 7039 + }, + { + "epoch": 15.67706013363029, + "loss": 0.30405598878860474, + "loss_ce": 0.0001009251645882614, + "loss_iou": 0.12890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 394405816, + "step": 7039 + }, + { + "epoch": 15.679287305122495, + "grad_norm": 32.7242546081543, + "learning_rate": 1e-06, + "loss": 0.5001, + "num_input_tokens_seen": 394458988, + "step": 7040 + }, + { + "epoch": 15.679287305122495, + "loss": 0.6007954478263855, + "loss_ce": 8.743096987018362e-05, + "loss_iou": 0.265625, + "loss_num": 0.0137939453125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 394458988, + "step": 7040 + }, + { + "epoch": 15.6815144766147, + "grad_norm": 17.01080322265625, + "learning_rate": 1e-06, + "loss": 0.4883, + "num_input_tokens_seen": 394517044, + "step": 7041 + }, + { + "epoch": 15.6815144766147, + "loss": 0.6097177267074585, + "loss_ce": 9.863986633718014e-05, + "loss_iou": 0.26953125, + "loss_num": 0.014404296875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 394517044, + "step": 7041 + }, + { + "epoch": 15.683741648106905, + "grad_norm": 23.347501754760742, + "learning_rate": 1e-06, + "loss": 0.4004, + "num_input_tokens_seen": 394575972, + "step": 7042 + }, + { + "epoch": 15.683741648106905, + "loss": 0.424541175365448, + "loss_ce": 0.00010270069469697773, + "loss_iou": 0.193359375, + "loss_num": 0.007659912109375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 394575972, + "step": 7042 + }, + { + "epoch": 15.68596881959911, + "grad_norm": 17.75396156311035, + "learning_rate": 1e-06, + "loss": 0.4391, + "num_input_tokens_seen": 394631332, + "step": 7043 + }, + { + "epoch": 15.68596881959911, + "loss": 0.5316604375839233, + "loss_ce": 0.0010207871673628688, + "loss_iou": 0.2275390625, + "loss_num": 0.01519775390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 394631332, + "step": 7043 + }, + { + "epoch": 15.688195991091314, + "grad_norm": 20.462841033935547, + "learning_rate": 1e-06, + "loss": 0.2736, + "num_input_tokens_seen": 394687344, + "step": 7044 + }, + { + "epoch": 15.688195991091314, + "loss": 0.30839377641677856, + "loss_ce": 0.0001052148436428979, + "loss_iou": 0.12890625, + "loss_num": 0.0101318359375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 394687344, + "step": 7044 + }, + { + "epoch": 15.690423162583519, + "grad_norm": 17.69515037536621, + "learning_rate": 1e-06, + "loss": 0.5623, + "num_input_tokens_seen": 394742256, + "step": 7045 + }, + { + "epoch": 15.690423162583519, + "loss": 0.6370512247085571, + "loss_ce": 0.00011880889360327274, + "loss_iou": 0.279296875, + "loss_num": 0.0155029296875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 394742256, + "step": 7045 + }, + { + "epoch": 15.692650334075724, + "grad_norm": 19.867359161376953, + "learning_rate": 1e-06, + "loss": 0.4736, + "num_input_tokens_seen": 394796212, + "step": 7046 + }, + { + "epoch": 15.692650334075724, + "loss": 0.5707098245620728, + "loss_ce": 0.00015316563076339662, + "loss_iou": 0.2353515625, + "loss_num": 0.0198974609375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 394796212, + "step": 7046 + }, + { + "epoch": 15.694877505567929, + "grad_norm": 17.22439193725586, + "learning_rate": 1e-06, + "loss": 0.5096, + "num_input_tokens_seen": 394849460, + "step": 7047 + }, + { + "epoch": 15.694877505567929, + "loss": 0.4576007127761841, + "loss_ce": 8.11615027487278e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0091552734375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 394849460, + "step": 7047 + }, + { + "epoch": 15.697104677060134, + "grad_norm": 19.11503791809082, + "learning_rate": 1e-06, + "loss": 0.3946, + "num_input_tokens_seen": 394907576, + "step": 7048 + }, + { + "epoch": 15.697104677060134, + "loss": 0.29610675573349, + "loss_ce": 8.626521594123915e-05, + "loss_iou": 0.1328125, + "loss_num": 0.006256103515625, + "loss_xval": 0.296875, + "num_input_tokens_seen": 394907576, + "step": 7048 + }, + { + "epoch": 15.699331848552339, + "grad_norm": 14.611241340637207, + "learning_rate": 1e-06, + "loss": 0.3639, + "num_input_tokens_seen": 394963124, + "step": 7049 + }, + { + "epoch": 15.699331848552339, + "loss": 0.4972745180130005, + "loss_ce": 8.216071000788361e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.015625, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 394963124, + "step": 7049 + }, + { + "epoch": 15.701559020044543, + "grad_norm": 22.508859634399414, + "learning_rate": 1e-06, + "loss": 0.4025, + "num_input_tokens_seen": 395020276, + "step": 7050 + }, + { + "epoch": 15.701559020044543, + "loss": 0.35203513503074646, + "loss_ce": 0.00010644704889273271, + "loss_iou": 0.154296875, + "loss_num": 0.0086669921875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 395020276, + "step": 7050 + }, + { + "epoch": 15.703786191536748, + "grad_norm": 24.426239013671875, + "learning_rate": 1e-06, + "loss": 0.5065, + "num_input_tokens_seen": 395078896, + "step": 7051 + }, + { + "epoch": 15.703786191536748, + "loss": 0.530193030834198, + "loss_ce": 0.00010265262972097844, + "loss_iou": 0.2333984375, + "loss_num": 0.0128173828125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 395078896, + "step": 7051 + }, + { + "epoch": 15.706013363028953, + "grad_norm": 21.90961456298828, + "learning_rate": 1e-06, + "loss": 0.6311, + "num_input_tokens_seen": 395137768, + "step": 7052 + }, + { + "epoch": 15.706013363028953, + "loss": 0.6914348006248474, + "loss_ce": 8.963010623119771e-05, + "loss_iou": 0.267578125, + "loss_num": 0.03125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 395137768, + "step": 7052 + }, + { + "epoch": 15.708240534521158, + "grad_norm": 17.655078887939453, + "learning_rate": 1e-06, + "loss": 0.4446, + "num_input_tokens_seen": 395194468, + "step": 7053 + }, + { + "epoch": 15.708240534521158, + "loss": 0.49838685989379883, + "loss_ce": 9.587279055267572e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 395194468, + "step": 7053 + }, + { + "epoch": 15.710467706013363, + "grad_norm": 16.85371971130371, + "learning_rate": 1e-06, + "loss": 0.7519, + "num_input_tokens_seen": 395248904, + "step": 7054 + }, + { + "epoch": 15.710467706013363, + "loss": 0.6303013563156128, + "loss_ce": 0.0001133399928221479, + "loss_iou": 0.248046875, + "loss_num": 0.027099609375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 395248904, + "step": 7054 + }, + { + "epoch": 15.712694877505568, + "grad_norm": 18.303388595581055, + "learning_rate": 1e-06, + "loss": 0.349, + "num_input_tokens_seen": 395306728, + "step": 7055 + }, + { + "epoch": 15.712694877505568, + "loss": 0.32140427827835083, + "loss_ce": 0.00011521350825205445, + "loss_iou": 0.13671875, + "loss_num": 0.009765625, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 395306728, + "step": 7055 + }, + { + "epoch": 15.714922048997773, + "grad_norm": 16.185035705566406, + "learning_rate": 1e-06, + "loss": 0.4021, + "num_input_tokens_seen": 395362652, + "step": 7056 + }, + { + "epoch": 15.714922048997773, + "loss": 0.24985191226005554, + "loss_ce": 9.60544275585562e-05, + "loss_iou": 0.1103515625, + "loss_num": 0.005706787109375, + "loss_xval": 0.25, + "num_input_tokens_seen": 395362652, + "step": 7056 + }, + { + "epoch": 15.717149220489977, + "grad_norm": 12.218123435974121, + "learning_rate": 1e-06, + "loss": 0.4379, + "num_input_tokens_seen": 395419380, + "step": 7057 + }, + { + "epoch": 15.717149220489977, + "loss": 0.4651813805103302, + "loss_ce": 9.347945888293907e-05, + "loss_iou": 0.203125, + "loss_num": 0.0115966796875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 395419380, + "step": 7057 + }, + { + "epoch": 15.719376391982182, + "grad_norm": 19.603456497192383, + "learning_rate": 1e-06, + "loss": 0.3471, + "num_input_tokens_seen": 395475324, + "step": 7058 + }, + { + "epoch": 15.719376391982182, + "loss": 0.33774223923683167, + "loss_ce": 9.575536387274042e-05, + "loss_iou": 0.154296875, + "loss_num": 0.005828857421875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 395475324, + "step": 7058 + }, + { + "epoch": 15.721603563474387, + "grad_norm": 25.467702865600586, + "learning_rate": 1e-06, + "loss": 0.379, + "num_input_tokens_seen": 395532144, + "step": 7059 + }, + { + "epoch": 15.721603563474387, + "loss": 0.3512808680534363, + "loss_ce": 8.456782961729914e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.010498046875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 395532144, + "step": 7059 + }, + { + "epoch": 15.723830734966592, + "grad_norm": 18.630399703979492, + "learning_rate": 1e-06, + "loss": 0.3926, + "num_input_tokens_seen": 395587848, + "step": 7060 + }, + { + "epoch": 15.723830734966592, + "loss": 0.4616337716579437, + "loss_ce": 8.594746032031253e-05, + "loss_iou": 0.1953125, + "loss_num": 0.01422119140625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 395587848, + "step": 7060 + }, + { + "epoch": 15.726057906458797, + "grad_norm": 14.84992504119873, + "learning_rate": 1e-06, + "loss": 0.6171, + "num_input_tokens_seen": 395642736, + "step": 7061 + }, + { + "epoch": 15.726057906458797, + "loss": 0.6872535943984985, + "loss_ce": 0.0001198096142616123, + "loss_iou": 0.265625, + "loss_num": 0.031494140625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 395642736, + "step": 7061 + }, + { + "epoch": 15.728285077951002, + "grad_norm": 23.57808494567871, + "learning_rate": 1e-06, + "loss": 0.4608, + "num_input_tokens_seen": 395699808, + "step": 7062 + }, + { + "epoch": 15.728285077951002, + "loss": 0.6477620601654053, + "loss_ce": 0.00011803221423178911, + "loss_iou": 0.298828125, + "loss_num": 0.01031494140625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 395699808, + "step": 7062 + }, + { + "epoch": 15.730512249443207, + "grad_norm": 15.21978759765625, + "learning_rate": 1e-06, + "loss": 0.4271, + "num_input_tokens_seen": 395757824, + "step": 7063 + }, + { + "epoch": 15.730512249443207, + "loss": 0.3883220851421356, + "loss_ce": 7.74528380134143e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00799560546875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 395757824, + "step": 7063 + }, + { + "epoch": 15.732739420935411, + "grad_norm": 29.952592849731445, + "learning_rate": 1e-06, + "loss": 0.3861, + "num_input_tokens_seen": 395815416, + "step": 7064 + }, + { + "epoch": 15.732739420935411, + "loss": 0.37809085845947266, + "loss_ce": 0.00010012884740717709, + "loss_iou": 0.1728515625, + "loss_num": 0.006500244140625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 395815416, + "step": 7064 + }, + { + "epoch": 15.734966592427616, + "grad_norm": 16.03706169128418, + "learning_rate": 1e-06, + "loss": 0.4989, + "num_input_tokens_seen": 395868684, + "step": 7065 + }, + { + "epoch": 15.734966592427616, + "loss": 0.523518443107605, + "loss_ce": 8.08899276307784e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0142822265625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 395868684, + "step": 7065 + }, + { + "epoch": 15.737193763919821, + "grad_norm": 16.18170928955078, + "learning_rate": 1e-06, + "loss": 0.3421, + "num_input_tokens_seen": 395925000, + "step": 7066 + }, + { + "epoch": 15.737193763919821, + "loss": 0.4338296949863434, + "loss_ce": 0.00011389805149519816, + "loss_iou": 0.1982421875, + "loss_num": 0.007568359375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 395925000, + "step": 7066 + }, + { + "epoch": 15.739420935412026, + "grad_norm": 11.560032844543457, + "learning_rate": 1e-06, + "loss": 0.3395, + "num_input_tokens_seen": 395979500, + "step": 7067 + }, + { + "epoch": 15.739420935412026, + "loss": 0.26005110144615173, + "loss_ce": 0.00010237214155495167, + "loss_iou": 0.107421875, + "loss_num": 0.009033203125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 395979500, + "step": 7067 + }, + { + "epoch": 15.74164810690423, + "grad_norm": 24.783843994140625, + "learning_rate": 1e-06, + "loss": 0.3423, + "num_input_tokens_seen": 396037980, + "step": 7068 + }, + { + "epoch": 15.74164810690423, + "loss": 0.2767605781555176, + "loss_ce": 8.823502867016941e-05, + "loss_iou": 0.10791015625, + "loss_num": 0.01214599609375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 396037980, + "step": 7068 + }, + { + "epoch": 15.743875278396436, + "grad_norm": 13.664560317993164, + "learning_rate": 1e-06, + "loss": 0.3566, + "num_input_tokens_seen": 396095480, + "step": 7069 + }, + { + "epoch": 15.743875278396436, + "loss": 0.30287209153175354, + "loss_ce": 7.669955084566027e-05, + "loss_iou": 0.1328125, + "loss_num": 0.00750732421875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 396095480, + "step": 7069 + }, + { + "epoch": 15.74610244988864, + "grad_norm": 17.73408317565918, + "learning_rate": 1e-06, + "loss": 0.4481, + "num_input_tokens_seen": 396152320, + "step": 7070 + }, + { + "epoch": 15.74610244988864, + "loss": 0.5743053555488586, + "loss_ce": 8.658809383632615e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.019287109375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 396152320, + "step": 7070 + }, + { + "epoch": 15.748329621380847, + "grad_norm": 16.201440811157227, + "learning_rate": 1e-06, + "loss": 0.4466, + "num_input_tokens_seen": 396212012, + "step": 7071 + }, + { + "epoch": 15.748329621380847, + "loss": 0.5690320730209351, + "loss_ce": 0.00018442686996422708, + "loss_iou": 0.232421875, + "loss_num": 0.0211181640625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 396212012, + "step": 7071 + }, + { + "epoch": 15.750556792873052, + "grad_norm": 15.789632797241211, + "learning_rate": 1e-06, + "loss": 0.3196, + "num_input_tokens_seen": 396266208, + "step": 7072 + }, + { + "epoch": 15.750556792873052, + "loss": 0.30217671394348145, + "loss_ce": 0.00011373275629011914, + "loss_iou": 0.126953125, + "loss_num": 0.00970458984375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 396266208, + "step": 7072 + }, + { + "epoch": 15.752783964365257, + "grad_norm": 18.404560089111328, + "learning_rate": 1e-06, + "loss": 0.4347, + "num_input_tokens_seen": 396325024, + "step": 7073 + }, + { + "epoch": 15.752783964365257, + "loss": 0.2929561734199524, + "loss_ce": 7.899131742306054e-05, + "loss_iou": 0.12060546875, + "loss_num": 0.0103759765625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 396325024, + "step": 7073 + }, + { + "epoch": 15.755011135857462, + "grad_norm": 24.089750289916992, + "learning_rate": 1e-06, + "loss": 0.5156, + "num_input_tokens_seen": 396382244, + "step": 7074 + }, + { + "epoch": 15.755011135857462, + "loss": 0.2893972396850586, + "loss_ce": 9.059577132575214e-05, + "loss_iou": 0.125, + "loss_num": 0.00799560546875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 396382244, + "step": 7074 + }, + { + "epoch": 15.757238307349667, + "grad_norm": 13.719793319702148, + "learning_rate": 1e-06, + "loss": 0.3745, + "num_input_tokens_seen": 396438428, + "step": 7075 + }, + { + "epoch": 15.757238307349667, + "loss": 0.41726016998291016, + "loss_ce": 0.00014590269711334258, + "loss_iou": 0.1650390625, + "loss_num": 0.017333984375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 396438428, + "step": 7075 + }, + { + "epoch": 15.759465478841872, + "grad_norm": 17.886260986328125, + "learning_rate": 1e-06, + "loss": 0.5086, + "num_input_tokens_seen": 396492232, + "step": 7076 + }, + { + "epoch": 15.759465478841872, + "loss": 0.2809308171272278, + "loss_ce": 0.00010807791841216385, + "loss_iou": 0.11083984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 396492232, + "step": 7076 + }, + { + "epoch": 15.761692650334076, + "grad_norm": 27.26051902770996, + "learning_rate": 1e-06, + "loss": 0.4125, + "num_input_tokens_seen": 396546276, + "step": 7077 + }, + { + "epoch": 15.761692650334076, + "loss": 0.391597181558609, + "loss_ce": 0.00011767500836867839, + "loss_iou": 0.1552734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.390625, + "num_input_tokens_seen": 396546276, + "step": 7077 + }, + { + "epoch": 15.763919821826281, + "grad_norm": 19.251379013061523, + "learning_rate": 1e-06, + "loss": 0.6037, + "num_input_tokens_seen": 396600784, + "step": 7078 + }, + { + "epoch": 15.763919821826281, + "loss": 0.5241034030914307, + "loss_ce": 0.00011657152936095372, + "loss_iou": 0.234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 396600784, + "step": 7078 + }, + { + "epoch": 15.766146993318486, + "grad_norm": 15.035192489624023, + "learning_rate": 1e-06, + "loss": 0.579, + "num_input_tokens_seen": 396656868, + "step": 7079 + }, + { + "epoch": 15.766146993318486, + "loss": 0.699531078338623, + "loss_ce": 0.00031230467720888555, + "loss_iou": 0.296875, + "loss_num": 0.021240234375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 396656868, + "step": 7079 + }, + { + "epoch": 15.768374164810691, + "grad_norm": 23.482166290283203, + "learning_rate": 1e-06, + "loss": 0.4239, + "num_input_tokens_seen": 396711564, + "step": 7080 + }, + { + "epoch": 15.768374164810691, + "loss": 0.4484688639640808, + "loss_ce": 0.00010461645433679223, + "loss_iou": 0.18359375, + "loss_num": 0.0162353515625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 396711564, + "step": 7080 + }, + { + "epoch": 15.770601336302896, + "grad_norm": 15.677742958068848, + "learning_rate": 1e-06, + "loss": 0.4709, + "num_input_tokens_seen": 396769364, + "step": 7081 + }, + { + "epoch": 15.770601336302896, + "loss": 0.4059576392173767, + "loss_ce": 0.00016540827346034348, + "loss_iou": 0.177734375, + "loss_num": 0.01019287109375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 396769364, + "step": 7081 + }, + { + "epoch": 15.7728285077951, + "grad_norm": 28.17963409423828, + "learning_rate": 1e-06, + "loss": 0.4652, + "num_input_tokens_seen": 396826500, + "step": 7082 + }, + { + "epoch": 15.7728285077951, + "loss": 0.4379936456680298, + "loss_ce": 0.00012745258572977036, + "loss_iou": 0.1728515625, + "loss_num": 0.018310546875, + "loss_xval": 0.4375, + "num_input_tokens_seen": 396826500, + "step": 7082 + }, + { + "epoch": 15.775055679287306, + "grad_norm": 15.786062240600586, + "learning_rate": 1e-06, + "loss": 0.4024, + "num_input_tokens_seen": 396881984, + "step": 7083 + }, + { + "epoch": 15.775055679287306, + "loss": 0.3337669372558594, + "loss_ce": 0.00014876520435791463, + "loss_iou": 0.1533203125, + "loss_num": 0.00537109375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 396881984, + "step": 7083 + }, + { + "epoch": 15.77728285077951, + "grad_norm": 18.75821304321289, + "learning_rate": 1e-06, + "loss": 0.5366, + "num_input_tokens_seen": 396937636, + "step": 7084 + }, + { + "epoch": 15.77728285077951, + "loss": 0.43820372223854065, + "loss_ce": 9.338198287878186e-05, + "loss_iou": 0.197265625, + "loss_num": 0.00872802734375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 396937636, + "step": 7084 + }, + { + "epoch": 15.779510022271715, + "grad_norm": 21.541553497314453, + "learning_rate": 1e-06, + "loss": 0.3054, + "num_input_tokens_seen": 396991964, + "step": 7085 + }, + { + "epoch": 15.779510022271715, + "loss": 0.25417301058769226, + "loss_ce": 0.0002667623048182577, + "loss_iou": 0.10693359375, + "loss_num": 0.008056640625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 396991964, + "step": 7085 + }, + { + "epoch": 15.78173719376392, + "grad_norm": 21.87734603881836, + "learning_rate": 1e-06, + "loss": 0.3177, + "num_input_tokens_seen": 397048488, + "step": 7086 + }, + { + "epoch": 15.78173719376392, + "loss": 0.32540494203567505, + "loss_ce": 8.75709592946805e-05, + "loss_iou": 0.134765625, + "loss_num": 0.01116943359375, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 397048488, + "step": 7086 + }, + { + "epoch": 15.783964365256125, + "grad_norm": 21.914344787597656, + "learning_rate": 1e-06, + "loss": 0.3939, + "num_input_tokens_seen": 397103460, + "step": 7087 + }, + { + "epoch": 15.783964365256125, + "loss": 0.4146553874015808, + "loss_ce": 0.00010459712939336896, + "loss_iou": 0.166015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 397103460, + "step": 7087 + }, + { + "epoch": 15.78619153674833, + "grad_norm": 21.92269515991211, + "learning_rate": 1e-06, + "loss": 0.3736, + "num_input_tokens_seen": 397158120, + "step": 7088 + }, + { + "epoch": 15.78619153674833, + "loss": 0.3409271240234375, + "loss_ce": 0.00010680149716790766, + "loss_iou": 0.1611328125, + "loss_num": 0.003692626953125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 397158120, + "step": 7088 + }, + { + "epoch": 15.788418708240535, + "grad_norm": 19.19719696044922, + "learning_rate": 1e-06, + "loss": 0.4235, + "num_input_tokens_seen": 397215380, + "step": 7089 + }, + { + "epoch": 15.788418708240535, + "loss": 0.45488834381103516, + "loss_ce": 0.00017644368926994503, + "loss_iou": 0.2041015625, + "loss_num": 0.00946044921875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 397215380, + "step": 7089 + }, + { + "epoch": 15.79064587973274, + "grad_norm": 21.13319969177246, + "learning_rate": 1e-06, + "loss": 0.4486, + "num_input_tokens_seen": 397270500, + "step": 7090 + }, + { + "epoch": 15.79064587973274, + "loss": 0.3929722011089325, + "loss_ce": 0.00014992158685345203, + "loss_iou": 0.17578125, + "loss_num": 0.0081787109375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 397270500, + "step": 7090 + }, + { + "epoch": 15.792873051224944, + "grad_norm": 13.024203300476074, + "learning_rate": 1e-06, + "loss": 0.4421, + "num_input_tokens_seen": 397328104, + "step": 7091 + }, + { + "epoch": 15.792873051224944, + "loss": 0.5068001747131348, + "loss_ce": 8.632005483377725e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 397328104, + "step": 7091 + }, + { + "epoch": 15.79510022271715, + "grad_norm": 17.045452117919922, + "learning_rate": 1e-06, + "loss": 0.3977, + "num_input_tokens_seen": 397384764, + "step": 7092 + }, + { + "epoch": 15.79510022271715, + "loss": 0.3076792359352112, + "loss_ce": 9.255674376618117e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00592041015625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 397384764, + "step": 7092 + }, + { + "epoch": 15.797327394209354, + "grad_norm": 15.998059272766113, + "learning_rate": 1e-06, + "loss": 0.3722, + "num_input_tokens_seen": 397438400, + "step": 7093 + }, + { + "epoch": 15.797327394209354, + "loss": 0.3463239073753357, + "loss_ce": 0.00013250944903120399, + "loss_iou": 0.1474609375, + "loss_num": 0.01007080078125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 397438400, + "step": 7093 + }, + { + "epoch": 15.799554565701559, + "grad_norm": 19.02997589111328, + "learning_rate": 1e-06, + "loss": 0.4442, + "num_input_tokens_seen": 397496316, + "step": 7094 + }, + { + "epoch": 15.799554565701559, + "loss": 0.39536595344543457, + "loss_ce": 0.0001022747892420739, + "loss_iou": 0.1796875, + "loss_num": 0.0069580078125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 397496316, + "step": 7094 + }, + { + "epoch": 15.801781737193764, + "grad_norm": 22.2495059967041, + "learning_rate": 1e-06, + "loss": 0.482, + "num_input_tokens_seen": 397551668, + "step": 7095 + }, + { + "epoch": 15.801781737193764, + "loss": 0.5242829918861389, + "loss_ce": 0.00029618252301588655, + "loss_iou": 0.2138671875, + "loss_num": 0.0194091796875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 397551668, + "step": 7095 + }, + { + "epoch": 15.804008908685969, + "grad_norm": 14.987038612365723, + "learning_rate": 1e-06, + "loss": 0.4245, + "num_input_tokens_seen": 397609464, + "step": 7096 + }, + { + "epoch": 15.804008908685969, + "loss": 0.41660401225090027, + "loss_ce": 0.00010012378334067762, + "loss_iou": 0.1748046875, + "loss_num": 0.01336669921875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 397609464, + "step": 7096 + }, + { + "epoch": 15.806236080178174, + "grad_norm": 14.618922233581543, + "learning_rate": 1e-06, + "loss": 0.4714, + "num_input_tokens_seen": 397667576, + "step": 7097 + }, + { + "epoch": 15.806236080178174, + "loss": 0.5953065156936646, + "loss_ce": 9.167171083390713e-05, + "loss_iou": 0.275390625, + "loss_num": 0.00848388671875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 397667576, + "step": 7097 + }, + { + "epoch": 15.808463251670378, + "grad_norm": 14.448540687561035, + "learning_rate": 1e-06, + "loss": 0.4018, + "num_input_tokens_seen": 397722704, + "step": 7098 + }, + { + "epoch": 15.808463251670378, + "loss": 0.486581951379776, + "loss_ce": 0.00010122812818735838, + "loss_iou": 0.1943359375, + "loss_num": 0.0196533203125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 397722704, + "step": 7098 + }, + { + "epoch": 15.810690423162583, + "grad_norm": 15.517051696777344, + "learning_rate": 1e-06, + "loss": 0.3963, + "num_input_tokens_seen": 397776580, + "step": 7099 + }, + { + "epoch": 15.810690423162583, + "loss": 0.36538606882095337, + "loss_ce": 0.00012117931328248233, + "loss_iou": 0.158203125, + "loss_num": 0.00982666015625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 397776580, + "step": 7099 + }, + { + "epoch": 15.812917594654788, + "grad_norm": 23.810832977294922, + "learning_rate": 1e-06, + "loss": 0.4988, + "num_input_tokens_seen": 397832360, + "step": 7100 + }, + { + "epoch": 15.812917594654788, + "loss": 0.6032587885856628, + "loss_ce": 0.00010939198546111584, + "loss_iou": 0.265625, + "loss_num": 0.01385498046875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 397832360, + "step": 7100 + }, + { + "epoch": 15.815144766146993, + "grad_norm": 15.830489158630371, + "learning_rate": 1e-06, + "loss": 0.5576, + "num_input_tokens_seen": 397888856, + "step": 7101 + }, + { + "epoch": 15.815144766146993, + "loss": 0.607633113861084, + "loss_ce": 8.912877819966525e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.0228271484375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 397888856, + "step": 7101 + }, + { + "epoch": 15.817371937639198, + "grad_norm": 19.18913459777832, + "learning_rate": 1e-06, + "loss": 0.4931, + "num_input_tokens_seen": 397941060, + "step": 7102 + }, + { + "epoch": 15.817371937639198, + "loss": 0.3700322210788727, + "loss_ce": 9.815287921810523e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0145263671875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 397941060, + "step": 7102 + }, + { + "epoch": 15.819599109131403, + "grad_norm": 13.465466499328613, + "learning_rate": 1e-06, + "loss": 0.3357, + "num_input_tokens_seen": 397996392, + "step": 7103 + }, + { + "epoch": 15.819599109131403, + "loss": 0.395857036113739, + "loss_ce": 0.00010508089326322079, + "loss_iou": 0.177734375, + "loss_num": 0.00811767578125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 397996392, + "step": 7103 + }, + { + "epoch": 15.821826280623608, + "grad_norm": 17.05833625793457, + "learning_rate": 1e-06, + "loss": 0.4311, + "num_input_tokens_seen": 398053248, + "step": 7104 + }, + { + "epoch": 15.821826280623608, + "loss": 0.4999108910560608, + "loss_ce": 0.0001550541928736493, + "loss_iou": 0.2060546875, + "loss_num": 0.0177001953125, + "loss_xval": 0.5, + "num_input_tokens_seen": 398053248, + "step": 7104 + }, + { + "epoch": 15.824053452115812, + "grad_norm": 20.213592529296875, + "learning_rate": 1e-06, + "loss": 0.4103, + "num_input_tokens_seen": 398110088, + "step": 7105 + }, + { + "epoch": 15.824053452115812, + "loss": 0.5278196334838867, + "loss_ce": 0.00010968356218654662, + "loss_iou": 0.224609375, + "loss_num": 0.015625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 398110088, + "step": 7105 + }, + { + "epoch": 15.826280623608017, + "grad_norm": 17.705123901367188, + "learning_rate": 1e-06, + "loss": 0.515, + "num_input_tokens_seen": 398168524, + "step": 7106 + }, + { + "epoch": 15.826280623608017, + "loss": 0.39392590522766113, + "loss_ce": 0.0007374440901912749, + "loss_iou": 0.1630859375, + "loss_num": 0.0133056640625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 398168524, + "step": 7106 + }, + { + "epoch": 15.828507795100222, + "grad_norm": 11.036231994628906, + "learning_rate": 1e-06, + "loss": 0.5384, + "num_input_tokens_seen": 398226688, + "step": 7107 + }, + { + "epoch": 15.828507795100222, + "loss": 0.5651922225952148, + "loss_ce": 0.00012873421655967832, + "loss_iou": 0.2314453125, + "loss_num": 0.0205078125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 398226688, + "step": 7107 + }, + { + "epoch": 15.830734966592427, + "grad_norm": 19.0125789642334, + "learning_rate": 1e-06, + "loss": 0.4158, + "num_input_tokens_seen": 398282356, + "step": 7108 + }, + { + "epoch": 15.830734966592427, + "loss": 0.28864729404449463, + "loss_ce": 7.308388012461364e-05, + "loss_iou": 0.12451171875, + "loss_num": 0.00799560546875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 398282356, + "step": 7108 + }, + { + "epoch": 15.832962138084632, + "grad_norm": 21.866071701049805, + "learning_rate": 1e-06, + "loss": 0.4139, + "num_input_tokens_seen": 398339436, + "step": 7109 + }, + { + "epoch": 15.832962138084632, + "loss": 0.5395206809043884, + "loss_ce": 9.19756930670701e-05, + "loss_iou": 0.2421875, + "loss_num": 0.0108642578125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 398339436, + "step": 7109 + }, + { + "epoch": 15.835189309576837, + "grad_norm": 25.225812911987305, + "learning_rate": 1e-06, + "loss": 0.5883, + "num_input_tokens_seen": 398395324, + "step": 7110 + }, + { + "epoch": 15.835189309576837, + "loss": 0.5729167461395264, + "loss_ce": 0.00016287056496366858, + "loss_iou": 0.25390625, + "loss_num": 0.01318359375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 398395324, + "step": 7110 + }, + { + "epoch": 15.837416481069042, + "grad_norm": 17.75248908996582, + "learning_rate": 1e-06, + "loss": 0.3587, + "num_input_tokens_seen": 398450532, + "step": 7111 + }, + { + "epoch": 15.837416481069042, + "loss": 0.3196753263473511, + "loss_ce": 9.522202162770554e-05, + "loss_iou": 0.142578125, + "loss_num": 0.00677490234375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 398450532, + "step": 7111 + }, + { + "epoch": 15.839643652561247, + "grad_norm": 18.968854904174805, + "learning_rate": 1e-06, + "loss": 0.3703, + "num_input_tokens_seen": 398505144, + "step": 7112 + }, + { + "epoch": 15.839643652561247, + "loss": 0.3004349172115326, + "loss_ce": 8.092600910458714e-05, + "loss_iou": 0.134765625, + "loss_num": 0.006195068359375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 398505144, + "step": 7112 + }, + { + "epoch": 15.841870824053451, + "grad_norm": 18.018905639648438, + "learning_rate": 1e-06, + "loss": 0.3754, + "num_input_tokens_seen": 398560608, + "step": 7113 + }, + { + "epoch": 15.841870824053451, + "loss": 0.3709629476070404, + "loss_ce": 0.0001133316254708916, + "loss_iou": 0.1611328125, + "loss_num": 0.00958251953125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 398560608, + "step": 7113 + }, + { + "epoch": 15.844097995545656, + "grad_norm": 24.227947235107422, + "learning_rate": 1e-06, + "loss": 0.4965, + "num_input_tokens_seen": 398617888, + "step": 7114 + }, + { + "epoch": 15.844097995545656, + "loss": 0.5535033941268921, + "loss_ce": 9.764648712007329e-05, + "loss_iou": 0.2421875, + "loss_num": 0.013671875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 398617888, + "step": 7114 + }, + { + "epoch": 15.846325167037861, + "grad_norm": 23.27136993408203, + "learning_rate": 1e-06, + "loss": 0.58, + "num_input_tokens_seen": 398671732, + "step": 7115 + }, + { + "epoch": 15.846325167037861, + "loss": 0.7010478973388672, + "loss_ce": 0.00010489902342669666, + "loss_iou": 0.3046875, + "loss_num": 0.018310546875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 398671732, + "step": 7115 + }, + { + "epoch": 15.848552338530066, + "grad_norm": 18.09553337097168, + "learning_rate": 1e-06, + "loss": 0.4632, + "num_input_tokens_seen": 398728176, + "step": 7116 + }, + { + "epoch": 15.848552338530066, + "loss": 0.4016414284706116, + "loss_ce": 9.112786210607737e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.0125732421875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 398728176, + "step": 7116 + }, + { + "epoch": 15.85077951002227, + "grad_norm": 17.273834228515625, + "learning_rate": 1e-06, + "loss": 0.6624, + "num_input_tokens_seen": 398783004, + "step": 7117 + }, + { + "epoch": 15.85077951002227, + "loss": 0.753657341003418, + "loss_ce": 0.00011729233665391803, + "loss_iou": 0.26953125, + "loss_num": 0.042724609375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 398783004, + "step": 7117 + }, + { + "epoch": 15.853006681514476, + "grad_norm": 12.48859977722168, + "learning_rate": 1e-06, + "loss": 0.4666, + "num_input_tokens_seen": 398842220, + "step": 7118 + }, + { + "epoch": 15.853006681514476, + "loss": 0.3523826599121094, + "loss_ce": 8.77102866070345e-05, + "loss_iou": 0.138671875, + "loss_num": 0.01513671875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 398842220, + "step": 7118 + }, + { + "epoch": 15.855233853006682, + "grad_norm": 13.259708404541016, + "learning_rate": 1e-06, + "loss": 0.4148, + "num_input_tokens_seen": 398899104, + "step": 7119 + }, + { + "epoch": 15.855233853006682, + "loss": 0.5045324563980103, + "loss_ce": 0.00013790541561320424, + "loss_iou": 0.1923828125, + "loss_num": 0.0240478515625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 398899104, + "step": 7119 + }, + { + "epoch": 15.857461024498887, + "grad_norm": 18.383527755737305, + "learning_rate": 1e-06, + "loss": 0.4292, + "num_input_tokens_seen": 398952944, + "step": 7120 + }, + { + "epoch": 15.857461024498887, + "loss": 0.42771950364112854, + "loss_ce": 0.00010719949204940349, + "loss_iou": 0.2001953125, + "loss_num": 0.005645751953125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 398952944, + "step": 7120 + }, + { + "epoch": 15.859688195991092, + "grad_norm": 44.14472579956055, + "learning_rate": 1e-06, + "loss": 0.4647, + "num_input_tokens_seen": 399007520, + "step": 7121 + }, + { + "epoch": 15.859688195991092, + "loss": 0.501062273979187, + "loss_ce": 8.574766980018467e-05, + "loss_iou": 0.201171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.5, + "num_input_tokens_seen": 399007520, + "step": 7121 + }, + { + "epoch": 15.861915367483297, + "grad_norm": 19.604982376098633, + "learning_rate": 1e-06, + "loss": 0.5346, + "num_input_tokens_seen": 399062428, + "step": 7122 + }, + { + "epoch": 15.861915367483297, + "loss": 0.5388393998146057, + "loss_ce": 8.2079553976655e-05, + "loss_iou": 0.220703125, + "loss_num": 0.0194091796875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 399062428, + "step": 7122 + }, + { + "epoch": 15.864142538975502, + "grad_norm": 15.576171875, + "learning_rate": 1e-06, + "loss": 0.4607, + "num_input_tokens_seen": 399117136, + "step": 7123 + }, + { + "epoch": 15.864142538975502, + "loss": 0.6172924637794495, + "loss_ce": 0.00010496602772036567, + "loss_iou": 0.28515625, + "loss_num": 0.00933837890625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 399117136, + "step": 7123 + }, + { + "epoch": 15.866369710467707, + "grad_norm": 20.8918514251709, + "learning_rate": 1e-06, + "loss": 0.4582, + "num_input_tokens_seen": 399172568, + "step": 7124 + }, + { + "epoch": 15.866369710467707, + "loss": 0.3362141251564026, + "loss_ce": 9.351145854452625e-05, + "loss_iou": 0.142578125, + "loss_num": 0.01025390625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 399172568, + "step": 7124 + }, + { + "epoch": 15.868596881959911, + "grad_norm": 14.206762313842773, + "learning_rate": 1e-06, + "loss": 0.3939, + "num_input_tokens_seen": 399229784, + "step": 7125 + }, + { + "epoch": 15.868596881959911, + "loss": 0.3726486265659332, + "loss_ce": 9.005493484437466e-05, + "loss_iou": 0.154296875, + "loss_num": 0.01300048828125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 399229784, + "step": 7125 + }, + { + "epoch": 15.870824053452116, + "grad_norm": 40.32673263549805, + "learning_rate": 1e-06, + "loss": 0.4629, + "num_input_tokens_seen": 399285780, + "step": 7126 + }, + { + "epoch": 15.870824053452116, + "loss": 0.49388349056243896, + "loss_ce": 0.00010904869122896343, + "loss_iou": 0.234375, + "loss_num": 0.004852294921875, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 399285780, + "step": 7126 + }, + { + "epoch": 15.873051224944321, + "grad_norm": 13.4733304977417, + "learning_rate": 1e-06, + "loss": 0.5213, + "num_input_tokens_seen": 399342376, + "step": 7127 + }, + { + "epoch": 15.873051224944321, + "loss": 0.5921471118927002, + "loss_ce": 0.00010613157064653933, + "loss_iou": 0.265625, + "loss_num": 0.01226806640625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 399342376, + "step": 7127 + }, + { + "epoch": 15.875278396436526, + "grad_norm": 15.447736740112305, + "learning_rate": 1e-06, + "loss": 0.4606, + "num_input_tokens_seen": 399400404, + "step": 7128 + }, + { + "epoch": 15.875278396436526, + "loss": 0.5045484304428101, + "loss_ce": 0.00015390958287753165, + "loss_iou": 0.2158203125, + "loss_num": 0.0146484375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 399400404, + "step": 7128 + }, + { + "epoch": 15.877505567928731, + "grad_norm": 15.931106567382812, + "learning_rate": 1e-06, + "loss": 0.4239, + "num_input_tokens_seen": 399455484, + "step": 7129 + }, + { + "epoch": 15.877505567928731, + "loss": 0.4633306860923767, + "loss_ce": 0.00031800862052477896, + "loss_iou": 0.19921875, + "loss_num": 0.0128173828125, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 399455484, + "step": 7129 + }, + { + "epoch": 15.879732739420936, + "grad_norm": 17.251869201660156, + "learning_rate": 1e-06, + "loss": 0.3316, + "num_input_tokens_seen": 399510620, + "step": 7130 + }, + { + "epoch": 15.879732739420936, + "loss": 0.31160733103752136, + "loss_ce": 8.388744026888162e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.00946044921875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 399510620, + "step": 7130 + }, + { + "epoch": 15.88195991091314, + "grad_norm": 20.6621150970459, + "learning_rate": 1e-06, + "loss": 0.367, + "num_input_tokens_seen": 399567148, + "step": 7131 + }, + { + "epoch": 15.88195991091314, + "loss": 0.39595168828964233, + "loss_ce": 7.766317867208272e-05, + "loss_iou": 0.18359375, + "loss_num": 0.005767822265625, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 399567148, + "step": 7131 + }, + { + "epoch": 15.884187082405345, + "grad_norm": 20.26247215270996, + "learning_rate": 1e-06, + "loss": 0.5707, + "num_input_tokens_seen": 399619636, + "step": 7132 + }, + { + "epoch": 15.884187082405345, + "loss": 0.6330655217170715, + "loss_ce": 0.000130962478579022, + "loss_iou": 0.267578125, + "loss_num": 0.02001953125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 399619636, + "step": 7132 + }, + { + "epoch": 15.88641425389755, + "grad_norm": 19.18486213684082, + "learning_rate": 1e-06, + "loss": 0.5153, + "num_input_tokens_seen": 399674064, + "step": 7133 + }, + { + "epoch": 15.88641425389755, + "loss": 0.5503383874893188, + "loss_ce": 0.00010647171438904479, + "loss_iou": 0.228515625, + "loss_num": 0.0185546875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 399674064, + "step": 7133 + }, + { + "epoch": 15.888641425389755, + "grad_norm": 16.185976028442383, + "learning_rate": 1e-06, + "loss": 0.3956, + "num_input_tokens_seen": 399728452, + "step": 7134 + }, + { + "epoch": 15.888641425389755, + "loss": 0.24313417077064514, + "loss_ce": 9.21574974199757e-05, + "loss_iou": 0.10888671875, + "loss_num": 0.005096435546875, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 399728452, + "step": 7134 + }, + { + "epoch": 15.89086859688196, + "grad_norm": 18.77434539794922, + "learning_rate": 1e-06, + "loss": 0.5478, + "num_input_tokens_seen": 399783984, + "step": 7135 + }, + { + "epoch": 15.89086859688196, + "loss": 0.4603026211261749, + "loss_ce": 9.756326471688226e-05, + "loss_iou": 0.203125, + "loss_num": 0.0106201171875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 399783984, + "step": 7135 + }, + { + "epoch": 15.893095768374165, + "grad_norm": 17.734636306762695, + "learning_rate": 1e-06, + "loss": 0.53, + "num_input_tokens_seen": 399839268, + "step": 7136 + }, + { + "epoch": 15.893095768374165, + "loss": 0.6124453544616699, + "loss_ce": 0.00014065181312616915, + "loss_iou": 0.251953125, + "loss_num": 0.0216064453125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 399839268, + "step": 7136 + }, + { + "epoch": 15.89532293986637, + "grad_norm": 20.12199592590332, + "learning_rate": 1e-06, + "loss": 0.4646, + "num_input_tokens_seen": 399896836, + "step": 7137 + }, + { + "epoch": 15.89532293986637, + "loss": 0.5352360010147095, + "loss_ce": 7.976061169756576e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.013427734375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 399896836, + "step": 7137 + }, + { + "epoch": 15.897550111358575, + "grad_norm": 20.401044845581055, + "learning_rate": 1e-06, + "loss": 0.5685, + "num_input_tokens_seen": 399954484, + "step": 7138 + }, + { + "epoch": 15.897550111358575, + "loss": 0.512428879737854, + "loss_ce": 9.979259630199522e-05, + "loss_iou": 0.212890625, + "loss_num": 0.0172119140625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 399954484, + "step": 7138 + }, + { + "epoch": 15.89977728285078, + "grad_norm": 33.50958251953125, + "learning_rate": 1e-06, + "loss": 0.4488, + "num_input_tokens_seen": 400013840, + "step": 7139 + }, + { + "epoch": 15.89977728285078, + "loss": 0.44518011808395386, + "loss_ce": 0.00015754257037770003, + "loss_iou": 0.19140625, + "loss_num": 0.01263427734375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 400013840, + "step": 7139 + }, + { + "epoch": 15.902004454342984, + "grad_norm": 14.90358829498291, + "learning_rate": 1e-06, + "loss": 0.6293, + "num_input_tokens_seen": 400068508, + "step": 7140 + }, + { + "epoch": 15.902004454342984, + "loss": 0.7256982326507568, + "loss_ce": 0.00011228243238292634, + "loss_iou": 0.294921875, + "loss_num": 0.0277099609375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 400068508, + "step": 7140 + }, + { + "epoch": 15.90423162583519, + "grad_norm": 15.052915573120117, + "learning_rate": 1e-06, + "loss": 0.3924, + "num_input_tokens_seen": 400125004, + "step": 7141 + }, + { + "epoch": 15.90423162583519, + "loss": 0.49386632442474365, + "loss_ce": 0.00015298393554985523, + "loss_iou": 0.1884765625, + "loss_num": 0.0235595703125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 400125004, + "step": 7141 + }, + { + "epoch": 15.906458797327394, + "grad_norm": 21.69573402404785, + "learning_rate": 1e-06, + "loss": 0.4718, + "num_input_tokens_seen": 400179284, + "step": 7142 + }, + { + "epoch": 15.906458797327394, + "loss": 0.2900165319442749, + "loss_ce": 9.953400876838714e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.0067138671875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 400179284, + "step": 7142 + }, + { + "epoch": 15.908685968819599, + "grad_norm": 17.714357376098633, + "learning_rate": 1e-06, + "loss": 0.5839, + "num_input_tokens_seen": 400233780, + "step": 7143 + }, + { + "epoch": 15.908685968819599, + "loss": 0.5741137862205505, + "loss_ce": 0.0001391808473272249, + "loss_iou": 0.265625, + "loss_num": 0.0089111328125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 400233780, + "step": 7143 + }, + { + "epoch": 15.910913140311804, + "grad_norm": 32.93324279785156, + "learning_rate": 1e-06, + "loss": 0.5251, + "num_input_tokens_seen": 400291408, + "step": 7144 + }, + { + "epoch": 15.910913140311804, + "loss": 0.4064697325229645, + "loss_ce": 9.767108713276684e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.00897216796875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 400291408, + "step": 7144 + }, + { + "epoch": 15.913140311804009, + "grad_norm": 19.784011840820312, + "learning_rate": 1e-06, + "loss": 0.5329, + "num_input_tokens_seen": 400348664, + "step": 7145 + }, + { + "epoch": 15.913140311804009, + "loss": 0.4993619918823242, + "loss_ce": 9.443063754588366e-05, + "loss_iou": 0.216796875, + "loss_num": 0.01300048828125, + "loss_xval": 0.5, + "num_input_tokens_seen": 400348664, + "step": 7145 + }, + { + "epoch": 15.915367483296214, + "grad_norm": 17.10857391357422, + "learning_rate": 1e-06, + "loss": 0.4308, + "num_input_tokens_seen": 400402800, + "step": 7146 + }, + { + "epoch": 15.915367483296214, + "loss": 0.5981404781341553, + "loss_ce": 0.0002400614321231842, + "loss_iou": 0.251953125, + "loss_num": 0.0191650390625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 400402800, + "step": 7146 + }, + { + "epoch": 15.917594654788418, + "grad_norm": 18.01358413696289, + "learning_rate": 1e-06, + "loss": 0.4396, + "num_input_tokens_seen": 400460196, + "step": 7147 + }, + { + "epoch": 15.917594654788418, + "loss": 0.524014949798584, + "loss_ce": 8.919274841900915e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0211181640625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 400460196, + "step": 7147 + }, + { + "epoch": 15.919821826280623, + "grad_norm": 16.308366775512695, + "learning_rate": 1e-06, + "loss": 0.5253, + "num_input_tokens_seen": 400517080, + "step": 7148 + }, + { + "epoch": 15.919821826280623, + "loss": 0.4072136878967285, + "loss_ce": 0.00010919298802036792, + "loss_iou": 0.1591796875, + "loss_num": 0.017578125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 400517080, + "step": 7148 + }, + { + "epoch": 15.922048997772828, + "grad_norm": 14.491548538208008, + "learning_rate": 1e-06, + "loss": 0.4755, + "num_input_tokens_seen": 400572672, + "step": 7149 + }, + { + "epoch": 15.922048997772828, + "loss": 0.4918016791343689, + "loss_ce": 0.0001024569064611569, + "loss_iou": 0.224609375, + "loss_num": 0.0084228515625, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 400572672, + "step": 7149 + }, + { + "epoch": 15.924276169265033, + "grad_norm": 25.39553451538086, + "learning_rate": 1e-06, + "loss": 0.5337, + "num_input_tokens_seen": 400628244, + "step": 7150 + }, + { + "epoch": 15.924276169265033, + "loss": 0.423916757106781, + "loss_ce": 8.863602124620229e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.015625, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 400628244, + "step": 7150 + }, + { + "epoch": 15.926503340757238, + "grad_norm": 18.745763778686523, + "learning_rate": 1e-06, + "loss": 0.4148, + "num_input_tokens_seen": 400685080, + "step": 7151 + }, + { + "epoch": 15.926503340757238, + "loss": 0.4842875599861145, + "loss_ce": 0.00012620513734873384, + "loss_iou": 0.189453125, + "loss_num": 0.021240234375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 400685080, + "step": 7151 + }, + { + "epoch": 15.928730512249443, + "grad_norm": 21.89429473876953, + "learning_rate": 1e-06, + "loss": 0.4732, + "num_input_tokens_seen": 400736332, + "step": 7152 + }, + { + "epoch": 15.928730512249443, + "loss": 0.4724288582801819, + "loss_ce": 7.779937004670501e-05, + "loss_iou": 0.197265625, + "loss_num": 0.015869140625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 400736332, + "step": 7152 + }, + { + "epoch": 15.930957683741648, + "grad_norm": 22.908029556274414, + "learning_rate": 1e-06, + "loss": 0.2874, + "num_input_tokens_seen": 400792008, + "step": 7153 + }, + { + "epoch": 15.930957683741648, + "loss": 0.2497827112674713, + "loss_ce": 8.78690043464303e-05, + "loss_iou": 0.11376953125, + "loss_num": 0.004364013671875, + "loss_xval": 0.25, + "num_input_tokens_seen": 400792008, + "step": 7153 + }, + { + "epoch": 15.933184855233852, + "grad_norm": 16.189254760742188, + "learning_rate": 1e-06, + "loss": 0.4473, + "num_input_tokens_seen": 400849396, + "step": 7154 + }, + { + "epoch": 15.933184855233852, + "loss": 0.4695594310760498, + "loss_ce": 7.704535528318956e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0103759765625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 400849396, + "step": 7154 + }, + { + "epoch": 15.935412026726057, + "grad_norm": 19.59444236755371, + "learning_rate": 1e-06, + "loss": 0.4609, + "num_input_tokens_seen": 400905600, + "step": 7155 + }, + { + "epoch": 15.935412026726057, + "loss": 0.26319292187690735, + "loss_ce": 0.0001313859538640827, + "loss_iou": 0.1103515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 400905600, + "step": 7155 + }, + { + "epoch": 15.937639198218262, + "grad_norm": 14.831480979919434, + "learning_rate": 1e-06, + "loss": 0.5633, + "num_input_tokens_seen": 400964476, + "step": 7156 + }, + { + "epoch": 15.937639198218262, + "loss": 0.6740528345108032, + "loss_ce": 0.00010258887050440535, + "loss_iou": 0.275390625, + "loss_num": 0.0247802734375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 400964476, + "step": 7156 + }, + { + "epoch": 15.939866369710467, + "grad_norm": 22.283842086791992, + "learning_rate": 1e-06, + "loss": 0.4229, + "num_input_tokens_seen": 401020968, + "step": 7157 + }, + { + "epoch": 15.939866369710467, + "loss": 0.48083391785621643, + "loss_ce": 9.051487722899765e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0107421875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 401020968, + "step": 7157 + }, + { + "epoch": 15.942093541202672, + "grad_norm": 13.97571849822998, + "learning_rate": 1e-06, + "loss": 0.4721, + "num_input_tokens_seen": 401075944, + "step": 7158 + }, + { + "epoch": 15.942093541202672, + "loss": 0.3242437243461609, + "loss_ce": 8.600985893281177e-05, + "loss_iou": 0.1484375, + "loss_num": 0.00555419921875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 401075944, + "step": 7158 + }, + { + "epoch": 15.944320712694877, + "grad_norm": 25.421710968017578, + "learning_rate": 1e-06, + "loss": 0.7803, + "num_input_tokens_seen": 401130976, + "step": 7159 + }, + { + "epoch": 15.944320712694877, + "loss": 0.8276197910308838, + "loss_ce": 0.00010511695290915668, + "loss_iou": 0.3359375, + "loss_num": 0.031494140625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 401130976, + "step": 7159 + }, + { + "epoch": 15.946547884187082, + "grad_norm": 39.614234924316406, + "learning_rate": 1e-06, + "loss": 0.4332, + "num_input_tokens_seen": 401186464, + "step": 7160 + }, + { + "epoch": 15.946547884187082, + "loss": 0.483007550239563, + "loss_ce": 9.739773668115959e-05, + "loss_iou": 0.19921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 401186464, + "step": 7160 + }, + { + "epoch": 15.948775055679288, + "grad_norm": 16.9848690032959, + "learning_rate": 1e-06, + "loss": 0.5029, + "num_input_tokens_seen": 401242136, + "step": 7161 + }, + { + "epoch": 15.948775055679288, + "loss": 0.5525789260864258, + "loss_ce": 8.865697600413114e-05, + "loss_iou": 0.232421875, + "loss_num": 0.0174560546875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 401242136, + "step": 7161 + }, + { + "epoch": 15.951002227171493, + "grad_norm": 19.457195281982422, + "learning_rate": 1e-06, + "loss": 0.4659, + "num_input_tokens_seen": 401297980, + "step": 7162 + }, + { + "epoch": 15.951002227171493, + "loss": 0.28521448373794556, + "loss_ce": 8.872566104400903e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.005218505859375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 401297980, + "step": 7162 + }, + { + "epoch": 15.953229398663698, + "grad_norm": 13.675963401794434, + "learning_rate": 1e-06, + "loss": 0.3121, + "num_input_tokens_seen": 401355992, + "step": 7163 + }, + { + "epoch": 15.953229398663698, + "loss": 0.300504207611084, + "loss_ce": 8.916326623875648e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.005035400390625, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 401355992, + "step": 7163 + }, + { + "epoch": 15.955456570155903, + "grad_norm": 17.08403205871582, + "learning_rate": 1e-06, + "loss": 0.6135, + "num_input_tokens_seen": 401410732, + "step": 7164 + }, + { + "epoch": 15.955456570155903, + "loss": 0.6625217199325562, + "loss_ce": 0.0001681805297266692, + "loss_iou": 0.27734375, + "loss_num": 0.021484375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 401410732, + "step": 7164 + }, + { + "epoch": 15.957683741648108, + "grad_norm": 16.065011978149414, + "learning_rate": 1e-06, + "loss": 0.4816, + "num_input_tokens_seen": 401466736, + "step": 7165 + }, + { + "epoch": 15.957683741648108, + "loss": 0.605895459651947, + "loss_ce": 0.00018256741168443114, + "loss_iou": 0.2392578125, + "loss_num": 0.0255126953125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 401466736, + "step": 7165 + }, + { + "epoch": 15.959910913140313, + "grad_norm": 15.503355979919434, + "learning_rate": 1e-06, + "loss": 0.5208, + "num_input_tokens_seen": 401525152, + "step": 7166 + }, + { + "epoch": 15.959910913140313, + "loss": 0.6843163967132568, + "loss_ce": 0.00011229477968299761, + "loss_iou": 0.306640625, + "loss_num": 0.01446533203125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 401525152, + "step": 7166 + }, + { + "epoch": 15.962138084632517, + "grad_norm": 23.097681045532227, + "learning_rate": 1e-06, + "loss": 0.3613, + "num_input_tokens_seen": 401576820, + "step": 7167 + }, + { + "epoch": 15.962138084632517, + "loss": 0.289186954498291, + "loss_ce": 0.0001244572049472481, + "loss_iou": 0.11083984375, + "loss_num": 0.01348876953125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 401576820, + "step": 7167 + }, + { + "epoch": 15.964365256124722, + "grad_norm": 13.968826293945312, + "learning_rate": 1e-06, + "loss": 0.3522, + "num_input_tokens_seen": 401630452, + "step": 7168 + }, + { + "epoch": 15.964365256124722, + "loss": 0.37191227078437805, + "loss_ce": 8.611210068920627e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.00994873046875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 401630452, + "step": 7168 + }, + { + "epoch": 15.966592427616927, + "grad_norm": 16.292312622070312, + "learning_rate": 1e-06, + "loss": 0.3667, + "num_input_tokens_seen": 401686368, + "step": 7169 + }, + { + "epoch": 15.966592427616927, + "loss": 0.37168073654174805, + "loss_ce": 9.872023656498641e-05, + "loss_iou": 0.16796875, + "loss_num": 0.006988525390625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 401686368, + "step": 7169 + }, + { + "epoch": 15.968819599109132, + "grad_norm": 30.845420837402344, + "learning_rate": 1e-06, + "loss": 0.4289, + "num_input_tokens_seen": 401743020, + "step": 7170 + }, + { + "epoch": 15.968819599109132, + "loss": 0.4194309413433075, + "loss_ce": 0.00011940038530156016, + "loss_iou": 0.1787109375, + "loss_num": 0.01220703125, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 401743020, + "step": 7170 + }, + { + "epoch": 15.971046770601337, + "grad_norm": 24.887876510620117, + "learning_rate": 1e-06, + "loss": 0.5927, + "num_input_tokens_seen": 401795184, + "step": 7171 + }, + { + "epoch": 15.971046770601337, + "loss": 0.7264291048049927, + "loss_ce": 0.00011069556057918817, + "loss_iou": 0.30859375, + "loss_num": 0.021484375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 401795184, + "step": 7171 + }, + { + "epoch": 15.973273942093542, + "grad_norm": 14.26672077178955, + "learning_rate": 1e-06, + "loss": 0.4417, + "num_input_tokens_seen": 401852640, + "step": 7172 + }, + { + "epoch": 15.973273942093542, + "loss": 0.4775257706642151, + "loss_ce": 0.00010878611647058278, + "loss_iou": 0.2138671875, + "loss_num": 0.00994873046875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 401852640, + "step": 7172 + }, + { + "epoch": 15.975501113585747, + "grad_norm": 20.04558753967285, + "learning_rate": 1e-06, + "loss": 0.3823, + "num_input_tokens_seen": 401909140, + "step": 7173 + }, + { + "epoch": 15.975501113585747, + "loss": 0.35957685112953186, + "loss_ce": 7.978198118507862e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00579833984375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 401909140, + "step": 7173 + }, + { + "epoch": 15.977728285077951, + "grad_norm": 14.882387161254883, + "learning_rate": 1e-06, + "loss": 0.4257, + "num_input_tokens_seen": 401968400, + "step": 7174 + }, + { + "epoch": 15.977728285077951, + "loss": 0.48474133014678955, + "loss_ce": 0.00012219653581269085, + "loss_iou": 0.220703125, + "loss_num": 0.0086669921875, + "loss_xval": 0.484375, + "num_input_tokens_seen": 401968400, + "step": 7174 + }, + { + "epoch": 15.979955456570156, + "grad_norm": 12.847021102905273, + "learning_rate": 1e-06, + "loss": 0.3644, + "num_input_tokens_seen": 402023480, + "step": 7175 + }, + { + "epoch": 15.979955456570156, + "loss": 0.4103115499019623, + "loss_ce": 9.426410542801023e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.01287841796875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 402023480, + "step": 7175 + }, + { + "epoch": 15.982182628062361, + "grad_norm": 13.715554237365723, + "learning_rate": 1e-06, + "loss": 0.3894, + "num_input_tokens_seen": 402080180, + "step": 7176 + }, + { + "epoch": 15.982182628062361, + "loss": 0.47568291425704956, + "loss_ce": 9.699161455500871e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.01202392578125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 402080180, + "step": 7176 + }, + { + "epoch": 15.984409799554566, + "grad_norm": 16.425464630126953, + "learning_rate": 1e-06, + "loss": 0.3162, + "num_input_tokens_seen": 402135848, + "step": 7177 + }, + { + "epoch": 15.984409799554566, + "loss": 0.38852375745773315, + "loss_ce": 9.600124030839652e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.007171630859375, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 402135848, + "step": 7177 + }, + { + "epoch": 15.98663697104677, + "grad_norm": 15.072603225708008, + "learning_rate": 1e-06, + "loss": 0.4141, + "num_input_tokens_seen": 402191316, + "step": 7178 + }, + { + "epoch": 15.98663697104677, + "loss": 0.48317813873291016, + "loss_ce": 0.00014590806677006185, + "loss_iou": 0.2080078125, + "loss_num": 0.01336669921875, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 402191316, + "step": 7178 + }, + { + "epoch": 15.988864142538976, + "grad_norm": 26.326980590820312, + "learning_rate": 1e-06, + "loss": 0.4093, + "num_input_tokens_seen": 402246396, + "step": 7179 + }, + { + "epoch": 15.988864142538976, + "loss": 0.3864772319793701, + "loss_ce": 0.0001246822066605091, + "loss_iou": 0.1708984375, + "loss_num": 0.00872802734375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 402246396, + "step": 7179 + }, + { + "epoch": 15.99109131403118, + "grad_norm": 14.615377426147461, + "learning_rate": 1e-06, + "loss": 0.3313, + "num_input_tokens_seen": 402303500, + "step": 7180 + }, + { + "epoch": 15.99109131403118, + "loss": 0.2933087646961212, + "loss_ce": 0.00015690606960561126, + "loss_iou": 0.12060546875, + "loss_num": 0.01025390625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 402303500, + "step": 7180 + }, + { + "epoch": 15.993318485523385, + "grad_norm": 20.349151611328125, + "learning_rate": 1e-06, + "loss": 0.3356, + "num_input_tokens_seen": 402358536, + "step": 7181 + }, + { + "epoch": 15.993318485523385, + "loss": 0.3299248516559601, + "loss_ce": 9.087211219593883e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.00860595703125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 402358536, + "step": 7181 + }, + { + "epoch": 15.99554565701559, + "grad_norm": 14.283751487731934, + "learning_rate": 1e-06, + "loss": 0.3803, + "num_input_tokens_seen": 402414524, + "step": 7182 + }, + { + "epoch": 15.99554565701559, + "loss": 0.2790136933326721, + "loss_ce": 8.300953777506948e-05, + "loss_iou": 0.1025390625, + "loss_num": 0.01470947265625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 402414524, + "step": 7182 + }, + { + "epoch": 15.997772828507795, + "grad_norm": 53.64034652709961, + "learning_rate": 1e-06, + "loss": 0.4319, + "num_input_tokens_seen": 402473320, + "step": 7183 + }, + { + "epoch": 15.997772828507795, + "loss": 0.478466272354126, + "loss_ce": 0.00019476463785395026, + "loss_iou": 0.2236328125, + "loss_num": 0.006195068359375, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 402473320, + "step": 7183 + }, + { + "epoch": 16.0, + "grad_norm": 29.848583221435547, + "learning_rate": 1e-06, + "loss": 0.4615, + "num_input_tokens_seen": 402529284, + "step": 7184 + }, + { + "epoch": 16.0, + "loss": 0.5135668516159058, + "loss_ce": 0.00013914526789449155, + "loss_iou": 0.220703125, + "loss_num": 0.01422119140625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 402529284, + "step": 7184 + }, + { + "epoch": 16.002227171492205, + "grad_norm": 27.619600296020508, + "learning_rate": 1e-06, + "loss": 0.4962, + "num_input_tokens_seen": 402585352, + "step": 7185 + }, + { + "epoch": 16.002227171492205, + "loss": 0.6742956042289734, + "loss_ce": 0.00010129276779480278, + "loss_iou": 0.279296875, + "loss_num": 0.0233154296875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 402585352, + "step": 7185 + }, + { + "epoch": 16.00445434298441, + "grad_norm": 20.31178855895996, + "learning_rate": 1e-06, + "loss": 0.5351, + "num_input_tokens_seen": 402638996, + "step": 7186 + }, + { + "epoch": 16.00445434298441, + "loss": 0.5318279266357422, + "loss_ce": 8.96200945135206e-05, + "loss_iou": 0.23828125, + "loss_num": 0.0113525390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 402638996, + "step": 7186 + }, + { + "epoch": 16.006681514476615, + "grad_norm": 17.070863723754883, + "learning_rate": 1e-06, + "loss": 0.448, + "num_input_tokens_seen": 402695996, + "step": 7187 + }, + { + "epoch": 16.006681514476615, + "loss": 0.4115889072418213, + "loss_ce": 8.989499474409968e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.0140380859375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 402695996, + "step": 7187 + }, + { + "epoch": 16.00890868596882, + "grad_norm": 12.187263488769531, + "learning_rate": 1e-06, + "loss": 0.2493, + "num_input_tokens_seen": 402753988, + "step": 7188 + }, + { + "epoch": 16.00890868596882, + "loss": 0.24472574889659882, + "loss_ce": 9.685206168796867e-05, + "loss_iou": 0.10498046875, + "loss_num": 0.006927490234375, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 402753988, + "step": 7188 + }, + { + "epoch": 16.011135857461024, + "grad_norm": 13.688761711120605, + "learning_rate": 1e-06, + "loss": 0.3247, + "num_input_tokens_seen": 402810468, + "step": 7189 + }, + { + "epoch": 16.011135857461024, + "loss": 0.42870235443115234, + "loss_ce": 0.00011348059342708439, + "loss_iou": 0.1826171875, + "loss_num": 0.01263427734375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 402810468, + "step": 7189 + }, + { + "epoch": 16.01336302895323, + "grad_norm": 17.964651107788086, + "learning_rate": 1e-06, + "loss": 0.5188, + "num_input_tokens_seen": 402862988, + "step": 7190 + }, + { + "epoch": 16.01336302895323, + "loss": 0.5445123314857483, + "loss_ce": 7.876359450165182e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.029052734375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 402862988, + "step": 7190 + }, + { + "epoch": 16.015590200445434, + "grad_norm": 22.054534912109375, + "learning_rate": 1e-06, + "loss": 0.3839, + "num_input_tokens_seen": 402919908, + "step": 7191 + }, + { + "epoch": 16.015590200445434, + "loss": 0.3673054575920105, + "loss_ce": 0.00011793937301263213, + "loss_iou": 0.171875, + "loss_num": 0.004669189453125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 402919908, + "step": 7191 + }, + { + "epoch": 16.01781737193764, + "grad_norm": 16.937606811523438, + "learning_rate": 1e-06, + "loss": 0.4729, + "num_input_tokens_seen": 402976004, + "step": 7192 + }, + { + "epoch": 16.01781737193764, + "loss": 0.5479812026023865, + "loss_ce": 0.0001907070109155029, + "loss_iou": 0.2421875, + "loss_num": 0.0126953125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 402976004, + "step": 7192 + }, + { + "epoch": 16.020044543429844, + "grad_norm": 21.24249839782715, + "learning_rate": 1e-06, + "loss": 0.4644, + "num_input_tokens_seen": 403032388, + "step": 7193 + }, + { + "epoch": 16.020044543429844, + "loss": 0.3727763891220093, + "loss_ce": 0.00015674906899221241, + "loss_iou": 0.1708984375, + "loss_num": 0.006072998046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 403032388, + "step": 7193 + }, + { + "epoch": 16.02227171492205, + "grad_norm": 15.852849960327148, + "learning_rate": 1e-06, + "loss": 0.4715, + "num_input_tokens_seen": 403091080, + "step": 7194 + }, + { + "epoch": 16.02227171492205, + "loss": 0.5445390939712524, + "loss_ce": 0.00010548095451667905, + "loss_iou": 0.232421875, + "loss_num": 0.01611328125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 403091080, + "step": 7194 + }, + { + "epoch": 16.024498886414253, + "grad_norm": 22.950151443481445, + "learning_rate": 1e-06, + "loss": 0.648, + "num_input_tokens_seen": 403148524, + "step": 7195 + }, + { + "epoch": 16.024498886414253, + "loss": 0.6838928461074829, + "loss_ce": 0.00017702819604892284, + "loss_iou": 0.2734375, + "loss_num": 0.02783203125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 403148524, + "step": 7195 + }, + { + "epoch": 16.02672605790646, + "grad_norm": 21.878934860229492, + "learning_rate": 1e-06, + "loss": 0.5133, + "num_input_tokens_seen": 403205004, + "step": 7196 + }, + { + "epoch": 16.02672605790646, + "loss": 0.5003616809844971, + "loss_ce": 0.0003617034526541829, + "loss_iou": 0.2255859375, + "loss_num": 0.0096435546875, + "loss_xval": 0.5, + "num_input_tokens_seen": 403205004, + "step": 7196 + }, + { + "epoch": 16.028953229398663, + "grad_norm": 24.652055740356445, + "learning_rate": 1e-06, + "loss": 0.5203, + "num_input_tokens_seen": 403256684, + "step": 7197 + }, + { + "epoch": 16.028953229398663, + "loss": 0.38059282302856445, + "loss_ce": 9.965993376681581e-05, + "loss_iou": 0.15625, + "loss_num": 0.01361083984375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 403256684, + "step": 7197 + }, + { + "epoch": 16.031180400890868, + "grad_norm": 14.730175018310547, + "learning_rate": 1e-06, + "loss": 0.5587, + "num_input_tokens_seen": 403313248, + "step": 7198 + }, + { + "epoch": 16.031180400890868, + "loss": 0.6153490543365479, + "loss_ce": 0.00011463207920314744, + "loss_iou": 0.259765625, + "loss_num": 0.019287109375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 403313248, + "step": 7198 + }, + { + "epoch": 16.033407572383073, + "grad_norm": 18.867799758911133, + "learning_rate": 1e-06, + "loss": 0.4701, + "num_input_tokens_seen": 403371832, + "step": 7199 + }, + { + "epoch": 16.033407572383073, + "loss": 0.6135093569755554, + "loss_ce": 0.0002280865446664393, + "loss_iou": 0.26953125, + "loss_num": 0.01519775390625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 403371832, + "step": 7199 + }, + { + "epoch": 16.035634743875278, + "grad_norm": 15.737879753112793, + "learning_rate": 1e-06, + "loss": 0.6626, + "num_input_tokens_seen": 403428312, + "step": 7200 + }, + { + "epoch": 16.035634743875278, + "loss": 0.7531107068061829, + "loss_ce": 0.00018101301975548267, + "loss_iou": 0.337890625, + "loss_num": 0.01519775390625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 403428312, + "step": 7200 + }, + { + "epoch": 16.037861915367483, + "grad_norm": 16.80666160583496, + "learning_rate": 1e-06, + "loss": 0.4065, + "num_input_tokens_seen": 403486724, + "step": 7201 + }, + { + "epoch": 16.037861915367483, + "loss": 0.47910380363464355, + "loss_ce": 9.990914259105921e-05, + "loss_iou": 0.22265625, + "loss_num": 0.006500244140625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 403486724, + "step": 7201 + }, + { + "epoch": 16.040089086859687, + "grad_norm": 11.938071250915527, + "learning_rate": 1e-06, + "loss": 0.4147, + "num_input_tokens_seen": 403541852, + "step": 7202 + }, + { + "epoch": 16.040089086859687, + "loss": 0.4437464475631714, + "loss_ce": 8.189181244233623e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0133056640625, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 403541852, + "step": 7202 + }, + { + "epoch": 16.042316258351892, + "grad_norm": 33.16254806518555, + "learning_rate": 1e-06, + "loss": 0.4485, + "num_input_tokens_seen": 403598276, + "step": 7203 + }, + { + "epoch": 16.042316258351892, + "loss": 0.3794906437397003, + "loss_ce": 9.610810957383364e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0087890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 403598276, + "step": 7203 + }, + { + "epoch": 16.044543429844097, + "grad_norm": 28.809633255004883, + "learning_rate": 1e-06, + "loss": 0.4134, + "num_input_tokens_seen": 403657000, + "step": 7204 + }, + { + "epoch": 16.044543429844097, + "loss": 0.3428630530834198, + "loss_ce": 8.960704144556075e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.006378173828125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 403657000, + "step": 7204 + }, + { + "epoch": 16.046770601336302, + "grad_norm": 25.093759536743164, + "learning_rate": 1e-06, + "loss": 0.6919, + "num_input_tokens_seen": 403711572, + "step": 7205 + }, + { + "epoch": 16.046770601336302, + "loss": 0.38008594512939453, + "loss_ce": 8.106790482997894e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.01165771484375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 403711572, + "step": 7205 + }, + { + "epoch": 16.048997772828507, + "grad_norm": 19.109130859375, + "learning_rate": 1e-06, + "loss": 0.2554, + "num_input_tokens_seen": 403767064, + "step": 7206 + }, + { + "epoch": 16.048997772828507, + "loss": 0.22988614439964294, + "loss_ce": 8.877603249857202e-05, + "loss_iou": 0.10009765625, + "loss_num": 0.005889892578125, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 403767064, + "step": 7206 + }, + { + "epoch": 16.051224944320712, + "grad_norm": 14.356725692749023, + "learning_rate": 1e-06, + "loss": 0.4188, + "num_input_tokens_seen": 403823508, + "step": 7207 + }, + { + "epoch": 16.051224944320712, + "loss": 0.4483864903450012, + "loss_ce": 0.0001443219225620851, + "loss_iou": 0.1923828125, + "loss_num": 0.0126953125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 403823508, + "step": 7207 + }, + { + "epoch": 16.053452115812917, + "grad_norm": 18.274364471435547, + "learning_rate": 1e-06, + "loss": 0.4482, + "num_input_tokens_seen": 403878140, + "step": 7208 + }, + { + "epoch": 16.053452115812917, + "loss": 0.4601691961288452, + "loss_ce": 8.618818537797779e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01263427734375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 403878140, + "step": 7208 + }, + { + "epoch": 16.05567928730512, + "grad_norm": 19.279592514038086, + "learning_rate": 1e-06, + "loss": 0.5412, + "num_input_tokens_seen": 403932144, + "step": 7209 + }, + { + "epoch": 16.05567928730512, + "loss": 0.6549313068389893, + "loss_ce": 0.00014613490202464163, + "loss_iou": 0.267578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 403932144, + "step": 7209 + }, + { + "epoch": 16.057906458797326, + "grad_norm": 15.034238815307617, + "learning_rate": 1e-06, + "loss": 0.471, + "num_input_tokens_seen": 403987788, + "step": 7210 + }, + { + "epoch": 16.057906458797326, + "loss": 0.43674468994140625, + "loss_ce": 9.918860450852662e-05, + "loss_iou": 0.201171875, + "loss_num": 0.006988525390625, + "loss_xval": 0.4375, + "num_input_tokens_seen": 403987788, + "step": 7210 + }, + { + "epoch": 16.06013363028953, + "grad_norm": 13.465814590454102, + "learning_rate": 1e-06, + "loss": 0.3559, + "num_input_tokens_seen": 404044384, + "step": 7211 + }, + { + "epoch": 16.06013363028953, + "loss": 0.3224736154079437, + "loss_ce": 8.591696678195149e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.007781982421875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 404044384, + "step": 7211 + }, + { + "epoch": 16.062360801781736, + "grad_norm": 18.50025749206543, + "learning_rate": 1e-06, + "loss": 0.369, + "num_input_tokens_seen": 404099392, + "step": 7212 + }, + { + "epoch": 16.062360801781736, + "loss": 0.35518187284469604, + "loss_ce": 7.935409666970372e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.01123046875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 404099392, + "step": 7212 + }, + { + "epoch": 16.06458797327394, + "grad_norm": 12.4852294921875, + "learning_rate": 1e-06, + "loss": 0.4552, + "num_input_tokens_seen": 404153904, + "step": 7213 + }, + { + "epoch": 16.06458797327394, + "loss": 0.5071730017662048, + "loss_ce": 9.29402667679824e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0157470703125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 404153904, + "step": 7213 + }, + { + "epoch": 16.066815144766146, + "grad_norm": 22.292587280273438, + "learning_rate": 1e-06, + "loss": 0.5293, + "num_input_tokens_seen": 404207456, + "step": 7214 + }, + { + "epoch": 16.066815144766146, + "loss": 0.3804709315299988, + "loss_ce": 9.985938959289342e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.007537841796875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 404207456, + "step": 7214 + }, + { + "epoch": 16.06904231625835, + "grad_norm": 21.88534927368164, + "learning_rate": 1e-06, + "loss": 0.4697, + "num_input_tokens_seen": 404262880, + "step": 7215 + }, + { + "epoch": 16.06904231625835, + "loss": 0.6787058115005493, + "loss_ce": 0.0001169023453257978, + "loss_iou": 0.314453125, + "loss_num": 0.009765625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 404262880, + "step": 7215 + }, + { + "epoch": 16.071269487750556, + "grad_norm": 29.59630584716797, + "learning_rate": 1e-06, + "loss": 0.5467, + "num_input_tokens_seen": 404316780, + "step": 7216 + }, + { + "epoch": 16.071269487750556, + "loss": 0.505081057548523, + "loss_ce": 7.61750852689147e-05, + "loss_iou": 0.220703125, + "loss_num": 0.012451171875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 404316780, + "step": 7216 + }, + { + "epoch": 16.07349665924276, + "grad_norm": 14.112740516662598, + "learning_rate": 1e-06, + "loss": 0.4883, + "num_input_tokens_seen": 404374304, + "step": 7217 + }, + { + "epoch": 16.07349665924276, + "loss": 0.29110169410705566, + "loss_ce": 8.60571744851768e-05, + "loss_iou": 0.1328125, + "loss_num": 0.00518798828125, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 404374304, + "step": 7217 + }, + { + "epoch": 16.075723830734965, + "grad_norm": 17.42337417602539, + "learning_rate": 1e-06, + "loss": 0.3548, + "num_input_tokens_seen": 404430784, + "step": 7218 + }, + { + "epoch": 16.075723830734965, + "loss": 0.3221060037612915, + "loss_ce": 8.452765177935362e-05, + "loss_iou": 0.142578125, + "loss_num": 0.007537841796875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 404430784, + "step": 7218 + }, + { + "epoch": 16.07795100222717, + "grad_norm": 20.392663955688477, + "learning_rate": 1e-06, + "loss": 0.5018, + "num_input_tokens_seen": 404486928, + "step": 7219 + }, + { + "epoch": 16.07795100222717, + "loss": 0.45973044633865356, + "loss_ce": 7.468648254871368e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0098876953125, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 404486928, + "step": 7219 + }, + { + "epoch": 16.080178173719375, + "grad_norm": 21.25473403930664, + "learning_rate": 1e-06, + "loss": 0.6253, + "num_input_tokens_seen": 404542168, + "step": 7220 + }, + { + "epoch": 16.080178173719375, + "loss": 0.576141357421875, + "loss_ce": 9.15601704036817e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0186767578125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 404542168, + "step": 7220 + }, + { + "epoch": 16.08240534521158, + "grad_norm": 15.591520309448242, + "learning_rate": 1e-06, + "loss": 0.4269, + "num_input_tokens_seen": 404597716, + "step": 7221 + }, + { + "epoch": 16.08240534521158, + "loss": 0.38363730907440186, + "loss_ce": 9.239626524504274e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.01068115234375, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 404597716, + "step": 7221 + }, + { + "epoch": 16.084632516703785, + "grad_norm": 34.75109100341797, + "learning_rate": 1e-06, + "loss": 0.4051, + "num_input_tokens_seen": 404652108, + "step": 7222 + }, + { + "epoch": 16.084632516703785, + "loss": 0.3343104124069214, + "loss_ce": 8.188547508325428e-05, + "loss_iou": 0.140625, + "loss_num": 0.0106201171875, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 404652108, + "step": 7222 + }, + { + "epoch": 16.08685968819599, + "grad_norm": 17.89012908935547, + "learning_rate": 1e-06, + "loss": 0.5697, + "num_input_tokens_seen": 404708964, + "step": 7223 + }, + { + "epoch": 16.08685968819599, + "loss": 0.3971855044364929, + "loss_ce": 9.079407755052671e-05, + "loss_iou": 0.18359375, + "loss_num": 0.005859375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 404708964, + "step": 7223 + }, + { + "epoch": 16.089086859688194, + "grad_norm": 18.168272018432617, + "learning_rate": 1e-06, + "loss": 0.4312, + "num_input_tokens_seen": 404760744, + "step": 7224 + }, + { + "epoch": 16.089086859688194, + "loss": 0.4721256494522095, + "loss_ce": 7.975117478054017e-05, + "loss_iou": 0.19921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 404760744, + "step": 7224 + }, + { + "epoch": 16.0913140311804, + "grad_norm": 22.09956169128418, + "learning_rate": 1e-06, + "loss": 0.2769, + "num_input_tokens_seen": 404818936, + "step": 7225 + }, + { + "epoch": 16.0913140311804, + "loss": 0.19356948137283325, + "loss_ce": 8.803060336504132e-05, + "loss_iou": 0.087890625, + "loss_num": 0.0035400390625, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 404818936, + "step": 7225 + }, + { + "epoch": 16.093541202672604, + "grad_norm": 18.174219131469727, + "learning_rate": 1e-06, + "loss": 0.3931, + "num_input_tokens_seen": 404874408, + "step": 7226 + }, + { + "epoch": 16.093541202672604, + "loss": 0.40315836668014526, + "loss_ce": 8.220285963034257e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.01214599609375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 404874408, + "step": 7226 + }, + { + "epoch": 16.09576837416481, + "grad_norm": 21.083255767822266, + "learning_rate": 1e-06, + "loss": 0.2451, + "num_input_tokens_seen": 404932184, + "step": 7227 + }, + { + "epoch": 16.09576837416481, + "loss": 0.2461136281490326, + "loss_ce": 8.089626498986036e-05, + "loss_iou": 0.10693359375, + "loss_num": 0.0064697265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 404932184, + "step": 7227 + }, + { + "epoch": 16.097995545657014, + "grad_norm": 15.489051818847656, + "learning_rate": 1e-06, + "loss": 0.4338, + "num_input_tokens_seen": 404989408, + "step": 7228 + }, + { + "epoch": 16.097995545657014, + "loss": 0.46629413962364197, + "loss_ce": 0.00010758035205071792, + "loss_iou": 0.216796875, + "loss_num": 0.00634765625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 404989408, + "step": 7228 + }, + { + "epoch": 16.100222717149222, + "grad_norm": 28.974552154541016, + "learning_rate": 1e-06, + "loss": 0.5686, + "num_input_tokens_seen": 405044892, + "step": 7229 + }, + { + "epoch": 16.100222717149222, + "loss": 0.4830133318901062, + "loss_ce": 0.00010318079148419201, + "loss_iou": 0.220703125, + "loss_num": 0.00823974609375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 405044892, + "step": 7229 + }, + { + "epoch": 16.102449888641427, + "grad_norm": 27.26142692565918, + "learning_rate": 1e-06, + "loss": 0.5382, + "num_input_tokens_seen": 405102960, + "step": 7230 + }, + { + "epoch": 16.102449888641427, + "loss": 0.6526839137077332, + "loss_ce": 9.603158105164766e-05, + "loss_iou": 0.28125, + "loss_num": 0.0179443359375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 405102960, + "step": 7230 + }, + { + "epoch": 16.104677060133632, + "grad_norm": 33.493282318115234, + "learning_rate": 1e-06, + "loss": 0.4217, + "num_input_tokens_seen": 405159344, + "step": 7231 + }, + { + "epoch": 16.104677060133632, + "loss": 0.47552353143692017, + "loss_ce": 0.00012070016236975789, + "loss_iou": 0.1884765625, + "loss_num": 0.0196533203125, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 405159344, + "step": 7231 + }, + { + "epoch": 16.106904231625837, + "grad_norm": 16.54621124267578, + "learning_rate": 1e-06, + "loss": 0.3254, + "num_input_tokens_seen": 405215920, + "step": 7232 + }, + { + "epoch": 16.106904231625837, + "loss": 0.3362829387187958, + "loss_ce": 0.00010127984569408, + "loss_iou": 0.1494140625, + "loss_num": 0.00750732421875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 405215920, + "step": 7232 + }, + { + "epoch": 16.10913140311804, + "grad_norm": 15.172420501708984, + "learning_rate": 1e-06, + "loss": 0.271, + "num_input_tokens_seen": 405274772, + "step": 7233 + }, + { + "epoch": 16.10913140311804, + "loss": 0.1989382654428482, + "loss_ce": 8.573340164730325e-05, + "loss_iou": 0.08642578125, + "loss_num": 0.005126953125, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 405274772, + "step": 7233 + }, + { + "epoch": 16.111358574610247, + "grad_norm": 25.89063835144043, + "learning_rate": 1e-06, + "loss": 0.428, + "num_input_tokens_seen": 405328804, + "step": 7234 + }, + { + "epoch": 16.111358574610247, + "loss": 0.48993584513664246, + "loss_ce": 0.00012872781371697783, + "loss_iou": 0.1982421875, + "loss_num": 0.0185546875, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 405328804, + "step": 7234 + }, + { + "epoch": 16.11358574610245, + "grad_norm": 17.315288543701172, + "learning_rate": 1e-06, + "loss": 0.3599, + "num_input_tokens_seen": 405384920, + "step": 7235 + }, + { + "epoch": 16.11358574610245, + "loss": 0.40499159693717957, + "loss_ce": 8.437626820523292e-05, + "loss_iou": 0.181640625, + "loss_num": 0.00836181640625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 405384920, + "step": 7235 + }, + { + "epoch": 16.115812917594656, + "grad_norm": 11.827547073364258, + "learning_rate": 1e-06, + "loss": 0.4208, + "num_input_tokens_seen": 405440816, + "step": 7236 + }, + { + "epoch": 16.115812917594656, + "loss": 0.43206048011779785, + "loss_ce": 0.00011468880984466523, + "loss_iou": 0.1962890625, + "loss_num": 0.007720947265625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 405440816, + "step": 7236 + }, + { + "epoch": 16.11804008908686, + "grad_norm": 16.36489486694336, + "learning_rate": 1e-06, + "loss": 0.4765, + "num_input_tokens_seen": 405497324, + "step": 7237 + }, + { + "epoch": 16.11804008908686, + "loss": 0.4784888029098511, + "loss_ce": 9.523934568278491e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0242919921875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 405497324, + "step": 7237 + }, + { + "epoch": 16.120267260579066, + "grad_norm": 19.48402214050293, + "learning_rate": 1e-06, + "loss": 0.521, + "num_input_tokens_seen": 405551216, + "step": 7238 + }, + { + "epoch": 16.120267260579066, + "loss": 0.6264359951019287, + "loss_ce": 9.326158760813996e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.028076171875, + "loss_xval": 0.625, + "num_input_tokens_seen": 405551216, + "step": 7238 + }, + { + "epoch": 16.12249443207127, + "grad_norm": 20.337753295898438, + "learning_rate": 1e-06, + "loss": 0.3115, + "num_input_tokens_seen": 405608940, + "step": 7239 + }, + { + "epoch": 16.12249443207127, + "loss": 0.27264925837516785, + "loss_ce": 0.000310403760522604, + "loss_iou": 0.11376953125, + "loss_num": 0.009033203125, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 405608940, + "step": 7239 + }, + { + "epoch": 16.124721603563476, + "grad_norm": 66.91169738769531, + "learning_rate": 1e-06, + "loss": 0.4479, + "num_input_tokens_seen": 405666224, + "step": 7240 + }, + { + "epoch": 16.124721603563476, + "loss": 0.5517283082008362, + "loss_ce": 9.25497297430411e-05, + "loss_iou": 0.26171875, + "loss_num": 0.00592041015625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 405666224, + "step": 7240 + }, + { + "epoch": 16.12694877505568, + "grad_norm": 14.807357788085938, + "learning_rate": 1e-06, + "loss": 0.4308, + "num_input_tokens_seen": 405722588, + "step": 7241 + }, + { + "epoch": 16.12694877505568, + "loss": 0.47714054584503174, + "loss_ce": 8.976385288406163e-05, + "loss_iou": 0.220703125, + "loss_num": 0.00738525390625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 405722588, + "step": 7241 + }, + { + "epoch": 16.129175946547885, + "grad_norm": 40.85698318481445, + "learning_rate": 1e-06, + "loss": 0.3856, + "num_input_tokens_seen": 405778132, + "step": 7242 + }, + { + "epoch": 16.129175946547885, + "loss": 0.40902432799339294, + "loss_ce": 8.878795779310167e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.008056640625, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 405778132, + "step": 7242 + }, + { + "epoch": 16.13140311804009, + "grad_norm": 19.070831298828125, + "learning_rate": 1e-06, + "loss": 0.4482, + "num_input_tokens_seen": 405834844, + "step": 7243 + }, + { + "epoch": 16.13140311804009, + "loss": 0.5067603588104248, + "loss_ce": 0.00010757060954347253, + "loss_iou": 0.216796875, + "loss_num": 0.01458740234375, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 405834844, + "step": 7243 + }, + { + "epoch": 16.133630289532295, + "grad_norm": 22.71406364440918, + "learning_rate": 1e-06, + "loss": 0.5471, + "num_input_tokens_seen": 405889304, + "step": 7244 + }, + { + "epoch": 16.133630289532295, + "loss": 0.4746703505516052, + "loss_ce": 9.150059486273676e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.009521484375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 405889304, + "step": 7244 + }, + { + "epoch": 16.1358574610245, + "grad_norm": 26.008378982543945, + "learning_rate": 1e-06, + "loss": 0.4208, + "num_input_tokens_seen": 405948244, + "step": 7245 + }, + { + "epoch": 16.1358574610245, + "loss": 0.44539159536361694, + "loss_ce": 7.910738349892199e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.006561279296875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 405948244, + "step": 7245 + }, + { + "epoch": 16.138084632516705, + "grad_norm": 17.593692779541016, + "learning_rate": 1e-06, + "loss": 0.6091, + "num_input_tokens_seen": 406004756, + "step": 7246 + }, + { + "epoch": 16.138084632516705, + "loss": 0.672180712223053, + "loss_ce": 0.00018365512369200587, + "loss_iou": 0.314453125, + "loss_num": 0.009033203125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 406004756, + "step": 7246 + }, + { + "epoch": 16.14031180400891, + "grad_norm": 31.90231704711914, + "learning_rate": 1e-06, + "loss": 0.6659, + "num_input_tokens_seen": 406061152, + "step": 7247 + }, + { + "epoch": 16.14031180400891, + "loss": 0.34200403094291687, + "loss_ce": 8.508679457008839e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00799560546875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 406061152, + "step": 7247 + }, + { + "epoch": 16.142538975501115, + "grad_norm": 13.510699272155762, + "learning_rate": 1e-06, + "loss": 0.2774, + "num_input_tokens_seen": 406116672, + "step": 7248 + }, + { + "epoch": 16.142538975501115, + "loss": 0.29828941822052, + "loss_ce": 7.161758549045771e-05, + "loss_iou": 0.125, + "loss_num": 0.00970458984375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 406116672, + "step": 7248 + }, + { + "epoch": 16.14476614699332, + "grad_norm": 13.294876098632812, + "learning_rate": 1e-06, + "loss": 0.3414, + "num_input_tokens_seen": 406175172, + "step": 7249 + }, + { + "epoch": 16.14476614699332, + "loss": 0.3227793574333191, + "loss_ce": 8.648384391563013e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.0031280517578125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 406175172, + "step": 7249 + }, + { + "epoch": 16.146993318485524, + "grad_norm": 18.024934768676758, + "learning_rate": 1e-06, + "loss": 0.4407, + "num_input_tokens_seen": 406234160, + "step": 7250 + }, + { + "epoch": 16.146993318485524, + "eval_seeclick_web_CIoU": 0.588862270116806, + "eval_seeclick_web_GIoU": 0.587228536605835, + "eval_seeclick_web_IoU": 0.6071678400039673, + "eval_seeclick_web_MAE_all": 0.015188148012384772, + "eval_seeclick_web_MAE_h": 0.007499874569475651, + "eval_seeclick_web_MAE_w": 0.015260101296007633, + "eval_seeclick_web_MAE_x_boxes": 0.008470539702102542, + "eval_seeclick_web_MAE_y_boxes": 0.02111952123232186, + "eval_seeclick_web_inside_bbox": 0.9166666567325592, + "eval_seeclick_web_loss": 0.9167152643203735, + "eval_seeclick_web_loss_ce": 0.00014891428872942924, + "eval_seeclick_web_loss_iou": 0.4229736328125, + "eval_seeclick_web_loss_num": 0.01216888427734375, + "eval_seeclick_web_loss_xval": 0.906982421875, + "eval_seeclick_web_runtime": 20.8185, + "eval_seeclick_web_samples_per_second": 2.402, + "eval_seeclick_web_steps_per_second": 0.096, + "num_input_tokens_seen": 406234160, + "step": 7250 + }, + { + "epoch": 16.146993318485524, + "eval_icons_CIoU": 0.26575663685798645, + "eval_icons_GIoU": 0.2966430187225342, + "eval_icons_IoU": 0.3484746217727661, + "eval_icons_MAE_all": 0.06336861476302147, + "eval_icons_MAE_h": 0.03217336814850569, + "eval_icons_MAE_w": 0.07706875540316105, + "eval_icons_MAE_x_boxes": 0.05414869636297226, + "eval_icons_MAE_y_boxes": 0.037681372836232185, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7173901796340942, + "eval_icons_loss_ce": 0.00019168824655935168, + "eval_icons_loss_iou": 0.66650390625, + "eval_icons_loss_num": 0.061092376708984375, + "eval_icons_loss_xval": 1.638671875, + "eval_icons_runtime": 20.0031, + "eval_icons_samples_per_second": 2.5, + "eval_icons_steps_per_second": 0.1, + "num_input_tokens_seen": 406234160, + "step": 7250 + }, + { + "epoch": 16.146993318485524, + "eval_screenspot_CIoU": 0.38737640778223675, + "eval_screenspot_GIoU": 0.40602253874142963, + "eval_screenspot_IoU": 0.45554816722869873, + "eval_screenspot_MAE_all": 0.05349355190992355, + "eval_screenspot_MAE_h": 0.039430550610025726, + "eval_screenspot_MAE_w": 0.0571071021258831, + "eval_screenspot_MAE_x_boxes": 0.0624094990392526, + "eval_screenspot_MAE_y_boxes": 0.0382879643390576, + "eval_screenspot_inside_bbox": 0.7041666706403097, + "eval_screenspot_loss": 1.5262062549591064, + "eval_screenspot_loss_ce": 0.00022643499445014945, + "eval_screenspot_loss_iou": 0.6376953125, + "eval_screenspot_loss_num": 0.06162389119466146, + "eval_screenspot_loss_xval": 1.58349609375, + "eval_screenspot_runtime": 33.0274, + "eval_screenspot_samples_per_second": 2.695, + "eval_screenspot_steps_per_second": 0.091, + "num_input_tokens_seen": 406234160, + "step": 7250 + }, + { + "epoch": 16.146993318485524, + "eval_compot_CIoU": 0.34477105736732483, + "eval_compot_GIoU": 0.3559461981058121, + "eval_compot_IoU": 0.40127159655094147, + "eval_compot_MAE_all": 0.01874891249462962, + "eval_compot_MAE_h": 0.011277861427515745, + "eval_compot_MAE_w": 0.02132485620677471, + "eval_compot_MAE_x_boxes": 0.029761233367025852, + "eval_compot_MAE_y_boxes": 0.0067885443568229675, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.402956247329712, + "eval_compot_loss_ce": 0.00014377458137460053, + "eval_compot_loss_iou": 0.6480712890625, + "eval_compot_loss_num": 0.017492294311523438, + "eval_compot_loss_xval": 1.3837890625, + "eval_compot_runtime": 20.4358, + "eval_compot_samples_per_second": 2.447, + "eval_compot_steps_per_second": 0.098, + "num_input_tokens_seen": 406234160, + "step": 7250 + }, + { + "epoch": 16.146993318485524, + "eval_custom_ui_val_CIoU": 0.4759764571984609, + "eval_custom_ui_val_GIoU": 0.48175321850511765, + "eval_custom_ui_val_IoU": 0.5367199348078834, + "eval_custom_ui_val_MAE_all": 0.027078964850968786, + "eval_custom_ui_val_MAE_h": 0.014058138916475905, + "eval_custom_ui_val_MAE_w": 0.036136620212346315, + "eval_custom_ui_val_MAE_x_boxes": 0.0336780981419401, + "eval_custom_ui_val_MAE_y_boxes": 0.012756779815794693, + "eval_custom_ui_val_inside_bbox": 0.7719907429483202, + "eval_custom_ui_val_loss": 1.1681312322616577, + "eval_custom_ui_val_loss_ce": 0.00017250773412848098, + "eval_custom_ui_val_loss_iou": 0.5040554470486112, + "eval_custom_ui_val_loss_num": 0.023714171515570745, + "eval_custom_ui_val_loss_xval": 1.1267903645833333, + "eval_custom_ui_val_runtime": 56.1097, + "eval_custom_ui_val_samples_per_second": 4.723, + "eval_custom_ui_val_steps_per_second": 0.16, + "num_input_tokens_seen": 406234160, + "step": 7250 + }, + { + "epoch": 16.146993318485524, + "loss": 0.8809858560562134, + "loss_ce": 0.0001264033926418051, + "loss_iou": 0.3984375, + "loss_num": 0.016845703125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 406234160, + "step": 7250 + }, + { + "epoch": 16.14922048997773, + "grad_norm": 13.051247596740723, + "learning_rate": 1e-06, + "loss": 0.2912, + "num_input_tokens_seen": 406287840, + "step": 7251 + }, + { + "epoch": 16.14922048997773, + "loss": 0.3833409249782562, + "loss_ce": 0.00010119502258021384, + "loss_iou": 0.1728515625, + "loss_num": 0.00738525390625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 406287840, + "step": 7251 + }, + { + "epoch": 16.151447661469934, + "grad_norm": 17.345809936523438, + "learning_rate": 1e-06, + "loss": 0.4775, + "num_input_tokens_seen": 406344524, + "step": 7252 + }, + { + "epoch": 16.151447661469934, + "loss": 0.3954039514064789, + "loss_ce": 7.925261161290109e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.00775146484375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 406344524, + "step": 7252 + }, + { + "epoch": 16.15367483296214, + "grad_norm": 17.00496482849121, + "learning_rate": 1e-06, + "loss": 0.389, + "num_input_tokens_seen": 406399856, + "step": 7253 + }, + { + "epoch": 16.15367483296214, + "loss": 0.35250192880630493, + "loss_ce": 8.492142660543323e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.00927734375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 406399856, + "step": 7253 + }, + { + "epoch": 16.155902004454344, + "grad_norm": 13.97536563873291, + "learning_rate": 1e-06, + "loss": 0.3508, + "num_input_tokens_seen": 406457204, + "step": 7254 + }, + { + "epoch": 16.155902004454344, + "loss": 0.3879046142101288, + "loss_ce": 8.720727055333555e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.0067138671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 406457204, + "step": 7254 + }, + { + "epoch": 16.15812917594655, + "grad_norm": 14.464372634887695, + "learning_rate": 1e-06, + "loss": 0.4449, + "num_input_tokens_seen": 406513904, + "step": 7255 + }, + { + "epoch": 16.15812917594655, + "loss": 0.37365224957466125, + "loss_ce": 0.00023914946359582245, + "loss_iou": 0.1533203125, + "loss_num": 0.01348876953125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 406513904, + "step": 7255 + }, + { + "epoch": 16.160356347438753, + "grad_norm": 14.928121566772461, + "learning_rate": 1e-06, + "loss": 0.4961, + "num_input_tokens_seen": 406570348, + "step": 7256 + }, + { + "epoch": 16.160356347438753, + "loss": 0.7330120801925659, + "loss_ce": 0.00010193722846452147, + "loss_iou": 0.259765625, + "loss_num": 0.04248046875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 406570348, + "step": 7256 + }, + { + "epoch": 16.16258351893096, + "grad_norm": 20.846715927124023, + "learning_rate": 1e-06, + "loss": 0.6857, + "num_input_tokens_seen": 406624784, + "step": 7257 + }, + { + "epoch": 16.16258351893096, + "loss": 0.9515559673309326, + "loss_ce": 0.00013993156608194113, + "loss_iou": 0.369140625, + "loss_num": 0.042724609375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 406624784, + "step": 7257 + }, + { + "epoch": 16.164810690423163, + "grad_norm": 14.39743423461914, + "learning_rate": 1e-06, + "loss": 0.5994, + "num_input_tokens_seen": 406679488, + "step": 7258 + }, + { + "epoch": 16.164810690423163, + "loss": 0.450466126203537, + "loss_ce": 8.771609282121062e-05, + "loss_iou": 0.19921875, + "loss_num": 0.01025390625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 406679488, + "step": 7258 + }, + { + "epoch": 16.167037861915368, + "grad_norm": 31.99239158630371, + "learning_rate": 1e-06, + "loss": 0.3736, + "num_input_tokens_seen": 406737108, + "step": 7259 + }, + { + "epoch": 16.167037861915368, + "loss": 0.25009024143218994, + "loss_ce": 9.024170140037313e-05, + "loss_iou": 0.1083984375, + "loss_num": 0.006744384765625, + "loss_xval": 0.25, + "num_input_tokens_seen": 406737108, + "step": 7259 + }, + { + "epoch": 16.169265033407573, + "grad_norm": 28.246858596801758, + "learning_rate": 1e-06, + "loss": 0.3793, + "num_input_tokens_seen": 406794236, + "step": 7260 + }, + { + "epoch": 16.169265033407573, + "loss": 0.3895048499107361, + "loss_ce": 0.00010054315498564392, + "loss_iou": 0.17578125, + "loss_num": 0.007659912109375, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 406794236, + "step": 7260 + }, + { + "epoch": 16.171492204899778, + "grad_norm": 13.259971618652344, + "learning_rate": 1e-06, + "loss": 0.3625, + "num_input_tokens_seen": 406851408, + "step": 7261 + }, + { + "epoch": 16.171492204899778, + "loss": 0.39652132987976074, + "loss_ce": 9.797551319934428e-05, + "loss_iou": 0.17578125, + "loss_num": 0.00897216796875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 406851408, + "step": 7261 + }, + { + "epoch": 16.173719376391983, + "grad_norm": 18.107431411743164, + "learning_rate": 1e-06, + "loss": 0.4601, + "num_input_tokens_seen": 406909764, + "step": 7262 + }, + { + "epoch": 16.173719376391983, + "loss": 0.5472245216369629, + "loss_ce": 0.0001053560699801892, + "loss_iou": 0.236328125, + "loss_num": 0.01507568359375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 406909764, + "step": 7262 + }, + { + "epoch": 16.175946547884188, + "grad_norm": 19.771875381469727, + "learning_rate": 1e-06, + "loss": 0.4511, + "num_input_tokens_seen": 406967348, + "step": 7263 + }, + { + "epoch": 16.175946547884188, + "loss": 0.3307103216648102, + "loss_ce": 8.286767115350813e-05, + "loss_iou": 0.150390625, + "loss_num": 0.005889892578125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 406967348, + "step": 7263 + }, + { + "epoch": 16.178173719376392, + "grad_norm": 20.80531883239746, + "learning_rate": 1e-06, + "loss": 0.4105, + "num_input_tokens_seen": 407023044, + "step": 7264 + }, + { + "epoch": 16.178173719376392, + "loss": 0.40901893377304077, + "loss_ce": 8.336683094967157e-05, + "loss_iou": 0.189453125, + "loss_num": 0.006011962890625, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 407023044, + "step": 7264 + }, + { + "epoch": 16.180400890868597, + "grad_norm": 15.4229736328125, + "learning_rate": 1e-06, + "loss": 0.366, + "num_input_tokens_seen": 407078844, + "step": 7265 + }, + { + "epoch": 16.180400890868597, + "loss": 0.22904685139656067, + "loss_ce": 0.0001039912604028359, + "loss_iou": 0.09228515625, + "loss_num": 0.00885009765625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 407078844, + "step": 7265 + }, + { + "epoch": 16.182628062360802, + "grad_norm": 15.534099578857422, + "learning_rate": 1e-06, + "loss": 0.4986, + "num_input_tokens_seen": 407135112, + "step": 7266 + }, + { + "epoch": 16.182628062360802, + "loss": 0.41431134939193726, + "loss_ce": 0.00012679336941801012, + "loss_iou": 0.1826171875, + "loss_num": 0.00994873046875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 407135112, + "step": 7266 + }, + { + "epoch": 16.184855233853007, + "grad_norm": 11.986660957336426, + "learning_rate": 1e-06, + "loss": 0.3914, + "num_input_tokens_seen": 407193344, + "step": 7267 + }, + { + "epoch": 16.184855233853007, + "loss": 0.3952281177043915, + "loss_ce": 8.651654206914827e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.01300048828125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 407193344, + "step": 7267 + }, + { + "epoch": 16.187082405345212, + "grad_norm": 14.020734786987305, + "learning_rate": 1e-06, + "loss": 0.4324, + "num_input_tokens_seen": 407249716, + "step": 7268 + }, + { + "epoch": 16.187082405345212, + "loss": 0.48159003257751465, + "loss_ce": 0.0001447216491214931, + "loss_iou": 0.2060546875, + "loss_num": 0.013671875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 407249716, + "step": 7268 + }, + { + "epoch": 16.189309576837417, + "grad_norm": 11.799369812011719, + "learning_rate": 1e-06, + "loss": 0.3835, + "num_input_tokens_seen": 407306452, + "step": 7269 + }, + { + "epoch": 16.189309576837417, + "loss": 0.3408535122871399, + "loss_ce": 9.422671428183094e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.005950927734375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 407306452, + "step": 7269 + }, + { + "epoch": 16.19153674832962, + "grad_norm": 14.987945556640625, + "learning_rate": 1e-06, + "loss": 0.3707, + "num_input_tokens_seen": 407363312, + "step": 7270 + }, + { + "epoch": 16.19153674832962, + "loss": 0.2843882739543915, + "loss_ce": 8.651558891870081e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.007476806640625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 407363312, + "step": 7270 + }, + { + "epoch": 16.193763919821826, + "grad_norm": 14.705431938171387, + "learning_rate": 1e-06, + "loss": 0.3561, + "num_input_tokens_seen": 407417848, + "step": 7271 + }, + { + "epoch": 16.193763919821826, + "loss": 0.30038201808929443, + "loss_ce": 8.906603034120053e-05, + "loss_iou": 0.123046875, + "loss_num": 0.01080322265625, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 407417848, + "step": 7271 + }, + { + "epoch": 16.19599109131403, + "grad_norm": 33.002174377441406, + "learning_rate": 1e-06, + "loss": 0.5219, + "num_input_tokens_seen": 407475472, + "step": 7272 + }, + { + "epoch": 16.19599109131403, + "loss": 0.510102391242981, + "loss_ce": 9.262951789423823e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 407475472, + "step": 7272 + }, + { + "epoch": 16.198218262806236, + "grad_norm": 29.614242553710938, + "learning_rate": 1e-06, + "loss": 0.3678, + "num_input_tokens_seen": 407531476, + "step": 7273 + }, + { + "epoch": 16.198218262806236, + "loss": 0.37849289178848267, + "loss_ce": 7.491311407648027e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 407531476, + "step": 7273 + }, + { + "epoch": 16.20044543429844, + "grad_norm": 16.283767700195312, + "learning_rate": 1e-06, + "loss": 0.4892, + "num_input_tokens_seen": 407587100, + "step": 7274 + }, + { + "epoch": 16.20044543429844, + "loss": 0.5279887914657593, + "loss_ce": 9.569715621182695e-05, + "loss_iou": 0.22265625, + "loss_num": 0.0166015625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 407587100, + "step": 7274 + }, + { + "epoch": 16.202672605790646, + "grad_norm": 16.31856346130371, + "learning_rate": 1e-06, + "loss": 0.4131, + "num_input_tokens_seen": 407643272, + "step": 7275 + }, + { + "epoch": 16.202672605790646, + "loss": 0.40553590655326843, + "loss_ce": 0.00010989317524945363, + "loss_iou": 0.17578125, + "loss_num": 0.01080322265625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 407643272, + "step": 7275 + }, + { + "epoch": 16.20489977728285, + "grad_norm": 18.60124969482422, + "learning_rate": 1e-06, + "loss": 0.3314, + "num_input_tokens_seen": 407699796, + "step": 7276 + }, + { + "epoch": 16.20489977728285, + "loss": 0.28377124667167664, + "loss_ce": 7.983081741258502e-05, + "loss_iou": 0.125, + "loss_num": 0.006591796875, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 407699796, + "step": 7276 + }, + { + "epoch": 16.207126948775056, + "grad_norm": 22.093128204345703, + "learning_rate": 1e-06, + "loss": 0.4329, + "num_input_tokens_seen": 407753556, + "step": 7277 + }, + { + "epoch": 16.207126948775056, + "loss": 0.4864369034767151, + "loss_ce": 0.00010880557238124311, + "loss_iou": 0.2138671875, + "loss_num": 0.0118408203125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 407753556, + "step": 7277 + }, + { + "epoch": 16.20935412026726, + "grad_norm": 17.99799156188965, + "learning_rate": 1e-06, + "loss": 0.3275, + "num_input_tokens_seen": 407811196, + "step": 7278 + }, + { + "epoch": 16.20935412026726, + "loss": 0.2680291533470154, + "loss_ce": 8.48027557367459e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.0054931640625, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 407811196, + "step": 7278 + }, + { + "epoch": 16.211581291759465, + "grad_norm": 62.915122985839844, + "learning_rate": 1e-06, + "loss": 0.4762, + "num_input_tokens_seen": 407866648, + "step": 7279 + }, + { + "epoch": 16.211581291759465, + "loss": 0.3208872675895691, + "loss_ce": 8.647728100186214e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.0042724609375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 407866648, + "step": 7279 + }, + { + "epoch": 16.21380846325167, + "grad_norm": 24.828845977783203, + "learning_rate": 1e-06, + "loss": 0.5674, + "num_input_tokens_seen": 407922120, + "step": 7280 + }, + { + "epoch": 16.21380846325167, + "loss": 0.4975318908691406, + "loss_ce": 9.53997514443472e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.01409912109375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 407922120, + "step": 7280 + }, + { + "epoch": 16.216035634743875, + "grad_norm": 16.575681686401367, + "learning_rate": 1e-06, + "loss": 0.4067, + "num_input_tokens_seen": 407974588, + "step": 7281 + }, + { + "epoch": 16.216035634743875, + "loss": 0.5100204944610596, + "loss_ce": 0.00104833475779742, + "loss_iou": 0.208984375, + "loss_num": 0.01806640625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 407974588, + "step": 7281 + }, + { + "epoch": 16.21826280623608, + "grad_norm": 16.462749481201172, + "learning_rate": 1e-06, + "loss": 0.3674, + "num_input_tokens_seen": 408030196, + "step": 7282 + }, + { + "epoch": 16.21826280623608, + "loss": 0.43087995052337646, + "loss_ce": 9.384381701238453e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.014892578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 408030196, + "step": 7282 + }, + { + "epoch": 16.220489977728285, + "grad_norm": 18.638999938964844, + "learning_rate": 1e-06, + "loss": 0.4374, + "num_input_tokens_seen": 408084800, + "step": 7283 + }, + { + "epoch": 16.220489977728285, + "loss": 0.4949721395969391, + "loss_ce": 9.908679930958897e-05, + "loss_iou": 0.22265625, + "loss_num": 0.00982666015625, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 408084800, + "step": 7283 + }, + { + "epoch": 16.22271714922049, + "grad_norm": 13.85960578918457, + "learning_rate": 1e-06, + "loss": 0.3035, + "num_input_tokens_seen": 408142404, + "step": 7284 + }, + { + "epoch": 16.22271714922049, + "loss": 0.3523852527141571, + "loss_ce": 9.034241520566866e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.00970458984375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 408142404, + "step": 7284 + }, + { + "epoch": 16.224944320712694, + "grad_norm": 23.113548278808594, + "learning_rate": 1e-06, + "loss": 0.5333, + "num_input_tokens_seen": 408197056, + "step": 7285 + }, + { + "epoch": 16.224944320712694, + "loss": 0.5909112691879272, + "loss_ce": 9.095118002733216e-05, + "loss_iou": 0.271484375, + "loss_num": 0.00946044921875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 408197056, + "step": 7285 + }, + { + "epoch": 16.2271714922049, + "grad_norm": 19.409629821777344, + "learning_rate": 1e-06, + "loss": 0.3651, + "num_input_tokens_seen": 408249604, + "step": 7286 + }, + { + "epoch": 16.2271714922049, + "loss": 0.36336296796798706, + "loss_ce": 8.17275868030265e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.0067138671875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 408249604, + "step": 7286 + }, + { + "epoch": 16.229398663697104, + "grad_norm": 17.79453468322754, + "learning_rate": 1e-06, + "loss": 0.328, + "num_input_tokens_seen": 408306396, + "step": 7287 + }, + { + "epoch": 16.229398663697104, + "loss": 0.34055426716804504, + "loss_ce": 0.00010017414024332538, + "loss_iou": 0.158203125, + "loss_num": 0.004669189453125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 408306396, + "step": 7287 + }, + { + "epoch": 16.23162583518931, + "grad_norm": 18.441938400268555, + "learning_rate": 1e-06, + "loss": 0.4376, + "num_input_tokens_seen": 408362596, + "step": 7288 + }, + { + "epoch": 16.23162583518931, + "loss": 0.470440149307251, + "loss_ce": 0.00010323309834348038, + "loss_iou": 0.2158203125, + "loss_num": 0.00787353515625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 408362596, + "step": 7288 + }, + { + "epoch": 16.233853006681514, + "grad_norm": 24.354867935180664, + "learning_rate": 1e-06, + "loss": 0.3503, + "num_input_tokens_seen": 408417028, + "step": 7289 + }, + { + "epoch": 16.233853006681514, + "loss": 0.3560473918914795, + "loss_ce": 9.036185656441376e-05, + "loss_iou": 0.158203125, + "loss_num": 0.00799560546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 408417028, + "step": 7289 + }, + { + "epoch": 16.23608017817372, + "grad_norm": 15.969454765319824, + "learning_rate": 1e-06, + "loss": 0.5287, + "num_input_tokens_seen": 408472732, + "step": 7290 + }, + { + "epoch": 16.23608017817372, + "loss": 0.5947001576423645, + "loss_ce": 9.568793757352978e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0167236328125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 408472732, + "step": 7290 + }, + { + "epoch": 16.238307349665924, + "grad_norm": 19.128751754760742, + "learning_rate": 1e-06, + "loss": 0.3783, + "num_input_tokens_seen": 408531592, + "step": 7291 + }, + { + "epoch": 16.238307349665924, + "loss": 0.37436580657958984, + "loss_ce": 9.823799337027594e-05, + "loss_iou": 0.16015625, + "loss_num": 0.01092529296875, + "loss_xval": 0.375, + "num_input_tokens_seen": 408531592, + "step": 7291 + }, + { + "epoch": 16.24053452115813, + "grad_norm": 20.96180534362793, + "learning_rate": 1e-06, + "loss": 0.4015, + "num_input_tokens_seen": 408587248, + "step": 7292 + }, + { + "epoch": 16.24053452115813, + "loss": 0.3449326753616333, + "loss_ce": 8.403346146224067e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.006011962890625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 408587248, + "step": 7292 + }, + { + "epoch": 16.242761692650333, + "grad_norm": 16.29045867919922, + "learning_rate": 1e-06, + "loss": 0.3866, + "num_input_tokens_seen": 408643652, + "step": 7293 + }, + { + "epoch": 16.242761692650333, + "loss": 0.461269736289978, + "loss_ce": 8.806748519418761e-05, + "loss_iou": 0.208984375, + "loss_num": 0.00872802734375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 408643652, + "step": 7293 + }, + { + "epoch": 16.244988864142538, + "grad_norm": 16.806655883789062, + "learning_rate": 1e-06, + "loss": 0.4739, + "num_input_tokens_seen": 408696544, + "step": 7294 + }, + { + "epoch": 16.244988864142538, + "loss": 0.5039190053939819, + "loss_ce": 0.00013484872761182487, + "loss_iou": 0.212890625, + "loss_num": 0.0157470703125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 408696544, + "step": 7294 + }, + { + "epoch": 16.247216035634743, + "grad_norm": 19.150453567504883, + "learning_rate": 1e-06, + "loss": 0.5361, + "num_input_tokens_seen": 408752192, + "step": 7295 + }, + { + "epoch": 16.247216035634743, + "loss": 0.4750003218650818, + "loss_ce": 8.576853724662215e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.0111083984375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 408752192, + "step": 7295 + }, + { + "epoch": 16.249443207126948, + "grad_norm": 14.885661125183105, + "learning_rate": 1e-06, + "loss": 0.4392, + "num_input_tokens_seen": 408807428, + "step": 7296 + }, + { + "epoch": 16.249443207126948, + "loss": 0.3516565263271332, + "loss_ce": 9.402677096659318e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.00653076171875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 408807428, + "step": 7296 + }, + { + "epoch": 16.251670378619153, + "grad_norm": 20.185546875, + "learning_rate": 1e-06, + "loss": 0.6554, + "num_input_tokens_seen": 408862660, + "step": 7297 + }, + { + "epoch": 16.251670378619153, + "loss": 0.6360622644424438, + "loss_ce": 7.591473695356399e-05, + "loss_iou": 0.275390625, + "loss_num": 0.017333984375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 408862660, + "step": 7297 + }, + { + "epoch": 16.253897550111358, + "grad_norm": 24.526891708374023, + "learning_rate": 1e-06, + "loss": 0.5942, + "num_input_tokens_seen": 408915492, + "step": 7298 + }, + { + "epoch": 16.253897550111358, + "loss": 0.4688360095024109, + "loss_ce": 8.602187153883278e-05, + "loss_iou": 0.18359375, + "loss_num": 0.0203857421875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 408915492, + "step": 7298 + }, + { + "epoch": 16.256124721603562, + "grad_norm": 28.41362953186035, + "learning_rate": 1e-06, + "loss": 0.3349, + "num_input_tokens_seen": 408971936, + "step": 7299 + }, + { + "epoch": 16.256124721603562, + "loss": 0.3919934928417206, + "loss_ce": 0.00014778405602555722, + "loss_iou": 0.1806640625, + "loss_num": 0.005950927734375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 408971936, + "step": 7299 + }, + { + "epoch": 16.258351893095767, + "grad_norm": 16.66225814819336, + "learning_rate": 1e-06, + "loss": 0.4285, + "num_input_tokens_seen": 409026252, + "step": 7300 + }, + { + "epoch": 16.258351893095767, + "loss": 0.4181760251522064, + "loss_ce": 8.519048424204811e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.0098876953125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 409026252, + "step": 7300 + }, + { + "epoch": 16.260579064587972, + "grad_norm": 19.2691593170166, + "learning_rate": 1e-06, + "loss": 0.67, + "num_input_tokens_seen": 409084124, + "step": 7301 + }, + { + "epoch": 16.260579064587972, + "loss": 0.6911599636077881, + "loss_ce": 0.00011992135114269331, + "loss_iou": 0.275390625, + "loss_num": 0.0279541015625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 409084124, + "step": 7301 + }, + { + "epoch": 16.262806236080177, + "grad_norm": 14.751253128051758, + "learning_rate": 1e-06, + "loss": 0.3899, + "num_input_tokens_seen": 409141304, + "step": 7302 + }, + { + "epoch": 16.262806236080177, + "loss": 0.47578924894332886, + "loss_ce": 8.124149462673813e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01611328125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 409141304, + "step": 7302 + }, + { + "epoch": 16.265033407572382, + "grad_norm": 14.404290199279785, + "learning_rate": 1e-06, + "loss": 0.3463, + "num_input_tokens_seen": 409197796, + "step": 7303 + }, + { + "epoch": 16.265033407572382, + "loss": 0.26949194073677063, + "loss_ce": 8.277669257950038e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.003570556640625, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 409197796, + "step": 7303 + }, + { + "epoch": 16.267260579064587, + "grad_norm": 20.53440284729004, + "learning_rate": 1e-06, + "loss": 0.3827, + "num_input_tokens_seen": 409252580, + "step": 7304 + }, + { + "epoch": 16.267260579064587, + "loss": 0.4455221891403198, + "loss_ce": 8.760465425439179e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 409252580, + "step": 7304 + }, + { + "epoch": 16.26948775055679, + "grad_norm": 28.248842239379883, + "learning_rate": 1e-06, + "loss": 0.6714, + "num_input_tokens_seen": 409309884, + "step": 7305 + }, + { + "epoch": 16.26948775055679, + "loss": 0.570942759513855, + "loss_ce": 0.0006302678375504911, + "loss_iou": 0.2412109375, + "loss_num": 0.017822265625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 409309884, + "step": 7305 + }, + { + "epoch": 16.271714922048996, + "grad_norm": 15.996672630310059, + "learning_rate": 1e-06, + "loss": 0.5986, + "num_input_tokens_seen": 409366592, + "step": 7306 + }, + { + "epoch": 16.271714922048996, + "loss": 0.5386756658554077, + "loss_ce": 0.000101389319752343, + "loss_iou": 0.2216796875, + "loss_num": 0.0189208984375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 409366592, + "step": 7306 + }, + { + "epoch": 16.2739420935412, + "grad_norm": 25.504066467285156, + "learning_rate": 1e-06, + "loss": 0.4661, + "num_input_tokens_seen": 409423384, + "step": 7307 + }, + { + "epoch": 16.2739420935412, + "loss": 0.5826142430305481, + "loss_ce": 9.470840450376272e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0107421875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 409423384, + "step": 7307 + }, + { + "epoch": 16.276169265033406, + "grad_norm": 16.374908447265625, + "learning_rate": 1e-06, + "loss": 0.5241, + "num_input_tokens_seen": 409479828, + "step": 7308 + }, + { + "epoch": 16.276169265033406, + "loss": 0.6848694086074829, + "loss_ce": 0.00017701313481666148, + "loss_iou": 0.30859375, + "loss_num": 0.01336669921875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 409479828, + "step": 7308 + }, + { + "epoch": 16.27839643652561, + "grad_norm": 17.140676498413086, + "learning_rate": 1e-06, + "loss": 0.5788, + "num_input_tokens_seen": 409536380, + "step": 7309 + }, + { + "epoch": 16.27839643652561, + "loss": 0.7437722682952881, + "loss_ce": 0.00011992130021099001, + "loss_iou": 0.29296875, + "loss_num": 0.031005859375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 409536380, + "step": 7309 + }, + { + "epoch": 16.280623608017816, + "grad_norm": 14.995177268981934, + "learning_rate": 1e-06, + "loss": 0.4511, + "num_input_tokens_seen": 409594828, + "step": 7310 + }, + { + "epoch": 16.280623608017816, + "loss": 0.3734976649284363, + "loss_ce": 8.45702743390575e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00933837890625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 409594828, + "step": 7310 + }, + { + "epoch": 16.28285077951002, + "grad_norm": 16.360715866088867, + "learning_rate": 1e-06, + "loss": 0.4679, + "num_input_tokens_seen": 409650308, + "step": 7311 + }, + { + "epoch": 16.28285077951002, + "loss": 0.4848405718803406, + "loss_ce": 9.936667629517615e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.0098876953125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 409650308, + "step": 7311 + }, + { + "epoch": 16.285077951002226, + "grad_norm": 21.59230613708496, + "learning_rate": 1e-06, + "loss": 0.4159, + "num_input_tokens_seen": 409706112, + "step": 7312 + }, + { + "epoch": 16.285077951002226, + "loss": 0.5259721875190735, + "loss_ce": 9.327764564659446e-05, + "loss_iou": 0.251953125, + "loss_num": 0.004608154296875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 409706112, + "step": 7312 + }, + { + "epoch": 16.28730512249443, + "grad_norm": 15.556495666503906, + "learning_rate": 1e-06, + "loss": 0.3937, + "num_input_tokens_seen": 409763944, + "step": 7313 + }, + { + "epoch": 16.28730512249443, + "loss": 0.3481227159500122, + "loss_ce": 0.00010025159281212837, + "loss_iou": 0.1611328125, + "loss_num": 0.0052490234375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 409763944, + "step": 7313 + }, + { + "epoch": 16.289532293986635, + "grad_norm": 17.400310516357422, + "learning_rate": 1e-06, + "loss": 0.4426, + "num_input_tokens_seen": 409815968, + "step": 7314 + }, + { + "epoch": 16.289532293986635, + "loss": 0.4380715787410736, + "loss_ce": 8.330351556651294e-05, + "loss_iou": 0.18359375, + "loss_num": 0.01416015625, + "loss_xval": 0.4375, + "num_input_tokens_seen": 409815968, + "step": 7314 + }, + { + "epoch": 16.29175946547884, + "grad_norm": 17.484792709350586, + "learning_rate": 1e-06, + "loss": 0.3899, + "num_input_tokens_seen": 409870240, + "step": 7315 + }, + { + "epoch": 16.29175946547884, + "loss": 0.42489010095596313, + "loss_ce": 8.540091221220791e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.0164794921875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 409870240, + "step": 7315 + }, + { + "epoch": 16.293986636971045, + "grad_norm": 14.591642379760742, + "learning_rate": 1e-06, + "loss": 0.3274, + "num_input_tokens_seen": 409926680, + "step": 7316 + }, + { + "epoch": 16.293986636971045, + "loss": 0.2797589600086212, + "loss_ce": 9.588080865796655e-05, + "loss_iou": 0.126953125, + "loss_num": 0.005126953125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 409926680, + "step": 7316 + }, + { + "epoch": 16.29621380846325, + "grad_norm": 13.230761528015137, + "learning_rate": 1e-06, + "loss": 0.3727, + "num_input_tokens_seen": 409985004, + "step": 7317 + }, + { + "epoch": 16.29621380846325, + "loss": 0.3750907778739929, + "loss_ce": 9.078568109543994e-05, + "loss_iou": 0.15625, + "loss_num": 0.0125732421875, + "loss_xval": 0.375, + "num_input_tokens_seen": 409985004, + "step": 7317 + }, + { + "epoch": 16.29844097995546, + "grad_norm": 32.36854934692383, + "learning_rate": 1e-06, + "loss": 0.4765, + "num_input_tokens_seen": 410040396, + "step": 7318 + }, + { + "epoch": 16.29844097995546, + "loss": 0.43508851528167725, + "loss_ce": 9.096259600482881e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.008544921875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 410040396, + "step": 7318 + }, + { + "epoch": 16.30066815144766, + "grad_norm": 20.330373764038086, + "learning_rate": 1e-06, + "loss": 0.3185, + "num_input_tokens_seen": 410093968, + "step": 7319 + }, + { + "epoch": 16.30066815144766, + "loss": 0.3120901584625244, + "loss_ce": 7.842134800739586e-05, + "loss_iou": 0.134765625, + "loss_num": 0.008544921875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 410093968, + "step": 7319 + }, + { + "epoch": 16.302895322939868, + "grad_norm": 15.776129722595215, + "learning_rate": 1e-06, + "loss": 0.3339, + "num_input_tokens_seen": 410149384, + "step": 7320 + }, + { + "epoch": 16.302895322939868, + "loss": 0.47568702697753906, + "loss_ce": 0.00010110878793057054, + "loss_iou": 0.2041015625, + "loss_num": 0.01348876953125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 410149384, + "step": 7320 + }, + { + "epoch": 16.305122494432073, + "grad_norm": 21.19259262084961, + "learning_rate": 1e-06, + "loss": 0.5786, + "num_input_tokens_seen": 410206596, + "step": 7321 + }, + { + "epoch": 16.305122494432073, + "loss": 0.36239707469940186, + "loss_ce": 9.236540063284338e-05, + "loss_iou": 0.1640625, + "loss_num": 0.006866455078125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 410206596, + "step": 7321 + }, + { + "epoch": 16.307349665924278, + "grad_norm": 22.16942596435547, + "learning_rate": 1e-06, + "loss": 0.4556, + "num_input_tokens_seen": 410261328, + "step": 7322 + }, + { + "epoch": 16.307349665924278, + "loss": 0.5388376712799072, + "loss_ce": 8.029842138057575e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.023681640625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 410261328, + "step": 7322 + }, + { + "epoch": 16.309576837416483, + "grad_norm": 21.34149169921875, + "learning_rate": 1e-06, + "loss": 0.4369, + "num_input_tokens_seen": 410317892, + "step": 7323 + }, + { + "epoch": 16.309576837416483, + "loss": 0.5326899886131287, + "loss_ce": 9.718516957946122e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.01318359375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 410317892, + "step": 7323 + }, + { + "epoch": 16.311804008908688, + "grad_norm": 22.547149658203125, + "learning_rate": 1e-06, + "loss": 0.3742, + "num_input_tokens_seen": 410372852, + "step": 7324 + }, + { + "epoch": 16.311804008908688, + "loss": 0.3052588105201721, + "loss_ce": 8.303693175548688e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.007659912109375, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 410372852, + "step": 7324 + }, + { + "epoch": 16.314031180400892, + "grad_norm": 24.133289337158203, + "learning_rate": 1e-06, + "loss": 0.3867, + "num_input_tokens_seen": 410428208, + "step": 7325 + }, + { + "epoch": 16.314031180400892, + "loss": 0.3308970034122467, + "loss_ce": 8.646737842354923e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.006256103515625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 410428208, + "step": 7325 + }, + { + "epoch": 16.316258351893097, + "grad_norm": 27.495380401611328, + "learning_rate": 1e-06, + "loss": 0.5133, + "num_input_tokens_seen": 410483916, + "step": 7326 + }, + { + "epoch": 16.316258351893097, + "loss": 0.5182612538337708, + "loss_ce": 7.278715202119201e-05, + "loss_iou": 0.2421875, + "loss_num": 0.0068359375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 410483916, + "step": 7326 + }, + { + "epoch": 16.318485523385302, + "grad_norm": 26.30192756652832, + "learning_rate": 1e-06, + "loss": 0.3812, + "num_input_tokens_seen": 410539204, + "step": 7327 + }, + { + "epoch": 16.318485523385302, + "loss": 0.47011250257492065, + "loss_ce": 0.00014180486323311925, + "loss_iou": 0.1982421875, + "loss_num": 0.01483154296875, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 410539204, + "step": 7327 + }, + { + "epoch": 16.320712694877507, + "grad_norm": 17.40894317626953, + "learning_rate": 1e-06, + "loss": 0.4687, + "num_input_tokens_seen": 410594636, + "step": 7328 + }, + { + "epoch": 16.320712694877507, + "loss": 0.44930049777030945, + "loss_ce": 8.17526743048802e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.01226806640625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 410594636, + "step": 7328 + }, + { + "epoch": 16.322939866369712, + "grad_norm": 21.12818145751953, + "learning_rate": 1e-06, + "loss": 0.3845, + "num_input_tokens_seen": 410651876, + "step": 7329 + }, + { + "epoch": 16.322939866369712, + "loss": 0.3459153175354004, + "loss_ce": 9.013438830152154e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.003936767578125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 410651876, + "step": 7329 + }, + { + "epoch": 16.325167037861917, + "grad_norm": 10.89534854888916, + "learning_rate": 1e-06, + "loss": 0.5001, + "num_input_tokens_seen": 410706904, + "step": 7330 + }, + { + "epoch": 16.325167037861917, + "loss": 0.46097275614738464, + "loss_ce": 9.627400140743703e-05, + "loss_iou": 0.205078125, + "loss_num": 0.01019287109375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 410706904, + "step": 7330 + }, + { + "epoch": 16.32739420935412, + "grad_norm": 20.05289077758789, + "learning_rate": 1e-06, + "loss": 0.4095, + "num_input_tokens_seen": 410763772, + "step": 7331 + }, + { + "epoch": 16.32739420935412, + "loss": 0.3861430883407593, + "loss_ce": 9.571410191711038e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.0096435546875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 410763772, + "step": 7331 + }, + { + "epoch": 16.329621380846326, + "grad_norm": 15.384933471679688, + "learning_rate": 1e-06, + "loss": 0.3819, + "num_input_tokens_seen": 410823420, + "step": 7332 + }, + { + "epoch": 16.329621380846326, + "loss": 0.4961845278739929, + "loss_ce": 9.074924309970811e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.00909423828125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 410823420, + "step": 7332 + }, + { + "epoch": 16.33184855233853, + "grad_norm": 24.686939239501953, + "learning_rate": 1e-06, + "loss": 0.4537, + "num_input_tokens_seen": 410877940, + "step": 7333 + }, + { + "epoch": 16.33184855233853, + "loss": 0.5593637824058533, + "loss_ce": 0.00015970882668625563, + "loss_iou": 0.234375, + "loss_num": 0.018310546875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 410877940, + "step": 7333 + }, + { + "epoch": 16.334075723830736, + "grad_norm": 22.968791961669922, + "learning_rate": 1e-06, + "loss": 0.5139, + "num_input_tokens_seen": 410932476, + "step": 7334 + }, + { + "epoch": 16.334075723830736, + "loss": 0.39212775230407715, + "loss_ce": 9.897441486828029e-05, + "loss_iou": 0.171875, + "loss_num": 0.00958251953125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 410932476, + "step": 7334 + }, + { + "epoch": 16.33630289532294, + "grad_norm": 15.322009086608887, + "learning_rate": 1e-06, + "loss": 0.4757, + "num_input_tokens_seen": 410990488, + "step": 7335 + }, + { + "epoch": 16.33630289532294, + "loss": 0.5929901003837585, + "loss_ce": 9.460109868086874e-05, + "loss_iou": 0.240234375, + "loss_num": 0.0223388671875, + "loss_xval": 0.59375, + "num_input_tokens_seen": 410990488, + "step": 7335 + }, + { + "epoch": 16.338530066815146, + "grad_norm": 34.48351287841797, + "learning_rate": 1e-06, + "loss": 0.5019, + "num_input_tokens_seen": 411046180, + "step": 7336 + }, + { + "epoch": 16.338530066815146, + "loss": 0.581402063369751, + "loss_ce": 0.00010323138849344105, + "loss_iou": 0.2578125, + "loss_num": 0.0135498046875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 411046180, + "step": 7336 + }, + { + "epoch": 16.34075723830735, + "grad_norm": 16.476621627807617, + "learning_rate": 1e-06, + "loss": 0.3056, + "num_input_tokens_seen": 411102856, + "step": 7337 + }, + { + "epoch": 16.34075723830735, + "loss": 0.1867981106042862, + "loss_ce": 9.157234308077022e-05, + "loss_iou": 0.08251953125, + "loss_num": 0.00439453125, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 411102856, + "step": 7337 + }, + { + "epoch": 16.342984409799556, + "grad_norm": 19.494869232177734, + "learning_rate": 1e-06, + "loss": 0.3802, + "num_input_tokens_seen": 411159860, + "step": 7338 + }, + { + "epoch": 16.342984409799556, + "loss": 0.3621300756931305, + "loss_ce": 0.0003136666491627693, + "loss_iou": 0.1455078125, + "loss_num": 0.01422119140625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 411159860, + "step": 7338 + }, + { + "epoch": 16.34521158129176, + "grad_norm": 19.032352447509766, + "learning_rate": 1e-06, + "loss": 0.4447, + "num_input_tokens_seen": 411215444, + "step": 7339 + }, + { + "epoch": 16.34521158129176, + "loss": 0.3872949481010437, + "loss_ce": 8.788703416939825e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.0130615234375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 411215444, + "step": 7339 + }, + { + "epoch": 16.347438752783965, + "grad_norm": 18.541751861572266, + "learning_rate": 1e-06, + "loss": 0.4312, + "num_input_tokens_seen": 411270292, + "step": 7340 + }, + { + "epoch": 16.347438752783965, + "loss": 0.5562987923622131, + "loss_ce": 8.540366252418607e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.015380859375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 411270292, + "step": 7340 + }, + { + "epoch": 16.34966592427617, + "grad_norm": 13.023123741149902, + "learning_rate": 1e-06, + "loss": 0.5157, + "num_input_tokens_seen": 411327140, + "step": 7341 + }, + { + "epoch": 16.34966592427617, + "loss": 0.4642055630683899, + "loss_ce": 9.424821473658085e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.0157470703125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 411327140, + "step": 7341 + }, + { + "epoch": 16.351893095768375, + "grad_norm": 18.090694427490234, + "learning_rate": 1e-06, + "loss": 0.428, + "num_input_tokens_seen": 411383152, + "step": 7342 + }, + { + "epoch": 16.351893095768375, + "loss": 0.46582120656967163, + "loss_ce": 0.0009774666978046298, + "loss_iou": 0.2021484375, + "loss_num": 0.011962890625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 411383152, + "step": 7342 + }, + { + "epoch": 16.35412026726058, + "grad_norm": 16.27545928955078, + "learning_rate": 1e-06, + "loss": 0.3941, + "num_input_tokens_seen": 411438268, + "step": 7343 + }, + { + "epoch": 16.35412026726058, + "loss": 0.41550177335739136, + "loss_ce": 9.647855767980218e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0068359375, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 411438268, + "step": 7343 + }, + { + "epoch": 16.356347438752785, + "grad_norm": 22.559267044067383, + "learning_rate": 1e-06, + "loss": 0.4177, + "num_input_tokens_seen": 411493464, + "step": 7344 + }, + { + "epoch": 16.356347438752785, + "loss": 0.4500234127044678, + "loss_ce": 7.222830026876181e-05, + "loss_iou": 0.19140625, + "loss_num": 0.01348876953125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 411493464, + "step": 7344 + }, + { + "epoch": 16.35857461024499, + "grad_norm": 17.649221420288086, + "learning_rate": 1e-06, + "loss": 0.3428, + "num_input_tokens_seen": 411550844, + "step": 7345 + }, + { + "epoch": 16.35857461024499, + "loss": 0.46036189794540405, + "loss_ce": 9.580078767612576e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.013671875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 411550844, + "step": 7345 + }, + { + "epoch": 16.360801781737194, + "grad_norm": 19.68630027770996, + "learning_rate": 1e-06, + "loss": 0.3448, + "num_input_tokens_seen": 411606484, + "step": 7346 + }, + { + "epoch": 16.360801781737194, + "loss": 0.31284722685813904, + "loss_ce": 0.00010307719639968127, + "loss_iou": 0.14453125, + "loss_num": 0.00482177734375, + "loss_xval": 0.3125, + "num_input_tokens_seen": 411606484, + "step": 7346 + }, + { + "epoch": 16.3630289532294, + "grad_norm": 23.66335678100586, + "learning_rate": 1e-06, + "loss": 0.4744, + "num_input_tokens_seen": 411662964, + "step": 7347 + }, + { + "epoch": 16.3630289532294, + "loss": 0.38772842288017273, + "loss_ce": 0.00015515368431806564, + "loss_iou": 0.171875, + "loss_num": 0.00885009765625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 411662964, + "step": 7347 + }, + { + "epoch": 16.365256124721604, + "grad_norm": 16.446657180786133, + "learning_rate": 1e-06, + "loss": 0.4439, + "num_input_tokens_seen": 411718656, + "step": 7348 + }, + { + "epoch": 16.365256124721604, + "loss": 0.40172719955444336, + "loss_ce": 0.00011588144116103649, + "loss_iou": 0.16796875, + "loss_num": 0.01300048828125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 411718656, + "step": 7348 + }, + { + "epoch": 16.36748329621381, + "grad_norm": 12.772607803344727, + "learning_rate": 1e-06, + "loss": 0.354, + "num_input_tokens_seen": 411774636, + "step": 7349 + }, + { + "epoch": 16.36748329621381, + "loss": 0.3649415373802185, + "loss_ce": 7.335752889048308e-05, + "loss_iou": 0.154296875, + "loss_num": 0.0113525390625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 411774636, + "step": 7349 + }, + { + "epoch": 16.369710467706014, + "grad_norm": 199.87049865722656, + "learning_rate": 1e-06, + "loss": 0.4038, + "num_input_tokens_seen": 411828580, + "step": 7350 + }, + { + "epoch": 16.369710467706014, + "loss": 0.5851800441741943, + "loss_ce": 9.696916822576895e-05, + "loss_iou": 0.26171875, + "loss_num": 0.01251220703125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 411828580, + "step": 7350 + }, + { + "epoch": 16.37193763919822, + "grad_norm": 13.645878791809082, + "learning_rate": 1e-06, + "loss": 0.3657, + "num_input_tokens_seen": 411886588, + "step": 7351 + }, + { + "epoch": 16.37193763919822, + "loss": 0.5355052947998047, + "loss_ce": 0.00010485852544661611, + "loss_iou": 0.2138671875, + "loss_num": 0.021728515625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 411886588, + "step": 7351 + }, + { + "epoch": 16.374164810690424, + "grad_norm": 16.982749938964844, + "learning_rate": 1e-06, + "loss": 0.4847, + "num_input_tokens_seen": 411944540, + "step": 7352 + }, + { + "epoch": 16.374164810690424, + "loss": 0.48775994777679443, + "loss_ce": 8.901581168174744e-05, + "loss_iou": 0.212890625, + "loss_num": 0.01220703125, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 411944540, + "step": 7352 + }, + { + "epoch": 16.37639198218263, + "grad_norm": 19.0021915435791, + "learning_rate": 1e-06, + "loss": 0.5334, + "num_input_tokens_seen": 412002580, + "step": 7353 + }, + { + "epoch": 16.37639198218263, + "loss": 0.7117968797683716, + "loss_ce": 0.00012699057697318494, + "loss_iou": 0.32421875, + "loss_num": 0.0123291015625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 412002580, + "step": 7353 + }, + { + "epoch": 16.378619153674833, + "grad_norm": 16.590665817260742, + "learning_rate": 1e-06, + "loss": 0.4343, + "num_input_tokens_seen": 412059804, + "step": 7354 + }, + { + "epoch": 16.378619153674833, + "loss": 0.41806820034980774, + "loss_ce": 9.94568836176768e-05, + "loss_iou": 0.1796875, + "loss_num": 0.01165771484375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 412059804, + "step": 7354 + }, + { + "epoch": 16.380846325167038, + "grad_norm": 14.91476821899414, + "learning_rate": 1e-06, + "loss": 0.389, + "num_input_tokens_seen": 412117424, + "step": 7355 + }, + { + "epoch": 16.380846325167038, + "loss": 0.429162859916687, + "loss_ce": 8.569219789933413e-05, + "loss_iou": 0.17578125, + "loss_num": 0.01531982421875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 412117424, + "step": 7355 + }, + { + "epoch": 16.383073496659243, + "grad_norm": 21.37236976623535, + "learning_rate": 1e-06, + "loss": 0.3955, + "num_input_tokens_seen": 412175316, + "step": 7356 + }, + { + "epoch": 16.383073496659243, + "loss": 0.471897691488266, + "loss_ce": 9.593518188921735e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.009521484375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 412175316, + "step": 7356 + }, + { + "epoch": 16.385300668151448, + "grad_norm": 23.095491409301758, + "learning_rate": 1e-06, + "loss": 0.5033, + "num_input_tokens_seen": 412232752, + "step": 7357 + }, + { + "epoch": 16.385300668151448, + "loss": 0.42098525166511536, + "loss_ce": 8.681518374942243e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0125732421875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 412232752, + "step": 7357 + }, + { + "epoch": 16.387527839643653, + "grad_norm": 73.42839813232422, + "learning_rate": 1e-06, + "loss": 0.6915, + "num_input_tokens_seen": 412284664, + "step": 7358 + }, + { + "epoch": 16.387527839643653, + "loss": 0.7482743859291077, + "loss_ce": 0.00010545070108491927, + "loss_iou": 0.318359375, + "loss_num": 0.02197265625, + "loss_xval": 0.75, + "num_input_tokens_seen": 412284664, + "step": 7358 + }, + { + "epoch": 16.389755011135858, + "grad_norm": 24.8407039642334, + "learning_rate": 1e-06, + "loss": 0.5937, + "num_input_tokens_seen": 412342800, + "step": 7359 + }, + { + "epoch": 16.389755011135858, + "loss": 0.5182784795761108, + "loss_ce": 9.002141450764611e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.00799560546875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 412342800, + "step": 7359 + }, + { + "epoch": 16.391982182628063, + "grad_norm": 19.556819915771484, + "learning_rate": 1e-06, + "loss": 0.4151, + "num_input_tokens_seen": 412397900, + "step": 7360 + }, + { + "epoch": 16.391982182628063, + "loss": 0.3638564348220825, + "loss_ce": 8.691055700182915e-05, + "loss_iou": 0.15625, + "loss_num": 0.010498046875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 412397900, + "step": 7360 + }, + { + "epoch": 16.394209354120267, + "grad_norm": 27.42656707763672, + "learning_rate": 1e-06, + "loss": 0.4955, + "num_input_tokens_seen": 412454180, + "step": 7361 + }, + { + "epoch": 16.394209354120267, + "loss": 0.5466644763946533, + "loss_ce": 9.469907672610134e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01226806640625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 412454180, + "step": 7361 + }, + { + "epoch": 16.396436525612472, + "grad_norm": 12.27857780456543, + "learning_rate": 1e-06, + "loss": 0.3187, + "num_input_tokens_seen": 412511772, + "step": 7362 + }, + { + "epoch": 16.396436525612472, + "loss": 0.31820547580718994, + "loss_ce": 9.021394362207502e-05, + "loss_iou": 0.13671875, + "loss_num": 0.00885009765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 412511772, + "step": 7362 + }, + { + "epoch": 16.398663697104677, + "grad_norm": 15.365641593933105, + "learning_rate": 1e-06, + "loss": 0.4709, + "num_input_tokens_seen": 412565312, + "step": 7363 + }, + { + "epoch": 16.398663697104677, + "loss": 0.44320639967918396, + "loss_ce": 9.116313594859093e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0172119140625, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 412565312, + "step": 7363 + }, + { + "epoch": 16.400890868596882, + "grad_norm": 16.197721481323242, + "learning_rate": 1e-06, + "loss": 0.4165, + "num_input_tokens_seen": 412619724, + "step": 7364 + }, + { + "epoch": 16.400890868596882, + "loss": 0.3934442400932312, + "loss_ce": 0.00013366495841182768, + "loss_iou": 0.177734375, + "loss_num": 0.0074462890625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 412619724, + "step": 7364 + }, + { + "epoch": 16.403118040089087, + "grad_norm": 19.257944107055664, + "learning_rate": 1e-06, + "loss": 0.4529, + "num_input_tokens_seen": 412672252, + "step": 7365 + }, + { + "epoch": 16.403118040089087, + "loss": 0.3346201777458191, + "loss_ce": 8.649235678603873e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00689697265625, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 412672252, + "step": 7365 + }, + { + "epoch": 16.40534521158129, + "grad_norm": 35.989013671875, + "learning_rate": 1e-06, + "loss": 0.4365, + "num_input_tokens_seen": 412727280, + "step": 7366 + }, + { + "epoch": 16.40534521158129, + "loss": 0.46420300006866455, + "loss_ce": 9.16688732104376e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0137939453125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 412727280, + "step": 7366 + }, + { + "epoch": 16.407572383073497, + "grad_norm": 19.286439895629883, + "learning_rate": 1e-06, + "loss": 0.4871, + "num_input_tokens_seen": 412782212, + "step": 7367 + }, + { + "epoch": 16.407572383073497, + "loss": 0.4057268500328064, + "loss_ce": 8.719813922652975e-05, + "loss_iou": 0.189453125, + "loss_num": 0.00531005859375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 412782212, + "step": 7367 + }, + { + "epoch": 16.4097995545657, + "grad_norm": 12.866009712219238, + "learning_rate": 1e-06, + "loss": 0.473, + "num_input_tokens_seen": 412836960, + "step": 7368 + }, + { + "epoch": 16.4097995545657, + "loss": 0.5923727750778198, + "loss_ce": 8.767165127210319e-05, + "loss_iou": 0.24609375, + "loss_num": 0.02001953125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 412836960, + "step": 7368 + }, + { + "epoch": 16.412026726057906, + "grad_norm": 18.949840545654297, + "learning_rate": 1e-06, + "loss": 0.4169, + "num_input_tokens_seen": 412894060, + "step": 7369 + }, + { + "epoch": 16.412026726057906, + "loss": 0.5453881025314331, + "loss_ce": 0.00022210404858924448, + "loss_iou": 0.2373046875, + "loss_num": 0.01397705078125, + "loss_xval": 0.546875, + "num_input_tokens_seen": 412894060, + "step": 7369 + }, + { + "epoch": 16.41425389755011, + "grad_norm": 16.93563461303711, + "learning_rate": 1e-06, + "loss": 0.4276, + "num_input_tokens_seen": 412950112, + "step": 7370 + }, + { + "epoch": 16.41425389755011, + "loss": 0.383827805519104, + "loss_ce": 0.00013031261914875358, + "loss_iou": 0.1669921875, + "loss_num": 0.01007080078125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 412950112, + "step": 7370 + }, + { + "epoch": 16.416481069042316, + "grad_norm": 20.606613159179688, + "learning_rate": 1e-06, + "loss": 0.4678, + "num_input_tokens_seen": 413005572, + "step": 7371 + }, + { + "epoch": 16.416481069042316, + "loss": 0.5181921720504761, + "loss_ce": 0.00012574487482197583, + "loss_iou": 0.22265625, + "loss_num": 0.0142822265625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 413005572, + "step": 7371 + }, + { + "epoch": 16.41870824053452, + "grad_norm": 24.132949829101562, + "learning_rate": 1e-06, + "loss": 0.5933, + "num_input_tokens_seen": 413061068, + "step": 7372 + }, + { + "epoch": 16.41870824053452, + "loss": 0.5776008367538452, + "loss_ce": 8.622092718724161e-05, + "loss_iou": 0.263671875, + "loss_num": 0.01007080078125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 413061068, + "step": 7372 + }, + { + "epoch": 16.420935412026726, + "grad_norm": 15.339935302734375, + "learning_rate": 1e-06, + "loss": 0.5645, + "num_input_tokens_seen": 413118180, + "step": 7373 + }, + { + "epoch": 16.420935412026726, + "loss": 0.5817098617553711, + "loss_ce": 0.0001058535126503557, + "loss_iou": 0.25390625, + "loss_num": 0.014892578125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 413118180, + "step": 7373 + }, + { + "epoch": 16.42316258351893, + "grad_norm": 17.126455307006836, + "learning_rate": 1e-06, + "loss": 0.4166, + "num_input_tokens_seen": 413176160, + "step": 7374 + }, + { + "epoch": 16.42316258351893, + "loss": 0.3593493700027466, + "loss_ce": 9.64459904935211e-05, + "loss_iou": 0.158203125, + "loss_num": 0.008544921875, + "loss_xval": 0.359375, + "num_input_tokens_seen": 413176160, + "step": 7374 + }, + { + "epoch": 16.425389755011135, + "grad_norm": 13.592398643493652, + "learning_rate": 1e-06, + "loss": 0.2656, + "num_input_tokens_seen": 413233164, + "step": 7375 + }, + { + "epoch": 16.425389755011135, + "loss": 0.23620259761810303, + "loss_ce": 8.810235158307478e-05, + "loss_iou": 0.10498046875, + "loss_num": 0.00506591796875, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 413233164, + "step": 7375 + }, + { + "epoch": 16.42761692650334, + "grad_norm": 13.771684646606445, + "learning_rate": 1e-06, + "loss": 0.4194, + "num_input_tokens_seen": 413290152, + "step": 7376 + }, + { + "epoch": 16.42761692650334, + "loss": 0.410991370677948, + "loss_ce": 0.00010270239727105945, + "loss_iou": 0.171875, + "loss_num": 0.013427734375, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 413290152, + "step": 7376 + }, + { + "epoch": 16.429844097995545, + "grad_norm": 12.945503234863281, + "learning_rate": 1e-06, + "loss": 0.2917, + "num_input_tokens_seen": 413346308, + "step": 7377 + }, + { + "epoch": 16.429844097995545, + "loss": 0.2603330910205841, + "loss_ce": 7.917418406577781e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.004119873046875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 413346308, + "step": 7377 + }, + { + "epoch": 16.43207126948775, + "grad_norm": 31.2889404296875, + "learning_rate": 1e-06, + "loss": 0.5393, + "num_input_tokens_seen": 413400924, + "step": 7378 + }, + { + "epoch": 16.43207126948775, + "loss": 0.45716169476509094, + "loss_ce": 9.228027192875743e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0078125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 413400924, + "step": 7378 + }, + { + "epoch": 16.434298440979955, + "grad_norm": 18.560880661010742, + "learning_rate": 1e-06, + "loss": 0.4677, + "num_input_tokens_seen": 413456024, + "step": 7379 + }, + { + "epoch": 16.434298440979955, + "loss": 0.580654501914978, + "loss_ce": 8.805579273030162e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.01806640625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 413456024, + "step": 7379 + }, + { + "epoch": 16.43652561247216, + "grad_norm": 16.95804786682129, + "learning_rate": 1e-06, + "loss": 0.6411, + "num_input_tokens_seen": 413513556, + "step": 7380 + }, + { + "epoch": 16.43652561247216, + "loss": 0.973118245601654, + "loss_ce": 9.582037455402315e-05, + "loss_iou": 0.37890625, + "loss_num": 0.04296875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 413513556, + "step": 7380 + }, + { + "epoch": 16.438752783964365, + "grad_norm": 17.053752899169922, + "learning_rate": 1e-06, + "loss": 0.4378, + "num_input_tokens_seen": 413569980, + "step": 7381 + }, + { + "epoch": 16.438752783964365, + "loss": 0.42026323080062866, + "loss_ce": 9.722512913867831e-05, + "loss_iou": 0.1796875, + "loss_num": 0.012451171875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 413569980, + "step": 7381 + }, + { + "epoch": 16.44097995545657, + "grad_norm": 18.919458389282227, + "learning_rate": 1e-06, + "loss": 0.4147, + "num_input_tokens_seen": 413627584, + "step": 7382 + }, + { + "epoch": 16.44097995545657, + "loss": 0.27885180711746216, + "loss_ce": 0.00010426974768051878, + "loss_iou": 0.11865234375, + "loss_num": 0.0081787109375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 413627584, + "step": 7382 + }, + { + "epoch": 16.443207126948774, + "grad_norm": 14.903229713439941, + "learning_rate": 1e-06, + "loss": 0.3204, + "num_input_tokens_seen": 413684008, + "step": 7383 + }, + { + "epoch": 16.443207126948774, + "loss": 0.30172598361968994, + "loss_ce": 9.026764746522531e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.007720947265625, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 413684008, + "step": 7383 + }, + { + "epoch": 16.44543429844098, + "grad_norm": 20.07790184020996, + "learning_rate": 1e-06, + "loss": 0.4495, + "num_input_tokens_seen": 413740804, + "step": 7384 + }, + { + "epoch": 16.44543429844098, + "loss": 0.4181979298591614, + "loss_ce": 0.00010710630158428103, + "loss_iou": 0.1953125, + "loss_num": 0.00555419921875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 413740804, + "step": 7384 + }, + { + "epoch": 16.447661469933184, + "grad_norm": 26.779462814331055, + "learning_rate": 1e-06, + "loss": 0.4354, + "num_input_tokens_seen": 413795972, + "step": 7385 + }, + { + "epoch": 16.447661469933184, + "loss": 0.5660018920898438, + "loss_ce": 8.394767064601183e-05, + "loss_iou": 0.251953125, + "loss_num": 0.01214599609375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 413795972, + "step": 7385 + }, + { + "epoch": 16.44988864142539, + "grad_norm": 33.413516998291016, + "learning_rate": 1e-06, + "loss": 0.3646, + "num_input_tokens_seen": 413850008, + "step": 7386 + }, + { + "epoch": 16.44988864142539, + "loss": 0.35230499505996704, + "loss_ce": 0.00019320733554195613, + "loss_iou": 0.16015625, + "loss_num": 0.006378173828125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 413850008, + "step": 7386 + }, + { + "epoch": 16.452115812917594, + "grad_norm": 16.137666702270508, + "learning_rate": 1e-06, + "loss": 0.3793, + "num_input_tokens_seen": 413908860, + "step": 7387 + }, + { + "epoch": 16.452115812917594, + "loss": 0.37570327520370483, + "loss_ce": 9.29224188439548e-05, + "loss_iou": 0.15625, + "loss_num": 0.012451171875, + "loss_xval": 0.375, + "num_input_tokens_seen": 413908860, + "step": 7387 + }, + { + "epoch": 16.4543429844098, + "grad_norm": 27.6522274017334, + "learning_rate": 1e-06, + "loss": 0.3894, + "num_input_tokens_seen": 413964468, + "step": 7388 + }, + { + "epoch": 16.4543429844098, + "loss": 0.36471468210220337, + "loss_ce": 9.06552595552057e-05, + "loss_iou": 0.16796875, + "loss_num": 0.005859375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 413964468, + "step": 7388 + }, + { + "epoch": 16.456570155902003, + "grad_norm": 19.11590003967285, + "learning_rate": 1e-06, + "loss": 0.4466, + "num_input_tokens_seen": 414020268, + "step": 7389 + }, + { + "epoch": 16.456570155902003, + "loss": 0.39550697803497314, + "loss_ce": 0.00012124592467444018, + "loss_iou": 0.16796875, + "loss_num": 0.01220703125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 414020268, + "step": 7389 + }, + { + "epoch": 16.45879732739421, + "grad_norm": 12.807024002075195, + "learning_rate": 1e-06, + "loss": 0.3725, + "num_input_tokens_seen": 414079048, + "step": 7390 + }, + { + "epoch": 16.45879732739421, + "loss": 0.3609822988510132, + "loss_ce": 8.14046070445329e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.01153564453125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 414079048, + "step": 7390 + }, + { + "epoch": 16.461024498886413, + "grad_norm": 15.505331039428711, + "learning_rate": 1e-06, + "loss": 0.3684, + "num_input_tokens_seen": 414134828, + "step": 7391 + }, + { + "epoch": 16.461024498886413, + "loss": 0.37520790100097656, + "loss_ce": 8.581792644690722e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.01031494140625, + "loss_xval": 0.375, + "num_input_tokens_seen": 414134828, + "step": 7391 + }, + { + "epoch": 16.463251670378618, + "grad_norm": 52.602256774902344, + "learning_rate": 1e-06, + "loss": 0.4731, + "num_input_tokens_seen": 414188716, + "step": 7392 + }, + { + "epoch": 16.463251670378618, + "loss": 0.5707922577857971, + "loss_ce": 8.304757648147643e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.0208740234375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 414188716, + "step": 7392 + }, + { + "epoch": 16.465478841870823, + "grad_norm": 17.48988151550293, + "learning_rate": 1e-06, + "loss": 0.5544, + "num_input_tokens_seen": 414247204, + "step": 7393 + }, + { + "epoch": 16.465478841870823, + "loss": 0.6307286024093628, + "loss_ce": 0.00011338148033246398, + "loss_iou": 0.263671875, + "loss_num": 0.0205078125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 414247204, + "step": 7393 + }, + { + "epoch": 16.467706013363028, + "grad_norm": 20.05725860595703, + "learning_rate": 1e-06, + "loss": 0.4437, + "num_input_tokens_seen": 414302244, + "step": 7394 + }, + { + "epoch": 16.467706013363028, + "loss": 0.4451483488082886, + "loss_ce": 7.999275112524629e-05, + "loss_iou": 0.19921875, + "loss_num": 0.00933837890625, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 414302244, + "step": 7394 + }, + { + "epoch": 16.469933184855233, + "grad_norm": 21.719507217407227, + "learning_rate": 1e-06, + "loss": 0.4859, + "num_input_tokens_seen": 414358148, + "step": 7395 + }, + { + "epoch": 16.469933184855233, + "loss": 0.5905551910400391, + "loss_ce": 0.00019262763089500368, + "loss_iou": 0.23046875, + "loss_num": 0.0257568359375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 414358148, + "step": 7395 + }, + { + "epoch": 16.472160356347437, + "grad_norm": 18.241609573364258, + "learning_rate": 1e-06, + "loss": 0.396, + "num_input_tokens_seen": 414413656, + "step": 7396 + }, + { + "epoch": 16.472160356347437, + "loss": 0.4245295226573944, + "loss_ce": 9.104832133743912e-05, + "loss_iou": 0.18359375, + "loss_num": 0.01153564453125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 414413656, + "step": 7396 + }, + { + "epoch": 16.474387527839642, + "grad_norm": 16.02747344970703, + "learning_rate": 1e-06, + "loss": 0.3917, + "num_input_tokens_seen": 414471236, + "step": 7397 + }, + { + "epoch": 16.474387527839642, + "loss": 0.4211219251155853, + "loss_ce": 0.00010141961683984846, + "loss_iou": 0.19140625, + "loss_num": 0.007781982421875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 414471236, + "step": 7397 + }, + { + "epoch": 16.476614699331847, + "grad_norm": 26.210834503173828, + "learning_rate": 1e-06, + "loss": 0.3714, + "num_input_tokens_seen": 414526900, + "step": 7398 + }, + { + "epoch": 16.476614699331847, + "loss": 0.36032408475875854, + "loss_ce": 9.459605644224212e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.0068359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 414526900, + "step": 7398 + }, + { + "epoch": 16.478841870824052, + "grad_norm": 21.00611114501953, + "learning_rate": 1e-06, + "loss": 0.3868, + "num_input_tokens_seen": 414582196, + "step": 7399 + }, + { + "epoch": 16.478841870824052, + "loss": 0.3853411376476288, + "loss_ce": 8.721851918380708e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0167236328125, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 414582196, + "step": 7399 + }, + { + "epoch": 16.481069042316257, + "grad_norm": 17.40221405029297, + "learning_rate": 1e-06, + "loss": 0.5933, + "num_input_tokens_seen": 414638744, + "step": 7400 + }, + { + "epoch": 16.481069042316257, + "loss": 0.7026082277297974, + "loss_ce": 9.35388816287741e-05, + "loss_iou": 0.28125, + "loss_num": 0.02734375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 414638744, + "step": 7400 + }, + { + "epoch": 16.48329621380846, + "grad_norm": 17.026206970214844, + "learning_rate": 1e-06, + "loss": 0.5015, + "num_input_tokens_seen": 414695508, + "step": 7401 + }, + { + "epoch": 16.48329621380846, + "loss": 0.5965702533721924, + "loss_ce": 0.00013468590623233467, + "loss_iou": 0.2353515625, + "loss_num": 0.02490234375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 414695508, + "step": 7401 + }, + { + "epoch": 16.485523385300667, + "grad_norm": 20.803653717041016, + "learning_rate": 1e-06, + "loss": 0.4458, + "num_input_tokens_seen": 414750064, + "step": 7402 + }, + { + "epoch": 16.485523385300667, + "loss": 0.4915461540222168, + "loss_ce": 0.00015212838479783386, + "loss_iou": 0.216796875, + "loss_num": 0.01165771484375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 414750064, + "step": 7402 + }, + { + "epoch": 16.48775055679287, + "grad_norm": 18.564687728881836, + "learning_rate": 1e-06, + "loss": 0.4463, + "num_input_tokens_seen": 414807600, + "step": 7403 + }, + { + "epoch": 16.48775055679287, + "loss": 0.4674866199493408, + "loss_ce": 7.938436465337873e-05, + "loss_iou": 0.20703125, + "loss_num": 0.01092529296875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 414807600, + "step": 7403 + }, + { + "epoch": 16.489977728285076, + "grad_norm": 19.705589294433594, + "learning_rate": 1e-06, + "loss": 0.4589, + "num_input_tokens_seen": 414862628, + "step": 7404 + }, + { + "epoch": 16.489977728285076, + "loss": 0.524620771408081, + "loss_ce": 8.46216207719408e-05, + "loss_iou": 0.22265625, + "loss_num": 0.0159912109375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 414862628, + "step": 7404 + }, + { + "epoch": 16.49220489977728, + "grad_norm": 15.944623947143555, + "learning_rate": 1e-06, + "loss": 0.2974, + "num_input_tokens_seen": 414921268, + "step": 7405 + }, + { + "epoch": 16.49220489977728, + "loss": 0.3903467059135437, + "loss_ce": 8.790481660980731e-05, + "loss_iou": 0.15625, + "loss_num": 0.0155029296875, + "loss_xval": 0.390625, + "num_input_tokens_seen": 414921268, + "step": 7405 + }, + { + "epoch": 16.494432071269486, + "grad_norm": 16.492647171020508, + "learning_rate": 1e-06, + "loss": 0.3803, + "num_input_tokens_seen": 414979472, + "step": 7406 + }, + { + "epoch": 16.494432071269486, + "loss": 0.46630802750587463, + "loss_ce": 0.00012149002577643842, + "loss_iou": 0.212890625, + "loss_num": 0.00811767578125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 414979472, + "step": 7406 + }, + { + "epoch": 16.49665924276169, + "grad_norm": 19.485671997070312, + "learning_rate": 1e-06, + "loss": 0.4414, + "num_input_tokens_seen": 415036420, + "step": 7407 + }, + { + "epoch": 16.49665924276169, + "loss": 0.3690652847290039, + "loss_ce": 7.723318412899971e-05, + "loss_iou": 0.16015625, + "loss_num": 0.0096435546875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 415036420, + "step": 7407 + }, + { + "epoch": 16.498886414253896, + "grad_norm": 29.99024200439453, + "learning_rate": 1e-06, + "loss": 0.4787, + "num_input_tokens_seen": 415092432, + "step": 7408 + }, + { + "epoch": 16.498886414253896, + "loss": 0.40274959802627563, + "loss_ce": 0.00010066555842058733, + "loss_iou": 0.1787109375, + "loss_num": 0.00909423828125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 415092432, + "step": 7408 + }, + { + "epoch": 16.501113585746104, + "grad_norm": 19.012744903564453, + "learning_rate": 1e-06, + "loss": 0.2874, + "num_input_tokens_seen": 415148876, + "step": 7409 + }, + { + "epoch": 16.501113585746104, + "loss": 0.2875651717185974, + "loss_ce": 8.95664852578193e-05, + "loss_iou": 0.1328125, + "loss_num": 0.00421142578125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 415148876, + "step": 7409 + }, + { + "epoch": 16.50334075723831, + "grad_norm": 14.734273910522461, + "learning_rate": 1e-06, + "loss": 0.6548, + "num_input_tokens_seen": 415204496, + "step": 7410 + }, + { + "epoch": 16.50334075723831, + "loss": 0.7657289505004883, + "loss_ce": 0.00010389051749370992, + "loss_iou": 0.326171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 415204496, + "step": 7410 + }, + { + "epoch": 16.505567928730514, + "grad_norm": 16.262977600097656, + "learning_rate": 1e-06, + "loss": 0.4765, + "num_input_tokens_seen": 415260244, + "step": 7411 + }, + { + "epoch": 16.505567928730514, + "loss": 0.28719890117645264, + "loss_ce": 8.953356882557273e-05, + "loss_iou": 0.126953125, + "loss_num": 0.006439208984375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 415260244, + "step": 7411 + }, + { + "epoch": 16.50779510022272, + "grad_norm": 21.31691551208496, + "learning_rate": 1e-06, + "loss": 0.3835, + "num_input_tokens_seen": 415314104, + "step": 7412 + }, + { + "epoch": 16.50779510022272, + "loss": 0.3603422939777374, + "loss_ce": 0.00011278928286628798, + "loss_iou": 0.1630859375, + "loss_num": 0.00689697265625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 415314104, + "step": 7412 + }, + { + "epoch": 16.510022271714924, + "grad_norm": 20.083385467529297, + "learning_rate": 1e-06, + "loss": 0.4469, + "num_input_tokens_seen": 415370528, + "step": 7413 + }, + { + "epoch": 16.510022271714924, + "loss": 0.42683807015419006, + "loss_ce": 8.025132410693914e-05, + "loss_iou": 0.197265625, + "loss_num": 0.00653076171875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 415370528, + "step": 7413 + }, + { + "epoch": 16.51224944320713, + "grad_norm": 22.564624786376953, + "learning_rate": 1e-06, + "loss": 0.388, + "num_input_tokens_seen": 415430264, + "step": 7414 + }, + { + "epoch": 16.51224944320713, + "loss": 0.38656115531921387, + "loss_ce": 8.655471901874989e-05, + "loss_iou": 0.17578125, + "loss_num": 0.007110595703125, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 415430264, + "step": 7414 + }, + { + "epoch": 16.514476614699333, + "grad_norm": 17.342039108276367, + "learning_rate": 1e-06, + "loss": 0.4182, + "num_input_tokens_seen": 415486144, + "step": 7415 + }, + { + "epoch": 16.514476614699333, + "loss": 0.3627013564109802, + "loss_ce": 9.151514677796513e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.01123046875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 415486144, + "step": 7415 + }, + { + "epoch": 16.51670378619154, + "grad_norm": 34.493228912353516, + "learning_rate": 1e-06, + "loss": 0.4162, + "num_input_tokens_seen": 415543736, + "step": 7416 + }, + { + "epoch": 16.51670378619154, + "loss": 0.42243343591690063, + "loss_ce": 0.00031430544913746417, + "loss_iou": 0.185546875, + "loss_num": 0.010009765625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 415543736, + "step": 7416 + }, + { + "epoch": 16.518930957683743, + "grad_norm": 26.808746337890625, + "learning_rate": 1e-06, + "loss": 0.4272, + "num_input_tokens_seen": 415601828, + "step": 7417 + }, + { + "epoch": 16.518930957683743, + "loss": 0.5273232460021973, + "loss_ce": 0.00010160428064409643, + "loss_iou": 0.2041015625, + "loss_num": 0.023681640625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 415601828, + "step": 7417 + }, + { + "epoch": 16.521158129175948, + "grad_norm": 9.941266059875488, + "learning_rate": 1e-06, + "loss": 0.3009, + "num_input_tokens_seen": 415660184, + "step": 7418 + }, + { + "epoch": 16.521158129175948, + "loss": 0.23453950881958008, + "loss_ce": 7.294982788152993e-05, + "loss_iou": 0.09912109375, + "loss_num": 0.00732421875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 415660184, + "step": 7418 + }, + { + "epoch": 16.523385300668153, + "grad_norm": 26.528013229370117, + "learning_rate": 1e-06, + "loss": 0.6055, + "num_input_tokens_seen": 415715484, + "step": 7419 + }, + { + "epoch": 16.523385300668153, + "loss": 0.8102438449859619, + "loss_ce": 0.000429451436502859, + "loss_iou": 0.3125, + "loss_num": 0.036865234375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 415715484, + "step": 7419 + }, + { + "epoch": 16.525612472160358, + "grad_norm": 19.867149353027344, + "learning_rate": 1e-06, + "loss": 0.3117, + "num_input_tokens_seen": 415770232, + "step": 7420 + }, + { + "epoch": 16.525612472160358, + "loss": 0.26326221227645874, + "loss_ce": 7.859835750423372e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.0040283203125, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 415770232, + "step": 7420 + }, + { + "epoch": 16.527839643652563, + "grad_norm": 19.17835807800293, + "learning_rate": 1e-06, + "loss": 0.4112, + "num_input_tokens_seen": 415825868, + "step": 7421 + }, + { + "epoch": 16.527839643652563, + "loss": 0.43073534965515137, + "loss_ce": 7.126775744836777e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.01458740234375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 415825868, + "step": 7421 + }, + { + "epoch": 16.530066815144767, + "grad_norm": 17.873729705810547, + "learning_rate": 1e-06, + "loss": 0.3862, + "num_input_tokens_seen": 415882708, + "step": 7422 + }, + { + "epoch": 16.530066815144767, + "loss": 0.26814356446266174, + "loss_ce": 7.717055268585682e-05, + "loss_iou": 0.119140625, + "loss_num": 0.006072998046875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 415882708, + "step": 7422 + }, + { + "epoch": 16.532293986636972, + "grad_norm": 16.114519119262695, + "learning_rate": 1e-06, + "loss": 0.5852, + "num_input_tokens_seen": 415938224, + "step": 7423 + }, + { + "epoch": 16.532293986636972, + "loss": 0.6861782073974609, + "loss_ce": 0.000265107664745301, + "loss_iou": 0.29296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 415938224, + "step": 7423 + }, + { + "epoch": 16.534521158129177, + "grad_norm": 21.42132568359375, + "learning_rate": 1e-06, + "loss": 0.3615, + "num_input_tokens_seen": 415992012, + "step": 7424 + }, + { + "epoch": 16.534521158129177, + "loss": 0.25483426451683044, + "loss_ce": 7.351664680754766e-05, + "loss_iou": 0.10546875, + "loss_num": 0.00885009765625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 415992012, + "step": 7424 + }, + { + "epoch": 16.536748329621382, + "grad_norm": 28.71778678894043, + "learning_rate": 1e-06, + "loss": 0.556, + "num_input_tokens_seen": 416048204, + "step": 7425 + }, + { + "epoch": 16.536748329621382, + "loss": 0.48399072885513306, + "loss_ce": 0.00010403442865936086, + "loss_iou": 0.2255859375, + "loss_num": 0.00640869140625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 416048204, + "step": 7425 + }, + { + "epoch": 16.538975501113587, + "grad_norm": 19.75933265686035, + "learning_rate": 1e-06, + "loss": 0.5605, + "num_input_tokens_seen": 416101644, + "step": 7426 + }, + { + "epoch": 16.538975501113587, + "loss": 0.74393230676651, + "loss_ce": 0.00018841384735424072, + "loss_iou": 0.294921875, + "loss_num": 0.0306396484375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 416101644, + "step": 7426 + }, + { + "epoch": 16.54120267260579, + "grad_norm": 22.49873924255371, + "learning_rate": 1e-06, + "loss": 0.3687, + "num_input_tokens_seen": 416160076, + "step": 7427 + }, + { + "epoch": 16.54120267260579, + "loss": 0.33309027552604675, + "loss_ce": 8.246669312939048e-05, + "loss_iou": 0.150390625, + "loss_num": 0.006500244140625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 416160076, + "step": 7427 + }, + { + "epoch": 16.543429844097997, + "grad_norm": 13.900813102722168, + "learning_rate": 1e-06, + "loss": 0.4714, + "num_input_tokens_seen": 416217540, + "step": 7428 + }, + { + "epoch": 16.543429844097997, + "loss": 0.5082305669784546, + "loss_ce": 0.00017388846026733518, + "loss_iou": 0.2021484375, + "loss_num": 0.0206298828125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 416217540, + "step": 7428 + }, + { + "epoch": 16.5456570155902, + "grad_norm": 21.1329402923584, + "learning_rate": 1e-06, + "loss": 0.4258, + "num_input_tokens_seen": 416274740, + "step": 7429 + }, + { + "epoch": 16.5456570155902, + "loss": 0.3279839754104614, + "loss_ce": 0.00010313085658708587, + "loss_iou": 0.1318359375, + "loss_num": 0.01300048828125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 416274740, + "step": 7429 + }, + { + "epoch": 16.547884187082406, + "grad_norm": 20.564817428588867, + "learning_rate": 1e-06, + "loss": 0.3677, + "num_input_tokens_seen": 416330560, + "step": 7430 + }, + { + "epoch": 16.547884187082406, + "loss": 0.31831276416778564, + "loss_ce": 7.547263521701097e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.00762939453125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 416330560, + "step": 7430 + }, + { + "epoch": 16.55011135857461, + "grad_norm": 19.32866859436035, + "learning_rate": 1e-06, + "loss": 0.3478, + "num_input_tokens_seen": 416384232, + "step": 7431 + }, + { + "epoch": 16.55011135857461, + "loss": 0.2748854160308838, + "loss_ce": 0.00010513007873669267, + "loss_iou": 0.1240234375, + "loss_num": 0.00537109375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 416384232, + "step": 7431 + }, + { + "epoch": 16.552338530066816, + "grad_norm": 16.105804443359375, + "learning_rate": 1e-06, + "loss": 0.4259, + "num_input_tokens_seen": 416442036, + "step": 7432 + }, + { + "epoch": 16.552338530066816, + "loss": 0.5960502624511719, + "loss_ce": 0.00010297551489202306, + "loss_iou": 0.248046875, + "loss_num": 0.0198974609375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 416442036, + "step": 7432 + }, + { + "epoch": 16.55456570155902, + "grad_norm": 26.98290252685547, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 416495444, + "step": 7433 + }, + { + "epoch": 16.55456570155902, + "loss": 0.2754194438457489, + "loss_ce": 8.982772123999894e-05, + "loss_iou": 0.12109375, + "loss_num": 0.006500244140625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 416495444, + "step": 7433 + }, + { + "epoch": 16.556792873051226, + "grad_norm": 23.57514762878418, + "learning_rate": 1e-06, + "loss": 0.5139, + "num_input_tokens_seen": 416550332, + "step": 7434 + }, + { + "epoch": 16.556792873051226, + "loss": 0.44662949442863464, + "loss_ce": 9.629656415199861e-05, + "loss_iou": 0.1875, + "loss_num": 0.01409912109375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 416550332, + "step": 7434 + }, + { + "epoch": 16.55902004454343, + "grad_norm": 15.292926788330078, + "learning_rate": 1e-06, + "loss": 0.5265, + "num_input_tokens_seen": 416605772, + "step": 7435 + }, + { + "epoch": 16.55902004454343, + "loss": 0.6938190460205078, + "loss_ce": 0.00015451818762812763, + "loss_iou": 0.259765625, + "loss_num": 0.034912109375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 416605772, + "step": 7435 + }, + { + "epoch": 16.561247216035635, + "grad_norm": 29.28976058959961, + "learning_rate": 1e-06, + "loss": 0.5638, + "num_input_tokens_seen": 416662900, + "step": 7436 + }, + { + "epoch": 16.561247216035635, + "loss": 0.43191277980804443, + "loss_ce": 8.905126014724374e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.011474609375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 416662900, + "step": 7436 + }, + { + "epoch": 16.56347438752784, + "grad_norm": 17.69676971435547, + "learning_rate": 1e-06, + "loss": 0.4027, + "num_input_tokens_seen": 416718660, + "step": 7437 + }, + { + "epoch": 16.56347438752784, + "loss": 0.368381142616272, + "loss_ce": 9.499008592683822e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.0035552978515625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 416718660, + "step": 7437 + }, + { + "epoch": 16.565701559020045, + "grad_norm": 21.888330459594727, + "learning_rate": 1e-06, + "loss": 0.4108, + "num_input_tokens_seen": 416775236, + "step": 7438 + }, + { + "epoch": 16.565701559020045, + "loss": 0.48281651735305786, + "loss_ce": 8.945601439336315e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.00872802734375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 416775236, + "step": 7438 + }, + { + "epoch": 16.56792873051225, + "grad_norm": 15.91091537475586, + "learning_rate": 1e-06, + "loss": 0.4277, + "num_input_tokens_seen": 416831260, + "step": 7439 + }, + { + "epoch": 16.56792873051225, + "loss": 0.4805620312690735, + "loss_ce": 9.329438034910709e-05, + "loss_iou": 0.21875, + "loss_num": 0.00836181640625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 416831260, + "step": 7439 + }, + { + "epoch": 16.570155902004455, + "grad_norm": 32.6405143737793, + "learning_rate": 1e-06, + "loss": 0.4557, + "num_input_tokens_seen": 416887804, + "step": 7440 + }, + { + "epoch": 16.570155902004455, + "loss": 0.47945916652679443, + "loss_ce": 8.904725837055594e-05, + "loss_iou": 0.21875, + "loss_num": 0.00848388671875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 416887804, + "step": 7440 + }, + { + "epoch": 16.57238307349666, + "grad_norm": 17.98710060119629, + "learning_rate": 1e-06, + "loss": 0.5698, + "num_input_tokens_seen": 416944556, + "step": 7441 + }, + { + "epoch": 16.57238307349666, + "loss": 0.5035039782524109, + "loss_ce": 8.599211287219077e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.0201416015625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 416944556, + "step": 7441 + }, + { + "epoch": 16.574610244988865, + "grad_norm": 17.1207332611084, + "learning_rate": 1e-06, + "loss": 0.5436, + "num_input_tokens_seen": 417000276, + "step": 7442 + }, + { + "epoch": 16.574610244988865, + "loss": 0.4915321469306946, + "loss_ce": 7.706407632213086e-05, + "loss_iou": 0.208984375, + "loss_num": 0.01458740234375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 417000276, + "step": 7442 + }, + { + "epoch": 16.57683741648107, + "grad_norm": 12.052204132080078, + "learning_rate": 1e-06, + "loss": 0.3561, + "num_input_tokens_seen": 417056968, + "step": 7443 + }, + { + "epoch": 16.57683741648107, + "loss": 0.3951103687286377, + "loss_ce": 9.083513577934355e-05, + "loss_iou": 0.150390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 417056968, + "step": 7443 + }, + { + "epoch": 16.579064587973274, + "grad_norm": 29.809144973754883, + "learning_rate": 1e-06, + "loss": 0.3865, + "num_input_tokens_seen": 417111816, + "step": 7444 + }, + { + "epoch": 16.579064587973274, + "loss": 0.3363809287548065, + "loss_ce": 7.720827125012875e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.006866455078125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 417111816, + "step": 7444 + }, + { + "epoch": 16.58129175946548, + "grad_norm": 43.40556716918945, + "learning_rate": 1e-06, + "loss": 0.3145, + "num_input_tokens_seen": 417170836, + "step": 7445 + }, + { + "epoch": 16.58129175946548, + "loss": 0.2581978440284729, + "loss_ce": 8.018691733013839e-05, + "loss_iou": 0.1123046875, + "loss_num": 0.00653076171875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 417170836, + "step": 7445 + }, + { + "epoch": 16.583518930957684, + "grad_norm": 19.96676254272461, + "learning_rate": 1e-06, + "loss": 0.2797, + "num_input_tokens_seen": 417226100, + "step": 7446 + }, + { + "epoch": 16.583518930957684, + "loss": 0.307026207447052, + "loss_ce": 8.040900866035372e-05, + "loss_iou": 0.13671875, + "loss_num": 0.006744384765625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 417226100, + "step": 7446 + }, + { + "epoch": 16.58574610244989, + "grad_norm": 18.28860855102539, + "learning_rate": 1e-06, + "loss": 0.4555, + "num_input_tokens_seen": 417282572, + "step": 7447 + }, + { + "epoch": 16.58574610244989, + "loss": 0.434162974357605, + "loss_ce": 8.092315692920238e-05, + "loss_iou": 0.185546875, + "loss_num": 0.01263427734375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 417282572, + "step": 7447 + }, + { + "epoch": 16.587973273942094, + "grad_norm": 19.922115325927734, + "learning_rate": 1e-06, + "loss": 0.3285, + "num_input_tokens_seen": 417336348, + "step": 7448 + }, + { + "epoch": 16.587973273942094, + "loss": 0.2561919093132019, + "loss_ce": 8.838169014779851e-05, + "loss_iou": 0.1162109375, + "loss_num": 0.00482177734375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 417336348, + "step": 7448 + }, + { + "epoch": 16.5902004454343, + "grad_norm": 15.854660034179688, + "learning_rate": 1e-06, + "loss": 0.3569, + "num_input_tokens_seen": 417393976, + "step": 7449 + }, + { + "epoch": 16.5902004454343, + "loss": 0.4261277914047241, + "loss_ce": 0.00010242144344374537, + "loss_iou": 0.1962890625, + "loss_num": 0.006561279296875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 417393976, + "step": 7449 + }, + { + "epoch": 16.592427616926503, + "grad_norm": 15.823163986206055, + "learning_rate": 1e-06, + "loss": 0.4953, + "num_input_tokens_seen": 417453068, + "step": 7450 + }, + { + "epoch": 16.592427616926503, + "loss": 0.43979281187057495, + "loss_ce": 9.553506970405579e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.0205078125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 417453068, + "step": 7450 + }, + { + "epoch": 16.59465478841871, + "grad_norm": 15.928594589233398, + "learning_rate": 1e-06, + "loss": 0.5031, + "num_input_tokens_seen": 417509392, + "step": 7451 + }, + { + "epoch": 16.59465478841871, + "loss": 0.5577445030212402, + "loss_ce": 0.00012731979950331151, + "loss_iou": 0.25, + "loss_num": 0.01123046875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 417509392, + "step": 7451 + }, + { + "epoch": 16.596881959910913, + "grad_norm": 20.19134521484375, + "learning_rate": 1e-06, + "loss": 0.3192, + "num_input_tokens_seen": 417565928, + "step": 7452 + }, + { + "epoch": 16.596881959910913, + "loss": 0.2749907076358795, + "loss_ce": 8.836391498334706e-05, + "loss_iou": 0.10107421875, + "loss_num": 0.01458740234375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 417565928, + "step": 7452 + }, + { + "epoch": 16.599109131403118, + "grad_norm": 28.828327178955078, + "learning_rate": 1e-06, + "loss": 0.4603, + "num_input_tokens_seen": 417621032, + "step": 7453 + }, + { + "epoch": 16.599109131403118, + "loss": 0.46700233221054077, + "loss_ce": 8.33997328300029e-05, + "loss_iou": 0.220703125, + "loss_num": 0.00494384765625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 417621032, + "step": 7453 + }, + { + "epoch": 16.601336302895323, + "grad_norm": 42.66580581665039, + "learning_rate": 1e-06, + "loss": 0.5839, + "num_input_tokens_seen": 417678064, + "step": 7454 + }, + { + "epoch": 16.601336302895323, + "loss": 0.4696599841117859, + "loss_ce": 0.00042174485861323774, + "loss_iou": 0.2041015625, + "loss_num": 0.01214599609375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 417678064, + "step": 7454 + }, + { + "epoch": 16.603563474387528, + "grad_norm": 13.862714767456055, + "learning_rate": 1e-06, + "loss": 0.2689, + "num_input_tokens_seen": 417734624, + "step": 7455 + }, + { + "epoch": 16.603563474387528, + "loss": 0.2475118190050125, + "loss_ce": 7.529581489507109e-05, + "loss_iou": 0.1064453125, + "loss_num": 0.006805419921875, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 417734624, + "step": 7455 + }, + { + "epoch": 16.605790645879733, + "grad_norm": 18.104511260986328, + "learning_rate": 1e-06, + "loss": 0.3205, + "num_input_tokens_seen": 417790132, + "step": 7456 + }, + { + "epoch": 16.605790645879733, + "loss": 0.2718583941459656, + "loss_ce": 0.00012988239177502692, + "loss_iou": 0.109375, + "loss_num": 0.0106201171875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 417790132, + "step": 7456 + }, + { + "epoch": 16.608017817371937, + "grad_norm": 15.675362586975098, + "learning_rate": 1e-06, + "loss": 0.455, + "num_input_tokens_seen": 417844852, + "step": 7457 + }, + { + "epoch": 16.608017817371937, + "loss": 0.5134113430976868, + "loss_ce": 0.00010569683945504948, + "loss_iou": 0.212890625, + "loss_num": 0.017578125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 417844852, + "step": 7457 + }, + { + "epoch": 16.610244988864142, + "grad_norm": 18.084177017211914, + "learning_rate": 1e-06, + "loss": 0.5179, + "num_input_tokens_seen": 417901268, + "step": 7458 + }, + { + "epoch": 16.610244988864142, + "loss": 0.5478125810623169, + "loss_ce": 8.308600808959454e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.013916015625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 417901268, + "step": 7458 + }, + { + "epoch": 16.612472160356347, + "grad_norm": 15.700066566467285, + "learning_rate": 1e-06, + "loss": 0.4414, + "num_input_tokens_seen": 417957084, + "step": 7459 + }, + { + "epoch": 16.612472160356347, + "loss": 0.4971623420715332, + "loss_ce": 9.201420471072197e-05, + "loss_iou": 0.20703125, + "loss_num": 0.016357421875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 417957084, + "step": 7459 + }, + { + "epoch": 16.614699331848552, + "grad_norm": 19.968868255615234, + "learning_rate": 1e-06, + "loss": 0.4322, + "num_input_tokens_seen": 418013952, + "step": 7460 + }, + { + "epoch": 16.614699331848552, + "loss": 0.4334394335746765, + "loss_ce": 8.979369886219501e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.0074462890625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 418013952, + "step": 7460 + }, + { + "epoch": 16.616926503340757, + "grad_norm": 15.141510009765625, + "learning_rate": 1e-06, + "loss": 0.3758, + "num_input_tokens_seen": 418069596, + "step": 7461 + }, + { + "epoch": 16.616926503340757, + "loss": 0.35867738723754883, + "loss_ce": 9.585694351699203e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.009521484375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 418069596, + "step": 7461 + }, + { + "epoch": 16.619153674832962, + "grad_norm": 16.423025131225586, + "learning_rate": 1e-06, + "loss": 0.5082, + "num_input_tokens_seen": 418125016, + "step": 7462 + }, + { + "epoch": 16.619153674832962, + "loss": 0.7706419229507446, + "loss_ce": 0.00013412557018455118, + "loss_iou": 0.30078125, + "loss_num": 0.033935546875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 418125016, + "step": 7462 + }, + { + "epoch": 16.621380846325167, + "grad_norm": 21.270771026611328, + "learning_rate": 1e-06, + "loss": 0.5281, + "num_input_tokens_seen": 418180920, + "step": 7463 + }, + { + "epoch": 16.621380846325167, + "loss": 0.5241183042526245, + "loss_ce": 7.047592953313142e-05, + "loss_iou": 0.203125, + "loss_num": 0.023681640625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 418180920, + "step": 7463 + }, + { + "epoch": 16.62360801781737, + "grad_norm": 18.848526000976562, + "learning_rate": 1e-06, + "loss": 0.4706, + "num_input_tokens_seen": 418236248, + "step": 7464 + }, + { + "epoch": 16.62360801781737, + "loss": 0.5589928030967712, + "loss_ce": 0.0002769696293398738, + "loss_iou": 0.232421875, + "loss_num": 0.018798828125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 418236248, + "step": 7464 + }, + { + "epoch": 16.625835189309576, + "grad_norm": 18.260950088500977, + "learning_rate": 1e-06, + "loss": 0.4122, + "num_input_tokens_seen": 418292596, + "step": 7465 + }, + { + "epoch": 16.625835189309576, + "loss": 0.5881445407867432, + "loss_ce": 0.00013190052413847297, + "loss_iou": 0.255859375, + "loss_num": 0.015380859375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 418292596, + "step": 7465 + }, + { + "epoch": 16.62806236080178, + "grad_norm": 19.902740478515625, + "learning_rate": 1e-06, + "loss": 0.4294, + "num_input_tokens_seen": 418348084, + "step": 7466 + }, + { + "epoch": 16.62806236080178, + "loss": 0.4913012981414795, + "loss_ce": 9.037186828209087e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.00726318359375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 418348084, + "step": 7466 + }, + { + "epoch": 16.630289532293986, + "grad_norm": 23.93308448791504, + "learning_rate": 1e-06, + "loss": 0.3016, + "num_input_tokens_seen": 418404308, + "step": 7467 + }, + { + "epoch": 16.630289532293986, + "loss": 0.2502018213272095, + "loss_ce": 7.976325287017971e-05, + "loss_iou": 0.11572265625, + "loss_num": 0.0037689208984375, + "loss_xval": 0.25, + "num_input_tokens_seen": 418404308, + "step": 7467 + }, + { + "epoch": 16.63251670378619, + "grad_norm": 16.425756454467773, + "learning_rate": 1e-06, + "loss": 0.4722, + "num_input_tokens_seen": 418460596, + "step": 7468 + }, + { + "epoch": 16.63251670378619, + "loss": 0.5028359889984131, + "loss_ce": 8.939744293456897e-05, + "loss_iou": 0.23046875, + "loss_num": 0.00860595703125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 418460596, + "step": 7468 + }, + { + "epoch": 16.634743875278396, + "grad_norm": 15.644196510314941, + "learning_rate": 1e-06, + "loss": 0.5605, + "num_input_tokens_seen": 418516840, + "step": 7469 + }, + { + "epoch": 16.634743875278396, + "loss": 0.8118726015090942, + "loss_ce": 0.00010501188808120787, + "loss_iou": 0.306640625, + "loss_num": 0.03955078125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 418516840, + "step": 7469 + }, + { + "epoch": 16.6369710467706, + "grad_norm": 21.61148452758789, + "learning_rate": 1e-06, + "loss": 0.5139, + "num_input_tokens_seen": 418575128, + "step": 7470 + }, + { + "epoch": 16.6369710467706, + "loss": 0.6065396070480347, + "loss_ce": 9.430487989448011e-05, + "loss_iou": 0.271484375, + "loss_num": 0.01239013671875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 418575128, + "step": 7470 + }, + { + "epoch": 16.639198218262806, + "grad_norm": 17.30372428894043, + "learning_rate": 1e-06, + "loss": 0.5143, + "num_input_tokens_seen": 418631736, + "step": 7471 + }, + { + "epoch": 16.639198218262806, + "loss": 0.32015693187713623, + "loss_ce": 8.858228102326393e-05, + "loss_iou": 0.134765625, + "loss_num": 0.0098876953125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 418631736, + "step": 7471 + }, + { + "epoch": 16.64142538975501, + "grad_norm": 20.17923355102539, + "learning_rate": 1e-06, + "loss": 0.3716, + "num_input_tokens_seen": 418685912, + "step": 7472 + }, + { + "epoch": 16.64142538975501, + "loss": 0.39355170726776123, + "loss_ce": 0.0007294651586562395, + "loss_iou": 0.1826171875, + "loss_num": 0.005645751953125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 418685912, + "step": 7472 + }, + { + "epoch": 16.643652561247215, + "grad_norm": 21.51079559326172, + "learning_rate": 1e-06, + "loss": 0.386, + "num_input_tokens_seen": 418739856, + "step": 7473 + }, + { + "epoch": 16.643652561247215, + "loss": 0.3053746223449707, + "loss_ce": 7.678715337533504e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.00897216796875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 418739856, + "step": 7473 + }, + { + "epoch": 16.64587973273942, + "grad_norm": 18.511350631713867, + "learning_rate": 1e-06, + "loss": 0.5517, + "num_input_tokens_seen": 418796664, + "step": 7474 + }, + { + "epoch": 16.64587973273942, + "loss": 0.4539787769317627, + "loss_ce": 0.0003654793545138091, + "loss_iou": 0.19140625, + "loss_num": 0.0142822265625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 418796664, + "step": 7474 + }, + { + "epoch": 16.648106904231625, + "grad_norm": 18.879634857177734, + "learning_rate": 1e-06, + "loss": 0.5122, + "num_input_tokens_seen": 418853068, + "step": 7475 + }, + { + "epoch": 16.648106904231625, + "loss": 0.6420133113861084, + "loss_ce": 0.00010659612598828971, + "loss_iou": 0.296875, + "loss_num": 0.00970458984375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 418853068, + "step": 7475 + }, + { + "epoch": 16.65033407572383, + "grad_norm": 22.095844268798828, + "learning_rate": 1e-06, + "loss": 0.4475, + "num_input_tokens_seen": 418907716, + "step": 7476 + }, + { + "epoch": 16.65033407572383, + "loss": 0.5320791006088257, + "loss_ce": 9.662572119850665e-05, + "loss_iou": 0.251953125, + "loss_num": 0.00555419921875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 418907716, + "step": 7476 + }, + { + "epoch": 16.652561247216035, + "grad_norm": 22.207090377807617, + "learning_rate": 1e-06, + "loss": 0.4054, + "num_input_tokens_seen": 418963728, + "step": 7477 + }, + { + "epoch": 16.652561247216035, + "loss": 0.3793591856956482, + "loss_ce": 8.672128024045378e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.00677490234375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 418963728, + "step": 7477 + }, + { + "epoch": 16.65478841870824, + "grad_norm": 16.67801856994629, + "learning_rate": 1e-06, + "loss": 0.6013, + "num_input_tokens_seen": 419016604, + "step": 7478 + }, + { + "epoch": 16.65478841870824, + "loss": 0.6391165256500244, + "loss_ce": 7.844362698961049e-05, + "loss_iou": 0.279296875, + "loss_num": 0.016357421875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 419016604, + "step": 7478 + }, + { + "epoch": 16.657015590200444, + "grad_norm": 14.033241271972656, + "learning_rate": 1e-06, + "loss": 0.4418, + "num_input_tokens_seen": 419074052, + "step": 7479 + }, + { + "epoch": 16.657015590200444, + "loss": 0.3826572299003601, + "loss_ce": 8.888993761502206e-05, + "loss_iou": 0.177734375, + "loss_num": 0.0054931640625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 419074052, + "step": 7479 + }, + { + "epoch": 16.65924276169265, + "grad_norm": 13.08681869506836, + "learning_rate": 1e-06, + "loss": 0.3011, + "num_input_tokens_seen": 419133076, + "step": 7480 + }, + { + "epoch": 16.65924276169265, + "loss": 0.18371227383613586, + "loss_ce": 8.800254727248102e-05, + "loss_iou": 0.07373046875, + "loss_num": 0.007110595703125, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 419133076, + "step": 7480 + }, + { + "epoch": 16.661469933184854, + "grad_norm": 19.100345611572266, + "learning_rate": 1e-06, + "loss": 0.3935, + "num_input_tokens_seen": 419186412, + "step": 7481 + }, + { + "epoch": 16.661469933184854, + "loss": 0.3841213583946228, + "loss_ce": 0.00021022261353209615, + "loss_iou": 0.1748046875, + "loss_num": 0.006591796875, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 419186412, + "step": 7481 + }, + { + "epoch": 16.66369710467706, + "grad_norm": 15.811746597290039, + "learning_rate": 1e-06, + "loss": 0.3697, + "num_input_tokens_seen": 419244064, + "step": 7482 + }, + { + "epoch": 16.66369710467706, + "loss": 0.36989980936050415, + "loss_ce": 8.779930794844404e-05, + "loss_iou": 0.169921875, + "loss_num": 0.0059814453125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 419244064, + "step": 7482 + }, + { + "epoch": 16.665924276169264, + "grad_norm": 19.878259658813477, + "learning_rate": 1e-06, + "loss": 0.4357, + "num_input_tokens_seen": 419300828, + "step": 7483 + }, + { + "epoch": 16.665924276169264, + "loss": 0.3201012909412384, + "loss_ce": 9.395321103511378e-05, + "loss_iou": 0.134765625, + "loss_num": 0.01007080078125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 419300828, + "step": 7483 + }, + { + "epoch": 16.66815144766147, + "grad_norm": 18.946975708007812, + "learning_rate": 1e-06, + "loss": 0.4566, + "num_input_tokens_seen": 419355872, + "step": 7484 + }, + { + "epoch": 16.66815144766147, + "loss": 0.48397648334503174, + "loss_ce": 8.974310185294598e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.00750732421875, + "loss_xval": 0.484375, + "num_input_tokens_seen": 419355872, + "step": 7484 + }, + { + "epoch": 16.670378619153674, + "grad_norm": 30.613773345947266, + "learning_rate": 1e-06, + "loss": 0.4782, + "num_input_tokens_seen": 419411584, + "step": 7485 + }, + { + "epoch": 16.670378619153674, + "loss": 0.42331117391586304, + "loss_ce": 0.00021546825882978737, + "loss_iou": 0.16796875, + "loss_num": 0.0174560546875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 419411584, + "step": 7485 + }, + { + "epoch": 16.67260579064588, + "grad_norm": 12.629964828491211, + "learning_rate": 1e-06, + "loss": 0.4128, + "num_input_tokens_seen": 419468052, + "step": 7486 + }, + { + "epoch": 16.67260579064588, + "loss": 0.5439175367355347, + "loss_ce": 9.432664228370413e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.01611328125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 419468052, + "step": 7486 + }, + { + "epoch": 16.674832962138083, + "grad_norm": 21.88045883178711, + "learning_rate": 1e-06, + "loss": 0.3252, + "num_input_tokens_seen": 419522816, + "step": 7487 + }, + { + "epoch": 16.674832962138083, + "loss": 0.3819087743759155, + "loss_ce": 0.00013384762860368937, + "loss_iou": 0.1640625, + "loss_num": 0.01080322265625, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 419522816, + "step": 7487 + }, + { + "epoch": 16.677060133630288, + "grad_norm": 20.802846908569336, + "learning_rate": 1e-06, + "loss": 0.4882, + "num_input_tokens_seen": 419576664, + "step": 7488 + }, + { + "epoch": 16.677060133630288, + "loss": 0.3609151840209961, + "loss_ce": 0.00010585598647594452, + "loss_iou": 0.171875, + "loss_num": 0.003204345703125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 419576664, + "step": 7488 + }, + { + "epoch": 16.679287305122493, + "grad_norm": 30.43656349182129, + "learning_rate": 1e-06, + "loss": 0.4603, + "num_input_tokens_seen": 419631152, + "step": 7489 + }, + { + "epoch": 16.679287305122493, + "loss": 0.5530772805213928, + "loss_ce": 9.875621617538854e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.01104736328125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 419631152, + "step": 7489 + }, + { + "epoch": 16.681514476614698, + "grad_norm": 24.13094139099121, + "learning_rate": 1e-06, + "loss": 0.4435, + "num_input_tokens_seen": 419687488, + "step": 7490 + }, + { + "epoch": 16.681514476614698, + "loss": 0.37974900007247925, + "loss_ce": 0.00011031976464437321, + "loss_iou": 0.1708984375, + "loss_num": 0.007476806640625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 419687488, + "step": 7490 + }, + { + "epoch": 16.683741648106903, + "grad_norm": 15.836715698242188, + "learning_rate": 1e-06, + "loss": 0.4628, + "num_input_tokens_seen": 419745188, + "step": 7491 + }, + { + "epoch": 16.683741648106903, + "loss": 0.4252764582633972, + "loss_ce": 0.0001055425891536288, + "loss_iou": 0.169921875, + "loss_num": 0.01708984375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 419745188, + "step": 7491 + }, + { + "epoch": 16.685968819599108, + "grad_norm": 13.860366821289062, + "learning_rate": 1e-06, + "loss": 0.7326, + "num_input_tokens_seen": 419798892, + "step": 7492 + }, + { + "epoch": 16.685968819599108, + "loss": 0.8469661474227905, + "loss_ce": 0.00010327581549063325, + "loss_iou": 0.33984375, + "loss_num": 0.033935546875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 419798892, + "step": 7492 + }, + { + "epoch": 16.688195991091312, + "grad_norm": 18.5252742767334, + "learning_rate": 1e-06, + "loss": 0.4905, + "num_input_tokens_seen": 419852848, + "step": 7493 + }, + { + "epoch": 16.688195991091312, + "loss": 0.4520108699798584, + "loss_ce": 0.00012183596845716238, + "loss_iou": 0.193359375, + "loss_num": 0.01287841796875, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 419852848, + "step": 7493 + }, + { + "epoch": 16.690423162583517, + "grad_norm": 33.927978515625, + "learning_rate": 1e-06, + "loss": 0.4021, + "num_input_tokens_seen": 419909276, + "step": 7494 + }, + { + "epoch": 16.690423162583517, + "loss": 0.4526256322860718, + "loss_ce": 0.00013765764015261084, + "loss_iou": 0.2060546875, + "loss_num": 0.0081787109375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 419909276, + "step": 7494 + }, + { + "epoch": 16.692650334075722, + "grad_norm": 17.114513397216797, + "learning_rate": 1e-06, + "loss": 0.4049, + "num_input_tokens_seen": 419965156, + "step": 7495 + }, + { + "epoch": 16.692650334075722, + "loss": 0.5079430341720581, + "loss_ce": 0.00019155110931023955, + "loss_iou": 0.23046875, + "loss_num": 0.00933837890625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 419965156, + "step": 7495 + }, + { + "epoch": 16.694877505567927, + "grad_norm": 20.878402709960938, + "learning_rate": 1e-06, + "loss": 0.3769, + "num_input_tokens_seen": 420021460, + "step": 7496 + }, + { + "epoch": 16.694877505567927, + "loss": 0.5149946212768555, + "loss_ce": 0.00010204176942352206, + "loss_iou": 0.19921875, + "loss_num": 0.0235595703125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 420021460, + "step": 7496 + }, + { + "epoch": 16.697104677060132, + "grad_norm": 11.510066032409668, + "learning_rate": 1e-06, + "loss": 0.3932, + "num_input_tokens_seen": 420077988, + "step": 7497 + }, + { + "epoch": 16.697104677060132, + "loss": 0.4444170892238617, + "loss_ce": 8.116405660985038e-05, + "loss_iou": 0.19140625, + "loss_num": 0.01214599609375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 420077988, + "step": 7497 + }, + { + "epoch": 16.69933184855234, + "grad_norm": 21.444026947021484, + "learning_rate": 1e-06, + "loss": 0.4323, + "num_input_tokens_seen": 420135408, + "step": 7498 + }, + { + "epoch": 16.69933184855234, + "loss": 0.34426337480545044, + "loss_ce": 8.61354055814445e-05, + "loss_iou": 0.16015625, + "loss_num": 0.004638671875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 420135408, + "step": 7498 + }, + { + "epoch": 16.70155902004454, + "grad_norm": 18.71256446838379, + "learning_rate": 1e-06, + "loss": 0.3837, + "num_input_tokens_seen": 420191188, + "step": 7499 + }, + { + "epoch": 16.70155902004454, + "loss": 0.5224635601043701, + "loss_ce": 0.00012473194510675967, + "loss_iou": 0.2333984375, + "loss_num": 0.01129150390625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 420191188, + "step": 7499 + }, + { + "epoch": 16.70378619153675, + "grad_norm": 29.126888275146484, + "learning_rate": 1e-06, + "loss": 0.3387, + "num_input_tokens_seen": 420248812, + "step": 7500 + }, + { + "epoch": 16.70378619153675, + "eval_seeclick_web_CIoU": 0.5869028568267822, + "eval_seeclick_web_GIoU": 0.5851646363735199, + "eval_seeclick_web_IoU": 0.6060521006584167, + "eval_seeclick_web_MAE_all": 0.015174605650827289, + "eval_seeclick_web_MAE_h": 0.007236489560455084, + "eval_seeclick_web_MAE_w": 0.015413875225931406, + "eval_seeclick_web_MAE_x_boxes": 0.008164346683770418, + "eval_seeclick_web_MAE_y_boxes": 0.02128999726846814, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.9088981747627258, + "eval_seeclick_web_loss_ce": 0.00014426549023482949, + "eval_seeclick_web_loss_iou": 0.419921875, + "eval_seeclick_web_loss_num": 0.01212310791015625, + "eval_seeclick_web_loss_xval": 0.900146484375, + "eval_seeclick_web_runtime": 24.9651, + "eval_seeclick_web_samples_per_second": 2.003, + "eval_seeclick_web_steps_per_second": 0.08, + "num_input_tokens_seen": 420248812, + "step": 7500 + }, + { + "epoch": 16.70378619153675, + "eval_icons_CIoU": 0.2704924941062927, + "eval_icons_GIoU": 0.2915039211511612, + "eval_icons_IoU": 0.34834006428718567, + "eval_icons_MAE_all": 0.059374475851655006, + "eval_icons_MAE_h": 0.03465741407126188, + "eval_icons_MAE_w": 0.058007813058793545, + "eval_icons_MAE_x_boxes": 0.05980631522834301, + "eval_icons_MAE_y_boxes": 0.03711246699094772, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7317432165145874, + "eval_icons_loss_ce": 0.0001763724285410717, + "eval_icons_loss_iou": 0.680908203125, + "eval_icons_loss_num": 0.0583648681640625, + "eval_icons_loss_xval": 1.654541015625, + "eval_icons_runtime": 24.5042, + "eval_icons_samples_per_second": 2.04, + "eval_icons_steps_per_second": 0.082, + "num_input_tokens_seen": 420248812, + "step": 7500 + }, + { + "epoch": 16.70378619153675, + "eval_screenspot_CIoU": 0.37447473406791687, + "eval_screenspot_GIoU": 0.392286479473114, + "eval_screenspot_IoU": 0.4470125635464986, + "eval_screenspot_MAE_all": 0.057117752730846405, + "eval_screenspot_MAE_h": 0.039663772409160934, + "eval_screenspot_MAE_w": 0.06271877388159434, + "eval_screenspot_MAE_x_boxes": 0.06692921556532383, + "eval_screenspot_MAE_y_boxes": 0.03935454785823822, + "eval_screenspot_inside_bbox": 0.693750003973643, + "eval_screenspot_loss": 1.5693501234054565, + "eval_screenspot_loss_ce": 0.0001859396434156224, + "eval_screenspot_loss_iou": 0.651611328125, + "eval_screenspot_loss_num": 0.06552505493164062, + "eval_screenspot_loss_xval": 1.6298828125, + "eval_screenspot_runtime": 40.939, + "eval_screenspot_samples_per_second": 2.174, + "eval_screenspot_steps_per_second": 0.073, + "num_input_tokens_seen": 420248812, + "step": 7500 + }, + { + "epoch": 16.70378619153675, + "eval_compot_CIoU": 0.346510648727417, + "eval_compot_GIoU": 0.35510683059692383, + "eval_compot_IoU": 0.4062563627958298, + "eval_compot_MAE_all": 0.01756852399557829, + "eval_compot_MAE_h": 0.00829399167560041, + "eval_compot_MAE_w": 0.02114854846149683, + "eval_compot_MAE_x_boxes": 0.029594723135232925, + "eval_compot_MAE_y_boxes": 0.0067301481030881405, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3873258829116821, + "eval_compot_loss_ce": 0.00013870510883862153, + "eval_compot_loss_iou": 0.6431884765625, + "eval_compot_loss_num": 0.016324996948242188, + "eval_compot_loss_xval": 1.366943359375, + "eval_compot_runtime": 25.1596, + "eval_compot_samples_per_second": 1.987, + "eval_compot_steps_per_second": 0.079, + "num_input_tokens_seen": 420248812, + "step": 7500 + }, + { + "epoch": 16.70378619153675, + "eval_custom_ui_val_CIoU": 0.47488271362251705, + "eval_custom_ui_val_GIoU": 0.4806961201959186, + "eval_custom_ui_val_IoU": 0.5364741186300913, + "eval_custom_ui_val_MAE_all": 0.027154105249792337, + "eval_custom_ui_val_MAE_h": 0.014837595815252926, + "eval_custom_ui_val_MAE_w": 0.036442533497595124, + "eval_custom_ui_val_MAE_x_boxes": 0.03312135862910913, + "eval_custom_ui_val_MAE_y_boxes": 0.012457883761574825, + "eval_custom_ui_val_inside_bbox": 0.7754629651705424, + "eval_custom_ui_val_loss": 1.1677404642105103, + "eval_custom_ui_val_loss_ce": 0.00016540858794340037, + "eval_custom_ui_val_loss_iou": 0.5015055338541666, + "eval_custom_ui_val_loss_num": 0.023720741271972656, + "eval_custom_ui_val_loss_xval": 1.1214735243055556, + "eval_custom_ui_val_runtime": 78.9911, + "eval_custom_ui_val_samples_per_second": 3.355, + "eval_custom_ui_val_steps_per_second": 0.114, + "num_input_tokens_seen": 420248812, + "step": 7500 + }, + { + "epoch": 16.70378619153675, + "loss": 0.8367924690246582, + "loss_ce": 0.00012252142187207937, + "loss_iou": 0.376953125, + "loss_num": 0.016845703125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 420248812, + "step": 7500 + }, + { + "epoch": 16.706013363028955, + "grad_norm": 16.65875244140625, + "learning_rate": 1e-06, + "loss": 0.4408, + "num_input_tokens_seen": 420303256, + "step": 7501 + }, + { + "epoch": 16.706013363028955, + "loss": 0.3736181855201721, + "loss_ce": 8.305851952172816e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.01116943359375, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 420303256, + "step": 7501 + }, + { + "epoch": 16.70824053452116, + "grad_norm": 17.692901611328125, + "learning_rate": 1e-06, + "loss": 0.5898, + "num_input_tokens_seen": 420356912, + "step": 7502 + }, + { + "epoch": 16.70824053452116, + "loss": 0.33262205123901367, + "loss_ce": 0.00010252789070364088, + "loss_iou": 0.142578125, + "loss_num": 0.00958251953125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 420356912, + "step": 7502 + }, + { + "epoch": 16.710467706013365, + "grad_norm": 19.661197662353516, + "learning_rate": 1e-06, + "loss": 0.4288, + "num_input_tokens_seen": 420414376, + "step": 7503 + }, + { + "epoch": 16.710467706013365, + "loss": 0.4624839723110199, + "loss_ce": 8.163749589584768e-05, + "loss_iou": 0.181640625, + "loss_num": 0.0196533203125, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 420414376, + "step": 7503 + }, + { + "epoch": 16.71269487750557, + "grad_norm": 16.957685470581055, + "learning_rate": 1e-06, + "loss": 0.4464, + "num_input_tokens_seen": 420470196, + "step": 7504 + }, + { + "epoch": 16.71269487750557, + "loss": 0.39802345633506775, + "loss_ce": 7.423176430165768e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0130615234375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 420470196, + "step": 7504 + }, + { + "epoch": 16.714922048997774, + "grad_norm": 18.1544246673584, + "learning_rate": 1e-06, + "loss": 0.4431, + "num_input_tokens_seen": 420525872, + "step": 7505 + }, + { + "epoch": 16.714922048997774, + "loss": 0.48571863770484924, + "loss_ce": 0.00024499627761542797, + "loss_iou": 0.2001953125, + "loss_num": 0.016845703125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 420525872, + "step": 7505 + }, + { + "epoch": 16.71714922048998, + "grad_norm": 14.577059745788574, + "learning_rate": 1e-06, + "loss": 0.5386, + "num_input_tokens_seen": 420582216, + "step": 7506 + }, + { + "epoch": 16.71714922048998, + "loss": 0.6150659918785095, + "loss_ce": 0.00013681976997759193, + "loss_iou": 0.2451171875, + "loss_num": 0.0247802734375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 420582216, + "step": 7506 + }, + { + "epoch": 16.719376391982184, + "grad_norm": 16.228076934814453, + "learning_rate": 1e-06, + "loss": 0.3875, + "num_input_tokens_seen": 420636876, + "step": 7507 + }, + { + "epoch": 16.719376391982184, + "loss": 0.5354748964309692, + "loss_ce": 7.452783756889403e-05, + "loss_iou": 0.240234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 420636876, + "step": 7507 + }, + { + "epoch": 16.72160356347439, + "grad_norm": 17.355379104614258, + "learning_rate": 1e-06, + "loss": 0.3967, + "num_input_tokens_seen": 420692648, + "step": 7508 + }, + { + "epoch": 16.72160356347439, + "loss": 0.5319057703018188, + "loss_ce": 0.00010642388951964676, + "loss_iou": 0.2216796875, + "loss_num": 0.017578125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 420692648, + "step": 7508 + }, + { + "epoch": 16.723830734966594, + "grad_norm": 22.888402938842773, + "learning_rate": 1e-06, + "loss": 0.3304, + "num_input_tokens_seen": 420749380, + "step": 7509 + }, + { + "epoch": 16.723830734966594, + "loss": 0.2873430550098419, + "loss_ce": 0.00011160832218592986, + "loss_iou": 0.1142578125, + "loss_num": 0.01171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 420749380, + "step": 7509 + }, + { + "epoch": 16.7260579064588, + "grad_norm": 23.103485107421875, + "learning_rate": 1e-06, + "loss": 0.5022, + "num_input_tokens_seen": 420805372, + "step": 7510 + }, + { + "epoch": 16.7260579064588, + "loss": 0.6403497457504272, + "loss_ce": 9.102303010877222e-05, + "loss_iou": 0.296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 420805372, + "step": 7510 + }, + { + "epoch": 16.728285077951004, + "grad_norm": 26.94756317138672, + "learning_rate": 1e-06, + "loss": 0.4418, + "num_input_tokens_seen": 420859376, + "step": 7511 + }, + { + "epoch": 16.728285077951004, + "loss": 0.3712967336177826, + "loss_ce": 8.091152994893491e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00787353515625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 420859376, + "step": 7511 + }, + { + "epoch": 16.73051224944321, + "grad_norm": 29.820947647094727, + "learning_rate": 1e-06, + "loss": 0.4807, + "num_input_tokens_seen": 420916012, + "step": 7512 + }, + { + "epoch": 16.73051224944321, + "loss": 0.5431552529335022, + "loss_ce": 0.00018649011326488107, + "loss_iou": 0.2265625, + "loss_num": 0.0179443359375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 420916012, + "step": 7512 + }, + { + "epoch": 16.732739420935413, + "grad_norm": 16.320466995239258, + "learning_rate": 1e-06, + "loss": 0.7292, + "num_input_tokens_seen": 420970600, + "step": 7513 + }, + { + "epoch": 16.732739420935413, + "loss": 0.877778172492981, + "loss_ce": 9.26764914765954e-05, + "loss_iou": 0.34765625, + "loss_num": 0.036376953125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 420970600, + "step": 7513 + }, + { + "epoch": 16.734966592427618, + "grad_norm": 19.527677536010742, + "learning_rate": 1e-06, + "loss": 0.3925, + "num_input_tokens_seen": 421024580, + "step": 7514 + }, + { + "epoch": 16.734966592427618, + "loss": 0.29793840646743774, + "loss_ce": 8.684221393195912e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.0045166015625, + "loss_xval": 0.296875, + "num_input_tokens_seen": 421024580, + "step": 7514 + }, + { + "epoch": 16.737193763919823, + "grad_norm": 16.6092472076416, + "learning_rate": 1e-06, + "loss": 0.4416, + "num_input_tokens_seen": 421080592, + "step": 7515 + }, + { + "epoch": 16.737193763919823, + "loss": 0.32314634323120117, + "loss_ce": 8.72400269145146e-05, + "loss_iou": 0.1328125, + "loss_num": 0.01141357421875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 421080592, + "step": 7515 + }, + { + "epoch": 16.739420935412028, + "grad_norm": 18.05164337158203, + "learning_rate": 1e-06, + "loss": 0.4574, + "num_input_tokens_seen": 421138628, + "step": 7516 + }, + { + "epoch": 16.739420935412028, + "loss": 0.44442254304885864, + "loss_ce": 8.662776235723868e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.013671875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 421138628, + "step": 7516 + }, + { + "epoch": 16.741648106904233, + "grad_norm": 34.90769958496094, + "learning_rate": 1e-06, + "loss": 0.3832, + "num_input_tokens_seen": 421194844, + "step": 7517 + }, + { + "epoch": 16.741648106904233, + "loss": 0.4219837486743927, + "loss_ce": 0.00010874809231609106, + "loss_iou": 0.197265625, + "loss_num": 0.00555419921875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 421194844, + "step": 7517 + }, + { + "epoch": 16.743875278396438, + "grad_norm": 19.655269622802734, + "learning_rate": 1e-06, + "loss": 0.404, + "num_input_tokens_seen": 421248904, + "step": 7518 + }, + { + "epoch": 16.743875278396438, + "loss": 0.48480457067489624, + "loss_ce": 0.00012440019054338336, + "loss_iou": 0.216796875, + "loss_num": 0.01019287109375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 421248904, + "step": 7518 + }, + { + "epoch": 16.746102449888642, + "grad_norm": 22.647844314575195, + "learning_rate": 1e-06, + "loss": 0.671, + "num_input_tokens_seen": 421304408, + "step": 7519 + }, + { + "epoch": 16.746102449888642, + "loss": 0.8310278654098511, + "loss_ce": 9.524515189696103e-05, + "loss_iou": 0.353515625, + "loss_num": 0.025146484375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 421304408, + "step": 7519 + }, + { + "epoch": 16.748329621380847, + "grad_norm": 12.694985389709473, + "learning_rate": 1e-06, + "loss": 0.3328, + "num_input_tokens_seen": 421359956, + "step": 7520 + }, + { + "epoch": 16.748329621380847, + "loss": 0.3517601191997528, + "loss_ce": 7.55441069486551e-05, + "loss_iou": 0.158203125, + "loss_num": 0.007232666015625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 421359956, + "step": 7520 + }, + { + "epoch": 16.750556792873052, + "grad_norm": 26.37984275817871, + "learning_rate": 1e-06, + "loss": 0.635, + "num_input_tokens_seen": 421418924, + "step": 7521 + }, + { + "epoch": 16.750556792873052, + "loss": 0.6089690923690796, + "loss_ce": 8.241091563832015e-05, + "loss_iou": 0.255859375, + "loss_num": 0.019775390625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 421418924, + "step": 7521 + }, + { + "epoch": 16.752783964365257, + "grad_norm": 19.50495719909668, + "learning_rate": 1e-06, + "loss": 0.3985, + "num_input_tokens_seen": 421474016, + "step": 7522 + }, + { + "epoch": 16.752783964365257, + "loss": 0.46183595061302185, + "loss_ce": 0.00010498787742108107, + "loss_iou": 0.203125, + "loss_num": 0.01123046875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 421474016, + "step": 7522 + }, + { + "epoch": 16.755011135857462, + "grad_norm": 19.55943489074707, + "learning_rate": 1e-06, + "loss": 0.5932, + "num_input_tokens_seen": 421528604, + "step": 7523 + }, + { + "epoch": 16.755011135857462, + "loss": 0.672928512096405, + "loss_ce": 0.00019901388441212475, + "loss_iou": 0.279296875, + "loss_num": 0.02294921875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 421528604, + "step": 7523 + }, + { + "epoch": 16.757238307349667, + "grad_norm": 18.727272033691406, + "learning_rate": 1e-06, + "loss": 0.4604, + "num_input_tokens_seen": 421582348, + "step": 7524 + }, + { + "epoch": 16.757238307349667, + "loss": 0.43595296144485474, + "loss_ce": 0.00016193735064007342, + "loss_iou": 0.1962890625, + "loss_num": 0.008544921875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 421582348, + "step": 7524 + }, + { + "epoch": 16.75946547884187, + "grad_norm": 14.18976879119873, + "learning_rate": 1e-06, + "loss": 0.3096, + "num_input_tokens_seen": 421637456, + "step": 7525 + }, + { + "epoch": 16.75946547884187, + "loss": 0.2345210313796997, + "loss_ce": 8.500301919411868e-05, + "loss_iou": 0.10888671875, + "loss_num": 0.0033416748046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 421637456, + "step": 7525 + }, + { + "epoch": 16.761692650334076, + "grad_norm": 14.92958927154541, + "learning_rate": 1e-06, + "loss": 0.4751, + "num_input_tokens_seen": 421694176, + "step": 7526 + }, + { + "epoch": 16.761692650334076, + "loss": 0.3878576159477234, + "loss_ce": 0.00010129041038453579, + "loss_iou": 0.1591796875, + "loss_num": 0.0140380859375, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 421694176, + "step": 7526 + }, + { + "epoch": 16.76391982182628, + "grad_norm": 23.31275749206543, + "learning_rate": 1e-06, + "loss": 0.5295, + "num_input_tokens_seen": 421751032, + "step": 7527 + }, + { + "epoch": 16.76391982182628, + "loss": 0.5836684703826904, + "loss_ce": 8.08104668976739e-05, + "loss_iou": 0.224609375, + "loss_num": 0.02685546875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 421751032, + "step": 7527 + }, + { + "epoch": 16.766146993318486, + "grad_norm": 24.81498146057129, + "learning_rate": 1e-06, + "loss": 0.4622, + "num_input_tokens_seen": 421806836, + "step": 7528 + }, + { + "epoch": 16.766146993318486, + "loss": 0.37533727288246155, + "loss_ce": 9.314088674727827e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.005767822265625, + "loss_xval": 0.375, + "num_input_tokens_seen": 421806836, + "step": 7528 + }, + { + "epoch": 16.76837416481069, + "grad_norm": 23.45658302307129, + "learning_rate": 1e-06, + "loss": 0.4903, + "num_input_tokens_seen": 421863004, + "step": 7529 + }, + { + "epoch": 16.76837416481069, + "loss": 0.6420989036560059, + "loss_ce": 0.00013110964209772646, + "loss_iou": 0.291015625, + "loss_num": 0.011962890625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 421863004, + "step": 7529 + }, + { + "epoch": 16.770601336302896, + "grad_norm": 17.918804168701172, + "learning_rate": 1e-06, + "loss": 0.3386, + "num_input_tokens_seen": 421918756, + "step": 7530 + }, + { + "epoch": 16.770601336302896, + "loss": 0.3590957224369049, + "loss_ce": 8.693411655258387e-05, + "loss_iou": 0.150390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.359375, + "num_input_tokens_seen": 421918756, + "step": 7530 + }, + { + "epoch": 16.7728285077951, + "grad_norm": 21.4240665435791, + "learning_rate": 1e-06, + "loss": 0.4388, + "num_input_tokens_seen": 421975608, + "step": 7531 + }, + { + "epoch": 16.7728285077951, + "loss": 0.3268599510192871, + "loss_ce": 7.773490506224334e-05, + "loss_iou": 0.138671875, + "loss_num": 0.009765625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 421975608, + "step": 7531 + }, + { + "epoch": 16.775055679287306, + "grad_norm": 22.135826110839844, + "learning_rate": 1e-06, + "loss": 0.2985, + "num_input_tokens_seen": 422029336, + "step": 7532 + }, + { + "epoch": 16.775055679287306, + "loss": 0.2817379832267761, + "loss_ce": 9.12583782337606e-05, + "loss_iou": 0.12109375, + "loss_num": 0.00799560546875, + "loss_xval": 0.28125, + "num_input_tokens_seen": 422029336, + "step": 7532 + }, + { + "epoch": 16.77728285077951, + "grad_norm": 12.753730773925781, + "learning_rate": 1e-06, + "loss": 0.4948, + "num_input_tokens_seen": 422087488, + "step": 7533 + }, + { + "epoch": 16.77728285077951, + "loss": 0.6205785274505615, + "loss_ce": 0.00046129614929668605, + "loss_iou": 0.2734375, + "loss_num": 0.01483154296875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 422087488, + "step": 7533 + }, + { + "epoch": 16.779510022271715, + "grad_norm": 17.831968307495117, + "learning_rate": 1e-06, + "loss": 0.3904, + "num_input_tokens_seen": 422144960, + "step": 7534 + }, + { + "epoch": 16.779510022271715, + "loss": 0.3965558111667633, + "loss_ce": 7.143749098759145e-05, + "loss_iou": 0.150390625, + "loss_num": 0.0194091796875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 422144960, + "step": 7534 + }, + { + "epoch": 16.78173719376392, + "grad_norm": 19.048486709594727, + "learning_rate": 1e-06, + "loss": 0.2356, + "num_input_tokens_seen": 422202632, + "step": 7535 + }, + { + "epoch": 16.78173719376392, + "loss": 0.19454200565814972, + "loss_ce": 8.399138459935784e-05, + "loss_iou": 0.0888671875, + "loss_num": 0.003265380859375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 422202632, + "step": 7535 + }, + { + "epoch": 16.783964365256125, + "grad_norm": 15.023216247558594, + "learning_rate": 1e-06, + "loss": 0.3777, + "num_input_tokens_seen": 422258820, + "step": 7536 + }, + { + "epoch": 16.783964365256125, + "loss": 0.25117045640945435, + "loss_ce": 7.181320688687265e-05, + "loss_iou": 0.1162109375, + "loss_num": 0.003814697265625, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 422258820, + "step": 7536 + }, + { + "epoch": 16.78619153674833, + "grad_norm": 12.973251342773438, + "learning_rate": 1e-06, + "loss": 0.421, + "num_input_tokens_seen": 422317024, + "step": 7537 + }, + { + "epoch": 16.78619153674833, + "loss": 0.37338966131210327, + "loss_ce": 9.861498983809724e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.01092529296875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 422317024, + "step": 7537 + }, + { + "epoch": 16.788418708240535, + "grad_norm": 26.241365432739258, + "learning_rate": 1e-06, + "loss": 0.3026, + "num_input_tokens_seen": 422372340, + "step": 7538 + }, + { + "epoch": 16.788418708240535, + "loss": 0.31962519884109497, + "loss_ce": 0.00010615789506118745, + "loss_iou": 0.1513671875, + "loss_num": 0.0032806396484375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 422372340, + "step": 7538 + }, + { + "epoch": 16.79064587973274, + "grad_norm": 17.4132080078125, + "learning_rate": 1e-06, + "loss": 0.3208, + "num_input_tokens_seen": 422427068, + "step": 7539 + }, + { + "epoch": 16.79064587973274, + "loss": 0.4059646427631378, + "loss_ce": 8.086010348051786e-05, + "loss_iou": 0.1875, + "loss_num": 0.006317138671875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 422427068, + "step": 7539 + }, + { + "epoch": 16.792873051224944, + "grad_norm": 18.924129486083984, + "learning_rate": 1e-06, + "loss": 0.4015, + "num_input_tokens_seen": 422485208, + "step": 7540 + }, + { + "epoch": 16.792873051224944, + "loss": 0.549044668674469, + "loss_ce": 9.448261698707938e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.0181884765625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 422485208, + "step": 7540 + }, + { + "epoch": 16.79510022271715, + "grad_norm": 21.377471923828125, + "learning_rate": 1e-06, + "loss": 0.2579, + "num_input_tokens_seen": 422541288, + "step": 7541 + }, + { + "epoch": 16.79510022271715, + "loss": 0.22633783519268036, + "loss_ce": 8.05138552095741e-05, + "loss_iou": 0.09619140625, + "loss_num": 0.006805419921875, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 422541288, + "step": 7541 + }, + { + "epoch": 16.797327394209354, + "grad_norm": 18.096935272216797, + "learning_rate": 1e-06, + "loss": 0.4972, + "num_input_tokens_seen": 422598728, + "step": 7542 + }, + { + "epoch": 16.797327394209354, + "loss": 0.598351240158081, + "loss_ce": 8.468693704344332e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.025146484375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 422598728, + "step": 7542 + }, + { + "epoch": 16.79955456570156, + "grad_norm": 18.19603157043457, + "learning_rate": 1e-06, + "loss": 0.463, + "num_input_tokens_seen": 422653832, + "step": 7543 + }, + { + "epoch": 16.79955456570156, + "loss": 0.5140941143035889, + "loss_ce": 8.656126010464504e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0205078125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 422653832, + "step": 7543 + }, + { + "epoch": 16.801781737193764, + "grad_norm": 20.3741397857666, + "learning_rate": 1e-06, + "loss": 0.3391, + "num_input_tokens_seen": 422711052, + "step": 7544 + }, + { + "epoch": 16.801781737193764, + "loss": 0.37486082315444946, + "loss_ce": 0.00010497802577447146, + "loss_iou": 0.173828125, + "loss_num": 0.00518798828125, + "loss_xval": 0.375, + "num_input_tokens_seen": 422711052, + "step": 7544 + }, + { + "epoch": 16.80400890868597, + "grad_norm": 15.383708953857422, + "learning_rate": 1e-06, + "loss": 0.4458, + "num_input_tokens_seen": 422768836, + "step": 7545 + }, + { + "epoch": 16.80400890868597, + "loss": 0.2621135711669922, + "loss_ce": 8.963444997789338e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.004302978515625, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 422768836, + "step": 7545 + }, + { + "epoch": 16.806236080178174, + "grad_norm": 19.3375186920166, + "learning_rate": 1e-06, + "loss": 0.3409, + "num_input_tokens_seen": 422826720, + "step": 7546 + }, + { + "epoch": 16.806236080178174, + "loss": 0.23085373640060425, + "loss_ce": 7.98294713604264e-05, + "loss_iou": 0.107421875, + "loss_num": 0.003204345703125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 422826720, + "step": 7546 + }, + { + "epoch": 16.80846325167038, + "grad_norm": 27.87537956237793, + "learning_rate": 1e-06, + "loss": 0.3236, + "num_input_tokens_seen": 422881060, + "step": 7547 + }, + { + "epoch": 16.80846325167038, + "loss": 0.2520201802253723, + "loss_ce": 6.706906424369663e-05, + "loss_iou": 0.08837890625, + "loss_num": 0.01513671875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 422881060, + "step": 7547 + }, + { + "epoch": 16.810690423162583, + "grad_norm": 9.865744590759277, + "learning_rate": 1e-06, + "loss": 0.3187, + "num_input_tokens_seen": 422938468, + "step": 7548 + }, + { + "epoch": 16.810690423162583, + "loss": 0.39917415380477905, + "loss_ce": 0.00012633406731765717, + "loss_iou": 0.1787109375, + "loss_num": 0.00830078125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 422938468, + "step": 7548 + }, + { + "epoch": 16.812917594654788, + "grad_norm": 18.907991409301758, + "learning_rate": 1e-06, + "loss": 0.4061, + "num_input_tokens_seen": 422996068, + "step": 7549 + }, + { + "epoch": 16.812917594654788, + "loss": 0.5208408832550049, + "loss_ce": 8.891297329682857e-05, + "loss_iou": 0.21875, + "loss_num": 0.0167236328125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 422996068, + "step": 7549 + }, + { + "epoch": 16.815144766146993, + "grad_norm": 14.58835506439209, + "learning_rate": 1e-06, + "loss": 0.3215, + "num_input_tokens_seen": 423051768, + "step": 7550 + }, + { + "epoch": 16.815144766146993, + "loss": 0.4090169668197632, + "loss_ce": 8.143430750351399e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.00628662109375, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 423051768, + "step": 7550 + }, + { + "epoch": 16.817371937639198, + "grad_norm": 14.23434066772461, + "learning_rate": 1e-06, + "loss": 0.3621, + "num_input_tokens_seen": 423107452, + "step": 7551 + }, + { + "epoch": 16.817371937639198, + "loss": 0.2738141119480133, + "loss_ce": 7.142306276364252e-05, + "loss_iou": 0.119140625, + "loss_num": 0.00701904296875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 423107452, + "step": 7551 + }, + { + "epoch": 16.819599109131403, + "grad_norm": 17.120817184448242, + "learning_rate": 1e-06, + "loss": 0.4439, + "num_input_tokens_seen": 423165408, + "step": 7552 + }, + { + "epoch": 16.819599109131403, + "loss": 0.466147780418396, + "loss_ce": 8.328772673849016e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.0185546875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 423165408, + "step": 7552 + }, + { + "epoch": 16.821826280623608, + "grad_norm": 16.54876708984375, + "learning_rate": 1e-06, + "loss": 0.5828, + "num_input_tokens_seen": 423220424, + "step": 7553 + }, + { + "epoch": 16.821826280623608, + "loss": 0.656104326248169, + "loss_ce": 9.84652797342278e-05, + "loss_iou": 0.28515625, + "loss_num": 0.017333984375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 423220424, + "step": 7553 + }, + { + "epoch": 16.824053452115812, + "grad_norm": 29.920957565307617, + "learning_rate": 1e-06, + "loss": 0.5121, + "num_input_tokens_seen": 423275472, + "step": 7554 + }, + { + "epoch": 16.824053452115812, + "loss": 0.4944070279598236, + "loss_ce": 8.32905643619597e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.02099609375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 423275472, + "step": 7554 + }, + { + "epoch": 16.826280623608017, + "grad_norm": 14.726850509643555, + "learning_rate": 1e-06, + "loss": 0.3757, + "num_input_tokens_seen": 423332856, + "step": 7555 + }, + { + "epoch": 16.826280623608017, + "loss": 0.4061638116836548, + "loss_ce": 9.694288019090891e-05, + "loss_iou": 0.16796875, + "loss_num": 0.013916015625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 423332856, + "step": 7555 + }, + { + "epoch": 16.828507795100222, + "grad_norm": 18.766006469726562, + "learning_rate": 1e-06, + "loss": 0.5588, + "num_input_tokens_seen": 423388552, + "step": 7556 + }, + { + "epoch": 16.828507795100222, + "loss": 0.5013834834098816, + "loss_ce": 0.00010174483759328723, + "loss_iou": 0.205078125, + "loss_num": 0.018310546875, + "loss_xval": 0.5, + "num_input_tokens_seen": 423388552, + "step": 7556 + }, + { + "epoch": 16.830734966592427, + "grad_norm": 16.07285499572754, + "learning_rate": 1e-06, + "loss": 0.4426, + "num_input_tokens_seen": 423443352, + "step": 7557 + }, + { + "epoch": 16.830734966592427, + "loss": 0.531631588935852, + "loss_ce": 0.00013745043543167412, + "loss_iou": 0.236328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 423443352, + "step": 7557 + }, + { + "epoch": 16.832962138084632, + "grad_norm": 22.96206283569336, + "learning_rate": 1e-06, + "loss": 0.2828, + "num_input_tokens_seen": 423497072, + "step": 7558 + }, + { + "epoch": 16.832962138084632, + "loss": 0.2979274392127991, + "loss_ce": 7.588087464682758e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.01025390625, + "loss_xval": 0.296875, + "num_input_tokens_seen": 423497072, + "step": 7558 + }, + { + "epoch": 16.835189309576837, + "grad_norm": 14.747753143310547, + "learning_rate": 1e-06, + "loss": 0.3194, + "num_input_tokens_seen": 423554452, + "step": 7559 + }, + { + "epoch": 16.835189309576837, + "loss": 0.41189420223236084, + "loss_ce": 9.000718273455277e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.005218505859375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 423554452, + "step": 7559 + }, + { + "epoch": 16.83741648106904, + "grad_norm": 13.279129981994629, + "learning_rate": 1e-06, + "loss": 0.3928, + "num_input_tokens_seen": 423612016, + "step": 7560 + }, + { + "epoch": 16.83741648106904, + "loss": 0.32673531770706177, + "loss_ce": 7.517023186665028e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.01025390625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 423612016, + "step": 7560 + }, + { + "epoch": 16.839643652561247, + "grad_norm": 15.924846649169922, + "learning_rate": 1e-06, + "loss": 0.392, + "num_input_tokens_seen": 423668608, + "step": 7561 + }, + { + "epoch": 16.839643652561247, + "loss": 0.2862039804458618, + "loss_ce": 7.118898065527901e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.00775146484375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 423668608, + "step": 7561 + }, + { + "epoch": 16.84187082405345, + "grad_norm": 19.908687591552734, + "learning_rate": 1e-06, + "loss": 0.5825, + "num_input_tokens_seen": 423725484, + "step": 7562 + }, + { + "epoch": 16.84187082405345, + "loss": 0.5031726360321045, + "loss_ce": 0.00012085959315299988, + "loss_iou": 0.2236328125, + "loss_num": 0.01116943359375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 423725484, + "step": 7562 + }, + { + "epoch": 16.844097995545656, + "grad_norm": 25.447425842285156, + "learning_rate": 1e-06, + "loss": 0.3432, + "num_input_tokens_seen": 423778384, + "step": 7563 + }, + { + "epoch": 16.844097995545656, + "loss": 0.3695857524871826, + "loss_ce": 7.891730638220906e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.01019287109375, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 423778384, + "step": 7563 + }, + { + "epoch": 16.84632516703786, + "grad_norm": 13.935490608215332, + "learning_rate": 1e-06, + "loss": 0.4838, + "num_input_tokens_seen": 423834376, + "step": 7564 + }, + { + "epoch": 16.84632516703786, + "loss": 0.4895142912864685, + "loss_ce": 0.00013441329065244645, + "loss_iou": 0.1845703125, + "loss_num": 0.024169921875, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 423834376, + "step": 7564 + }, + { + "epoch": 16.848552338530066, + "grad_norm": 16.72061538696289, + "learning_rate": 1e-06, + "loss": 0.3445, + "num_input_tokens_seen": 423890956, + "step": 7565 + }, + { + "epoch": 16.848552338530066, + "loss": 0.4084116220474243, + "loss_ce": 0.00014747484237886965, + "loss_iou": 0.1826171875, + "loss_num": 0.00848388671875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 423890956, + "step": 7565 + }, + { + "epoch": 16.85077951002227, + "grad_norm": 26.282730102539062, + "learning_rate": 1e-06, + "loss": 0.519, + "num_input_tokens_seen": 423947064, + "step": 7566 + }, + { + "epoch": 16.85077951002227, + "loss": 0.6066428422927856, + "loss_ce": 7.544091204181314e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 423947064, + "step": 7566 + }, + { + "epoch": 16.853006681514476, + "grad_norm": 15.286888122558594, + "learning_rate": 1e-06, + "loss": 0.4031, + "num_input_tokens_seen": 424005024, + "step": 7567 + }, + { + "epoch": 16.853006681514476, + "loss": 0.29945024847984314, + "loss_ce": 7.280143472598866e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.007171630859375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 424005024, + "step": 7567 + }, + { + "epoch": 16.85523385300668, + "grad_norm": 22.391944885253906, + "learning_rate": 1e-06, + "loss": 0.3159, + "num_input_tokens_seen": 424061520, + "step": 7568 + }, + { + "epoch": 16.85523385300668, + "loss": 0.3320120573043823, + "loss_ce": 0.00010290154023095965, + "loss_iou": 0.15234375, + "loss_num": 0.005401611328125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 424061520, + "step": 7568 + }, + { + "epoch": 16.857461024498885, + "grad_norm": 28.07231903076172, + "learning_rate": 1e-06, + "loss": 0.512, + "num_input_tokens_seen": 424115184, + "step": 7569 + }, + { + "epoch": 16.857461024498885, + "loss": 0.4680614471435547, + "loss_ce": 0.00010489917622180656, + "loss_iou": 0.212890625, + "loss_num": 0.00860595703125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 424115184, + "step": 7569 + }, + { + "epoch": 16.85968819599109, + "grad_norm": 23.56254768371582, + "learning_rate": 1e-06, + "loss": 0.3468, + "num_input_tokens_seen": 424169644, + "step": 7570 + }, + { + "epoch": 16.85968819599109, + "loss": 0.41865983605384827, + "loss_ce": 8.075643563643098e-05, + "loss_iou": 0.1953125, + "loss_num": 0.0054931640625, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 424169644, + "step": 7570 + }, + { + "epoch": 16.861915367483295, + "grad_norm": 25.315942764282227, + "learning_rate": 1e-06, + "loss": 0.4083, + "num_input_tokens_seen": 424227580, + "step": 7571 + }, + { + "epoch": 16.861915367483295, + "loss": 0.37649667263031006, + "loss_ce": 9.28444933379069e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00872802734375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 424227580, + "step": 7571 + }, + { + "epoch": 16.8641425389755, + "grad_norm": 15.719208717346191, + "learning_rate": 1e-06, + "loss": 0.4909, + "num_input_tokens_seen": 424283408, + "step": 7572 + }, + { + "epoch": 16.8641425389755, + "loss": 0.4299257695674896, + "loss_ce": 0.00011618290591286495, + "loss_iou": 0.185546875, + "loss_num": 0.01165771484375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 424283408, + "step": 7572 + }, + { + "epoch": 16.866369710467705, + "grad_norm": 28.304363250732422, + "learning_rate": 1e-06, + "loss": 0.2948, + "num_input_tokens_seen": 424338840, + "step": 7573 + }, + { + "epoch": 16.866369710467705, + "loss": 0.3035458028316498, + "loss_ce": 7.902114157332107e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.0078125, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 424338840, + "step": 7573 + }, + { + "epoch": 16.86859688195991, + "grad_norm": 13.032916069030762, + "learning_rate": 1e-06, + "loss": 0.5414, + "num_input_tokens_seen": 424394268, + "step": 7574 + }, + { + "epoch": 16.86859688195991, + "loss": 0.5727271437644958, + "loss_ce": 9.532412514090538e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.0196533203125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 424394268, + "step": 7574 + }, + { + "epoch": 16.870824053452115, + "grad_norm": 16.96046257019043, + "learning_rate": 1e-06, + "loss": 0.4261, + "num_input_tokens_seen": 424453868, + "step": 7575 + }, + { + "epoch": 16.870824053452115, + "loss": 0.38206472992897034, + "loss_ce": 0.0001067298071575351, + "loss_iou": 0.173828125, + "loss_num": 0.007080078125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 424453868, + "step": 7575 + }, + { + "epoch": 16.87305122494432, + "grad_norm": 23.169174194335938, + "learning_rate": 1e-06, + "loss": 0.3393, + "num_input_tokens_seen": 424510884, + "step": 7576 + }, + { + "epoch": 16.87305122494432, + "loss": 0.33035528659820557, + "loss_ce": 9.405257151229307e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.00927734375, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 424510884, + "step": 7576 + }, + { + "epoch": 16.875278396436524, + "grad_norm": 20.488079071044922, + "learning_rate": 1e-06, + "loss": 0.3433, + "num_input_tokens_seen": 424566384, + "step": 7577 + }, + { + "epoch": 16.875278396436524, + "loss": 0.32358628511428833, + "loss_ce": 9.996739390771836e-05, + "loss_iou": 0.142578125, + "loss_num": 0.00750732421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 424566384, + "step": 7577 + }, + { + "epoch": 16.87750556792873, + "grad_norm": 13.715553283691406, + "learning_rate": 1e-06, + "loss": 0.3461, + "num_input_tokens_seen": 424623228, + "step": 7578 + }, + { + "epoch": 16.87750556792873, + "loss": 0.3440733253955841, + "loss_ce": 7.916930189821869e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.0107421875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 424623228, + "step": 7578 + }, + { + "epoch": 16.879732739420934, + "grad_norm": 16.320846557617188, + "learning_rate": 1e-06, + "loss": 0.5924, + "num_input_tokens_seen": 424682444, + "step": 7579 + }, + { + "epoch": 16.879732739420934, + "loss": 0.6377947330474854, + "loss_ce": 9.944755584001541e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0181884765625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 424682444, + "step": 7579 + }, + { + "epoch": 16.88195991091314, + "grad_norm": 18.41837501525879, + "learning_rate": 1e-06, + "loss": 0.6497, + "num_input_tokens_seen": 424740464, + "step": 7580 + }, + { + "epoch": 16.88195991091314, + "loss": 0.6376847624778748, + "loss_ce": 0.00011151684884680435, + "loss_iou": 0.26953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 424740464, + "step": 7580 + }, + { + "epoch": 16.884187082405344, + "grad_norm": 16.372474670410156, + "learning_rate": 1e-06, + "loss": 0.3357, + "num_input_tokens_seen": 424794412, + "step": 7581 + }, + { + "epoch": 16.884187082405344, + "loss": 0.28870633244514465, + "loss_ce": 7.107005512807518e-05, + "loss_iou": 0.1181640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 424794412, + "step": 7581 + }, + { + "epoch": 16.88641425389755, + "grad_norm": 23.682388305664062, + "learning_rate": 1e-06, + "loss": 0.4056, + "num_input_tokens_seen": 424852012, + "step": 7582 + }, + { + "epoch": 16.88641425389755, + "loss": 0.34498876333236694, + "loss_ce": 7.910602289484814e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0079345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 424852012, + "step": 7582 + }, + { + "epoch": 16.888641425389753, + "grad_norm": 22.36638069152832, + "learning_rate": 1e-06, + "loss": 0.4973, + "num_input_tokens_seen": 424903972, + "step": 7583 + }, + { + "epoch": 16.888641425389753, + "loss": 0.41586142778396606, + "loss_ce": 8.992112998384982e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.008544921875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 424903972, + "step": 7583 + }, + { + "epoch": 16.89086859688196, + "grad_norm": 28.00014305114746, + "learning_rate": 1e-06, + "loss": 0.5041, + "num_input_tokens_seen": 424960808, + "step": 7584 + }, + { + "epoch": 16.89086859688196, + "loss": 0.5798003077507019, + "loss_ce": 8.836777124088258e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0155029296875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 424960808, + "step": 7584 + }, + { + "epoch": 16.893095768374163, + "grad_norm": 18.74024200439453, + "learning_rate": 1e-06, + "loss": 0.4247, + "num_input_tokens_seen": 425018468, + "step": 7585 + }, + { + "epoch": 16.893095768374163, + "loss": 0.477266788482666, + "loss_ce": 9.394106746185571e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.01043701171875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 425018468, + "step": 7585 + }, + { + "epoch": 16.895322939866368, + "grad_norm": 20.79447364807129, + "learning_rate": 1e-06, + "loss": 0.4927, + "num_input_tokens_seen": 425074148, + "step": 7586 + }, + { + "epoch": 16.895322939866368, + "loss": 0.7077399492263794, + "loss_ce": 9.840876737143844e-05, + "loss_iou": 0.279296875, + "loss_num": 0.029541015625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 425074148, + "step": 7586 + }, + { + "epoch": 16.897550111358576, + "grad_norm": 23.163110733032227, + "learning_rate": 1e-06, + "loss": 0.3244, + "num_input_tokens_seen": 425129428, + "step": 7587 + }, + { + "epoch": 16.897550111358576, + "loss": 0.3205108642578125, + "loss_ce": 7.629128958797082e-05, + "loss_iou": 0.142578125, + "loss_num": 0.007049560546875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 425129428, + "step": 7587 + }, + { + "epoch": 16.899777282850778, + "grad_norm": 23.743364334106445, + "learning_rate": 1e-06, + "loss": 0.4087, + "num_input_tokens_seen": 425184424, + "step": 7588 + }, + { + "epoch": 16.899777282850778, + "loss": 0.5007613897323608, + "loss_ce": 0.00015102376346476376, + "loss_iou": 0.2255859375, + "loss_num": 0.00994873046875, + "loss_xval": 0.5, + "num_input_tokens_seen": 425184424, + "step": 7588 + }, + { + "epoch": 16.902004454342986, + "grad_norm": 22.105934143066406, + "learning_rate": 1e-06, + "loss": 0.3156, + "num_input_tokens_seen": 425241216, + "step": 7589 + }, + { + "epoch": 16.902004454342986, + "loss": 0.30233582854270935, + "loss_ce": 8.974589582066983e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.00604248046875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 425241216, + "step": 7589 + }, + { + "epoch": 16.90423162583519, + "grad_norm": 26.043792724609375, + "learning_rate": 1e-06, + "loss": 0.6462, + "num_input_tokens_seen": 425294740, + "step": 7590 + }, + { + "epoch": 16.90423162583519, + "loss": 0.3308909833431244, + "loss_ce": 8.044774585869163e-05, + "loss_iou": 0.140625, + "loss_num": 0.00982666015625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 425294740, + "step": 7590 + }, + { + "epoch": 16.906458797327396, + "grad_norm": 25.259113311767578, + "learning_rate": 1e-06, + "loss": 0.4507, + "num_input_tokens_seen": 425352476, + "step": 7591 + }, + { + "epoch": 16.906458797327396, + "loss": 0.44027650356292725, + "loss_ce": 9.096147550735623e-05, + "loss_iou": 0.201171875, + "loss_num": 0.007568359375, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 425352476, + "step": 7591 + }, + { + "epoch": 16.9086859688196, + "grad_norm": 18.93207359313965, + "learning_rate": 1e-06, + "loss": 0.4337, + "num_input_tokens_seen": 425410420, + "step": 7592 + }, + { + "epoch": 16.9086859688196, + "loss": 0.556984543800354, + "loss_ce": 9.982170013245195e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.0242919921875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 425410420, + "step": 7592 + }, + { + "epoch": 16.910913140311806, + "grad_norm": 17.402809143066406, + "learning_rate": 1e-06, + "loss": 0.3179, + "num_input_tokens_seen": 425468280, + "step": 7593 + }, + { + "epoch": 16.910913140311806, + "loss": 0.3924216330051422, + "loss_ce": 8.76395424711518e-05, + "loss_iou": 0.177734375, + "loss_num": 0.007568359375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 425468280, + "step": 7593 + }, + { + "epoch": 16.91314031180401, + "grad_norm": 14.503034591674805, + "learning_rate": 1e-06, + "loss": 0.6439, + "num_input_tokens_seen": 425525096, + "step": 7594 + }, + { + "epoch": 16.91314031180401, + "loss": 0.9615048170089722, + "loss_ce": 0.0005673256237059832, + "loss_iou": 0.365234375, + "loss_num": 0.04638671875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 425525096, + "step": 7594 + }, + { + "epoch": 16.915367483296215, + "grad_norm": 21.804555892944336, + "learning_rate": 1e-06, + "loss": 0.4446, + "num_input_tokens_seen": 425579188, + "step": 7595 + }, + { + "epoch": 16.915367483296215, + "loss": 0.41501274704933167, + "loss_ce": 9.576005686540157e-05, + "loss_iou": 0.19140625, + "loss_num": 0.00616455078125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 425579188, + "step": 7595 + }, + { + "epoch": 16.91759465478842, + "grad_norm": 17.96055793762207, + "learning_rate": 1e-06, + "loss": 0.435, + "num_input_tokens_seen": 425635356, + "step": 7596 + }, + { + "epoch": 16.91759465478842, + "loss": 0.44575822353363037, + "loss_ce": 7.95404048403725e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.0096435546875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 425635356, + "step": 7596 + }, + { + "epoch": 16.919821826280625, + "grad_norm": 22.727903366088867, + "learning_rate": 1e-06, + "loss": 0.4284, + "num_input_tokens_seen": 425691988, + "step": 7597 + }, + { + "epoch": 16.919821826280625, + "loss": 0.4949197769165039, + "loss_ce": 0.0004129420267418027, + "loss_iou": 0.2197265625, + "loss_num": 0.01092529296875, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 425691988, + "step": 7597 + }, + { + "epoch": 16.92204899777283, + "grad_norm": 19.787315368652344, + "learning_rate": 1e-06, + "loss": 0.3273, + "num_input_tokens_seen": 425749968, + "step": 7598 + }, + { + "epoch": 16.92204899777283, + "loss": 0.2782631516456604, + "loss_ce": 6.490422674687579e-05, + "loss_iou": 0.1123046875, + "loss_num": 0.0107421875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 425749968, + "step": 7598 + }, + { + "epoch": 16.924276169265035, + "grad_norm": 19.99041748046875, + "learning_rate": 1e-06, + "loss": 0.5476, + "num_input_tokens_seen": 425805264, + "step": 7599 + }, + { + "epoch": 16.924276169265035, + "loss": 0.7404612302780151, + "loss_ce": 0.00010478113836143166, + "loss_iou": 0.30078125, + "loss_num": 0.02783203125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 425805264, + "step": 7599 + }, + { + "epoch": 16.92650334075724, + "grad_norm": 20.051441192626953, + "learning_rate": 1e-06, + "loss": 0.3816, + "num_input_tokens_seen": 425860344, + "step": 7600 + }, + { + "epoch": 16.92650334075724, + "loss": 0.4353218674659729, + "loss_ce": 8.015791536308825e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0084228515625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 425860344, + "step": 7600 + }, + { + "epoch": 16.928730512249444, + "grad_norm": 19.339773178100586, + "learning_rate": 1e-06, + "loss": 0.5078, + "num_input_tokens_seen": 425916088, + "step": 7601 + }, + { + "epoch": 16.928730512249444, + "loss": 0.4659002423286438, + "loss_ce": 7.990589074324816e-05, + "loss_iou": 0.21484375, + "loss_num": 0.007171630859375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 425916088, + "step": 7601 + }, + { + "epoch": 16.93095768374165, + "grad_norm": 11.72046947479248, + "learning_rate": 1e-06, + "loss": 0.271, + "num_input_tokens_seen": 425971676, + "step": 7602 + }, + { + "epoch": 16.93095768374165, + "loss": 0.3369971513748169, + "loss_ce": 8.307769894599915e-05, + "loss_iou": 0.142578125, + "loss_num": 0.010498046875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 425971676, + "step": 7602 + }, + { + "epoch": 16.933184855233854, + "grad_norm": 26.19060516357422, + "learning_rate": 1e-06, + "loss": 0.43, + "num_input_tokens_seen": 426026612, + "step": 7603 + }, + { + "epoch": 16.933184855233854, + "loss": 0.33956053853034973, + "loss_ce": 8.299553883261979e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.01123046875, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 426026612, + "step": 7603 + }, + { + "epoch": 16.93541202672606, + "grad_norm": 36.85425567626953, + "learning_rate": 1e-06, + "loss": 0.3744, + "num_input_tokens_seen": 426084004, + "step": 7604 + }, + { + "epoch": 16.93541202672606, + "loss": 0.4844852089881897, + "loss_ce": 0.0004153858171775937, + "loss_iou": 0.21484375, + "loss_num": 0.01116943359375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 426084004, + "step": 7604 + }, + { + "epoch": 16.937639198218264, + "grad_norm": 22.670175552368164, + "learning_rate": 1e-06, + "loss": 0.3386, + "num_input_tokens_seen": 426139888, + "step": 7605 + }, + { + "epoch": 16.937639198218264, + "loss": 0.3556758165359497, + "loss_ce": 8.5016421508044e-05, + "loss_iou": 0.162109375, + "loss_num": 0.006103515625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 426139888, + "step": 7605 + }, + { + "epoch": 16.93986636971047, + "grad_norm": 18.98232650756836, + "learning_rate": 1e-06, + "loss": 0.4427, + "num_input_tokens_seen": 426196256, + "step": 7606 + }, + { + "epoch": 16.93986636971047, + "loss": 0.4649594724178314, + "loss_ce": 0.00011571809591259807, + "loss_iou": 0.205078125, + "loss_num": 0.01104736328125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 426196256, + "step": 7606 + }, + { + "epoch": 16.942093541202674, + "grad_norm": 774.5324096679688, + "learning_rate": 1e-06, + "loss": 0.3919, + "num_input_tokens_seen": 426253332, + "step": 7607 + }, + { + "epoch": 16.942093541202674, + "loss": 0.3354228436946869, + "loss_ce": 9.569604299031198e-05, + "loss_iou": 0.12890625, + "loss_num": 0.015625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 426253332, + "step": 7607 + }, + { + "epoch": 16.94432071269488, + "grad_norm": 53.58465576171875, + "learning_rate": 1e-06, + "loss": 0.4068, + "num_input_tokens_seen": 426310116, + "step": 7608 + }, + { + "epoch": 16.94432071269488, + "loss": 0.2681652903556824, + "loss_ce": 9.887861961033195e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.006988525390625, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 426310116, + "step": 7608 + }, + { + "epoch": 16.946547884187083, + "grad_norm": 19.653953552246094, + "learning_rate": 1e-06, + "loss": 0.3449, + "num_input_tokens_seen": 426367920, + "step": 7609 + }, + { + "epoch": 16.946547884187083, + "loss": 0.2864606976509094, + "loss_ce": 0.0001142588589573279, + "loss_iou": 0.126953125, + "loss_num": 0.00640869140625, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 426367920, + "step": 7609 + }, + { + "epoch": 16.948775055679288, + "grad_norm": 15.588455200195312, + "learning_rate": 1e-06, + "loss": 0.5204, + "num_input_tokens_seen": 426423652, + "step": 7610 + }, + { + "epoch": 16.948775055679288, + "loss": 0.5305978655815125, + "loss_ce": 8.031211473280564e-05, + "loss_iou": 0.240234375, + "loss_num": 0.01019287109375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 426423652, + "step": 7610 + }, + { + "epoch": 16.951002227171493, + "grad_norm": 14.135087013244629, + "learning_rate": 1e-06, + "loss": 0.3808, + "num_input_tokens_seen": 426480624, + "step": 7611 + }, + { + "epoch": 16.951002227171493, + "loss": 0.4105345606803894, + "loss_ce": 7.310426735784858e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.01123046875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 426480624, + "step": 7611 + }, + { + "epoch": 16.953229398663698, + "grad_norm": 14.242335319519043, + "learning_rate": 1e-06, + "loss": 0.3675, + "num_input_tokens_seen": 426537756, + "step": 7612 + }, + { + "epoch": 16.953229398663698, + "loss": 0.37502074241638184, + "loss_ce": 8.180327131412923e-05, + "loss_iou": 0.171875, + "loss_num": 0.00628662109375, + "loss_xval": 0.375, + "num_input_tokens_seen": 426537756, + "step": 7612 + }, + { + "epoch": 16.955456570155903, + "grad_norm": 17.119190216064453, + "learning_rate": 1e-06, + "loss": 0.3499, + "num_input_tokens_seen": 426592748, + "step": 7613 + }, + { + "epoch": 16.955456570155903, + "loss": 0.3910788893699646, + "loss_ce": 8.76820704434067e-05, + "loss_iou": 0.158203125, + "loss_num": 0.0146484375, + "loss_xval": 0.390625, + "num_input_tokens_seen": 426592748, + "step": 7613 + }, + { + "epoch": 16.957683741648108, + "grad_norm": 19.266357421875, + "learning_rate": 1e-06, + "loss": 0.3833, + "num_input_tokens_seen": 426648636, + "step": 7614 + }, + { + "epoch": 16.957683741648108, + "loss": 0.27193689346313477, + "loss_ce": 8.629226795164868e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.004852294921875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 426648636, + "step": 7614 + }, + { + "epoch": 16.959910913140313, + "grad_norm": 14.205963134765625, + "learning_rate": 1e-06, + "loss": 0.3386, + "num_input_tokens_seen": 426706392, + "step": 7615 + }, + { + "epoch": 16.959910913140313, + "loss": 0.3684968948364258, + "loss_ce": 8.871590398484841e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00726318359375, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 426706392, + "step": 7615 + }, + { + "epoch": 16.962138084632517, + "grad_norm": 31.090105056762695, + "learning_rate": 1e-06, + "loss": 0.4142, + "num_input_tokens_seen": 426762144, + "step": 7616 + }, + { + "epoch": 16.962138084632517, + "loss": 0.2914935052394867, + "loss_ce": 8.114362572086975e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.0106201171875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 426762144, + "step": 7616 + }, + { + "epoch": 16.964365256124722, + "grad_norm": 16.874919891357422, + "learning_rate": 1e-06, + "loss": 0.3053, + "num_input_tokens_seen": 426820804, + "step": 7617 + }, + { + "epoch": 16.964365256124722, + "loss": 0.3068510890007019, + "loss_ce": 8.837327914079651e-05, + "loss_iou": 0.140625, + "loss_num": 0.00506591796875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 426820804, + "step": 7617 + }, + { + "epoch": 16.966592427616927, + "grad_norm": 21.651851654052734, + "learning_rate": 1e-06, + "loss": 0.6437, + "num_input_tokens_seen": 426878344, + "step": 7618 + }, + { + "epoch": 16.966592427616927, + "loss": 0.6224051713943481, + "loss_ce": 9.070623491425067e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0164794921875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 426878344, + "step": 7618 + }, + { + "epoch": 16.968819599109132, + "grad_norm": 15.368985176086426, + "learning_rate": 1e-06, + "loss": 0.4305, + "num_input_tokens_seen": 426936372, + "step": 7619 + }, + { + "epoch": 16.968819599109132, + "loss": 0.4007796049118042, + "loss_ce": 8.378856000490487e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01544189453125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 426936372, + "step": 7619 + }, + { + "epoch": 16.971046770601337, + "grad_norm": 16.223318099975586, + "learning_rate": 1e-06, + "loss": 0.2849, + "num_input_tokens_seen": 426990840, + "step": 7620 + }, + { + "epoch": 16.971046770601337, + "loss": 0.3703581690788269, + "loss_ce": 0.00017992404173128307, + "loss_iou": 0.16796875, + "loss_num": 0.0069580078125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 426990840, + "step": 7620 + }, + { + "epoch": 16.97327394209354, + "grad_norm": 42.12100601196289, + "learning_rate": 1e-06, + "loss": 0.4774, + "num_input_tokens_seen": 427049316, + "step": 7621 + }, + { + "epoch": 16.97327394209354, + "loss": 0.4062255620956421, + "loss_ce": 9.765510185388848e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.010498046875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 427049316, + "step": 7621 + }, + { + "epoch": 16.975501113585747, + "grad_norm": 15.246018409729004, + "learning_rate": 1e-06, + "loss": 0.4491, + "num_input_tokens_seen": 427105040, + "step": 7622 + }, + { + "epoch": 16.975501113585747, + "loss": 0.4461430311203003, + "loss_ce": 9.8112752311863e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01007080078125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 427105040, + "step": 7622 + }, + { + "epoch": 16.97772828507795, + "grad_norm": 74.06937408447266, + "learning_rate": 1e-06, + "loss": 0.5622, + "num_input_tokens_seen": 427159736, + "step": 7623 + }, + { + "epoch": 16.97772828507795, + "loss": 0.5899186730384827, + "loss_ce": 7.488045957870781e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0140380859375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 427159736, + "step": 7623 + }, + { + "epoch": 16.979955456570156, + "grad_norm": 17.075176239013672, + "learning_rate": 1e-06, + "loss": 0.4932, + "num_input_tokens_seen": 427216528, + "step": 7624 + }, + { + "epoch": 16.979955456570156, + "loss": 0.5001233816146851, + "loss_ce": 0.00012338865781202912, + "loss_iou": 0.234375, + "loss_num": 0.00616455078125, + "loss_xval": 0.5, + "num_input_tokens_seen": 427216528, + "step": 7624 + }, + { + "epoch": 16.98218262806236, + "grad_norm": 12.647735595703125, + "learning_rate": 1e-06, + "loss": 0.3257, + "num_input_tokens_seen": 427274456, + "step": 7625 + }, + { + "epoch": 16.98218262806236, + "loss": 0.2577294707298279, + "loss_ce": 0.00010009224934037775, + "loss_iou": 0.1142578125, + "loss_num": 0.005889892578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 427274456, + "step": 7625 + }, + { + "epoch": 16.984409799554566, + "grad_norm": 14.19373607635498, + "learning_rate": 1e-06, + "loss": 0.5021, + "num_input_tokens_seen": 427328280, + "step": 7626 + }, + { + "epoch": 16.984409799554566, + "loss": 0.5511077642440796, + "loss_ce": 8.234484994318336e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.0174560546875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 427328280, + "step": 7626 + }, + { + "epoch": 16.98663697104677, + "grad_norm": 14.599117279052734, + "learning_rate": 1e-06, + "loss": 0.4076, + "num_input_tokens_seen": 427384024, + "step": 7627 + }, + { + "epoch": 16.98663697104677, + "loss": 0.40987348556518555, + "loss_ce": 8.344671368831769e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.00811767578125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 427384024, + "step": 7627 + }, + { + "epoch": 16.988864142538976, + "grad_norm": 34.7947998046875, + "learning_rate": 1e-06, + "loss": 0.391, + "num_input_tokens_seen": 427439656, + "step": 7628 + }, + { + "epoch": 16.988864142538976, + "loss": 0.4776158928871155, + "loss_ce": 7.684988668188453e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.00994873046875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 427439656, + "step": 7628 + }, + { + "epoch": 16.99109131403118, + "grad_norm": 14.100838661193848, + "learning_rate": 1e-06, + "loss": 0.3771, + "num_input_tokens_seen": 427495292, + "step": 7629 + }, + { + "epoch": 16.99109131403118, + "loss": 0.41444265842437744, + "loss_ce": 0.000105498475022614, + "loss_iou": 0.1845703125, + "loss_num": 0.0089111328125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 427495292, + "step": 7629 + }, + { + "epoch": 16.993318485523385, + "grad_norm": 25.862396240234375, + "learning_rate": 1e-06, + "loss": 0.3754, + "num_input_tokens_seen": 427552112, + "step": 7630 + }, + { + "epoch": 16.993318485523385, + "loss": 0.4190499782562256, + "loss_ce": 0.00010467211541254073, + "loss_iou": 0.1884765625, + "loss_num": 0.00848388671875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 427552112, + "step": 7630 + }, + { + "epoch": 16.99554565701559, + "grad_norm": 16.033910751342773, + "learning_rate": 1e-06, + "loss": 0.2646, + "num_input_tokens_seen": 427609520, + "step": 7631 + }, + { + "epoch": 16.99554565701559, + "loss": 0.28566277027130127, + "loss_ce": 7.926442776806653e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.007354736328125, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 427609520, + "step": 7631 + }, + { + "epoch": 16.997772828507795, + "grad_norm": 20.488983154296875, + "learning_rate": 1e-06, + "loss": 0.4234, + "num_input_tokens_seen": 427661616, + "step": 7632 + }, + { + "epoch": 16.997772828507795, + "loss": 0.40573614835739136, + "loss_ce": 9.650552237872034e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.021484375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 427661616, + "step": 7632 + }, + { + "epoch": 17.0, + "grad_norm": 23.362516403198242, + "learning_rate": 1e-06, + "loss": 0.3575, + "num_input_tokens_seen": 427716580, + "step": 7633 + }, + { + "epoch": 17.0, + "loss": 0.32436951994895935, + "loss_ce": 8.972680370789021e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0052490234375, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 427716580, + "step": 7633 + }, + { + "epoch": 17.002227171492205, + "grad_norm": 17.92319107055664, + "learning_rate": 1e-06, + "loss": 0.4558, + "num_input_tokens_seen": 427773004, + "step": 7634 + }, + { + "epoch": 17.002227171492205, + "loss": 0.4817723333835602, + "loss_ce": 8.288519165944308e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.012939453125, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 427773004, + "step": 7634 + }, + { + "epoch": 17.00445434298441, + "grad_norm": 26.28542709350586, + "learning_rate": 1e-06, + "loss": 0.4383, + "num_input_tokens_seen": 427828620, + "step": 7635 + }, + { + "epoch": 17.00445434298441, + "loss": 0.4154224097728729, + "loss_ce": 7.814820855855942e-05, + "loss_iou": 0.185546875, + "loss_num": 0.00885009765625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 427828620, + "step": 7635 + }, + { + "epoch": 17.006681514476615, + "grad_norm": 18.24226188659668, + "learning_rate": 1e-06, + "loss": 0.3023, + "num_input_tokens_seen": 427885192, + "step": 7636 + }, + { + "epoch": 17.006681514476615, + "loss": 0.2849215865135193, + "loss_ce": 7.052010187180713e-05, + "loss_iou": 0.130859375, + "loss_num": 0.0045166015625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 427885192, + "step": 7636 + }, + { + "epoch": 17.00890868596882, + "grad_norm": 30.52312660217285, + "learning_rate": 1e-06, + "loss": 0.4783, + "num_input_tokens_seen": 427939364, + "step": 7637 + }, + { + "epoch": 17.00890868596882, + "loss": 0.46554726362228394, + "loss_ce": 9.315512579632923e-05, + "loss_iou": 0.19921875, + "loss_num": 0.01348876953125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 427939364, + "step": 7637 + }, + { + "epoch": 17.011135857461024, + "grad_norm": 66.4842300415039, + "learning_rate": 1e-06, + "loss": 0.3289, + "num_input_tokens_seen": 427998324, + "step": 7638 + }, + { + "epoch": 17.011135857461024, + "loss": 0.23030278086662292, + "loss_ce": 7.816310971975327e-05, + "loss_iou": 0.1044921875, + "loss_num": 0.00433349609375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 427998324, + "step": 7638 + }, + { + "epoch": 17.01336302895323, + "grad_norm": 20.621551513671875, + "learning_rate": 1e-06, + "loss": 0.4443, + "num_input_tokens_seen": 428052968, + "step": 7639 + }, + { + "epoch": 17.01336302895323, + "loss": 0.502518355846405, + "loss_ce": 7.69448815844953e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.022705078125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 428052968, + "step": 7639 + }, + { + "epoch": 17.015590200445434, + "grad_norm": 16.662851333618164, + "learning_rate": 1e-06, + "loss": 0.6814, + "num_input_tokens_seen": 428110384, + "step": 7640 + }, + { + "epoch": 17.015590200445434, + "loss": 0.931036114692688, + "loss_ce": 0.00015838468971196562, + "loss_iou": 0.349609375, + "loss_num": 0.04638671875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 428110384, + "step": 7640 + }, + { + "epoch": 17.01781737193764, + "grad_norm": 22.56868553161621, + "learning_rate": 1e-06, + "loss": 0.2991, + "num_input_tokens_seen": 428166924, + "step": 7641 + }, + { + "epoch": 17.01781737193764, + "loss": 0.22560018301010132, + "loss_ce": 7.529689173679799e-05, + "loss_iou": 0.09423828125, + "loss_num": 0.0074462890625, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 428166924, + "step": 7641 + }, + { + "epoch": 17.020044543429844, + "grad_norm": 19.790267944335938, + "learning_rate": 1e-06, + "loss": 0.4237, + "num_input_tokens_seen": 428223112, + "step": 7642 + }, + { + "epoch": 17.020044543429844, + "loss": 0.424101322889328, + "loss_ce": 0.00015111861284822226, + "loss_iou": 0.185546875, + "loss_num": 0.0107421875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 428223112, + "step": 7642 + }, + { + "epoch": 17.02227171492205, + "grad_norm": 22.081754684448242, + "learning_rate": 1e-06, + "loss": 0.5015, + "num_input_tokens_seen": 428279696, + "step": 7643 + }, + { + "epoch": 17.02227171492205, + "loss": 0.5504837036132812, + "loss_ce": 0.00019074320152867585, + "loss_iou": 0.2265625, + "loss_num": 0.0196533203125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 428279696, + "step": 7643 + }, + { + "epoch": 17.024498886414253, + "grad_norm": 12.306164741516113, + "learning_rate": 1e-06, + "loss": 0.4179, + "num_input_tokens_seen": 428335044, + "step": 7644 + }, + { + "epoch": 17.024498886414253, + "loss": 0.5076683163642883, + "loss_ce": 9.997590677812696e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.005889892578125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 428335044, + "step": 7644 + }, + { + "epoch": 17.02672605790646, + "grad_norm": 21.696956634521484, + "learning_rate": 1e-06, + "loss": 0.48, + "num_input_tokens_seen": 428390008, + "step": 7645 + }, + { + "epoch": 17.02672605790646, + "loss": 0.4708196818828583, + "loss_ce": 0.00011655631533358246, + "loss_iou": 0.1982421875, + "loss_num": 0.01470947265625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 428390008, + "step": 7645 + }, + { + "epoch": 17.028953229398663, + "grad_norm": 36.35521697998047, + "learning_rate": 1e-06, + "loss": 0.3621, + "num_input_tokens_seen": 428447080, + "step": 7646 + }, + { + "epoch": 17.028953229398663, + "loss": 0.2672528028488159, + "loss_ce": 0.00016677916573826224, + "loss_iou": 0.10546875, + "loss_num": 0.0111083984375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 428447080, + "step": 7646 + }, + { + "epoch": 17.031180400890868, + "grad_norm": 21.043344497680664, + "learning_rate": 1e-06, + "loss": 0.3144, + "num_input_tokens_seen": 428503564, + "step": 7647 + }, + { + "epoch": 17.031180400890868, + "loss": 0.36540961265563965, + "loss_ce": 8.367877308046445e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.003997802734375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 428503564, + "step": 7647 + }, + { + "epoch": 17.033407572383073, + "grad_norm": 24.031978607177734, + "learning_rate": 1e-06, + "loss": 0.334, + "num_input_tokens_seen": 428559008, + "step": 7648 + }, + { + "epoch": 17.033407572383073, + "loss": 0.2920740842819214, + "loss_ce": 8.189551590476185e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 428559008, + "step": 7648 + }, + { + "epoch": 17.035634743875278, + "grad_norm": 21.48931121826172, + "learning_rate": 1e-06, + "loss": 0.4046, + "num_input_tokens_seen": 428613808, + "step": 7649 + }, + { + "epoch": 17.035634743875278, + "loss": 0.21085509657859802, + "loss_ce": 8.544648881070316e-05, + "loss_iou": 0.0947265625, + "loss_num": 0.0042724609375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 428613808, + "step": 7649 + }, + { + "epoch": 17.037861915367483, + "grad_norm": 13.954534530639648, + "learning_rate": 1e-06, + "loss": 0.4201, + "num_input_tokens_seen": 428672352, + "step": 7650 + }, + { + "epoch": 17.037861915367483, + "loss": 0.25243866443634033, + "loss_ce": 8.880048699211329e-05, + "loss_iou": 0.10986328125, + "loss_num": 0.006561279296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 428672352, + "step": 7650 + }, + { + "epoch": 17.040089086859687, + "grad_norm": 16.805583953857422, + "learning_rate": 1e-06, + "loss": 0.4008, + "num_input_tokens_seen": 428726452, + "step": 7651 + }, + { + "epoch": 17.040089086859687, + "loss": 0.3927674889564514, + "loss_ce": 6.728603329975158e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.01318359375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 428726452, + "step": 7651 + }, + { + "epoch": 17.042316258351892, + "grad_norm": 22.515275955200195, + "learning_rate": 1e-06, + "loss": 0.3941, + "num_input_tokens_seen": 428783940, + "step": 7652 + }, + { + "epoch": 17.042316258351892, + "loss": 0.4546872675418854, + "loss_ce": 9.743101691128686e-05, + "loss_iou": 0.19921875, + "loss_num": 0.010986328125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 428783940, + "step": 7652 + }, + { + "epoch": 17.044543429844097, + "grad_norm": 23.46599769592285, + "learning_rate": 1e-06, + "loss": 0.3476, + "num_input_tokens_seen": 428839472, + "step": 7653 + }, + { + "epoch": 17.044543429844097, + "loss": 0.33765748143196106, + "loss_ce": 7.202455890364945e-05, + "loss_iou": 0.1328125, + "loss_num": 0.01458740234375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 428839472, + "step": 7653 + }, + { + "epoch": 17.046770601336302, + "grad_norm": 21.571443557739258, + "learning_rate": 1e-06, + "loss": 0.3301, + "num_input_tokens_seen": 428896740, + "step": 7654 + }, + { + "epoch": 17.046770601336302, + "loss": 0.3378799855709076, + "loss_ce": 0.00011143009760417044, + "loss_iou": 0.1474609375, + "loss_num": 0.00836181640625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 428896740, + "step": 7654 + }, + { + "epoch": 17.048997772828507, + "grad_norm": 20.132381439208984, + "learning_rate": 1e-06, + "loss": 0.2645, + "num_input_tokens_seen": 428951072, + "step": 7655 + }, + { + "epoch": 17.048997772828507, + "loss": 0.24837635457515717, + "loss_ce": 8.534367952961475e-05, + "loss_iou": 0.1103515625, + "loss_num": 0.00543212890625, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 428951072, + "step": 7655 + }, + { + "epoch": 17.051224944320712, + "grad_norm": 20.21780014038086, + "learning_rate": 1e-06, + "loss": 0.4544, + "num_input_tokens_seen": 429007504, + "step": 7656 + }, + { + "epoch": 17.051224944320712, + "loss": 0.4101518392562866, + "loss_ce": 0.00011766105308197439, + "loss_iou": 0.1904296875, + "loss_num": 0.005859375, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 429007504, + "step": 7656 + }, + { + "epoch": 17.053452115812917, + "grad_norm": 17.552833557128906, + "learning_rate": 1e-06, + "loss": 0.4551, + "num_input_tokens_seen": 429061384, + "step": 7657 + }, + { + "epoch": 17.053452115812917, + "loss": 0.42342638969421387, + "loss_ce": 8.654448902234435e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.0069580078125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 429061384, + "step": 7657 + }, + { + "epoch": 17.05567928730512, + "grad_norm": 16.712745666503906, + "learning_rate": 1e-06, + "loss": 0.4366, + "num_input_tokens_seen": 429117780, + "step": 7658 + }, + { + "epoch": 17.05567928730512, + "loss": 0.5296164155006409, + "loss_ce": 7.536636258009821e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0091552734375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 429117780, + "step": 7658 + }, + { + "epoch": 17.057906458797326, + "grad_norm": 17.775182723999023, + "learning_rate": 1e-06, + "loss": 0.4586, + "num_input_tokens_seen": 429174152, + "step": 7659 + }, + { + "epoch": 17.057906458797326, + "loss": 0.34088775515556335, + "loss_ce": 6.743887206539512e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00946044921875, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 429174152, + "step": 7659 + }, + { + "epoch": 17.06013363028953, + "grad_norm": 18.0156192779541, + "learning_rate": 1e-06, + "loss": 0.3511, + "num_input_tokens_seen": 429231128, + "step": 7660 + }, + { + "epoch": 17.06013363028953, + "loss": 0.3127681612968445, + "loss_ce": 8.506246376782656e-05, + "loss_iou": 0.130859375, + "loss_num": 0.01025390625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 429231128, + "step": 7660 + }, + { + "epoch": 17.062360801781736, + "grad_norm": 52.67565155029297, + "learning_rate": 1e-06, + "loss": 0.6399, + "num_input_tokens_seen": 429284628, + "step": 7661 + }, + { + "epoch": 17.062360801781736, + "loss": 0.8818075060844421, + "loss_ce": 9.362171840621158e-05, + "loss_iou": 0.35546875, + "loss_num": 0.03466796875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 429284628, + "step": 7661 + }, + { + "epoch": 17.06458797327394, + "grad_norm": 27.630023956298828, + "learning_rate": 1e-06, + "loss": 0.4078, + "num_input_tokens_seen": 429338128, + "step": 7662 + }, + { + "epoch": 17.06458797327394, + "loss": 0.40474581718444824, + "loss_ce": 8.270953549072146e-05, + "loss_iou": 0.1875, + "loss_num": 0.005828857421875, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 429338128, + "step": 7662 + }, + { + "epoch": 17.066815144766146, + "grad_norm": 22.74899673461914, + "learning_rate": 1e-06, + "loss": 0.5521, + "num_input_tokens_seen": 429390620, + "step": 7663 + }, + { + "epoch": 17.066815144766146, + "loss": 0.45908236503601074, + "loss_ce": 9.800391853787005e-05, + "loss_iou": 0.19921875, + "loss_num": 0.011962890625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 429390620, + "step": 7663 + }, + { + "epoch": 17.06904231625835, + "grad_norm": 16.609214782714844, + "learning_rate": 1e-06, + "loss": 0.4099, + "num_input_tokens_seen": 429445244, + "step": 7664 + }, + { + "epoch": 17.06904231625835, + "loss": 0.3943771719932556, + "loss_ce": 9.003834566101432e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01409912109375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 429445244, + "step": 7664 + }, + { + "epoch": 17.071269487750556, + "grad_norm": 21.4134578704834, + "learning_rate": 1e-06, + "loss": 0.5788, + "num_input_tokens_seen": 429502620, + "step": 7665 + }, + { + "epoch": 17.071269487750556, + "loss": 0.5071187615394592, + "loss_ce": 9.971446706913412e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0118408203125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 429502620, + "step": 7665 + }, + { + "epoch": 17.07349665924276, + "grad_norm": 18.723894119262695, + "learning_rate": 1e-06, + "loss": 0.4943, + "num_input_tokens_seen": 429562224, + "step": 7666 + }, + { + "epoch": 17.07349665924276, + "loss": 0.47562456130981445, + "loss_ce": 0.0001607102749403566, + "loss_iou": 0.2080078125, + "loss_num": 0.011962890625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 429562224, + "step": 7666 + }, + { + "epoch": 17.075723830734965, + "grad_norm": 19.977685928344727, + "learning_rate": 1e-06, + "loss": 0.3345, + "num_input_tokens_seen": 429620100, + "step": 7667 + }, + { + "epoch": 17.075723830734965, + "loss": 0.31252235174179077, + "loss_ce": 8.34053716971539e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00836181640625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 429620100, + "step": 7667 + }, + { + "epoch": 17.07795100222717, + "grad_norm": 19.599905014038086, + "learning_rate": 1e-06, + "loss": 0.4729, + "num_input_tokens_seen": 429677708, + "step": 7668 + }, + { + "epoch": 17.07795100222717, + "loss": 0.6072638034820557, + "loss_ce": 8.606135088484734e-05, + "loss_iou": 0.28125, + "loss_num": 0.0091552734375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 429677708, + "step": 7668 + }, + { + "epoch": 17.080178173719375, + "grad_norm": 14.583081245422363, + "learning_rate": 1e-06, + "loss": 0.4426, + "num_input_tokens_seen": 429735204, + "step": 7669 + }, + { + "epoch": 17.080178173719375, + "loss": 0.43759340047836304, + "loss_ce": 9.339496318716556e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.005096435546875, + "loss_xval": 0.4375, + "num_input_tokens_seen": 429735204, + "step": 7669 + }, + { + "epoch": 17.08240534521158, + "grad_norm": 34.837467193603516, + "learning_rate": 1e-06, + "loss": 0.4082, + "num_input_tokens_seen": 429789724, + "step": 7670 + }, + { + "epoch": 17.08240534521158, + "loss": 0.3650968670845032, + "loss_ce": 0.00010662678687367588, + "loss_iou": 0.1689453125, + "loss_num": 0.0054931640625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 429789724, + "step": 7670 + }, + { + "epoch": 17.084632516703785, + "grad_norm": 16.056995391845703, + "learning_rate": 1e-06, + "loss": 0.332, + "num_input_tokens_seen": 429846636, + "step": 7671 + }, + { + "epoch": 17.084632516703785, + "loss": 0.30904102325439453, + "loss_ce": 8.104251901386306e-05, + "loss_iou": 0.13671875, + "loss_num": 0.006988525390625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 429846636, + "step": 7671 + }, + { + "epoch": 17.08685968819599, + "grad_norm": 19.151451110839844, + "learning_rate": 1e-06, + "loss": 0.4416, + "num_input_tokens_seen": 429905284, + "step": 7672 + }, + { + "epoch": 17.08685968819599, + "loss": 0.47447669506073, + "loss_ce": 0.0001114873739425093, + "loss_iou": 0.224609375, + "loss_num": 0.00518798828125, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 429905284, + "step": 7672 + }, + { + "epoch": 17.089086859688194, + "grad_norm": 14.329705238342285, + "learning_rate": 1e-06, + "loss": 0.6205, + "num_input_tokens_seen": 429960820, + "step": 7673 + }, + { + "epoch": 17.089086859688194, + "loss": 0.6159265041351318, + "loss_ce": 8.180980512406677e-05, + "loss_iou": 0.255859375, + "loss_num": 0.020751953125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 429960820, + "step": 7673 + }, + { + "epoch": 17.0913140311804, + "grad_norm": 12.72126579284668, + "learning_rate": 1e-06, + "loss": 0.2597, + "num_input_tokens_seen": 430014656, + "step": 7674 + }, + { + "epoch": 17.0913140311804, + "loss": 0.32735052704811096, + "loss_ce": 8.001940295798704e-05, + "loss_iou": 0.123046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 430014656, + "step": 7674 + }, + { + "epoch": 17.093541202672604, + "grad_norm": 15.150796890258789, + "learning_rate": 1e-06, + "loss": 0.414, + "num_input_tokens_seen": 430071736, + "step": 7675 + }, + { + "epoch": 17.093541202672604, + "loss": 0.316550612449646, + "loss_ce": 8.332452125614509e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00482177734375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 430071736, + "step": 7675 + }, + { + "epoch": 17.09576837416481, + "grad_norm": 22.740732192993164, + "learning_rate": 1e-06, + "loss": 0.4941, + "num_input_tokens_seen": 430127552, + "step": 7676 + }, + { + "epoch": 17.09576837416481, + "loss": 0.688327968120575, + "loss_ce": 9.552988194627687e-05, + "loss_iou": 0.283203125, + "loss_num": 0.024169921875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 430127552, + "step": 7676 + }, + { + "epoch": 17.097995545657014, + "grad_norm": 15.583344459533691, + "learning_rate": 1e-06, + "loss": 0.3403, + "num_input_tokens_seen": 430183984, + "step": 7677 + }, + { + "epoch": 17.097995545657014, + "loss": 0.2791343033313751, + "loss_ce": 8.15772291389294e-05, + "loss_iou": 0.11865234375, + "loss_num": 0.00823974609375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 430183984, + "step": 7677 + }, + { + "epoch": 17.100222717149222, + "grad_norm": 15.863219261169434, + "learning_rate": 1e-06, + "loss": 0.506, + "num_input_tokens_seen": 430239088, + "step": 7678 + }, + { + "epoch": 17.100222717149222, + "loss": 0.4502248167991638, + "loss_ce": 9.05133638298139e-05, + "loss_iou": 0.197265625, + "loss_num": 0.010986328125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 430239088, + "step": 7678 + }, + { + "epoch": 17.102449888641427, + "grad_norm": 17.146371841430664, + "learning_rate": 1e-06, + "loss": 0.4628, + "num_input_tokens_seen": 430293988, + "step": 7679 + }, + { + "epoch": 17.102449888641427, + "loss": 0.5290303826332092, + "loss_ce": 9.971238614525646e-05, + "loss_iou": 0.20703125, + "loss_num": 0.02294921875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 430293988, + "step": 7679 + }, + { + "epoch": 17.104677060133632, + "grad_norm": 21.910921096801758, + "learning_rate": 1e-06, + "loss": 0.4983, + "num_input_tokens_seen": 430350332, + "step": 7680 + }, + { + "epoch": 17.104677060133632, + "loss": 0.520658552646637, + "loss_ce": 8.970967610366642e-05, + "loss_iou": 0.203125, + "loss_num": 0.023193359375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 430350332, + "step": 7680 + }, + { + "epoch": 17.106904231625837, + "grad_norm": 21.317134857177734, + "learning_rate": 1e-06, + "loss": 0.4361, + "num_input_tokens_seen": 430407436, + "step": 7681 + }, + { + "epoch": 17.106904231625837, + "loss": 0.5610302090644836, + "loss_ce": 0.00011710502440109849, + "loss_iou": 0.234375, + "loss_num": 0.018310546875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 430407436, + "step": 7681 + }, + { + "epoch": 17.10913140311804, + "grad_norm": 15.316725730895996, + "learning_rate": 1e-06, + "loss": 0.3235, + "num_input_tokens_seen": 430464132, + "step": 7682 + }, + { + "epoch": 17.10913140311804, + "loss": 0.3164796233177185, + "loss_ce": 7.337974238907918e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 430464132, + "step": 7682 + }, + { + "epoch": 17.111358574610247, + "grad_norm": 17.729398727416992, + "learning_rate": 1e-06, + "loss": 0.6469, + "num_input_tokens_seen": 430518768, + "step": 7683 + }, + { + "epoch": 17.111358574610247, + "loss": 0.49617958068847656, + "loss_ce": 8.584219176555052e-05, + "loss_iou": 0.216796875, + "loss_num": 0.012451171875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 430518768, + "step": 7683 + }, + { + "epoch": 17.11358574610245, + "grad_norm": 25.321056365966797, + "learning_rate": 1e-06, + "loss": 0.3942, + "num_input_tokens_seen": 430574312, + "step": 7684 + }, + { + "epoch": 17.11358574610245, + "loss": 0.2743665277957916, + "loss_ce": 8.980404527392238e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.006866455078125, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 430574312, + "step": 7684 + }, + { + "epoch": 17.115812917594656, + "grad_norm": 33.18670654296875, + "learning_rate": 1e-06, + "loss": 0.4818, + "num_input_tokens_seen": 430629924, + "step": 7685 + }, + { + "epoch": 17.115812917594656, + "loss": 0.4744425117969513, + "loss_ce": 7.726340845692903e-05, + "loss_iou": 0.201171875, + "loss_num": 0.01434326171875, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 430629924, + "step": 7685 + }, + { + "epoch": 17.11804008908686, + "grad_norm": 18.0744571685791, + "learning_rate": 1e-06, + "loss": 0.4743, + "num_input_tokens_seen": 430684496, + "step": 7686 + }, + { + "epoch": 17.11804008908686, + "loss": 0.4138874113559723, + "loss_ce": 6.904240581206977e-05, + "loss_iou": 0.1875, + "loss_num": 0.007537841796875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 430684496, + "step": 7686 + }, + { + "epoch": 17.120267260579066, + "grad_norm": 12.516131401062012, + "learning_rate": 1e-06, + "loss": 0.4325, + "num_input_tokens_seen": 430740936, + "step": 7687 + }, + { + "epoch": 17.120267260579066, + "loss": 0.41224995255470276, + "loss_ce": 0.00011005052510881796, + "loss_iou": 0.1923828125, + "loss_num": 0.00555419921875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 430740936, + "step": 7687 + }, + { + "epoch": 17.12249443207127, + "grad_norm": 14.209662437438965, + "learning_rate": 1e-06, + "loss": 0.3043, + "num_input_tokens_seen": 430797872, + "step": 7688 + }, + { + "epoch": 17.12249443207127, + "loss": 0.36360567808151245, + "loss_ce": 8.030241588130593e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00665283203125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 430797872, + "step": 7688 + }, + { + "epoch": 17.124721603563476, + "grad_norm": 24.326807022094727, + "learning_rate": 1e-06, + "loss": 0.4336, + "num_input_tokens_seen": 430856816, + "step": 7689 + }, + { + "epoch": 17.124721603563476, + "loss": 0.40657997131347656, + "loss_ce": 8.581295696785673e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 430856816, + "step": 7689 + }, + { + "epoch": 17.12694877505568, + "grad_norm": 15.567830085754395, + "learning_rate": 1e-06, + "loss": 0.4065, + "num_input_tokens_seen": 430913996, + "step": 7690 + }, + { + "epoch": 17.12694877505568, + "loss": 0.32644379138946533, + "loss_ce": 8.880942186806351e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.006256103515625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 430913996, + "step": 7690 + }, + { + "epoch": 17.129175946547885, + "grad_norm": 27.735408782958984, + "learning_rate": 1e-06, + "loss": 0.3461, + "num_input_tokens_seen": 430970364, + "step": 7691 + }, + { + "epoch": 17.129175946547885, + "loss": 0.3675483167171478, + "loss_ce": 0.00011668518709484488, + "loss_iou": 0.1591796875, + "loss_num": 0.00994873046875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 430970364, + "step": 7691 + }, + { + "epoch": 17.13140311804009, + "grad_norm": 13.408978462219238, + "learning_rate": 1e-06, + "loss": 0.435, + "num_input_tokens_seen": 431026824, + "step": 7692 + }, + { + "epoch": 17.13140311804009, + "loss": 0.49011173844337463, + "loss_ce": 0.00012149102985858917, + "loss_iou": 0.2021484375, + "loss_num": 0.0169677734375, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 431026824, + "step": 7692 + }, + { + "epoch": 17.133630289532295, + "grad_norm": 19.401525497436523, + "learning_rate": 1e-06, + "loss": 0.5539, + "num_input_tokens_seen": 431082556, + "step": 7693 + }, + { + "epoch": 17.133630289532295, + "loss": 0.5180162787437439, + "loss_ce": 7.190792530309409e-05, + "loss_iou": 0.22265625, + "loss_num": 0.014404296875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 431082556, + "step": 7693 + }, + { + "epoch": 17.1358574610245, + "grad_norm": 17.272777557373047, + "learning_rate": 1e-06, + "loss": 0.4486, + "num_input_tokens_seen": 431140336, + "step": 7694 + }, + { + "epoch": 17.1358574610245, + "loss": 0.39839935302734375, + "loss_ce": 8.391607843805104e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.01361083984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 431140336, + "step": 7694 + }, + { + "epoch": 17.138084632516705, + "grad_norm": 13.000822067260742, + "learning_rate": 1e-06, + "loss": 0.4472, + "num_input_tokens_seen": 431196616, + "step": 7695 + }, + { + "epoch": 17.138084632516705, + "loss": 0.39486390352249146, + "loss_ce": 8.850420999806374e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.00677490234375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 431196616, + "step": 7695 + }, + { + "epoch": 17.14031180400891, + "grad_norm": 12.154135704040527, + "learning_rate": 1e-06, + "loss": 0.3779, + "num_input_tokens_seen": 431253236, + "step": 7696 + }, + { + "epoch": 17.14031180400891, + "loss": 0.3711104691028595, + "loss_ce": 7.775596168357879e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.005950927734375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 431253236, + "step": 7696 + }, + { + "epoch": 17.142538975501115, + "grad_norm": 19.33846092224121, + "learning_rate": 1e-06, + "loss": 0.5077, + "num_input_tokens_seen": 431311832, + "step": 7697 + }, + { + "epoch": 17.142538975501115, + "loss": 0.5122911930084229, + "loss_ce": 8.418020297540352e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.0145263671875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 431311832, + "step": 7697 + }, + { + "epoch": 17.14476614699332, + "grad_norm": 13.823887825012207, + "learning_rate": 1e-06, + "loss": 0.304, + "num_input_tokens_seen": 431367724, + "step": 7698 + }, + { + "epoch": 17.14476614699332, + "loss": 0.37503981590270996, + "loss_ce": 0.00010085223766509444, + "loss_iou": 0.16015625, + "loss_num": 0.01080322265625, + "loss_xval": 0.375, + "num_input_tokens_seen": 431367724, + "step": 7698 + }, + { + "epoch": 17.146993318485524, + "grad_norm": 20.612548828125, + "learning_rate": 1e-06, + "loss": 0.4507, + "num_input_tokens_seen": 431423316, + "step": 7699 + }, + { + "epoch": 17.146993318485524, + "loss": 0.5613565444946289, + "loss_ce": 7.721249130554497e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0301513671875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 431423316, + "step": 7699 + }, + { + "epoch": 17.14922048997773, + "grad_norm": 23.749818801879883, + "learning_rate": 1e-06, + "loss": 0.4074, + "num_input_tokens_seen": 431478884, + "step": 7700 + }, + { + "epoch": 17.14922048997773, + "loss": 0.3793688118457794, + "loss_ce": 9.636204777052626e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0152587890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 431478884, + "step": 7700 + }, + { + "epoch": 17.151447661469934, + "grad_norm": 17.16455841064453, + "learning_rate": 1e-06, + "loss": 0.3772, + "num_input_tokens_seen": 431537440, + "step": 7701 + }, + { + "epoch": 17.151447661469934, + "loss": 0.4580909013748169, + "loss_ce": 8.308385440614074e-05, + "loss_iou": 0.185546875, + "loss_num": 0.01708984375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 431537440, + "step": 7701 + }, + { + "epoch": 17.15367483296214, + "grad_norm": 19.00710105895996, + "learning_rate": 1e-06, + "loss": 0.4474, + "num_input_tokens_seen": 431592904, + "step": 7702 + }, + { + "epoch": 17.15367483296214, + "loss": 0.4547297954559326, + "loss_ce": 7.894145528553054e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 431592904, + "step": 7702 + }, + { + "epoch": 17.155902004454344, + "grad_norm": 14.55823040008545, + "learning_rate": 1e-06, + "loss": 0.2756, + "num_input_tokens_seen": 431651384, + "step": 7703 + }, + { + "epoch": 17.155902004454344, + "loss": 0.23866218328475952, + "loss_ce": 7.575111521873623e-05, + "loss_iou": 0.10546875, + "loss_num": 0.005462646484375, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 431651384, + "step": 7703 + }, + { + "epoch": 17.15812917594655, + "grad_norm": 13.731185913085938, + "learning_rate": 1e-06, + "loss": 0.37, + "num_input_tokens_seen": 431708828, + "step": 7704 + }, + { + "epoch": 17.15812917594655, + "loss": 0.347605437040329, + "loss_ce": 7.126451964722946e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.01043701171875, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 431708828, + "step": 7704 + }, + { + "epoch": 17.160356347438753, + "grad_norm": 21.002777099609375, + "learning_rate": 1e-06, + "loss": 0.5383, + "num_input_tokens_seen": 431765240, + "step": 7705 + }, + { + "epoch": 17.160356347438753, + "loss": 0.4782218933105469, + "loss_ce": 7.249362533912063e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.017822265625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 431765240, + "step": 7705 + }, + { + "epoch": 17.16258351893096, + "grad_norm": 21.7739315032959, + "learning_rate": 1e-06, + "loss": 0.6177, + "num_input_tokens_seen": 431820024, + "step": 7706 + }, + { + "epoch": 17.16258351893096, + "loss": 0.7398481369018555, + "loss_ce": 0.00010208625462837517, + "loss_iou": 0.330078125, + "loss_num": 0.015625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 431820024, + "step": 7706 + }, + { + "epoch": 17.164810690423163, + "grad_norm": 18.27759552001953, + "learning_rate": 1e-06, + "loss": 0.3539, + "num_input_tokens_seen": 431873500, + "step": 7707 + }, + { + "epoch": 17.164810690423163, + "loss": 0.3486563265323639, + "loss_ce": 8.456400246359408e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.004669189453125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 431873500, + "step": 7707 + }, + { + "epoch": 17.167037861915368, + "grad_norm": 32.455810546875, + "learning_rate": 1e-06, + "loss": 0.4563, + "num_input_tokens_seen": 431928992, + "step": 7708 + }, + { + "epoch": 17.167037861915368, + "loss": 0.5565398931503296, + "loss_ce": 8.237551082856953e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.029052734375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 431928992, + "step": 7708 + }, + { + "epoch": 17.169265033407573, + "grad_norm": 24.227785110473633, + "learning_rate": 1e-06, + "loss": 0.4164, + "num_input_tokens_seen": 431985988, + "step": 7709 + }, + { + "epoch": 17.169265033407573, + "loss": 0.21856805682182312, + "loss_ce": 9.271766612073407e-05, + "loss_iou": 0.10205078125, + "loss_num": 0.00274658203125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 431985988, + "step": 7709 + }, + { + "epoch": 17.171492204899778, + "grad_norm": 11.984196662902832, + "learning_rate": 1e-06, + "loss": 0.2504, + "num_input_tokens_seen": 432040140, + "step": 7710 + }, + { + "epoch": 17.171492204899778, + "loss": 0.2045476734638214, + "loss_ce": 7.990330050233752e-05, + "loss_iou": 0.087890625, + "loss_num": 0.0057373046875, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 432040140, + "step": 7710 + }, + { + "epoch": 17.173719376391983, + "grad_norm": 23.284042358398438, + "learning_rate": 1e-06, + "loss": 0.3402, + "num_input_tokens_seen": 432093984, + "step": 7711 + }, + { + "epoch": 17.173719376391983, + "loss": 0.3212023973464966, + "loss_ce": 6.591899727936834e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00872802734375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 432093984, + "step": 7711 + }, + { + "epoch": 17.175946547884188, + "grad_norm": 15.671782493591309, + "learning_rate": 1e-06, + "loss": 0.3822, + "num_input_tokens_seen": 432149584, + "step": 7712 + }, + { + "epoch": 17.175946547884188, + "loss": 0.2187042385339737, + "loss_ce": 7.631281914655119e-05, + "loss_iou": 0.0869140625, + "loss_num": 0.0089111328125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 432149584, + "step": 7712 + }, + { + "epoch": 17.178173719376392, + "grad_norm": 17.098852157592773, + "learning_rate": 1e-06, + "loss": 0.3588, + "num_input_tokens_seen": 432206804, + "step": 7713 + }, + { + "epoch": 17.178173719376392, + "loss": 0.4782329201698303, + "loss_ce": 8.348520350409672e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.007537841796875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 432206804, + "step": 7713 + }, + { + "epoch": 17.180400890868597, + "grad_norm": 23.695720672607422, + "learning_rate": 1e-06, + "loss": 0.6842, + "num_input_tokens_seen": 432259452, + "step": 7714 + }, + { + "epoch": 17.180400890868597, + "loss": 0.6622397899627686, + "loss_ce": 0.00025244534481316805, + "loss_iou": 0.29296875, + "loss_num": 0.0155029296875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 432259452, + "step": 7714 + }, + { + "epoch": 17.182628062360802, + "grad_norm": 16.1234073638916, + "learning_rate": 1e-06, + "loss": 0.3404, + "num_input_tokens_seen": 432317140, + "step": 7715 + }, + { + "epoch": 17.182628062360802, + "loss": 0.2790215313434601, + "loss_ce": 9.086594945983961e-05, + "loss_iou": 0.12109375, + "loss_num": 0.007476806640625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 432317140, + "step": 7715 + }, + { + "epoch": 17.184855233853007, + "grad_norm": 21.128246307373047, + "learning_rate": 1e-06, + "loss": 0.4791, + "num_input_tokens_seen": 432375580, + "step": 7716 + }, + { + "epoch": 17.184855233853007, + "loss": 0.7009245753288269, + "loss_ce": 8.839939255267382e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0224609375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 432375580, + "step": 7716 + }, + { + "epoch": 17.187082405345212, + "grad_norm": 18.01925277709961, + "learning_rate": 1e-06, + "loss": 0.3742, + "num_input_tokens_seen": 432428196, + "step": 7717 + }, + { + "epoch": 17.187082405345212, + "loss": 0.48470473289489746, + "loss_ce": 8.559299749322236e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0115966796875, + "loss_xval": 0.484375, + "num_input_tokens_seen": 432428196, + "step": 7717 + }, + { + "epoch": 17.189309576837417, + "grad_norm": 19.113862991333008, + "learning_rate": 1e-06, + "loss": 0.4242, + "num_input_tokens_seen": 432482660, + "step": 7718 + }, + { + "epoch": 17.189309576837417, + "loss": 0.41426295042037964, + "loss_ce": 7.838521560188383e-05, + "loss_iou": 0.18359375, + "loss_num": 0.009521484375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 432482660, + "step": 7718 + }, + { + "epoch": 17.19153674832962, + "grad_norm": 18.660619735717773, + "learning_rate": 1e-06, + "loss": 0.3095, + "num_input_tokens_seen": 432539144, + "step": 7719 + }, + { + "epoch": 17.19153674832962, + "loss": 0.2771776020526886, + "loss_ce": 7.799692684784532e-05, + "loss_iou": 0.111328125, + "loss_num": 0.0108642578125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 432539144, + "step": 7719 + }, + { + "epoch": 17.193763919821826, + "grad_norm": 18.885120391845703, + "learning_rate": 1e-06, + "loss": 0.3698, + "num_input_tokens_seen": 432596392, + "step": 7720 + }, + { + "epoch": 17.193763919821826, + "loss": 0.4347517788410187, + "loss_ce": 0.00030352562316693366, + "loss_iou": 0.1962890625, + "loss_num": 0.0086669921875, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 432596392, + "step": 7720 + }, + { + "epoch": 17.19599109131403, + "grad_norm": 11.850611686706543, + "learning_rate": 1e-06, + "loss": 0.5695, + "num_input_tokens_seen": 432650932, + "step": 7721 + }, + { + "epoch": 17.19599109131403, + "loss": 0.5424901247024536, + "loss_ce": 7.069206185406074e-05, + "loss_iou": 0.22265625, + "loss_num": 0.019287109375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 432650932, + "step": 7721 + }, + { + "epoch": 17.198218262806236, + "grad_norm": 19.987720489501953, + "learning_rate": 1e-06, + "loss": 0.3179, + "num_input_tokens_seen": 432706504, + "step": 7722 + }, + { + "epoch": 17.198218262806236, + "loss": 0.3480537533760071, + "loss_ce": 9.233770833816379e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.006866455078125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 432706504, + "step": 7722 + }, + { + "epoch": 17.20044543429844, + "grad_norm": 18.74901008605957, + "learning_rate": 1e-06, + "loss": 0.4127, + "num_input_tokens_seen": 432765232, + "step": 7723 + }, + { + "epoch": 17.20044543429844, + "loss": 0.4682392477989197, + "loss_ce": 9.959404997061938e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0098876953125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 432765232, + "step": 7723 + }, + { + "epoch": 17.202672605790646, + "grad_norm": 18.15958023071289, + "learning_rate": 1e-06, + "loss": 0.3961, + "num_input_tokens_seen": 432820076, + "step": 7724 + }, + { + "epoch": 17.202672605790646, + "loss": 0.41411644220352173, + "loss_ce": 8.445019193459302e-05, + "loss_iou": 0.185546875, + "loss_num": 0.008544921875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 432820076, + "step": 7724 + }, + { + "epoch": 17.20489977728285, + "grad_norm": 27.001567840576172, + "learning_rate": 1e-06, + "loss": 0.4505, + "num_input_tokens_seen": 432877112, + "step": 7725 + }, + { + "epoch": 17.20489977728285, + "loss": 0.46639156341552734, + "loss_ce": 8.295044244732708e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01300048828125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 432877112, + "step": 7725 + }, + { + "epoch": 17.207126948775056, + "grad_norm": 30.94824981689453, + "learning_rate": 1e-06, + "loss": 0.3738, + "num_input_tokens_seen": 432934028, + "step": 7726 + }, + { + "epoch": 17.207126948775056, + "loss": 0.31847047805786133, + "loss_ce": 8.058200182858855e-05, + "loss_iou": 0.142578125, + "loss_num": 0.006591796875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 432934028, + "step": 7726 + }, + { + "epoch": 17.20935412026726, + "grad_norm": 17.023353576660156, + "learning_rate": 1e-06, + "loss": 0.4936, + "num_input_tokens_seen": 432991524, + "step": 7727 + }, + { + "epoch": 17.20935412026726, + "loss": 0.4206371307373047, + "loss_ce": 0.0002880272513721138, + "loss_iou": 0.1669921875, + "loss_num": 0.0174560546875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 432991524, + "step": 7727 + }, + { + "epoch": 17.211581291759465, + "grad_norm": 17.618732452392578, + "learning_rate": 1e-06, + "loss": 0.3742, + "num_input_tokens_seen": 433050212, + "step": 7728 + }, + { + "epoch": 17.211581291759465, + "loss": 0.3367721438407898, + "loss_ce": 0.0001022043579723686, + "loss_iou": 0.15234375, + "loss_num": 0.00653076171875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 433050212, + "step": 7728 + }, + { + "epoch": 17.21380846325167, + "grad_norm": 17.69324493408203, + "learning_rate": 1e-06, + "loss": 0.4379, + "num_input_tokens_seen": 433106864, + "step": 7729 + }, + { + "epoch": 17.21380846325167, + "loss": 0.5612530708312988, + "loss_ce": 9.585064981365576e-05, + "loss_iou": 0.26171875, + "loss_num": 0.007171630859375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 433106864, + "step": 7729 + }, + { + "epoch": 17.216035634743875, + "grad_norm": 20.909269332885742, + "learning_rate": 1e-06, + "loss": 0.3521, + "num_input_tokens_seen": 433163416, + "step": 7730 + }, + { + "epoch": 17.216035634743875, + "loss": 0.40844836831092834, + "loss_ce": 9.26676148083061e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.01190185546875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 433163416, + "step": 7730 + }, + { + "epoch": 17.21826280623608, + "grad_norm": 17.81989860534668, + "learning_rate": 1e-06, + "loss": 0.3125, + "num_input_tokens_seen": 433218392, + "step": 7731 + }, + { + "epoch": 17.21826280623608, + "loss": 0.32392922043800354, + "loss_ce": 7.668677426408976e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.01300048828125, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 433218392, + "step": 7731 + }, + { + "epoch": 17.220489977728285, + "grad_norm": 17.176774978637695, + "learning_rate": 1e-06, + "loss": 0.3715, + "num_input_tokens_seen": 433273928, + "step": 7732 + }, + { + "epoch": 17.220489977728285, + "loss": 0.35421961545944214, + "loss_ce": 9.36458382057026e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.005584716796875, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 433273928, + "step": 7732 + }, + { + "epoch": 17.22271714922049, + "grad_norm": 13.050132751464844, + "learning_rate": 1e-06, + "loss": 0.534, + "num_input_tokens_seen": 433330336, + "step": 7733 + }, + { + "epoch": 17.22271714922049, + "loss": 0.5098897814750671, + "loss_ce": 0.00012416887329891324, + "loss_iou": 0.21484375, + "loss_num": 0.015869140625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 433330336, + "step": 7733 + }, + { + "epoch": 17.224944320712694, + "grad_norm": 20.13163948059082, + "learning_rate": 1e-06, + "loss": 0.4318, + "num_input_tokens_seen": 433384892, + "step": 7734 + }, + { + "epoch": 17.224944320712694, + "loss": 0.34920066595077515, + "loss_ce": 7.957725756568834e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.007720947265625, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 433384892, + "step": 7734 + }, + { + "epoch": 17.2271714922049, + "grad_norm": 25.29994773864746, + "learning_rate": 1e-06, + "loss": 0.2885, + "num_input_tokens_seen": 433440724, + "step": 7735 + }, + { + "epoch": 17.2271714922049, + "loss": 0.22060546278953552, + "loss_ce": 8.54411773616448e-05, + "loss_iou": 0.1005859375, + "loss_num": 0.0038299560546875, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 433440724, + "step": 7735 + }, + { + "epoch": 17.229398663697104, + "grad_norm": 19.234081268310547, + "learning_rate": 1e-06, + "loss": 0.2669, + "num_input_tokens_seen": 433497228, + "step": 7736 + }, + { + "epoch": 17.229398663697104, + "loss": 0.3394434154033661, + "loss_ce": 8.795637404546142e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0107421875, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 433497228, + "step": 7736 + }, + { + "epoch": 17.23162583518931, + "grad_norm": 25.836366653442383, + "learning_rate": 1e-06, + "loss": 0.4823, + "num_input_tokens_seen": 433553476, + "step": 7737 + }, + { + "epoch": 17.23162583518931, + "loss": 0.45820939540863037, + "loss_ce": 7.951643783599138e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.0172119140625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 433553476, + "step": 7737 + }, + { + "epoch": 17.233853006681514, + "grad_norm": 34.01211166381836, + "learning_rate": 1e-06, + "loss": 0.4521, + "num_input_tokens_seen": 433608588, + "step": 7738 + }, + { + "epoch": 17.233853006681514, + "loss": 0.5483601093292236, + "loss_ce": 8.125473686959594e-05, + "loss_iou": 0.244140625, + "loss_num": 0.011962890625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 433608588, + "step": 7738 + }, + { + "epoch": 17.23608017817372, + "grad_norm": 26.347332000732422, + "learning_rate": 1e-06, + "loss": 0.403, + "num_input_tokens_seen": 433665036, + "step": 7739 + }, + { + "epoch": 17.23608017817372, + "loss": 0.4297289252281189, + "loss_ce": 0.00010247422324027866, + "loss_iou": 0.1806640625, + "loss_num": 0.01373291015625, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 433665036, + "step": 7739 + }, + { + "epoch": 17.238307349665924, + "grad_norm": 16.483013153076172, + "learning_rate": 1e-06, + "loss": 0.4577, + "num_input_tokens_seen": 433721384, + "step": 7740 + }, + { + "epoch": 17.238307349665924, + "loss": 0.40596240758895874, + "loss_ce": 7.860736513976008e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0098876953125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 433721384, + "step": 7740 + }, + { + "epoch": 17.24053452115813, + "grad_norm": 16.415891647338867, + "learning_rate": 1e-06, + "loss": 0.34, + "num_input_tokens_seen": 433779720, + "step": 7741 + }, + { + "epoch": 17.24053452115813, + "loss": 0.3139420747756958, + "loss_ce": 9.928335202857852e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.007598876953125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 433779720, + "step": 7741 + }, + { + "epoch": 17.242761692650333, + "grad_norm": 13.877442359924316, + "learning_rate": 1e-06, + "loss": 0.335, + "num_input_tokens_seen": 433835132, + "step": 7742 + }, + { + "epoch": 17.242761692650333, + "loss": 0.32295700907707214, + "loss_ce": 8.10332567198202e-05, + "loss_iou": 0.14453125, + "loss_num": 0.00677490234375, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 433835132, + "step": 7742 + }, + { + "epoch": 17.244988864142538, + "grad_norm": 25.894933700561523, + "learning_rate": 1e-06, + "loss": 0.4914, + "num_input_tokens_seen": 433890280, + "step": 7743 + }, + { + "epoch": 17.244988864142538, + "loss": 0.47230130434036255, + "loss_ce": 8.75686964718625e-05, + "loss_iou": 0.2109375, + "loss_num": 0.0103759765625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 433890280, + "step": 7743 + }, + { + "epoch": 17.247216035634743, + "grad_norm": 28.23295021057129, + "learning_rate": 1e-06, + "loss": 0.4699, + "num_input_tokens_seen": 433945752, + "step": 7744 + }, + { + "epoch": 17.247216035634743, + "loss": 0.6319034695625305, + "loss_ce": 0.00018957318388856947, + "loss_iou": 0.2578125, + "loss_num": 0.0234375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 433945752, + "step": 7744 + }, + { + "epoch": 17.249443207126948, + "grad_norm": 19.394866943359375, + "learning_rate": 1e-06, + "loss": 0.4551, + "num_input_tokens_seen": 434001564, + "step": 7745 + }, + { + "epoch": 17.249443207126948, + "loss": 0.531867504119873, + "loss_ce": 0.0002207824436482042, + "loss_iou": 0.2001953125, + "loss_num": 0.0263671875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 434001564, + "step": 7745 + }, + { + "epoch": 17.251670378619153, + "grad_norm": 31.168285369873047, + "learning_rate": 1e-06, + "loss": 0.4663, + "num_input_tokens_seen": 434058448, + "step": 7746 + }, + { + "epoch": 17.251670378619153, + "loss": 0.6216709613800049, + "loss_ce": 8.895625069271773e-05, + "loss_iou": 0.2734375, + "loss_num": 0.014892578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 434058448, + "step": 7746 + }, + { + "epoch": 17.253897550111358, + "grad_norm": 15.39581298828125, + "learning_rate": 1e-06, + "loss": 0.4586, + "num_input_tokens_seen": 434115584, + "step": 7747 + }, + { + "epoch": 17.253897550111358, + "loss": 0.5104674100875854, + "loss_ce": 9.140316979028285e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.0155029296875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 434115584, + "step": 7747 + }, + { + "epoch": 17.256124721603562, + "grad_norm": 26.95697593688965, + "learning_rate": 1e-06, + "loss": 0.4361, + "num_input_tokens_seen": 434169820, + "step": 7748 + }, + { + "epoch": 17.256124721603562, + "loss": 0.35939961671829224, + "loss_ce": 8.56606347952038e-05, + "loss_iou": 0.162109375, + "loss_num": 0.006988525390625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 434169820, + "step": 7748 + }, + { + "epoch": 17.258351893095767, + "grad_norm": 41.06592559814453, + "learning_rate": 1e-06, + "loss": 0.3006, + "num_input_tokens_seen": 434228656, + "step": 7749 + }, + { + "epoch": 17.258351893095767, + "loss": 0.29035407304763794, + "loss_ce": 0.0008032863843254745, + "loss_iou": 0.126953125, + "loss_num": 0.00726318359375, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 434228656, + "step": 7749 + }, + { + "epoch": 17.260579064587972, + "grad_norm": 12.836146354675293, + "learning_rate": 1e-06, + "loss": 0.2803, + "num_input_tokens_seen": 434285972, + "step": 7750 + }, + { + "epoch": 17.260579064587972, + "eval_seeclick_web_CIoU": 0.5904313921928406, + "eval_seeclick_web_GIoU": 0.5887089371681213, + "eval_seeclick_web_IoU": 0.6092908382415771, + "eval_seeclick_web_MAE_all": 0.015212189638987184, + "eval_seeclick_web_MAE_h": 0.007458887062966824, + "eval_seeclick_web_MAE_w": 0.01523585431277752, + "eval_seeclick_web_MAE_x_boxes": 0.007938009221106768, + "eval_seeclick_web_MAE_y_boxes": 0.021370060741901398, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.8967059254646301, + "eval_seeclick_web_loss_ce": 0.0001397554951836355, + "eval_seeclick_web_loss_iou": 0.413818359375, + "eval_seeclick_web_loss_num": 0.012087821960449219, + "eval_seeclick_web_loss_xval": 0.887451171875, + "eval_seeclick_web_runtime": 21.8766, + "eval_seeclick_web_samples_per_second": 2.286, + "eval_seeclick_web_steps_per_second": 0.091, + "num_input_tokens_seen": 434285972, + "step": 7750 + }, + { + "epoch": 17.260579064587972, + "eval_icons_CIoU": 0.25680967420339584, + "eval_icons_GIoU": 0.2801186218857765, + "eval_icons_IoU": 0.3349318951368332, + "eval_icons_MAE_all": 0.05978231504559517, + "eval_icons_MAE_h": 0.0324998227879405, + "eval_icons_MAE_w": 0.06001891568303108, + "eval_icons_MAE_x_boxes": 0.06087409518659115, + "eval_icons_MAE_y_boxes": 0.03790356032550335, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7364193201065063, + "eval_icons_loss_ce": 0.00016331803635694087, + "eval_icons_loss_iou": 0.6763916015625, + "eval_icons_loss_num": 0.052509307861328125, + "eval_icons_loss_xval": 1.615966796875, + "eval_icons_runtime": 21.8048, + "eval_icons_samples_per_second": 2.293, + "eval_icons_steps_per_second": 0.092, + "num_input_tokens_seen": 434285972, + "step": 7750 + }, + { + "epoch": 17.260579064587972, + "eval_screenspot_CIoU": 0.3815999726454417, + "eval_screenspot_GIoU": 0.3997166156768799, + "eval_screenspot_IoU": 0.45147113005320233, + "eval_screenspot_MAE_all": 0.055647388100624084, + "eval_screenspot_MAE_h": 0.03920063997308413, + "eval_screenspot_MAE_w": 0.06165233999490738, + "eval_screenspot_MAE_x_boxes": 0.06318879996736844, + "eval_screenspot_MAE_y_boxes": 0.03922058828175068, + "eval_screenspot_inside_bbox": 0.7145833373069763, + "eval_screenspot_loss": 1.546026349067688, + "eval_screenspot_loss_ce": 0.00020924270696317157, + "eval_screenspot_loss_iou": 0.6417643229166666, + "eval_screenspot_loss_num": 0.06403223673502605, + "eval_screenspot_loss_xval": 1.6028645833333333, + "eval_screenspot_runtime": 37.6194, + "eval_screenspot_samples_per_second": 2.366, + "eval_screenspot_steps_per_second": 0.08, + "num_input_tokens_seen": 434285972, + "step": 7750 + }, + { + "epoch": 17.260579064587972, + "eval_compot_CIoU": 0.3500685393810272, + "eval_compot_GIoU": 0.36062242090702057, + "eval_compot_IoU": 0.4054069072008133, + "eval_compot_MAE_all": 0.0187700055539608, + "eval_compot_MAE_h": 0.010957159101963043, + "eval_compot_MAE_w": 0.02098592184484005, + "eval_compot_MAE_x_boxes": 0.03000013902783394, + "eval_compot_MAE_y_boxes": 0.006979038938879967, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3751345872879028, + "eval_compot_loss_ce": 0.00013413318083621562, + "eval_compot_loss_iou": 0.6326904296875, + "eval_compot_loss_num": 0.017452239990234375, + "eval_compot_loss_xval": 1.35302734375, + "eval_compot_runtime": 23.5427, + "eval_compot_samples_per_second": 2.124, + "eval_compot_steps_per_second": 0.085, + "num_input_tokens_seen": 434285972, + "step": 7750 + }, + { + "epoch": 17.260579064587972, + "eval_custom_ui_val_CIoU": 0.47184327410327065, + "eval_custom_ui_val_GIoU": 0.4758959710597992, + "eval_custom_ui_val_IoU": 0.5333681570159065, + "eval_custom_ui_val_MAE_all": 0.027272911483628884, + "eval_custom_ui_val_MAE_h": 0.014620246106965674, + "eval_custom_ui_val_MAE_w": 0.03669780415172378, + "eval_custom_ui_val_MAE_x_boxes": 0.03337866820705434, + "eval_custom_ui_val_MAE_y_boxes": 0.012776042127774822, + "eval_custom_ui_val_inside_bbox": 0.7754629651705424, + "eval_custom_ui_val_loss": 1.177891492843628, + "eval_custom_ui_val_loss_ce": 0.00015998236550432112, + "eval_custom_ui_val_loss_iou": 0.5063612196180556, + "eval_custom_ui_val_loss_num": 0.023825857374403212, + "eval_custom_ui_val_loss_xval": 1.1314561631944444, + "eval_custom_ui_val_runtime": 65.7899, + "eval_custom_ui_val_samples_per_second": 4.028, + "eval_custom_ui_val_steps_per_second": 0.137, + "num_input_tokens_seen": 434285972, + "step": 7750 + }, + { + "epoch": 17.260579064587972, + "loss": 0.8485028743743896, + "loss_ce": 0.00011421847739256918, + "loss_iou": 0.3828125, + "loss_num": 0.016845703125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 434285972, + "step": 7750 + }, + { + "epoch": 17.262806236080177, + "grad_norm": 40.981712341308594, + "learning_rate": 1e-06, + "loss": 0.4852, + "num_input_tokens_seen": 434344348, + "step": 7751 + }, + { + "epoch": 17.262806236080177, + "loss": 0.6921710968017578, + "loss_ce": 0.00021551080862991512, + "loss_iou": 0.263671875, + "loss_num": 0.03271484375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 434344348, + "step": 7751 + }, + { + "epoch": 17.265033407572382, + "grad_norm": 21.000991821289062, + "learning_rate": 1e-06, + "loss": 0.3312, + "num_input_tokens_seen": 434397948, + "step": 7752 + }, + { + "epoch": 17.265033407572382, + "loss": 0.3175837993621826, + "loss_ce": 7.892360736150295e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.005340576171875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 434397948, + "step": 7752 + }, + { + "epoch": 17.267260579064587, + "grad_norm": 19.446720123291016, + "learning_rate": 1e-06, + "loss": 0.4803, + "num_input_tokens_seen": 434452904, + "step": 7753 + }, + { + "epoch": 17.267260579064587, + "loss": 0.5616533756256104, + "loss_ce": 9.945797501131892e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.0196533203125, + "loss_xval": 0.5625, + "num_input_tokens_seen": 434452904, + "step": 7753 + }, + { + "epoch": 17.26948775055679, + "grad_norm": 18.333148956298828, + "learning_rate": 1e-06, + "loss": 0.4172, + "num_input_tokens_seen": 434508660, + "step": 7754 + }, + { + "epoch": 17.26948775055679, + "loss": 0.34176281094551086, + "loss_ce": 8.800373325357214e-05, + "loss_iou": 0.138671875, + "loss_num": 0.012939453125, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 434508660, + "step": 7754 + }, + { + "epoch": 17.271714922048996, + "grad_norm": 19.104904174804688, + "learning_rate": 1e-06, + "loss": 0.3037, + "num_input_tokens_seen": 434565492, + "step": 7755 + }, + { + "epoch": 17.271714922048996, + "loss": 0.27949851751327515, + "loss_ce": 7.9582110629417e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0091552734375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 434565492, + "step": 7755 + }, + { + "epoch": 17.2739420935412, + "grad_norm": 16.93346405029297, + "learning_rate": 1e-06, + "loss": 0.3254, + "num_input_tokens_seen": 434619016, + "step": 7756 + }, + { + "epoch": 17.2739420935412, + "loss": 0.2303803563117981, + "loss_ce": 9.471694647800177e-05, + "loss_iou": 0.09326171875, + "loss_num": 0.00872802734375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 434619016, + "step": 7756 + }, + { + "epoch": 17.276169265033406, + "grad_norm": 15.693754196166992, + "learning_rate": 1e-06, + "loss": 0.3854, + "num_input_tokens_seen": 434674412, + "step": 7757 + }, + { + "epoch": 17.276169265033406, + "loss": 0.5041153430938721, + "loss_ce": 8.705217624083161e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.0191650390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 434674412, + "step": 7757 + }, + { + "epoch": 17.27839643652561, + "grad_norm": 44.67047882080078, + "learning_rate": 1e-06, + "loss": 0.3596, + "num_input_tokens_seen": 434728900, + "step": 7758 + }, + { + "epoch": 17.27839643652561, + "loss": 0.4251922070980072, + "loss_ce": 8.232867548940703e-05, + "loss_iou": 0.1953125, + "loss_num": 0.006744384765625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 434728900, + "step": 7758 + }, + { + "epoch": 17.280623608017816, + "grad_norm": 49.442901611328125, + "learning_rate": 1e-06, + "loss": 0.3936, + "num_input_tokens_seen": 434784116, + "step": 7759 + }, + { + "epoch": 17.280623608017816, + "loss": 0.41439324617385864, + "loss_ce": 8.657870057504624e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.0137939453125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 434784116, + "step": 7759 + }, + { + "epoch": 17.28285077951002, + "grad_norm": 19.590133666992188, + "learning_rate": 1e-06, + "loss": 0.4671, + "num_input_tokens_seen": 434838652, + "step": 7760 + }, + { + "epoch": 17.28285077951002, + "loss": 0.31147128343582153, + "loss_ce": 6.993389979470521e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.0072021484375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 434838652, + "step": 7760 + }, + { + "epoch": 17.285077951002226, + "grad_norm": 18.076194763183594, + "learning_rate": 1e-06, + "loss": 0.408, + "num_input_tokens_seen": 434892168, + "step": 7761 + }, + { + "epoch": 17.285077951002226, + "loss": 0.35101962089538574, + "loss_ce": 6.74750772304833e-05, + "loss_iou": 0.15234375, + "loss_num": 0.009033203125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 434892168, + "step": 7761 + }, + { + "epoch": 17.28730512249443, + "grad_norm": 20.00171661376953, + "learning_rate": 1e-06, + "loss": 0.3366, + "num_input_tokens_seen": 434949224, + "step": 7762 + }, + { + "epoch": 17.28730512249443, + "loss": 0.38625282049179077, + "loss_ce": 8.34054226288572e-05, + "loss_iou": 0.17578125, + "loss_num": 0.007110595703125, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 434949224, + "step": 7762 + }, + { + "epoch": 17.289532293986635, + "grad_norm": 24.69287109375, + "learning_rate": 1e-06, + "loss": 0.35, + "num_input_tokens_seen": 435001896, + "step": 7763 + }, + { + "epoch": 17.289532293986635, + "loss": 0.408402681350708, + "loss_ce": 7.747893687337637e-05, + "loss_iou": 0.173828125, + "loss_num": 0.01220703125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 435001896, + "step": 7763 + }, + { + "epoch": 17.29175946547884, + "grad_norm": 11.8654203414917, + "learning_rate": 1e-06, + "loss": 0.2978, + "num_input_tokens_seen": 435057448, + "step": 7764 + }, + { + "epoch": 17.29175946547884, + "loss": 0.35043925046920776, + "loss_ce": 9.744616545503959e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.01171875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 435057448, + "step": 7764 + }, + { + "epoch": 17.293986636971045, + "grad_norm": 15.643658638000488, + "learning_rate": 1e-06, + "loss": 0.3667, + "num_input_tokens_seen": 435114576, + "step": 7765 + }, + { + "epoch": 17.293986636971045, + "loss": 0.2830941081047058, + "loss_ce": 7.410335820168257e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.005615234375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 435114576, + "step": 7765 + }, + { + "epoch": 17.29621380846325, + "grad_norm": 28.67169189453125, + "learning_rate": 1e-06, + "loss": 0.3936, + "num_input_tokens_seen": 435170508, + "step": 7766 + }, + { + "epoch": 17.29621380846325, + "loss": 0.4348151683807373, + "loss_ce": 0.00012277913629077375, + "loss_iou": 0.189453125, + "loss_num": 0.010986328125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 435170508, + "step": 7766 + }, + { + "epoch": 17.29844097995546, + "grad_norm": 40.479034423828125, + "learning_rate": 1e-06, + "loss": 0.5411, + "num_input_tokens_seen": 435225040, + "step": 7767 + }, + { + "epoch": 17.29844097995546, + "loss": 0.44416916370391846, + "loss_ce": 7.737807754892856e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.015625, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 435225040, + "step": 7767 + }, + { + "epoch": 17.30066815144766, + "grad_norm": 19.786476135253906, + "learning_rate": 1e-06, + "loss": 0.275, + "num_input_tokens_seen": 435280060, + "step": 7768 + }, + { + "epoch": 17.30066815144766, + "loss": 0.25245094299316406, + "loss_ce": 7.053982699289918e-05, + "loss_iou": 0.1162109375, + "loss_num": 0.0040283203125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 435280060, + "step": 7768 + }, + { + "epoch": 17.302895322939868, + "grad_norm": 25.475353240966797, + "learning_rate": 1e-06, + "loss": 0.4287, + "num_input_tokens_seen": 435337596, + "step": 7769 + }, + { + "epoch": 17.302895322939868, + "loss": 0.4136672914028168, + "loss_ce": 9.308782318839803e-05, + "loss_iou": 0.1875, + "loss_num": 0.0076904296875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 435337596, + "step": 7769 + }, + { + "epoch": 17.305122494432073, + "grad_norm": 13.328644752502441, + "learning_rate": 1e-06, + "loss": 0.3073, + "num_input_tokens_seen": 435394108, + "step": 7770 + }, + { + "epoch": 17.305122494432073, + "loss": 0.34184500575065613, + "loss_ce": 7.864900544518605e-05, + "loss_iou": 0.15234375, + "loss_num": 0.007568359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 435394108, + "step": 7770 + }, + { + "epoch": 17.307349665924278, + "grad_norm": 19.182100296020508, + "learning_rate": 1e-06, + "loss": 0.4608, + "num_input_tokens_seen": 435448756, + "step": 7771 + }, + { + "epoch": 17.307349665924278, + "loss": 0.577645480632782, + "loss_ce": 6.979001045692712e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.0169677734375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 435448756, + "step": 7771 + }, + { + "epoch": 17.309576837416483, + "grad_norm": 22.650524139404297, + "learning_rate": 1e-06, + "loss": 0.3382, + "num_input_tokens_seen": 435505496, + "step": 7772 + }, + { + "epoch": 17.309576837416483, + "loss": 0.413726806640625, + "loss_ce": 9.157357271760702e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.0081787109375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 435505496, + "step": 7772 + }, + { + "epoch": 17.311804008908688, + "grad_norm": 22.297222137451172, + "learning_rate": 1e-06, + "loss": 0.5371, + "num_input_tokens_seen": 435561892, + "step": 7773 + }, + { + "epoch": 17.311804008908688, + "loss": 0.3261268436908722, + "loss_ce": 7.704827294219285e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 435561892, + "step": 7773 + }, + { + "epoch": 17.314031180400892, + "grad_norm": 23.894563674926758, + "learning_rate": 1e-06, + "loss": 0.38, + "num_input_tokens_seen": 435617896, + "step": 7774 + }, + { + "epoch": 17.314031180400892, + "loss": 0.3799806237220764, + "loss_ce": 9.779801621334627e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.0062255859375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 435617896, + "step": 7774 + }, + { + "epoch": 17.316258351893097, + "grad_norm": 21.17926597595215, + "learning_rate": 1e-06, + "loss": 0.4776, + "num_input_tokens_seen": 435674376, + "step": 7775 + }, + { + "epoch": 17.316258351893097, + "loss": 0.47175133228302, + "loss_ce": 7.16680588084273e-05, + "loss_iou": 0.21484375, + "loss_num": 0.00848388671875, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 435674376, + "step": 7775 + }, + { + "epoch": 17.318485523385302, + "grad_norm": 18.95273208618164, + "learning_rate": 1e-06, + "loss": 0.3432, + "num_input_tokens_seen": 435734040, + "step": 7776 + }, + { + "epoch": 17.318485523385302, + "loss": 0.3788660764694214, + "loss_ce": 8.189848449546844e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0084228515625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 435734040, + "step": 7776 + }, + { + "epoch": 17.320712694877507, + "grad_norm": 12.807289123535156, + "learning_rate": 1e-06, + "loss": 0.3866, + "num_input_tokens_seen": 435790640, + "step": 7777 + }, + { + "epoch": 17.320712694877507, + "loss": 0.34432125091552734, + "loss_ce": 8.297587919514626e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00860595703125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 435790640, + "step": 7777 + }, + { + "epoch": 17.322939866369712, + "grad_norm": 19.11941146850586, + "learning_rate": 1e-06, + "loss": 0.4882, + "num_input_tokens_seen": 435847892, + "step": 7778 + }, + { + "epoch": 17.322939866369712, + "loss": 0.5650254487991333, + "loss_ce": 8.401433296967298e-05, + "loss_iou": 0.228515625, + "loss_num": 0.021728515625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 435847892, + "step": 7778 + }, + { + "epoch": 17.325167037861917, + "grad_norm": 19.643787384033203, + "learning_rate": 1e-06, + "loss": 0.4666, + "num_input_tokens_seen": 435903812, + "step": 7779 + }, + { + "epoch": 17.325167037861917, + "loss": 0.39589911699295044, + "loss_ce": 8.612501551397145e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0126953125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 435903812, + "step": 7779 + }, + { + "epoch": 17.32739420935412, + "grad_norm": 22.365102767944336, + "learning_rate": 1e-06, + "loss": 0.4378, + "num_input_tokens_seen": 435960584, + "step": 7780 + }, + { + "epoch": 17.32739420935412, + "loss": 0.39949724078178406, + "loss_ce": 8.315553714055568e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.01092529296875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 435960584, + "step": 7780 + }, + { + "epoch": 17.329621380846326, + "grad_norm": 27.921226501464844, + "learning_rate": 1e-06, + "loss": 0.4324, + "num_input_tokens_seen": 436016512, + "step": 7781 + }, + { + "epoch": 17.329621380846326, + "loss": 0.5156280994415283, + "loss_ce": 0.00012516917195171118, + "loss_iou": 0.224609375, + "loss_num": 0.01312255859375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 436016512, + "step": 7781 + }, + { + "epoch": 17.33184855233853, + "grad_norm": 24.685932159423828, + "learning_rate": 1e-06, + "loss": 0.4589, + "num_input_tokens_seen": 436072952, + "step": 7782 + }, + { + "epoch": 17.33184855233853, + "loss": 0.41859763860702515, + "loss_ce": 7.957972411531955e-05, + "loss_iou": 0.1953125, + "loss_num": 0.0054931640625, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 436072952, + "step": 7782 + }, + { + "epoch": 17.334075723830736, + "grad_norm": 30.725080490112305, + "learning_rate": 1e-06, + "loss": 0.5439, + "num_input_tokens_seen": 436127800, + "step": 7783 + }, + { + "epoch": 17.334075723830736, + "loss": 0.4986262917518616, + "loss_ce": 9.114194836001843e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.007232666015625, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 436127800, + "step": 7783 + }, + { + "epoch": 17.33630289532294, + "grad_norm": 20.326061248779297, + "learning_rate": 1e-06, + "loss": 0.3683, + "num_input_tokens_seen": 436187140, + "step": 7784 + }, + { + "epoch": 17.33630289532294, + "loss": 0.24855035543441772, + "loss_ce": 7.622801786055788e-05, + "loss_iou": 0.11083984375, + "loss_num": 0.00537109375, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 436187140, + "step": 7784 + }, + { + "epoch": 17.338530066815146, + "grad_norm": 21.3463077545166, + "learning_rate": 1e-06, + "loss": 0.4456, + "num_input_tokens_seen": 436242216, + "step": 7785 + }, + { + "epoch": 17.338530066815146, + "loss": 0.3916824460029602, + "loss_ce": 8.088418690022081e-05, + "loss_iou": 0.169921875, + "loss_num": 0.01025390625, + "loss_xval": 0.390625, + "num_input_tokens_seen": 436242216, + "step": 7785 + }, + { + "epoch": 17.34075723830735, + "grad_norm": 17.23738670349121, + "learning_rate": 1e-06, + "loss": 0.467, + "num_input_tokens_seen": 436301260, + "step": 7786 + }, + { + "epoch": 17.34075723830735, + "loss": 0.549955427646637, + "loss_ce": 8.968937618192285e-05, + "loss_iou": 0.21875, + "loss_num": 0.0223388671875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 436301260, + "step": 7786 + }, + { + "epoch": 17.342984409799556, + "grad_norm": 15.01907730102539, + "learning_rate": 1e-06, + "loss": 0.3399, + "num_input_tokens_seen": 436357676, + "step": 7787 + }, + { + "epoch": 17.342984409799556, + "loss": 0.34413158893585205, + "loss_ce": 7.641033153049648e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.010498046875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 436357676, + "step": 7787 + }, + { + "epoch": 17.34521158129176, + "grad_norm": 17.24225425720215, + "learning_rate": 1e-06, + "loss": 0.3079, + "num_input_tokens_seen": 436412808, + "step": 7788 + }, + { + "epoch": 17.34521158129176, + "loss": 0.2603484094142914, + "loss_ce": 7.925922545837238e-05, + "loss_iou": 0.12060546875, + "loss_num": 0.0037994384765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 436412808, + "step": 7788 + }, + { + "epoch": 17.347438752783965, + "grad_norm": 17.269451141357422, + "learning_rate": 1e-06, + "loss": 0.457, + "num_input_tokens_seen": 436468436, + "step": 7789 + }, + { + "epoch": 17.347438752783965, + "loss": 0.6248146891593933, + "loss_ce": 0.00011986246681772172, + "loss_iou": 0.275390625, + "loss_num": 0.0146484375, + "loss_xval": 0.625, + "num_input_tokens_seen": 436468436, + "step": 7789 + }, + { + "epoch": 17.34966592427617, + "grad_norm": 28.061927795410156, + "learning_rate": 1e-06, + "loss": 0.3785, + "num_input_tokens_seen": 436526876, + "step": 7790 + }, + { + "epoch": 17.34966592427617, + "loss": 0.4654099941253662, + "loss_ce": 7.796321006026119e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 436526876, + "step": 7790 + }, + { + "epoch": 17.351893095768375, + "grad_norm": 10.565998077392578, + "learning_rate": 1e-06, + "loss": 0.3896, + "num_input_tokens_seen": 436583952, + "step": 7791 + }, + { + "epoch": 17.351893095768375, + "loss": 0.30404287576675415, + "loss_ce": 8.778244227869436e-05, + "loss_iou": 0.134765625, + "loss_num": 0.006805419921875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 436583952, + "step": 7791 + }, + { + "epoch": 17.35412026726058, + "grad_norm": 15.32834243774414, + "learning_rate": 1e-06, + "loss": 0.4977, + "num_input_tokens_seen": 436640224, + "step": 7792 + }, + { + "epoch": 17.35412026726058, + "loss": 0.5239353179931641, + "loss_ce": 7.061338692437857e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.0162353515625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 436640224, + "step": 7792 + }, + { + "epoch": 17.356347438752785, + "grad_norm": 34.02385711669922, + "learning_rate": 1e-06, + "loss": 0.6217, + "num_input_tokens_seen": 436698680, + "step": 7793 + }, + { + "epoch": 17.356347438752785, + "loss": 0.467492938041687, + "loss_ce": 8.570231148041785e-05, + "loss_iou": 0.21484375, + "loss_num": 0.007537841796875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 436698680, + "step": 7793 + }, + { + "epoch": 17.35857461024499, + "grad_norm": 20.18070411682129, + "learning_rate": 1e-06, + "loss": 0.408, + "num_input_tokens_seen": 436753572, + "step": 7794 + }, + { + "epoch": 17.35857461024499, + "loss": 0.2802870273590088, + "loss_ce": 7.461066707037389e-05, + "loss_iou": 0.119140625, + "loss_num": 0.00823974609375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 436753572, + "step": 7794 + }, + { + "epoch": 17.360801781737194, + "grad_norm": 18.094005584716797, + "learning_rate": 1e-06, + "loss": 0.4583, + "num_input_tokens_seen": 436807624, + "step": 7795 + }, + { + "epoch": 17.360801781737194, + "loss": 0.48153021931648254, + "loss_ce": 8.488957246299833e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 436807624, + "step": 7795 + }, + { + "epoch": 17.3630289532294, + "grad_norm": 13.45447826385498, + "learning_rate": 1e-06, + "loss": 0.3153, + "num_input_tokens_seen": 436863496, + "step": 7796 + }, + { + "epoch": 17.3630289532294, + "loss": 0.3097211420536041, + "loss_ce": 8.980316488305107e-05, + "loss_iou": 0.1328125, + "loss_num": 0.00872802734375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 436863496, + "step": 7796 + }, + { + "epoch": 17.365256124721604, + "grad_norm": 16.23236083984375, + "learning_rate": 1e-06, + "loss": 0.3146, + "num_input_tokens_seen": 436920076, + "step": 7797 + }, + { + "epoch": 17.365256124721604, + "loss": 0.3300950825214386, + "loss_ce": 7.799094601068646e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.010009765625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 436920076, + "step": 7797 + }, + { + "epoch": 17.36748329621381, + "grad_norm": 16.27332305908203, + "learning_rate": 1e-06, + "loss": 0.4944, + "num_input_tokens_seen": 436975300, + "step": 7798 + }, + { + "epoch": 17.36748329621381, + "loss": 0.3892417550086975, + "loss_ce": 8.160505967680365e-05, + "loss_iou": 0.171875, + "loss_num": 0.0091552734375, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 436975300, + "step": 7798 + }, + { + "epoch": 17.369710467706014, + "grad_norm": 13.5247163772583, + "learning_rate": 1e-06, + "loss": 0.508, + "num_input_tokens_seen": 437030760, + "step": 7799 + }, + { + "epoch": 17.369710467706014, + "loss": 0.5794344544410706, + "loss_ce": 8.873187471181154e-05, + "loss_iou": 0.259765625, + "loss_num": 0.01171875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 437030760, + "step": 7799 + }, + { + "epoch": 17.37193763919822, + "grad_norm": 16.678621292114258, + "learning_rate": 1e-06, + "loss": 0.4149, + "num_input_tokens_seen": 437086492, + "step": 7800 + }, + { + "epoch": 17.37193763919822, + "loss": 0.6766193509101868, + "loss_ce": 0.00010563358955550939, + "loss_iou": 0.283203125, + "loss_num": 0.0220947265625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 437086492, + "step": 7800 + }, + { + "epoch": 17.374164810690424, + "grad_norm": 16.469213485717773, + "learning_rate": 1e-06, + "loss": 0.3798, + "num_input_tokens_seen": 437142412, + "step": 7801 + }, + { + "epoch": 17.374164810690424, + "loss": 0.413549542427063, + "loss_ce": 9.739689994603395e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.004150390625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 437142412, + "step": 7801 + }, + { + "epoch": 17.37639198218263, + "grad_norm": 27.44245719909668, + "learning_rate": 1e-06, + "loss": 0.4157, + "num_input_tokens_seen": 437199560, + "step": 7802 + }, + { + "epoch": 17.37639198218263, + "loss": 0.3337703347206116, + "loss_ce": 0.00027421663980931044, + "loss_iou": 0.1494140625, + "loss_num": 0.00689697265625, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 437199560, + "step": 7802 + }, + { + "epoch": 17.378619153674833, + "grad_norm": 24.417043685913086, + "learning_rate": 1e-06, + "loss": 0.3968, + "num_input_tokens_seen": 437253536, + "step": 7803 + }, + { + "epoch": 17.378619153674833, + "loss": 0.3829765319824219, + "loss_ce": 7.25047430023551e-05, + "loss_iou": 0.16015625, + "loss_num": 0.01251220703125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 437253536, + "step": 7803 + }, + { + "epoch": 17.380846325167038, + "grad_norm": 18.77859878540039, + "learning_rate": 1e-06, + "loss": 0.4349, + "num_input_tokens_seen": 437311192, + "step": 7804 + }, + { + "epoch": 17.380846325167038, + "loss": 0.4672635793685913, + "loss_ce": 0.00010047997056972235, + "loss_iou": 0.220703125, + "loss_num": 0.005401611328125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 437311192, + "step": 7804 + }, + { + "epoch": 17.383073496659243, + "grad_norm": 14.215685844421387, + "learning_rate": 1e-06, + "loss": 0.5266, + "num_input_tokens_seen": 437368228, + "step": 7805 + }, + { + "epoch": 17.383073496659243, + "loss": 0.6981062293052673, + "loss_ce": 0.00035229395143687725, + "loss_iou": 0.306640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 437368228, + "step": 7805 + }, + { + "epoch": 17.385300668151448, + "grad_norm": 13.988162994384766, + "learning_rate": 1e-06, + "loss": 0.3419, + "num_input_tokens_seen": 437426224, + "step": 7806 + }, + { + "epoch": 17.385300668151448, + "loss": 0.3153742551803589, + "loss_ce": 6.663544627372175e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.0072021484375, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 437426224, + "step": 7806 + }, + { + "epoch": 17.387527839643653, + "grad_norm": 15.999502182006836, + "learning_rate": 1e-06, + "loss": 0.4759, + "num_input_tokens_seen": 437483276, + "step": 7807 + }, + { + "epoch": 17.387527839643653, + "loss": 0.4618630111217499, + "loss_ce": 7.104083488229662e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0181884765625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 437483276, + "step": 7807 + }, + { + "epoch": 17.389755011135858, + "grad_norm": 19.859189987182617, + "learning_rate": 1e-06, + "loss": 0.5076, + "num_input_tokens_seen": 437539780, + "step": 7808 + }, + { + "epoch": 17.389755011135858, + "loss": 0.4627245366573334, + "loss_ce": 7.806568464729935e-05, + "loss_iou": 0.201171875, + "loss_num": 0.01214599609375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 437539780, + "step": 7808 + }, + { + "epoch": 17.391982182628063, + "grad_norm": 18.35259437561035, + "learning_rate": 1e-06, + "loss": 0.4693, + "num_input_tokens_seen": 437593776, + "step": 7809 + }, + { + "epoch": 17.391982182628063, + "loss": 0.6174068450927734, + "loss_ce": 9.7297815955244e-05, + "loss_iou": 0.275390625, + "loss_num": 0.01336669921875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 437593776, + "step": 7809 + }, + { + "epoch": 17.394209354120267, + "grad_norm": 22.22394371032715, + "learning_rate": 1e-06, + "loss": 0.4799, + "num_input_tokens_seen": 437648068, + "step": 7810 + }, + { + "epoch": 17.394209354120267, + "loss": 0.5823743939399719, + "loss_ce": 0.00016007671365514398, + "loss_iou": 0.255859375, + "loss_num": 0.0137939453125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 437648068, + "step": 7810 + }, + { + "epoch": 17.396436525612472, + "grad_norm": 44.06281280517578, + "learning_rate": 1e-06, + "loss": 0.5151, + "num_input_tokens_seen": 437703580, + "step": 7811 + }, + { + "epoch": 17.396436525612472, + "loss": 0.674892783164978, + "loss_ce": 8.813235035631806e-05, + "loss_iou": 0.28125, + "loss_num": 0.022705078125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 437703580, + "step": 7811 + }, + { + "epoch": 17.398663697104677, + "grad_norm": 23.47769546508789, + "learning_rate": 1e-06, + "loss": 0.5547, + "num_input_tokens_seen": 437757572, + "step": 7812 + }, + { + "epoch": 17.398663697104677, + "loss": 0.5415530204772949, + "loss_ce": 0.0001712157973088324, + "loss_iou": 0.240234375, + "loss_num": 0.0120849609375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 437757572, + "step": 7812 + }, + { + "epoch": 17.400890868596882, + "grad_norm": 17.036544799804688, + "learning_rate": 1e-06, + "loss": 0.4651, + "num_input_tokens_seen": 437810472, + "step": 7813 + }, + { + "epoch": 17.400890868596882, + "loss": 0.3841232657432556, + "loss_ce": 9.007104381453246e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0103759765625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 437810472, + "step": 7813 + }, + { + "epoch": 17.403118040089087, + "grad_norm": 17.07404327392578, + "learning_rate": 1e-06, + "loss": 0.3801, + "num_input_tokens_seen": 437868112, + "step": 7814 + }, + { + "epoch": 17.403118040089087, + "loss": 0.4032171964645386, + "loss_ce": 7.997561624506488e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.01300048828125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 437868112, + "step": 7814 + }, + { + "epoch": 17.40534521158129, + "grad_norm": 13.92210578918457, + "learning_rate": 1e-06, + "loss": 0.3186, + "num_input_tokens_seen": 437923616, + "step": 7815 + }, + { + "epoch": 17.40534521158129, + "loss": 0.40229788422584534, + "loss_ce": 7.62125855544582e-05, + "loss_iou": 0.16796875, + "loss_num": 0.01312255859375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 437923616, + "step": 7815 + }, + { + "epoch": 17.407572383073497, + "grad_norm": 19.43670082092285, + "learning_rate": 1e-06, + "loss": 0.4878, + "num_input_tokens_seen": 437979980, + "step": 7816 + }, + { + "epoch": 17.407572383073497, + "loss": 0.7343358397483826, + "loss_ce": 8.290271944133565e-05, + "loss_iou": 0.306640625, + "loss_num": 0.02392578125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 437979980, + "step": 7816 + }, + { + "epoch": 17.4097995545657, + "grad_norm": 13.963506698608398, + "learning_rate": 1e-06, + "loss": 0.4524, + "num_input_tokens_seen": 438033348, + "step": 7817 + }, + { + "epoch": 17.4097995545657, + "loss": 0.41097211837768555, + "loss_ce": 8.345420064870268e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.0123291015625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 438033348, + "step": 7817 + }, + { + "epoch": 17.412026726057906, + "grad_norm": 18.236024856567383, + "learning_rate": 1e-06, + "loss": 0.3729, + "num_input_tokens_seen": 438091792, + "step": 7818 + }, + { + "epoch": 17.412026726057906, + "loss": 0.31764712929725647, + "loss_ce": 8.121843711705878e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.00848388671875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 438091792, + "step": 7818 + }, + { + "epoch": 17.41425389755011, + "grad_norm": 19.653343200683594, + "learning_rate": 1e-06, + "loss": 0.2597, + "num_input_tokens_seen": 438148088, + "step": 7819 + }, + { + "epoch": 17.41425389755011, + "loss": 0.3163628578186035, + "loss_ce": 7.866387022659183e-05, + "loss_iou": 0.14453125, + "loss_num": 0.00531005859375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 438148088, + "step": 7819 + }, + { + "epoch": 17.416481069042316, + "grad_norm": 16.326854705810547, + "learning_rate": 1e-06, + "loss": 0.4654, + "num_input_tokens_seen": 438205184, + "step": 7820 + }, + { + "epoch": 17.416481069042316, + "loss": 0.4878746271133423, + "loss_ce": 8.16921892692335e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.0130615234375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 438205184, + "step": 7820 + }, + { + "epoch": 17.41870824053452, + "grad_norm": 29.167247772216797, + "learning_rate": 1e-06, + "loss": 0.3659, + "num_input_tokens_seen": 438260072, + "step": 7821 + }, + { + "epoch": 17.41870824053452, + "loss": 0.40425050258636475, + "loss_ce": 7.568299042759463e-05, + "loss_iou": 0.185546875, + "loss_num": 0.00665283203125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 438260072, + "step": 7821 + }, + { + "epoch": 17.420935412026726, + "grad_norm": 18.79923439025879, + "learning_rate": 1e-06, + "loss": 0.4383, + "num_input_tokens_seen": 438314384, + "step": 7822 + }, + { + "epoch": 17.420935412026726, + "loss": 0.5045883655548096, + "loss_ce": 7.173811900429428e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.022216796875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 438314384, + "step": 7822 + }, + { + "epoch": 17.42316258351893, + "grad_norm": 15.445906639099121, + "learning_rate": 1e-06, + "loss": 0.3509, + "num_input_tokens_seen": 438370632, + "step": 7823 + }, + { + "epoch": 17.42316258351893, + "loss": 0.43685096502304077, + "loss_ce": 8.341444481629878e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.00830078125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 438370632, + "step": 7823 + }, + { + "epoch": 17.425389755011135, + "grad_norm": 32.27328109741211, + "learning_rate": 1e-06, + "loss": 0.3304, + "num_input_tokens_seen": 438423192, + "step": 7824 + }, + { + "epoch": 17.425389755011135, + "loss": 0.33272212743759155, + "loss_ce": 8.052479824982584e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00811767578125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 438423192, + "step": 7824 + }, + { + "epoch": 17.42761692650334, + "grad_norm": 23.52182960510254, + "learning_rate": 1e-06, + "loss": 0.6017, + "num_input_tokens_seen": 438477292, + "step": 7825 + }, + { + "epoch": 17.42761692650334, + "loss": 0.5533384680747986, + "loss_ce": 0.00011578819248825312, + "loss_iou": 0.2578125, + "loss_num": 0.007781982421875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 438477292, + "step": 7825 + }, + { + "epoch": 17.429844097995545, + "grad_norm": 26.976110458374023, + "learning_rate": 1e-06, + "loss": 0.4182, + "num_input_tokens_seen": 438531796, + "step": 7826 + }, + { + "epoch": 17.429844097995545, + "loss": 0.4023064076900482, + "loss_ce": 8.469966996926814e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0101318359375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 438531796, + "step": 7826 + }, + { + "epoch": 17.43207126948775, + "grad_norm": 19.38138198852539, + "learning_rate": 1e-06, + "loss": 0.4998, + "num_input_tokens_seen": 438589904, + "step": 7827 + }, + { + "epoch": 17.43207126948775, + "loss": 0.3881371021270752, + "loss_ce": 7.558944344054908e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.0133056640625, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 438589904, + "step": 7827 + }, + { + "epoch": 17.434298440979955, + "grad_norm": 31.262649536132812, + "learning_rate": 1e-06, + "loss": 0.3436, + "num_input_tokens_seen": 438643728, + "step": 7828 + }, + { + "epoch": 17.434298440979955, + "loss": 0.3436218798160553, + "loss_ce": 0.00011602583253988996, + "loss_iou": 0.1552734375, + "loss_num": 0.006744384765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 438643728, + "step": 7828 + }, + { + "epoch": 17.43652561247216, + "grad_norm": 14.571344375610352, + "learning_rate": 1e-06, + "loss": 0.3795, + "num_input_tokens_seen": 438701432, + "step": 7829 + }, + { + "epoch": 17.43652561247216, + "loss": 0.4284363389015198, + "loss_ce": 9.162020432995632e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.0120849609375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 438701432, + "step": 7829 + }, + { + "epoch": 17.438752783964365, + "grad_norm": 25.686256408691406, + "learning_rate": 1e-06, + "loss": 0.3957, + "num_input_tokens_seen": 438755012, + "step": 7830 + }, + { + "epoch": 17.438752783964365, + "loss": 0.42792049050331116, + "loss_ce": 0.00012508549843914807, + "loss_iou": 0.181640625, + "loss_num": 0.01312255859375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 438755012, + "step": 7830 + }, + { + "epoch": 17.44097995545657, + "grad_norm": 17.335966110229492, + "learning_rate": 1e-06, + "loss": 0.4715, + "num_input_tokens_seen": 438807680, + "step": 7831 + }, + { + "epoch": 17.44097995545657, + "loss": 0.4803728461265564, + "loss_ce": 8.723569044377655e-05, + "loss_iou": 0.203125, + "loss_num": 0.01483154296875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 438807680, + "step": 7831 + }, + { + "epoch": 17.443207126948774, + "grad_norm": 24.54792022705078, + "learning_rate": 1e-06, + "loss": 0.4759, + "num_input_tokens_seen": 438864696, + "step": 7832 + }, + { + "epoch": 17.443207126948774, + "loss": 0.5594247579574585, + "loss_ce": 9.856373799266294e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.01318359375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 438864696, + "step": 7832 + }, + { + "epoch": 17.44543429844098, + "grad_norm": 21.406185150146484, + "learning_rate": 1e-06, + "loss": 0.4039, + "num_input_tokens_seen": 438918952, + "step": 7833 + }, + { + "epoch": 17.44543429844098, + "loss": 0.5075805187225342, + "loss_ce": 0.00013420640607364476, + "loss_iou": 0.220703125, + "loss_num": 0.01348876953125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 438918952, + "step": 7833 + }, + { + "epoch": 17.447661469933184, + "grad_norm": 21.712177276611328, + "learning_rate": 1e-06, + "loss": 0.264, + "num_input_tokens_seen": 438976448, + "step": 7834 + }, + { + "epoch": 17.447661469933184, + "loss": 0.1915939748287201, + "loss_ce": 6.564041541423649e-05, + "loss_iou": 0.08203125, + "loss_num": 0.00555419921875, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 438976448, + "step": 7834 + }, + { + "epoch": 17.44988864142539, + "grad_norm": 16.437280654907227, + "learning_rate": 1e-06, + "loss": 0.3436, + "num_input_tokens_seen": 439030240, + "step": 7835 + }, + { + "epoch": 17.44988864142539, + "loss": 0.2427883744239807, + "loss_ce": 8.206407801480964e-05, + "loss_iou": 0.10791015625, + "loss_num": 0.005340576171875, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 439030240, + "step": 7835 + }, + { + "epoch": 17.452115812917594, + "grad_norm": 16.297969818115234, + "learning_rate": 1e-06, + "loss": 0.3661, + "num_input_tokens_seen": 439087720, + "step": 7836 + }, + { + "epoch": 17.452115812917594, + "loss": 0.3819359540939331, + "loss_ce": 0.00010002277849707752, + "loss_iou": 0.1767578125, + "loss_num": 0.005767822265625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 439087720, + "step": 7836 + }, + { + "epoch": 17.4543429844098, + "grad_norm": 14.235069274902344, + "learning_rate": 1e-06, + "loss": 0.4091, + "num_input_tokens_seen": 439142152, + "step": 7837 + }, + { + "epoch": 17.4543429844098, + "loss": 0.363969087600708, + "loss_ce": 7.749867654638365e-05, + "loss_iou": 0.150390625, + "loss_num": 0.01251220703125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 439142152, + "step": 7837 + }, + { + "epoch": 17.456570155902003, + "grad_norm": 18.100412368774414, + "learning_rate": 1e-06, + "loss": 0.4347, + "num_input_tokens_seen": 439198644, + "step": 7838 + }, + { + "epoch": 17.456570155902003, + "loss": 0.47042351961135864, + "loss_ce": 8.66147456690669e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01422119140625, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 439198644, + "step": 7838 + }, + { + "epoch": 17.45879732739421, + "grad_norm": 17.923023223876953, + "learning_rate": 1e-06, + "loss": 0.5361, + "num_input_tokens_seen": 439253288, + "step": 7839 + }, + { + "epoch": 17.45879732739421, + "loss": 0.31425806879997253, + "loss_ce": 0.0004763224278576672, + "loss_iou": 0.140625, + "loss_num": 0.006500244140625, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 439253288, + "step": 7839 + }, + { + "epoch": 17.461024498886413, + "grad_norm": 19.96112632751465, + "learning_rate": 1e-06, + "loss": 0.4533, + "num_input_tokens_seen": 439308972, + "step": 7840 + }, + { + "epoch": 17.461024498886413, + "loss": 0.40413880348205566, + "loss_ce": 8.605894981883466e-05, + "loss_iou": 0.1875, + "loss_num": 0.00567626953125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 439308972, + "step": 7840 + }, + { + "epoch": 17.463251670378618, + "grad_norm": 15.366341590881348, + "learning_rate": 1e-06, + "loss": 0.2831, + "num_input_tokens_seen": 439367272, + "step": 7841 + }, + { + "epoch": 17.463251670378618, + "loss": 0.33577966690063477, + "loss_ce": 8.628957584733143e-05, + "loss_iou": 0.138671875, + "loss_num": 0.0115966796875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 439367272, + "step": 7841 + }, + { + "epoch": 17.465478841870823, + "grad_norm": 15.852892875671387, + "learning_rate": 1e-06, + "loss": 0.5239, + "num_input_tokens_seen": 439424584, + "step": 7842 + }, + { + "epoch": 17.465478841870823, + "loss": 0.6492735147476196, + "loss_ce": 0.00010354960249969736, + "loss_iou": 0.279296875, + "loss_num": 0.018310546875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 439424584, + "step": 7842 + }, + { + "epoch": 17.467706013363028, + "grad_norm": 14.707501411437988, + "learning_rate": 1e-06, + "loss": 0.3847, + "num_input_tokens_seen": 439479788, + "step": 7843 + }, + { + "epoch": 17.467706013363028, + "loss": 0.31831464171409607, + "loss_ce": 7.733616075711325e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.00958251953125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 439479788, + "step": 7843 + }, + { + "epoch": 17.469933184855233, + "grad_norm": 42.824195861816406, + "learning_rate": 1e-06, + "loss": 0.4873, + "num_input_tokens_seen": 439536572, + "step": 7844 + }, + { + "epoch": 17.469933184855233, + "loss": 0.3835739493370056, + "loss_ce": 9.006427717395127e-05, + "loss_iou": 0.177734375, + "loss_num": 0.005767822265625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 439536572, + "step": 7844 + }, + { + "epoch": 17.472160356347437, + "grad_norm": 14.055753707885742, + "learning_rate": 1e-06, + "loss": 0.564, + "num_input_tokens_seen": 439594084, + "step": 7845 + }, + { + "epoch": 17.472160356347437, + "loss": 0.6532701253890991, + "loss_ce": 7.185361755546182e-05, + "loss_iou": 0.265625, + "loss_num": 0.0245361328125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 439594084, + "step": 7845 + }, + { + "epoch": 17.474387527839642, + "grad_norm": 24.9837589263916, + "learning_rate": 1e-06, + "loss": 0.4575, + "num_input_tokens_seen": 439649312, + "step": 7846 + }, + { + "epoch": 17.474387527839642, + "loss": 0.45571112632751465, + "loss_ce": 0.000144733494380489, + "loss_iou": 0.1845703125, + "loss_num": 0.0174560546875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 439649312, + "step": 7846 + }, + { + "epoch": 17.476614699331847, + "grad_norm": 12.236130714416504, + "learning_rate": 1e-06, + "loss": 0.3456, + "num_input_tokens_seen": 439705312, + "step": 7847 + }, + { + "epoch": 17.476614699331847, + "loss": 0.4908040165901184, + "loss_ce": 8.134340168908238e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.011962890625, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 439705312, + "step": 7847 + }, + { + "epoch": 17.478841870824052, + "grad_norm": 15.341350555419922, + "learning_rate": 1e-06, + "loss": 0.2637, + "num_input_tokens_seen": 439762020, + "step": 7848 + }, + { + "epoch": 17.478841870824052, + "loss": 0.2432081252336502, + "loss_ce": 0.00010509882849873975, + "loss_iou": 0.10302734375, + "loss_num": 0.00750732421875, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 439762020, + "step": 7848 + }, + { + "epoch": 17.481069042316257, + "grad_norm": 20.993581771850586, + "learning_rate": 1e-06, + "loss": 0.3698, + "num_input_tokens_seen": 439818004, + "step": 7849 + }, + { + "epoch": 17.481069042316257, + "loss": 0.41051971912384033, + "loss_ce": 0.00011930659093195572, + "loss_iou": 0.1904296875, + "loss_num": 0.00567626953125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 439818004, + "step": 7849 + }, + { + "epoch": 17.48329621380846, + "grad_norm": 26.052579879760742, + "learning_rate": 1e-06, + "loss": 0.4034, + "num_input_tokens_seen": 439876168, + "step": 7850 + }, + { + "epoch": 17.48329621380846, + "loss": 0.42060598731040955, + "loss_ce": 7.376498251687735e-05, + "loss_iou": 0.1875, + "loss_num": 0.0091552734375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 439876168, + "step": 7850 + }, + { + "epoch": 17.485523385300667, + "grad_norm": 15.98454475402832, + "learning_rate": 1e-06, + "loss": 0.4183, + "num_input_tokens_seen": 439929952, + "step": 7851 + }, + { + "epoch": 17.485523385300667, + "loss": 0.3435441553592682, + "loss_ce": 6.880345608806238e-05, + "loss_iou": 0.142578125, + "loss_num": 0.01171875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 439929952, + "step": 7851 + }, + { + "epoch": 17.48775055679287, + "grad_norm": 27.797683715820312, + "learning_rate": 1e-06, + "loss": 0.3736, + "num_input_tokens_seen": 439986488, + "step": 7852 + }, + { + "epoch": 17.48775055679287, + "loss": 0.3528550863265991, + "loss_ce": 7.186994480434805e-05, + "loss_iou": 0.162109375, + "loss_num": 0.005645751953125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 439986488, + "step": 7852 + }, + { + "epoch": 17.489977728285076, + "grad_norm": 17.55838966369629, + "learning_rate": 1e-06, + "loss": 0.4012, + "num_input_tokens_seen": 440043948, + "step": 7853 + }, + { + "epoch": 17.489977728285076, + "loss": 0.28866440057754517, + "loss_ce": 9.020272409543395e-05, + "loss_iou": 0.12890625, + "loss_num": 0.006134033203125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 440043948, + "step": 7853 + }, + { + "epoch": 17.49220489977728, + "grad_norm": 16.255395889282227, + "learning_rate": 1e-06, + "loss": 0.5026, + "num_input_tokens_seen": 440101172, + "step": 7854 + }, + { + "epoch": 17.49220489977728, + "loss": 0.4820084571838379, + "loss_ce": 7.487165566999465e-05, + "loss_iou": 0.220703125, + "loss_num": 0.00836181640625, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 440101172, + "step": 7854 + }, + { + "epoch": 17.494432071269486, + "grad_norm": 13.98792839050293, + "learning_rate": 1e-06, + "loss": 0.3042, + "num_input_tokens_seen": 440158728, + "step": 7855 + }, + { + "epoch": 17.494432071269486, + "loss": 0.27856212854385376, + "loss_ce": 8.1585232692305e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.00457763671875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 440158728, + "step": 7855 + }, + { + "epoch": 17.49665924276169, + "grad_norm": 15.809954643249512, + "learning_rate": 1e-06, + "loss": 0.4814, + "num_input_tokens_seen": 440214632, + "step": 7856 + }, + { + "epoch": 17.49665924276169, + "loss": 0.5641576647758484, + "loss_ce": 7.077249756548554e-05, + "loss_iou": 0.20703125, + "loss_num": 0.030029296875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 440214632, + "step": 7856 + }, + { + "epoch": 17.498886414253896, + "grad_norm": 19.271411895751953, + "learning_rate": 1e-06, + "loss": 0.3269, + "num_input_tokens_seen": 440273540, + "step": 7857 + }, + { + "epoch": 17.498886414253896, + "loss": 0.36080411076545715, + "loss_ce": 8.634180994704366e-05, + "loss_iou": 0.15234375, + "loss_num": 0.01123046875, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 440273540, + "step": 7857 + }, + { + "epoch": 17.501113585746104, + "grad_norm": 28.240005493164062, + "learning_rate": 1e-06, + "loss": 0.3847, + "num_input_tokens_seen": 440328284, + "step": 7858 + }, + { + "epoch": 17.501113585746104, + "loss": 0.3158857226371765, + "loss_ce": 8.983007137430832e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.006378173828125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 440328284, + "step": 7858 + }, + { + "epoch": 17.50334075723831, + "grad_norm": 21.047122955322266, + "learning_rate": 1e-06, + "loss": 0.566, + "num_input_tokens_seen": 440384876, + "step": 7859 + }, + { + "epoch": 17.50334075723831, + "loss": 0.5007518529891968, + "loss_ce": 0.00014154997188597918, + "loss_iou": 0.1943359375, + "loss_num": 0.0225830078125, + "loss_xval": 0.5, + "num_input_tokens_seen": 440384876, + "step": 7859 + }, + { + "epoch": 17.505567928730514, + "grad_norm": 14.648688316345215, + "learning_rate": 1e-06, + "loss": 0.3644, + "num_input_tokens_seen": 440440236, + "step": 7860 + }, + { + "epoch": 17.505567928730514, + "loss": 0.33874061703681946, + "loss_ce": 0.00020912080071866512, + "loss_iou": 0.1484375, + "loss_num": 0.0081787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 440440236, + "step": 7860 + }, + { + "epoch": 17.50779510022272, + "grad_norm": 14.066313743591309, + "learning_rate": 1e-06, + "loss": 0.3047, + "num_input_tokens_seen": 440495832, + "step": 7861 + }, + { + "epoch": 17.50779510022272, + "loss": 0.2872577905654907, + "loss_ce": 8.740053453948349e-05, + "loss_iou": 0.11474609375, + "loss_num": 0.011474609375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 440495832, + "step": 7861 + }, + { + "epoch": 17.510022271714924, + "grad_norm": 14.307774543762207, + "learning_rate": 1e-06, + "loss": 0.3997, + "num_input_tokens_seen": 440551848, + "step": 7862 + }, + { + "epoch": 17.510022271714924, + "loss": 0.47919386625289917, + "loss_ce": 6.788225437048823e-05, + "loss_iou": 0.197265625, + "loss_num": 0.01708984375, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 440551848, + "step": 7862 + }, + { + "epoch": 17.51224944320713, + "grad_norm": 21.205509185791016, + "learning_rate": 1e-06, + "loss": 0.359, + "num_input_tokens_seen": 440605772, + "step": 7863 + }, + { + "epoch": 17.51224944320713, + "loss": 0.2875545918941498, + "loss_ce": 7.898983312770724e-05, + "loss_iou": 0.125, + "loss_num": 0.007415771484375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 440605772, + "step": 7863 + }, + { + "epoch": 17.514476614699333, + "grad_norm": 17.232234954833984, + "learning_rate": 1e-06, + "loss": 0.4147, + "num_input_tokens_seen": 440664340, + "step": 7864 + }, + { + "epoch": 17.514476614699333, + "loss": 0.43732959032058716, + "loss_ce": 7.375919085461646e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01025390625, + "loss_xval": 0.4375, + "num_input_tokens_seen": 440664340, + "step": 7864 + }, + { + "epoch": 17.51670378619154, + "grad_norm": 13.662554740905762, + "learning_rate": 1e-06, + "loss": 0.351, + "num_input_tokens_seen": 440720316, + "step": 7865 + }, + { + "epoch": 17.51670378619154, + "loss": 0.3087965250015259, + "loss_ce": 8.073220669757575e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.010498046875, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 440720316, + "step": 7865 + }, + { + "epoch": 17.518930957683743, + "grad_norm": 27.160282135009766, + "learning_rate": 1e-06, + "loss": 0.4684, + "num_input_tokens_seen": 440777844, + "step": 7866 + }, + { + "epoch": 17.518930957683743, + "loss": 0.5026195049285889, + "loss_ce": 8.654060366097838e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.01409912109375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 440777844, + "step": 7866 + }, + { + "epoch": 17.521158129175948, + "grad_norm": 16.337854385375977, + "learning_rate": 1e-06, + "loss": 0.4732, + "num_input_tokens_seen": 440835848, + "step": 7867 + }, + { + "epoch": 17.521158129175948, + "loss": 0.5095036625862122, + "loss_ce": 0.00010422736522741616, + "loss_iou": 0.2138671875, + "loss_num": 0.016357421875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 440835848, + "step": 7867 + }, + { + "epoch": 17.523385300668153, + "grad_norm": 16.995683670043945, + "learning_rate": 1e-06, + "loss": 0.4265, + "num_input_tokens_seen": 440894060, + "step": 7868 + }, + { + "epoch": 17.523385300668153, + "loss": 0.2940109968185425, + "loss_ce": 6.568758544744924e-05, + "loss_iou": 0.126953125, + "loss_num": 0.0081787109375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 440894060, + "step": 7868 + }, + { + "epoch": 17.525612472160358, + "grad_norm": 19.353191375732422, + "learning_rate": 1e-06, + "loss": 0.4849, + "num_input_tokens_seen": 440950212, + "step": 7869 + }, + { + "epoch": 17.525612472160358, + "loss": 0.3159940838813782, + "loss_ce": 7.612561603309587e-05, + "loss_iou": 0.125, + "loss_num": 0.01318359375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 440950212, + "step": 7869 + }, + { + "epoch": 17.527839643652563, + "grad_norm": 29.26142692565918, + "learning_rate": 1e-06, + "loss": 0.5345, + "num_input_tokens_seen": 441005288, + "step": 7870 + }, + { + "epoch": 17.527839643652563, + "loss": 0.500335693359375, + "loss_ce": 9.152606799034402e-05, + "loss_iou": 0.2109375, + "loss_num": 0.015625, + "loss_xval": 0.5, + "num_input_tokens_seen": 441005288, + "step": 7870 + }, + { + "epoch": 17.530066815144767, + "grad_norm": 21.974380493164062, + "learning_rate": 1e-06, + "loss": 0.4412, + "num_input_tokens_seen": 441060244, + "step": 7871 + }, + { + "epoch": 17.530066815144767, + "loss": 0.42531251907348633, + "loss_ce": 8.057255035964772e-05, + "loss_iou": 0.19140625, + "loss_num": 0.00848388671875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 441060244, + "step": 7871 + }, + { + "epoch": 17.532293986636972, + "grad_norm": 12.20707893371582, + "learning_rate": 1e-06, + "loss": 0.3677, + "num_input_tokens_seen": 441116740, + "step": 7872 + }, + { + "epoch": 17.532293986636972, + "loss": 0.22715777158737183, + "loss_ce": 6.883872265461832e-05, + "loss_iou": 0.0908203125, + "loss_num": 0.0091552734375, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 441116740, + "step": 7872 + }, + { + "epoch": 17.534521158129177, + "grad_norm": 55.13029479980469, + "learning_rate": 1e-06, + "loss": 0.4155, + "num_input_tokens_seen": 441171904, + "step": 7873 + }, + { + "epoch": 17.534521158129177, + "loss": 0.3325577676296234, + "loss_ce": 9.927057544700801e-05, + "loss_iou": 0.140625, + "loss_num": 0.01025390625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 441171904, + "step": 7873 + }, + { + "epoch": 17.536748329621382, + "grad_norm": 16.75377082824707, + "learning_rate": 1e-06, + "loss": 0.4681, + "num_input_tokens_seen": 441228244, + "step": 7874 + }, + { + "epoch": 17.536748329621382, + "loss": 0.4394185543060303, + "loss_ce": 8.750054985284805e-05, + "loss_iou": 0.181640625, + "loss_num": 0.01544189453125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 441228244, + "step": 7874 + }, + { + "epoch": 17.538975501113587, + "grad_norm": 15.606276512145996, + "learning_rate": 1e-06, + "loss": 0.6399, + "num_input_tokens_seen": 441283016, + "step": 7875 + }, + { + "epoch": 17.538975501113587, + "loss": 0.5754181742668152, + "loss_ce": 0.0001007895843940787, + "loss_iou": 0.255859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 441283016, + "step": 7875 + }, + { + "epoch": 17.54120267260579, + "grad_norm": 12.660006523132324, + "learning_rate": 1e-06, + "loss": 0.4726, + "num_input_tokens_seen": 441340588, + "step": 7876 + }, + { + "epoch": 17.54120267260579, + "loss": 0.37886592745780945, + "loss_ce": 8.173068636097014e-05, + "loss_iou": 0.15625, + "loss_num": 0.0133056640625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 441340588, + "step": 7876 + }, + { + "epoch": 17.543429844097997, + "grad_norm": 28.50706672668457, + "learning_rate": 1e-06, + "loss": 0.5147, + "num_input_tokens_seen": 441396552, + "step": 7877 + }, + { + "epoch": 17.543429844097997, + "loss": 0.40114733576774597, + "loss_ce": 8.532906213076785e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00823974609375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 441396552, + "step": 7877 + }, + { + "epoch": 17.5456570155902, + "grad_norm": 18.953811645507812, + "learning_rate": 1e-06, + "loss": 0.3756, + "num_input_tokens_seen": 441454428, + "step": 7878 + }, + { + "epoch": 17.5456570155902, + "loss": 0.40304529666900635, + "loss_ce": 9.119759488385171e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.01226806640625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 441454428, + "step": 7878 + }, + { + "epoch": 17.547884187082406, + "grad_norm": 40.04744338989258, + "learning_rate": 1e-06, + "loss": 0.3407, + "num_input_tokens_seen": 441509888, + "step": 7879 + }, + { + "epoch": 17.547884187082406, + "loss": 0.3022136688232422, + "loss_ce": 8.962298306869343e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.00775146484375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 441509888, + "step": 7879 + }, + { + "epoch": 17.55011135857461, + "grad_norm": 23.62949562072754, + "learning_rate": 1e-06, + "loss": 0.3179, + "num_input_tokens_seen": 441567796, + "step": 7880 + }, + { + "epoch": 17.55011135857461, + "loss": 0.26015806198120117, + "loss_ce": 8.726240776013583e-05, + "loss_iou": 0.11328125, + "loss_num": 0.006805419921875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 441567796, + "step": 7880 + }, + { + "epoch": 17.552338530066816, + "grad_norm": 11.53111743927002, + "learning_rate": 1e-06, + "loss": 0.5086, + "num_input_tokens_seen": 441624796, + "step": 7881 + }, + { + "epoch": 17.552338530066816, + "loss": 0.4687040448188782, + "loss_ce": 7.612221816089004e-05, + "loss_iou": 0.1953125, + "loss_num": 0.01544189453125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 441624796, + "step": 7881 + }, + { + "epoch": 17.55456570155902, + "grad_norm": 18.621740341186523, + "learning_rate": 1e-06, + "loss": 0.4156, + "num_input_tokens_seen": 441681052, + "step": 7882 + }, + { + "epoch": 17.55456570155902, + "loss": 0.4146304726600647, + "loss_ce": 7.969325815793127e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.006622314453125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 441681052, + "step": 7882 + }, + { + "epoch": 17.556792873051226, + "grad_norm": 19.41095542907715, + "learning_rate": 1e-06, + "loss": 0.426, + "num_input_tokens_seen": 441736492, + "step": 7883 + }, + { + "epoch": 17.556792873051226, + "loss": 0.48913177847862244, + "loss_ce": 0.00011811777949333191, + "loss_iou": 0.1943359375, + "loss_num": 0.0198974609375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 441736492, + "step": 7883 + }, + { + "epoch": 17.55902004454343, + "grad_norm": 225.51150512695312, + "learning_rate": 1e-06, + "loss": 0.4115, + "num_input_tokens_seen": 441791384, + "step": 7884 + }, + { + "epoch": 17.55902004454343, + "loss": 0.42270350456237793, + "loss_ce": 9.607183164916933e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.006683349609375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 441791384, + "step": 7884 + }, + { + "epoch": 17.561247216035635, + "grad_norm": 22.389850616455078, + "learning_rate": 1e-06, + "loss": 0.4788, + "num_input_tokens_seen": 441846172, + "step": 7885 + }, + { + "epoch": 17.561247216035635, + "loss": 0.44719523191452026, + "loss_ce": 0.00011274641292402521, + "loss_iou": 0.19140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 441846172, + "step": 7885 + }, + { + "epoch": 17.56347438752784, + "grad_norm": 25.177894592285156, + "learning_rate": 1e-06, + "loss": 0.4156, + "num_input_tokens_seen": 441901144, + "step": 7886 + }, + { + "epoch": 17.56347438752784, + "loss": 0.5812680721282959, + "loss_ce": 9.13147086976096e-05, + "loss_iou": 0.255859375, + "loss_num": 0.014404296875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 441901144, + "step": 7886 + }, + { + "epoch": 17.565701559020045, + "grad_norm": 20.27315902709961, + "learning_rate": 1e-06, + "loss": 0.3407, + "num_input_tokens_seen": 441956844, + "step": 7887 + }, + { + "epoch": 17.565701559020045, + "loss": 0.32326167821884155, + "loss_ce": 8.051642362261191e-05, + "loss_iou": 0.142578125, + "loss_num": 0.007537841796875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 441956844, + "step": 7887 + }, + { + "epoch": 17.56792873051225, + "grad_norm": 15.661298751831055, + "learning_rate": 1e-06, + "loss": 0.359, + "num_input_tokens_seen": 442013600, + "step": 7888 + }, + { + "epoch": 17.56792873051225, + "loss": 0.3030818700790405, + "loss_ce": 0.00010335086699342355, + "loss_iou": 0.1337890625, + "loss_num": 0.0069580078125, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 442013600, + "step": 7888 + }, + { + "epoch": 17.570155902004455, + "grad_norm": 16.606544494628906, + "learning_rate": 1e-06, + "loss": 0.3319, + "num_input_tokens_seen": 442070068, + "step": 7889 + }, + { + "epoch": 17.570155902004455, + "loss": 0.34052973985671997, + "loss_ce": 7.563138206023723e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.01226806640625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 442070068, + "step": 7889 + }, + { + "epoch": 17.57238307349666, + "grad_norm": 17.5957088470459, + "learning_rate": 1e-06, + "loss": 0.307, + "num_input_tokens_seen": 442125372, + "step": 7890 + }, + { + "epoch": 17.57238307349666, + "loss": 0.29927778244018555, + "loss_ce": 8.342567889485508e-05, + "loss_iou": 0.125, + "loss_num": 0.00970458984375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 442125372, + "step": 7890 + }, + { + "epoch": 17.574610244988865, + "grad_norm": 22.874229431152344, + "learning_rate": 1e-06, + "loss": 0.3021, + "num_input_tokens_seen": 442180908, + "step": 7891 + }, + { + "epoch": 17.574610244988865, + "loss": 0.34810495376586914, + "loss_ce": 8.246798097388819e-05, + "loss_iou": 0.1484375, + "loss_num": 0.01007080078125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 442180908, + "step": 7891 + }, + { + "epoch": 17.57683741648107, + "grad_norm": 12.980911254882812, + "learning_rate": 1e-06, + "loss": 0.5465, + "num_input_tokens_seen": 442237344, + "step": 7892 + }, + { + "epoch": 17.57683741648107, + "loss": 0.6626755595207214, + "loss_ce": 7.78659014031291e-05, + "loss_iou": 0.267578125, + "loss_num": 0.025390625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 442237344, + "step": 7892 + }, + { + "epoch": 17.579064587973274, + "grad_norm": 16.26838493347168, + "learning_rate": 1e-06, + "loss": 0.3547, + "num_input_tokens_seen": 442291704, + "step": 7893 + }, + { + "epoch": 17.579064587973274, + "loss": 0.18787457048892975, + "loss_ce": 6.940308230696246e-05, + "loss_iou": 0.07763671875, + "loss_num": 0.006500244140625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 442291704, + "step": 7893 + }, + { + "epoch": 17.58129175946548, + "grad_norm": 19.98689079284668, + "learning_rate": 1e-06, + "loss": 0.5964, + "num_input_tokens_seen": 442348288, + "step": 7894 + }, + { + "epoch": 17.58129175946548, + "loss": 0.5989159345626831, + "loss_ce": 0.00028316857060417533, + "loss_iou": 0.263671875, + "loss_num": 0.0142822265625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 442348288, + "step": 7894 + }, + { + "epoch": 17.583518930957684, + "grad_norm": 16.321231842041016, + "learning_rate": 1e-06, + "loss": 0.354, + "num_input_tokens_seen": 442404364, + "step": 7895 + }, + { + "epoch": 17.583518930957684, + "loss": 0.3744066655635834, + "loss_ce": 7.803218613844365e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00750732421875, + "loss_xval": 0.375, + "num_input_tokens_seen": 442404364, + "step": 7895 + }, + { + "epoch": 17.58574610244989, + "grad_norm": 18.746238708496094, + "learning_rate": 1e-06, + "loss": 0.3205, + "num_input_tokens_seen": 442461344, + "step": 7896 + }, + { + "epoch": 17.58574610244989, + "loss": 0.2867514491081238, + "loss_ce": 6.934157863724977e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.007110595703125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 442461344, + "step": 7896 + }, + { + "epoch": 17.587973273942094, + "grad_norm": 25.406570434570312, + "learning_rate": 1e-06, + "loss": 0.4222, + "num_input_tokens_seen": 442516584, + "step": 7897 + }, + { + "epoch": 17.587973273942094, + "loss": 0.33216458559036255, + "loss_ce": 7.232959615066648e-05, + "loss_iou": 0.150390625, + "loss_num": 0.006195068359375, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 442516584, + "step": 7897 + }, + { + "epoch": 17.5902004454343, + "grad_norm": 18.10183334350586, + "learning_rate": 1e-06, + "loss": 0.3157, + "num_input_tokens_seen": 442573824, + "step": 7898 + }, + { + "epoch": 17.5902004454343, + "loss": 0.22795072197914124, + "loss_ce": 7.596083742100745e-05, + "loss_iou": 0.0966796875, + "loss_num": 0.0068359375, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 442573824, + "step": 7898 + }, + { + "epoch": 17.592427616926503, + "grad_norm": 38.653526306152344, + "learning_rate": 1e-06, + "loss": 0.5042, + "num_input_tokens_seen": 442629168, + "step": 7899 + }, + { + "epoch": 17.592427616926503, + "loss": 0.3851657509803772, + "loss_ce": 9.493608376942575e-05, + "loss_iou": 0.17578125, + "loss_num": 0.00677490234375, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 442629168, + "step": 7899 + }, + { + "epoch": 17.59465478841871, + "grad_norm": 32.544410705566406, + "learning_rate": 1e-06, + "loss": 0.5607, + "num_input_tokens_seen": 442683144, + "step": 7900 + }, + { + "epoch": 17.59465478841871, + "loss": 0.556228756904602, + "loss_ce": 7.639994146302342e-05, + "loss_iou": 0.25390625, + "loss_num": 0.010009765625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 442683144, + "step": 7900 + }, + { + "epoch": 17.596881959910913, + "grad_norm": 19.76438331604004, + "learning_rate": 1e-06, + "loss": 0.5191, + "num_input_tokens_seen": 442737092, + "step": 7901 + }, + { + "epoch": 17.596881959910913, + "loss": 0.436367928981781, + "loss_ce": 7.337699935305864e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.008056640625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 442737092, + "step": 7901 + }, + { + "epoch": 17.599109131403118, + "grad_norm": 25.040283203125, + "learning_rate": 1e-06, + "loss": 0.5854, + "num_input_tokens_seen": 442790552, + "step": 7902 + }, + { + "epoch": 17.599109131403118, + "loss": 0.5836904048919678, + "loss_ce": 7.226417073979974e-05, + "loss_iou": 0.26953125, + "loss_num": 0.00897216796875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 442790552, + "step": 7902 + }, + { + "epoch": 17.601336302895323, + "grad_norm": 25.69877052307129, + "learning_rate": 1e-06, + "loss": 0.4327, + "num_input_tokens_seen": 442845024, + "step": 7903 + }, + { + "epoch": 17.601336302895323, + "loss": 0.28219154477119446, + "loss_ce": 8.705261279828846e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.00787353515625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 442845024, + "step": 7903 + }, + { + "epoch": 17.603563474387528, + "grad_norm": 22.128719329833984, + "learning_rate": 1e-06, + "loss": 0.3273, + "num_input_tokens_seen": 442901976, + "step": 7904 + }, + { + "epoch": 17.603563474387528, + "loss": 0.382587730884552, + "loss_ce": 8.040809188969433e-05, + "loss_iou": 0.169921875, + "loss_num": 0.00848388671875, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 442901976, + "step": 7904 + }, + { + "epoch": 17.605790645879733, + "grad_norm": 21.455659866333008, + "learning_rate": 1e-06, + "loss": 0.5433, + "num_input_tokens_seen": 442959192, + "step": 7905 + }, + { + "epoch": 17.605790645879733, + "loss": 0.4209858179092407, + "loss_ce": 8.739200711715966e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.00799560546875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 442959192, + "step": 7905 + }, + { + "epoch": 17.608017817371937, + "grad_norm": 19.420513153076172, + "learning_rate": 1e-06, + "loss": 0.3687, + "num_input_tokens_seen": 443016136, + "step": 7906 + }, + { + "epoch": 17.608017817371937, + "loss": 0.3827753961086273, + "loss_ce": 8.494692883687094e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0135498046875, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 443016136, + "step": 7906 + }, + { + "epoch": 17.610244988864142, + "grad_norm": 15.976387023925781, + "learning_rate": 1e-06, + "loss": 0.6103, + "num_input_tokens_seen": 443072296, + "step": 7907 + }, + { + "epoch": 17.610244988864142, + "loss": 0.8074872493743896, + "loss_ce": 0.0001141867833212018, + "loss_iou": 0.310546875, + "loss_num": 0.037353515625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 443072296, + "step": 7907 + }, + { + "epoch": 17.612472160356347, + "grad_norm": 17.13764190673828, + "learning_rate": 1e-06, + "loss": 0.3673, + "num_input_tokens_seen": 443128500, + "step": 7908 + }, + { + "epoch": 17.612472160356347, + "loss": 0.4594786763191223, + "loss_ce": 0.00012808736937586218, + "loss_iou": 0.181640625, + "loss_num": 0.019287109375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 443128500, + "step": 7908 + }, + { + "epoch": 17.614699331848552, + "grad_norm": 17.32769203186035, + "learning_rate": 1e-06, + "loss": 0.3735, + "num_input_tokens_seen": 443183760, + "step": 7909 + }, + { + "epoch": 17.614699331848552, + "loss": 0.44984328746795654, + "loss_ce": 7.522152736783028e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01068115234375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 443183760, + "step": 7909 + }, + { + "epoch": 17.616926503340757, + "grad_norm": 22.6795711517334, + "learning_rate": 1e-06, + "loss": 0.3399, + "num_input_tokens_seen": 443237420, + "step": 7910 + }, + { + "epoch": 17.616926503340757, + "loss": 0.4001952111721039, + "loss_ce": 0.0001708113995846361, + "loss_iou": 0.1787109375, + "loss_num": 0.00836181640625, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 443237420, + "step": 7910 + }, + { + "epoch": 17.619153674832962, + "grad_norm": 74.46725463867188, + "learning_rate": 1e-06, + "loss": 0.504, + "num_input_tokens_seen": 443295776, + "step": 7911 + }, + { + "epoch": 17.619153674832962, + "loss": 0.5105748772621155, + "loss_ce": 7.682420255150646e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.013427734375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 443295776, + "step": 7911 + }, + { + "epoch": 17.621380846325167, + "grad_norm": 14.628600120544434, + "learning_rate": 1e-06, + "loss": 0.3959, + "num_input_tokens_seen": 443352484, + "step": 7912 + }, + { + "epoch": 17.621380846325167, + "loss": 0.5070550441741943, + "loss_ce": 9.699978545540944e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.01043701171875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 443352484, + "step": 7912 + }, + { + "epoch": 17.62360801781737, + "grad_norm": 19.935930252075195, + "learning_rate": 1e-06, + "loss": 0.4519, + "num_input_tokens_seen": 443406844, + "step": 7913 + }, + { + "epoch": 17.62360801781737, + "loss": 0.47246021032333374, + "loss_ce": 7.862341590225697e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 443406844, + "step": 7913 + }, + { + "epoch": 17.625835189309576, + "grad_norm": 18.648386001586914, + "learning_rate": 1e-06, + "loss": 0.37, + "num_input_tokens_seen": 443463412, + "step": 7914 + }, + { + "epoch": 17.625835189309576, + "loss": 0.4554082155227661, + "loss_ce": 8.59397478052415e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.01043701171875, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 443463412, + "step": 7914 + }, + { + "epoch": 17.62806236080178, + "grad_norm": 11.401290893554688, + "learning_rate": 1e-06, + "loss": 0.4383, + "num_input_tokens_seen": 443517876, + "step": 7915 + }, + { + "epoch": 17.62806236080178, + "loss": 0.4097170829772949, + "loss_ce": 7.962982635945082e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.0113525390625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 443517876, + "step": 7915 + }, + { + "epoch": 17.630289532293986, + "grad_norm": 15.960351943969727, + "learning_rate": 1e-06, + "loss": 0.4956, + "num_input_tokens_seen": 443575536, + "step": 7916 + }, + { + "epoch": 17.630289532293986, + "loss": 0.5167785882949829, + "loss_ce": 0.00011598135461099446, + "loss_iou": 0.2158203125, + "loss_num": 0.01708984375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 443575536, + "step": 7916 + }, + { + "epoch": 17.63251670378619, + "grad_norm": 21.701749801635742, + "learning_rate": 1e-06, + "loss": 0.4341, + "num_input_tokens_seen": 443631704, + "step": 7917 + }, + { + "epoch": 17.63251670378619, + "loss": 0.501663863658905, + "loss_ce": 7.692172948736697e-05, + "loss_iou": 0.2265625, + "loss_num": 0.009765625, + "loss_xval": 0.5, + "num_input_tokens_seen": 443631704, + "step": 7917 + }, + { + "epoch": 17.634743875278396, + "grad_norm": 14.116578102111816, + "learning_rate": 1e-06, + "loss": 0.4556, + "num_input_tokens_seen": 443688408, + "step": 7918 + }, + { + "epoch": 17.634743875278396, + "loss": 0.4800468385219574, + "loss_ce": 6.637441401835531e-05, + "loss_iou": 0.197265625, + "loss_num": 0.016845703125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 443688408, + "step": 7918 + }, + { + "epoch": 17.6369710467706, + "grad_norm": 83.376708984375, + "learning_rate": 1e-06, + "loss": 0.4785, + "num_input_tokens_seen": 443745244, + "step": 7919 + }, + { + "epoch": 17.6369710467706, + "loss": 0.38171082735061646, + "loss_ce": 0.00011904077837243676, + "loss_iou": 0.1650390625, + "loss_num": 0.01031494140625, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 443745244, + "step": 7919 + }, + { + "epoch": 17.639198218262806, + "grad_norm": 18.27570152282715, + "learning_rate": 1e-06, + "loss": 0.4711, + "num_input_tokens_seen": 443804444, + "step": 7920 + }, + { + "epoch": 17.639198218262806, + "loss": 0.5395940542221069, + "loss_ce": 0.00010429859685245901, + "loss_iou": 0.2265625, + "loss_num": 0.01708984375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 443804444, + "step": 7920 + }, + { + "epoch": 17.64142538975501, + "grad_norm": 18.891740798950195, + "learning_rate": 1e-06, + "loss": 0.4308, + "num_input_tokens_seen": 443860448, + "step": 7921 + }, + { + "epoch": 17.64142538975501, + "loss": 0.2655089795589447, + "loss_ce": 6.710184970870614e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.005157470703125, + "loss_xval": 0.265625, + "num_input_tokens_seen": 443860448, + "step": 7921 + }, + { + "epoch": 17.643652561247215, + "grad_norm": 21.501697540283203, + "learning_rate": 1e-06, + "loss": 0.2732, + "num_input_tokens_seen": 443917392, + "step": 7922 + }, + { + "epoch": 17.643652561247215, + "loss": 0.34542766213417053, + "loss_ce": 9.074142144527286e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.006317138671875, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 443917392, + "step": 7922 + }, + { + "epoch": 17.64587973273942, + "grad_norm": 25.702478408813477, + "learning_rate": 1e-06, + "loss": 0.5289, + "num_input_tokens_seen": 443970952, + "step": 7923 + }, + { + "epoch": 17.64587973273942, + "loss": 0.6104388236999512, + "loss_ce": 8.727777458261698e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01458740234375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 443970952, + "step": 7923 + }, + { + "epoch": 17.648106904231625, + "grad_norm": 17.689945220947266, + "learning_rate": 1e-06, + "loss": 0.4139, + "num_input_tokens_seen": 444025404, + "step": 7924 + }, + { + "epoch": 17.648106904231625, + "loss": 0.3999115824699402, + "loss_ce": 8.553590305382386e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00799560546875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 444025404, + "step": 7924 + }, + { + "epoch": 17.65033407572383, + "grad_norm": 17.883432388305664, + "learning_rate": 1e-06, + "loss": 0.4983, + "num_input_tokens_seen": 444081664, + "step": 7925 + }, + { + "epoch": 17.65033407572383, + "loss": 0.4602789878845215, + "loss_ce": 7.389666279777884e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0167236328125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 444081664, + "step": 7925 + }, + { + "epoch": 17.652561247216035, + "grad_norm": 16.05899429321289, + "learning_rate": 1e-06, + "loss": 0.472, + "num_input_tokens_seen": 444141192, + "step": 7926 + }, + { + "epoch": 17.652561247216035, + "loss": 0.3934362232685089, + "loss_ce": 9.515189594822004e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.0103759765625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 444141192, + "step": 7926 + }, + { + "epoch": 17.65478841870824, + "grad_norm": 26.469680786132812, + "learning_rate": 1e-06, + "loss": 0.4919, + "num_input_tokens_seen": 444197160, + "step": 7927 + }, + { + "epoch": 17.65478841870824, + "loss": 0.4767637252807617, + "loss_ce": 7.915783498901874e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 444197160, + "step": 7927 + }, + { + "epoch": 17.657015590200444, + "grad_norm": 13.821784973144531, + "learning_rate": 1e-06, + "loss": 0.3612, + "num_input_tokens_seen": 444252964, + "step": 7928 + }, + { + "epoch": 17.657015590200444, + "loss": 0.23647625744342804, + "loss_ce": 8.710073598194867e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.006103515625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 444252964, + "step": 7928 + }, + { + "epoch": 17.65924276169265, + "grad_norm": 16.62885284423828, + "learning_rate": 1e-06, + "loss": 0.386, + "num_input_tokens_seen": 444307512, + "step": 7929 + }, + { + "epoch": 17.65924276169265, + "loss": 0.4383907914161682, + "loss_ce": 6.681391096208245e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.020263671875, + "loss_xval": 0.4375, + "num_input_tokens_seen": 444307512, + "step": 7929 + }, + { + "epoch": 17.661469933184854, + "grad_norm": 20.293445587158203, + "learning_rate": 1e-06, + "loss": 0.4577, + "num_input_tokens_seen": 444363292, + "step": 7930 + }, + { + "epoch": 17.661469933184854, + "loss": 0.3440844714641571, + "loss_ce": 9.034699178300798e-05, + "loss_iou": 0.146484375, + "loss_num": 0.01025390625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 444363292, + "step": 7930 + }, + { + "epoch": 17.66369710467706, + "grad_norm": 19.445138931274414, + "learning_rate": 1e-06, + "loss": 0.4071, + "num_input_tokens_seen": 444417980, + "step": 7931 + }, + { + "epoch": 17.66369710467706, + "loss": 0.35793596506118774, + "loss_ce": 8.685909415362403e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.0069580078125, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 444417980, + "step": 7931 + }, + { + "epoch": 17.665924276169264, + "grad_norm": 21.812707901000977, + "learning_rate": 1e-06, + "loss": 0.4571, + "num_input_tokens_seen": 444475180, + "step": 7932 + }, + { + "epoch": 17.665924276169264, + "loss": 0.48831433057785034, + "loss_ce": 9.407131437910721e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.0089111328125, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 444475180, + "step": 7932 + }, + { + "epoch": 17.66815144766147, + "grad_norm": 21.94272804260254, + "learning_rate": 1e-06, + "loss": 0.4738, + "num_input_tokens_seen": 444530092, + "step": 7933 + }, + { + "epoch": 17.66815144766147, + "loss": 0.4068112075328827, + "loss_ce": 7.29096500435844e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.0128173828125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 444530092, + "step": 7933 + }, + { + "epoch": 17.670378619153674, + "grad_norm": 21.521142959594727, + "learning_rate": 1e-06, + "loss": 0.5135, + "num_input_tokens_seen": 444585348, + "step": 7934 + }, + { + "epoch": 17.670378619153674, + "loss": 0.5115725994110107, + "loss_ce": 9.7946947789751e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.01434326171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 444585348, + "step": 7934 + }, + { + "epoch": 17.67260579064588, + "grad_norm": 14.11042308807373, + "learning_rate": 1e-06, + "loss": 0.2493, + "num_input_tokens_seen": 444641960, + "step": 7935 + }, + { + "epoch": 17.67260579064588, + "loss": 0.23591431975364685, + "loss_ce": 7.447184179909527e-05, + "loss_iou": 0.1064453125, + "loss_num": 0.004547119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 444641960, + "step": 7935 + }, + { + "epoch": 17.674832962138083, + "grad_norm": 23.0136775970459, + "learning_rate": 1e-06, + "loss": 0.4793, + "num_input_tokens_seen": 444695372, + "step": 7936 + }, + { + "epoch": 17.674832962138083, + "loss": 0.3692181408405304, + "loss_ce": 7.750838994979858e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.008544921875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 444695372, + "step": 7936 + }, + { + "epoch": 17.677060133630288, + "grad_norm": 20.372121810913086, + "learning_rate": 1e-06, + "loss": 0.414, + "num_input_tokens_seen": 444750148, + "step": 7937 + }, + { + "epoch": 17.677060133630288, + "loss": 0.34128010272979736, + "loss_ce": 9.356189548270777e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.004547119140625, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 444750148, + "step": 7937 + }, + { + "epoch": 17.679287305122493, + "grad_norm": 23.466459274291992, + "learning_rate": 1e-06, + "loss": 0.3234, + "num_input_tokens_seen": 444809496, + "step": 7938 + }, + { + "epoch": 17.679287305122493, + "loss": 0.36702263355255127, + "loss_ce": 7.92567734606564e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.008056640625, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 444809496, + "step": 7938 + }, + { + "epoch": 17.681514476614698, + "grad_norm": 18.88533592224121, + "learning_rate": 1e-06, + "loss": 0.5686, + "num_input_tokens_seen": 444865560, + "step": 7939 + }, + { + "epoch": 17.681514476614698, + "loss": 0.4967142343521118, + "loss_ce": 0.00013218897220212966, + "loss_iou": 0.2060546875, + "loss_num": 0.0169677734375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 444865560, + "step": 7939 + }, + { + "epoch": 17.683741648106903, + "grad_norm": 31.180578231811523, + "learning_rate": 1e-06, + "loss": 0.5194, + "num_input_tokens_seen": 444921728, + "step": 7940 + }, + { + "epoch": 17.683741648106903, + "loss": 0.3380870223045349, + "loss_ce": 0.0001658698165556416, + "loss_iou": 0.1455078125, + "loss_num": 0.00921630859375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 444921728, + "step": 7940 + }, + { + "epoch": 17.685968819599108, + "grad_norm": 28.334543228149414, + "learning_rate": 1e-06, + "loss": 0.4694, + "num_input_tokens_seen": 444976224, + "step": 7941 + }, + { + "epoch": 17.685968819599108, + "loss": 0.44913750886917114, + "loss_ce": 0.00010187811858486384, + "loss_iou": 0.1953125, + "loss_num": 0.01177978515625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 444976224, + "step": 7941 + }, + { + "epoch": 17.688195991091312, + "grad_norm": 19.259824752807617, + "learning_rate": 1e-06, + "loss": 0.368, + "num_input_tokens_seen": 445031004, + "step": 7942 + }, + { + "epoch": 17.688195991091312, + "loss": 0.35163217782974243, + "loss_ce": 6.968076922930777e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.00750732421875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 445031004, + "step": 7942 + }, + { + "epoch": 17.690423162583517, + "grad_norm": 15.660816192626953, + "learning_rate": 1e-06, + "loss": 0.4662, + "num_input_tokens_seen": 445088476, + "step": 7943 + }, + { + "epoch": 17.690423162583517, + "loss": 0.40098705887794495, + "loss_ce": 0.00010816691064974293, + "loss_iou": 0.185546875, + "loss_num": 0.005859375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 445088476, + "step": 7943 + }, + { + "epoch": 17.692650334075722, + "grad_norm": 63.67619705200195, + "learning_rate": 1e-06, + "loss": 0.5689, + "num_input_tokens_seen": 445146920, + "step": 7944 + }, + { + "epoch": 17.692650334075722, + "loss": 0.620599091053009, + "loss_ce": 0.00011569763591978699, + "loss_iou": 0.275390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 445146920, + "step": 7944 + }, + { + "epoch": 17.694877505567927, + "grad_norm": 29.446853637695312, + "learning_rate": 1e-06, + "loss": 0.4434, + "num_input_tokens_seen": 445199824, + "step": 7945 + }, + { + "epoch": 17.694877505567927, + "loss": 0.44784268736839294, + "loss_ce": 8.878282096702605e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0108642578125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 445199824, + "step": 7945 + }, + { + "epoch": 17.697104677060132, + "grad_norm": 25.64476203918457, + "learning_rate": 1e-06, + "loss": 0.4774, + "num_input_tokens_seen": 445257296, + "step": 7946 + }, + { + "epoch": 17.697104677060132, + "loss": 0.3815593123435974, + "loss_ce": 8.9571054559201e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.0172119140625, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 445257296, + "step": 7946 + }, + { + "epoch": 17.69933184855234, + "grad_norm": 16.938840866088867, + "learning_rate": 1e-06, + "loss": 0.3748, + "num_input_tokens_seen": 445314848, + "step": 7947 + }, + { + "epoch": 17.69933184855234, + "loss": 0.32621848583221436, + "loss_ce": 7.714293315075338e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.0098876953125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 445314848, + "step": 7947 + }, + { + "epoch": 17.70155902004454, + "grad_norm": 15.301907539367676, + "learning_rate": 1e-06, + "loss": 0.2937, + "num_input_tokens_seen": 445372204, + "step": 7948 + }, + { + "epoch": 17.70155902004454, + "loss": 0.30735015869140625, + "loss_ce": 9.919532749336213e-05, + "loss_iou": 0.138671875, + "loss_num": 0.006103515625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 445372204, + "step": 7948 + }, + { + "epoch": 17.70378619153675, + "grad_norm": 16.031461715698242, + "learning_rate": 1e-06, + "loss": 0.4855, + "num_input_tokens_seen": 445430708, + "step": 7949 + }, + { + "epoch": 17.70378619153675, + "loss": 0.49899452924728394, + "loss_ce": 9.31676768232137e-05, + "loss_iou": 0.18359375, + "loss_num": 0.0262451171875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 445430708, + "step": 7949 + }, + { + "epoch": 17.706013363028955, + "grad_norm": 13.996493339538574, + "learning_rate": 1e-06, + "loss": 0.4567, + "num_input_tokens_seen": 445485268, + "step": 7950 + }, + { + "epoch": 17.706013363028955, + "loss": 0.4317079484462738, + "loss_ce": 6.729987217113376e-05, + "loss_iou": 0.1796875, + "loss_num": 0.01434326171875, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 445485268, + "step": 7950 + }, + { + "epoch": 17.70824053452116, + "grad_norm": 19.51962661743164, + "learning_rate": 1e-06, + "loss": 0.5369, + "num_input_tokens_seen": 445538736, + "step": 7951 + }, + { + "epoch": 17.70824053452116, + "loss": 0.7260493636131287, + "loss_ce": 9.723611583467573e-05, + "loss_iou": 0.310546875, + "loss_num": 0.020751953125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 445538736, + "step": 7951 + }, + { + "epoch": 17.710467706013365, + "grad_norm": 34.36812210083008, + "learning_rate": 1e-06, + "loss": 0.6776, + "num_input_tokens_seen": 445593288, + "step": 7952 + }, + { + "epoch": 17.710467706013365, + "loss": 0.8874142169952393, + "loss_ce": 8.507441089022905e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0185546875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 445593288, + "step": 7952 + }, + { + "epoch": 17.71269487750557, + "grad_norm": 19.97995948791504, + "learning_rate": 1e-06, + "loss": 0.3888, + "num_input_tokens_seen": 445651940, + "step": 7953 + }, + { + "epoch": 17.71269487750557, + "loss": 0.35836881399154663, + "loss_ce": 9.244780812878162e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.01019287109375, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 445651940, + "step": 7953 + }, + { + "epoch": 17.714922048997774, + "grad_norm": 18.99198341369629, + "learning_rate": 1e-06, + "loss": 0.3816, + "num_input_tokens_seen": 445707972, + "step": 7954 + }, + { + "epoch": 17.714922048997774, + "loss": 0.43836966156959534, + "loss_ce": 7.619359530508518e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01043701171875, + "loss_xval": 0.4375, + "num_input_tokens_seen": 445707972, + "step": 7954 + }, + { + "epoch": 17.71714922048998, + "grad_norm": 24.05302619934082, + "learning_rate": 1e-06, + "loss": 0.4304, + "num_input_tokens_seen": 445765012, + "step": 7955 + }, + { + "epoch": 17.71714922048998, + "loss": 0.5598816871643066, + "loss_ce": 0.00012829234765376896, + "loss_iou": 0.24609375, + "loss_num": 0.013427734375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 445765012, + "step": 7955 + }, + { + "epoch": 17.719376391982184, + "grad_norm": 22.349224090576172, + "learning_rate": 1e-06, + "loss": 0.4478, + "num_input_tokens_seen": 445820252, + "step": 7956 + }, + { + "epoch": 17.719376391982184, + "loss": 0.5640408992767334, + "loss_ce": 7.6019496191293e-05, + "loss_iou": 0.2265625, + "loss_num": 0.0220947265625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 445820252, + "step": 7956 + }, + { + "epoch": 17.72160356347439, + "grad_norm": 33.075538635253906, + "learning_rate": 1e-06, + "loss": 0.399, + "num_input_tokens_seen": 445875052, + "step": 7957 + }, + { + "epoch": 17.72160356347439, + "loss": 0.39026761054992676, + "loss_ce": 6.987876258790493e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00860595703125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 445875052, + "step": 7957 + }, + { + "epoch": 17.723830734966594, + "grad_norm": 16.910858154296875, + "learning_rate": 1e-06, + "loss": 0.4148, + "num_input_tokens_seen": 445928940, + "step": 7958 + }, + { + "epoch": 17.723830734966594, + "loss": 0.2792437970638275, + "loss_ce": 6.90067681716755e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0091552734375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 445928940, + "step": 7958 + }, + { + "epoch": 17.7260579064588, + "grad_norm": 26.71137046813965, + "learning_rate": 1e-06, + "loss": 0.3061, + "num_input_tokens_seen": 445983828, + "step": 7959 + }, + { + "epoch": 17.7260579064588, + "loss": 0.20442020893096924, + "loss_ce": 7.449374243151397e-05, + "loss_iou": 0.0908203125, + "loss_num": 0.004547119140625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 445983828, + "step": 7959 + }, + { + "epoch": 17.728285077951004, + "grad_norm": 18.750337600708008, + "learning_rate": 1e-06, + "loss": 0.4311, + "num_input_tokens_seen": 446040604, + "step": 7960 + }, + { + "epoch": 17.728285077951004, + "loss": 0.4378310739994049, + "loss_ce": 8.695643191458657e-05, + "loss_iou": 0.181640625, + "loss_num": 0.0147705078125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 446040604, + "step": 7960 + }, + { + "epoch": 17.73051224944321, + "grad_norm": 21.329652786254883, + "learning_rate": 1e-06, + "loss": 0.6707, + "num_input_tokens_seen": 446096288, + "step": 7961 + }, + { + "epoch": 17.73051224944321, + "loss": 0.8002760410308838, + "loss_ce": 0.00034930004039779305, + "loss_iou": 0.32421875, + "loss_num": 0.0299072265625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 446096288, + "step": 7961 + }, + { + "epoch": 17.732739420935413, + "grad_norm": 19.80221176147461, + "learning_rate": 1e-06, + "loss": 0.4496, + "num_input_tokens_seen": 446153240, + "step": 7962 + }, + { + "epoch": 17.732739420935413, + "loss": 0.37191396951675415, + "loss_ce": 8.780106145422906e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0150146484375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 446153240, + "step": 7962 + }, + { + "epoch": 17.734966592427618, + "grad_norm": 23.22903823852539, + "learning_rate": 1e-06, + "loss": 0.5215, + "num_input_tokens_seen": 446209048, + "step": 7963 + }, + { + "epoch": 17.734966592427618, + "loss": 0.5368286967277527, + "loss_ce": 8.552963117836043e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01055908203125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 446209048, + "step": 7963 + }, + { + "epoch": 17.737193763919823, + "grad_norm": 22.78981590270996, + "learning_rate": 1e-06, + "loss": 0.3933, + "num_input_tokens_seen": 446266012, + "step": 7964 + }, + { + "epoch": 17.737193763919823, + "loss": 0.48521602153778076, + "loss_ce": 0.00010859415488084778, + "loss_iou": 0.2236328125, + "loss_num": 0.007659912109375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 446266012, + "step": 7964 + }, + { + "epoch": 17.739420935412028, + "grad_norm": 19.937816619873047, + "learning_rate": 1e-06, + "loss": 0.2245, + "num_input_tokens_seen": 446323072, + "step": 7965 + }, + { + "epoch": 17.739420935412028, + "loss": 0.2009565830230713, + "loss_ce": 8.989279740490019e-05, + "loss_iou": 0.087890625, + "loss_num": 0.005126953125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 446323072, + "step": 7965 + }, + { + "epoch": 17.741648106904233, + "grad_norm": 15.368204116821289, + "learning_rate": 1e-06, + "loss": 0.4583, + "num_input_tokens_seen": 446378856, + "step": 7966 + }, + { + "epoch": 17.741648106904233, + "loss": 0.5300416350364685, + "loss_ce": 7.336361159104854e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 446378856, + "step": 7966 + }, + { + "epoch": 17.743875278396438, + "grad_norm": 22.195009231567383, + "learning_rate": 1e-06, + "loss": 0.3353, + "num_input_tokens_seen": 446434664, + "step": 7967 + }, + { + "epoch": 17.743875278396438, + "loss": 0.3601817786693573, + "loss_ce": 7.436297892127186e-05, + "loss_iou": 0.1484375, + "loss_num": 0.01251220703125, + "loss_xval": 0.359375, + "num_input_tokens_seen": 446434664, + "step": 7967 + }, + { + "epoch": 17.746102449888642, + "grad_norm": 13.607110977172852, + "learning_rate": 1e-06, + "loss": 0.4167, + "num_input_tokens_seen": 446490256, + "step": 7968 + }, + { + "epoch": 17.746102449888642, + "loss": 0.5662601590156555, + "loss_ce": 9.807750029722229e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 446490256, + "step": 7968 + }, + { + "epoch": 17.748329621380847, + "grad_norm": 14.89394474029541, + "learning_rate": 1e-06, + "loss": 0.3675, + "num_input_tokens_seen": 446545948, + "step": 7969 + }, + { + "epoch": 17.748329621380847, + "loss": 0.38515520095825195, + "loss_ce": 8.440592500846833e-05, + "loss_iou": 0.166015625, + "loss_num": 0.01055908203125, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 446545948, + "step": 7969 + }, + { + "epoch": 17.750556792873052, + "grad_norm": 18.065866470336914, + "learning_rate": 1e-06, + "loss": 0.3868, + "num_input_tokens_seen": 446601844, + "step": 7970 + }, + { + "epoch": 17.750556792873052, + "loss": 0.3480460047721863, + "loss_ce": 8.455889474134892e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.00909423828125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 446601844, + "step": 7970 + }, + { + "epoch": 17.752783964365257, + "grad_norm": 17.822830200195312, + "learning_rate": 1e-06, + "loss": 0.4991, + "num_input_tokens_seen": 446657308, + "step": 7971 + }, + { + "epoch": 17.752783964365257, + "loss": 0.5141241550445557, + "loss_ce": 8.604546019341797e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 446657308, + "step": 7971 + }, + { + "epoch": 17.755011135857462, + "grad_norm": 19.482011795043945, + "learning_rate": 1e-06, + "loss": 0.4116, + "num_input_tokens_seen": 446714312, + "step": 7972 + }, + { + "epoch": 17.755011135857462, + "loss": 0.3692222833633423, + "loss_ce": 8.163502207025886e-05, + "loss_iou": 0.173828125, + "loss_num": 0.004425048828125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 446714312, + "step": 7972 + }, + { + "epoch": 17.757238307349667, + "grad_norm": 17.875993728637695, + "learning_rate": 1e-06, + "loss": 0.3371, + "num_input_tokens_seen": 446768140, + "step": 7973 + }, + { + "epoch": 17.757238307349667, + "loss": 0.3051214814186096, + "loss_ce": 6.776393274776638e-05, + "loss_iou": 0.130859375, + "loss_num": 0.00860595703125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 446768140, + "step": 7973 + }, + { + "epoch": 17.75946547884187, + "grad_norm": 16.569456100463867, + "learning_rate": 1e-06, + "loss": 0.617, + "num_input_tokens_seen": 446822136, + "step": 7974 + }, + { + "epoch": 17.75946547884187, + "loss": 0.8183131814002991, + "loss_ce": 0.0001368634111713618, + "loss_iou": 0.275390625, + "loss_num": 0.052978515625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 446822136, + "step": 7974 + }, + { + "epoch": 17.761692650334076, + "grad_norm": 18.004579544067383, + "learning_rate": 1e-06, + "loss": 0.3327, + "num_input_tokens_seen": 446878228, + "step": 7975 + }, + { + "epoch": 17.761692650334076, + "loss": 0.2270025908946991, + "loss_ce": 7.388347876258194e-05, + "loss_iou": 0.09814453125, + "loss_num": 0.006072998046875, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 446878228, + "step": 7975 + }, + { + "epoch": 17.76391982182628, + "grad_norm": 45.05828857421875, + "learning_rate": 1e-06, + "loss": 0.4351, + "num_input_tokens_seen": 446934920, + "step": 7976 + }, + { + "epoch": 17.76391982182628, + "loss": 0.36751073598861694, + "loss_ce": 0.00010961330553982407, + "loss_iou": 0.1396484375, + "loss_num": 0.0174560546875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 446934920, + "step": 7976 + }, + { + "epoch": 17.766146993318486, + "grad_norm": 19.841493606567383, + "learning_rate": 1e-06, + "loss": 0.5039, + "num_input_tokens_seen": 446989424, + "step": 7977 + }, + { + "epoch": 17.766146993318486, + "loss": 0.4566991627216339, + "loss_ce": 9.516975842416286e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0147705078125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 446989424, + "step": 7977 + }, + { + "epoch": 17.76837416481069, + "grad_norm": 18.41941261291504, + "learning_rate": 1e-06, + "loss": 0.4241, + "num_input_tokens_seen": 447046508, + "step": 7978 + }, + { + "epoch": 17.76837416481069, + "loss": 0.3700698912143707, + "loss_ce": 7.478601764887571e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.00732421875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 447046508, + "step": 7978 + }, + { + "epoch": 17.770601336302896, + "grad_norm": 19.740684509277344, + "learning_rate": 1e-06, + "loss": 0.3984, + "num_input_tokens_seen": 447104012, + "step": 7979 + }, + { + "epoch": 17.770601336302896, + "loss": 0.27607256174087524, + "loss_ce": 7.156394713092595e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.003875732421875, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 447104012, + "step": 7979 + }, + { + "epoch": 17.7728285077951, + "grad_norm": 15.659636497497559, + "learning_rate": 1e-06, + "loss": 0.2204, + "num_input_tokens_seen": 447159184, + "step": 7980 + }, + { + "epoch": 17.7728285077951, + "loss": 0.18346147239208221, + "loss_ce": 8.134550444083288e-05, + "loss_iou": 0.0732421875, + "loss_num": 0.007476806640625, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 447159184, + "step": 7980 + }, + { + "epoch": 17.775055679287306, + "grad_norm": 18.540023803710938, + "learning_rate": 1e-06, + "loss": 0.3167, + "num_input_tokens_seen": 447212524, + "step": 7981 + }, + { + "epoch": 17.775055679287306, + "loss": 0.33039259910583496, + "loss_ce": 7.033500878605992e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.00860595703125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 447212524, + "step": 7981 + }, + { + "epoch": 17.77728285077951, + "grad_norm": 26.151935577392578, + "learning_rate": 1e-06, + "loss": 0.4296, + "num_input_tokens_seen": 447268040, + "step": 7982 + }, + { + "epoch": 17.77728285077951, + "loss": 0.48384368419647217, + "loss_ce": 7.902232027845457e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.00909423828125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 447268040, + "step": 7982 + }, + { + "epoch": 17.779510022271715, + "grad_norm": 19.188148498535156, + "learning_rate": 1e-06, + "loss": 0.4998, + "num_input_tokens_seen": 447323576, + "step": 7983 + }, + { + "epoch": 17.779510022271715, + "loss": 0.35879120230674744, + "loss_ce": 8.758992771618068e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00787353515625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 447323576, + "step": 7983 + }, + { + "epoch": 17.78173719376392, + "grad_norm": 20.830575942993164, + "learning_rate": 1e-06, + "loss": 0.5727, + "num_input_tokens_seen": 447379108, + "step": 7984 + }, + { + "epoch": 17.78173719376392, + "loss": 0.5355324745178223, + "loss_ce": 0.00013207507436163723, + "loss_iou": 0.228515625, + "loss_num": 0.015869140625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 447379108, + "step": 7984 + }, + { + "epoch": 17.783964365256125, + "grad_norm": 24.485782623291016, + "learning_rate": 1e-06, + "loss": 0.3293, + "num_input_tokens_seen": 447436116, + "step": 7985 + }, + { + "epoch": 17.783964365256125, + "loss": 0.38479992747306824, + "loss_ce": 9.53392154769972e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00909423828125, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 447436116, + "step": 7985 + }, + { + "epoch": 17.78619153674833, + "grad_norm": 27.353883743286133, + "learning_rate": 1e-06, + "loss": 0.3948, + "num_input_tokens_seen": 447492812, + "step": 7986 + }, + { + "epoch": 17.78619153674833, + "loss": 0.2935987710952759, + "loss_ce": 8.068819442996755e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.00909423828125, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 447492812, + "step": 7986 + }, + { + "epoch": 17.788418708240535, + "grad_norm": 17.87607192993164, + "learning_rate": 1e-06, + "loss": 0.35, + "num_input_tokens_seen": 447549280, + "step": 7987 + }, + { + "epoch": 17.788418708240535, + "loss": 0.3173452615737915, + "loss_ce": 8.452979091089219e-05, + "loss_iou": 0.13671875, + "loss_num": 0.00897216796875, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 447549280, + "step": 7987 + }, + { + "epoch": 17.79064587973274, + "grad_norm": 21.554283142089844, + "learning_rate": 1e-06, + "loss": 0.4563, + "num_input_tokens_seen": 447603876, + "step": 7988 + }, + { + "epoch": 17.79064587973274, + "loss": 0.5782496929168701, + "loss_ce": 0.00012468949717003852, + "loss_iou": 0.25, + "loss_num": 0.01513671875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 447603876, + "step": 7988 + }, + { + "epoch": 17.792873051224944, + "grad_norm": 17.834577560424805, + "learning_rate": 1e-06, + "loss": 0.3622, + "num_input_tokens_seen": 447660636, + "step": 7989 + }, + { + "epoch": 17.792873051224944, + "loss": 0.22425898909568787, + "loss_ce": 7.684796582907438e-05, + "loss_iou": 0.103515625, + "loss_num": 0.0034332275390625, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 447660636, + "step": 7989 + }, + { + "epoch": 17.79510022271715, + "grad_norm": 16.187517166137695, + "learning_rate": 1e-06, + "loss": 0.3169, + "num_input_tokens_seen": 447715244, + "step": 7990 + }, + { + "epoch": 17.79510022271715, + "loss": 0.3078160881996155, + "loss_ce": 7.682182331336662e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.009521484375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 447715244, + "step": 7990 + }, + { + "epoch": 17.797327394209354, + "grad_norm": 30.0116024017334, + "learning_rate": 1e-06, + "loss": 0.3553, + "num_input_tokens_seen": 447771124, + "step": 7991 + }, + { + "epoch": 17.797327394209354, + "loss": 0.2718275189399719, + "loss_ce": 9.136189328273758e-05, + "loss_iou": 0.123046875, + "loss_num": 0.005126953125, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 447771124, + "step": 7991 + }, + { + "epoch": 17.79955456570156, + "grad_norm": 20.42539405822754, + "learning_rate": 1e-06, + "loss": 0.4136, + "num_input_tokens_seen": 447826024, + "step": 7992 + }, + { + "epoch": 17.79955456570156, + "loss": 0.4224761426448822, + "loss_ce": 0.00011284490756224841, + "loss_iou": 0.1826171875, + "loss_num": 0.01141357421875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 447826024, + "step": 7992 + }, + { + "epoch": 17.801781737193764, + "grad_norm": 15.1818208694458, + "learning_rate": 1e-06, + "loss": 0.494, + "num_input_tokens_seen": 447881836, + "step": 7993 + }, + { + "epoch": 17.801781737193764, + "loss": 0.37318992614746094, + "loss_ce": 8.200886804843321e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.014892578125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 447881836, + "step": 7993 + }, + { + "epoch": 17.80400890868597, + "grad_norm": 11.075129508972168, + "learning_rate": 1e-06, + "loss": 0.5868, + "num_input_tokens_seen": 447938492, + "step": 7994 + }, + { + "epoch": 17.80400890868597, + "loss": 0.8123493790626526, + "loss_ce": 0.0001545300183352083, + "loss_iou": 0.341796875, + "loss_num": 0.0260009765625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 447938492, + "step": 7994 + }, + { + "epoch": 17.806236080178174, + "grad_norm": 26.124393463134766, + "learning_rate": 1e-06, + "loss": 0.5909, + "num_input_tokens_seen": 447992224, + "step": 7995 + }, + { + "epoch": 17.806236080178174, + "loss": 0.5432973504066467, + "loss_ce": 8.448238804703578e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0123291015625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 447992224, + "step": 7995 + }, + { + "epoch": 17.80846325167038, + "grad_norm": 21.8154296875, + "learning_rate": 1e-06, + "loss": 0.3901, + "num_input_tokens_seen": 448051428, + "step": 7996 + }, + { + "epoch": 17.80846325167038, + "loss": 0.4403107762336731, + "loss_ce": 0.0001252087822649628, + "loss_iou": 0.1953125, + "loss_num": 0.009765625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 448051428, + "step": 7996 + }, + { + "epoch": 17.810690423162583, + "grad_norm": 29.165897369384766, + "learning_rate": 1e-06, + "loss": 0.4263, + "num_input_tokens_seen": 448108228, + "step": 7997 + }, + { + "epoch": 17.810690423162583, + "loss": 0.35651570558547974, + "loss_ce": 7.038043258944526e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0084228515625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 448108228, + "step": 7997 + }, + { + "epoch": 17.812917594654788, + "grad_norm": 26.357572555541992, + "learning_rate": 1e-06, + "loss": 0.3326, + "num_input_tokens_seen": 448163460, + "step": 7998 + }, + { + "epoch": 17.812917594654788, + "loss": 0.2939828336238861, + "loss_ce": 9.854609379544854e-05, + "loss_iou": 0.134765625, + "loss_num": 0.004852294921875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 448163460, + "step": 7998 + }, + { + "epoch": 17.815144766146993, + "grad_norm": 17.523059844970703, + "learning_rate": 1e-06, + "loss": 0.3924, + "num_input_tokens_seen": 448220148, + "step": 7999 + }, + { + "epoch": 17.815144766146993, + "loss": 0.29866713285446167, + "loss_ce": 8.312883437611163e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00439453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 448220148, + "step": 7999 + }, + { + "epoch": 17.817371937639198, + "grad_norm": 21.25289535522461, + "learning_rate": 1e-06, + "loss": 0.5296, + "num_input_tokens_seen": 448275732, + "step": 8000 + }, + { + "epoch": 17.817371937639198, + "eval_seeclick_web_CIoU": 0.5856780111789703, + "eval_seeclick_web_GIoU": 0.5835351943969727, + "eval_seeclick_web_IoU": 0.6051326394081116, + "eval_seeclick_web_MAE_all": 0.015136118279770017, + "eval_seeclick_web_MAE_h": 0.007184438407421112, + "eval_seeclick_web_MAE_w": 0.015148711390793324, + "eval_seeclick_web_MAE_x_boxes": 0.008295744191855192, + "eval_seeclick_web_MAE_y_boxes": 0.021377818658947945, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.9110763072967529, + "eval_seeclick_web_loss_ce": 0.00013513932935893536, + "eval_seeclick_web_loss_iou": 0.4219970703125, + "eval_seeclick_web_loss_num": 0.012152671813964844, + "eval_seeclick_web_loss_xval": 0.905029296875, + "eval_seeclick_web_runtime": 23.0578, + "eval_seeclick_web_samples_per_second": 2.168, + "eval_seeclick_web_steps_per_second": 0.087, + "num_input_tokens_seen": 448275732, + "step": 8000 + }, + { + "epoch": 17.817371937639198, + "eval_icons_CIoU": 0.2600770592689514, + "eval_icons_GIoU": 0.28505839407444, + "eval_icons_IoU": 0.3360164016485214, + "eval_icons_MAE_all": 0.05904686264693737, + "eval_icons_MAE_h": 0.03290587291121483, + "eval_icons_MAE_w": 0.06054178345948458, + "eval_icons_MAE_x_boxes": 0.05734286643564701, + "eval_icons_MAE_y_boxes": 0.03782099112868309, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.714854121208191, + "eval_icons_loss_ce": 0.0001619036338524893, + "eval_icons_loss_iou": 0.6759033203125, + "eval_icons_loss_num": 0.058238983154296875, + "eval_icons_loss_xval": 1.643798828125, + "eval_icons_runtime": 21.0775, + "eval_icons_samples_per_second": 2.372, + "eval_icons_steps_per_second": 0.095, + "num_input_tokens_seen": 448275732, + "step": 8000 + }, + { + "epoch": 17.817371937639198, + "eval_screenspot_CIoU": 0.3892778257528941, + "eval_screenspot_GIoU": 0.4072861274083455, + "eval_screenspot_IoU": 0.45707400639851886, + "eval_screenspot_MAE_all": 0.053338187436262764, + "eval_screenspot_MAE_h": 0.03920335695147514, + "eval_screenspot_MAE_w": 0.05588290343681971, + "eval_screenspot_MAE_x_boxes": 0.062214924643437065, + "eval_screenspot_MAE_y_boxes": 0.03902036137878895, + "eval_screenspot_inside_bbox": 0.725000003973643, + "eval_screenspot_loss": 1.5115993022918701, + "eval_screenspot_loss_ce": 0.00019147307709014663, + "eval_screenspot_loss_iou": 0.6321614583333334, + "eval_screenspot_loss_num": 0.061505635579427086, + "eval_screenspot_loss_xval": 1.5716145833333333, + "eval_screenspot_runtime": 38.7561, + "eval_screenspot_samples_per_second": 2.296, + "eval_screenspot_steps_per_second": 0.077, + "num_input_tokens_seen": 448275732, + "step": 8000 + }, + { + "epoch": 17.817371937639198, + "eval_compot_CIoU": 0.3504429906606674, + "eval_compot_GIoU": 0.35775288939476013, + "eval_compot_IoU": 0.4085061550140381, + "eval_compot_MAE_all": 0.01769328536465764, + "eval_compot_MAE_h": 0.008335361024364829, + "eval_compot_MAE_w": 0.020572240464389324, + "eval_compot_MAE_x_boxes": 0.029930624179542065, + "eval_compot_MAE_y_boxes": 0.007017011754214764, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3885656595230103, + "eval_compot_loss_ce": 0.0001280211581615731, + "eval_compot_loss_iou": 0.6407470703125, + "eval_compot_loss_num": 0.016317367553710938, + "eval_compot_loss_xval": 1.364013671875, + "eval_compot_runtime": 20.8794, + "eval_compot_samples_per_second": 2.395, + "eval_compot_steps_per_second": 0.096, + "num_input_tokens_seen": 448275732, + "step": 8000 + }, + { + "epoch": 17.817371937639198, + "eval_custom_ui_val_CIoU": 0.4733108580112457, + "eval_custom_ui_val_GIoU": 0.47808146807882523, + "eval_custom_ui_val_IoU": 0.535189237859514, + "eval_custom_ui_val_MAE_all": 0.027121951182683308, + "eval_custom_ui_val_MAE_h": 0.014743615692067478, + "eval_custom_ui_val_MAE_w": 0.03644685043642918, + "eval_custom_ui_val_MAE_x_boxes": 0.033436041299460664, + "eval_custom_ui_val_MAE_y_boxes": 0.012357140529072948, + "eval_custom_ui_val_inside_bbox": 0.7685185207260979, + "eval_custom_ui_val_loss": 1.1683473587036133, + "eval_custom_ui_val_loss_ce": 0.00015340095301831348, + "eval_custom_ui_val_loss_iou": 0.5008816189236112, + "eval_custom_ui_val_loss_num": 0.0236766603257921, + "eval_custom_ui_val_loss_xval": 1.1200358072916667, + "eval_custom_ui_val_runtime": 70.4441, + "eval_custom_ui_val_samples_per_second": 3.762, + "eval_custom_ui_val_steps_per_second": 0.128, + "num_input_tokens_seen": 448275732, + "step": 8000 + }, + { + "epoch": 17.817371937639198, + "loss": 0.8243292570114136, + "loss_ce": 0.00011052313493564725, + "loss_iou": 0.37109375, + "loss_num": 0.0167236328125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 448275732, + "step": 8000 + }, + { + "epoch": 17.819599109131403, + "grad_norm": 16.163515090942383, + "learning_rate": 1e-06, + "loss": 0.3863, + "num_input_tokens_seen": 448332804, + "step": 8001 + }, + { + "epoch": 17.819599109131403, + "loss": 0.4817669987678528, + "loss_ce": 7.758761057630181e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.013427734375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 448332804, + "step": 8001 + }, + { + "epoch": 17.821826280623608, + "grad_norm": 21.57859992980957, + "learning_rate": 1e-06, + "loss": 0.4429, + "num_input_tokens_seen": 448389324, + "step": 8002 + }, + { + "epoch": 17.821826280623608, + "loss": 0.41340628266334534, + "loss_ce": 7.620293035870418e-05, + "loss_iou": 0.19140625, + "loss_num": 0.005950927734375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 448389324, + "step": 8002 + }, + { + "epoch": 17.824053452115812, + "grad_norm": 21.757307052612305, + "learning_rate": 1e-06, + "loss": 0.4946, + "num_input_tokens_seen": 448446192, + "step": 8003 + }, + { + "epoch": 17.824053452115812, + "loss": 0.5204148292541504, + "loss_ce": 0.00015119729505386204, + "loss_iou": 0.224609375, + "loss_num": 0.0140380859375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 448446192, + "step": 8003 + }, + { + "epoch": 17.826280623608017, + "grad_norm": 18.847604751586914, + "learning_rate": 1e-06, + "loss": 0.3709, + "num_input_tokens_seen": 448503164, + "step": 8004 + }, + { + "epoch": 17.826280623608017, + "loss": 0.2775651812553406, + "loss_ce": 6.884684989927337e-05, + "loss_iou": 0.111328125, + "loss_num": 0.0108642578125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 448503164, + "step": 8004 + }, + { + "epoch": 17.828507795100222, + "grad_norm": 20.84021759033203, + "learning_rate": 1e-06, + "loss": 0.4736, + "num_input_tokens_seen": 448558900, + "step": 8005 + }, + { + "epoch": 17.828507795100222, + "loss": 0.31135812401771545, + "loss_ce": 7.882342470111325e-05, + "loss_iou": 0.146484375, + "loss_num": 0.0038299560546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 448558900, + "step": 8005 + }, + { + "epoch": 17.830734966592427, + "grad_norm": 16.282339096069336, + "learning_rate": 1e-06, + "loss": 0.398, + "num_input_tokens_seen": 448614056, + "step": 8006 + }, + { + "epoch": 17.830734966592427, + "loss": 0.30350208282470703, + "loss_ce": 9.633424633648247e-05, + "loss_iou": 0.13671875, + "loss_num": 0.006103515625, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 448614056, + "step": 8006 + }, + { + "epoch": 17.832962138084632, + "grad_norm": 16.245391845703125, + "learning_rate": 1e-06, + "loss": 0.3856, + "num_input_tokens_seen": 448668056, + "step": 8007 + }, + { + "epoch": 17.832962138084632, + "loss": 0.30444949865341187, + "loss_ce": 6.718316581100225e-05, + "loss_iou": 0.13671875, + "loss_num": 0.006195068359375, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 448668056, + "step": 8007 + }, + { + "epoch": 17.835189309576837, + "grad_norm": 21.52396011352539, + "learning_rate": 1e-06, + "loss": 0.4856, + "num_input_tokens_seen": 448724780, + "step": 8008 + }, + { + "epoch": 17.835189309576837, + "loss": 0.36812153458595276, + "loss_ce": 7.954495958983898e-05, + "loss_iou": 0.158203125, + "loss_num": 0.01007080078125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 448724780, + "step": 8008 + }, + { + "epoch": 17.83741648106904, + "grad_norm": 15.395381927490234, + "learning_rate": 1e-06, + "loss": 0.4704, + "num_input_tokens_seen": 448781896, + "step": 8009 + }, + { + "epoch": 17.83741648106904, + "loss": 0.42657172679901123, + "loss_ce": 8.857337525114417e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0093994140625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 448781896, + "step": 8009 + }, + { + "epoch": 17.839643652561247, + "grad_norm": 14.230578422546387, + "learning_rate": 1e-06, + "loss": 0.5784, + "num_input_tokens_seen": 448835332, + "step": 8010 + }, + { + "epoch": 17.839643652561247, + "loss": 0.7218839526176453, + "loss_ce": 8.216348942369223e-05, + "loss_iou": 0.3125, + "loss_num": 0.0191650390625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 448835332, + "step": 8010 + }, + { + "epoch": 17.84187082405345, + "grad_norm": 25.663774490356445, + "learning_rate": 1e-06, + "loss": 0.2717, + "num_input_tokens_seen": 448890712, + "step": 8011 + }, + { + "epoch": 17.84187082405345, + "loss": 0.25164851546287537, + "loss_ce": 6.160917837405577e-05, + "loss_iou": 0.1142578125, + "loss_num": 0.00469970703125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 448890712, + "step": 8011 + }, + { + "epoch": 17.844097995545656, + "grad_norm": 16.865924835205078, + "learning_rate": 1e-06, + "loss": 0.4874, + "num_input_tokens_seen": 448948344, + "step": 8012 + }, + { + "epoch": 17.844097995545656, + "loss": 0.7613823413848877, + "loss_ce": 9.086247882805765e-05, + "loss_iou": 0.328125, + "loss_num": 0.0205078125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 448948344, + "step": 8012 + }, + { + "epoch": 17.84632516703786, + "grad_norm": 31.958927154541016, + "learning_rate": 1e-06, + "loss": 0.3505, + "num_input_tokens_seen": 449001604, + "step": 8013 + }, + { + "epoch": 17.84632516703786, + "loss": 0.2753995656967163, + "loss_ce": 6.995358853600919e-05, + "loss_iou": 0.11279296875, + "loss_num": 0.009765625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 449001604, + "step": 8013 + }, + { + "epoch": 17.848552338530066, + "grad_norm": 19.097925186157227, + "learning_rate": 1e-06, + "loss": 0.4448, + "num_input_tokens_seen": 449057784, + "step": 8014 + }, + { + "epoch": 17.848552338530066, + "loss": 0.42127907276153564, + "loss_ce": 7.547304267063737e-05, + "loss_iou": 0.17578125, + "loss_num": 0.01373291015625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 449057784, + "step": 8014 + }, + { + "epoch": 17.85077951002227, + "grad_norm": 22.88896942138672, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 449115216, + "step": 8015 + }, + { + "epoch": 17.85077951002227, + "loss": 0.40796226263046265, + "loss_ce": 0.0004915721947327256, + "loss_iou": 0.1611328125, + "loss_num": 0.0172119140625, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 449115216, + "step": 8015 + }, + { + "epoch": 17.853006681514476, + "grad_norm": 17.308486938476562, + "learning_rate": 1e-06, + "loss": 0.3195, + "num_input_tokens_seen": 449171056, + "step": 8016 + }, + { + "epoch": 17.853006681514476, + "loss": 0.23818965256214142, + "loss_ce": 9.149865218205377e-05, + "loss_iou": 0.10888671875, + "loss_num": 0.004119873046875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 449171056, + "step": 8016 + }, + { + "epoch": 17.85523385300668, + "grad_norm": 26.236623764038086, + "learning_rate": 1e-06, + "loss": 0.3454, + "num_input_tokens_seen": 449226700, + "step": 8017 + }, + { + "epoch": 17.85523385300668, + "loss": 0.30879464745521545, + "loss_ce": 7.88304241723381e-05, + "loss_iou": 0.140625, + "loss_num": 0.005523681640625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 449226700, + "step": 8017 + }, + { + "epoch": 17.857461024498885, + "grad_norm": 20.232860565185547, + "learning_rate": 1e-06, + "loss": 0.4247, + "num_input_tokens_seen": 449282184, + "step": 8018 + }, + { + "epoch": 17.857461024498885, + "loss": 0.5520839691162109, + "loss_ce": 8.206926577258855e-05, + "loss_iou": 0.2265625, + "loss_num": 0.0198974609375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 449282184, + "step": 8018 + }, + { + "epoch": 17.85968819599109, + "grad_norm": 16.037681579589844, + "learning_rate": 1e-06, + "loss": 0.4071, + "num_input_tokens_seen": 449338720, + "step": 8019 + }, + { + "epoch": 17.85968819599109, + "loss": 0.3350412845611572, + "loss_ce": 8.032634650589898e-05, + "loss_iou": 0.1484375, + "loss_num": 0.00750732421875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 449338720, + "step": 8019 + }, + { + "epoch": 17.861915367483295, + "grad_norm": 15.602432250976562, + "learning_rate": 1e-06, + "loss": 0.3929, + "num_input_tokens_seen": 449394160, + "step": 8020 + }, + { + "epoch": 17.861915367483295, + "loss": 0.3082042932510376, + "loss_ce": 9.883566235657781e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.00665283203125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 449394160, + "step": 8020 + }, + { + "epoch": 17.8641425389755, + "grad_norm": 20.469484329223633, + "learning_rate": 1e-06, + "loss": 0.2791, + "num_input_tokens_seen": 449448936, + "step": 8021 + }, + { + "epoch": 17.8641425389755, + "loss": 0.3228263556957245, + "loss_ce": 7.245552114909515e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.01263427734375, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 449448936, + "step": 8021 + }, + { + "epoch": 17.866369710467705, + "grad_norm": 11.62727165222168, + "learning_rate": 1e-06, + "loss": 0.3996, + "num_input_tokens_seen": 449505480, + "step": 8022 + }, + { + "epoch": 17.866369710467705, + "loss": 0.4349091947078705, + "loss_ce": 9.47638473007828e-05, + "loss_iou": 0.1875, + "loss_num": 0.01202392578125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 449505480, + "step": 8022 + }, + { + "epoch": 17.86859688195991, + "grad_norm": 24.577762603759766, + "learning_rate": 1e-06, + "loss": 0.4352, + "num_input_tokens_seen": 449561952, + "step": 8023 + }, + { + "epoch": 17.86859688195991, + "loss": 0.5243306756019592, + "loss_ce": 0.00016074010636657476, + "loss_iou": 0.2373046875, + "loss_num": 0.009765625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 449561952, + "step": 8023 + }, + { + "epoch": 17.870824053452115, + "grad_norm": 24.447031021118164, + "learning_rate": 1e-06, + "loss": 0.3999, + "num_input_tokens_seen": 449614092, + "step": 8024 + }, + { + "epoch": 17.870824053452115, + "loss": 0.44069501757621765, + "loss_ce": 8.22499132482335e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.01214599609375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 449614092, + "step": 8024 + }, + { + "epoch": 17.87305122494432, + "grad_norm": 22.536909103393555, + "learning_rate": 1e-06, + "loss": 0.5064, + "num_input_tokens_seen": 449669248, + "step": 8025 + }, + { + "epoch": 17.87305122494432, + "loss": 0.4587002098560333, + "loss_ce": 8.205789345083758e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.012939453125, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 449669248, + "step": 8025 + }, + { + "epoch": 17.875278396436524, + "grad_norm": 16.079544067382812, + "learning_rate": 1e-06, + "loss": 0.4124, + "num_input_tokens_seen": 449726032, + "step": 8026 + }, + { + "epoch": 17.875278396436524, + "loss": 0.402201771736145, + "loss_ce": 0.00010215782094746828, + "loss_iou": 0.173828125, + "loss_num": 0.0108642578125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 449726032, + "step": 8026 + }, + { + "epoch": 17.87750556792873, + "grad_norm": 16.75348472595215, + "learning_rate": 1e-06, + "loss": 0.589, + "num_input_tokens_seen": 449782292, + "step": 8027 + }, + { + "epoch": 17.87750556792873, + "loss": 0.5953076481819153, + "loss_ce": 9.280974336434156e-05, + "loss_iou": 0.244140625, + "loss_num": 0.021484375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 449782292, + "step": 8027 + }, + { + "epoch": 17.879732739420934, + "grad_norm": 54.10028839111328, + "learning_rate": 1e-06, + "loss": 0.4516, + "num_input_tokens_seen": 449837808, + "step": 8028 + }, + { + "epoch": 17.879732739420934, + "loss": 0.6945455074310303, + "loss_ce": 8.752994472160935e-05, + "loss_iou": 0.26953125, + "loss_num": 0.031494140625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 449837808, + "step": 8028 + }, + { + "epoch": 17.88195991091314, + "grad_norm": 18.08180046081543, + "learning_rate": 1e-06, + "loss": 0.4964, + "num_input_tokens_seen": 449896016, + "step": 8029 + }, + { + "epoch": 17.88195991091314, + "loss": 0.36804813146591187, + "loss_ce": 0.00049442338058725, + "loss_iou": 0.169921875, + "loss_num": 0.005706787109375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 449896016, + "step": 8029 + }, + { + "epoch": 17.884187082405344, + "grad_norm": 20.480897903442383, + "learning_rate": 1e-06, + "loss": 0.4051, + "num_input_tokens_seen": 449952976, + "step": 8030 + }, + { + "epoch": 17.884187082405344, + "loss": 0.34723442792892456, + "loss_ce": 6.646300607826561e-05, + "loss_iou": 0.154296875, + "loss_num": 0.007598876953125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 449952976, + "step": 8030 + }, + { + "epoch": 17.88641425389755, + "grad_norm": 38.36183166503906, + "learning_rate": 1e-06, + "loss": 0.4273, + "num_input_tokens_seen": 450010648, + "step": 8031 + }, + { + "epoch": 17.88641425389755, + "loss": 0.3772750496864319, + "loss_ce": 7.779923907946795e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01068115234375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 450010648, + "step": 8031 + }, + { + "epoch": 17.888641425389753, + "grad_norm": 21.646387100219727, + "learning_rate": 1e-06, + "loss": 0.3668, + "num_input_tokens_seen": 450065440, + "step": 8032 + }, + { + "epoch": 17.888641425389753, + "loss": 0.363721638917923, + "loss_ce": 7.417659799102694e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0155029296875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 450065440, + "step": 8032 + }, + { + "epoch": 17.89086859688196, + "grad_norm": 14.319405555725098, + "learning_rate": 1e-06, + "loss": 0.3845, + "num_input_tokens_seen": 450122044, + "step": 8033 + }, + { + "epoch": 17.89086859688196, + "loss": 0.41428041458129883, + "loss_ce": 9.586406667949632e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.01129150390625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 450122044, + "step": 8033 + }, + { + "epoch": 17.893095768374163, + "grad_norm": 35.73308563232422, + "learning_rate": 1e-06, + "loss": 0.4115, + "num_input_tokens_seen": 450178668, + "step": 8034 + }, + { + "epoch": 17.893095768374163, + "loss": 0.4911773204803467, + "loss_ce": 8.844825788401067e-05, + "loss_iou": 0.203125, + "loss_num": 0.016845703125, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 450178668, + "step": 8034 + }, + { + "epoch": 17.895322939866368, + "grad_norm": 20.128206253051758, + "learning_rate": 1e-06, + "loss": 0.2906, + "num_input_tokens_seen": 450235868, + "step": 8035 + }, + { + "epoch": 17.895322939866368, + "loss": 0.31039196252822876, + "loss_ce": 8.923389395931736e-05, + "loss_iou": 0.13671875, + "loss_num": 0.00732421875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 450235868, + "step": 8035 + }, + { + "epoch": 17.897550111358576, + "grad_norm": 16.04209327697754, + "learning_rate": 1e-06, + "loss": 0.3579, + "num_input_tokens_seen": 450292616, + "step": 8036 + }, + { + "epoch": 17.897550111358576, + "loss": 0.3482053279876709, + "loss_ce": 0.00018285455007571727, + "loss_iou": 0.16015625, + "loss_num": 0.00579833984375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 450292616, + "step": 8036 + }, + { + "epoch": 17.899777282850778, + "grad_norm": 25.872989654541016, + "learning_rate": 1e-06, + "loss": 0.4616, + "num_input_tokens_seen": 450348424, + "step": 8037 + }, + { + "epoch": 17.899777282850778, + "loss": 0.4966566562652588, + "loss_ce": 7.462559005944058e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0157470703125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 450348424, + "step": 8037 + }, + { + "epoch": 17.902004454342986, + "grad_norm": 17.968185424804688, + "learning_rate": 1e-06, + "loss": 0.397, + "num_input_tokens_seen": 450404344, + "step": 8038 + }, + { + "epoch": 17.902004454342986, + "loss": 0.43942493200302124, + "loss_ce": 9.388932812726125e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.00927734375, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 450404344, + "step": 8038 + }, + { + "epoch": 17.90423162583519, + "grad_norm": 22.768774032592773, + "learning_rate": 1e-06, + "loss": 0.4194, + "num_input_tokens_seen": 450457360, + "step": 8039 + }, + { + "epoch": 17.90423162583519, + "loss": 0.4151099920272827, + "loss_ce": 7.091661973390728e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.01080322265625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 450457360, + "step": 8039 + }, + { + "epoch": 17.906458797327396, + "grad_norm": 11.735021591186523, + "learning_rate": 1e-06, + "loss": 0.4589, + "num_input_tokens_seen": 450512100, + "step": 8040 + }, + { + "epoch": 17.906458797327396, + "loss": 0.5093483924865723, + "loss_ce": 0.0001931376027641818, + "loss_iou": 0.224609375, + "loss_num": 0.011962890625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 450512100, + "step": 8040 + }, + { + "epoch": 17.9086859688196, + "grad_norm": 16.40501594543457, + "learning_rate": 1e-06, + "loss": 0.4686, + "num_input_tokens_seen": 450569724, + "step": 8041 + }, + { + "epoch": 17.9086859688196, + "loss": 0.3620176613330841, + "loss_ce": 7.919156632851809e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0050048828125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 450569724, + "step": 8041 + }, + { + "epoch": 17.910913140311806, + "grad_norm": 18.876176834106445, + "learning_rate": 1e-06, + "loss": 0.336, + "num_input_tokens_seen": 450626440, + "step": 8042 + }, + { + "epoch": 17.910913140311806, + "loss": 0.3145880401134491, + "loss_ce": 7.385006756521761e-05, + "loss_iou": 0.14453125, + "loss_num": 0.005096435546875, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 450626440, + "step": 8042 + }, + { + "epoch": 17.91314031180401, + "grad_norm": 12.777106285095215, + "learning_rate": 1e-06, + "loss": 0.3849, + "num_input_tokens_seen": 450684836, + "step": 8043 + }, + { + "epoch": 17.91314031180401, + "loss": 0.5031352043151855, + "loss_ce": 8.34753445815295e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.00885009765625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 450684836, + "step": 8043 + }, + { + "epoch": 17.915367483296215, + "grad_norm": 30.933420181274414, + "learning_rate": 1e-06, + "loss": 0.357, + "num_input_tokens_seen": 450739768, + "step": 8044 + }, + { + "epoch": 17.915367483296215, + "loss": 0.3753129243850708, + "loss_ce": 6.877203122712672e-05, + "loss_iou": 0.1640625, + "loss_num": 0.009521484375, + "loss_xval": 0.375, + "num_input_tokens_seen": 450739768, + "step": 8044 + }, + { + "epoch": 17.91759465478842, + "grad_norm": 21.37887191772461, + "learning_rate": 1e-06, + "loss": 0.3017, + "num_input_tokens_seen": 450796720, + "step": 8045 + }, + { + "epoch": 17.91759465478842, + "loss": 0.34577932953834534, + "loss_ce": 7.619416282977909e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.006256103515625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 450796720, + "step": 8045 + }, + { + "epoch": 17.919821826280625, + "grad_norm": 21.42198944091797, + "learning_rate": 1e-06, + "loss": 0.5496, + "num_input_tokens_seen": 450852760, + "step": 8046 + }, + { + "epoch": 17.919821826280625, + "loss": 0.5246258974075317, + "loss_ce": 8.982230065157637e-05, + "loss_iou": 0.23046875, + "loss_num": 0.01263427734375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 450852760, + "step": 8046 + }, + { + "epoch": 17.92204899777283, + "grad_norm": 19.944995880126953, + "learning_rate": 1e-06, + "loss": 0.3975, + "num_input_tokens_seen": 450909692, + "step": 8047 + }, + { + "epoch": 17.92204899777283, + "loss": 0.4114391803741455, + "loss_ce": 6.225903052836657e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0081787109375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 450909692, + "step": 8047 + }, + { + "epoch": 17.924276169265035, + "grad_norm": 22.188114166259766, + "learning_rate": 1e-06, + "loss": 0.4788, + "num_input_tokens_seen": 450968200, + "step": 8048 + }, + { + "epoch": 17.924276169265035, + "loss": 0.4510143995285034, + "loss_ce": 8.66201298777014e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.01312255859375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 450968200, + "step": 8048 + }, + { + "epoch": 17.92650334075724, + "grad_norm": 16.4459228515625, + "learning_rate": 1e-06, + "loss": 0.3947, + "num_input_tokens_seen": 451022056, + "step": 8049 + }, + { + "epoch": 17.92650334075724, + "loss": 0.35028839111328125, + "loss_ce": 6.865533214295283e-05, + "loss_iou": 0.142578125, + "loss_num": 0.01318359375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 451022056, + "step": 8049 + }, + { + "epoch": 17.928730512249444, + "grad_norm": 14.635376930236816, + "learning_rate": 1e-06, + "loss": 0.3044, + "num_input_tokens_seen": 451078512, + "step": 8050 + }, + { + "epoch": 17.928730512249444, + "loss": 0.28396010398864746, + "loss_ce": 8.557151886634529e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.0072021484375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 451078512, + "step": 8050 + }, + { + "epoch": 17.93095768374165, + "grad_norm": 24.16907501220703, + "learning_rate": 1e-06, + "loss": 0.4527, + "num_input_tokens_seen": 451133112, + "step": 8051 + }, + { + "epoch": 17.93095768374165, + "loss": 0.5191126465797424, + "loss_ce": 0.0003138238680548966, + "loss_iou": 0.1923828125, + "loss_num": 0.026611328125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 451133112, + "step": 8051 + }, + { + "epoch": 17.933184855233854, + "grad_norm": 13.90976333618164, + "learning_rate": 1e-06, + "loss": 0.3535, + "num_input_tokens_seen": 451190024, + "step": 8052 + }, + { + "epoch": 17.933184855233854, + "loss": 0.3013458847999573, + "loss_ce": 7.635784277226776e-05, + "loss_iou": 0.140625, + "loss_num": 0.004180908203125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 451190024, + "step": 8052 + }, + { + "epoch": 17.93541202672606, + "grad_norm": 17.68203353881836, + "learning_rate": 1e-06, + "loss": 0.3295, + "num_input_tokens_seen": 451243952, + "step": 8053 + }, + { + "epoch": 17.93541202672606, + "loss": 0.3571292757987976, + "loss_ce": 7.360937888734043e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.00701904296875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 451243952, + "step": 8053 + }, + { + "epoch": 17.937639198218264, + "grad_norm": 24.68354034423828, + "learning_rate": 1e-06, + "loss": 0.4696, + "num_input_tokens_seen": 451299776, + "step": 8054 + }, + { + "epoch": 17.937639198218264, + "loss": 0.3794766664505005, + "loss_ce": 8.214540139306337e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00836181640625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 451299776, + "step": 8054 + }, + { + "epoch": 17.93986636971047, + "grad_norm": 12.557443618774414, + "learning_rate": 1e-06, + "loss": 0.3637, + "num_input_tokens_seen": 451354904, + "step": 8055 + }, + { + "epoch": 17.93986636971047, + "loss": 0.26801323890686035, + "loss_ce": 6.891635712236166e-05, + "loss_iou": 0.12109375, + "loss_num": 0.005157470703125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 451354904, + "step": 8055 + }, + { + "epoch": 17.942093541202674, + "grad_norm": 31.136751174926758, + "learning_rate": 1e-06, + "loss": 0.4939, + "num_input_tokens_seen": 451406784, + "step": 8056 + }, + { + "epoch": 17.942093541202674, + "loss": 0.5043526291847229, + "loss_ce": 8.016474748728797e-05, + "loss_iou": 0.2265625, + "loss_num": 0.010498046875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 451406784, + "step": 8056 + }, + { + "epoch": 17.94432071269488, + "grad_norm": 12.46948528289795, + "learning_rate": 1e-06, + "loss": 0.3284, + "num_input_tokens_seen": 451464456, + "step": 8057 + }, + { + "epoch": 17.94432071269488, + "loss": 0.29316914081573486, + "loss_ce": 7.832865230739117e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.004425048828125, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 451464456, + "step": 8057 + }, + { + "epoch": 17.946547884187083, + "grad_norm": 26.139951705932617, + "learning_rate": 1e-06, + "loss": 0.3216, + "num_input_tokens_seen": 451517900, + "step": 8058 + }, + { + "epoch": 17.946547884187083, + "loss": 0.3385404348373413, + "loss_ce": 0.00010048142576124519, + "loss_iou": 0.1572265625, + "loss_num": 0.004852294921875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 451517900, + "step": 8058 + }, + { + "epoch": 17.948775055679288, + "grad_norm": 20.38810920715332, + "learning_rate": 1e-06, + "loss": 0.3234, + "num_input_tokens_seen": 451574048, + "step": 8059 + }, + { + "epoch": 17.948775055679288, + "loss": 0.2848048210144043, + "loss_ce": 7.584408012917265e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.005096435546875, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 451574048, + "step": 8059 + }, + { + "epoch": 17.951002227171493, + "grad_norm": 17.65831756591797, + "learning_rate": 1e-06, + "loss": 0.5641, + "num_input_tokens_seen": 451632452, + "step": 8060 + }, + { + "epoch": 17.951002227171493, + "loss": 0.38423237204551697, + "loss_ce": 7.70928745623678e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.006805419921875, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 451632452, + "step": 8060 + }, + { + "epoch": 17.953229398663698, + "grad_norm": 10.756651878356934, + "learning_rate": 1e-06, + "loss": 0.362, + "num_input_tokens_seen": 451687680, + "step": 8061 + }, + { + "epoch": 17.953229398663698, + "loss": 0.46239495277404785, + "loss_ce": 8.412712486460805e-05, + "loss_iou": 0.171875, + "loss_num": 0.0238037109375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 451687680, + "step": 8061 + }, + { + "epoch": 17.955456570155903, + "grad_norm": 22.19445037841797, + "learning_rate": 1e-06, + "loss": 0.4, + "num_input_tokens_seen": 451746496, + "step": 8062 + }, + { + "epoch": 17.955456570155903, + "loss": 0.3811803460121155, + "loss_ce": 7.682880095671862e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.009521484375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 451746496, + "step": 8062 + }, + { + "epoch": 17.957683741648108, + "grad_norm": 14.64289665222168, + "learning_rate": 1e-06, + "loss": 0.3557, + "num_input_tokens_seen": 451802292, + "step": 8063 + }, + { + "epoch": 17.957683741648108, + "loss": 0.3765408396720886, + "loss_ce": 0.00010646959708537906, + "loss_iou": 0.1572265625, + "loss_num": 0.01239013671875, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 451802292, + "step": 8063 + }, + { + "epoch": 17.959910913140313, + "grad_norm": 20.15620994567871, + "learning_rate": 1e-06, + "loss": 0.5471, + "num_input_tokens_seen": 451854968, + "step": 8064 + }, + { + "epoch": 17.959910913140313, + "loss": 0.5528594255447388, + "loss_ce": 6.400723214028403e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01373291015625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 451854968, + "step": 8064 + }, + { + "epoch": 17.962138084632517, + "grad_norm": 16.74308967590332, + "learning_rate": 1e-06, + "loss": 0.3935, + "num_input_tokens_seen": 451913180, + "step": 8065 + }, + { + "epoch": 17.962138084632517, + "loss": 0.5324406623840332, + "loss_ce": 9.207165567204356e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.01239013671875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 451913180, + "step": 8065 + }, + { + "epoch": 17.964365256124722, + "grad_norm": 17.335187911987305, + "learning_rate": 1e-06, + "loss": 0.5185, + "num_input_tokens_seen": 451969652, + "step": 8066 + }, + { + "epoch": 17.964365256124722, + "loss": 0.47579044103622437, + "loss_ce": 8.243897173088044e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 451969652, + "step": 8066 + }, + { + "epoch": 17.966592427616927, + "grad_norm": 20.39455795288086, + "learning_rate": 1e-06, + "loss": 0.4586, + "num_input_tokens_seen": 452024588, + "step": 8067 + }, + { + "epoch": 17.966592427616927, + "loss": 0.6492767333984375, + "loss_ce": 0.00010679697152227163, + "loss_iou": 0.26171875, + "loss_num": 0.0247802734375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 452024588, + "step": 8067 + }, + { + "epoch": 17.968819599109132, + "grad_norm": 21.565893173217773, + "learning_rate": 1e-06, + "loss": 0.3403, + "num_input_tokens_seen": 452079184, + "step": 8068 + }, + { + "epoch": 17.968819599109132, + "loss": 0.3689712882041931, + "loss_ce": 7.478379120584577e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.007598876953125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 452079184, + "step": 8068 + }, + { + "epoch": 17.971046770601337, + "grad_norm": 53.53007507324219, + "learning_rate": 1e-06, + "loss": 0.4302, + "num_input_tokens_seen": 452132824, + "step": 8069 + }, + { + "epoch": 17.971046770601337, + "loss": 0.3132534623146057, + "loss_ce": 0.00014310533879324794, + "loss_iou": 0.146484375, + "loss_num": 0.004058837890625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 452132824, + "step": 8069 + }, + { + "epoch": 17.97327394209354, + "grad_norm": 25.093297958374023, + "learning_rate": 1e-06, + "loss": 0.3963, + "num_input_tokens_seen": 452188908, + "step": 8070 + }, + { + "epoch": 17.97327394209354, + "loss": 0.3288014531135559, + "loss_ce": 6.609824777115136e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 452188908, + "step": 8070 + }, + { + "epoch": 17.975501113585747, + "grad_norm": 14.360114097595215, + "learning_rate": 1e-06, + "loss": 0.4598, + "num_input_tokens_seen": 452245072, + "step": 8071 + }, + { + "epoch": 17.975501113585747, + "loss": 0.49251696467399597, + "loss_ce": 8.531761704944074e-05, + "loss_iou": 0.203125, + "loss_num": 0.01708984375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 452245072, + "step": 8071 + }, + { + "epoch": 17.97772828507795, + "grad_norm": 19.300607681274414, + "learning_rate": 1e-06, + "loss": 0.4567, + "num_input_tokens_seen": 452302856, + "step": 8072 + }, + { + "epoch": 17.97772828507795, + "loss": 0.47664445638656616, + "loss_ce": 8.195844566216692e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0125732421875, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 452302856, + "step": 8072 + }, + { + "epoch": 17.979955456570156, + "grad_norm": 22.800689697265625, + "learning_rate": 1e-06, + "loss": 0.3213, + "num_input_tokens_seen": 452358036, + "step": 8073 + }, + { + "epoch": 17.979955456570156, + "loss": 0.30914774537086487, + "loss_ce": 6.572413258254528e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.01153564453125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 452358036, + "step": 8073 + }, + { + "epoch": 17.98218262806236, + "grad_norm": 18.20235824584961, + "learning_rate": 1e-06, + "loss": 0.3886, + "num_input_tokens_seen": 452412620, + "step": 8074 + }, + { + "epoch": 17.98218262806236, + "loss": 0.3233228921890259, + "loss_ce": 8.070748299360275e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0072021484375, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 452412620, + "step": 8074 + }, + { + "epoch": 17.984409799554566, + "grad_norm": 14.406597137451172, + "learning_rate": 1e-06, + "loss": 0.4968, + "num_input_tokens_seen": 452469772, + "step": 8075 + }, + { + "epoch": 17.984409799554566, + "loss": 0.5883782505989075, + "loss_ce": 0.00012145160872023553, + "loss_iou": 0.25, + "loss_num": 0.017333984375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 452469772, + "step": 8075 + }, + { + "epoch": 17.98663697104677, + "grad_norm": 26.771181106567383, + "learning_rate": 1e-06, + "loss": 0.4041, + "num_input_tokens_seen": 452523692, + "step": 8076 + }, + { + "epoch": 17.98663697104677, + "loss": 0.5332375764846802, + "loss_ce": 9.549761307425797e-05, + "loss_iou": 0.2109375, + "loss_num": 0.0220947265625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 452523692, + "step": 8076 + }, + { + "epoch": 17.988864142538976, + "grad_norm": 15.690217971801758, + "learning_rate": 1e-06, + "loss": 0.3856, + "num_input_tokens_seen": 452581640, + "step": 8077 + }, + { + "epoch": 17.988864142538976, + "loss": 0.3996119499206543, + "loss_ce": 0.0001978981599677354, + "loss_iou": 0.166015625, + "loss_num": 0.01348876953125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 452581640, + "step": 8077 + }, + { + "epoch": 17.99109131403118, + "grad_norm": 20.969528198242188, + "learning_rate": 1e-06, + "loss": 0.3346, + "num_input_tokens_seen": 452634292, + "step": 8078 + }, + { + "epoch": 17.99109131403118, + "loss": 0.3364979922771454, + "loss_ce": 7.221752457553521e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.005035400390625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 452634292, + "step": 8078 + }, + { + "epoch": 17.993318485523385, + "grad_norm": 22.376567840576172, + "learning_rate": 1e-06, + "loss": 0.4623, + "num_input_tokens_seen": 452691208, + "step": 8079 + }, + { + "epoch": 17.993318485523385, + "loss": 0.38116979598999023, + "loss_ce": 6.630241841776296e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.007232666015625, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 452691208, + "step": 8079 + }, + { + "epoch": 17.99554565701559, + "grad_norm": 25.332250595092773, + "learning_rate": 1e-06, + "loss": 0.38, + "num_input_tokens_seen": 452744012, + "step": 8080 + }, + { + "epoch": 17.99554565701559, + "loss": 0.5036153793334961, + "loss_ce": 0.00019747592159546912, + "loss_iou": 0.21875, + "loss_num": 0.0133056640625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 452744012, + "step": 8080 + }, + { + "epoch": 17.997772828507795, + "grad_norm": 21.98955726623535, + "learning_rate": 1e-06, + "loss": 0.3497, + "num_input_tokens_seen": 452801908, + "step": 8081 + }, + { + "epoch": 17.997772828507795, + "loss": 0.3676411509513855, + "loss_ce": 8.742515638004988e-05, + "loss_iou": 0.166015625, + "loss_num": 0.007080078125, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 452801908, + "step": 8081 + }, + { + "epoch": 18.0, + "grad_norm": 16.604522705078125, + "learning_rate": 1e-06, + "loss": 0.4416, + "num_input_tokens_seen": 452859412, + "step": 8082 + }, + { + "epoch": 18.0, + "loss": 0.5428001880645752, + "loss_ce": 7.557860226370394e-05, + "loss_iou": 0.216796875, + "loss_num": 0.02197265625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 452859412, + "step": 8082 + }, + { + "epoch": 18.002227171492205, + "grad_norm": 13.554404258728027, + "learning_rate": 1e-06, + "loss": 0.3327, + "num_input_tokens_seen": 452915892, + "step": 8083 + }, + { + "epoch": 18.002227171492205, + "loss": 0.16937774419784546, + "loss_ce": 6.622253567911685e-05, + "loss_iou": 0.0712890625, + "loss_num": 0.005401611328125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 452915892, + "step": 8083 + }, + { + "epoch": 18.00445434298441, + "grad_norm": 21.166494369506836, + "learning_rate": 1e-06, + "loss": 0.4786, + "num_input_tokens_seen": 452973948, + "step": 8084 + }, + { + "epoch": 18.00445434298441, + "loss": 0.3788573145866394, + "loss_ce": 7.313516107387841e-05, + "loss_iou": 0.17578125, + "loss_num": 0.005584716796875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 452973948, + "step": 8084 + }, + { + "epoch": 18.006681514476615, + "grad_norm": 20.805810928344727, + "learning_rate": 1e-06, + "loss": 0.4782, + "num_input_tokens_seen": 453030260, + "step": 8085 + }, + { + "epoch": 18.006681514476615, + "loss": 0.5902899503707886, + "loss_ce": 8.001519017852843e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.0230712890625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 453030260, + "step": 8085 + }, + { + "epoch": 18.00890868596882, + "grad_norm": 16.5931396484375, + "learning_rate": 1e-06, + "loss": 0.4174, + "num_input_tokens_seen": 453085656, + "step": 8086 + }, + { + "epoch": 18.00890868596882, + "loss": 0.4264695942401886, + "loss_ce": 7.800738967489451e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0108642578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 453085656, + "step": 8086 + }, + { + "epoch": 18.011135857461024, + "grad_norm": 20.476335525512695, + "learning_rate": 1e-06, + "loss": 0.3997, + "num_input_tokens_seen": 453141140, + "step": 8087 + }, + { + "epoch": 18.011135857461024, + "loss": 0.40523386001586914, + "loss_ce": 0.0002045718429144472, + "loss_iou": 0.1884765625, + "loss_num": 0.005462646484375, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 453141140, + "step": 8087 + }, + { + "epoch": 18.01336302895323, + "grad_norm": 26.6048641204834, + "learning_rate": 1e-06, + "loss": 0.3262, + "num_input_tokens_seen": 453199108, + "step": 8088 + }, + { + "epoch": 18.01336302895323, + "loss": 0.3548569679260254, + "loss_ce": 5.9605521528283134e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.008056640625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 453199108, + "step": 8088 + }, + { + "epoch": 18.015590200445434, + "grad_norm": 28.440725326538086, + "learning_rate": 1e-06, + "loss": 0.56, + "num_input_tokens_seen": 453254600, + "step": 8089 + }, + { + "epoch": 18.015590200445434, + "loss": 0.6332626342773438, + "loss_ce": 8.392542076762766e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0137939453125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 453254600, + "step": 8089 + }, + { + "epoch": 18.01781737193764, + "grad_norm": 17.789857864379883, + "learning_rate": 1e-06, + "loss": 0.5431, + "num_input_tokens_seen": 453311020, + "step": 8090 + }, + { + "epoch": 18.01781737193764, + "loss": 0.7038158774375916, + "loss_ce": 8.052912016864866e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0201416015625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 453311020, + "step": 8090 + }, + { + "epoch": 18.020044543429844, + "grad_norm": 19.747709274291992, + "learning_rate": 1e-06, + "loss": 0.378, + "num_input_tokens_seen": 453364652, + "step": 8091 + }, + { + "epoch": 18.020044543429844, + "loss": 0.47260355949401855, + "loss_ce": 6.938957085367292e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.00970458984375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 453364652, + "step": 8091 + }, + { + "epoch": 18.02227171492205, + "grad_norm": 24.440204620361328, + "learning_rate": 1e-06, + "loss": 0.4914, + "num_input_tokens_seen": 453419676, + "step": 8092 + }, + { + "epoch": 18.02227171492205, + "loss": 0.43805378675460815, + "loss_ce": 6.548190140165389e-05, + "loss_iou": 0.173828125, + "loss_num": 0.01806640625, + "loss_xval": 0.4375, + "num_input_tokens_seen": 453419676, + "step": 8092 + }, + { + "epoch": 18.024498886414253, + "grad_norm": 14.660680770874023, + "learning_rate": 1e-06, + "loss": 0.4239, + "num_input_tokens_seen": 453476280, + "step": 8093 + }, + { + "epoch": 18.024498886414253, + "loss": 0.3206265866756439, + "loss_ce": 6.993835268076509e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.009033203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 453476280, + "step": 8093 + }, + { + "epoch": 18.02672605790646, + "grad_norm": 22.54802894592285, + "learning_rate": 1e-06, + "loss": 0.4878, + "num_input_tokens_seen": 453529916, + "step": 8094 + }, + { + "epoch": 18.02672605790646, + "loss": 0.37210169434547424, + "loss_ce": 9.240545477950945e-05, + "loss_iou": 0.16796875, + "loss_num": 0.007354736328125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 453529916, + "step": 8094 + }, + { + "epoch": 18.028953229398663, + "grad_norm": 20.00684928894043, + "learning_rate": 1e-06, + "loss": 0.3406, + "num_input_tokens_seen": 453583004, + "step": 8095 + }, + { + "epoch": 18.028953229398663, + "loss": 0.39649391174316406, + "loss_ce": 7.059978815959767e-05, + "loss_iou": 0.171875, + "loss_num": 0.0106201171875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 453583004, + "step": 8095 + }, + { + "epoch": 18.031180400890868, + "grad_norm": 25.416080474853516, + "learning_rate": 1e-06, + "loss": 0.5202, + "num_input_tokens_seen": 453637924, + "step": 8096 + }, + { + "epoch": 18.031180400890868, + "loss": 0.5738502740859985, + "loss_ce": 0.00011981255374848843, + "loss_iou": 0.255859375, + "loss_num": 0.0128173828125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 453637924, + "step": 8096 + }, + { + "epoch": 18.033407572383073, + "grad_norm": 17.065658569335938, + "learning_rate": 1e-06, + "loss": 0.4977, + "num_input_tokens_seen": 453693352, + "step": 8097 + }, + { + "epoch": 18.033407572383073, + "loss": 0.32081741094589233, + "loss_ce": 7.766792987240478e-05, + "loss_iou": 0.12890625, + "loss_num": 0.0126953125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 453693352, + "step": 8097 + }, + { + "epoch": 18.035634743875278, + "grad_norm": 18.961402893066406, + "learning_rate": 1e-06, + "loss": 0.4149, + "num_input_tokens_seen": 453748036, + "step": 8098 + }, + { + "epoch": 18.035634743875278, + "loss": 0.41537126898765564, + "loss_ce": 8.809100836515427e-05, + "loss_iou": 0.169921875, + "loss_num": 0.014892578125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 453748036, + "step": 8098 + }, + { + "epoch": 18.037861915367483, + "grad_norm": 19.813962936401367, + "learning_rate": 1e-06, + "loss": 0.2732, + "num_input_tokens_seen": 453804100, + "step": 8099 + }, + { + "epoch": 18.037861915367483, + "loss": 0.27010267972946167, + "loss_ce": 8.316422463394701e-05, + "loss_iou": 0.1142578125, + "loss_num": 0.00823974609375, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 453804100, + "step": 8099 + }, + { + "epoch": 18.040089086859687, + "grad_norm": 21.94256019592285, + "learning_rate": 1e-06, + "loss": 0.3819, + "num_input_tokens_seen": 453862180, + "step": 8100 + }, + { + "epoch": 18.040089086859687, + "loss": 0.40962302684783936, + "loss_ce": 7.715914398431778e-05, + "loss_iou": 0.189453125, + "loss_num": 0.00628662109375, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 453862180, + "step": 8100 + }, + { + "epoch": 18.042316258351892, + "grad_norm": 26.7554988861084, + "learning_rate": 1e-06, + "loss": 0.5004, + "num_input_tokens_seen": 453918752, + "step": 8101 + }, + { + "epoch": 18.042316258351892, + "loss": 0.33896124362945557, + "loss_ce": 9.407360630575567e-05, + "loss_iou": 0.1484375, + "loss_num": 0.00830078125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 453918752, + "step": 8101 + }, + { + "epoch": 18.044543429844097, + "grad_norm": 15.414385795593262, + "learning_rate": 1e-06, + "loss": 0.4937, + "num_input_tokens_seen": 453970240, + "step": 8102 + }, + { + "epoch": 18.044543429844097, + "loss": 0.5033870935440063, + "loss_ce": 9.12140094442293e-05, + "loss_iou": 0.203125, + "loss_num": 0.019287109375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 453970240, + "step": 8102 + }, + { + "epoch": 18.046770601336302, + "grad_norm": 19.02543067932129, + "learning_rate": 1e-06, + "loss": 0.3916, + "num_input_tokens_seen": 454026744, + "step": 8103 + }, + { + "epoch": 18.046770601336302, + "loss": 0.46980106830596924, + "loss_ce": 7.446320523740724e-05, + "loss_iou": 0.1953125, + "loss_num": 0.015869140625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 454026744, + "step": 8103 + }, + { + "epoch": 18.048997772828507, + "grad_norm": 14.910806655883789, + "learning_rate": 1e-06, + "loss": 0.3787, + "num_input_tokens_seen": 454084836, + "step": 8104 + }, + { + "epoch": 18.048997772828507, + "loss": 0.37930968403816223, + "loss_ce": 9.826038149185479e-05, + "loss_iou": 0.17578125, + "loss_num": 0.005462646484375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 454084836, + "step": 8104 + }, + { + "epoch": 18.051224944320712, + "grad_norm": 15.848285675048828, + "learning_rate": 1e-06, + "loss": 0.512, + "num_input_tokens_seen": 454140536, + "step": 8105 + }, + { + "epoch": 18.051224944320712, + "loss": 0.5733276009559631, + "loss_ce": 8.542699652025476e-05, + "loss_iou": 0.251953125, + "loss_num": 0.01416015625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 454140536, + "step": 8105 + }, + { + "epoch": 18.053452115812917, + "grad_norm": 18.083738327026367, + "learning_rate": 1e-06, + "loss": 0.3539, + "num_input_tokens_seen": 454198948, + "step": 8106 + }, + { + "epoch": 18.053452115812917, + "loss": 0.46540844440460205, + "loss_ce": 7.641864067409188e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.0081787109375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 454198948, + "step": 8106 + }, + { + "epoch": 18.05567928730512, + "grad_norm": 19.562829971313477, + "learning_rate": 1e-06, + "loss": 0.3211, + "num_input_tokens_seen": 454254564, + "step": 8107 + }, + { + "epoch": 18.05567928730512, + "loss": 0.32736125588417053, + "loss_ce": 9.073851106222719e-05, + "loss_iou": 0.140625, + "loss_num": 0.0093994140625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 454254564, + "step": 8107 + }, + { + "epoch": 18.057906458797326, + "grad_norm": 28.230247497558594, + "learning_rate": 1e-06, + "loss": 0.516, + "num_input_tokens_seen": 454310376, + "step": 8108 + }, + { + "epoch": 18.057906458797326, + "loss": 0.46714556217193604, + "loss_ce": 0.00010453614231664687, + "loss_iou": 0.16015625, + "loss_num": 0.029296875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 454310376, + "step": 8108 + }, + { + "epoch": 18.06013363028953, + "grad_norm": 12.480415344238281, + "learning_rate": 1e-06, + "loss": 0.2964, + "num_input_tokens_seen": 454367776, + "step": 8109 + }, + { + "epoch": 18.06013363028953, + "loss": 0.356157511472702, + "loss_ce": 7.839675527065992e-05, + "loss_iou": 0.14453125, + "loss_num": 0.01336669921875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 454367776, + "step": 8109 + }, + { + "epoch": 18.062360801781736, + "grad_norm": 22.18619728088379, + "learning_rate": 1e-06, + "loss": 0.4995, + "num_input_tokens_seen": 454423244, + "step": 8110 + }, + { + "epoch": 18.062360801781736, + "loss": 0.5585031509399414, + "loss_ce": 0.00015356890799012035, + "loss_iou": 0.232421875, + "loss_num": 0.0189208984375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 454423244, + "step": 8110 + }, + { + "epoch": 18.06458797327394, + "grad_norm": 25.902963638305664, + "learning_rate": 1e-06, + "loss": 0.522, + "num_input_tokens_seen": 454480660, + "step": 8111 + }, + { + "epoch": 18.06458797327394, + "loss": 0.49017781019210815, + "loss_ce": 6.550169200636446e-05, + "loss_iou": 0.216796875, + "loss_num": 0.01153564453125, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 454480660, + "step": 8111 + }, + { + "epoch": 18.066815144766146, + "grad_norm": 27.04472541809082, + "learning_rate": 1e-06, + "loss": 0.3989, + "num_input_tokens_seen": 454539016, + "step": 8112 + }, + { + "epoch": 18.066815144766146, + "loss": 0.26980409026145935, + "loss_ce": 8.019209781195968e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.005340576171875, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 454539016, + "step": 8112 + }, + { + "epoch": 18.06904231625835, + "grad_norm": 15.210087776184082, + "learning_rate": 1e-06, + "loss": 0.5414, + "num_input_tokens_seen": 454595484, + "step": 8113 + }, + { + "epoch": 18.06904231625835, + "loss": 0.652846097946167, + "loss_ce": 7.511470903409645e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0284423828125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 454595484, + "step": 8113 + }, + { + "epoch": 18.071269487750556, + "grad_norm": 24.391822814941406, + "learning_rate": 1e-06, + "loss": 0.5438, + "num_input_tokens_seen": 454647108, + "step": 8114 + }, + { + "epoch": 18.071269487750556, + "loss": 0.5746660828590393, + "loss_ce": 8.115639502648264e-05, + "loss_iou": 0.271484375, + "loss_num": 0.006683349609375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 454647108, + "step": 8114 + }, + { + "epoch": 18.07349665924276, + "grad_norm": 13.966802597045898, + "learning_rate": 1e-06, + "loss": 0.3106, + "num_input_tokens_seen": 454704844, + "step": 8115 + }, + { + "epoch": 18.07349665924276, + "loss": 0.31044334173202515, + "loss_ce": 7.955444743856788e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.0103759765625, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 454704844, + "step": 8115 + }, + { + "epoch": 18.075723830734965, + "grad_norm": 18.81356430053711, + "learning_rate": 1e-06, + "loss": 0.4646, + "num_input_tokens_seen": 454762604, + "step": 8116 + }, + { + "epoch": 18.075723830734965, + "loss": 0.4306223690509796, + "loss_ce": 8.036733197513968e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.00701904296875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 454762604, + "step": 8116 + }, + { + "epoch": 18.07795100222717, + "grad_norm": 13.788688659667969, + "learning_rate": 1e-06, + "loss": 0.249, + "num_input_tokens_seen": 454818024, + "step": 8117 + }, + { + "epoch": 18.07795100222717, + "loss": 0.23960940539836884, + "loss_ce": 7.69186153775081e-05, + "loss_iou": 0.1025390625, + "loss_num": 0.00689697265625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 454818024, + "step": 8117 + }, + { + "epoch": 18.080178173719375, + "grad_norm": 14.488852500915527, + "learning_rate": 1e-06, + "loss": 0.3516, + "num_input_tokens_seen": 454873348, + "step": 8118 + }, + { + "epoch": 18.080178173719375, + "loss": 0.40378397703170776, + "loss_ce": 9.744857379700989e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.0137939453125, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 454873348, + "step": 8118 + }, + { + "epoch": 18.08240534521158, + "grad_norm": 27.27067756652832, + "learning_rate": 1e-06, + "loss": 0.4236, + "num_input_tokens_seen": 454929640, + "step": 8119 + }, + { + "epoch": 18.08240534521158, + "loss": 0.3224753439426422, + "loss_ce": 8.763029472902417e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.00469970703125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 454929640, + "step": 8119 + }, + { + "epoch": 18.084632516703785, + "grad_norm": 18.79537010192871, + "learning_rate": 1e-06, + "loss": 0.4925, + "num_input_tokens_seen": 454984244, + "step": 8120 + }, + { + "epoch": 18.084632516703785, + "loss": 0.47800394892692566, + "loss_ce": 0.00015972901019267738, + "loss_iou": 0.203125, + "loss_num": 0.0142822265625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 454984244, + "step": 8120 + }, + { + "epoch": 18.08685968819599, + "grad_norm": 15.249739646911621, + "learning_rate": 1e-06, + "loss": 0.3372, + "num_input_tokens_seen": 455041144, + "step": 8121 + }, + { + "epoch": 18.08685968819599, + "loss": 0.2875575125217438, + "loss_ce": 8.190819789888337e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.004669189453125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 455041144, + "step": 8121 + }, + { + "epoch": 18.089086859688194, + "grad_norm": 14.420731544494629, + "learning_rate": 1e-06, + "loss": 0.4776, + "num_input_tokens_seen": 455097144, + "step": 8122 + }, + { + "epoch": 18.089086859688194, + "loss": 0.3784327805042267, + "loss_ce": 0.00013688384206034243, + "loss_iou": 0.1572265625, + "loss_num": 0.01263427734375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 455097144, + "step": 8122 + }, + { + "epoch": 18.0913140311804, + "grad_norm": 19.662689208984375, + "learning_rate": 1e-06, + "loss": 0.4817, + "num_input_tokens_seen": 455151808, + "step": 8123 + }, + { + "epoch": 18.0913140311804, + "loss": 0.567717969417572, + "loss_ce": 9.098585724132136e-05, + "loss_iou": 0.244140625, + "loss_num": 0.0159912109375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 455151808, + "step": 8123 + }, + { + "epoch": 18.093541202672604, + "grad_norm": 19.106225967407227, + "learning_rate": 1e-06, + "loss": 0.2999, + "num_input_tokens_seen": 455207776, + "step": 8124 + }, + { + "epoch": 18.093541202672604, + "loss": 0.33509403467178345, + "loss_ce": 7.206852023955435e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.006591796875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 455207776, + "step": 8124 + }, + { + "epoch": 18.09576837416481, + "grad_norm": 16.473522186279297, + "learning_rate": 1e-06, + "loss": 0.4367, + "num_input_tokens_seen": 455264208, + "step": 8125 + }, + { + "epoch": 18.09576837416481, + "loss": 0.4178709387779236, + "loss_ce": 8.530144987162203e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.014404296875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 455264208, + "step": 8125 + }, + { + "epoch": 18.097995545657014, + "grad_norm": 26.5972900390625, + "learning_rate": 1e-06, + "loss": 0.3408, + "num_input_tokens_seen": 455323352, + "step": 8126 + }, + { + "epoch": 18.097995545657014, + "loss": 0.3901003897190094, + "loss_ce": 8.57616396388039e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00616455078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 455323352, + "step": 8126 + }, + { + "epoch": 18.100222717149222, + "grad_norm": 21.1645565032959, + "learning_rate": 1e-06, + "loss": 0.4185, + "num_input_tokens_seen": 455377804, + "step": 8127 + }, + { + "epoch": 18.100222717149222, + "loss": 0.5210694074630737, + "loss_ce": 7.333118992391974e-05, + "loss_iou": 0.2265625, + "loss_num": 0.01361083984375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 455377804, + "step": 8127 + }, + { + "epoch": 18.102449888641427, + "grad_norm": 13.508529663085938, + "learning_rate": 1e-06, + "loss": 0.5152, + "num_input_tokens_seen": 455436200, + "step": 8128 + }, + { + "epoch": 18.102449888641427, + "loss": 0.6549245119094849, + "loss_ce": 6.30651629762724e-05, + "loss_iou": 0.265625, + "loss_num": 0.0247802734375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 455436200, + "step": 8128 + }, + { + "epoch": 18.104677060133632, + "grad_norm": 36.7231330871582, + "learning_rate": 1e-06, + "loss": 0.3776, + "num_input_tokens_seen": 455492220, + "step": 8129 + }, + { + "epoch": 18.104677060133632, + "loss": 0.3046456277370453, + "loss_ce": 8.02054419182241e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.007354736328125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 455492220, + "step": 8129 + }, + { + "epoch": 18.106904231625837, + "grad_norm": 19.406326293945312, + "learning_rate": 1e-06, + "loss": 0.4491, + "num_input_tokens_seen": 455548928, + "step": 8130 + }, + { + "epoch": 18.106904231625837, + "loss": 0.3191646337509155, + "loss_ce": 7.285462925210595e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.0057373046875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 455548928, + "step": 8130 + }, + { + "epoch": 18.10913140311804, + "grad_norm": 12.066944122314453, + "learning_rate": 1e-06, + "loss": 0.4965, + "num_input_tokens_seen": 455605732, + "step": 8131 + }, + { + "epoch": 18.10913140311804, + "loss": 0.49812638759613037, + "loss_ce": 7.951979932840914e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.02099609375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 455605732, + "step": 8131 + }, + { + "epoch": 18.111358574610247, + "grad_norm": 12.560493469238281, + "learning_rate": 1e-06, + "loss": 0.3447, + "num_input_tokens_seen": 455660540, + "step": 8132 + }, + { + "epoch": 18.111358574610247, + "loss": 0.4943099915981293, + "loss_ce": 0.00010833313717739657, + "loss_iou": 0.212890625, + "loss_num": 0.01361083984375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 455660540, + "step": 8132 + }, + { + "epoch": 18.11358574610245, + "grad_norm": 23.26740264892578, + "learning_rate": 1e-06, + "loss": 0.4059, + "num_input_tokens_seen": 455712784, + "step": 8133 + }, + { + "epoch": 18.11358574610245, + "loss": 0.533281683921814, + "loss_ce": 7.859165634727105e-05, + "loss_iou": 0.234375, + "loss_num": 0.01275634765625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 455712784, + "step": 8133 + }, + { + "epoch": 18.115812917594656, + "grad_norm": 23.393814086914062, + "learning_rate": 1e-06, + "loss": 0.3933, + "num_input_tokens_seen": 455769952, + "step": 8134 + }, + { + "epoch": 18.115812917594656, + "loss": 0.4792192578315735, + "loss_ce": 9.330162720289081e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.007080078125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 455769952, + "step": 8134 + }, + { + "epoch": 18.11804008908686, + "grad_norm": 23.41754913330078, + "learning_rate": 1e-06, + "loss": 0.3536, + "num_input_tokens_seen": 455826364, + "step": 8135 + }, + { + "epoch": 18.11804008908686, + "loss": 0.39287176728248596, + "loss_ce": 7.998933142516762e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.00933837890625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 455826364, + "step": 8135 + }, + { + "epoch": 18.120267260579066, + "grad_norm": 18.5450382232666, + "learning_rate": 1e-06, + "loss": 0.5557, + "num_input_tokens_seen": 455882520, + "step": 8136 + }, + { + "epoch": 18.120267260579066, + "loss": 0.4983789920806885, + "loss_ce": 8.797197369858623e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.0189208984375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 455882520, + "step": 8136 + }, + { + "epoch": 18.12249443207127, + "grad_norm": 21.666677474975586, + "learning_rate": 1e-06, + "loss": 0.6017, + "num_input_tokens_seen": 455937676, + "step": 8137 + }, + { + "epoch": 18.12249443207127, + "loss": 0.7193117141723633, + "loss_ce": 7.343379547819495e-05, + "loss_iou": 0.31640625, + "loss_num": 0.017333984375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 455937676, + "step": 8137 + }, + { + "epoch": 18.124721603563476, + "grad_norm": 16.64803695678711, + "learning_rate": 1e-06, + "loss": 0.2924, + "num_input_tokens_seen": 455993536, + "step": 8138 + }, + { + "epoch": 18.124721603563476, + "loss": 0.39221036434173584, + "loss_ce": 0.00018156672012992203, + "loss_iou": 0.1533203125, + "loss_num": 0.016845703125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 455993536, + "step": 8138 + }, + { + "epoch": 18.12694877505568, + "grad_norm": 19.38474464416504, + "learning_rate": 1e-06, + "loss": 0.3963, + "num_input_tokens_seen": 456050968, + "step": 8139 + }, + { + "epoch": 18.12694877505568, + "loss": 0.3352212905883789, + "loss_ce": 7.726570765953511e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.0079345703125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 456050968, + "step": 8139 + }, + { + "epoch": 18.129175946547885, + "grad_norm": 19.141530990600586, + "learning_rate": 1e-06, + "loss": 0.3805, + "num_input_tokens_seen": 456106612, + "step": 8140 + }, + { + "epoch": 18.129175946547885, + "loss": 0.3776403069496155, + "loss_ce": 7.682391151320189e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0084228515625, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 456106612, + "step": 8140 + }, + { + "epoch": 18.13140311804009, + "grad_norm": 26.25833511352539, + "learning_rate": 1e-06, + "loss": 0.4945, + "num_input_tokens_seen": 456162008, + "step": 8141 + }, + { + "epoch": 18.13140311804009, + "loss": 0.3847208023071289, + "loss_ce": 7.726027979515493e-05, + "loss_iou": 0.177734375, + "loss_num": 0.005889892578125, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 456162008, + "step": 8141 + }, + { + "epoch": 18.133630289532295, + "grad_norm": 16.26892852783203, + "learning_rate": 1e-06, + "loss": 0.3777, + "num_input_tokens_seen": 456218592, + "step": 8142 + }, + { + "epoch": 18.133630289532295, + "loss": 0.29285314679145813, + "loss_ce": 9.801473061088473e-05, + "loss_iou": 0.1142578125, + "loss_num": 0.0128173828125, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 456218592, + "step": 8142 + }, + { + "epoch": 18.1358574610245, + "grad_norm": 13.948265075683594, + "learning_rate": 1e-06, + "loss": 0.3755, + "num_input_tokens_seen": 456276108, + "step": 8143 + }, + { + "epoch": 18.1358574610245, + "loss": 0.2600770592689514, + "loss_ce": 6.72862006467767e-05, + "loss_iou": 0.1181640625, + "loss_num": 0.0047607421875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 456276108, + "step": 8143 + }, + { + "epoch": 18.138084632516705, + "grad_norm": 14.333640098571777, + "learning_rate": 1e-06, + "loss": 0.3341, + "num_input_tokens_seen": 456331636, + "step": 8144 + }, + { + "epoch": 18.138084632516705, + "loss": 0.37764212489128113, + "loss_ce": 0.0008721151389181614, + "loss_iou": 0.169921875, + "loss_num": 0.00738525390625, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 456331636, + "step": 8144 + }, + { + "epoch": 18.14031180400891, + "grad_norm": 16.1193904876709, + "learning_rate": 1e-06, + "loss": 0.2635, + "num_input_tokens_seen": 456388988, + "step": 8145 + }, + { + "epoch": 18.14031180400891, + "loss": 0.30690258741378784, + "loss_ce": 7.886123057687655e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.0084228515625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 456388988, + "step": 8145 + }, + { + "epoch": 18.142538975501115, + "grad_norm": 16.353914260864258, + "learning_rate": 1e-06, + "loss": 0.371, + "num_input_tokens_seen": 456447512, + "step": 8146 + }, + { + "epoch": 18.142538975501115, + "loss": 0.4683547616004944, + "loss_ce": 9.304599370807409e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 456447512, + "step": 8146 + }, + { + "epoch": 18.14476614699332, + "grad_norm": 21.104829788208008, + "learning_rate": 1e-06, + "loss": 0.3263, + "num_input_tokens_seen": 456506516, + "step": 8147 + }, + { + "epoch": 18.14476614699332, + "loss": 0.24555513262748718, + "loss_ce": 7.17366419848986e-05, + "loss_iou": 0.111328125, + "loss_num": 0.00457763671875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 456506516, + "step": 8147 + }, + { + "epoch": 18.146993318485524, + "grad_norm": 16.034555435180664, + "learning_rate": 1e-06, + "loss": 0.4458, + "num_input_tokens_seen": 456563372, + "step": 8148 + }, + { + "epoch": 18.146993318485524, + "loss": 0.2350102663040161, + "loss_ce": 8.594193059252575e-05, + "loss_iou": 0.10888671875, + "loss_num": 0.00335693359375, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 456563372, + "step": 8148 + }, + { + "epoch": 18.14922048997773, + "grad_norm": 21.92203140258789, + "learning_rate": 1e-06, + "loss": 0.5644, + "num_input_tokens_seen": 456620288, + "step": 8149 + }, + { + "epoch": 18.14922048997773, + "loss": 0.2608814239501953, + "loss_ce": 7.819505117367953e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.0027313232421875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 456620288, + "step": 8149 + }, + { + "epoch": 18.151447661469934, + "grad_norm": 14.489221572875977, + "learning_rate": 1e-06, + "loss": 0.306, + "num_input_tokens_seen": 456677872, + "step": 8150 + }, + { + "epoch": 18.151447661469934, + "loss": 0.31026989221572876, + "loss_ce": 8.923574932850897e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.00677490234375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 456677872, + "step": 8150 + }, + { + "epoch": 18.15367483296214, + "grad_norm": 15.693221092224121, + "learning_rate": 1e-06, + "loss": 0.3977, + "num_input_tokens_seen": 456730576, + "step": 8151 + }, + { + "epoch": 18.15367483296214, + "loss": 0.35370901226997375, + "loss_ce": 7.132487371563911e-05, + "loss_iou": 0.15234375, + "loss_num": 0.00970458984375, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 456730576, + "step": 8151 + }, + { + "epoch": 18.155902004454344, + "grad_norm": 21.944501876831055, + "learning_rate": 1e-06, + "loss": 0.4647, + "num_input_tokens_seen": 456787084, + "step": 8152 + }, + { + "epoch": 18.155902004454344, + "loss": 0.48086851835250854, + "loss_ce": 9.459961438551545e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0201416015625, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 456787084, + "step": 8152 + }, + { + "epoch": 18.15812917594655, + "grad_norm": 17.893390655517578, + "learning_rate": 1e-06, + "loss": 0.409, + "num_input_tokens_seen": 456845316, + "step": 8153 + }, + { + "epoch": 18.15812917594655, + "loss": 0.4042748212814331, + "loss_ce": 0.00010002141789300367, + "loss_iou": 0.1875, + "loss_num": 0.005706787109375, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 456845316, + "step": 8153 + }, + { + "epoch": 18.160356347438753, + "grad_norm": 18.064180374145508, + "learning_rate": 1e-06, + "loss": 0.3617, + "num_input_tokens_seen": 456902888, + "step": 8154 + }, + { + "epoch": 18.160356347438753, + "loss": 0.41487082839012146, + "loss_ce": 7.58875539759174e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.016357421875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 456902888, + "step": 8154 + }, + { + "epoch": 18.16258351893096, + "grad_norm": 16.817781448364258, + "learning_rate": 1e-06, + "loss": 0.4589, + "num_input_tokens_seen": 456955400, + "step": 8155 + }, + { + "epoch": 18.16258351893096, + "loss": 0.702592134475708, + "loss_ce": 0.00013854095595888793, + "loss_iou": 0.287109375, + "loss_num": 0.025390625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 456955400, + "step": 8155 + }, + { + "epoch": 18.164810690423163, + "grad_norm": 16.054969787597656, + "learning_rate": 1e-06, + "loss": 0.3455, + "num_input_tokens_seen": 457011064, + "step": 8156 + }, + { + "epoch": 18.164810690423163, + "loss": 0.3856889605522156, + "loss_ce": 6.883072637720034e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.008056640625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 457011064, + "step": 8156 + }, + { + "epoch": 18.167037861915368, + "grad_norm": 16.399335861206055, + "learning_rate": 1e-06, + "loss": 0.3965, + "num_input_tokens_seen": 457064464, + "step": 8157 + }, + { + "epoch": 18.167037861915368, + "loss": 0.4999640882015228, + "loss_ce": 8.612703823018819e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0177001953125, + "loss_xval": 0.5, + "num_input_tokens_seen": 457064464, + "step": 8157 + }, + { + "epoch": 18.169265033407573, + "grad_norm": 26.273801803588867, + "learning_rate": 1e-06, + "loss": 0.2832, + "num_input_tokens_seen": 457120308, + "step": 8158 + }, + { + "epoch": 18.169265033407573, + "loss": 0.3040543496608734, + "loss_ce": 9.926770871970803e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.0048828125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 457120308, + "step": 8158 + }, + { + "epoch": 18.171492204899778, + "grad_norm": 21.627939224243164, + "learning_rate": 1e-06, + "loss": 0.5024, + "num_input_tokens_seen": 457175336, + "step": 8159 + }, + { + "epoch": 18.171492204899778, + "loss": 0.5851208567619324, + "loss_ce": 9.887100895866752e-05, + "loss_iou": 0.267578125, + "loss_num": 0.01031494140625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 457175336, + "step": 8159 + }, + { + "epoch": 18.173719376391983, + "grad_norm": 36.85986328125, + "learning_rate": 1e-06, + "loss": 0.5207, + "num_input_tokens_seen": 457232364, + "step": 8160 + }, + { + "epoch": 18.173719376391983, + "loss": 0.4390491247177124, + "loss_ce": 8.427293505519629e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0086669921875, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 457232364, + "step": 8160 + }, + { + "epoch": 18.175946547884188, + "grad_norm": 27.42588996887207, + "learning_rate": 1e-06, + "loss": 0.4134, + "num_input_tokens_seen": 457287852, + "step": 8161 + }, + { + "epoch": 18.175946547884188, + "loss": 0.3699635863304138, + "loss_ce": 9.052493987837806e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 457287852, + "step": 8161 + }, + { + "epoch": 18.178173719376392, + "grad_norm": 19.563459396362305, + "learning_rate": 1e-06, + "loss": 0.4402, + "num_input_tokens_seen": 457342640, + "step": 8162 + }, + { + "epoch": 18.178173719376392, + "loss": 0.5288839936256409, + "loss_ce": 7.537108467658982e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.01708984375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 457342640, + "step": 8162 + }, + { + "epoch": 18.180400890868597, + "grad_norm": 17.342567443847656, + "learning_rate": 1e-06, + "loss": 0.3396, + "num_input_tokens_seen": 457396200, + "step": 8163 + }, + { + "epoch": 18.180400890868597, + "loss": 0.36558669805526733, + "loss_ce": 7.767054921714589e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.005340576171875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 457396200, + "step": 8163 + }, + { + "epoch": 18.182628062360802, + "grad_norm": 21.52007293701172, + "learning_rate": 1e-06, + "loss": 0.3781, + "num_input_tokens_seen": 457453456, + "step": 8164 + }, + { + "epoch": 18.182628062360802, + "loss": 0.2819899618625641, + "loss_ce": 6.859673885628581e-05, + "loss_iou": 0.11767578125, + "loss_num": 0.00933837890625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 457453456, + "step": 8164 + }, + { + "epoch": 18.184855233853007, + "grad_norm": 23.580726623535156, + "learning_rate": 1e-06, + "loss": 0.4272, + "num_input_tokens_seen": 457510128, + "step": 8165 + }, + { + "epoch": 18.184855233853007, + "loss": 0.5174942016601562, + "loss_ce": 0.00016021478222683072, + "loss_iou": 0.2353515625, + "loss_num": 0.00921630859375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 457510128, + "step": 8165 + }, + { + "epoch": 18.187082405345212, + "grad_norm": 17.68349266052246, + "learning_rate": 1e-06, + "loss": 0.3081, + "num_input_tokens_seen": 457566892, + "step": 8166 + }, + { + "epoch": 18.187082405345212, + "loss": 0.4296377897262573, + "loss_ce": 7.235370139824226e-05, + "loss_iou": 0.173828125, + "loss_num": 0.016357421875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 457566892, + "step": 8166 + }, + { + "epoch": 18.189309576837417, + "grad_norm": 15.33013916015625, + "learning_rate": 1e-06, + "loss": 0.3858, + "num_input_tokens_seen": 457624908, + "step": 8167 + }, + { + "epoch": 18.189309576837417, + "loss": 0.34201037883758545, + "loss_ce": 9.140933980233967e-05, + "loss_iou": 0.154296875, + "loss_num": 0.00677490234375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 457624908, + "step": 8167 + }, + { + "epoch": 18.19153674832962, + "grad_norm": 25.988941192626953, + "learning_rate": 1e-06, + "loss": 0.2713, + "num_input_tokens_seen": 457681264, + "step": 8168 + }, + { + "epoch": 18.19153674832962, + "loss": 0.2663070559501648, + "loss_ce": 7.169348828028888e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.004364013671875, + "loss_xval": 0.265625, + "num_input_tokens_seen": 457681264, + "step": 8168 + }, + { + "epoch": 18.193763919821826, + "grad_norm": 17.212726593017578, + "learning_rate": 1e-06, + "loss": 0.3666, + "num_input_tokens_seen": 457737384, + "step": 8169 + }, + { + "epoch": 18.193763919821826, + "loss": 0.3459562063217163, + "loss_ce": 8.524451550329104e-05, + "loss_iou": 0.142578125, + "loss_num": 0.01214599609375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 457737384, + "step": 8169 + }, + { + "epoch": 18.19599109131403, + "grad_norm": 42.238746643066406, + "learning_rate": 1e-06, + "loss": 0.4758, + "num_input_tokens_seen": 457792120, + "step": 8170 + }, + { + "epoch": 18.19599109131403, + "loss": 0.571884036064148, + "loss_ce": 0.00010671824566088617, + "loss_iou": 0.2578125, + "loss_num": 0.0115966796875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 457792120, + "step": 8170 + }, + { + "epoch": 18.198218262806236, + "grad_norm": 18.594070434570312, + "learning_rate": 1e-06, + "loss": 0.4922, + "num_input_tokens_seen": 457846880, + "step": 8171 + }, + { + "epoch": 18.198218262806236, + "loss": 0.5872501134872437, + "loss_ce": 9.18953082873486e-05, + "loss_iou": 0.263671875, + "loss_num": 0.01165771484375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 457846880, + "step": 8171 + }, + { + "epoch": 18.20044543429844, + "grad_norm": 16.824237823486328, + "learning_rate": 1e-06, + "loss": 0.5248, + "num_input_tokens_seen": 457901312, + "step": 8172 + }, + { + "epoch": 18.20044543429844, + "loss": 0.3980518877506256, + "loss_ce": 0.00010268213372910395, + "loss_iou": 0.1796875, + "loss_num": 0.0079345703125, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 457901312, + "step": 8172 + }, + { + "epoch": 18.202672605790646, + "grad_norm": 14.367027282714844, + "learning_rate": 1e-06, + "loss": 0.5356, + "num_input_tokens_seen": 457956652, + "step": 8173 + }, + { + "epoch": 18.202672605790646, + "loss": 0.544747531414032, + "loss_ce": 6.98043149895966e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.0242919921875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 457956652, + "step": 8173 + }, + { + "epoch": 18.20489977728285, + "grad_norm": 16.34149742126465, + "learning_rate": 1e-06, + "loss": 0.4287, + "num_input_tokens_seen": 458012632, + "step": 8174 + }, + { + "epoch": 18.20489977728285, + "loss": 0.5932860374450684, + "loss_ce": 0.0001463656226405874, + "loss_iou": 0.240234375, + "loss_num": 0.022705078125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 458012632, + "step": 8174 + }, + { + "epoch": 18.207126948775056, + "grad_norm": 14.004996299743652, + "learning_rate": 1e-06, + "loss": 0.3419, + "num_input_tokens_seen": 458068888, + "step": 8175 + }, + { + "epoch": 18.207126948775056, + "loss": 0.31001418828964233, + "loss_ce": 7.767122588120401e-05, + "loss_iou": 0.134765625, + "loss_num": 0.008056640625, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 458068888, + "step": 8175 + }, + { + "epoch": 18.20935412026726, + "grad_norm": 15.622814178466797, + "learning_rate": 1e-06, + "loss": 0.7315, + "num_input_tokens_seen": 458126280, + "step": 8176 + }, + { + "epoch": 18.20935412026726, + "loss": 0.942706286907196, + "loss_ce": 7.930257561383769e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0302734375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 458126280, + "step": 8176 + }, + { + "epoch": 18.211581291759465, + "grad_norm": 19.05216407775879, + "learning_rate": 1e-06, + "loss": 0.4613, + "num_input_tokens_seen": 458184116, + "step": 8177 + }, + { + "epoch": 18.211581291759465, + "loss": 0.4942248463630676, + "loss_ce": 8.419141522608697e-05, + "loss_iou": 0.224609375, + "loss_num": 0.009033203125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 458184116, + "step": 8177 + }, + { + "epoch": 18.21380846325167, + "grad_norm": 14.203112602233887, + "learning_rate": 1e-06, + "loss": 0.2897, + "num_input_tokens_seen": 458239480, + "step": 8178 + }, + { + "epoch": 18.21380846325167, + "loss": 0.18030065298080444, + "loss_ce": 7.909360283520073e-05, + "loss_iou": 0.06982421875, + "loss_num": 0.0081787109375, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 458239480, + "step": 8178 + }, + { + "epoch": 18.216035634743875, + "grad_norm": 18.163633346557617, + "learning_rate": 1e-06, + "loss": 0.3865, + "num_input_tokens_seen": 458296128, + "step": 8179 + }, + { + "epoch": 18.216035634743875, + "loss": 0.39704078435897827, + "loss_ce": 6.810402555856854e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00994873046875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 458296128, + "step": 8179 + }, + { + "epoch": 18.21826280623608, + "grad_norm": 11.550317764282227, + "learning_rate": 1e-06, + "loss": 0.2529, + "num_input_tokens_seen": 458351960, + "step": 8180 + }, + { + "epoch": 18.21826280623608, + "loss": 0.25586456060409546, + "loss_ce": 6.619263149332255e-05, + "loss_iou": 0.1142578125, + "loss_num": 0.005523681640625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 458351960, + "step": 8180 + }, + { + "epoch": 18.220489977728285, + "grad_norm": 22.90071678161621, + "learning_rate": 1e-06, + "loss": 0.2964, + "num_input_tokens_seen": 458407868, + "step": 8181 + }, + { + "epoch": 18.220489977728285, + "loss": 0.27848535776138306, + "loss_ce": 7.347905193455517e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.005279541015625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 458407868, + "step": 8181 + }, + { + "epoch": 18.22271714922049, + "grad_norm": 19.092378616333008, + "learning_rate": 1e-06, + "loss": 0.2401, + "num_input_tokens_seen": 458464468, + "step": 8182 + }, + { + "epoch": 18.22271714922049, + "loss": 0.25201308727264404, + "loss_ce": 5.996804611640982e-05, + "loss_iou": 0.11181640625, + "loss_num": 0.00567626953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 458464468, + "step": 8182 + }, + { + "epoch": 18.224944320712694, + "grad_norm": 21.963666915893555, + "learning_rate": 1e-06, + "loss": 0.4337, + "num_input_tokens_seen": 458520888, + "step": 8183 + }, + { + "epoch": 18.224944320712694, + "loss": 0.37996935844421387, + "loss_ce": 8.6537329480052e-05, + "loss_iou": 0.158203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 458520888, + "step": 8183 + }, + { + "epoch": 18.2271714922049, + "grad_norm": 25.58934211730957, + "learning_rate": 1e-06, + "loss": 0.5241, + "num_input_tokens_seen": 458575140, + "step": 8184 + }, + { + "epoch": 18.2271714922049, + "loss": 0.3485790491104126, + "loss_ce": 6.830115307820961e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.01324462890625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 458575140, + "step": 8184 + }, + { + "epoch": 18.229398663697104, + "grad_norm": 12.794227600097656, + "learning_rate": 1e-06, + "loss": 0.3202, + "num_input_tokens_seen": 458631632, + "step": 8185 + }, + { + "epoch": 18.229398663697104, + "loss": 0.21777865290641785, + "loss_ce": 6.625376408919692e-05, + "loss_iou": 0.07861328125, + "loss_num": 0.0120849609375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 458631632, + "step": 8185 + }, + { + "epoch": 18.23162583518931, + "grad_norm": 18.660388946533203, + "learning_rate": 1e-06, + "loss": 0.4079, + "num_input_tokens_seen": 458688408, + "step": 8186 + }, + { + "epoch": 18.23162583518931, + "loss": 0.34663307666778564, + "loss_ce": 7.544427353423089e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.008544921875, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 458688408, + "step": 8186 + }, + { + "epoch": 18.233853006681514, + "grad_norm": 23.186193466186523, + "learning_rate": 1e-06, + "loss": 0.5892, + "num_input_tokens_seen": 458743956, + "step": 8187 + }, + { + "epoch": 18.233853006681514, + "loss": 0.42682188749313354, + "loss_ce": 6.407788896467537e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0081787109375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 458743956, + "step": 8187 + }, + { + "epoch": 18.23608017817372, + "grad_norm": 21.697927474975586, + "learning_rate": 1e-06, + "loss": 0.3607, + "num_input_tokens_seen": 458798376, + "step": 8188 + }, + { + "epoch": 18.23608017817372, + "loss": 0.3312641382217407, + "loss_ce": 8.736809832043946e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0087890625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 458798376, + "step": 8188 + }, + { + "epoch": 18.238307349665924, + "grad_norm": 14.134407997131348, + "learning_rate": 1e-06, + "loss": 0.4647, + "num_input_tokens_seen": 458852484, + "step": 8189 + }, + { + "epoch": 18.238307349665924, + "loss": 0.35396501421928406, + "loss_ce": 8.317639003507793e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00567626953125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 458852484, + "step": 8189 + }, + { + "epoch": 18.24053452115813, + "grad_norm": 38.25514602661133, + "learning_rate": 1e-06, + "loss": 0.613, + "num_input_tokens_seen": 458906996, + "step": 8190 + }, + { + "epoch": 18.24053452115813, + "loss": 0.5307399034500122, + "loss_ce": 0.00022236474615056068, + "loss_iou": 0.23828125, + "loss_num": 0.0108642578125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 458906996, + "step": 8190 + }, + { + "epoch": 18.242761692650333, + "grad_norm": 15.433220863342285, + "learning_rate": 1e-06, + "loss": 0.3704, + "num_input_tokens_seen": 458964332, + "step": 8191 + }, + { + "epoch": 18.242761692650333, + "loss": 0.35981857776641846, + "loss_ce": 7.736931729596108e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0072021484375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 458964332, + "step": 8191 + }, + { + "epoch": 18.244988864142538, + "grad_norm": 20.43923568725586, + "learning_rate": 1e-06, + "loss": 0.3444, + "num_input_tokens_seen": 459017136, + "step": 8192 + }, + { + "epoch": 18.244988864142538, + "loss": 0.4209337830543518, + "loss_ce": 9.635718015488237e-05, + "loss_iou": 0.189453125, + "loss_num": 0.00823974609375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 459017136, + "step": 8192 + }, + { + "epoch": 18.247216035634743, + "grad_norm": 17.763324737548828, + "learning_rate": 1e-06, + "loss": 0.373, + "num_input_tokens_seen": 459070944, + "step": 8193 + }, + { + "epoch": 18.247216035634743, + "loss": 0.2321740686893463, + "loss_ce": 8.788384002400562e-05, + "loss_iou": 0.10400390625, + "loss_num": 0.004730224609375, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 459070944, + "step": 8193 + }, + { + "epoch": 18.249443207126948, + "grad_norm": 25.71845245361328, + "learning_rate": 1e-06, + "loss": 0.2999, + "num_input_tokens_seen": 459126108, + "step": 8194 + }, + { + "epoch": 18.249443207126948, + "loss": 0.31814372539520264, + "loss_ce": 8.950403571361676e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.00634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 459126108, + "step": 8194 + }, + { + "epoch": 18.251670378619153, + "grad_norm": 36.80335235595703, + "learning_rate": 1e-06, + "loss": 0.5035, + "num_input_tokens_seen": 459183928, + "step": 8195 + }, + { + "epoch": 18.251670378619153, + "loss": 0.5821275115013123, + "loss_ce": 9.623746882425621e-05, + "loss_iou": 0.2578125, + "loss_num": 0.01324462890625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 459183928, + "step": 8195 + }, + { + "epoch": 18.253897550111358, + "grad_norm": 19.333406448364258, + "learning_rate": 1e-06, + "loss": 0.3609, + "num_input_tokens_seen": 459241312, + "step": 8196 + }, + { + "epoch": 18.253897550111358, + "loss": 0.4230514466762543, + "loss_ce": 7.78458925196901e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.0123291015625, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 459241312, + "step": 8196 + }, + { + "epoch": 18.256124721603562, + "grad_norm": 22.73419189453125, + "learning_rate": 1e-06, + "loss": 0.5667, + "num_input_tokens_seen": 459298740, + "step": 8197 + }, + { + "epoch": 18.256124721603562, + "loss": 0.6623133420944214, + "loss_ce": 8.191996312234551e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0096435546875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 459298740, + "step": 8197 + }, + { + "epoch": 18.258351893095767, + "grad_norm": 19.6635684967041, + "learning_rate": 1e-06, + "loss": 0.35, + "num_input_tokens_seen": 459354172, + "step": 8198 + }, + { + "epoch": 18.258351893095767, + "loss": 0.2642420530319214, + "loss_ce": 8.189349318854511e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.004058837890625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 459354172, + "step": 8198 + }, + { + "epoch": 18.260579064587972, + "grad_norm": 14.02831745147705, + "learning_rate": 1e-06, + "loss": 0.3704, + "num_input_tokens_seen": 459409124, + "step": 8199 + }, + { + "epoch": 18.260579064587972, + "loss": 0.37897345423698425, + "loss_ce": 6.719774683006108e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.006561279296875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 459409124, + "step": 8199 + }, + { + "epoch": 18.262806236080177, + "grad_norm": 21.54979133605957, + "learning_rate": 1e-06, + "loss": 0.469, + "num_input_tokens_seen": 459463080, + "step": 8200 + }, + { + "epoch": 18.262806236080177, + "loss": 0.46509498357772827, + "loss_ce": 6.810689228586853e-05, + "loss_iou": 0.18359375, + "loss_num": 0.01953125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 459463080, + "step": 8200 + }, + { + "epoch": 18.265033407572382, + "grad_norm": 15.60781192779541, + "learning_rate": 1e-06, + "loss": 0.3833, + "num_input_tokens_seen": 459518640, + "step": 8201 + }, + { + "epoch": 18.265033407572382, + "loss": 0.31758350133895874, + "loss_ce": 7.863431528676301e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.008544921875, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 459518640, + "step": 8201 + }, + { + "epoch": 18.267260579064587, + "grad_norm": 25.999467849731445, + "learning_rate": 1e-06, + "loss": 0.4274, + "num_input_tokens_seen": 459574496, + "step": 8202 + }, + { + "epoch": 18.267260579064587, + "loss": 0.3373531103134155, + "loss_ce": 7.284604362212121e-05, + "loss_iou": 0.150390625, + "loss_num": 0.0072021484375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 459574496, + "step": 8202 + }, + { + "epoch": 18.26948775055679, + "grad_norm": 23.961406707763672, + "learning_rate": 1e-06, + "loss": 0.2495, + "num_input_tokens_seen": 459631000, + "step": 8203 + }, + { + "epoch": 18.26948775055679, + "loss": 0.2549530863761902, + "loss_ce": 7.027012179605663e-05, + "loss_iou": 0.1162109375, + "loss_num": 0.004547119140625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 459631000, + "step": 8203 + }, + { + "epoch": 18.271714922048996, + "grad_norm": 13.854894638061523, + "learning_rate": 1e-06, + "loss": 0.4288, + "num_input_tokens_seen": 459689184, + "step": 8204 + }, + { + "epoch": 18.271714922048996, + "loss": 0.32967978715896606, + "loss_ce": 8.993731171358377e-05, + "loss_iou": 0.150390625, + "loss_num": 0.005767822265625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 459689184, + "step": 8204 + }, + { + "epoch": 18.2739420935412, + "grad_norm": 15.130828857421875, + "learning_rate": 1e-06, + "loss": 0.331, + "num_input_tokens_seen": 459744348, + "step": 8205 + }, + { + "epoch": 18.2739420935412, + "loss": 0.3028114438056946, + "loss_ce": 0.0002601708984002471, + "loss_iou": 0.1357421875, + "loss_num": 0.00628662109375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 459744348, + "step": 8205 + }, + { + "epoch": 18.276169265033406, + "grad_norm": 22.3679256439209, + "learning_rate": 1e-06, + "loss": 0.4245, + "num_input_tokens_seen": 459800880, + "step": 8206 + }, + { + "epoch": 18.276169265033406, + "loss": 0.3924823999404907, + "loss_ce": 8.740788325667381e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00921630859375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 459800880, + "step": 8206 + }, + { + "epoch": 18.27839643652561, + "grad_norm": 18.131832122802734, + "learning_rate": 1e-06, + "loss": 0.4461, + "num_input_tokens_seen": 459856516, + "step": 8207 + }, + { + "epoch": 18.27839643652561, + "loss": 0.4801885783672333, + "loss_ce": 8.601776062278077e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0203857421875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 459856516, + "step": 8207 + }, + { + "epoch": 18.280623608017816, + "grad_norm": 17.009628295898438, + "learning_rate": 1e-06, + "loss": 0.4747, + "num_input_tokens_seen": 459912960, + "step": 8208 + }, + { + "epoch": 18.280623608017816, + "loss": 0.41185104846954346, + "loss_ce": 0.00010787278006318957, + "loss_iou": 0.181640625, + "loss_num": 0.0096435546875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 459912960, + "step": 8208 + }, + { + "epoch": 18.28285077951002, + "grad_norm": 16.9517765045166, + "learning_rate": 1e-06, + "loss": 0.4285, + "num_input_tokens_seen": 459971620, + "step": 8209 + }, + { + "epoch": 18.28285077951002, + "loss": 0.43293631076812744, + "loss_ce": 7.499181083403528e-05, + "loss_iou": 0.181640625, + "loss_num": 0.01409912109375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 459971620, + "step": 8209 + }, + { + "epoch": 18.285077951002226, + "grad_norm": 19.251338958740234, + "learning_rate": 1e-06, + "loss": 0.3578, + "num_input_tokens_seen": 460028300, + "step": 8210 + }, + { + "epoch": 18.285077951002226, + "loss": 0.38544806838035583, + "loss_ce": 7.210190233308822e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01214599609375, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 460028300, + "step": 8210 + }, + { + "epoch": 18.28730512249443, + "grad_norm": 18.597429275512695, + "learning_rate": 1e-06, + "loss": 0.3693, + "num_input_tokens_seen": 460084228, + "step": 8211 + }, + { + "epoch": 18.28730512249443, + "loss": 0.2983230948448181, + "loss_ce": 0.00022741041902918369, + "loss_iou": 0.12890625, + "loss_num": 0.00811767578125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 460084228, + "step": 8211 + }, + { + "epoch": 18.289532293986635, + "grad_norm": 17.22545623779297, + "learning_rate": 1e-06, + "loss": 0.264, + "num_input_tokens_seen": 460139016, + "step": 8212 + }, + { + "epoch": 18.289532293986635, + "loss": 0.2883983552455902, + "loss_ce": 6.828906043665484e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.005767822265625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 460139016, + "step": 8212 + }, + { + "epoch": 18.29175946547884, + "grad_norm": 34.40812301635742, + "learning_rate": 1e-06, + "loss": 0.4567, + "num_input_tokens_seen": 460194692, + "step": 8213 + }, + { + "epoch": 18.29175946547884, + "loss": 0.3748340904712677, + "loss_ce": 7.823290070518851e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00909423828125, + "loss_xval": 0.375, + "num_input_tokens_seen": 460194692, + "step": 8213 + }, + { + "epoch": 18.293986636971045, + "grad_norm": 21.1818904876709, + "learning_rate": 1e-06, + "loss": 0.51, + "num_input_tokens_seen": 460251096, + "step": 8214 + }, + { + "epoch": 18.293986636971045, + "loss": 0.6246699690818787, + "loss_ce": 0.00015822870773263276, + "loss_iou": 0.25390625, + "loss_num": 0.0234375, + "loss_xval": 0.625, + "num_input_tokens_seen": 460251096, + "step": 8214 + }, + { + "epoch": 18.29621380846325, + "grad_norm": 20.41522216796875, + "learning_rate": 1e-06, + "loss": 0.4014, + "num_input_tokens_seen": 460309636, + "step": 8215 + }, + { + "epoch": 18.29621380846325, + "loss": 0.46027785539627075, + "loss_ce": 7.279778947122395e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.007171630859375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 460309636, + "step": 8215 + }, + { + "epoch": 18.29844097995546, + "grad_norm": 17.60694694519043, + "learning_rate": 1e-06, + "loss": 0.3979, + "num_input_tokens_seen": 460365224, + "step": 8216 + }, + { + "epoch": 18.29844097995546, + "loss": 0.3157610297203064, + "loss_ce": 8.721975609660149e-05, + "loss_iou": 0.125, + "loss_num": 0.012939453125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 460365224, + "step": 8216 + }, + { + "epoch": 18.30066815144766, + "grad_norm": 27.434608459472656, + "learning_rate": 1e-06, + "loss": 0.4209, + "num_input_tokens_seen": 460423160, + "step": 8217 + }, + { + "epoch": 18.30066815144766, + "loss": 0.5255805850028992, + "loss_ce": 6.787220627302304e-05, + "loss_iou": 0.23828125, + "loss_num": 0.00982666015625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 460423160, + "step": 8217 + }, + { + "epoch": 18.302895322939868, + "grad_norm": 15.852324485778809, + "learning_rate": 1e-06, + "loss": 0.3734, + "num_input_tokens_seen": 460480832, + "step": 8218 + }, + { + "epoch": 18.302895322939868, + "loss": 0.3597148060798645, + "loss_ce": 9.567091183271259e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00787353515625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 460480832, + "step": 8218 + }, + { + "epoch": 18.305122494432073, + "grad_norm": 16.711715698242188, + "learning_rate": 1e-06, + "loss": 0.5244, + "num_input_tokens_seen": 460534584, + "step": 8219 + }, + { + "epoch": 18.305122494432073, + "loss": 0.605729341506958, + "loss_ce": 7.750350050628185e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.0301513671875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 460534584, + "step": 8219 + }, + { + "epoch": 18.307349665924278, + "grad_norm": 24.398696899414062, + "learning_rate": 1e-06, + "loss": 0.4038, + "num_input_tokens_seen": 460591852, + "step": 8220 + }, + { + "epoch": 18.307349665924278, + "loss": 0.3758271336555481, + "loss_ce": 9.471758676227182e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0113525390625, + "loss_xval": 0.375, + "num_input_tokens_seen": 460591852, + "step": 8220 + }, + { + "epoch": 18.309576837416483, + "grad_norm": 15.600603103637695, + "learning_rate": 1e-06, + "loss": 0.4869, + "num_input_tokens_seen": 460650120, + "step": 8221 + }, + { + "epoch": 18.309576837416483, + "loss": 0.6405990123748779, + "loss_ce": 9.607061656424776e-05, + "loss_iou": 0.287109375, + "loss_num": 0.01348876953125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 460650120, + "step": 8221 + }, + { + "epoch": 18.311804008908688, + "grad_norm": 14.608988761901855, + "learning_rate": 1e-06, + "loss": 0.3083, + "num_input_tokens_seen": 460708680, + "step": 8222 + }, + { + "epoch": 18.311804008908688, + "loss": 0.31593137979507446, + "loss_ce": 7.447078678524122e-05, + "loss_iou": 0.12890625, + "loss_num": 0.01153564453125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 460708680, + "step": 8222 + }, + { + "epoch": 18.314031180400892, + "grad_norm": 12.612223625183105, + "learning_rate": 1e-06, + "loss": 0.431, + "num_input_tokens_seen": 460762612, + "step": 8223 + }, + { + "epoch": 18.314031180400892, + "loss": 0.5143847465515137, + "loss_ce": 0.00010250776540488005, + "loss_iou": 0.216796875, + "loss_num": 0.015869140625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 460762612, + "step": 8223 + }, + { + "epoch": 18.316258351893097, + "grad_norm": 19.262920379638672, + "learning_rate": 1e-06, + "loss": 0.5283, + "num_input_tokens_seen": 460814596, + "step": 8224 + }, + { + "epoch": 18.316258351893097, + "loss": 0.6533957123756409, + "loss_ce": 7.540466322097927e-05, + "loss_iou": 0.28125, + "loss_num": 0.0185546875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 460814596, + "step": 8224 + }, + { + "epoch": 18.318485523385302, + "grad_norm": 17.134294509887695, + "learning_rate": 1e-06, + "loss": 0.3269, + "num_input_tokens_seen": 460871852, + "step": 8225 + }, + { + "epoch": 18.318485523385302, + "loss": 0.2810301184654236, + "loss_ce": 8.527821046300232e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.003570556640625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 460871852, + "step": 8225 + }, + { + "epoch": 18.320712694877507, + "grad_norm": 16.73043441772461, + "learning_rate": 1e-06, + "loss": 0.4727, + "num_input_tokens_seen": 460928820, + "step": 8226 + }, + { + "epoch": 18.320712694877507, + "loss": 0.4601028263568878, + "loss_ce": 8.087064634310082e-05, + "loss_iou": 0.177734375, + "loss_num": 0.0208740234375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 460928820, + "step": 8226 + }, + { + "epoch": 18.322939866369712, + "grad_norm": 13.59985065460205, + "learning_rate": 1e-06, + "loss": 0.4533, + "num_input_tokens_seen": 460985356, + "step": 8227 + }, + { + "epoch": 18.322939866369712, + "loss": 0.43281298875808716, + "loss_ce": 0.00019579757645260543, + "loss_iou": 0.201171875, + "loss_num": 0.006256103515625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 460985356, + "step": 8227 + }, + { + "epoch": 18.325167037861917, + "grad_norm": 12.674905776977539, + "learning_rate": 1e-06, + "loss": 0.3026, + "num_input_tokens_seen": 461042168, + "step": 8228 + }, + { + "epoch": 18.325167037861917, + "loss": 0.3006775975227356, + "loss_ce": 7.942065712995827e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.00811767578125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 461042168, + "step": 8228 + }, + { + "epoch": 18.32739420935412, + "grad_norm": 17.03211784362793, + "learning_rate": 1e-06, + "loss": 0.4486, + "num_input_tokens_seen": 461097504, + "step": 8229 + }, + { + "epoch": 18.32739420935412, + "loss": 0.50324547290802, + "loss_ce": 7.163839472923428e-05, + "loss_iou": 0.21875, + "loss_num": 0.01324462890625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 461097504, + "step": 8229 + }, + { + "epoch": 18.329621380846326, + "grad_norm": 12.527713775634766, + "learning_rate": 1e-06, + "loss": 0.3258, + "num_input_tokens_seen": 461152312, + "step": 8230 + }, + { + "epoch": 18.329621380846326, + "loss": 0.2726093530654907, + "loss_ce": 7.59223330533132e-05, + "loss_iou": 0.10693359375, + "loss_num": 0.01177978515625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 461152312, + "step": 8230 + }, + { + "epoch": 18.33184855233853, + "grad_norm": 17.93193244934082, + "learning_rate": 1e-06, + "loss": 0.3163, + "num_input_tokens_seen": 461207068, + "step": 8231 + }, + { + "epoch": 18.33184855233853, + "loss": 0.2764345407485962, + "loss_ce": 6.737266085110605e-05, + "loss_iou": 0.1123046875, + "loss_num": 0.0103759765625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 461207068, + "step": 8231 + }, + { + "epoch": 18.334075723830736, + "grad_norm": 15.471948623657227, + "learning_rate": 1e-06, + "loss": 0.3574, + "num_input_tokens_seen": 461262992, + "step": 8232 + }, + { + "epoch": 18.334075723830736, + "loss": 0.38583964109420776, + "loss_ce": 6.69407527311705e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.007293701171875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 461262992, + "step": 8232 + }, + { + "epoch": 18.33630289532294, + "grad_norm": 19.41752815246582, + "learning_rate": 1e-06, + "loss": 0.2833, + "num_input_tokens_seen": 461317216, + "step": 8233 + }, + { + "epoch": 18.33630289532294, + "loss": 0.2918124198913574, + "loss_ce": 6.43762614345178e-05, + "loss_iou": 0.125, + "loss_num": 0.0081787109375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 461317216, + "step": 8233 + }, + { + "epoch": 18.338530066815146, + "grad_norm": 21.109203338623047, + "learning_rate": 1e-06, + "loss": 0.5718, + "num_input_tokens_seen": 461372448, + "step": 8234 + }, + { + "epoch": 18.338530066815146, + "loss": 0.7163882851600647, + "loss_ce": 7.966908742673695e-05, + "loss_iou": 0.330078125, + "loss_num": 0.01116943359375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 461372448, + "step": 8234 + }, + { + "epoch": 18.34075723830735, + "grad_norm": 11.767685890197754, + "learning_rate": 1e-06, + "loss": 0.3292, + "num_input_tokens_seen": 461426004, + "step": 8235 + }, + { + "epoch": 18.34075723830735, + "loss": 0.2593403160572052, + "loss_ce": 0.0002003002300625667, + "loss_iou": 0.11328125, + "loss_num": 0.0064697265625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 461426004, + "step": 8235 + }, + { + "epoch": 18.342984409799556, + "grad_norm": 17.519113540649414, + "learning_rate": 1e-06, + "loss": 0.3103, + "num_input_tokens_seen": 461482340, + "step": 8236 + }, + { + "epoch": 18.342984409799556, + "loss": 0.20156915485858917, + "loss_ce": 9.21003520488739e-05, + "loss_iou": 0.091796875, + "loss_num": 0.003631591796875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 461482340, + "step": 8236 + }, + { + "epoch": 18.34521158129176, + "grad_norm": 33.767704010009766, + "learning_rate": 1e-06, + "loss": 0.3042, + "num_input_tokens_seen": 461537340, + "step": 8237 + }, + { + "epoch": 18.34521158129176, + "loss": 0.31417620182037354, + "loss_ce": 8.927624730858952e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.007720947265625, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 461537340, + "step": 8237 + }, + { + "epoch": 18.347438752783965, + "grad_norm": 12.72602653503418, + "learning_rate": 1e-06, + "loss": 0.2971, + "num_input_tokens_seen": 461593824, + "step": 8238 + }, + { + "epoch": 18.347438752783965, + "loss": 0.3771233558654785, + "loss_ce": 7.865474617574364e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.014892578125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 461593824, + "step": 8238 + }, + { + "epoch": 18.34966592427617, + "grad_norm": 19.39862632751465, + "learning_rate": 1e-06, + "loss": 0.3585, + "num_input_tokens_seen": 461648568, + "step": 8239 + }, + { + "epoch": 18.34966592427617, + "loss": 0.33659249544143677, + "loss_ce": 7.516345067415386e-05, + "loss_iou": 0.1484375, + "loss_num": 0.007720947265625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 461648568, + "step": 8239 + }, + { + "epoch": 18.351893095768375, + "grad_norm": 19.25311851501465, + "learning_rate": 1e-06, + "loss": 0.6112, + "num_input_tokens_seen": 461705024, + "step": 8240 + }, + { + "epoch": 18.351893095768375, + "loss": 0.6639153361320496, + "loss_ce": 9.700231021270156e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0211181640625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 461705024, + "step": 8240 + }, + { + "epoch": 18.35412026726058, + "grad_norm": 13.526534080505371, + "learning_rate": 1e-06, + "loss": 0.2696, + "num_input_tokens_seen": 461762520, + "step": 8241 + }, + { + "epoch": 18.35412026726058, + "loss": 0.23994842171669006, + "loss_ce": 8.790172432782128e-05, + "loss_iou": 0.109375, + "loss_num": 0.004119873046875, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 461762520, + "step": 8241 + }, + { + "epoch": 18.356347438752785, + "grad_norm": 16.30290412902832, + "learning_rate": 1e-06, + "loss": 0.3445, + "num_input_tokens_seen": 461820448, + "step": 8242 + }, + { + "epoch": 18.356347438752785, + "loss": 0.34260737895965576, + "loss_ce": 7.808158989064395e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0057373046875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 461820448, + "step": 8242 + }, + { + "epoch": 18.35857461024499, + "grad_norm": 16.620193481445312, + "learning_rate": 1e-06, + "loss": 0.2799, + "num_input_tokens_seen": 461876008, + "step": 8243 + }, + { + "epoch": 18.35857461024499, + "loss": 0.2714713215827942, + "loss_ce": 0.00010899978951783851, + "loss_iou": 0.0947265625, + "loss_num": 0.016357421875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 461876008, + "step": 8243 + }, + { + "epoch": 18.360801781737194, + "grad_norm": 15.673192977905273, + "learning_rate": 1e-06, + "loss": 0.4081, + "num_input_tokens_seen": 461932492, + "step": 8244 + }, + { + "epoch": 18.360801781737194, + "loss": 0.37256646156311035, + "loss_ce": 6.890152872074395e-05, + "loss_iou": 0.15625, + "loss_num": 0.01220703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 461932492, + "step": 8244 + }, + { + "epoch": 18.3630289532294, + "grad_norm": 15.485980987548828, + "learning_rate": 1e-06, + "loss": 0.4505, + "num_input_tokens_seen": 461989756, + "step": 8245 + }, + { + "epoch": 18.3630289532294, + "loss": 0.4861689805984497, + "loss_ce": 8.499999967170879e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.015625, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 461989756, + "step": 8245 + }, + { + "epoch": 18.365256124721604, + "grad_norm": 31.88662338256836, + "learning_rate": 1e-06, + "loss": 0.5343, + "num_input_tokens_seen": 462045504, + "step": 8246 + }, + { + "epoch": 18.365256124721604, + "loss": 0.636799156665802, + "loss_ce": 8.0388635979034e-05, + "loss_iou": 0.255859375, + "loss_num": 0.024658203125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 462045504, + "step": 8246 + }, + { + "epoch": 18.36748329621381, + "grad_norm": 17.961257934570312, + "learning_rate": 1e-06, + "loss": 0.3355, + "num_input_tokens_seen": 462101768, + "step": 8247 + }, + { + "epoch": 18.36748329621381, + "loss": 0.29511559009552, + "loss_ce": 7.165400893427432e-05, + "loss_iou": 0.11767578125, + "loss_num": 0.011962890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 462101768, + "step": 8247 + }, + { + "epoch": 18.369710467706014, + "grad_norm": 17.18083381652832, + "learning_rate": 1e-06, + "loss": 0.5334, + "num_input_tokens_seen": 462158008, + "step": 8248 + }, + { + "epoch": 18.369710467706014, + "loss": 0.5379419922828674, + "loss_ce": 0.00010018746252171695, + "loss_iou": 0.234375, + "loss_num": 0.01361083984375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 462158008, + "step": 8248 + }, + { + "epoch": 18.37193763919822, + "grad_norm": 20.73891258239746, + "learning_rate": 1e-06, + "loss": 0.3165, + "num_input_tokens_seen": 462213512, + "step": 8249 + }, + { + "epoch": 18.37193763919822, + "loss": 0.3529818058013916, + "loss_ce": 7.652836939087138e-05, + "loss_iou": 0.154296875, + "loss_num": 0.009033203125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 462213512, + "step": 8249 + }, + { + "epoch": 18.374164810690424, + "grad_norm": 18.682357788085938, + "learning_rate": 1e-06, + "loss": 0.3058, + "num_input_tokens_seen": 462271092, + "step": 8250 + }, + { + "epoch": 18.374164810690424, + "eval_seeclick_web_CIoU": 0.5856335759162903, + "eval_seeclick_web_GIoU": 0.5846323072910309, + "eval_seeclick_web_IoU": 0.604880303144455, + "eval_seeclick_web_MAE_all": 0.015254544327035546, + "eval_seeclick_web_MAE_h": 0.0071105193346738815, + "eval_seeclick_web_MAE_w": 0.015218731015920639, + "eval_seeclick_web_MAE_x_boxes": 0.008255395339801908, + "eval_seeclick_web_MAE_y_boxes": 0.02131559420377016, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.9020078778266907, + "eval_seeclick_web_loss_ce": 0.00012946181232109666, + "eval_seeclick_web_loss_iou": 0.41748046875, + "eval_seeclick_web_loss_num": 0.012150764465332031, + "eval_seeclick_web_loss_xval": 0.896484375, + "eval_seeclick_web_runtime": 24.3274, + "eval_seeclick_web_samples_per_second": 2.055, + "eval_seeclick_web_steps_per_second": 0.082, + "num_input_tokens_seen": 462271092, + "step": 8250 + }, + { + "epoch": 18.374164810690424, + "eval_icons_CIoU": 0.2648079916834831, + "eval_icons_GIoU": 0.2893849313259125, + "eval_icons_IoU": 0.341605544090271, + "eval_icons_MAE_all": 0.06054504215717316, + "eval_icons_MAE_h": 0.03238860424607992, + "eval_icons_MAE_w": 0.06985406205058098, + "eval_icons_MAE_x_boxes": 0.05330132879316807, + "eval_icons_MAE_y_boxes": 0.03792322427034378, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7429778575897217, + "eval_icons_loss_ce": 0.00015769631136208773, + "eval_icons_loss_iou": 0.6812744140625, + "eval_icons_loss_num": 0.059600830078125, + "eval_icons_loss_xval": 1.66064453125, + "eval_icons_runtime": 23.8883, + "eval_icons_samples_per_second": 2.093, + "eval_icons_steps_per_second": 0.084, + "num_input_tokens_seen": 462271092, + "step": 8250 + }, + { + "epoch": 18.374164810690424, + "eval_screenspot_CIoU": 0.3744619091351827, + "eval_screenspot_GIoU": 0.391864409049352, + "eval_screenspot_IoU": 0.4460846583048503, + "eval_screenspot_MAE_all": 0.056168291717767715, + "eval_screenspot_MAE_h": 0.03892662810782591, + "eval_screenspot_MAE_w": 0.0636625016729037, + "eval_screenspot_MAE_x_boxes": 0.06623268065353234, + "eval_screenspot_MAE_y_boxes": 0.03843462746590376, + "eval_screenspot_inside_bbox": 0.7145833373069763, + "eval_screenspot_loss": 1.5575822591781616, + "eval_screenspot_loss_ce": 0.00017815509151356915, + "eval_screenspot_loss_iou": 0.6473795572916666, + "eval_screenspot_loss_num": 0.06395212809244792, + "eval_screenspot_loss_xval": 1.6150716145833333, + "eval_screenspot_runtime": 40.5304, + "eval_screenspot_samples_per_second": 2.196, + "eval_screenspot_steps_per_second": 0.074, + "num_input_tokens_seen": 462271092, + "step": 8250 + }, + { + "epoch": 18.374164810690424, + "eval_compot_CIoU": 0.3456965386867523, + "eval_compot_GIoU": 0.3530429005622864, + "eval_compot_IoU": 0.4042937010526657, + "eval_compot_MAE_all": 0.017928121611475945, + "eval_compot_MAE_h": 0.008343497524037957, + "eval_compot_MAE_w": 0.020461782813072205, + "eval_compot_MAE_x_boxes": 0.030165866017341614, + "eval_compot_MAE_y_boxes": 0.006972244009375572, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.4054356813430786, + "eval_compot_loss_ce": 0.00012335985229583457, + "eval_compot_loss_iou": 0.6524658203125, + "eval_compot_loss_num": 0.016485214233398438, + "eval_compot_loss_xval": 1.385986328125, + "eval_compot_runtime": 25.2618, + "eval_compot_samples_per_second": 1.979, + "eval_compot_steps_per_second": 0.079, + "num_input_tokens_seen": 462271092, + "step": 8250 + }, + { + "epoch": 18.374164810690424, + "eval_custom_ui_val_CIoU": 0.4731468657652537, + "eval_custom_ui_val_GIoU": 0.4772413820028305, + "eval_custom_ui_val_IoU": 0.5357410940859053, + "eval_custom_ui_val_MAE_all": 0.02722639176580641, + "eval_custom_ui_val_MAE_h": 0.013777732486940093, + "eval_custom_ui_val_MAE_w": 0.03672494749642081, + "eval_custom_ui_val_MAE_x_boxes": 0.03293217242591911, + "eval_custom_ui_val_MAE_y_boxes": 0.013356222900458507, + "eval_custom_ui_val_inside_bbox": 0.7719907429483202, + "eval_custom_ui_val_loss": 1.1706430912017822, + "eval_custom_ui_val_loss_ce": 0.0001427585027866169, + "eval_custom_ui_val_loss_iou": 0.501953125, + "eval_custom_ui_val_loss_num": 0.0238491694132487, + "eval_custom_ui_val_loss_xval": 1.1229383680555556, + "eval_custom_ui_val_runtime": 76.2841, + "eval_custom_ui_val_samples_per_second": 3.474, + "eval_custom_ui_val_steps_per_second": 0.118, + "num_input_tokens_seen": 462271092, + "step": 8250 + }, + { + "epoch": 18.374164810690424, + "loss": 0.8289626836776733, + "loss_ce": 0.00010525943071115762, + "loss_iou": 0.373046875, + "loss_num": 0.0166015625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 462271092, + "step": 8250 + }, + { + "epoch": 18.37639198218263, + "grad_norm": 33.0701789855957, + "learning_rate": 1e-06, + "loss": 0.4336, + "num_input_tokens_seen": 462326668, + "step": 8251 + }, + { + "epoch": 18.37639198218263, + "loss": 0.485931396484375, + "loss_ce": 9.15604323381558e-05, + "loss_iou": 0.228515625, + "loss_num": 0.0057373046875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 462326668, + "step": 8251 + }, + { + "epoch": 18.378619153674833, + "grad_norm": 18.959814071655273, + "learning_rate": 1e-06, + "loss": 0.3617, + "num_input_tokens_seen": 462381308, + "step": 8252 + }, + { + "epoch": 18.378619153674833, + "loss": 0.35047417879104614, + "loss_ce": 7.13385088602081e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.003143310546875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 462381308, + "step": 8252 + }, + { + "epoch": 18.380846325167038, + "grad_norm": 18.871074676513672, + "learning_rate": 1e-06, + "loss": 0.3387, + "num_input_tokens_seen": 462439476, + "step": 8253 + }, + { + "epoch": 18.380846325167038, + "loss": 0.4407457113265991, + "loss_ce": 7.18739174772054e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.0087890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 462439476, + "step": 8253 + }, + { + "epoch": 18.383073496659243, + "grad_norm": 17.004552841186523, + "learning_rate": 1e-06, + "loss": 0.3689, + "num_input_tokens_seen": 462492792, + "step": 8254 + }, + { + "epoch": 18.383073496659243, + "loss": 0.34425753355026245, + "loss_ce": 8.024969429243356e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00848388671875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 462492792, + "step": 8254 + }, + { + "epoch": 18.385300668151448, + "grad_norm": 20.56110382080078, + "learning_rate": 1e-06, + "loss": 0.3938, + "num_input_tokens_seen": 462549204, + "step": 8255 + }, + { + "epoch": 18.385300668151448, + "loss": 0.4753004312515259, + "loss_ce": 8.071900811046362e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.0181884765625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 462549204, + "step": 8255 + }, + { + "epoch": 18.387527839643653, + "grad_norm": 21.641469955444336, + "learning_rate": 1e-06, + "loss": 0.3759, + "num_input_tokens_seen": 462604868, + "step": 8256 + }, + { + "epoch": 18.387527839643653, + "loss": 0.42918264865875244, + "loss_ce": 7.497478509321809e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0208740234375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 462604868, + "step": 8256 + }, + { + "epoch": 18.389755011135858, + "grad_norm": 10.594487190246582, + "learning_rate": 1e-06, + "loss": 0.4945, + "num_input_tokens_seen": 462662936, + "step": 8257 + }, + { + "epoch": 18.389755011135858, + "loss": 0.47706377506256104, + "loss_ce": 7.405664655379951e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.012939453125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 462662936, + "step": 8257 + }, + { + "epoch": 18.391982182628063, + "grad_norm": 19.134235382080078, + "learning_rate": 1e-06, + "loss": 0.4228, + "num_input_tokens_seen": 462717820, + "step": 8258 + }, + { + "epoch": 18.391982182628063, + "loss": 0.30488812923431396, + "loss_ce": 7.853595889173448e-05, + "loss_iou": 0.1328125, + "loss_num": 0.0079345703125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 462717820, + "step": 8258 + }, + { + "epoch": 18.394209354120267, + "grad_norm": 20.989978790283203, + "learning_rate": 1e-06, + "loss": 0.3927, + "num_input_tokens_seen": 462772384, + "step": 8259 + }, + { + "epoch": 18.394209354120267, + "loss": 0.435162216424942, + "loss_ce": 7.693507359363139e-05, + "loss_iou": 0.185546875, + "loss_num": 0.012939453125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 462772384, + "step": 8259 + }, + { + "epoch": 18.396436525612472, + "grad_norm": 14.269499778747559, + "learning_rate": 1e-06, + "loss": 0.355, + "num_input_tokens_seen": 462827028, + "step": 8260 + }, + { + "epoch": 18.396436525612472, + "loss": 0.31690651178359985, + "loss_ce": 7.30217871023342e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.00506591796875, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 462827028, + "step": 8260 + }, + { + "epoch": 18.398663697104677, + "grad_norm": 15.133282661437988, + "learning_rate": 1e-06, + "loss": 0.55, + "num_input_tokens_seen": 462883484, + "step": 8261 + }, + { + "epoch": 18.398663697104677, + "loss": 0.5562236905097961, + "loss_ce": 0.0002544423332437873, + "loss_iou": 0.2041015625, + "loss_num": 0.029541015625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 462883484, + "step": 8261 + }, + { + "epoch": 18.400890868596882, + "grad_norm": 18.654014587402344, + "learning_rate": 1e-06, + "loss": 0.3458, + "num_input_tokens_seen": 462941596, + "step": 8262 + }, + { + "epoch": 18.400890868596882, + "loss": 0.4173450469970703, + "loss_ce": 0.00010873953578993678, + "loss_iou": 0.17578125, + "loss_num": 0.01312255859375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 462941596, + "step": 8262 + }, + { + "epoch": 18.403118040089087, + "grad_norm": 21.48236656188965, + "learning_rate": 1e-06, + "loss": 0.3443, + "num_input_tokens_seen": 462997916, + "step": 8263 + }, + { + "epoch": 18.403118040089087, + "loss": 0.29500818252563477, + "loss_ce": 8.628673094790429e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.003997802734375, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 462997916, + "step": 8263 + }, + { + "epoch": 18.40534521158129, + "grad_norm": 29.181659698486328, + "learning_rate": 1e-06, + "loss": 0.4467, + "num_input_tokens_seen": 463053976, + "step": 8264 + }, + { + "epoch": 18.40534521158129, + "loss": 0.5064201354980469, + "loss_ce": 7.253700459841639e-05, + "loss_iou": 0.236328125, + "loss_num": 0.00689697265625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 463053976, + "step": 8264 + }, + { + "epoch": 18.407572383073497, + "grad_norm": 18.721237182617188, + "learning_rate": 1e-06, + "loss": 0.4064, + "num_input_tokens_seen": 463111504, + "step": 8265 + }, + { + "epoch": 18.407572383073497, + "loss": 0.45783811807632446, + "loss_ce": 7.442037167493254e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.007568359375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 463111504, + "step": 8265 + }, + { + "epoch": 18.4097995545657, + "grad_norm": 57.14765548706055, + "learning_rate": 1e-06, + "loss": 0.7487, + "num_input_tokens_seen": 463168420, + "step": 8266 + }, + { + "epoch": 18.4097995545657, + "loss": 0.6589952707290649, + "loss_ce": 5.971048813080415e-05, + "loss_iou": 0.29296875, + "loss_num": 0.01495361328125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 463168420, + "step": 8266 + }, + { + "epoch": 18.412026726057906, + "grad_norm": 18.461252212524414, + "learning_rate": 1e-06, + "loss": 0.3474, + "num_input_tokens_seen": 463225216, + "step": 8267 + }, + { + "epoch": 18.412026726057906, + "loss": 0.38068366050720215, + "loss_ce": 6.842733273515478e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.005462646484375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 463225216, + "step": 8267 + }, + { + "epoch": 18.41425389755011, + "grad_norm": 23.637659072875977, + "learning_rate": 1e-06, + "loss": 0.5465, + "num_input_tokens_seen": 463279816, + "step": 8268 + }, + { + "epoch": 18.41425389755011, + "loss": 0.513511061668396, + "loss_ce": 8.326944953296334e-05, + "loss_iou": 0.232421875, + "loss_num": 0.00958251953125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 463279816, + "step": 8268 + }, + { + "epoch": 18.416481069042316, + "grad_norm": 19.79255485534668, + "learning_rate": 1e-06, + "loss": 0.5626, + "num_input_tokens_seen": 463336080, + "step": 8269 + }, + { + "epoch": 18.416481069042316, + "loss": 0.5551245808601379, + "loss_ce": 7.088375423336402e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0208740234375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 463336080, + "step": 8269 + }, + { + "epoch": 18.41870824053452, + "grad_norm": 22.363147735595703, + "learning_rate": 1e-06, + "loss": 0.3449, + "num_input_tokens_seen": 463392188, + "step": 8270 + }, + { + "epoch": 18.41870824053452, + "loss": 0.27490442991256714, + "loss_ce": 6.311971810646355e-05, + "loss_iou": 0.1181640625, + "loss_num": 0.0078125, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 463392188, + "step": 8270 + }, + { + "epoch": 18.420935412026726, + "grad_norm": 12.046845436096191, + "learning_rate": 1e-06, + "loss": 0.3125, + "num_input_tokens_seen": 463450560, + "step": 8271 + }, + { + "epoch": 18.420935412026726, + "loss": 0.33826929330825806, + "loss_ce": 7.349113002419472e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.01190185546875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 463450560, + "step": 8271 + }, + { + "epoch": 18.42316258351893, + "grad_norm": 14.040665626525879, + "learning_rate": 1e-06, + "loss": 0.3608, + "num_input_tokens_seen": 463504208, + "step": 8272 + }, + { + "epoch": 18.42316258351893, + "loss": 0.2680038809776306, + "loss_ce": 0.00012056898412993178, + "loss_iou": 0.10986328125, + "loss_num": 0.00970458984375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 463504208, + "step": 8272 + }, + { + "epoch": 18.425389755011135, + "grad_norm": 20.083757400512695, + "learning_rate": 1e-06, + "loss": 0.3636, + "num_input_tokens_seen": 463562476, + "step": 8273 + }, + { + "epoch": 18.425389755011135, + "loss": 0.23639193177223206, + "loss_ce": 6.381591811077669e-05, + "loss_iou": 0.107421875, + "loss_num": 0.0042724609375, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 463562476, + "step": 8273 + }, + { + "epoch": 18.42761692650334, + "grad_norm": 22.100793838500977, + "learning_rate": 1e-06, + "loss": 0.427, + "num_input_tokens_seen": 463617160, + "step": 8274 + }, + { + "epoch": 18.42761692650334, + "loss": 0.23444892466068268, + "loss_ce": 5.8669171266956255e-05, + "loss_iou": 0.0869140625, + "loss_num": 0.01214599609375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 463617160, + "step": 8274 + }, + { + "epoch": 18.429844097995545, + "grad_norm": 20.41813087463379, + "learning_rate": 1e-06, + "loss": 0.4329, + "num_input_tokens_seen": 463673804, + "step": 8275 + }, + { + "epoch": 18.429844097995545, + "loss": 0.38324350118637085, + "loss_ce": 6.480171578004956e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0135498046875, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 463673804, + "step": 8275 + }, + { + "epoch": 18.43207126948775, + "grad_norm": 17.835351943969727, + "learning_rate": 1e-06, + "loss": 0.4192, + "num_input_tokens_seen": 463731528, + "step": 8276 + }, + { + "epoch": 18.43207126948775, + "loss": 0.3969186246395111, + "loss_ce": 6.806287274230272e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0130615234375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 463731528, + "step": 8276 + }, + { + "epoch": 18.434298440979955, + "grad_norm": 20.909772872924805, + "learning_rate": 1e-06, + "loss": 0.3829, + "num_input_tokens_seen": 463786092, + "step": 8277 + }, + { + "epoch": 18.434298440979955, + "loss": 0.35701119899749756, + "loss_ce": 7.761004235362634e-05, + "loss_iou": 0.14453125, + "loss_num": 0.01348876953125, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 463786092, + "step": 8277 + }, + { + "epoch": 18.43652561247216, + "grad_norm": 15.374547958374023, + "learning_rate": 1e-06, + "loss": 0.4143, + "num_input_tokens_seen": 463838012, + "step": 8278 + }, + { + "epoch": 18.43652561247216, + "loss": 0.4003612995147705, + "loss_ce": 9.272516763303429e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.0126953125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 463838012, + "step": 8278 + }, + { + "epoch": 18.438752783964365, + "grad_norm": 12.226360321044922, + "learning_rate": 1e-06, + "loss": 0.4797, + "num_input_tokens_seen": 463892444, + "step": 8279 + }, + { + "epoch": 18.438752783964365, + "loss": 0.42820942401885986, + "loss_ce": 7.8319848398678e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.01007080078125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 463892444, + "step": 8279 + }, + { + "epoch": 18.44097995545657, + "grad_norm": 17.95158576965332, + "learning_rate": 1e-06, + "loss": 0.3166, + "num_input_tokens_seen": 463947952, + "step": 8280 + }, + { + "epoch": 18.44097995545657, + "loss": 0.3516421616077423, + "loss_ce": 7.965406985022128e-05, + "loss_iou": 0.146484375, + "loss_num": 0.01153564453125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 463947952, + "step": 8280 + }, + { + "epoch": 18.443207126948774, + "grad_norm": 16.93501853942871, + "learning_rate": 1e-06, + "loss": 0.4472, + "num_input_tokens_seen": 464005712, + "step": 8281 + }, + { + "epoch": 18.443207126948774, + "loss": 0.47952741384506226, + "loss_ce": 9.625754319131374e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.01324462890625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 464005712, + "step": 8281 + }, + { + "epoch": 18.44543429844098, + "grad_norm": 20.220458984375, + "learning_rate": 1e-06, + "loss": 0.4457, + "num_input_tokens_seen": 464062920, + "step": 8282 + }, + { + "epoch": 18.44543429844098, + "loss": 0.4217246472835541, + "loss_ce": 9.378813410876319e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0137939453125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 464062920, + "step": 8282 + }, + { + "epoch": 18.447661469933184, + "grad_norm": 15.873303413391113, + "learning_rate": 1e-06, + "loss": 0.2138, + "num_input_tokens_seen": 464116484, + "step": 8283 + }, + { + "epoch": 18.447661469933184, + "loss": 0.14678719639778137, + "loss_ce": 6.631132418988273e-05, + "loss_iou": 0.06298828125, + "loss_num": 0.004241943359375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 464116484, + "step": 8283 + }, + { + "epoch": 18.44988864142539, + "grad_norm": 18.4737548828125, + "learning_rate": 1e-06, + "loss": 0.41, + "num_input_tokens_seen": 464172992, + "step": 8284 + }, + { + "epoch": 18.44988864142539, + "loss": 0.3817276358604431, + "loss_ce": 7.478396582882851e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00872802734375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 464172992, + "step": 8284 + }, + { + "epoch": 18.452115812917594, + "grad_norm": 19.414098739624023, + "learning_rate": 1e-06, + "loss": 0.4462, + "num_input_tokens_seen": 464225548, + "step": 8285 + }, + { + "epoch": 18.452115812917594, + "loss": 0.42627277970314026, + "loss_ce": 0.00012532366963569075, + "loss_iou": 0.1865234375, + "loss_num": 0.01068115234375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 464225548, + "step": 8285 + }, + { + "epoch": 18.4543429844098, + "grad_norm": 16.32335090637207, + "learning_rate": 1e-06, + "loss": 0.3318, + "num_input_tokens_seen": 464281668, + "step": 8286 + }, + { + "epoch": 18.4543429844098, + "loss": 0.3537209630012512, + "loss_ce": 8.327406249009073e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.00787353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 464281668, + "step": 8286 + }, + { + "epoch": 18.456570155902003, + "grad_norm": 15.517791748046875, + "learning_rate": 1e-06, + "loss": 0.3719, + "num_input_tokens_seen": 464337160, + "step": 8287 + }, + { + "epoch": 18.456570155902003, + "loss": 0.3465074300765991, + "loss_ce": 0.0001023836521198973, + "loss_iou": 0.1376953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 464337160, + "step": 8287 + }, + { + "epoch": 18.45879732739421, + "grad_norm": 18.08324432373047, + "learning_rate": 1e-06, + "loss": 0.5502, + "num_input_tokens_seen": 464392772, + "step": 8288 + }, + { + "epoch": 18.45879732739421, + "loss": 0.6127073168754578, + "loss_ce": 0.00015847003669478, + "loss_iou": 0.23828125, + "loss_num": 0.0272216796875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 464392772, + "step": 8288 + }, + { + "epoch": 18.461024498886413, + "grad_norm": 18.83432960510254, + "learning_rate": 1e-06, + "loss": 0.3693, + "num_input_tokens_seen": 464448784, + "step": 8289 + }, + { + "epoch": 18.461024498886413, + "loss": 0.4016864001750946, + "loss_ce": 7.508075213991106e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.006378173828125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 464448784, + "step": 8289 + }, + { + "epoch": 18.463251670378618, + "grad_norm": 13.436317443847656, + "learning_rate": 1e-06, + "loss": 0.3396, + "num_input_tokens_seen": 464505648, + "step": 8290 + }, + { + "epoch": 18.463251670378618, + "loss": 0.35365384817123413, + "loss_ce": 7.717913103988394e-05, + "loss_iou": 0.15234375, + "loss_num": 0.009765625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 464505648, + "step": 8290 + }, + { + "epoch": 18.465478841870823, + "grad_norm": 19.951303482055664, + "learning_rate": 1e-06, + "loss": 0.3453, + "num_input_tokens_seen": 464559060, + "step": 8291 + }, + { + "epoch": 18.465478841870823, + "loss": 0.38332536816596985, + "loss_ce": 0.00014667093637399375, + "loss_iou": 0.1708984375, + "loss_num": 0.008056640625, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 464559060, + "step": 8291 + }, + { + "epoch": 18.467706013363028, + "grad_norm": 228.0105438232422, + "learning_rate": 1e-06, + "loss": 0.4727, + "num_input_tokens_seen": 464614584, + "step": 8292 + }, + { + "epoch": 18.467706013363028, + "loss": 0.3697110116481781, + "loss_ce": 8.208431245293468e-05, + "loss_iou": 0.150390625, + "loss_num": 0.013916015625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 464614584, + "step": 8292 + }, + { + "epoch": 18.469933184855233, + "grad_norm": 14.540281295776367, + "learning_rate": 1e-06, + "loss": 0.3425, + "num_input_tokens_seen": 464668316, + "step": 8293 + }, + { + "epoch": 18.469933184855233, + "loss": 0.34370023012161255, + "loss_ce": 7.22910335753113e-05, + "loss_iou": 0.15625, + "loss_num": 0.006072998046875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 464668316, + "step": 8293 + }, + { + "epoch": 18.472160356347437, + "grad_norm": 20.030624389648438, + "learning_rate": 1e-06, + "loss": 0.4044, + "num_input_tokens_seen": 464723240, + "step": 8294 + }, + { + "epoch": 18.472160356347437, + "loss": 0.40011394023895264, + "loss_ce": 8.954846998676658e-05, + "loss_iou": 0.18359375, + "loss_num": 0.00677490234375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 464723240, + "step": 8294 + }, + { + "epoch": 18.474387527839642, + "grad_norm": 13.285386085510254, + "learning_rate": 1e-06, + "loss": 0.4026, + "num_input_tokens_seen": 464779508, + "step": 8295 + }, + { + "epoch": 18.474387527839642, + "loss": 0.223904550075531, + "loss_ce": 8.862309186952189e-05, + "loss_iou": 0.10498046875, + "loss_num": 0.0027008056640625, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 464779508, + "step": 8295 + }, + { + "epoch": 18.476614699331847, + "grad_norm": 18.240877151489258, + "learning_rate": 1e-06, + "loss": 0.3385, + "num_input_tokens_seen": 464834748, + "step": 8296 + }, + { + "epoch": 18.476614699331847, + "loss": 0.29583436250686646, + "loss_ce": 8.852417522575706e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.003936767578125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 464834748, + "step": 8296 + }, + { + "epoch": 18.478841870824052, + "grad_norm": 15.406899452209473, + "learning_rate": 1e-06, + "loss": 0.3808, + "num_input_tokens_seen": 464892396, + "step": 8297 + }, + { + "epoch": 18.478841870824052, + "loss": 0.379098504781723, + "loss_ce": 7.018136966507882e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00994873046875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 464892396, + "step": 8297 + }, + { + "epoch": 18.481069042316257, + "grad_norm": 22.26421546936035, + "learning_rate": 1e-06, + "loss": 0.3392, + "num_input_tokens_seen": 464949808, + "step": 8298 + }, + { + "epoch": 18.481069042316257, + "loss": 0.21200445294380188, + "loss_ce": 9.038990538101643e-05, + "loss_iou": 0.091796875, + "loss_num": 0.005645751953125, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 464949808, + "step": 8298 + }, + { + "epoch": 18.48329621380846, + "grad_norm": 15.141512870788574, + "learning_rate": 1e-06, + "loss": 0.403, + "num_input_tokens_seen": 465003316, + "step": 8299 + }, + { + "epoch": 18.48329621380846, + "loss": 0.4960575997829437, + "loss_ce": 8.59203573781997e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0216064453125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 465003316, + "step": 8299 + }, + { + "epoch": 18.485523385300667, + "grad_norm": 29.124523162841797, + "learning_rate": 1e-06, + "loss": 0.5493, + "num_input_tokens_seen": 465058828, + "step": 8300 + }, + { + "epoch": 18.485523385300667, + "loss": 0.6504954695701599, + "loss_ce": 0.0001048904232447967, + "loss_iou": 0.298828125, + "loss_num": 0.01068115234375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 465058828, + "step": 8300 + }, + { + "epoch": 18.48775055679287, + "grad_norm": 17.59904670715332, + "learning_rate": 1e-06, + "loss": 0.3864, + "num_input_tokens_seen": 465114628, + "step": 8301 + }, + { + "epoch": 18.48775055679287, + "loss": 0.263241708278656, + "loss_ce": 0.00011917389929294586, + "loss_iou": 0.115234375, + "loss_num": 0.00653076171875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 465114628, + "step": 8301 + }, + { + "epoch": 18.489977728285076, + "grad_norm": 22.050037384033203, + "learning_rate": 1e-06, + "loss": 0.4467, + "num_input_tokens_seen": 465169408, + "step": 8302 + }, + { + "epoch": 18.489977728285076, + "loss": 0.29093292355537415, + "loss_ce": 6.987818778725341e-05, + "loss_iou": 0.130859375, + "loss_num": 0.005889892578125, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 465169408, + "step": 8302 + }, + { + "epoch": 18.49220489977728, + "grad_norm": 20.6636962890625, + "learning_rate": 1e-06, + "loss": 0.3993, + "num_input_tokens_seen": 465224880, + "step": 8303 + }, + { + "epoch": 18.49220489977728, + "loss": 0.41500934958457947, + "loss_ce": 9.234360186383128e-05, + "loss_iou": 0.173828125, + "loss_num": 0.013671875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 465224880, + "step": 8303 + }, + { + "epoch": 18.494432071269486, + "grad_norm": 23.35464859008789, + "learning_rate": 1e-06, + "loss": 0.3545, + "num_input_tokens_seen": 465281588, + "step": 8304 + }, + { + "epoch": 18.494432071269486, + "loss": 0.4025862514972687, + "loss_ce": 7.466854003723711e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.01226806640625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 465281588, + "step": 8304 + }, + { + "epoch": 18.49665924276169, + "grad_norm": 18.54194450378418, + "learning_rate": 1e-06, + "loss": 0.4179, + "num_input_tokens_seen": 465336308, + "step": 8305 + }, + { + "epoch": 18.49665924276169, + "loss": 0.4852302670478821, + "loss_ce": 6.180583295645192e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.0159912109375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 465336308, + "step": 8305 + }, + { + "epoch": 18.498886414253896, + "grad_norm": 16.24866485595703, + "learning_rate": 1e-06, + "loss": 0.2652, + "num_input_tokens_seen": 465392124, + "step": 8306 + }, + { + "epoch": 18.498886414253896, + "loss": 0.3223702311515808, + "loss_ce": 0.00010461719648446888, + "loss_iou": 0.1474609375, + "loss_num": 0.005462646484375, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 465392124, + "step": 8306 + }, + { + "epoch": 18.501113585746104, + "grad_norm": 23.2437801361084, + "learning_rate": 1e-06, + "loss": 0.4014, + "num_input_tokens_seen": 465448524, + "step": 8307 + }, + { + "epoch": 18.501113585746104, + "loss": 0.32942575216293335, + "loss_ce": 8.003541734069586e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.006103515625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 465448524, + "step": 8307 + }, + { + "epoch": 18.50334075723831, + "grad_norm": 17.066225051879883, + "learning_rate": 1e-06, + "loss": 0.5872, + "num_input_tokens_seen": 465504656, + "step": 8308 + }, + { + "epoch": 18.50334075723831, + "loss": 0.6132311820983887, + "loss_ce": 7.201532571343705e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0260009765625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 465504656, + "step": 8308 + }, + { + "epoch": 18.505567928730514, + "grad_norm": 37.251121520996094, + "learning_rate": 1e-06, + "loss": 0.325, + "num_input_tokens_seen": 465560192, + "step": 8309 + }, + { + "epoch": 18.505567928730514, + "loss": 0.24585197865962982, + "loss_ce": 6.340416439343244e-05, + "loss_iou": 0.109375, + "loss_num": 0.00543212890625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 465560192, + "step": 8309 + }, + { + "epoch": 18.50779510022272, + "grad_norm": 16.032716751098633, + "learning_rate": 1e-06, + "loss": 0.5305, + "num_input_tokens_seen": 465618100, + "step": 8310 + }, + { + "epoch": 18.50779510022272, + "loss": 0.629966139793396, + "loss_ce": 8.333367441082373e-05, + "loss_iou": 0.25390625, + "loss_num": 0.024169921875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 465618100, + "step": 8310 + }, + { + "epoch": 18.510022271714924, + "grad_norm": 15.98995304107666, + "learning_rate": 1e-06, + "loss": 0.2552, + "num_input_tokens_seen": 465674400, + "step": 8311 + }, + { + "epoch": 18.510022271714924, + "loss": 0.27437126636505127, + "loss_ce": 7.928490958875045e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.0030364990234375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 465674400, + "step": 8311 + }, + { + "epoch": 18.51224944320713, + "grad_norm": 16.019752502441406, + "learning_rate": 1e-06, + "loss": 0.3386, + "num_input_tokens_seen": 465732188, + "step": 8312 + }, + { + "epoch": 18.51224944320713, + "loss": 0.5247637033462524, + "loss_ce": 0.00010548259160714224, + "loss_iou": 0.21484375, + "loss_num": 0.01904296875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 465732188, + "step": 8312 + }, + { + "epoch": 18.514476614699333, + "grad_norm": 17.876802444458008, + "learning_rate": 1e-06, + "loss": 0.3636, + "num_input_tokens_seen": 465786628, + "step": 8313 + }, + { + "epoch": 18.514476614699333, + "loss": 0.35969775915145874, + "loss_ce": 7.861069752834737e-05, + "loss_iou": 0.16796875, + "loss_num": 0.004791259765625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 465786628, + "step": 8313 + }, + { + "epoch": 18.51670378619154, + "grad_norm": 15.022839546203613, + "learning_rate": 1e-06, + "loss": 0.3146, + "num_input_tokens_seen": 465842532, + "step": 8314 + }, + { + "epoch": 18.51670378619154, + "loss": 0.35521650314331055, + "loss_ce": 0.00011394656030461192, + "loss_iou": 0.14453125, + "loss_num": 0.01318359375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 465842532, + "step": 8314 + }, + { + "epoch": 18.518930957683743, + "grad_norm": 13.363021850585938, + "learning_rate": 1e-06, + "loss": 0.4604, + "num_input_tokens_seen": 465900596, + "step": 8315 + }, + { + "epoch": 18.518930957683743, + "loss": 0.583387017250061, + "loss_ce": 0.00013512340956367552, + "loss_iou": 0.224609375, + "loss_num": 0.02685546875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 465900596, + "step": 8315 + }, + { + "epoch": 18.521158129175948, + "grad_norm": 15.131711959838867, + "learning_rate": 1e-06, + "loss": 0.4687, + "num_input_tokens_seen": 465958308, + "step": 8316 + }, + { + "epoch": 18.521158129175948, + "loss": 0.5298267602920532, + "loss_ce": 7.216697849798948e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.01953125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 465958308, + "step": 8316 + }, + { + "epoch": 18.523385300668153, + "grad_norm": 12.049201965332031, + "learning_rate": 1e-06, + "loss": 0.3896, + "num_input_tokens_seen": 466015604, + "step": 8317 + }, + { + "epoch": 18.523385300668153, + "loss": 0.2444000393152237, + "loss_ce": 7.62987692723982e-05, + "loss_iou": 0.09130859375, + "loss_num": 0.01239013671875, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 466015604, + "step": 8317 + }, + { + "epoch": 18.525612472160358, + "grad_norm": 25.494447708129883, + "learning_rate": 1e-06, + "loss": 0.4866, + "num_input_tokens_seen": 466072012, + "step": 8318 + }, + { + "epoch": 18.525612472160358, + "loss": 0.5554917454719543, + "loss_ce": 7.183050183812156e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 466072012, + "step": 8318 + }, + { + "epoch": 18.527839643652563, + "grad_norm": 32.9737548828125, + "learning_rate": 1e-06, + "loss": 0.5518, + "num_input_tokens_seen": 466126616, + "step": 8319 + }, + { + "epoch": 18.527839643652563, + "loss": 0.5966952443122864, + "loss_ce": 7.660294068045914e-05, + "loss_iou": 0.21484375, + "loss_num": 0.033447265625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 466126616, + "step": 8319 + }, + { + "epoch": 18.530066815144767, + "grad_norm": 12.370501518249512, + "learning_rate": 1e-06, + "loss": 0.299, + "num_input_tokens_seen": 466181408, + "step": 8320 + }, + { + "epoch": 18.530066815144767, + "loss": 0.357611745595932, + "loss_ce": 6.781626143492758e-05, + "loss_iou": 0.162109375, + "loss_num": 0.006805419921875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 466181408, + "step": 8320 + }, + { + "epoch": 18.532293986636972, + "grad_norm": 15.927129745483398, + "learning_rate": 1e-06, + "loss": 0.383, + "num_input_tokens_seen": 466238792, + "step": 8321 + }, + { + "epoch": 18.532293986636972, + "loss": 0.40339329838752747, + "loss_ce": 7.296907278941944e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.01556396484375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 466238792, + "step": 8321 + }, + { + "epoch": 18.534521158129177, + "grad_norm": 15.46563720703125, + "learning_rate": 1e-06, + "loss": 0.342, + "num_input_tokens_seen": 466291660, + "step": 8322 + }, + { + "epoch": 18.534521158129177, + "loss": 0.2837243676185608, + "loss_ce": 0.00015505831106565893, + "loss_iou": 0.12060546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 466291660, + "step": 8322 + }, + { + "epoch": 18.536748329621382, + "grad_norm": 16.087427139282227, + "learning_rate": 1e-06, + "loss": 0.5815, + "num_input_tokens_seen": 466344276, + "step": 8323 + }, + { + "epoch": 18.536748329621382, + "loss": 0.5009580850601196, + "loss_ce": 0.00010357146675232798, + "loss_iou": 0.2158203125, + "loss_num": 0.01385498046875, + "loss_xval": 0.5, + "num_input_tokens_seen": 466344276, + "step": 8323 + }, + { + "epoch": 18.538975501113587, + "grad_norm": 20.600358963012695, + "learning_rate": 1e-06, + "loss": 0.5411, + "num_input_tokens_seen": 466400420, + "step": 8324 + }, + { + "epoch": 18.538975501113587, + "loss": 0.47846150398254395, + "loss_ce": 6.796044181101024e-05, + "loss_iou": 0.212890625, + "loss_num": 0.0103759765625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 466400420, + "step": 8324 + }, + { + "epoch": 18.54120267260579, + "grad_norm": 26.053192138671875, + "learning_rate": 1e-06, + "loss": 0.3268, + "num_input_tokens_seen": 466453920, + "step": 8325 + }, + { + "epoch": 18.54120267260579, + "loss": 0.4151201546192169, + "loss_ce": 8.109994087135419e-05, + "loss_iou": 0.18359375, + "loss_num": 0.0093994140625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 466453920, + "step": 8325 + }, + { + "epoch": 18.543429844097997, + "grad_norm": 12.725341796875, + "learning_rate": 1e-06, + "loss": 0.3043, + "num_input_tokens_seen": 466510280, + "step": 8326 + }, + { + "epoch": 18.543429844097997, + "loss": 0.19257915019989014, + "loss_ce": 7.426021329592913e-05, + "loss_iou": 0.0888671875, + "loss_num": 0.0030517578125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 466510280, + "step": 8326 + }, + { + "epoch": 18.5456570155902, + "grad_norm": 19.8201904296875, + "learning_rate": 1e-06, + "loss": 0.6099, + "num_input_tokens_seen": 466565908, + "step": 8327 + }, + { + "epoch": 18.5456570155902, + "loss": 0.7313508987426758, + "loss_ce": 0.00014971070049796253, + "loss_iou": 0.296875, + "loss_num": 0.0277099609375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 466565908, + "step": 8327 + }, + { + "epoch": 18.547884187082406, + "grad_norm": 24.441503524780273, + "learning_rate": 1e-06, + "loss": 0.3867, + "num_input_tokens_seen": 466624756, + "step": 8328 + }, + { + "epoch": 18.547884187082406, + "loss": 0.46570825576782227, + "loss_ce": 7.106846169335768e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.00909423828125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 466624756, + "step": 8328 + }, + { + "epoch": 18.55011135857461, + "grad_norm": 16.419023513793945, + "learning_rate": 1e-06, + "loss": 0.3623, + "num_input_tokens_seen": 466682572, + "step": 8329 + }, + { + "epoch": 18.55011135857461, + "loss": 0.24810969829559326, + "loss_ce": 0.00036799628287553787, + "loss_iou": 0.11279296875, + "loss_num": 0.004425048828125, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 466682572, + "step": 8329 + }, + { + "epoch": 18.552338530066816, + "grad_norm": 29.954700469970703, + "learning_rate": 1e-06, + "loss": 0.2989, + "num_input_tokens_seen": 466739668, + "step": 8330 + }, + { + "epoch": 18.552338530066816, + "loss": 0.264358252286911, + "loss_ce": 7.602167170261964e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.004852294921875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 466739668, + "step": 8330 + }, + { + "epoch": 18.55456570155902, + "grad_norm": 21.188688278198242, + "learning_rate": 1e-06, + "loss": 0.4046, + "num_input_tokens_seen": 466798292, + "step": 8331 + }, + { + "epoch": 18.55456570155902, + "loss": 0.2884581685066223, + "loss_ce": 6.705736450385302e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.007415771484375, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 466798292, + "step": 8331 + }, + { + "epoch": 18.556792873051226, + "grad_norm": 13.528090476989746, + "learning_rate": 1e-06, + "loss": 0.4685, + "num_input_tokens_seen": 466856176, + "step": 8332 + }, + { + "epoch": 18.556792873051226, + "loss": 0.5392540693283081, + "loss_ce": 6.949950329726562e-05, + "loss_iou": 0.22265625, + "loss_num": 0.0186767578125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 466856176, + "step": 8332 + }, + { + "epoch": 18.55902004454343, + "grad_norm": 18.641672134399414, + "learning_rate": 1e-06, + "loss": 0.4738, + "num_input_tokens_seen": 466908128, + "step": 8333 + }, + { + "epoch": 18.55902004454343, + "loss": 0.4274500608444214, + "loss_ce": 8.191791857825592e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0140380859375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 466908128, + "step": 8333 + }, + { + "epoch": 18.561247216035635, + "grad_norm": 29.179141998291016, + "learning_rate": 1e-06, + "loss": 0.3837, + "num_input_tokens_seen": 466965624, + "step": 8334 + }, + { + "epoch": 18.561247216035635, + "loss": 0.3577282428741455, + "loss_ce": 6.22493025730364e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0078125, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 466965624, + "step": 8334 + }, + { + "epoch": 18.56347438752784, + "grad_norm": 15.943428993225098, + "learning_rate": 1e-06, + "loss": 0.4853, + "num_input_tokens_seen": 467019384, + "step": 8335 + }, + { + "epoch": 18.56347438752784, + "loss": 0.24585062265396118, + "loss_ce": 6.203760858625174e-05, + "loss_iou": 0.099609375, + "loss_num": 0.0093994140625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 467019384, + "step": 8335 + }, + { + "epoch": 18.565701559020045, + "grad_norm": 17.852943420410156, + "learning_rate": 1e-06, + "loss": 0.4954, + "num_input_tokens_seen": 467076952, + "step": 8336 + }, + { + "epoch": 18.565701559020045, + "loss": 0.46540504693984985, + "loss_ce": 7.300135621335357e-05, + "loss_iou": 0.20703125, + "loss_num": 0.01031494140625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 467076952, + "step": 8336 + }, + { + "epoch": 18.56792873051225, + "grad_norm": 16.556804656982422, + "learning_rate": 1e-06, + "loss": 0.4165, + "num_input_tokens_seen": 467130340, + "step": 8337 + }, + { + "epoch": 18.56792873051225, + "loss": 0.4885514974594116, + "loss_ce": 8.713564602658153e-05, + "loss_iou": 0.1796875, + "loss_num": 0.0260009765625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 467130340, + "step": 8337 + }, + { + "epoch": 18.570155902004455, + "grad_norm": 18.44285011291504, + "learning_rate": 1e-06, + "loss": 0.4634, + "num_input_tokens_seen": 467187616, + "step": 8338 + }, + { + "epoch": 18.570155902004455, + "loss": 0.5214405059814453, + "loss_ce": 7.816489960532635e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0223388671875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 467187616, + "step": 8338 + }, + { + "epoch": 18.57238307349666, + "grad_norm": 16.12818717956543, + "learning_rate": 1e-06, + "loss": 0.5461, + "num_input_tokens_seen": 467242808, + "step": 8339 + }, + { + "epoch": 18.57238307349666, + "loss": 0.49351948499679565, + "loss_ce": 0.00011128241749247536, + "loss_iou": 0.201171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 467242808, + "step": 8339 + }, + { + "epoch": 18.574610244988865, + "grad_norm": 13.428854942321777, + "learning_rate": 1e-06, + "loss": 0.3354, + "num_input_tokens_seen": 467299608, + "step": 8340 + }, + { + "epoch": 18.574610244988865, + "loss": 0.2194262444972992, + "loss_ce": 6.589026452274993e-05, + "loss_iou": 0.07470703125, + "loss_num": 0.01397705078125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 467299608, + "step": 8340 + }, + { + "epoch": 18.57683741648107, + "grad_norm": 341.0340881347656, + "learning_rate": 1e-06, + "loss": 0.552, + "num_input_tokens_seen": 467354420, + "step": 8341 + }, + { + "epoch": 18.57683741648107, + "loss": 0.6670901775360107, + "loss_ce": 9.794821380637586e-05, + "loss_iou": 0.287109375, + "loss_num": 0.018798828125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 467354420, + "step": 8341 + }, + { + "epoch": 18.579064587973274, + "grad_norm": 13.796717643737793, + "learning_rate": 1e-06, + "loss": 0.4222, + "num_input_tokens_seen": 467409192, + "step": 8342 + }, + { + "epoch": 18.579064587973274, + "loss": 0.46046194434165955, + "loss_ce": 7.377237488981336e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.01055908203125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 467409192, + "step": 8342 + }, + { + "epoch": 18.58129175946548, + "grad_norm": 24.182491302490234, + "learning_rate": 1e-06, + "loss": 0.4398, + "num_input_tokens_seen": 467464804, + "step": 8343 + }, + { + "epoch": 18.58129175946548, + "loss": 0.4643157720565796, + "loss_ce": 8.239349699579179e-05, + "loss_iou": 0.216796875, + "loss_num": 0.006134033203125, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 467464804, + "step": 8343 + }, + { + "epoch": 18.583518930957684, + "grad_norm": 29.6402645111084, + "learning_rate": 1e-06, + "loss": 0.5611, + "num_input_tokens_seen": 467520644, + "step": 8344 + }, + { + "epoch": 18.583518930957684, + "loss": 0.660957932472229, + "loss_ce": 6.922356260474771e-05, + "loss_iou": 0.2734375, + "loss_num": 0.023193359375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 467520644, + "step": 8344 + }, + { + "epoch": 18.58574610244989, + "grad_norm": 22.357826232910156, + "learning_rate": 1e-06, + "loss": 0.2284, + "num_input_tokens_seen": 467578040, + "step": 8345 + }, + { + "epoch": 18.58574610244989, + "loss": 0.16835345327854156, + "loss_ce": 7.953326712595299e-05, + "loss_iou": 0.06005859375, + "loss_num": 0.00970458984375, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 467578040, + "step": 8345 + }, + { + "epoch": 18.587973273942094, + "grad_norm": 42.021976470947266, + "learning_rate": 1e-06, + "loss": 0.4582, + "num_input_tokens_seen": 467633620, + "step": 8346 + }, + { + "epoch": 18.587973273942094, + "loss": 0.41333815455436707, + "loss_ce": 9.962193144019693e-05, + "loss_iou": 0.189453125, + "loss_num": 0.00701904296875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 467633620, + "step": 8346 + }, + { + "epoch": 18.5902004454343, + "grad_norm": 21.736515045166016, + "learning_rate": 1e-06, + "loss": 0.374, + "num_input_tokens_seen": 467691248, + "step": 8347 + }, + { + "epoch": 18.5902004454343, + "loss": 0.37519749999046326, + "loss_ce": 7.541398372268304e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.00665283203125, + "loss_xval": 0.375, + "num_input_tokens_seen": 467691248, + "step": 8347 + }, + { + "epoch": 18.592427616926503, + "grad_norm": 25.333066940307617, + "learning_rate": 1e-06, + "loss": 0.4718, + "num_input_tokens_seen": 467744092, + "step": 8348 + }, + { + "epoch": 18.592427616926503, + "loss": 0.39171791076660156, + "loss_ce": 0.00011635862028924748, + "loss_iou": 0.1748046875, + "loss_num": 0.00836181640625, + "loss_xval": 0.390625, + "num_input_tokens_seen": 467744092, + "step": 8348 + }, + { + "epoch": 18.59465478841871, + "grad_norm": 19.491870880126953, + "learning_rate": 1e-06, + "loss": 0.2949, + "num_input_tokens_seen": 467800704, + "step": 8349 + }, + { + "epoch": 18.59465478841871, + "loss": 0.2856776714324951, + "loss_ce": 9.417271940037608e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0103759765625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 467800704, + "step": 8349 + }, + { + "epoch": 18.596881959910913, + "grad_norm": 11.345683097839355, + "learning_rate": 1e-06, + "loss": 0.2598, + "num_input_tokens_seen": 467858852, + "step": 8350 + }, + { + "epoch": 18.596881959910913, + "loss": 0.23634421825408936, + "loss_ce": 7.713548257015646e-05, + "loss_iou": 0.1044921875, + "loss_num": 0.005340576171875, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 467858852, + "step": 8350 + }, + { + "epoch": 18.599109131403118, + "grad_norm": 16.46609115600586, + "learning_rate": 1e-06, + "loss": 0.3114, + "num_input_tokens_seen": 467914508, + "step": 8351 + }, + { + "epoch": 18.599109131403118, + "loss": 0.2579895257949829, + "loss_ce": 0.00011599328718148172, + "loss_iou": 0.109375, + "loss_num": 0.007781982421875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 467914508, + "step": 8351 + }, + { + "epoch": 18.601336302895323, + "grad_norm": 21.424537658691406, + "learning_rate": 1e-06, + "loss": 0.4388, + "num_input_tokens_seen": 467969980, + "step": 8352 + }, + { + "epoch": 18.601336302895323, + "loss": 0.31832072138786316, + "loss_ce": 8.341785724041983e-05, + "loss_iou": 0.14453125, + "loss_num": 0.005767822265625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 467969980, + "step": 8352 + }, + { + "epoch": 18.603563474387528, + "grad_norm": 16.22272491455078, + "learning_rate": 1e-06, + "loss": 0.3837, + "num_input_tokens_seen": 468026936, + "step": 8353 + }, + { + "epoch": 18.603563474387528, + "loss": 0.42490488290786743, + "loss_ce": 0.00010020087211159989, + "loss_iou": 0.189453125, + "loss_num": 0.009033203125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 468026936, + "step": 8353 + }, + { + "epoch": 18.605790645879733, + "grad_norm": 19.997718811035156, + "learning_rate": 1e-06, + "loss": 0.3883, + "num_input_tokens_seen": 468082576, + "step": 8354 + }, + { + "epoch": 18.605790645879733, + "loss": 0.41608962416648865, + "loss_ce": 7.400992035400122e-05, + "loss_iou": 0.1953125, + "loss_num": 0.004974365234375, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 468082576, + "step": 8354 + }, + { + "epoch": 18.608017817371937, + "grad_norm": 18.959848403930664, + "learning_rate": 1e-06, + "loss": 0.3883, + "num_input_tokens_seen": 468139516, + "step": 8355 + }, + { + "epoch": 18.608017817371937, + "loss": 0.3627595603466034, + "loss_ce": 8.86675261426717e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00732421875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 468139516, + "step": 8355 + }, + { + "epoch": 18.610244988864142, + "grad_norm": 10.21224308013916, + "learning_rate": 1e-06, + "loss": 0.4558, + "num_input_tokens_seen": 468196392, + "step": 8356 + }, + { + "epoch": 18.610244988864142, + "loss": 0.3999983072280884, + "loss_ce": 9.597405733074993e-05, + "loss_iou": 0.15234375, + "loss_num": 0.01904296875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 468196392, + "step": 8356 + }, + { + "epoch": 18.612472160356347, + "grad_norm": 12.426268577575684, + "learning_rate": 1e-06, + "loss": 0.551, + "num_input_tokens_seen": 468252860, + "step": 8357 + }, + { + "epoch": 18.612472160356347, + "loss": 0.8520680069923401, + "loss_ce": 7.824598287697881e-05, + "loss_iou": 0.326171875, + "loss_num": 0.039306640625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 468252860, + "step": 8357 + }, + { + "epoch": 18.614699331848552, + "grad_norm": 19.070262908935547, + "learning_rate": 1e-06, + "loss": 0.3966, + "num_input_tokens_seen": 468308700, + "step": 8358 + }, + { + "epoch": 18.614699331848552, + "loss": 0.5038906931877136, + "loss_ce": 0.00010653713979991153, + "loss_iou": 0.201171875, + "loss_num": 0.0203857421875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 468308700, + "step": 8358 + }, + { + "epoch": 18.616926503340757, + "grad_norm": 29.98044776916504, + "learning_rate": 1e-06, + "loss": 0.6309, + "num_input_tokens_seen": 468363020, + "step": 8359 + }, + { + "epoch": 18.616926503340757, + "loss": 0.703240156173706, + "loss_ce": 0.00011515267397044227, + "loss_iou": 0.296875, + "loss_num": 0.021728515625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 468363020, + "step": 8359 + }, + { + "epoch": 18.619153674832962, + "grad_norm": 23.00288963317871, + "learning_rate": 1e-06, + "loss": 0.5153, + "num_input_tokens_seen": 468419740, + "step": 8360 + }, + { + "epoch": 18.619153674832962, + "loss": 0.5316950678825378, + "loss_ce": 7.883799844421446e-05, + "loss_iou": 0.220703125, + "loss_num": 0.0179443359375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 468419740, + "step": 8360 + }, + { + "epoch": 18.621380846325167, + "grad_norm": 26.114784240722656, + "learning_rate": 1e-06, + "loss": 0.4365, + "num_input_tokens_seen": 468475752, + "step": 8361 + }, + { + "epoch": 18.621380846325167, + "loss": 0.40563201904296875, + "loss_ce": 0.00011443370021879673, + "loss_iou": 0.1708984375, + "loss_num": 0.0126953125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 468475752, + "step": 8361 + }, + { + "epoch": 18.62360801781737, + "grad_norm": 25.014408111572266, + "learning_rate": 1e-06, + "loss": 0.3513, + "num_input_tokens_seen": 468529432, + "step": 8362 + }, + { + "epoch": 18.62360801781737, + "loss": 0.3612205982208252, + "loss_ce": 7.560283120255917e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0084228515625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 468529432, + "step": 8362 + }, + { + "epoch": 18.625835189309576, + "grad_norm": 16.121501922607422, + "learning_rate": 1e-06, + "loss": 0.264, + "num_input_tokens_seen": 468587328, + "step": 8363 + }, + { + "epoch": 18.625835189309576, + "loss": 0.21666020154953003, + "loss_ce": 9.220950596500188e-05, + "loss_iou": 0.09326171875, + "loss_num": 0.00592041015625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 468587328, + "step": 8363 + }, + { + "epoch": 18.62806236080178, + "grad_norm": 17.978118896484375, + "learning_rate": 1e-06, + "loss": 0.4242, + "num_input_tokens_seen": 468644720, + "step": 8364 + }, + { + "epoch": 18.62806236080178, + "loss": 0.6226370334625244, + "loss_ce": 7.844380888855085e-05, + "loss_iou": 0.2578125, + "loss_num": 0.021484375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 468644720, + "step": 8364 + }, + { + "epoch": 18.630289532293986, + "grad_norm": 19.74319076538086, + "learning_rate": 1e-06, + "loss": 0.3705, + "num_input_tokens_seen": 468700752, + "step": 8365 + }, + { + "epoch": 18.630289532293986, + "loss": 0.3942309021949768, + "loss_ce": 6.587227107957006e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 468700752, + "step": 8365 + }, + { + "epoch": 18.63251670378619, + "grad_norm": 17.06964683532715, + "learning_rate": 1e-06, + "loss": 0.4573, + "num_input_tokens_seen": 468755944, + "step": 8366 + }, + { + "epoch": 18.63251670378619, + "loss": 0.28583765029907227, + "loss_ce": 7.103722600731999e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.004302978515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 468755944, + "step": 8366 + }, + { + "epoch": 18.634743875278396, + "grad_norm": 13.701447486877441, + "learning_rate": 1e-06, + "loss": 0.4467, + "num_input_tokens_seen": 468811364, + "step": 8367 + }, + { + "epoch": 18.634743875278396, + "loss": 0.3969283103942871, + "loss_ce": 7.772387471050024e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.00714111328125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 468811364, + "step": 8367 + }, + { + "epoch": 18.6369710467706, + "grad_norm": 20.248186111450195, + "learning_rate": 1e-06, + "loss": 0.3801, + "num_input_tokens_seen": 468869972, + "step": 8368 + }, + { + "epoch": 18.6369710467706, + "loss": 0.4675615429878235, + "loss_ce": 9.328986925538629e-05, + "loss_iou": 0.203125, + "loss_num": 0.011962890625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 468869972, + "step": 8368 + }, + { + "epoch": 18.639198218262806, + "grad_norm": 25.159513473510742, + "learning_rate": 1e-06, + "loss": 0.4678, + "num_input_tokens_seen": 468926332, + "step": 8369 + }, + { + "epoch": 18.639198218262806, + "loss": 0.600066065788269, + "loss_ce": 9.047788626048714e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01239013671875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 468926332, + "step": 8369 + }, + { + "epoch": 18.64142538975501, + "grad_norm": 14.643291473388672, + "learning_rate": 1e-06, + "loss": 0.3295, + "num_input_tokens_seen": 468983940, + "step": 8370 + }, + { + "epoch": 18.64142538975501, + "loss": 0.25062063336372375, + "loss_ce": 7.132052996894345e-05, + "loss_iou": 0.09130859375, + "loss_num": 0.01361083984375, + "loss_xval": 0.25, + "num_input_tokens_seen": 468983940, + "step": 8370 + }, + { + "epoch": 18.643652561247215, + "grad_norm": 15.840713500976562, + "learning_rate": 1e-06, + "loss": 0.3142, + "num_input_tokens_seen": 469040776, + "step": 8371 + }, + { + "epoch": 18.643652561247215, + "loss": 0.3157404363155365, + "loss_ce": 6.659841164946556e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.006439208984375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 469040776, + "step": 8371 + }, + { + "epoch": 18.64587973273942, + "grad_norm": 40.83860397338867, + "learning_rate": 1e-06, + "loss": 0.4205, + "num_input_tokens_seen": 469098340, + "step": 8372 + }, + { + "epoch": 18.64587973273942, + "loss": 0.46250006556510925, + "loss_ce": 9.772660268936306e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.0078125, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 469098340, + "step": 8372 + }, + { + "epoch": 18.648106904231625, + "grad_norm": 16.642513275146484, + "learning_rate": 1e-06, + "loss": 0.3309, + "num_input_tokens_seen": 469155160, + "step": 8373 + }, + { + "epoch": 18.648106904231625, + "loss": 0.38674429059028625, + "loss_ce": 8.656815043650568e-05, + "loss_iou": 0.177734375, + "loss_num": 0.006134033203125, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 469155160, + "step": 8373 + }, + { + "epoch": 18.65033407572383, + "grad_norm": 16.026294708251953, + "learning_rate": 1e-06, + "loss": 0.4616, + "num_input_tokens_seen": 469211288, + "step": 8374 + }, + { + "epoch": 18.65033407572383, + "loss": 0.40791958570480347, + "loss_ce": 8.267381781479344e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.00677490234375, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 469211288, + "step": 8374 + }, + { + "epoch": 18.652561247216035, + "grad_norm": 26.638465881347656, + "learning_rate": 1e-06, + "loss": 0.3375, + "num_input_tokens_seen": 469267800, + "step": 8375 + }, + { + "epoch": 18.652561247216035, + "loss": 0.3067091703414917, + "loss_ce": 6.856051186332479e-05, + "loss_iou": 0.13671875, + "loss_num": 0.00653076171875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 469267800, + "step": 8375 + }, + { + "epoch": 18.65478841870824, + "grad_norm": 12.635597229003906, + "learning_rate": 1e-06, + "loss": 0.4255, + "num_input_tokens_seen": 469325572, + "step": 8376 + }, + { + "epoch": 18.65478841870824, + "loss": 0.5839434266090393, + "loss_ce": 8.110229828162119e-05, + "loss_iou": 0.259765625, + "loss_num": 0.01312255859375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 469325572, + "step": 8376 + }, + { + "epoch": 18.657015590200444, + "grad_norm": 15.969237327575684, + "learning_rate": 1e-06, + "loss": 0.3131, + "num_input_tokens_seen": 469381652, + "step": 8377 + }, + { + "epoch": 18.657015590200444, + "loss": 0.3782818019390106, + "loss_ce": 7.744554022792727e-05, + "loss_iou": 0.173828125, + "loss_num": 0.006195068359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 469381652, + "step": 8377 + }, + { + "epoch": 18.65924276169265, + "grad_norm": 16.6336727142334, + "learning_rate": 1e-06, + "loss": 0.3071, + "num_input_tokens_seen": 469437196, + "step": 8378 + }, + { + "epoch": 18.65924276169265, + "loss": 0.32802021503448486, + "loss_ce": 7.833550625946373e-05, + "loss_iou": 0.126953125, + "loss_num": 0.01507568359375, + "loss_xval": 0.328125, + "num_input_tokens_seen": 469437196, + "step": 8378 + }, + { + "epoch": 18.661469933184854, + "grad_norm": 80.5313491821289, + "learning_rate": 1e-06, + "loss": 0.3585, + "num_input_tokens_seen": 469493580, + "step": 8379 + }, + { + "epoch": 18.661469933184854, + "loss": 0.3814215064048767, + "loss_ce": 7.384659693343565e-05, + "loss_iou": 0.169921875, + "loss_num": 0.0081787109375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 469493580, + "step": 8379 + }, + { + "epoch": 18.66369710467706, + "grad_norm": 16.55701446533203, + "learning_rate": 1e-06, + "loss": 0.5018, + "num_input_tokens_seen": 469551092, + "step": 8380 + }, + { + "epoch": 18.66369710467706, + "loss": 0.4989837408065796, + "loss_ce": 8.234399138018489e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.0159912109375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 469551092, + "step": 8380 + }, + { + "epoch": 18.665924276169264, + "grad_norm": 23.459165573120117, + "learning_rate": 1e-06, + "loss": 0.5548, + "num_input_tokens_seen": 469611232, + "step": 8381 + }, + { + "epoch": 18.665924276169264, + "loss": 0.6222162246704102, + "loss_ce": 8.488097955705598e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0150146484375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 469611232, + "step": 8381 + }, + { + "epoch": 18.66815144766147, + "grad_norm": 17.257272720336914, + "learning_rate": 1e-06, + "loss": 0.3304, + "num_input_tokens_seen": 469664948, + "step": 8382 + }, + { + "epoch": 18.66815144766147, + "loss": 0.2271890938282013, + "loss_ce": 7.72664716350846e-05, + "loss_iou": 0.1005859375, + "loss_num": 0.0052490234375, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 469664948, + "step": 8382 + }, + { + "epoch": 18.670378619153674, + "grad_norm": 20.108436584472656, + "learning_rate": 1e-06, + "loss": 0.405, + "num_input_tokens_seen": 469718928, + "step": 8383 + }, + { + "epoch": 18.670378619153674, + "loss": 0.42782682180404663, + "loss_ce": 9.24727282836102e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0096435546875, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 469718928, + "step": 8383 + }, + { + "epoch": 18.67260579064588, + "grad_norm": 17.33344841003418, + "learning_rate": 1e-06, + "loss": 0.4995, + "num_input_tokens_seen": 469777728, + "step": 8384 + }, + { + "epoch": 18.67260579064588, + "loss": 0.449005663394928, + "loss_ce": 9.207701805280522e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.014404296875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 469777728, + "step": 8384 + }, + { + "epoch": 18.674832962138083, + "grad_norm": 19.488948822021484, + "learning_rate": 1e-06, + "loss": 0.5887, + "num_input_tokens_seen": 469832624, + "step": 8385 + }, + { + "epoch": 18.674832962138083, + "loss": 0.3829535245895386, + "loss_ce": 7.999550871318206e-05, + "loss_iou": 0.1640625, + "loss_num": 0.0107421875, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 469832624, + "step": 8385 + }, + { + "epoch": 18.677060133630288, + "grad_norm": 13.389050483703613, + "learning_rate": 1e-06, + "loss": 0.4056, + "num_input_tokens_seen": 469891116, + "step": 8386 + }, + { + "epoch": 18.677060133630288, + "loss": 0.35558634996414185, + "loss_ce": 8.708509267307818e-05, + "loss_iou": 0.154296875, + "loss_num": 0.00946044921875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 469891116, + "step": 8386 + }, + { + "epoch": 18.679287305122493, + "grad_norm": 14.323442459106445, + "learning_rate": 1e-06, + "loss": 0.3495, + "num_input_tokens_seen": 469946560, + "step": 8387 + }, + { + "epoch": 18.679287305122493, + "loss": 0.21974313259124756, + "loss_ce": 7.760837615933269e-05, + "loss_iou": 0.095703125, + "loss_num": 0.005584716796875, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 469946560, + "step": 8387 + }, + { + "epoch": 18.681514476614698, + "grad_norm": 28.984878540039062, + "learning_rate": 1e-06, + "loss": 0.2812, + "num_input_tokens_seen": 470002268, + "step": 8388 + }, + { + "epoch": 18.681514476614698, + "loss": 0.24085469543933868, + "loss_ce": 7.100608490873128e-05, + "loss_iou": 0.107421875, + "loss_num": 0.00518798828125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 470002268, + "step": 8388 + }, + { + "epoch": 18.683741648106903, + "grad_norm": 15.400225639343262, + "learning_rate": 1e-06, + "loss": 0.4376, + "num_input_tokens_seen": 470058180, + "step": 8389 + }, + { + "epoch": 18.683741648106903, + "loss": 0.5878542065620422, + "loss_ce": 8.563668234273791e-05, + "loss_iou": 0.25390625, + "loss_num": 0.016357421875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 470058180, + "step": 8389 + }, + { + "epoch": 18.685968819599108, + "grad_norm": 23.56451988220215, + "learning_rate": 1e-06, + "loss": 0.5334, + "num_input_tokens_seen": 470113364, + "step": 8390 + }, + { + "epoch": 18.685968819599108, + "loss": 0.6698175668716431, + "loss_ce": 7.881040801294148e-05, + "loss_iou": 0.2734375, + "loss_num": 0.024658203125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 470113364, + "step": 8390 + }, + { + "epoch": 18.688195991091312, + "grad_norm": 20.264080047607422, + "learning_rate": 1e-06, + "loss": 0.4449, + "num_input_tokens_seen": 470171080, + "step": 8391 + }, + { + "epoch": 18.688195991091312, + "loss": 0.6437484622001648, + "loss_ce": 7.168846786953509e-05, + "loss_iou": 0.2578125, + "loss_num": 0.025146484375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 470171080, + "step": 8391 + }, + { + "epoch": 18.690423162583517, + "grad_norm": 18.708011627197266, + "learning_rate": 1e-06, + "loss": 0.4008, + "num_input_tokens_seen": 470227556, + "step": 8392 + }, + { + "epoch": 18.690423162583517, + "loss": 0.3530987799167633, + "loss_ce": 7.14488051016815e-05, + "loss_iou": 0.1640625, + "loss_num": 0.005126953125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 470227556, + "step": 8392 + }, + { + "epoch": 18.692650334075722, + "grad_norm": 33.3140754699707, + "learning_rate": 1e-06, + "loss": 0.4063, + "num_input_tokens_seen": 470282536, + "step": 8393 + }, + { + "epoch": 18.692650334075722, + "loss": 0.36571255326271057, + "loss_ce": 0.0001119803127949126, + "loss_iou": 0.166015625, + "loss_num": 0.0067138671875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 470282536, + "step": 8393 + }, + { + "epoch": 18.694877505567927, + "grad_norm": 14.095598220825195, + "learning_rate": 1e-06, + "loss": 0.4011, + "num_input_tokens_seen": 470339848, + "step": 8394 + }, + { + "epoch": 18.694877505567927, + "loss": 0.37506699562072754, + "loss_ce": 6.698207289446145e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00848388671875, + "loss_xval": 0.375, + "num_input_tokens_seen": 470339848, + "step": 8394 + }, + { + "epoch": 18.697104677060132, + "grad_norm": 22.200273513793945, + "learning_rate": 1e-06, + "loss": 0.4252, + "num_input_tokens_seen": 470393704, + "step": 8395 + }, + { + "epoch": 18.697104677060132, + "loss": 0.5692760348320007, + "loss_ce": 6.21723011136055e-05, + "loss_iou": 0.25390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 470393704, + "step": 8395 + }, + { + "epoch": 18.69933184855234, + "grad_norm": 19.075931549072266, + "learning_rate": 1e-06, + "loss": 0.4925, + "num_input_tokens_seen": 470450140, + "step": 8396 + }, + { + "epoch": 18.69933184855234, + "loss": 0.811962902545929, + "loss_ce": 7.329651270993054e-05, + "loss_iou": 0.318359375, + "loss_num": 0.03466796875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 470450140, + "step": 8396 + }, + { + "epoch": 18.70155902004454, + "grad_norm": 15.641639709472656, + "learning_rate": 1e-06, + "loss": 0.4462, + "num_input_tokens_seen": 470505936, + "step": 8397 + }, + { + "epoch": 18.70155902004454, + "loss": 0.47692984342575073, + "loss_ce": 0.0001232139766216278, + "loss_iou": 0.1982421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 470505936, + "step": 8397 + }, + { + "epoch": 18.70378619153675, + "grad_norm": 19.46721839904785, + "learning_rate": 1e-06, + "loss": 0.3472, + "num_input_tokens_seen": 470560592, + "step": 8398 + }, + { + "epoch": 18.70378619153675, + "loss": 0.20130589604377747, + "loss_ce": 7.29848543414846e-05, + "loss_iou": 0.080078125, + "loss_num": 0.00830078125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 470560592, + "step": 8398 + }, + { + "epoch": 18.706013363028955, + "grad_norm": 22.994003295898438, + "learning_rate": 1e-06, + "loss": 0.3366, + "num_input_tokens_seen": 470616532, + "step": 8399 + }, + { + "epoch": 18.706013363028955, + "loss": 0.40258532762527466, + "loss_ce": 0.00011949414329137653, + "loss_iou": 0.1923828125, + "loss_num": 0.0036773681640625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 470616532, + "step": 8399 + }, + { + "epoch": 18.70824053452116, + "grad_norm": 15.002291679382324, + "learning_rate": 1e-06, + "loss": 0.3363, + "num_input_tokens_seen": 470673684, + "step": 8400 + }, + { + "epoch": 18.70824053452116, + "loss": 0.3424663245677948, + "loss_ce": 0.00012012013758067042, + "loss_iou": 0.138671875, + "loss_num": 0.0128173828125, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 470673684, + "step": 8400 + }, + { + "epoch": 18.710467706013365, + "grad_norm": 17.72799301147461, + "learning_rate": 1e-06, + "loss": 0.2912, + "num_input_tokens_seen": 470732336, + "step": 8401 + }, + { + "epoch": 18.710467706013365, + "loss": 0.3603472411632538, + "loss_ce": 7.197496597655118e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0047607421875, + "loss_xval": 0.359375, + "num_input_tokens_seen": 470732336, + "step": 8401 + }, + { + "epoch": 18.71269487750557, + "grad_norm": 19.49424171447754, + "learning_rate": 1e-06, + "loss": 0.4016, + "num_input_tokens_seen": 470789100, + "step": 8402 + }, + { + "epoch": 18.71269487750557, + "loss": 0.5133872628211975, + "loss_ce": 8.159891876857728e-05, + "loss_iou": 0.23828125, + "loss_num": 0.00750732421875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 470789100, + "step": 8402 + }, + { + "epoch": 18.714922048997774, + "grad_norm": 18.081327438354492, + "learning_rate": 1e-06, + "loss": 0.4933, + "num_input_tokens_seen": 470845216, + "step": 8403 + }, + { + "epoch": 18.714922048997774, + "loss": 0.4872537851333618, + "loss_ce": 7.114657637430355e-05, + "loss_iou": 0.1875, + "loss_num": 0.0223388671875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 470845216, + "step": 8403 + }, + { + "epoch": 18.71714922048998, + "grad_norm": 17.4000186920166, + "learning_rate": 1e-06, + "loss": 0.3173, + "num_input_tokens_seen": 470902088, + "step": 8404 + }, + { + "epoch": 18.71714922048998, + "loss": 0.2885332405567169, + "loss_ce": 8.110511407721788e-05, + "loss_iou": 0.125, + "loss_num": 0.00750732421875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 470902088, + "step": 8404 + }, + { + "epoch": 18.719376391982184, + "grad_norm": 17.23605728149414, + "learning_rate": 1e-06, + "loss": 0.523, + "num_input_tokens_seen": 470956888, + "step": 8405 + }, + { + "epoch": 18.719376391982184, + "loss": 0.21879133582115173, + "loss_ce": 0.0001786548673408106, + "loss_iou": 0.09326171875, + "loss_num": 0.00640869140625, + "loss_xval": 0.21875, + "num_input_tokens_seen": 470956888, + "step": 8405 + }, + { + "epoch": 18.72160356347439, + "grad_norm": 22.435617446899414, + "learning_rate": 1e-06, + "loss": 0.4289, + "num_input_tokens_seen": 471010248, + "step": 8406 + }, + { + "epoch": 18.72160356347439, + "loss": 0.4550238251686096, + "loss_ce": 0.00018985335191246122, + "loss_iou": 0.1875, + "loss_num": 0.015869140625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 471010248, + "step": 8406 + }, + { + "epoch": 18.723830734966594, + "grad_norm": 16.684246063232422, + "learning_rate": 1e-06, + "loss": 0.431, + "num_input_tokens_seen": 471067932, + "step": 8407 + }, + { + "epoch": 18.723830734966594, + "loss": 0.3406444191932678, + "loss_ce": 6.82445170241408e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.00762939453125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 471067932, + "step": 8407 + }, + { + "epoch": 18.7260579064588, + "grad_norm": 22.295700073242188, + "learning_rate": 1e-06, + "loss": 0.3284, + "num_input_tokens_seen": 471122864, + "step": 8408 + }, + { + "epoch": 18.7260579064588, + "loss": 0.30706241726875305, + "loss_ce": 8.610197255620733e-05, + "loss_iou": 0.123046875, + "loss_num": 0.01226806640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 471122864, + "step": 8408 + }, + { + "epoch": 18.728285077951004, + "grad_norm": 17.58847999572754, + "learning_rate": 1e-06, + "loss": 0.3384, + "num_input_tokens_seen": 471180772, + "step": 8409 + }, + { + "epoch": 18.728285077951004, + "loss": 0.3137807548046112, + "loss_ce": 9.056746785063297e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.01324462890625, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 471180772, + "step": 8409 + }, + { + "epoch": 18.73051224944321, + "grad_norm": 24.846115112304688, + "learning_rate": 1e-06, + "loss": 0.3556, + "num_input_tokens_seen": 471237380, + "step": 8410 + }, + { + "epoch": 18.73051224944321, + "loss": 0.3991147577762604, + "loss_ce": 6.691899034194648e-05, + "loss_iou": 0.185546875, + "loss_num": 0.00555419921875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 471237380, + "step": 8410 + }, + { + "epoch": 18.732739420935413, + "grad_norm": 18.676179885864258, + "learning_rate": 1e-06, + "loss": 0.3904, + "num_input_tokens_seen": 471293424, + "step": 8411 + }, + { + "epoch": 18.732739420935413, + "loss": 0.28310564160346985, + "loss_ce": 8.561325375922024e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.00787353515625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 471293424, + "step": 8411 + }, + { + "epoch": 18.734966592427618, + "grad_norm": 20.800790786743164, + "learning_rate": 1e-06, + "loss": 0.4449, + "num_input_tokens_seen": 471349792, + "step": 8412 + }, + { + "epoch": 18.734966592427618, + "loss": 0.3075633645057678, + "loss_ce": 6.822836439823732e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.004638671875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 471349792, + "step": 8412 + }, + { + "epoch": 18.737193763919823, + "grad_norm": 23.218751907348633, + "learning_rate": 1e-06, + "loss": 0.3355, + "num_input_tokens_seen": 471407216, + "step": 8413 + }, + { + "epoch": 18.737193763919823, + "loss": 0.4214501678943634, + "loss_ce": 6.347508315229788e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.00970458984375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 471407216, + "step": 8413 + }, + { + "epoch": 18.739420935412028, + "grad_norm": 18.05634117126465, + "learning_rate": 1e-06, + "loss": 0.3551, + "num_input_tokens_seen": 471464544, + "step": 8414 + }, + { + "epoch": 18.739420935412028, + "loss": 0.388986736536026, + "loss_ce": 7.072095468174666e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.01507568359375, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 471464544, + "step": 8414 + }, + { + "epoch": 18.741648106904233, + "grad_norm": 22.533185958862305, + "learning_rate": 1e-06, + "loss": 0.4586, + "num_input_tokens_seen": 471520660, + "step": 8415 + }, + { + "epoch": 18.741648106904233, + "loss": 0.46280378103256226, + "loss_ce": 8.86318739503622e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0103759765625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 471520660, + "step": 8415 + }, + { + "epoch": 18.743875278396438, + "grad_norm": 20.770368576049805, + "learning_rate": 1e-06, + "loss": 0.329, + "num_input_tokens_seen": 471579084, + "step": 8416 + }, + { + "epoch": 18.743875278396438, + "loss": 0.31473100185394287, + "loss_ce": 9.477273124502972e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.00567626953125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 471579084, + "step": 8416 + }, + { + "epoch": 18.746102449888642, + "grad_norm": 48.90117645263672, + "learning_rate": 1e-06, + "loss": 0.2945, + "num_input_tokens_seen": 471633636, + "step": 8417 + }, + { + "epoch": 18.746102449888642, + "loss": 0.25058692693710327, + "loss_ce": 6.0503818531287834e-05, + "loss_iou": 0.11474609375, + "loss_num": 0.004241943359375, + "loss_xval": 0.25, + "num_input_tokens_seen": 471633636, + "step": 8417 + }, + { + "epoch": 18.748329621380847, + "grad_norm": 15.029791831970215, + "learning_rate": 1e-06, + "loss": 0.3173, + "num_input_tokens_seen": 471690856, + "step": 8418 + }, + { + "epoch": 18.748329621380847, + "loss": 0.32165291905403137, + "loss_ce": 5.868007428944111e-05, + "loss_iou": 0.14453125, + "loss_num": 0.006500244140625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 471690856, + "step": 8418 + }, + { + "epoch": 18.750556792873052, + "grad_norm": 18.00300407409668, + "learning_rate": 1e-06, + "loss": 0.3783, + "num_input_tokens_seen": 471749244, + "step": 8419 + }, + { + "epoch": 18.750556792873052, + "loss": 0.325858473777771, + "loss_ce": 0.0001748991635395214, + "loss_iou": 0.1455078125, + "loss_num": 0.00689697265625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 471749244, + "step": 8419 + }, + { + "epoch": 18.752783964365257, + "grad_norm": 12.98948860168457, + "learning_rate": 1e-06, + "loss": 0.4111, + "num_input_tokens_seen": 471804068, + "step": 8420 + }, + { + "epoch": 18.752783964365257, + "loss": 0.4239133596420288, + "loss_ce": 8.521559357177466e-05, + "loss_iou": 0.15625, + "loss_num": 0.0223388671875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 471804068, + "step": 8420 + }, + { + "epoch": 18.755011135857462, + "grad_norm": 13.578185081481934, + "learning_rate": 1e-06, + "loss": 0.3306, + "num_input_tokens_seen": 471859564, + "step": 8421 + }, + { + "epoch": 18.755011135857462, + "loss": 0.285760760307312, + "loss_ce": 5.517357203643769e-05, + "loss_iou": 0.11083984375, + "loss_num": 0.0126953125, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 471859564, + "step": 8421 + }, + { + "epoch": 18.757238307349667, + "grad_norm": 13.726969718933105, + "learning_rate": 1e-06, + "loss": 0.5174, + "num_input_tokens_seen": 471918184, + "step": 8422 + }, + { + "epoch": 18.757238307349667, + "loss": 0.4225544333457947, + "loss_ce": 6.908080104039982e-05, + "loss_iou": 0.1953125, + "loss_num": 0.0064697265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 471918184, + "step": 8422 + }, + { + "epoch": 18.75946547884187, + "grad_norm": 27.957782745361328, + "learning_rate": 1e-06, + "loss": 0.4461, + "num_input_tokens_seen": 471972848, + "step": 8423 + }, + { + "epoch": 18.75946547884187, + "loss": 0.5910878777503967, + "loss_ce": 8.448412700090557e-05, + "loss_iou": 0.265625, + "loss_num": 0.0118408203125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 471972848, + "step": 8423 + }, + { + "epoch": 18.761692650334076, + "grad_norm": 20.952011108398438, + "learning_rate": 1e-06, + "loss": 0.6627, + "num_input_tokens_seen": 472030196, + "step": 8424 + }, + { + "epoch": 18.761692650334076, + "loss": 0.3828308880329132, + "loss_ce": 7.944751996546984e-05, + "loss_iou": 0.171875, + "loss_num": 0.007781982421875, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 472030196, + "step": 8424 + }, + { + "epoch": 18.76391982182628, + "grad_norm": 17.825992584228516, + "learning_rate": 1e-06, + "loss": 0.3981, + "num_input_tokens_seen": 472086448, + "step": 8425 + }, + { + "epoch": 18.76391982182628, + "loss": 0.3020787835121155, + "loss_ce": 7.681577699258924e-05, + "loss_iou": 0.134765625, + "loss_num": 0.006439208984375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 472086448, + "step": 8425 + }, + { + "epoch": 18.766146993318486, + "grad_norm": 15.120186805725098, + "learning_rate": 1e-06, + "loss": 0.6746, + "num_input_tokens_seen": 472144096, + "step": 8426 + }, + { + "epoch": 18.766146993318486, + "loss": 0.4926231801509857, + "loss_ce": 6.947731162654236e-05, + "loss_iou": 0.236328125, + "loss_num": 0.00396728515625, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 472144096, + "step": 8426 + }, + { + "epoch": 18.76837416481069, + "grad_norm": 60.89303207397461, + "learning_rate": 1e-06, + "loss": 0.6013, + "num_input_tokens_seen": 472197860, + "step": 8427 + }, + { + "epoch": 18.76837416481069, + "loss": 0.546029269695282, + "loss_ce": 6.977089651627466e-05, + "loss_iou": 0.201171875, + "loss_num": 0.0286865234375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 472197860, + "step": 8427 + }, + { + "epoch": 18.770601336302896, + "grad_norm": 23.926664352416992, + "learning_rate": 1e-06, + "loss": 0.4401, + "num_input_tokens_seen": 472254716, + "step": 8428 + }, + { + "epoch": 18.770601336302896, + "loss": 0.444293737411499, + "loss_ce": 7.984477269928902e-05, + "loss_iou": 0.1953125, + "loss_num": 0.01055908203125, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 472254716, + "step": 8428 + }, + { + "epoch": 18.7728285077951, + "grad_norm": 20.06789779663086, + "learning_rate": 1e-06, + "loss": 0.6177, + "num_input_tokens_seen": 472312756, + "step": 8429 + }, + { + "epoch": 18.7728285077951, + "loss": 0.5740514993667603, + "loss_ce": 7.688780897296965e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0255126953125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 472312756, + "step": 8429 + }, + { + "epoch": 18.775055679287306, + "grad_norm": 32.13956069946289, + "learning_rate": 1e-06, + "loss": 0.3333, + "num_input_tokens_seen": 472367428, + "step": 8430 + }, + { + "epoch": 18.775055679287306, + "loss": 0.2963276207447052, + "loss_ce": 6.297710933722556e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.00482177734375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 472367428, + "step": 8430 + }, + { + "epoch": 18.77728285077951, + "grad_norm": 16.847000122070312, + "learning_rate": 1e-06, + "loss": 0.329, + "num_input_tokens_seen": 472424024, + "step": 8431 + }, + { + "epoch": 18.77728285077951, + "loss": 0.3400919437408447, + "loss_ce": 6.50831643724814e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.01348876953125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 472424024, + "step": 8431 + }, + { + "epoch": 18.779510022271715, + "grad_norm": 27.27733039855957, + "learning_rate": 1e-06, + "loss": 0.3656, + "num_input_tokens_seen": 472478816, + "step": 8432 + }, + { + "epoch": 18.779510022271715, + "loss": 0.36883819103240967, + "loss_ce": 6.378746184054762e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0087890625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 472478816, + "step": 8432 + }, + { + "epoch": 18.78173719376392, + "grad_norm": 42.84598922729492, + "learning_rate": 1e-06, + "loss": 0.2843, + "num_input_tokens_seen": 472534048, + "step": 8433 + }, + { + "epoch": 18.78173719376392, + "loss": 0.2845653295516968, + "loss_ce": 8.045761933317408e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.00830078125, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 472534048, + "step": 8433 + }, + { + "epoch": 18.783964365256125, + "grad_norm": 13.706661224365234, + "learning_rate": 1e-06, + "loss": 0.4059, + "num_input_tokens_seen": 472588848, + "step": 8434 + }, + { + "epoch": 18.783964365256125, + "loss": 0.5129237771034241, + "loss_ce": 0.00010639386164257303, + "loss_iou": 0.2021484375, + "loss_num": 0.0218505859375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 472588848, + "step": 8434 + }, + { + "epoch": 18.78619153674833, + "grad_norm": 17.942852020263672, + "learning_rate": 1e-06, + "loss": 0.3433, + "num_input_tokens_seen": 472646472, + "step": 8435 + }, + { + "epoch": 18.78619153674833, + "loss": 0.323493629693985, + "loss_ce": 6.834171654190868e-05, + "loss_iou": 0.142578125, + "loss_num": 0.007415771484375, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 472646472, + "step": 8435 + }, + { + "epoch": 18.788418708240535, + "grad_norm": 19.692903518676758, + "learning_rate": 1e-06, + "loss": 0.5043, + "num_input_tokens_seen": 472705540, + "step": 8436 + }, + { + "epoch": 18.788418708240535, + "loss": 0.5412243604660034, + "loss_ce": 8.667838119436055e-05, + "loss_iou": 0.2265625, + "loss_num": 0.0174560546875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 472705540, + "step": 8436 + }, + { + "epoch": 18.79064587973274, + "grad_norm": 16.23405647277832, + "learning_rate": 1e-06, + "loss": 0.4239, + "num_input_tokens_seen": 472761744, + "step": 8437 + }, + { + "epoch": 18.79064587973274, + "loss": 0.44612982869148254, + "loss_ce": 8.490896289004013e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.0101318359375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 472761744, + "step": 8437 + }, + { + "epoch": 18.792873051224944, + "grad_norm": 23.637725830078125, + "learning_rate": 1e-06, + "loss": 0.3022, + "num_input_tokens_seen": 472817648, + "step": 8438 + }, + { + "epoch": 18.792873051224944, + "loss": 0.30181801319122314, + "loss_ce": 6.019488864694722e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.00848388671875, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 472817648, + "step": 8438 + }, + { + "epoch": 18.79510022271715, + "grad_norm": 16.10801887512207, + "learning_rate": 1e-06, + "loss": 0.5896, + "num_input_tokens_seen": 472873080, + "step": 8439 + }, + { + "epoch": 18.79510022271715, + "loss": 0.7904741168022156, + "loss_ce": 6.880288128741086e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0250244140625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 472873080, + "step": 8439 + }, + { + "epoch": 18.797327394209354, + "grad_norm": 29.073570251464844, + "learning_rate": 1e-06, + "loss": 0.2354, + "num_input_tokens_seen": 472930596, + "step": 8440 + }, + { + "epoch": 18.797327394209354, + "loss": 0.18952415883541107, + "loss_ce": 7.103992538759485e-05, + "loss_iou": 0.07666015625, + "loss_num": 0.00726318359375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 472930596, + "step": 8440 + }, + { + "epoch": 18.79955456570156, + "grad_norm": 19.053442001342773, + "learning_rate": 1e-06, + "loss": 0.5383, + "num_input_tokens_seen": 472984744, + "step": 8441 + }, + { + "epoch": 18.79955456570156, + "loss": 0.39138782024383545, + "loss_ce": 9.141798363998532e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.01470947265625, + "loss_xval": 0.390625, + "num_input_tokens_seen": 472984744, + "step": 8441 + }, + { + "epoch": 18.801781737193764, + "grad_norm": 16.559202194213867, + "learning_rate": 1e-06, + "loss": 0.4231, + "num_input_tokens_seen": 473041436, + "step": 8442 + }, + { + "epoch": 18.801781737193764, + "loss": 0.47130343317985535, + "loss_ce": 8.148739289026707e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.0291748046875, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 473041436, + "step": 8442 + }, + { + "epoch": 18.80400890868597, + "grad_norm": 25.658018112182617, + "learning_rate": 1e-06, + "loss": 0.3696, + "num_input_tokens_seen": 473093136, + "step": 8443 + }, + { + "epoch": 18.80400890868597, + "loss": 0.31416118144989014, + "loss_ce": 7.427769742207602e-05, + "loss_iou": 0.14453125, + "loss_num": 0.005096435546875, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 473093136, + "step": 8443 + }, + { + "epoch": 18.806236080178174, + "grad_norm": 21.92131996154785, + "learning_rate": 1e-06, + "loss": 0.3055, + "num_input_tokens_seen": 473149008, + "step": 8444 + }, + { + "epoch": 18.806236080178174, + "loss": 0.33100777864456177, + "loss_ce": 7.515733886975795e-05, + "loss_iou": 0.150390625, + "loss_num": 0.006195068359375, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 473149008, + "step": 8444 + }, + { + "epoch": 18.80846325167038, + "grad_norm": 21.062211990356445, + "learning_rate": 1e-06, + "loss": 0.3811, + "num_input_tokens_seen": 473203320, + "step": 8445 + }, + { + "epoch": 18.80846325167038, + "loss": 0.36316657066345215, + "loss_ce": 6.842101720394567e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.005859375, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 473203320, + "step": 8445 + }, + { + "epoch": 18.810690423162583, + "grad_norm": 24.50238800048828, + "learning_rate": 1e-06, + "loss": 0.3116, + "num_input_tokens_seen": 473259040, + "step": 8446 + }, + { + "epoch": 18.810690423162583, + "loss": 0.31474488973617554, + "loss_ce": 7.813816046109423e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.00726318359375, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 473259040, + "step": 8446 + }, + { + "epoch": 18.812917594654788, + "grad_norm": 18.850893020629883, + "learning_rate": 1e-06, + "loss": 0.5647, + "num_input_tokens_seen": 473312764, + "step": 8447 + }, + { + "epoch": 18.812917594654788, + "loss": 0.6031259298324585, + "loss_ce": 9.859535930445418e-05, + "loss_iou": 0.236328125, + "loss_num": 0.025634765625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 473312764, + "step": 8447 + }, + { + "epoch": 18.815144766146993, + "grad_norm": 122.17961883544922, + "learning_rate": 1e-06, + "loss": 0.3084, + "num_input_tokens_seen": 473371280, + "step": 8448 + }, + { + "epoch": 18.815144766146993, + "loss": 0.32418200373649597, + "loss_ce": 8.531348430551589e-05, + "loss_iou": 0.14453125, + "loss_num": 0.006805419921875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 473371280, + "step": 8448 + }, + { + "epoch": 18.817371937639198, + "grad_norm": 26.249561309814453, + "learning_rate": 1e-06, + "loss": 0.336, + "num_input_tokens_seen": 473426996, + "step": 8449 + }, + { + "epoch": 18.817371937639198, + "loss": 0.44048649072647095, + "loss_ce": 5.6789031077641994e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.01025390625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 473426996, + "step": 8449 + }, + { + "epoch": 18.819599109131403, + "grad_norm": 13.715222358703613, + "learning_rate": 1e-06, + "loss": 0.2667, + "num_input_tokens_seen": 473482228, + "step": 8450 + }, + { + "epoch": 18.819599109131403, + "loss": 0.30467987060546875, + "loss_ce": 0.00011442266986705363, + "loss_iou": 0.1396484375, + "loss_num": 0.005218505859375, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 473482228, + "step": 8450 + }, + { + "epoch": 18.821826280623608, + "grad_norm": 37.36369705200195, + "learning_rate": 1e-06, + "loss": 0.4166, + "num_input_tokens_seen": 473535108, + "step": 8451 + }, + { + "epoch": 18.821826280623608, + "loss": 0.44977593421936035, + "loss_ce": 6.89039152348414e-05, + "loss_iou": 0.19140625, + "loss_num": 0.013427734375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 473535108, + "step": 8451 + }, + { + "epoch": 18.824053452115812, + "grad_norm": 20.77572250366211, + "learning_rate": 1e-06, + "loss": 0.2944, + "num_input_tokens_seen": 473592996, + "step": 8452 + }, + { + "epoch": 18.824053452115812, + "loss": 0.3387192189693451, + "loss_ce": 9.616896568331867e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.010986328125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 473592996, + "step": 8452 + }, + { + "epoch": 18.826280623608017, + "grad_norm": 17.34552001953125, + "learning_rate": 1e-06, + "loss": 0.4778, + "num_input_tokens_seen": 473649796, + "step": 8453 + }, + { + "epoch": 18.826280623608017, + "loss": 0.4614381790161133, + "loss_ce": 7.342195021919906e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.016357421875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 473649796, + "step": 8453 + }, + { + "epoch": 18.828507795100222, + "grad_norm": 12.931282997131348, + "learning_rate": 1e-06, + "loss": 0.3362, + "num_input_tokens_seen": 473706892, + "step": 8454 + }, + { + "epoch": 18.828507795100222, + "loss": 0.2970700263977051, + "loss_ce": 7.297511911019683e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.0081787109375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 473706892, + "step": 8454 + }, + { + "epoch": 18.830734966592427, + "grad_norm": 40.271270751953125, + "learning_rate": 1e-06, + "loss": 0.5246, + "num_input_tokens_seen": 473763124, + "step": 8455 + }, + { + "epoch": 18.830734966592427, + "loss": 0.5076436996459961, + "loss_ce": 7.534350879723206e-05, + "loss_iou": 0.21875, + "loss_num": 0.01416015625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 473763124, + "step": 8455 + }, + { + "epoch": 18.832962138084632, + "grad_norm": 49.25373840332031, + "learning_rate": 1e-06, + "loss": 0.4866, + "num_input_tokens_seen": 473818376, + "step": 8456 + }, + { + "epoch": 18.832962138084632, + "loss": 0.4683504104614258, + "loss_ce": 8.872270700521767e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.00823974609375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 473818376, + "step": 8456 + }, + { + "epoch": 18.835189309576837, + "grad_norm": 16.492551803588867, + "learning_rate": 1e-06, + "loss": 0.5093, + "num_input_tokens_seen": 473873400, + "step": 8457 + }, + { + "epoch": 18.835189309576837, + "loss": 0.5018711686134338, + "loss_ce": 8.585450996179134e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.00799560546875, + "loss_xval": 0.5, + "num_input_tokens_seen": 473873400, + "step": 8457 + }, + { + "epoch": 18.83741648106904, + "grad_norm": 18.73830223083496, + "learning_rate": 1e-06, + "loss": 0.4191, + "num_input_tokens_seen": 473928576, + "step": 8458 + }, + { + "epoch": 18.83741648106904, + "loss": 0.3809613585472107, + "loss_ce": 0.00010200223186984658, + "loss_iou": 0.1611328125, + "loss_num": 0.01153564453125, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 473928576, + "step": 8458 + }, + { + "epoch": 18.839643652561247, + "grad_norm": 21.033058166503906, + "learning_rate": 1e-06, + "loss": 0.3665, + "num_input_tokens_seen": 473982784, + "step": 8459 + }, + { + "epoch": 18.839643652561247, + "loss": 0.28056997060775757, + "loss_ce": 5.241744656814262e-05, + "loss_iou": 0.123046875, + "loss_num": 0.00689697265625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 473982784, + "step": 8459 + }, + { + "epoch": 18.84187082405345, + "grad_norm": 18.85097312927246, + "learning_rate": 1e-06, + "loss": 0.4517, + "num_input_tokens_seen": 474036980, + "step": 8460 + }, + { + "epoch": 18.84187082405345, + "loss": 0.4336727559566498, + "loss_ce": 7.89997138781473e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 474036980, + "step": 8460 + }, + { + "epoch": 18.844097995545656, + "grad_norm": 22.325136184692383, + "learning_rate": 1e-06, + "loss": 0.3886, + "num_input_tokens_seen": 474091556, + "step": 8461 + }, + { + "epoch": 18.844097995545656, + "loss": 0.43452924489974976, + "loss_ce": 0.00014204179751686752, + "loss_iou": 0.1875, + "loss_num": 0.01190185546875, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 474091556, + "step": 8461 + }, + { + "epoch": 18.84632516703786, + "grad_norm": 14.574409484863281, + "learning_rate": 1e-06, + "loss": 0.3643, + "num_input_tokens_seen": 474147028, + "step": 8462 + }, + { + "epoch": 18.84632516703786, + "loss": 0.22980637848377228, + "loss_ce": 7.004974031588063e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.004608154296875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 474147028, + "step": 8462 + }, + { + "epoch": 18.848552338530066, + "grad_norm": 31.346830368041992, + "learning_rate": 1e-06, + "loss": 0.4338, + "num_input_tokens_seen": 474203072, + "step": 8463 + }, + { + "epoch": 18.848552338530066, + "loss": 0.4625985324382782, + "loss_ce": 7.410830585286021e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.00860595703125, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 474203072, + "step": 8463 + }, + { + "epoch": 18.85077951002227, + "grad_norm": 15.782477378845215, + "learning_rate": 1e-06, + "loss": 0.3963, + "num_input_tokens_seen": 474259464, + "step": 8464 + }, + { + "epoch": 18.85077951002227, + "loss": 0.4502849578857422, + "loss_ce": 8.967139001470059e-05, + "loss_iou": 0.1875, + "loss_num": 0.01519775390625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 474259464, + "step": 8464 + }, + { + "epoch": 18.853006681514476, + "grad_norm": 19.337142944335938, + "learning_rate": 1e-06, + "loss": 0.3895, + "num_input_tokens_seen": 474316776, + "step": 8465 + }, + { + "epoch": 18.853006681514476, + "loss": 0.47631609439849854, + "loss_ce": 0.00011980824638158083, + "loss_iou": 0.1923828125, + "loss_num": 0.01806640625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 474316776, + "step": 8465 + }, + { + "epoch": 18.85523385300668, + "grad_norm": 16.090877532958984, + "learning_rate": 1e-06, + "loss": 0.4098, + "num_input_tokens_seen": 474372780, + "step": 8466 + }, + { + "epoch": 18.85523385300668, + "loss": 0.3744850754737854, + "loss_ce": 9.543487976770848e-05, + "loss_iou": 0.169921875, + "loss_num": 0.006683349609375, + "loss_xval": 0.375, + "num_input_tokens_seen": 474372780, + "step": 8466 + }, + { + "epoch": 18.857461024498885, + "grad_norm": 19.818513870239258, + "learning_rate": 1e-06, + "loss": 0.4536, + "num_input_tokens_seen": 474429796, + "step": 8467 + }, + { + "epoch": 18.857461024498885, + "loss": 0.6125023365020752, + "loss_ce": 7.554069452453405e-05, + "loss_iou": 0.27734375, + "loss_num": 0.01123046875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 474429796, + "step": 8467 + }, + { + "epoch": 18.85968819599109, + "grad_norm": 18.213647842407227, + "learning_rate": 1e-06, + "loss": 0.3334, + "num_input_tokens_seen": 474489184, + "step": 8468 + }, + { + "epoch": 18.85968819599109, + "loss": 0.26028338074684143, + "loss_ce": 7.525723776780069e-05, + "loss_iou": 0.12109375, + "loss_num": 0.003570556640625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 474489184, + "step": 8468 + }, + { + "epoch": 18.861915367483295, + "grad_norm": 23.33075523376465, + "learning_rate": 1e-06, + "loss": 0.3492, + "num_input_tokens_seen": 474546056, + "step": 8469 + }, + { + "epoch": 18.861915367483295, + "loss": 0.4007795751094818, + "loss_ce": 8.379328937735409e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.01171875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 474546056, + "step": 8469 + }, + { + "epoch": 18.8641425389755, + "grad_norm": 30.516542434692383, + "learning_rate": 1e-06, + "loss": 0.3336, + "num_input_tokens_seen": 474601092, + "step": 8470 + }, + { + "epoch": 18.8641425389755, + "loss": 0.2731455862522125, + "loss_ce": 7.429896504618227e-05, + "loss_iou": 0.126953125, + "loss_num": 0.003814697265625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 474601092, + "step": 8470 + }, + { + "epoch": 18.866369710467705, + "grad_norm": 17.464170455932617, + "learning_rate": 1e-06, + "loss": 0.4543, + "num_input_tokens_seen": 474657324, + "step": 8471 + }, + { + "epoch": 18.866369710467705, + "loss": 0.48732370138168335, + "loss_ce": 8.003832772374153e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.0125732421875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 474657324, + "step": 8471 + }, + { + "epoch": 18.86859688195991, + "grad_norm": 19.651233673095703, + "learning_rate": 1e-06, + "loss": 0.3527, + "num_input_tokens_seen": 474713160, + "step": 8472 + }, + { + "epoch": 18.86859688195991, + "loss": 0.3799467086791992, + "loss_ce": 6.391612987499684e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00830078125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 474713160, + "step": 8472 + }, + { + "epoch": 18.870824053452115, + "grad_norm": 41.639102935791016, + "learning_rate": 1e-06, + "loss": 0.6776, + "num_input_tokens_seen": 474771492, + "step": 8473 + }, + { + "epoch": 18.870824053452115, + "loss": 0.556826651096344, + "loss_ce": 6.396578100975603e-05, + "loss_iou": 0.259765625, + "loss_num": 0.007720947265625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 474771492, + "step": 8473 + }, + { + "epoch": 18.87305122494432, + "grad_norm": 20.428213119506836, + "learning_rate": 1e-06, + "loss": 0.4747, + "num_input_tokens_seen": 474825884, + "step": 8474 + }, + { + "epoch": 18.87305122494432, + "loss": 0.5070443153381348, + "loss_ce": 8.636478742118925e-05, + "loss_iou": 0.23828125, + "loss_num": 0.006256103515625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 474825884, + "step": 8474 + }, + { + "epoch": 18.875278396436524, + "grad_norm": 16.10317039489746, + "learning_rate": 1e-06, + "loss": 0.2764, + "num_input_tokens_seen": 474880660, + "step": 8475 + }, + { + "epoch": 18.875278396436524, + "loss": 0.1743912398815155, + "loss_ce": 7.483765512006357e-05, + "loss_iou": 0.0791015625, + "loss_num": 0.0032806396484375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 474880660, + "step": 8475 + }, + { + "epoch": 18.87750556792873, + "grad_norm": 21.885726928710938, + "learning_rate": 1e-06, + "loss": 0.5585, + "num_input_tokens_seen": 474936820, + "step": 8476 + }, + { + "epoch": 18.87750556792873, + "loss": 0.6800100803375244, + "loss_ce": 7.844208448659629e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 474936820, + "step": 8476 + }, + { + "epoch": 18.879732739420934, + "grad_norm": 26.458858489990234, + "learning_rate": 1e-06, + "loss": 0.487, + "num_input_tokens_seen": 474995968, + "step": 8477 + }, + { + "epoch": 18.879732739420934, + "loss": 0.4402797818183899, + "loss_ce": 9.42522456170991e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.01336669921875, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 474995968, + "step": 8477 + }, + { + "epoch": 18.88195991091314, + "grad_norm": 21.456289291381836, + "learning_rate": 1e-06, + "loss": 0.4094, + "num_input_tokens_seen": 475052128, + "step": 8478 + }, + { + "epoch": 18.88195991091314, + "loss": 0.3907051384449005, + "loss_ce": 8.013312617549673e-05, + "loss_iou": 0.173828125, + "loss_num": 0.0086669921875, + "loss_xval": 0.390625, + "num_input_tokens_seen": 475052128, + "step": 8478 + }, + { + "epoch": 18.884187082405344, + "grad_norm": 19.8070068359375, + "learning_rate": 1e-06, + "loss": 0.5119, + "num_input_tokens_seen": 475109916, + "step": 8479 + }, + { + "epoch": 18.884187082405344, + "loss": 0.33552417159080505, + "loss_ce": 7.495496538467705e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.007537841796875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 475109916, + "step": 8479 + }, + { + "epoch": 18.88641425389755, + "grad_norm": 18.59421157836914, + "learning_rate": 1e-06, + "loss": 0.3461, + "num_input_tokens_seen": 475165660, + "step": 8480 + }, + { + "epoch": 18.88641425389755, + "loss": 0.34626448154449463, + "loss_ce": 7.30836036382243e-05, + "loss_iou": 0.162109375, + "loss_num": 0.00445556640625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 475165660, + "step": 8480 + }, + { + "epoch": 18.888641425389753, + "grad_norm": 35.3235969543457, + "learning_rate": 1e-06, + "loss": 0.3462, + "num_input_tokens_seen": 475223024, + "step": 8481 + }, + { + "epoch": 18.888641425389753, + "loss": 0.3160410225391388, + "loss_ce": 6.200766074471176e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.00726318359375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 475223024, + "step": 8481 + }, + { + "epoch": 18.89086859688196, + "grad_norm": 17.297746658325195, + "learning_rate": 1e-06, + "loss": 0.3611, + "num_input_tokens_seen": 475278500, + "step": 8482 + }, + { + "epoch": 18.89086859688196, + "loss": 0.2839501202106476, + "loss_ce": 7.561447273474187e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00262451171875, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 475278500, + "step": 8482 + }, + { + "epoch": 18.893095768374163, + "grad_norm": 26.896717071533203, + "learning_rate": 1e-06, + "loss": 0.5017, + "num_input_tokens_seen": 475332444, + "step": 8483 + }, + { + "epoch": 18.893095768374163, + "loss": 0.4822501540184021, + "loss_ce": 7.242616266012192e-05, + "loss_iou": 0.1796875, + "loss_num": 0.0244140625, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 475332444, + "step": 8483 + }, + { + "epoch": 18.895322939866368, + "grad_norm": 27.973371505737305, + "learning_rate": 1e-06, + "loss": 0.3581, + "num_input_tokens_seen": 475389244, + "step": 8484 + }, + { + "epoch": 18.895322939866368, + "loss": 0.31134331226348877, + "loss_ce": 6.400147685781121e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00823974609375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 475389244, + "step": 8484 + }, + { + "epoch": 18.897550111358576, + "grad_norm": 16.82830810546875, + "learning_rate": 1e-06, + "loss": 0.3729, + "num_input_tokens_seen": 475446276, + "step": 8485 + }, + { + "epoch": 18.897550111358576, + "loss": 0.3148788809776306, + "loss_ce": 5.956278619123623e-05, + "loss_iou": 0.142578125, + "loss_num": 0.006072998046875, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 475446276, + "step": 8485 + }, + { + "epoch": 18.899777282850778, + "grad_norm": 16.516233444213867, + "learning_rate": 1e-06, + "loss": 0.2496, + "num_input_tokens_seen": 475501428, + "step": 8486 + }, + { + "epoch": 18.899777282850778, + "loss": 0.33766478300094604, + "loss_ce": 7.93259241618216e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.00396728515625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 475501428, + "step": 8486 + }, + { + "epoch": 18.902004454342986, + "grad_norm": 19.306852340698242, + "learning_rate": 1e-06, + "loss": 0.3633, + "num_input_tokens_seen": 475557696, + "step": 8487 + }, + { + "epoch": 18.902004454342986, + "loss": 0.2621913552284241, + "loss_ce": 7.588176958961412e-05, + "loss_iou": 0.11474609375, + "loss_num": 0.006439208984375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 475557696, + "step": 8487 + }, + { + "epoch": 18.90423162583519, + "grad_norm": 17.466773986816406, + "learning_rate": 1e-06, + "loss": 0.346, + "num_input_tokens_seen": 475614528, + "step": 8488 + }, + { + "epoch": 18.90423162583519, + "loss": 0.5012964010238647, + "loss_ce": 7.571464811917394e-05, + "loss_iou": 0.2109375, + "loss_num": 0.0159912109375, + "loss_xval": 0.5, + "num_input_tokens_seen": 475614528, + "step": 8488 + }, + { + "epoch": 18.906458797327396, + "grad_norm": 16.38589096069336, + "learning_rate": 1e-06, + "loss": 0.4567, + "num_input_tokens_seen": 475671076, + "step": 8489 + }, + { + "epoch": 18.906458797327396, + "loss": 0.43280452489852905, + "loss_ce": 6.528291123686358e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.007110595703125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 475671076, + "step": 8489 + }, + { + "epoch": 18.9086859688196, + "grad_norm": 22.471385955810547, + "learning_rate": 1e-06, + "loss": 0.6548, + "num_input_tokens_seen": 475723864, + "step": 8490 + }, + { + "epoch": 18.9086859688196, + "loss": 0.7026335000991821, + "loss_ce": 0.0003630055289249867, + "loss_iou": 0.328125, + "loss_num": 0.00872802734375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 475723864, + "step": 8490 + }, + { + "epoch": 18.910913140311806, + "grad_norm": 16.447473526000977, + "learning_rate": 1e-06, + "loss": 0.3351, + "num_input_tokens_seen": 475779860, + "step": 8491 + }, + { + "epoch": 18.910913140311806, + "loss": 0.29600396752357483, + "loss_ce": 7.501787331420928e-05, + "loss_iou": 0.12890625, + "loss_num": 0.007537841796875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 475779860, + "step": 8491 + }, + { + "epoch": 18.91314031180401, + "grad_norm": 16.438947677612305, + "learning_rate": 1e-06, + "loss": 0.4427, + "num_input_tokens_seen": 475835912, + "step": 8492 + }, + { + "epoch": 18.91314031180401, + "loss": 0.4535049796104431, + "loss_ce": 7.47992453398183e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01055908203125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 475835912, + "step": 8492 + }, + { + "epoch": 18.915367483296215, + "grad_norm": 16.084348678588867, + "learning_rate": 1e-06, + "loss": 0.7689, + "num_input_tokens_seen": 475892356, + "step": 8493 + }, + { + "epoch": 18.915367483296215, + "loss": 0.9672378301620483, + "loss_ce": 7.476539758499712e-05, + "loss_iou": 0.359375, + "loss_num": 0.04931640625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 475892356, + "step": 8493 + }, + { + "epoch": 18.91759465478842, + "grad_norm": 21.32638168334961, + "learning_rate": 1e-06, + "loss": 0.4226, + "num_input_tokens_seen": 475948980, + "step": 8494 + }, + { + "epoch": 18.91759465478842, + "loss": 0.461247056722641, + "loss_ce": 0.0001875017478596419, + "loss_iou": 0.2080078125, + "loss_num": 0.00921630859375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 475948980, + "step": 8494 + }, + { + "epoch": 18.919821826280625, + "grad_norm": 36.91980743408203, + "learning_rate": 1e-06, + "loss": 0.4261, + "num_input_tokens_seen": 476005300, + "step": 8495 + }, + { + "epoch": 18.919821826280625, + "loss": 0.39789801836013794, + "loss_ce": 7.085979450494051e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.006683349609375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 476005300, + "step": 8495 + }, + { + "epoch": 18.92204899777283, + "grad_norm": 28.42441749572754, + "learning_rate": 1e-06, + "loss": 0.3562, + "num_input_tokens_seen": 476059068, + "step": 8496 + }, + { + "epoch": 18.92204899777283, + "loss": 0.35445064306259155, + "loss_ce": 8.05156523711048e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00555419921875, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 476059068, + "step": 8496 + }, + { + "epoch": 18.924276169265035, + "grad_norm": 15.65682601928711, + "learning_rate": 1e-06, + "loss": 0.3527, + "num_input_tokens_seen": 476116536, + "step": 8497 + }, + { + "epoch": 18.924276169265035, + "loss": 0.347369909286499, + "loss_ce": 7.986459240783006e-05, + "loss_iou": 0.15234375, + "loss_num": 0.00848388671875, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 476116536, + "step": 8497 + }, + { + "epoch": 18.92650334075724, + "grad_norm": 14.354292869567871, + "learning_rate": 1e-06, + "loss": 0.5194, + "num_input_tokens_seen": 476172064, + "step": 8498 + }, + { + "epoch": 18.92650334075724, + "loss": 0.49591702222824097, + "loss_ce": 6.739451055182144e-05, + "loss_iou": 0.228515625, + "loss_num": 0.007598876953125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 476172064, + "step": 8498 + }, + { + "epoch": 18.928730512249444, + "grad_norm": 26.137008666992188, + "learning_rate": 1e-06, + "loss": 0.4098, + "num_input_tokens_seen": 476226344, + "step": 8499 + }, + { + "epoch": 18.928730512249444, + "loss": 0.36395373940467834, + "loss_ce": 6.214019958861172e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.00921630859375, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 476226344, + "step": 8499 + }, + { + "epoch": 18.93095768374165, + "grad_norm": 15.024909973144531, + "learning_rate": 1e-06, + "loss": 0.2597, + "num_input_tokens_seen": 476279788, + "step": 8500 + }, + { + "epoch": 18.93095768374165, + "eval_seeclick_web_CIoU": 0.5848062932491302, + "eval_seeclick_web_GIoU": 0.583672821521759, + "eval_seeclick_web_IoU": 0.6038743853569031, + "eval_seeclick_web_MAE_all": 0.015243555419147015, + "eval_seeclick_web_MAE_h": 0.007093099411576986, + "eval_seeclick_web_MAE_w": 0.015268665738403797, + "eval_seeclick_web_MAE_x_boxes": 0.00826921034604311, + "eval_seeclick_web_MAE_y_boxes": 0.02124078758060932, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.9087226986885071, + "eval_seeclick_web_loss_ce": 0.00012506780331023037, + "eval_seeclick_web_loss_iou": 0.42181396484375, + "eval_seeclick_web_loss_num": 0.012174606323242188, + "eval_seeclick_web_loss_xval": 0.9036865234375, + "eval_seeclick_web_runtime": 22.9412, + "eval_seeclick_web_samples_per_second": 2.179, + "eval_seeclick_web_steps_per_second": 0.087, + "num_input_tokens_seen": 476279788, + "step": 8500 + }, + { + "epoch": 18.93095768374165, + "eval_icons_CIoU": 0.2560933604836464, + "eval_icons_GIoU": 0.290867879986763, + "eval_icons_IoU": 0.3409384936094284, + "eval_icons_MAE_all": 0.06184186786413193, + "eval_icons_MAE_h": 0.029924143571406603, + "eval_icons_MAE_w": 0.06795705109834671, + "eval_icons_MAE_x_boxes": 0.06149038299918175, + "eval_icons_MAE_y_boxes": 0.037362379021942616, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7201489210128784, + "eval_icons_loss_ce": 0.0001441572530893609, + "eval_icons_loss_iou": 0.67724609375, + "eval_icons_loss_num": 0.05980682373046875, + "eval_icons_loss_xval": 1.65380859375, + "eval_icons_runtime": 22.6756, + "eval_icons_samples_per_second": 2.205, + "eval_icons_steps_per_second": 0.088, + "num_input_tokens_seen": 476279788, + "step": 8500 + }, + { + "epoch": 18.93095768374165, + "eval_screenspot_CIoU": 0.3761156400044759, + "eval_screenspot_GIoU": 0.3939984142780304, + "eval_screenspot_IoU": 0.44694022337595624, + "eval_screenspot_MAE_all": 0.055543094873428345, + "eval_screenspot_MAE_h": 0.039369805405537285, + "eval_screenspot_MAE_w": 0.05557269603013992, + "eval_screenspot_MAE_x_boxes": 0.06785313226282597, + "eval_screenspot_MAE_y_boxes": 0.04004095122218132, + "eval_screenspot_inside_bbox": 0.7145833373069763, + "eval_screenspot_loss": 1.5496809482574463, + "eval_screenspot_loss_ce": 0.00017849090121065578, + "eval_screenspot_loss_iou": 0.644775390625, + "eval_screenspot_loss_num": 0.0635693868001302, + "eval_screenspot_loss_xval": 1.6062825520833333, + "eval_screenspot_runtime": 38.5423, + "eval_screenspot_samples_per_second": 2.309, + "eval_screenspot_steps_per_second": 0.078, + "num_input_tokens_seen": 476279788, + "step": 8500 + }, + { + "epoch": 18.93095768374165, + "eval_compot_CIoU": 0.3419719487428665, + "eval_compot_GIoU": 0.3519115000963211, + "eval_compot_IoU": 0.40101131796836853, + "eval_compot_MAE_all": 0.018997764214873314, + "eval_compot_MAE_h": 0.011835724115371704, + "eval_compot_MAE_w": 0.02075411193072796, + "eval_compot_MAE_x_boxes": 0.03001493401825428, + "eval_compot_MAE_y_boxes": 0.006946815177798271, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3982415199279785, + "eval_compot_loss_ce": 0.00011685657591442578, + "eval_compot_loss_iou": 0.646240234375, + "eval_compot_loss_num": 0.017612457275390625, + "eval_compot_loss_xval": 1.3798828125, + "eval_compot_runtime": 24.3361, + "eval_compot_samples_per_second": 2.055, + "eval_compot_steps_per_second": 0.082, + "num_input_tokens_seen": 476279788, + "step": 8500 + }, + { + "epoch": 18.93095768374165, + "eval_custom_ui_val_CIoU": 0.47164858794874615, + "eval_custom_ui_val_GIoU": 0.47631729145844776, + "eval_custom_ui_val_IoU": 0.5336444907718234, + "eval_custom_ui_val_MAE_all": 0.026907082750565477, + "eval_custom_ui_val_MAE_h": 0.013476735032680962, + "eval_custom_ui_val_MAE_w": 0.03637263929057452, + "eval_custom_ui_val_MAE_x_boxes": 0.03278244765371912, + "eval_custom_ui_val_MAE_y_boxes": 0.013371242494839761, + "eval_custom_ui_val_inside_bbox": 0.7685185207260979, + "eval_custom_ui_val_loss": 1.1731735467910767, + "eval_custom_ui_val_loss_ce": 0.0001309802391915582, + "eval_custom_ui_val_loss_iou": 0.5032552083333334, + "eval_custom_ui_val_loss_num": 0.023616578843858507, + "eval_custom_ui_val_loss_xval": 1.1244574652777777, + "eval_custom_ui_val_runtime": 72.5876, + "eval_custom_ui_val_samples_per_second": 3.651, + "eval_custom_ui_val_steps_per_second": 0.124, + "num_input_tokens_seen": 476279788, + "step": 8500 + }, + { + "epoch": 18.93095768374165, + "loss": 0.8257832527160645, + "loss_ce": 9.972714178729802e-05, + "loss_iou": 0.37109375, + "loss_num": 0.016357421875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 476279788, + "step": 8500 + }, + { + "epoch": 18.933184855233854, + "grad_norm": 16.40049934387207, + "learning_rate": 1e-06, + "loss": 0.3533, + "num_input_tokens_seen": 476335896, + "step": 8501 + }, + { + "epoch": 18.933184855233854, + "loss": 0.35964030027389526, + "loss_ce": 0.00011269970855209976, + "loss_iou": 0.1611328125, + "loss_num": 0.007568359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 476335896, + "step": 8501 + }, + { + "epoch": 18.93541202672606, + "grad_norm": 15.26424503326416, + "learning_rate": 1e-06, + "loss": 0.2743, + "num_input_tokens_seen": 476390700, + "step": 8502 + }, + { + "epoch": 18.93541202672606, + "loss": 0.33460482954978943, + "loss_ce": 7.113382162060589e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00823974609375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 476390700, + "step": 8502 + }, + { + "epoch": 18.937639198218264, + "grad_norm": 19.776226043701172, + "learning_rate": 1e-06, + "loss": 0.2961, + "num_input_tokens_seen": 476444976, + "step": 8503 + }, + { + "epoch": 18.937639198218264, + "loss": 0.34564700722694397, + "loss_ce": 6.596426828764379e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0084228515625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 476444976, + "step": 8503 + }, + { + "epoch": 18.93986636971047, + "grad_norm": 13.866682052612305, + "learning_rate": 1e-06, + "loss": 0.3222, + "num_input_tokens_seen": 476500844, + "step": 8504 + }, + { + "epoch": 18.93986636971047, + "loss": 0.31439658999443054, + "loss_ce": 6.556001608259976e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.01312255859375, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 476500844, + "step": 8504 + }, + { + "epoch": 18.942093541202674, + "grad_norm": 19.28264045715332, + "learning_rate": 1e-06, + "loss": 0.3066, + "num_input_tokens_seen": 476556968, + "step": 8505 + }, + { + "epoch": 18.942093541202674, + "loss": 0.3727559447288513, + "loss_ce": 7.529569120379165e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00823974609375, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 476556968, + "step": 8505 + }, + { + "epoch": 18.94432071269488, + "grad_norm": 20.56502342224121, + "learning_rate": 1e-06, + "loss": 0.3223, + "num_input_tokens_seen": 476614640, + "step": 8506 + }, + { + "epoch": 18.94432071269488, + "loss": 0.41475239396095276, + "loss_ce": 7.952825399115682e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.01141357421875, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 476614640, + "step": 8506 + }, + { + "epoch": 18.946547884187083, + "grad_norm": 34.558292388916016, + "learning_rate": 1e-06, + "loss": 0.5155, + "num_input_tokens_seen": 476670132, + "step": 8507 + }, + { + "epoch": 18.946547884187083, + "loss": 0.4488101005554199, + "loss_ce": 7.96294043539092e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0107421875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 476670132, + "step": 8507 + }, + { + "epoch": 18.948775055679288, + "grad_norm": 18.674837112426758, + "learning_rate": 1e-06, + "loss": 0.4771, + "num_input_tokens_seen": 476726448, + "step": 8508 + }, + { + "epoch": 18.948775055679288, + "loss": 0.5774888396263123, + "loss_ce": 9.624622180126607e-05, + "loss_iou": 0.267578125, + "loss_num": 0.00823974609375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 476726448, + "step": 8508 + }, + { + "epoch": 18.951002227171493, + "grad_norm": 15.41340160369873, + "learning_rate": 1e-06, + "loss": 0.4142, + "num_input_tokens_seen": 476784916, + "step": 8509 + }, + { + "epoch": 18.951002227171493, + "loss": 0.4316607713699341, + "loss_ce": 8.12113648862578e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00811767578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 476784916, + "step": 8509 + }, + { + "epoch": 18.953229398663698, + "grad_norm": 14.191808700561523, + "learning_rate": 1e-06, + "loss": 0.422, + "num_input_tokens_seen": 476843120, + "step": 8510 + }, + { + "epoch": 18.953229398663698, + "loss": 0.4266318678855896, + "loss_ce": 0.00011820608051493764, + "loss_iou": 0.169921875, + "loss_num": 0.0174560546875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 476843120, + "step": 8510 + }, + { + "epoch": 18.955456570155903, + "grad_norm": 16.592500686645508, + "learning_rate": 1e-06, + "loss": 0.4383, + "num_input_tokens_seen": 476899824, + "step": 8511 + }, + { + "epoch": 18.955456570155903, + "loss": 0.40235602855682373, + "loss_ce": 7.331261440413073e-05, + "loss_iou": 0.166015625, + "loss_num": 0.01385498046875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 476899824, + "step": 8511 + }, + { + "epoch": 18.957683741648108, + "grad_norm": 12.043611526489258, + "learning_rate": 1e-06, + "loss": 0.3217, + "num_input_tokens_seen": 476956480, + "step": 8512 + }, + { + "epoch": 18.957683741648108, + "loss": 0.3974224627017975, + "loss_ce": 8.359744242625311e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.01275634765625, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 476956480, + "step": 8512 + }, + { + "epoch": 18.959910913140313, + "grad_norm": 20.1372013092041, + "learning_rate": 1e-06, + "loss": 0.3755, + "num_input_tokens_seen": 477011216, + "step": 8513 + }, + { + "epoch": 18.959910913140313, + "loss": 0.3341856598854065, + "loss_ce": 7.918624032754451e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0096435546875, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 477011216, + "step": 8513 + }, + { + "epoch": 18.962138084632517, + "grad_norm": 23.11919593811035, + "learning_rate": 1e-06, + "loss": 0.3368, + "num_input_tokens_seen": 477068100, + "step": 8514 + }, + { + "epoch": 18.962138084632517, + "loss": 0.4352511167526245, + "loss_ce": 7.042582728900015e-05, + "loss_iou": 0.16015625, + "loss_num": 0.02294921875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 477068100, + "step": 8514 + }, + { + "epoch": 18.964365256124722, + "grad_norm": 15.498306274414062, + "learning_rate": 1e-06, + "loss": 0.4073, + "num_input_tokens_seen": 477124952, + "step": 8515 + }, + { + "epoch": 18.964365256124722, + "loss": 0.4004598557949066, + "loss_ce": 6.92239118507132e-05, + "loss_iou": 0.17578125, + "loss_num": 0.00982666015625, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 477124952, + "step": 8515 + }, + { + "epoch": 18.966592427616927, + "grad_norm": 16.32062530517578, + "learning_rate": 1e-06, + "loss": 0.3115, + "num_input_tokens_seen": 477178756, + "step": 8516 + }, + { + "epoch": 18.966592427616927, + "loss": 0.38423728942871094, + "loss_ce": 8.200977754313499e-05, + "loss_iou": 0.16796875, + "loss_num": 0.00958251953125, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 477178756, + "step": 8516 + }, + { + "epoch": 18.968819599109132, + "grad_norm": 16.013669967651367, + "learning_rate": 1e-06, + "loss": 0.4225, + "num_input_tokens_seen": 477234552, + "step": 8517 + }, + { + "epoch": 18.968819599109132, + "loss": 0.45508986711502075, + "loss_ce": 7.279004785232246e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.0133056640625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 477234552, + "step": 8517 + }, + { + "epoch": 18.971046770601337, + "grad_norm": 15.03101921081543, + "learning_rate": 1e-06, + "loss": 0.5013, + "num_input_tokens_seen": 477291364, + "step": 8518 + }, + { + "epoch": 18.971046770601337, + "loss": 0.4453286826610565, + "loss_ce": 7.720896974205971e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.007476806640625, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 477291364, + "step": 8518 + }, + { + "epoch": 18.97327394209354, + "grad_norm": 16.71440887451172, + "learning_rate": 1e-06, + "loss": 0.3699, + "num_input_tokens_seen": 477346348, + "step": 8519 + }, + { + "epoch": 18.97327394209354, + "loss": 0.3506791889667511, + "loss_ce": 9.326588042313233e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00604248046875, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 477346348, + "step": 8519 + }, + { + "epoch": 18.975501113585747, + "grad_norm": 29.49169158935547, + "learning_rate": 1e-06, + "loss": 0.4555, + "num_input_tokens_seen": 477402184, + "step": 8520 + }, + { + "epoch": 18.975501113585747, + "loss": 0.31228724122047424, + "loss_ce": 6.190109706949443e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.01043701171875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 477402184, + "step": 8520 + }, + { + "epoch": 18.97772828507795, + "grad_norm": 17.541460037231445, + "learning_rate": 1e-06, + "loss": 0.4045, + "num_input_tokens_seen": 477458060, + "step": 8521 + }, + { + "epoch": 18.97772828507795, + "loss": 0.30591824650764465, + "loss_ce": 7.108383579179645e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.005279541015625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 477458060, + "step": 8521 + }, + { + "epoch": 18.979955456570156, + "grad_norm": 17.852941513061523, + "learning_rate": 1e-06, + "loss": 0.5856, + "num_input_tokens_seen": 477511612, + "step": 8522 + }, + { + "epoch": 18.979955456570156, + "loss": 0.4428853988647461, + "loss_ce": 7.535393524449319e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.0164794921875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 477511612, + "step": 8522 + }, + { + "epoch": 18.98218262806236, + "grad_norm": 18.045700073242188, + "learning_rate": 1e-06, + "loss": 0.3042, + "num_input_tokens_seen": 477566404, + "step": 8523 + }, + { + "epoch": 18.98218262806236, + "loss": 0.3434655964374542, + "loss_ce": 8.181348675861955e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.01300048828125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 477566404, + "step": 8523 + }, + { + "epoch": 18.984409799554566, + "grad_norm": 21.54753875732422, + "learning_rate": 1e-06, + "loss": 0.4359, + "num_input_tokens_seen": 477623140, + "step": 8524 + }, + { + "epoch": 18.984409799554566, + "loss": 0.4005663990974426, + "loss_ce": 0.0004199253162369132, + "loss_iou": 0.1640625, + "loss_num": 0.014404296875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 477623140, + "step": 8524 + }, + { + "epoch": 18.98663697104677, + "grad_norm": 14.066349983215332, + "learning_rate": 1e-06, + "loss": 0.3884, + "num_input_tokens_seen": 477678528, + "step": 8525 + }, + { + "epoch": 18.98663697104677, + "loss": 0.42566099762916565, + "loss_ce": 6.285572453634813e-05, + "loss_iou": 0.197265625, + "loss_num": 0.00616455078125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 477678528, + "step": 8525 + }, + { + "epoch": 18.988864142538976, + "grad_norm": 17.08536720275879, + "learning_rate": 1e-06, + "loss": 0.3323, + "num_input_tokens_seen": 477735340, + "step": 8526 + }, + { + "epoch": 18.988864142538976, + "loss": 0.3974015414714813, + "loss_ce": 0.00012369919568300247, + "loss_iou": 0.17578125, + "loss_num": 0.009033203125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 477735340, + "step": 8526 + }, + { + "epoch": 18.99109131403118, + "grad_norm": 17.082090377807617, + "learning_rate": 1e-06, + "loss": 0.3237, + "num_input_tokens_seen": 477792108, + "step": 8527 + }, + { + "epoch": 18.99109131403118, + "loss": 0.27058377861976624, + "loss_ce": 7.597178046125919e-05, + "loss_iou": 0.1103515625, + "loss_num": 0.010009765625, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 477792108, + "step": 8527 + }, + { + "epoch": 18.993318485523385, + "grad_norm": 13.515477180480957, + "learning_rate": 1e-06, + "loss": 0.35, + "num_input_tokens_seen": 477846252, + "step": 8528 + }, + { + "epoch": 18.993318485523385, + "loss": 0.24356822669506073, + "loss_ce": 6.846075120847672e-05, + "loss_iou": 0.10693359375, + "loss_num": 0.006011962890625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 477846252, + "step": 8528 + }, + { + "epoch": 18.99554565701559, + "grad_norm": 12.548789978027344, + "learning_rate": 1e-06, + "loss": 0.4884, + "num_input_tokens_seen": 477901432, + "step": 8529 + }, + { + "epoch": 18.99554565701559, + "loss": 0.44635307788848877, + "loss_ce": 6.401098653441295e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.01708984375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 477901432, + "step": 8529 + }, + { + "epoch": 18.997772828507795, + "grad_norm": 19.36771011352539, + "learning_rate": 1e-06, + "loss": 0.4848, + "num_input_tokens_seen": 477959164, + "step": 8530 + }, + { + "epoch": 18.997772828507795, + "loss": 0.4359922409057617, + "loss_ce": 0.00014018421643413603, + "loss_iou": 0.185546875, + "loss_num": 0.012939453125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 477959164, + "step": 8530 + }, + { + "epoch": 19.0, + "grad_norm": 24.555925369262695, + "learning_rate": 1e-06, + "loss": 0.4317, + "num_input_tokens_seen": 478014600, + "step": 8531 + }, + { + "epoch": 19.0, + "loss": 0.5009192228317261, + "loss_ce": 6.472200766438618e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0108642578125, + "loss_xval": 0.5, + "num_input_tokens_seen": 478014600, + "step": 8531 + }, + { + "epoch": 19.002227171492205, + "grad_norm": 19.39182472229004, + "learning_rate": 1e-06, + "loss": 0.3046, + "num_input_tokens_seen": 478070168, + "step": 8532 + }, + { + "epoch": 19.002227171492205, + "loss": 0.2748461961746216, + "loss_ce": 6.590808334294707e-05, + "loss_iou": 0.12255859375, + "loss_num": 0.005859375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 478070168, + "step": 8532 + }, + { + "epoch": 19.00445434298441, + "grad_norm": 15.633817672729492, + "learning_rate": 1e-06, + "loss": 0.2743, + "num_input_tokens_seen": 478124420, + "step": 8533 + }, + { + "epoch": 19.00445434298441, + "loss": 0.2822956442832947, + "loss_ce": 6.907560600666329e-05, + "loss_iou": 0.11865234375, + "loss_num": 0.00897216796875, + "loss_xval": 0.28125, + "num_input_tokens_seen": 478124420, + "step": 8533 + }, + { + "epoch": 19.006681514476615, + "grad_norm": 20.823501586914062, + "learning_rate": 1e-06, + "loss": 0.4119, + "num_input_tokens_seen": 478179792, + "step": 8534 + }, + { + "epoch": 19.006681514476615, + "loss": 0.4808357357978821, + "loss_ce": 6.183330697240308e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01611328125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 478179792, + "step": 8534 + }, + { + "epoch": 19.00890868596882, + "grad_norm": 23.914974212646484, + "learning_rate": 1e-06, + "loss": 0.433, + "num_input_tokens_seen": 478236068, + "step": 8535 + }, + { + "epoch": 19.00890868596882, + "loss": 0.5453649759292603, + "loss_ce": 7.688709592912346e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0125732421875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 478236068, + "step": 8535 + }, + { + "epoch": 19.011135857461024, + "grad_norm": 13.552783966064453, + "learning_rate": 1e-06, + "loss": 0.2469, + "num_input_tokens_seen": 478293348, + "step": 8536 + }, + { + "epoch": 19.011135857461024, + "loss": 0.2078023999929428, + "loss_ce": 6.924466288182884e-05, + "loss_iou": 0.09375, + "loss_num": 0.0040283203125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 478293348, + "step": 8536 + }, + { + "epoch": 19.01336302895323, + "grad_norm": 16.82329750061035, + "learning_rate": 1e-06, + "loss": 0.4532, + "num_input_tokens_seen": 478352520, + "step": 8537 + }, + { + "epoch": 19.01336302895323, + "loss": 0.4592329263687134, + "loss_ce": 6.543014023918658e-05, + "loss_iou": 0.205078125, + "loss_num": 0.010009765625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 478352520, + "step": 8537 + }, + { + "epoch": 19.015590200445434, + "grad_norm": 18.402179718017578, + "learning_rate": 1e-06, + "loss": 0.4807, + "num_input_tokens_seen": 478409540, + "step": 8538 + }, + { + "epoch": 19.015590200445434, + "loss": 0.5504905581474304, + "loss_ce": 7.548804569523782e-05, + "loss_iou": 0.240234375, + "loss_num": 0.0140380859375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 478409540, + "step": 8538 + }, + { + "epoch": 19.01781737193764, + "grad_norm": 18.57283592224121, + "learning_rate": 1e-06, + "loss": 0.2209, + "num_input_tokens_seen": 478465060, + "step": 8539 + }, + { + "epoch": 19.01781737193764, + "loss": 0.2368859052658081, + "loss_ce": 6.950320675969124e-05, + "loss_iou": 0.09912109375, + "loss_num": 0.00762939453125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 478465060, + "step": 8539 + }, + { + "epoch": 19.020044543429844, + "grad_norm": 20.981054306030273, + "learning_rate": 1e-06, + "loss": 0.4838, + "num_input_tokens_seen": 478520264, + "step": 8540 + }, + { + "epoch": 19.020044543429844, + "loss": 0.39203161001205444, + "loss_ce": 6.387151370290667e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.01239013671875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 478520264, + "step": 8540 + }, + { + "epoch": 19.02227171492205, + "grad_norm": 22.715524673461914, + "learning_rate": 1e-06, + "loss": 0.3701, + "num_input_tokens_seen": 478577056, + "step": 8541 + }, + { + "epoch": 19.02227171492205, + "loss": 0.37838345766067505, + "loss_ce": 8.756719762459397e-05, + "loss_iou": 0.158203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 478577056, + "step": 8541 + }, + { + "epoch": 19.024498886414253, + "grad_norm": 19.18893051147461, + "learning_rate": 1e-06, + "loss": 0.3867, + "num_input_tokens_seen": 478633512, + "step": 8542 + }, + { + "epoch": 19.024498886414253, + "loss": 0.3619902729988098, + "loss_ce": 6.705922714900225e-05, + "loss_iou": 0.1640625, + "loss_num": 0.00689697265625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 478633512, + "step": 8542 + }, + { + "epoch": 19.02672605790646, + "grad_norm": 16.043140411376953, + "learning_rate": 1e-06, + "loss": 0.5777, + "num_input_tokens_seen": 478689620, + "step": 8543 + }, + { + "epoch": 19.02672605790646, + "loss": 0.8731194734573364, + "loss_ce": 7.26049329387024e-05, + "loss_iou": 0.375, + "loss_num": 0.02490234375, + "loss_xval": 0.875, + "num_input_tokens_seen": 478689620, + "step": 8543 + }, + { + "epoch": 19.028953229398663, + "grad_norm": 22.03814697265625, + "learning_rate": 1e-06, + "loss": 0.3668, + "num_input_tokens_seen": 478745996, + "step": 8544 + }, + { + "epoch": 19.028953229398663, + "loss": 0.3608505129814148, + "loss_ce": 7.170035678427666e-05, + "loss_iou": 0.158203125, + "loss_num": 0.00909423828125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 478745996, + "step": 8544 + }, + { + "epoch": 19.031180400890868, + "grad_norm": 18.709949493408203, + "learning_rate": 1e-06, + "loss": 0.3843, + "num_input_tokens_seen": 478802344, + "step": 8545 + }, + { + "epoch": 19.031180400890868, + "loss": 0.3520601689815521, + "loss_ce": 7.041079516056925e-05, + "loss_iou": 0.1328125, + "loss_num": 0.017333984375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 478802344, + "step": 8545 + }, + { + "epoch": 19.033407572383073, + "grad_norm": 14.813765525817871, + "learning_rate": 1e-06, + "loss": 0.463, + "num_input_tokens_seen": 478860080, + "step": 8546 + }, + { + "epoch": 19.033407572383073, + "loss": 0.5505503416061401, + "loss_ce": 7.423260831274092e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.027587890625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 478860080, + "step": 8546 + }, + { + "epoch": 19.035634743875278, + "grad_norm": 23.64231300354004, + "learning_rate": 1e-06, + "loss": 0.3234, + "num_input_tokens_seen": 478919644, + "step": 8547 + }, + { + "epoch": 19.035634743875278, + "loss": 0.3638024926185608, + "loss_ce": 9.396575478604063e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.02392578125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 478919644, + "step": 8547 + }, + { + "epoch": 19.037861915367483, + "grad_norm": 22.59861183166504, + "learning_rate": 1e-06, + "loss": 0.2975, + "num_input_tokens_seen": 478974500, + "step": 8548 + }, + { + "epoch": 19.037861915367483, + "loss": 0.27411675453186035, + "loss_ce": 6.891056546010077e-05, + "loss_iou": 0.119140625, + "loss_num": 0.00701904296875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 478974500, + "step": 8548 + }, + { + "epoch": 19.040089086859687, + "grad_norm": 21.548479080200195, + "learning_rate": 1e-06, + "loss": 0.5299, + "num_input_tokens_seen": 479029432, + "step": 8549 + }, + { + "epoch": 19.040089086859687, + "loss": 0.5104723572731018, + "loss_ce": 9.636413597036153e-05, + "loss_iou": 0.20703125, + "loss_num": 0.019287109375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 479029432, + "step": 8549 + }, + { + "epoch": 19.042316258351892, + "grad_norm": 22.90362548828125, + "learning_rate": 1e-06, + "loss": 0.6133, + "num_input_tokens_seen": 479086928, + "step": 8550 + }, + { + "epoch": 19.042316258351892, + "loss": 0.6397278904914856, + "loss_ce": 7.949080463731661e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0228271484375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 479086928, + "step": 8550 + }, + { + "epoch": 19.044543429844097, + "grad_norm": 13.663710594177246, + "learning_rate": 1e-06, + "loss": 0.4714, + "num_input_tokens_seen": 479143420, + "step": 8551 + }, + { + "epoch": 19.044543429844097, + "loss": 0.41580140590667725, + "loss_ce": 9.095711720874533e-05, + "loss_iou": 0.1640625, + "loss_num": 0.017333984375, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 479143420, + "step": 8551 + }, + { + "epoch": 19.046770601336302, + "grad_norm": 26.639122009277344, + "learning_rate": 1e-06, + "loss": 0.4388, + "num_input_tokens_seen": 479199488, + "step": 8552 + }, + { + "epoch": 19.046770601336302, + "loss": 0.4706590175628662, + "loss_ce": 7.797098805895075e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.01190185546875, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 479199488, + "step": 8552 + }, + { + "epoch": 19.048997772828507, + "grad_norm": 37.29257583618164, + "learning_rate": 1e-06, + "loss": 0.4645, + "num_input_tokens_seen": 479255004, + "step": 8553 + }, + { + "epoch": 19.048997772828507, + "loss": 0.6149426698684692, + "loss_ce": 7.449123950209469e-05, + "loss_iou": 0.248046875, + "loss_num": 0.023681640625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 479255004, + "step": 8553 + }, + { + "epoch": 19.051224944320712, + "grad_norm": 187.8523712158203, + "learning_rate": 1e-06, + "loss": 0.4137, + "num_input_tokens_seen": 479310756, + "step": 8554 + }, + { + "epoch": 19.051224944320712, + "loss": 0.35520029067993164, + "loss_ce": 9.777834929991513e-05, + "loss_iou": 0.14453125, + "loss_num": 0.01336669921875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 479310756, + "step": 8554 + }, + { + "epoch": 19.053452115812917, + "grad_norm": 73.10868835449219, + "learning_rate": 1e-06, + "loss": 0.4949, + "num_input_tokens_seen": 479365280, + "step": 8555 + }, + { + "epoch": 19.053452115812917, + "loss": 0.5539067387580872, + "loss_ce": 7.372665277216583e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.01275634765625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 479365280, + "step": 8555 + }, + { + "epoch": 19.05567928730512, + "grad_norm": 25.378646850585938, + "learning_rate": 1e-06, + "loss": 0.261, + "num_input_tokens_seen": 479421220, + "step": 8556 + }, + { + "epoch": 19.05567928730512, + "loss": 0.2171689122915268, + "loss_ce": 6.685940024908632e-05, + "loss_iou": 0.09033203125, + "loss_num": 0.007293701171875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 479421220, + "step": 8556 + }, + { + "epoch": 19.057906458797326, + "grad_norm": 14.335795402526855, + "learning_rate": 1e-06, + "loss": 0.289, + "num_input_tokens_seen": 479477700, + "step": 8557 + }, + { + "epoch": 19.057906458797326, + "loss": 0.18457838892936707, + "loss_ce": 6.912185926921666e-05, + "loss_iou": 0.07666015625, + "loss_num": 0.00616455078125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 479477700, + "step": 8557 + }, + { + "epoch": 19.06013363028953, + "grad_norm": 14.203603744506836, + "learning_rate": 1e-06, + "loss": 0.4151, + "num_input_tokens_seen": 479536192, + "step": 8558 + }, + { + "epoch": 19.06013363028953, + "loss": 0.47530919313430786, + "loss_ce": 8.947977039497346e-05, + "loss_iou": 0.208984375, + "loss_num": 0.01153564453125, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 479536192, + "step": 8558 + }, + { + "epoch": 19.062360801781736, + "grad_norm": 15.753239631652832, + "learning_rate": 1e-06, + "loss": 0.2125, + "num_input_tokens_seen": 479591752, + "step": 8559 + }, + { + "epoch": 19.062360801781736, + "loss": 0.19928745925426483, + "loss_ce": 6.870189099572599e-05, + "loss_iou": 0.07568359375, + "loss_num": 0.0096435546875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 479591752, + "step": 8559 + }, + { + "epoch": 19.06458797327394, + "grad_norm": 13.365279197692871, + "learning_rate": 1e-06, + "loss": 0.2772, + "num_input_tokens_seen": 479650200, + "step": 8560 + }, + { + "epoch": 19.06458797327394, + "loss": 0.31562650203704834, + "loss_ce": 7.472425932064652e-05, + "loss_iou": 0.140625, + "loss_num": 0.00689697265625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 479650200, + "step": 8560 + }, + { + "epoch": 19.066815144766146, + "grad_norm": 16.33476448059082, + "learning_rate": 1e-06, + "loss": 0.3525, + "num_input_tokens_seen": 479705436, + "step": 8561 + }, + { + "epoch": 19.066815144766146, + "loss": 0.3400951027870178, + "loss_ce": 6.827242759754881e-05, + "loss_iou": 0.140625, + "loss_num": 0.01177978515625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 479705436, + "step": 8561 + }, + { + "epoch": 19.06904231625835, + "grad_norm": 11.422565460205078, + "learning_rate": 1e-06, + "loss": 0.2924, + "num_input_tokens_seen": 479760176, + "step": 8562 + }, + { + "epoch": 19.06904231625835, + "loss": 0.21368908882141113, + "loss_ce": 6.604377267649397e-05, + "loss_iou": 0.08740234375, + "loss_num": 0.00775146484375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 479760176, + "step": 8562 + }, + { + "epoch": 19.071269487750556, + "grad_norm": 14.924948692321777, + "learning_rate": 1e-06, + "loss": 0.3429, + "num_input_tokens_seen": 479815112, + "step": 8563 + }, + { + "epoch": 19.071269487750556, + "loss": 0.42559611797332764, + "loss_ce": 5.89884803048335e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0240478515625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 479815112, + "step": 8563 + }, + { + "epoch": 19.07349665924276, + "grad_norm": 26.38053321838379, + "learning_rate": 1e-06, + "loss": 0.3599, + "num_input_tokens_seen": 479870284, + "step": 8564 + }, + { + "epoch": 19.07349665924276, + "loss": 0.36798515915870667, + "loss_ce": 6.524256605189294e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.00537109375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 479870284, + "step": 8564 + }, + { + "epoch": 19.075723830734965, + "grad_norm": 13.398744583129883, + "learning_rate": 1e-06, + "loss": 0.3668, + "num_input_tokens_seen": 479927696, + "step": 8565 + }, + { + "epoch": 19.075723830734965, + "loss": 0.4121933877468109, + "loss_ce": 8.401382365263999e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.007171630859375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 479927696, + "step": 8565 + }, + { + "epoch": 19.07795100222717, + "grad_norm": 23.407733917236328, + "learning_rate": 1e-06, + "loss": 0.5092, + "num_input_tokens_seen": 479984008, + "step": 8566 + }, + { + "epoch": 19.07795100222717, + "loss": 0.536881685256958, + "loss_ce": 0.00013851752737537026, + "loss_iou": 0.220703125, + "loss_num": 0.0189208984375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 479984008, + "step": 8566 + }, + { + "epoch": 19.080178173719375, + "grad_norm": 26.368122100830078, + "learning_rate": 1e-06, + "loss": 0.4519, + "num_input_tokens_seen": 480038472, + "step": 8567 + }, + { + "epoch": 19.080178173719375, + "loss": 0.5067821741104126, + "loss_ce": 0.00031240255339071155, + "loss_iou": 0.21484375, + "loss_num": 0.01531982421875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 480038472, + "step": 8567 + }, + { + "epoch": 19.08240534521158, + "grad_norm": 16.010215759277344, + "learning_rate": 1e-06, + "loss": 0.3062, + "num_input_tokens_seen": 480091528, + "step": 8568 + }, + { + "epoch": 19.08240534521158, + "loss": 0.3443114161491394, + "loss_ce": 7.312708476092666e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.0045166015625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 480091528, + "step": 8568 + }, + { + "epoch": 19.084632516703785, + "grad_norm": 15.22715950012207, + "learning_rate": 1e-06, + "loss": 0.3562, + "num_input_tokens_seen": 480147780, + "step": 8569 + }, + { + "epoch": 19.084632516703785, + "loss": 0.4474610388278961, + "loss_ce": 7.335421105381101e-05, + "loss_iou": 0.19140625, + "loss_num": 0.01312255859375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 480147780, + "step": 8569 + }, + { + "epoch": 19.08685968819599, + "grad_norm": 12.806982040405273, + "learning_rate": 1e-06, + "loss": 0.3955, + "num_input_tokens_seen": 480203968, + "step": 8570 + }, + { + "epoch": 19.08685968819599, + "loss": 0.2964867353439331, + "loss_ce": 0.00010003504576161504, + "loss_iou": 0.13671875, + "loss_num": 0.004669189453125, + "loss_xval": 0.296875, + "num_input_tokens_seen": 480203968, + "step": 8570 + }, + { + "epoch": 19.089086859688194, + "grad_norm": 21.179973602294922, + "learning_rate": 1e-06, + "loss": 0.356, + "num_input_tokens_seen": 480260560, + "step": 8571 + }, + { + "epoch": 19.089086859688194, + "loss": 0.29164212942123413, + "loss_ce": 7.720879511907697e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.005706787109375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 480260560, + "step": 8571 + }, + { + "epoch": 19.0913140311804, + "grad_norm": 17.90711784362793, + "learning_rate": 1e-06, + "loss": 0.4287, + "num_input_tokens_seen": 480317040, + "step": 8572 + }, + { + "epoch": 19.0913140311804, + "loss": 0.5795639753341675, + "loss_ce": 0.00015723146498203278, + "loss_iou": 0.2421875, + "loss_num": 0.0191650390625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 480317040, + "step": 8572 + }, + { + "epoch": 19.093541202672604, + "grad_norm": 26.034122467041016, + "learning_rate": 1e-06, + "loss": 0.3087, + "num_input_tokens_seen": 480371384, + "step": 8573 + }, + { + "epoch": 19.093541202672604, + "loss": 0.33735281229019165, + "loss_ce": 7.252431532833725e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.00830078125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 480371384, + "step": 8573 + }, + { + "epoch": 19.09576837416481, + "grad_norm": 17.473873138427734, + "learning_rate": 1e-06, + "loss": 0.3811, + "num_input_tokens_seen": 480426792, + "step": 8574 + }, + { + "epoch": 19.09576837416481, + "loss": 0.4617387652397156, + "loss_ce": 6.881446461193264e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 480426792, + "step": 8574 + }, + { + "epoch": 19.097995545657014, + "grad_norm": 16.091129302978516, + "learning_rate": 1e-06, + "loss": 0.4135, + "num_input_tokens_seen": 480483268, + "step": 8575 + }, + { + "epoch": 19.097995545657014, + "loss": 0.29927489161491394, + "loss_ce": 8.053722558543086e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.005462646484375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 480483268, + "step": 8575 + }, + { + "epoch": 19.100222717149222, + "grad_norm": 22.522878646850586, + "learning_rate": 1e-06, + "loss": 0.595, + "num_input_tokens_seen": 480537020, + "step": 8576 + }, + { + "epoch": 19.100222717149222, + "loss": 0.5641788244247437, + "loss_ce": 9.193105506710708e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.01409912109375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 480537020, + "step": 8576 + }, + { + "epoch": 19.102449888641427, + "grad_norm": 24.5418701171875, + "learning_rate": 1e-06, + "loss": 0.2987, + "num_input_tokens_seen": 480592488, + "step": 8577 + }, + { + "epoch": 19.102449888641427, + "loss": 0.2232351005077362, + "loss_ce": 7.150069723138586e-05, + "loss_iou": 0.1025390625, + "loss_num": 0.003631591796875, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 480592488, + "step": 8577 + }, + { + "epoch": 19.104677060133632, + "grad_norm": 27.310413360595703, + "learning_rate": 1e-06, + "loss": 0.4059, + "num_input_tokens_seen": 480647028, + "step": 8578 + }, + { + "epoch": 19.104677060133632, + "loss": 0.4668709635734558, + "loss_ce": 7.407699013128877e-05, + "loss_iou": 0.19921875, + "loss_num": 0.013671875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 480647028, + "step": 8578 + }, + { + "epoch": 19.106904231625837, + "grad_norm": 19.787166595458984, + "learning_rate": 1e-06, + "loss": 0.3002, + "num_input_tokens_seen": 480702564, + "step": 8579 + }, + { + "epoch": 19.106904231625837, + "loss": 0.2308284193277359, + "loss_ce": 8.500830153934658e-05, + "loss_iou": 0.107421875, + "loss_num": 0.0031280517578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 480702564, + "step": 8579 + }, + { + "epoch": 19.10913140311804, + "grad_norm": 20.583070755004883, + "learning_rate": 1e-06, + "loss": 0.3846, + "num_input_tokens_seen": 480758448, + "step": 8580 + }, + { + "epoch": 19.10913140311804, + "loss": 0.39584484696388245, + "loss_ce": 6.236594344954938e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0155029296875, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 480758448, + "step": 8580 + }, + { + "epoch": 19.111358574610247, + "grad_norm": 20.13869285583496, + "learning_rate": 1e-06, + "loss": 0.3753, + "num_input_tokens_seen": 480810532, + "step": 8581 + }, + { + "epoch": 19.111358574610247, + "loss": 0.4490373134613037, + "loss_ce": 6.272210157476366e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.007476806640625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 480810532, + "step": 8581 + }, + { + "epoch": 19.11358574610245, + "grad_norm": 27.062620162963867, + "learning_rate": 1e-06, + "loss": 0.4051, + "num_input_tokens_seen": 480867128, + "step": 8582 + }, + { + "epoch": 19.11358574610245, + "loss": 0.388627290725708, + "loss_ce": 7.747422205284238e-05, + "loss_iou": 0.181640625, + "loss_num": 0.005279541015625, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 480867128, + "step": 8582 + }, + { + "epoch": 19.115812917594656, + "grad_norm": 14.657607078552246, + "learning_rate": 1e-06, + "loss": 0.2535, + "num_input_tokens_seen": 480921476, + "step": 8583 + }, + { + "epoch": 19.115812917594656, + "loss": 0.35783708095550537, + "loss_ce": 0.00017106709128711373, + "loss_iou": 0.1572265625, + "loss_num": 0.008544921875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 480921476, + "step": 8583 + }, + { + "epoch": 19.11804008908686, + "grad_norm": 21.15728759765625, + "learning_rate": 1e-06, + "loss": 0.4817, + "num_input_tokens_seen": 480977352, + "step": 8584 + }, + { + "epoch": 19.11804008908686, + "loss": 0.541458010673523, + "loss_ce": 7.615622598677874e-05, + "loss_iou": 0.212890625, + "loss_num": 0.023193359375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 480977352, + "step": 8584 + }, + { + "epoch": 19.120267260579066, + "grad_norm": 20.03176498413086, + "learning_rate": 1e-06, + "loss": 0.4498, + "num_input_tokens_seen": 481031908, + "step": 8585 + }, + { + "epoch": 19.120267260579066, + "loss": 0.45001912117004395, + "loss_ce": 6.798194954171777e-05, + "loss_iou": 0.169921875, + "loss_num": 0.0218505859375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 481031908, + "step": 8585 + }, + { + "epoch": 19.12249443207127, + "grad_norm": 20.93001937866211, + "learning_rate": 1e-06, + "loss": 0.3745, + "num_input_tokens_seen": 481088708, + "step": 8586 + }, + { + "epoch": 19.12249443207127, + "loss": 0.41656631231307983, + "loss_ce": 6.239269714569673e-05, + "loss_iou": 0.189453125, + "loss_num": 0.00738525390625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 481088708, + "step": 8586 + }, + { + "epoch": 19.124721603563476, + "grad_norm": 15.11413860321045, + "learning_rate": 1e-06, + "loss": 0.3466, + "num_input_tokens_seen": 481146480, + "step": 8587 + }, + { + "epoch": 19.124721603563476, + "loss": 0.3888677954673767, + "loss_ce": 7.386014476651326e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.01226806640625, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 481146480, + "step": 8587 + }, + { + "epoch": 19.12694877505568, + "grad_norm": 15.431100845336914, + "learning_rate": 1e-06, + "loss": 0.2699, + "num_input_tokens_seen": 481202468, + "step": 8588 + }, + { + "epoch": 19.12694877505568, + "loss": 0.22169449925422668, + "loss_ce": 7.583482511108741e-05, + "loss_iou": 0.09765625, + "loss_num": 0.00537109375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 481202468, + "step": 8588 + }, + { + "epoch": 19.129175946547885, + "grad_norm": 14.637504577636719, + "learning_rate": 1e-06, + "loss": 0.2738, + "num_input_tokens_seen": 481258796, + "step": 8589 + }, + { + "epoch": 19.129175946547885, + "loss": 0.2992823123931885, + "loss_ce": 8.799122588243335e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.008056640625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 481258796, + "step": 8589 + }, + { + "epoch": 19.13140311804009, + "grad_norm": 18.58306312561035, + "learning_rate": 1e-06, + "loss": 0.4947, + "num_input_tokens_seen": 481314720, + "step": 8590 + }, + { + "epoch": 19.13140311804009, + "loss": 0.6162921786308289, + "loss_ce": 8.123279258143157e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0113525390625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 481314720, + "step": 8590 + }, + { + "epoch": 19.133630289532295, + "grad_norm": 24.67803382873535, + "learning_rate": 1e-06, + "loss": 0.3293, + "num_input_tokens_seen": 481371188, + "step": 8591 + }, + { + "epoch": 19.133630289532295, + "loss": 0.3968676030635834, + "loss_ce": 7.80346745159477e-05, + "loss_iou": 0.173828125, + "loss_num": 0.0098876953125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 481371188, + "step": 8591 + }, + { + "epoch": 19.1358574610245, + "grad_norm": 10.766166687011719, + "learning_rate": 1e-06, + "loss": 0.3724, + "num_input_tokens_seen": 481427332, + "step": 8592 + }, + { + "epoch": 19.1358574610245, + "loss": 0.4355580806732178, + "loss_ce": 7.223740976769477e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0096435546875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 481427332, + "step": 8592 + }, + { + "epoch": 19.138084632516705, + "grad_norm": 18.193437576293945, + "learning_rate": 1e-06, + "loss": 0.3822, + "num_input_tokens_seen": 481484316, + "step": 8593 + }, + { + "epoch": 19.138084632516705, + "loss": 0.3628009557723999, + "loss_ce": 6.902994937263429e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.005859375, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 481484316, + "step": 8593 + }, + { + "epoch": 19.14031180400891, + "grad_norm": 20.437171936035156, + "learning_rate": 1e-06, + "loss": 0.3256, + "num_input_tokens_seen": 481538132, + "step": 8594 + }, + { + "epoch": 19.14031180400891, + "loss": 0.22638912498950958, + "loss_ce": 7.076388283167034e-05, + "loss_iou": 0.10107421875, + "loss_num": 0.0048828125, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 481538132, + "step": 8594 + }, + { + "epoch": 19.142538975501115, + "grad_norm": 17.62116050720215, + "learning_rate": 1e-06, + "loss": 0.414, + "num_input_tokens_seen": 481596580, + "step": 8595 + }, + { + "epoch": 19.142538975501115, + "loss": 0.3615182042121887, + "loss_ce": 6.798798858653754e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0093994140625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 481596580, + "step": 8595 + }, + { + "epoch": 19.14476614699332, + "grad_norm": 16.3442440032959, + "learning_rate": 1e-06, + "loss": 0.3613, + "num_input_tokens_seen": 481653060, + "step": 8596 + }, + { + "epoch": 19.14476614699332, + "loss": 0.39320868253707886, + "loss_ce": 0.00014228782674763352, + "loss_iou": 0.171875, + "loss_num": 0.010009765625, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 481653060, + "step": 8596 + }, + { + "epoch": 19.146993318485524, + "grad_norm": 11.166417121887207, + "learning_rate": 1e-06, + "loss": 0.5462, + "num_input_tokens_seen": 481709604, + "step": 8597 + }, + { + "epoch": 19.146993318485524, + "loss": 0.8146294355392456, + "loss_ce": 0.00025259278481826186, + "loss_iou": 0.271484375, + "loss_num": 0.05419921875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 481709604, + "step": 8597 + }, + { + "epoch": 19.14922048997773, + "grad_norm": 24.46466827392578, + "learning_rate": 1e-06, + "loss": 0.5409, + "num_input_tokens_seen": 481766076, + "step": 8598 + }, + { + "epoch": 19.14922048997773, + "loss": 0.37435752153396606, + "loss_ce": 8.992976654553786e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.008056640625, + "loss_xval": 0.375, + "num_input_tokens_seen": 481766076, + "step": 8598 + }, + { + "epoch": 19.151447661469934, + "grad_norm": 13.304176330566406, + "learning_rate": 1e-06, + "loss": 0.3356, + "num_input_tokens_seen": 481822160, + "step": 8599 + }, + { + "epoch": 19.151447661469934, + "loss": 0.24649234116077423, + "loss_ce": 7.815843855496496e-05, + "loss_iou": 0.11083984375, + "loss_num": 0.0050048828125, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 481822160, + "step": 8599 + }, + { + "epoch": 19.15367483296214, + "grad_norm": 31.582448959350586, + "learning_rate": 1e-06, + "loss": 0.2843, + "num_input_tokens_seen": 481878668, + "step": 8600 + }, + { + "epoch": 19.15367483296214, + "loss": 0.27753129601478577, + "loss_ce": 6.547504017362371e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.00518798828125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 481878668, + "step": 8600 + }, + { + "epoch": 19.155902004454344, + "grad_norm": 16.38020896911621, + "learning_rate": 1e-06, + "loss": 0.3844, + "num_input_tokens_seen": 481936496, + "step": 8601 + }, + { + "epoch": 19.155902004454344, + "loss": 0.43722057342529297, + "loss_ce": 8.677801815792918e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01025390625, + "loss_xval": 0.4375, + "num_input_tokens_seen": 481936496, + "step": 8601 + }, + { + "epoch": 19.15812917594655, + "grad_norm": 79.99256896972656, + "learning_rate": 1e-06, + "loss": 0.4235, + "num_input_tokens_seen": 481993192, + "step": 8602 + }, + { + "epoch": 19.15812917594655, + "loss": 0.4538060426712036, + "loss_ce": 7.070847641443834e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.015380859375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 481993192, + "step": 8602 + }, + { + "epoch": 19.160356347438753, + "grad_norm": 13.313328742980957, + "learning_rate": 1e-06, + "loss": 0.3876, + "num_input_tokens_seen": 482048636, + "step": 8603 + }, + { + "epoch": 19.160356347438753, + "loss": 0.39753034710884094, + "loss_ce": 6.941702304175124e-05, + "loss_iou": 0.177734375, + "loss_num": 0.0084228515625, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 482048636, + "step": 8603 + }, + { + "epoch": 19.16258351893096, + "grad_norm": 27.735868453979492, + "learning_rate": 1e-06, + "loss": 0.5142, + "num_input_tokens_seen": 482099436, + "step": 8604 + }, + { + "epoch": 19.16258351893096, + "loss": 0.3438310921192169, + "loss_ce": 8.108046313282102e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.00421142578125, + "loss_xval": 0.34375, + "num_input_tokens_seen": 482099436, + "step": 8604 + }, + { + "epoch": 19.164810690423163, + "grad_norm": 17.522174835205078, + "learning_rate": 1e-06, + "loss": 0.4765, + "num_input_tokens_seen": 482154916, + "step": 8605 + }, + { + "epoch": 19.164810690423163, + "loss": 0.4795961380004883, + "loss_ce": 0.00010396166180726141, + "loss_iou": 0.1953125, + "loss_num": 0.017578125, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 482154916, + "step": 8605 + }, + { + "epoch": 19.167037861915368, + "grad_norm": 17.66177749633789, + "learning_rate": 1e-06, + "loss": 0.5927, + "num_input_tokens_seen": 482211392, + "step": 8606 + }, + { + "epoch": 19.167037861915368, + "loss": 0.5756547451019287, + "loss_ce": 0.00021524931071326137, + "loss_iou": 0.259765625, + "loss_num": 0.0113525390625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 482211392, + "step": 8606 + }, + { + "epoch": 19.169265033407573, + "grad_norm": 19.507108688354492, + "learning_rate": 1e-06, + "loss": 0.2847, + "num_input_tokens_seen": 482270844, + "step": 8607 + }, + { + "epoch": 19.169265033407573, + "loss": 0.2603249251842499, + "loss_ce": 7.102765084709972e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.00323486328125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 482270844, + "step": 8607 + }, + { + "epoch": 19.171492204899778, + "grad_norm": 28.656322479248047, + "learning_rate": 1e-06, + "loss": 0.4528, + "num_input_tokens_seen": 482325476, + "step": 8608 + }, + { + "epoch": 19.171492204899778, + "loss": 0.43023812770843506, + "loss_ce": 0.00012339533714111894, + "loss_iou": 0.1923828125, + "loss_num": 0.00909423828125, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 482325476, + "step": 8608 + }, + { + "epoch": 19.173719376391983, + "grad_norm": 23.096620559692383, + "learning_rate": 1e-06, + "loss": 0.3489, + "num_input_tokens_seen": 482380532, + "step": 8609 + }, + { + "epoch": 19.173719376391983, + "loss": 0.48346471786499023, + "loss_ce": 6.630421557929367e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01226806640625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 482380532, + "step": 8609 + }, + { + "epoch": 19.175946547884188, + "grad_norm": 21.371700286865234, + "learning_rate": 1e-06, + "loss": 0.4853, + "num_input_tokens_seen": 482433260, + "step": 8610 + }, + { + "epoch": 19.175946547884188, + "loss": 0.4420054256916046, + "loss_ce": 0.00023296594736166298, + "loss_iou": 0.1865234375, + "loss_num": 0.01385498046875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 482433260, + "step": 8610 + }, + { + "epoch": 19.178173719376392, + "grad_norm": 22.586750030517578, + "learning_rate": 1e-06, + "loss": 0.4347, + "num_input_tokens_seen": 482489688, + "step": 8611 + }, + { + "epoch": 19.178173719376392, + "loss": 0.42078396677970886, + "loss_ce": 6.861792644485831e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.008056640625, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 482489688, + "step": 8611 + }, + { + "epoch": 19.180400890868597, + "grad_norm": 14.678589820861816, + "learning_rate": 1e-06, + "loss": 0.2694, + "num_input_tokens_seen": 482546336, + "step": 8612 + }, + { + "epoch": 19.180400890868597, + "loss": 0.22610211372375488, + "loss_ce": 8.891787729226053e-05, + "loss_iou": 0.099609375, + "loss_num": 0.00543212890625, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 482546336, + "step": 8612 + }, + { + "epoch": 19.182628062360802, + "grad_norm": 16.697389602661133, + "learning_rate": 1e-06, + "loss": 0.3748, + "num_input_tokens_seen": 482603824, + "step": 8613 + }, + { + "epoch": 19.182628062360802, + "loss": 0.37165093421936035, + "loss_ce": 6.890263466630131e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00909423828125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 482603824, + "step": 8613 + }, + { + "epoch": 19.184855233853007, + "grad_norm": 20.081663131713867, + "learning_rate": 1e-06, + "loss": 0.259, + "num_input_tokens_seen": 482662992, + "step": 8614 + }, + { + "epoch": 19.184855233853007, + "loss": 0.2634428143501282, + "loss_ce": 7.611673936480656e-05, + "loss_iou": 0.1103515625, + "loss_num": 0.00848388671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 482662992, + "step": 8614 + }, + { + "epoch": 19.187082405345212, + "grad_norm": 15.964731216430664, + "learning_rate": 1e-06, + "loss": 0.3832, + "num_input_tokens_seen": 482716832, + "step": 8615 + }, + { + "epoch": 19.187082405345212, + "loss": 0.31769198179244995, + "loss_ce": 6.505024066427723e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00787353515625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 482716832, + "step": 8615 + }, + { + "epoch": 19.189309576837417, + "grad_norm": 17.872699737548828, + "learning_rate": 1e-06, + "loss": 0.3562, + "num_input_tokens_seen": 482771736, + "step": 8616 + }, + { + "epoch": 19.189309576837417, + "loss": 0.36840832233428955, + "loss_ce": 6.113122071838006e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.00994873046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 482771736, + "step": 8616 + }, + { + "epoch": 19.19153674832962, + "grad_norm": 17.340930938720703, + "learning_rate": 1e-06, + "loss": 0.3884, + "num_input_tokens_seen": 482826724, + "step": 8617 + }, + { + "epoch": 19.19153674832962, + "loss": 0.4055081009864807, + "loss_ce": 8.206171332858503e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00946044921875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 482826724, + "step": 8617 + }, + { + "epoch": 19.193763919821826, + "grad_norm": 17.151872634887695, + "learning_rate": 1e-06, + "loss": 0.303, + "num_input_tokens_seen": 482883452, + "step": 8618 + }, + { + "epoch": 19.193763919821826, + "loss": 0.4231913685798645, + "loss_ce": 9.566033986629918e-05, + "loss_iou": 0.1875, + "loss_num": 0.0096435546875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 482883452, + "step": 8618 + }, + { + "epoch": 19.19599109131403, + "grad_norm": 17.975557327270508, + "learning_rate": 1e-06, + "loss": 0.3269, + "num_input_tokens_seen": 482941656, + "step": 8619 + }, + { + "epoch": 19.19599109131403, + "loss": 0.3718949258327484, + "loss_ce": 6.874144310131669e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.00982666015625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 482941656, + "step": 8619 + }, + { + "epoch": 19.198218262806236, + "grad_norm": 20.601882934570312, + "learning_rate": 1e-06, + "loss": 0.3974, + "num_input_tokens_seen": 482997328, + "step": 8620 + }, + { + "epoch": 19.198218262806236, + "loss": 0.32132768630981445, + "loss_ce": 5.390634396462701e-05, + "loss_iou": 0.13671875, + "loss_num": 0.00970458984375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 482997328, + "step": 8620 + }, + { + "epoch": 19.20044543429844, + "grad_norm": 23.428096771240234, + "learning_rate": 1e-06, + "loss": 0.4218, + "num_input_tokens_seen": 483051884, + "step": 8621 + }, + { + "epoch": 19.20044543429844, + "loss": 0.28058868646621704, + "loss_ce": 7.111984450602904e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.006500244140625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 483051884, + "step": 8621 + }, + { + "epoch": 19.202672605790646, + "grad_norm": 24.112133026123047, + "learning_rate": 1e-06, + "loss": 0.3654, + "num_input_tokens_seen": 483111104, + "step": 8622 + }, + { + "epoch": 19.202672605790646, + "loss": 0.35199394822120667, + "loss_ce": 6.522829062305391e-05, + "loss_iou": 0.16015625, + "loss_num": 0.0064697265625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 483111104, + "step": 8622 + }, + { + "epoch": 19.20489977728285, + "grad_norm": 21.558853149414062, + "learning_rate": 1e-06, + "loss": 0.2897, + "num_input_tokens_seen": 483165588, + "step": 8623 + }, + { + "epoch": 19.20489977728285, + "loss": 0.2628226578235626, + "loss_ce": 6.632453005295247e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.00592041015625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 483165588, + "step": 8623 + }, + { + "epoch": 19.207126948775056, + "grad_norm": 17.29518699645996, + "learning_rate": 1e-06, + "loss": 0.278, + "num_input_tokens_seen": 483217036, + "step": 8624 + }, + { + "epoch": 19.207126948775056, + "loss": 0.24366407096385956, + "loss_ce": 7.275763346115127e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.007415771484375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 483217036, + "step": 8624 + }, + { + "epoch": 19.20935412026726, + "grad_norm": 17.489004135131836, + "learning_rate": 1e-06, + "loss": 0.4856, + "num_input_tokens_seen": 483274096, + "step": 8625 + }, + { + "epoch": 19.20935412026726, + "loss": 0.37572720646858215, + "loss_ce": 0.0001168523303931579, + "loss_iou": 0.15625, + "loss_num": 0.01251220703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 483274096, + "step": 8625 + }, + { + "epoch": 19.211581291759465, + "grad_norm": 17.45191192626953, + "learning_rate": 1e-06, + "loss": 0.5543, + "num_input_tokens_seen": 483330112, + "step": 8626 + }, + { + "epoch": 19.211581291759465, + "loss": 0.36878734827041626, + "loss_ce": 7.397578156087548e-05, + "loss_iou": 0.158203125, + "loss_num": 0.01068115234375, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 483330112, + "step": 8626 + }, + { + "epoch": 19.21380846325167, + "grad_norm": 18.69688606262207, + "learning_rate": 1e-06, + "loss": 0.3022, + "num_input_tokens_seen": 483388400, + "step": 8627 + }, + { + "epoch": 19.21380846325167, + "loss": 0.24286232888698578, + "loss_ce": 6.447385385399684e-05, + "loss_iou": 0.103515625, + "loss_num": 0.00701904296875, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 483388400, + "step": 8627 + }, + { + "epoch": 19.216035634743875, + "grad_norm": 18.915224075317383, + "learning_rate": 1e-06, + "loss": 0.3235, + "num_input_tokens_seen": 483444252, + "step": 8628 + }, + { + "epoch": 19.216035634743875, + "loss": 0.40314409136772156, + "loss_ce": 6.792263593524694e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.013671875, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 483444252, + "step": 8628 + }, + { + "epoch": 19.21826280623608, + "grad_norm": 11.416555404663086, + "learning_rate": 1e-06, + "loss": 0.2597, + "num_input_tokens_seen": 483503672, + "step": 8629 + }, + { + "epoch": 19.21826280623608, + "loss": 0.2568409740924835, + "loss_ce": 6.605684757232666e-05, + "loss_iou": 0.1171875, + "loss_num": 0.00457763671875, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 483503672, + "step": 8629 + }, + { + "epoch": 19.220489977728285, + "grad_norm": 21.57809066772461, + "learning_rate": 1e-06, + "loss": 0.4147, + "num_input_tokens_seen": 483560072, + "step": 8630 + }, + { + "epoch": 19.220489977728285, + "loss": 0.3244105577468872, + "loss_ce": 6.974257121328264e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.01300048828125, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 483560072, + "step": 8630 + }, + { + "epoch": 19.22271714922049, + "grad_norm": 16.324159622192383, + "learning_rate": 1e-06, + "loss": 0.2693, + "num_input_tokens_seen": 483614548, + "step": 8631 + }, + { + "epoch": 19.22271714922049, + "loss": 0.2832646369934082, + "loss_ce": 6.152082642074674e-05, + "loss_iou": 0.12060546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 483614548, + "step": 8631 + }, + { + "epoch": 19.224944320712694, + "grad_norm": 29.336238861083984, + "learning_rate": 1e-06, + "loss": 0.3302, + "num_input_tokens_seen": 483672292, + "step": 8632 + }, + { + "epoch": 19.224944320712694, + "loss": 0.32135581970214844, + "loss_ce": 6.674337782897055e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.00439453125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 483672292, + "step": 8632 + }, + { + "epoch": 19.2271714922049, + "grad_norm": 18.221179962158203, + "learning_rate": 1e-06, + "loss": 0.5706, + "num_input_tokens_seen": 483729908, + "step": 8633 + }, + { + "epoch": 19.2271714922049, + "loss": 0.6349157691001892, + "loss_ce": 0.00015011939103715122, + "loss_iou": 0.27734375, + "loss_num": 0.016357421875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 483729908, + "step": 8633 + }, + { + "epoch": 19.229398663697104, + "grad_norm": 18.27716636657715, + "learning_rate": 1e-06, + "loss": 0.4169, + "num_input_tokens_seen": 483786140, + "step": 8634 + }, + { + "epoch": 19.229398663697104, + "loss": 0.3237491250038147, + "loss_ce": 7.969920261530206e-05, + "loss_iou": 0.140625, + "loss_num": 0.008544921875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 483786140, + "step": 8634 + }, + { + "epoch": 19.23162583518931, + "grad_norm": 33.526123046875, + "learning_rate": 1e-06, + "loss": 0.3883, + "num_input_tokens_seen": 483842292, + "step": 8635 + }, + { + "epoch": 19.23162583518931, + "loss": 0.27155420184135437, + "loss_ce": 6.981080514378846e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.00616455078125, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 483842292, + "step": 8635 + }, + { + "epoch": 19.233853006681514, + "grad_norm": 12.381620407104492, + "learning_rate": 1e-06, + "loss": 0.3823, + "num_input_tokens_seen": 483898136, + "step": 8636 + }, + { + "epoch": 19.233853006681514, + "loss": 0.510744571685791, + "loss_ce": 0.0001244788581971079, + "loss_iou": 0.220703125, + "loss_num": 0.0137939453125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 483898136, + "step": 8636 + }, + { + "epoch": 19.23608017817372, + "grad_norm": 15.86150074005127, + "learning_rate": 1e-06, + "loss": 0.4379, + "num_input_tokens_seen": 483952128, + "step": 8637 + }, + { + "epoch": 19.23608017817372, + "loss": 0.4342408776283264, + "loss_ce": 8.635166886961088e-05, + "loss_iou": 0.189453125, + "loss_num": 0.01092529296875, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 483952128, + "step": 8637 + }, + { + "epoch": 19.238307349665924, + "grad_norm": 22.681493759155273, + "learning_rate": 1e-06, + "loss": 0.347, + "num_input_tokens_seen": 484007556, + "step": 8638 + }, + { + "epoch": 19.238307349665924, + "loss": 0.26111987233161926, + "loss_ce": 7.25125937606208e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.0033721923828125, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 484007556, + "step": 8638 + }, + { + "epoch": 19.24053452115813, + "grad_norm": 16.731151580810547, + "learning_rate": 1e-06, + "loss": 0.3764, + "num_input_tokens_seen": 484061948, + "step": 8639 + }, + { + "epoch": 19.24053452115813, + "loss": 0.40643227100372314, + "loss_ce": 6.019037391524762e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.005859375, + "loss_xval": 0.40625, + "num_input_tokens_seen": 484061948, + "step": 8639 + }, + { + "epoch": 19.242761692650333, + "grad_norm": 17.2427978515625, + "learning_rate": 1e-06, + "loss": 0.3552, + "num_input_tokens_seen": 484118452, + "step": 8640 + }, + { + "epoch": 19.242761692650333, + "loss": 0.3023113012313843, + "loss_ce": 6.518626469187438e-05, + "loss_iou": 0.130859375, + "loss_num": 0.00799560546875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 484118452, + "step": 8640 + }, + { + "epoch": 19.244988864142538, + "grad_norm": 48.943878173828125, + "learning_rate": 1e-06, + "loss": 0.3616, + "num_input_tokens_seen": 484172552, + "step": 8641 + }, + { + "epoch": 19.244988864142538, + "loss": 0.34272468090057373, + "loss_ce": 7.331671076826751e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.0115966796875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 484172552, + "step": 8641 + }, + { + "epoch": 19.247216035634743, + "grad_norm": 14.787582397460938, + "learning_rate": 1e-06, + "loss": 0.3113, + "num_input_tokens_seen": 484228180, + "step": 8642 + }, + { + "epoch": 19.247216035634743, + "loss": 0.34266364574432373, + "loss_ce": 7.330120570259169e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00457763671875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 484228180, + "step": 8642 + }, + { + "epoch": 19.249443207126948, + "grad_norm": 18.2416934967041, + "learning_rate": 1e-06, + "loss": 0.4346, + "num_input_tokens_seen": 484283708, + "step": 8643 + }, + { + "epoch": 19.249443207126948, + "loss": 0.37335968017578125, + "loss_ce": 6.869265052955598e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.015625, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 484283708, + "step": 8643 + }, + { + "epoch": 19.251670378619153, + "grad_norm": 18.17558479309082, + "learning_rate": 1e-06, + "loss": 0.3687, + "num_input_tokens_seen": 484337716, + "step": 8644 + }, + { + "epoch": 19.251670378619153, + "loss": 0.259723424911499, + "loss_ce": 6.458204006776214e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.004119873046875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 484337716, + "step": 8644 + }, + { + "epoch": 19.253897550111358, + "grad_norm": 14.284744262695312, + "learning_rate": 1e-06, + "loss": 0.6006, + "num_input_tokens_seen": 484392308, + "step": 8645 + }, + { + "epoch": 19.253897550111358, + "loss": 0.6434794664382935, + "loss_ce": 7.73589126765728e-05, + "loss_iou": 0.25390625, + "loss_num": 0.02734375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 484392308, + "step": 8645 + }, + { + "epoch": 19.256124721603562, + "grad_norm": 12.745560646057129, + "learning_rate": 1e-06, + "loss": 0.2475, + "num_input_tokens_seen": 484446740, + "step": 8646 + }, + { + "epoch": 19.256124721603562, + "loss": 0.2596558630466461, + "loss_ce": 7.333945541176945e-05, + "loss_iou": 0.119140625, + "loss_num": 0.0042724609375, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 484446740, + "step": 8646 + }, + { + "epoch": 19.258351893095767, + "grad_norm": 19.38439178466797, + "learning_rate": 1e-06, + "loss": 0.4594, + "num_input_tokens_seen": 484501160, + "step": 8647 + }, + { + "epoch": 19.258351893095767, + "loss": 0.4618569016456604, + "loss_ce": 6.492799002444372e-05, + "loss_iou": 0.20703125, + "loss_num": 0.009765625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 484501160, + "step": 8647 + }, + { + "epoch": 19.260579064587972, + "grad_norm": 14.868861198425293, + "learning_rate": 1e-06, + "loss": 0.5964, + "num_input_tokens_seen": 484556848, + "step": 8648 + }, + { + "epoch": 19.260579064587972, + "loss": 0.8090330362319946, + "loss_ce": 7.303664460778236e-05, + "loss_iou": 0.3359375, + "loss_num": 0.02783203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 484556848, + "step": 8648 + }, + { + "epoch": 19.262806236080177, + "grad_norm": 17.061763763427734, + "learning_rate": 1e-06, + "loss": 0.3803, + "num_input_tokens_seen": 484614472, + "step": 8649 + }, + { + "epoch": 19.262806236080177, + "loss": 0.4142407476902008, + "loss_ce": 5.6169934396166354e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.0159912109375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 484614472, + "step": 8649 + }, + { + "epoch": 19.265033407572382, + "grad_norm": 19.03131675720215, + "learning_rate": 1e-06, + "loss": 0.283, + "num_input_tokens_seen": 484667276, + "step": 8650 + }, + { + "epoch": 19.265033407572382, + "loss": 0.3287457227706909, + "loss_ce": 0.0001019124174490571, + "loss_iou": 0.1474609375, + "loss_num": 0.0068359375, + "loss_xval": 0.328125, + "num_input_tokens_seen": 484667276, + "step": 8650 + }, + { + "epoch": 19.267260579064587, + "grad_norm": 42.60155487060547, + "learning_rate": 1e-06, + "loss": 0.4224, + "num_input_tokens_seen": 484723500, + "step": 8651 + }, + { + "epoch": 19.267260579064587, + "loss": 0.37189292907714844, + "loss_ce": 6.674042379017919e-05, + "loss_iou": 0.154296875, + "loss_num": 0.01275634765625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 484723500, + "step": 8651 + }, + { + "epoch": 19.26948775055679, + "grad_norm": 17.580562591552734, + "learning_rate": 1e-06, + "loss": 0.363, + "num_input_tokens_seen": 484779672, + "step": 8652 + }, + { + "epoch": 19.26948775055679, + "loss": 0.2955964207649231, + "loss_ce": 6.419201235985383e-05, + "loss_iou": 0.130859375, + "loss_num": 0.00701904296875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 484779672, + "step": 8652 + }, + { + "epoch": 19.271714922048996, + "grad_norm": 17.12259292602539, + "learning_rate": 1e-06, + "loss": 0.4862, + "num_input_tokens_seen": 484836188, + "step": 8653 + }, + { + "epoch": 19.271714922048996, + "loss": 0.3255062699317932, + "loss_ce": 6.683074752800167e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00628662109375, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 484836188, + "step": 8653 + }, + { + "epoch": 19.2739420935412, + "grad_norm": 21.847190856933594, + "learning_rate": 1e-06, + "loss": 0.2709, + "num_input_tokens_seen": 484895900, + "step": 8654 + }, + { + "epoch": 19.2739420935412, + "loss": 0.25446265935897827, + "loss_ce": 6.814206426497549e-05, + "loss_iou": 0.115234375, + "loss_num": 0.0047607421875, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 484895900, + "step": 8654 + }, + { + "epoch": 19.276169265033406, + "grad_norm": 13.837854385375977, + "learning_rate": 1e-06, + "loss": 0.3591, + "num_input_tokens_seen": 484953396, + "step": 8655 + }, + { + "epoch": 19.276169265033406, + "loss": 0.28464746475219727, + "loss_ce": 0.00010157265205634758, + "loss_iou": 0.123046875, + "loss_num": 0.00775146484375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 484953396, + "step": 8655 + }, + { + "epoch": 19.27839643652561, + "grad_norm": 18.63173484802246, + "learning_rate": 1e-06, + "loss": 0.5888, + "num_input_tokens_seen": 485004972, + "step": 8656 + }, + { + "epoch": 19.27839643652561, + "loss": 0.5696535706520081, + "loss_ce": 7.348039071075618e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0126953125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 485004972, + "step": 8656 + }, + { + "epoch": 19.280623608017816, + "grad_norm": 23.69566535949707, + "learning_rate": 1e-06, + "loss": 0.4021, + "num_input_tokens_seen": 485061276, + "step": 8657 + }, + { + "epoch": 19.280623608017816, + "loss": 0.4415389895439148, + "loss_ce": 0.00013270846102386713, + "loss_iou": 0.2021484375, + "loss_num": 0.00738525390625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 485061276, + "step": 8657 + }, + { + "epoch": 19.28285077951002, + "grad_norm": 16.324665069580078, + "learning_rate": 1e-06, + "loss": 0.5027, + "num_input_tokens_seen": 485116212, + "step": 8658 + }, + { + "epoch": 19.28285077951002, + "loss": 0.656080961227417, + "loss_ce": 7.512497541029006e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0177001953125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 485116212, + "step": 8658 + }, + { + "epoch": 19.285077951002226, + "grad_norm": 25.834796905517578, + "learning_rate": 1e-06, + "loss": 0.5209, + "num_input_tokens_seen": 485171484, + "step": 8659 + }, + { + "epoch": 19.285077951002226, + "loss": 0.42792704701423645, + "loss_ce": 7.059163181111217e-05, + "loss_iou": 0.189453125, + "loss_num": 0.009765625, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 485171484, + "step": 8659 + }, + { + "epoch": 19.28730512249443, + "grad_norm": 14.427014350891113, + "learning_rate": 1e-06, + "loss": 0.434, + "num_input_tokens_seen": 485229056, + "step": 8660 + }, + { + "epoch": 19.28730512249443, + "loss": 0.596036434173584, + "loss_ce": 8.911389159038663e-05, + "loss_iou": 0.236328125, + "loss_num": 0.0247802734375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 485229056, + "step": 8660 + }, + { + "epoch": 19.289532293986635, + "grad_norm": 56.55402374267578, + "learning_rate": 1e-06, + "loss": 0.5668, + "num_input_tokens_seen": 485286492, + "step": 8661 + }, + { + "epoch": 19.289532293986635, + "loss": 0.6791332960128784, + "loss_ce": 5.614275869447738e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0284423828125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 485286492, + "step": 8661 + }, + { + "epoch": 19.29175946547884, + "grad_norm": 16.25436019897461, + "learning_rate": 1e-06, + "loss": 0.3742, + "num_input_tokens_seen": 485343172, + "step": 8662 + }, + { + "epoch": 19.29175946547884, + "loss": 0.30635008215904236, + "loss_ce": 7.566015119664371e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.007049560546875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 485343172, + "step": 8662 + }, + { + "epoch": 19.293986636971045, + "grad_norm": 17.356847763061523, + "learning_rate": 1e-06, + "loss": 0.3913, + "num_input_tokens_seen": 485399704, + "step": 8663 + }, + { + "epoch": 19.293986636971045, + "loss": 0.4219398498535156, + "loss_ce": 6.486824713647366e-05, + "loss_iou": 0.173828125, + "loss_num": 0.01470947265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 485399704, + "step": 8663 + }, + { + "epoch": 19.29621380846325, + "grad_norm": 21.03586769104004, + "learning_rate": 1e-06, + "loss": 0.2966, + "num_input_tokens_seen": 485455500, + "step": 8664 + }, + { + "epoch": 19.29621380846325, + "loss": 0.22834303975105286, + "loss_ce": 7.155879575293511e-05, + "loss_iou": 0.103515625, + "loss_num": 0.0042724609375, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 485455500, + "step": 8664 + }, + { + "epoch": 19.29844097995546, + "grad_norm": 25.01719093322754, + "learning_rate": 1e-06, + "loss": 0.4601, + "num_input_tokens_seen": 485511580, + "step": 8665 + }, + { + "epoch": 19.29844097995546, + "loss": 0.5397475361824036, + "loss_ce": 7.467882824130356e-05, + "loss_iou": 0.259765625, + "loss_num": 0.004241943359375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 485511580, + "step": 8665 + }, + { + "epoch": 19.30066815144766, + "grad_norm": 15.293524742126465, + "learning_rate": 1e-06, + "loss": 0.4385, + "num_input_tokens_seen": 485567524, + "step": 8666 + }, + { + "epoch": 19.30066815144766, + "loss": 0.29761672019958496, + "loss_ce": 7.03330006217584e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.00830078125, + "loss_xval": 0.296875, + "num_input_tokens_seen": 485567524, + "step": 8666 + }, + { + "epoch": 19.302895322939868, + "grad_norm": 24.937509536743164, + "learning_rate": 1e-06, + "loss": 0.4014, + "num_input_tokens_seen": 485623744, + "step": 8667 + }, + { + "epoch": 19.302895322939868, + "loss": 0.5748975872993469, + "loss_ce": 6.84759725118056e-05, + "loss_iou": 0.251953125, + "loss_num": 0.013916015625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 485623744, + "step": 8667 + }, + { + "epoch": 19.305122494432073, + "grad_norm": 16.04351234436035, + "learning_rate": 1e-06, + "loss": 0.4303, + "num_input_tokens_seen": 485680980, + "step": 8668 + }, + { + "epoch": 19.305122494432073, + "loss": 0.310863733291626, + "loss_ce": 7.271443610079587e-05, + "loss_iou": 0.12890625, + "loss_num": 0.01043701171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 485680980, + "step": 8668 + }, + { + "epoch": 19.307349665924278, + "grad_norm": 34.41288757324219, + "learning_rate": 1e-06, + "loss": 0.5049, + "num_input_tokens_seen": 485738648, + "step": 8669 + }, + { + "epoch": 19.307349665924278, + "loss": 0.6703619956970215, + "loss_ce": 7.39232636988163e-05, + "loss_iou": 0.302734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 485738648, + "step": 8669 + }, + { + "epoch": 19.309576837416483, + "grad_norm": 16.024730682373047, + "learning_rate": 1e-06, + "loss": 0.4571, + "num_input_tokens_seen": 485795136, + "step": 8670 + }, + { + "epoch": 19.309576837416483, + "loss": 0.3097517490386963, + "loss_ce": 5.93543445575051e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.00921630859375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 485795136, + "step": 8670 + }, + { + "epoch": 19.311804008908688, + "grad_norm": 22.233224868774414, + "learning_rate": 1e-06, + "loss": 0.3574, + "num_input_tokens_seen": 485853408, + "step": 8671 + }, + { + "epoch": 19.311804008908688, + "loss": 0.45661628246307373, + "loss_ce": 7.331735105253756e-05, + "loss_iou": 0.21875, + "loss_num": 0.00396728515625, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 485853408, + "step": 8671 + }, + { + "epoch": 19.314031180400892, + "grad_norm": 21.287254333496094, + "learning_rate": 1e-06, + "loss": 0.2966, + "num_input_tokens_seen": 485908808, + "step": 8672 + }, + { + "epoch": 19.314031180400892, + "loss": 0.31123119592666626, + "loss_ce": 7.398641901090741e-05, + "loss_iou": 0.146484375, + "loss_num": 0.003875732421875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 485908808, + "step": 8672 + }, + { + "epoch": 19.316258351893097, + "grad_norm": 17.1425838470459, + "learning_rate": 1e-06, + "loss": 0.4107, + "num_input_tokens_seen": 485965732, + "step": 8673 + }, + { + "epoch": 19.316258351893097, + "loss": 0.4722324013710022, + "loss_ce": 6.443218444474041e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01495361328125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 485965732, + "step": 8673 + }, + { + "epoch": 19.318485523385302, + "grad_norm": 23.668569564819336, + "learning_rate": 1e-06, + "loss": 0.4553, + "num_input_tokens_seen": 486019256, + "step": 8674 + }, + { + "epoch": 19.318485523385302, + "loss": 0.5221450328826904, + "loss_ce": 8.080543193500489e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 486019256, + "step": 8674 + }, + { + "epoch": 19.320712694877507, + "grad_norm": 19.682819366455078, + "learning_rate": 1e-06, + "loss": 0.2918, + "num_input_tokens_seen": 486077160, + "step": 8675 + }, + { + "epoch": 19.320712694877507, + "loss": 0.2836154103279114, + "loss_ce": 7.660247501917183e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.0079345703125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 486077160, + "step": 8675 + }, + { + "epoch": 19.322939866369712, + "grad_norm": 19.56645965576172, + "learning_rate": 1e-06, + "loss": 0.4519, + "num_input_tokens_seen": 486132592, + "step": 8676 + }, + { + "epoch": 19.322939866369712, + "loss": 0.4211920499801636, + "loss_ce": 0.00011051179171772674, + "loss_iou": 0.166015625, + "loss_num": 0.0177001953125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 486132592, + "step": 8676 + }, + { + "epoch": 19.325167037861917, + "grad_norm": 10.575581550598145, + "learning_rate": 1e-06, + "loss": 0.3449, + "num_input_tokens_seen": 486190616, + "step": 8677 + }, + { + "epoch": 19.325167037861917, + "loss": 0.2473614513874054, + "loss_ce": 6.224659591680393e-05, + "loss_iou": 0.1123046875, + "loss_num": 0.0045166015625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 486190616, + "step": 8677 + }, + { + "epoch": 19.32739420935412, + "grad_norm": 16.035282135009766, + "learning_rate": 1e-06, + "loss": 0.4254, + "num_input_tokens_seen": 486245220, + "step": 8678 + }, + { + "epoch": 19.32739420935412, + "loss": 0.3443076014518738, + "loss_ce": 6.930766539881006e-05, + "loss_iou": 0.15625, + "loss_num": 0.006195068359375, + "loss_xval": 0.34375, + "num_input_tokens_seen": 486245220, + "step": 8678 + }, + { + "epoch": 19.329621380846326, + "grad_norm": 17.30709457397461, + "learning_rate": 1e-06, + "loss": 0.4013, + "num_input_tokens_seen": 486302496, + "step": 8679 + }, + { + "epoch": 19.329621380846326, + "loss": 0.3727584779262543, + "loss_ce": 7.78008543420583e-05, + "loss_iou": 0.16796875, + "loss_num": 0.007110595703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 486302496, + "step": 8679 + }, + { + "epoch": 19.33184855233853, + "grad_norm": 20.775163650512695, + "learning_rate": 1e-06, + "loss": 0.415, + "num_input_tokens_seen": 486356500, + "step": 8680 + }, + { + "epoch": 19.33184855233853, + "loss": 0.4452681541442871, + "loss_ce": 7.77466339059174e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.005218505859375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 486356500, + "step": 8680 + }, + { + "epoch": 19.334075723830736, + "grad_norm": 35.834449768066406, + "learning_rate": 1e-06, + "loss": 0.2574, + "num_input_tokens_seen": 486413916, + "step": 8681 + }, + { + "epoch": 19.334075723830736, + "loss": 0.27562782168388367, + "loss_ce": 6.935855344636366e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0084228515625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 486413916, + "step": 8681 + }, + { + "epoch": 19.33630289532294, + "grad_norm": 14.752124786376953, + "learning_rate": 1e-06, + "loss": 0.3223, + "num_input_tokens_seen": 486471340, + "step": 8682 + }, + { + "epoch": 19.33630289532294, + "loss": 0.43706992268562317, + "loss_ce": 5.819134821649641e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.4375, + "num_input_tokens_seen": 486471340, + "step": 8682 + }, + { + "epoch": 19.338530066815146, + "grad_norm": 19.141727447509766, + "learning_rate": 1e-06, + "loss": 0.3472, + "num_input_tokens_seen": 486529184, + "step": 8683 + }, + { + "epoch": 19.338530066815146, + "loss": 0.2464015781879425, + "loss_ce": 6.367703463183716e-05, + "loss_iou": 0.109375, + "loss_num": 0.005645751953125, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 486529184, + "step": 8683 + }, + { + "epoch": 19.34075723830735, + "grad_norm": 19.265857696533203, + "learning_rate": 1e-06, + "loss": 0.2633, + "num_input_tokens_seen": 486584540, + "step": 8684 + }, + { + "epoch": 19.34075723830735, + "loss": 0.24787020683288574, + "loss_ce": 6.746564758941531e-05, + "loss_iou": 0.11181640625, + "loss_num": 0.0048828125, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 486584540, + "step": 8684 + }, + { + "epoch": 19.342984409799556, + "grad_norm": 64.30094909667969, + "learning_rate": 1e-06, + "loss": 0.4085, + "num_input_tokens_seen": 486639632, + "step": 8685 + }, + { + "epoch": 19.342984409799556, + "loss": 0.3603741526603699, + "loss_ce": 8.36373510537669e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.004547119140625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 486639632, + "step": 8685 + }, + { + "epoch": 19.34521158129176, + "grad_norm": 22.54205322265625, + "learning_rate": 1e-06, + "loss": 0.4647, + "num_input_tokens_seen": 486693520, + "step": 8686 + }, + { + "epoch": 19.34521158129176, + "loss": 0.39545318484306335, + "loss_ce": 6.7443739681039e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00970458984375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 486693520, + "step": 8686 + }, + { + "epoch": 19.347438752783965, + "grad_norm": 19.74683380126953, + "learning_rate": 1e-06, + "loss": 0.5212, + "num_input_tokens_seen": 486747984, + "step": 8687 + }, + { + "epoch": 19.347438752783965, + "loss": 0.47200244665145874, + "loss_ce": 0.00044482320663519204, + "loss_iou": 0.1962890625, + "loss_num": 0.01556396484375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 486747984, + "step": 8687 + }, + { + "epoch": 19.34966592427617, + "grad_norm": 26.414714813232422, + "learning_rate": 1e-06, + "loss": 0.3239, + "num_input_tokens_seen": 486803160, + "step": 8688 + }, + { + "epoch": 19.34966592427617, + "loss": 0.30175623297691345, + "loss_ce": 5.944071017438546e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.00982666015625, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 486803160, + "step": 8688 + }, + { + "epoch": 19.351893095768375, + "grad_norm": 17.674320220947266, + "learning_rate": 1e-06, + "loss": 0.4622, + "num_input_tokens_seen": 486861400, + "step": 8689 + }, + { + "epoch": 19.351893095768375, + "loss": 0.45283421874046326, + "loss_ce": 7.542921230196953e-05, + "loss_iou": 0.181640625, + "loss_num": 0.017822265625, + "loss_xval": 0.453125, + "num_input_tokens_seen": 486861400, + "step": 8689 + }, + { + "epoch": 19.35412026726058, + "grad_norm": 24.226484298706055, + "learning_rate": 1e-06, + "loss": 0.5093, + "num_input_tokens_seen": 486913828, + "step": 8690 + }, + { + "epoch": 19.35412026726058, + "loss": 0.6187217235565186, + "loss_ce": 6.936366116860881e-05, + "loss_iou": 0.283203125, + "loss_num": 0.01068115234375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 486913828, + "step": 8690 + }, + { + "epoch": 19.356347438752785, + "grad_norm": 22.290658950805664, + "learning_rate": 1e-06, + "loss": 0.4345, + "num_input_tokens_seen": 486968612, + "step": 8691 + }, + { + "epoch": 19.356347438752785, + "loss": 0.3384547233581543, + "loss_ce": 7.58438982302323e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.00567626953125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 486968612, + "step": 8691 + }, + { + "epoch": 19.35857461024499, + "grad_norm": 17.18849754333496, + "learning_rate": 1e-06, + "loss": 0.3679, + "num_input_tokens_seen": 487024476, + "step": 8692 + }, + { + "epoch": 19.35857461024499, + "loss": 0.46119123697280884, + "loss_ce": 0.00013169522571843117, + "loss_iou": 0.2041015625, + "loss_num": 0.01068115234375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 487024476, + "step": 8692 + }, + { + "epoch": 19.360801781737194, + "grad_norm": 21.173423767089844, + "learning_rate": 1e-06, + "loss": 0.3566, + "num_input_tokens_seen": 487081268, + "step": 8693 + }, + { + "epoch": 19.360801781737194, + "loss": 0.43475276231765747, + "loss_ce": 6.03883781877812e-05, + "loss_iou": 0.1796875, + "loss_num": 0.01519775390625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 487081268, + "step": 8693 + }, + { + "epoch": 19.3630289532294, + "grad_norm": 19.107248306274414, + "learning_rate": 1e-06, + "loss": 0.3304, + "num_input_tokens_seen": 487133808, + "step": 8694 + }, + { + "epoch": 19.3630289532294, + "loss": 0.30717021226882935, + "loss_ce": 5.656223220285028e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.005706787109375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 487133808, + "step": 8694 + }, + { + "epoch": 19.365256124721604, + "grad_norm": 18.290464401245117, + "learning_rate": 1e-06, + "loss": 0.3325, + "num_input_tokens_seen": 487191508, + "step": 8695 + }, + { + "epoch": 19.365256124721604, + "loss": 0.41119277477264404, + "loss_ce": 5.9931116993539035e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0067138671875, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 487191508, + "step": 8695 + }, + { + "epoch": 19.36748329621381, + "grad_norm": 20.532798767089844, + "learning_rate": 1e-06, + "loss": 0.4025, + "num_input_tokens_seen": 487248060, + "step": 8696 + }, + { + "epoch": 19.36748329621381, + "loss": 0.24934418499469757, + "loss_ce": 7.660340634174645e-05, + "loss_iou": 0.11328125, + "loss_num": 0.004486083984375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 487248060, + "step": 8696 + }, + { + "epoch": 19.369710467706014, + "grad_norm": 21.59139633178711, + "learning_rate": 1e-06, + "loss": 0.4072, + "num_input_tokens_seen": 487304516, + "step": 8697 + }, + { + "epoch": 19.369710467706014, + "loss": 0.503758430480957, + "loss_ce": 9.630977001506835e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0181884765625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 487304516, + "step": 8697 + }, + { + "epoch": 19.37193763919822, + "grad_norm": 18.260494232177734, + "learning_rate": 1e-06, + "loss": 0.3184, + "num_input_tokens_seen": 487360040, + "step": 8698 + }, + { + "epoch": 19.37193763919822, + "loss": 0.3482136130332947, + "loss_ce": 6.908161594765261e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.009765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 487360040, + "step": 8698 + }, + { + "epoch": 19.374164810690424, + "grad_norm": 13.985121726989746, + "learning_rate": 1e-06, + "loss": 0.2731, + "num_input_tokens_seen": 487413428, + "step": 8699 + }, + { + "epoch": 19.374164810690424, + "loss": 0.23252707719802856, + "loss_ce": 7.46890582377091e-05, + "loss_iou": 0.107421875, + "loss_num": 0.00347900390625, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 487413428, + "step": 8699 + }, + { + "epoch": 19.37639198218263, + "grad_norm": 23.42201805114746, + "learning_rate": 1e-06, + "loss": 0.4771, + "num_input_tokens_seen": 487468832, + "step": 8700 + }, + { + "epoch": 19.37639198218263, + "loss": 0.4661286175251007, + "loss_ce": 6.414036033675075e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0107421875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 487468832, + "step": 8700 + }, + { + "epoch": 19.378619153674833, + "grad_norm": 18.14457893371582, + "learning_rate": 1e-06, + "loss": 0.3369, + "num_input_tokens_seen": 487525960, + "step": 8701 + }, + { + "epoch": 19.378619153674833, + "loss": 0.3606569170951843, + "loss_ce": 0.00012223681551404297, + "loss_iou": 0.14453125, + "loss_num": 0.01446533203125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 487525960, + "step": 8701 + }, + { + "epoch": 19.380846325167038, + "grad_norm": 11.536683082580566, + "learning_rate": 1e-06, + "loss": 0.3009, + "num_input_tokens_seen": 487583988, + "step": 8702 + }, + { + "epoch": 19.380846325167038, + "loss": 0.3344113230705261, + "loss_ce": 6.077022408135235e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.01318359375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 487583988, + "step": 8702 + }, + { + "epoch": 19.383073496659243, + "grad_norm": 21.613056182861328, + "learning_rate": 1e-06, + "loss": 0.3386, + "num_input_tokens_seen": 487636664, + "step": 8703 + }, + { + "epoch": 19.383073496659243, + "loss": 0.3694436550140381, + "loss_ce": 8.942555723479018e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0157470703125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 487636664, + "step": 8703 + }, + { + "epoch": 19.385300668151448, + "grad_norm": 22.000041961669922, + "learning_rate": 1e-06, + "loss": 0.3079, + "num_input_tokens_seen": 487694000, + "step": 8704 + }, + { + "epoch": 19.385300668151448, + "loss": 0.23017969727516174, + "loss_ce": 7.716739492025226e-05, + "loss_iou": 0.10693359375, + "loss_num": 0.003326416015625, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 487694000, + "step": 8704 + }, + { + "epoch": 19.387527839643653, + "grad_norm": 13.974677085876465, + "learning_rate": 1e-06, + "loss": 0.3866, + "num_input_tokens_seen": 487749452, + "step": 8705 + }, + { + "epoch": 19.387527839643653, + "loss": 0.27674055099487305, + "loss_ce": 6.818344991188496e-05, + "loss_iou": 0.12890625, + "loss_num": 0.003692626953125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 487749452, + "step": 8705 + }, + { + "epoch": 19.389755011135858, + "grad_norm": 24.663671493530273, + "learning_rate": 1e-06, + "loss": 0.3754, + "num_input_tokens_seen": 487803040, + "step": 8706 + }, + { + "epoch": 19.389755011135858, + "loss": 0.4255967140197754, + "loss_ce": 5.9576763305813074e-05, + "loss_iou": 0.19140625, + "loss_num": 0.00836181640625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 487803040, + "step": 8706 + }, + { + "epoch": 19.391982182628063, + "grad_norm": 15.723613739013672, + "learning_rate": 1e-06, + "loss": 0.385, + "num_input_tokens_seen": 487861660, + "step": 8707 + }, + { + "epoch": 19.391982182628063, + "loss": 0.502314031124115, + "loss_ce": 0.0001167724549304694, + "loss_iou": 0.2216796875, + "loss_num": 0.0115966796875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 487861660, + "step": 8707 + }, + { + "epoch": 19.394209354120267, + "grad_norm": 18.70146369934082, + "learning_rate": 1e-06, + "loss": 0.4727, + "num_input_tokens_seen": 487917728, + "step": 8708 + }, + { + "epoch": 19.394209354120267, + "loss": 0.4696964621543884, + "loss_ce": 9.19793383218348e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0150146484375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 487917728, + "step": 8708 + }, + { + "epoch": 19.396436525612472, + "grad_norm": 18.742565155029297, + "learning_rate": 1e-06, + "loss": 0.5473, + "num_input_tokens_seen": 487972684, + "step": 8709 + }, + { + "epoch": 19.396436525612472, + "loss": 0.47677797079086304, + "loss_ce": 9.340511314803734e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 487972684, + "step": 8709 + }, + { + "epoch": 19.398663697104677, + "grad_norm": 79.24798583984375, + "learning_rate": 1e-06, + "loss": 0.477, + "num_input_tokens_seen": 488027984, + "step": 8710 + }, + { + "epoch": 19.398663697104677, + "loss": 0.7484385967254639, + "loss_ce": 8.648347284179181e-05, + "loss_iou": 0.271484375, + "loss_num": 0.040771484375, + "loss_xval": 0.75, + "num_input_tokens_seen": 488027984, + "step": 8710 + }, + { + "epoch": 19.400890868596882, + "grad_norm": 23.097761154174805, + "learning_rate": 1e-06, + "loss": 0.4981, + "num_input_tokens_seen": 488084492, + "step": 8711 + }, + { + "epoch": 19.400890868596882, + "loss": 0.5120543241500854, + "loss_ce": 9.138373570749536e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.021728515625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 488084492, + "step": 8711 + }, + { + "epoch": 19.403118040089087, + "grad_norm": 26.856395721435547, + "learning_rate": 1e-06, + "loss": 0.5109, + "num_input_tokens_seen": 488139008, + "step": 8712 + }, + { + "epoch": 19.403118040089087, + "loss": 0.4314088821411133, + "loss_ce": 7.342306344071403e-05, + "loss_iou": 0.193359375, + "loss_num": 0.00885009765625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 488139008, + "step": 8712 + }, + { + "epoch": 19.40534521158129, + "grad_norm": 17.314449310302734, + "learning_rate": 1e-06, + "loss": 0.443, + "num_input_tokens_seen": 488193600, + "step": 8713 + }, + { + "epoch": 19.40534521158129, + "loss": 0.39160478115081787, + "loss_ce": 6.424939056159928e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.00823974609375, + "loss_xval": 0.390625, + "num_input_tokens_seen": 488193600, + "step": 8713 + }, + { + "epoch": 19.407572383073497, + "grad_norm": 25.597942352294922, + "learning_rate": 1e-06, + "loss": 0.2987, + "num_input_tokens_seen": 488251452, + "step": 8714 + }, + { + "epoch": 19.407572383073497, + "loss": 0.3611512780189514, + "loss_ce": 6.729191227350384e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00811767578125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 488251452, + "step": 8714 + }, + { + "epoch": 19.4097995545657, + "grad_norm": 25.324542999267578, + "learning_rate": 1e-06, + "loss": 0.4442, + "num_input_tokens_seen": 488307252, + "step": 8715 + }, + { + "epoch": 19.4097995545657, + "loss": 0.5760635137557983, + "loss_ce": 7.472602010238916e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.019287109375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 488307252, + "step": 8715 + }, + { + "epoch": 19.412026726057906, + "grad_norm": 17.964107513427734, + "learning_rate": 1e-06, + "loss": 0.3626, + "num_input_tokens_seen": 488363092, + "step": 8716 + }, + { + "epoch": 19.412026726057906, + "loss": 0.3571329712867737, + "loss_ce": 7.73169522290118e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.00836181640625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 488363092, + "step": 8716 + }, + { + "epoch": 19.41425389755011, + "grad_norm": 16.54916763305664, + "learning_rate": 1e-06, + "loss": 0.2583, + "num_input_tokens_seen": 488420924, + "step": 8717 + }, + { + "epoch": 19.41425389755011, + "loss": 0.1723698377609253, + "loss_ce": 6.760148971807212e-05, + "loss_iou": 0.068359375, + "loss_num": 0.00714111328125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 488420924, + "step": 8717 + }, + { + "epoch": 19.416481069042316, + "grad_norm": 31.5208683013916, + "learning_rate": 1e-06, + "loss": 0.5982, + "num_input_tokens_seen": 488473012, + "step": 8718 + }, + { + "epoch": 19.416481069042316, + "loss": 0.48655349016189575, + "loss_ce": 0.00010326504707336426, + "loss_iou": 0.20703125, + "loss_num": 0.01434326171875, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 488473012, + "step": 8718 + }, + { + "epoch": 19.41870824053452, + "grad_norm": 14.7725248336792, + "learning_rate": 1e-06, + "loss": 0.3455, + "num_input_tokens_seen": 488529484, + "step": 8719 + }, + { + "epoch": 19.41870824053452, + "loss": 0.34601306915283203, + "loss_ce": 6.580428453162313e-05, + "loss_iou": 0.16015625, + "loss_num": 0.005126953125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 488529484, + "step": 8719 + }, + { + "epoch": 19.420935412026726, + "grad_norm": 18.437286376953125, + "learning_rate": 1e-06, + "loss": 0.384, + "num_input_tokens_seen": 488587032, + "step": 8720 + }, + { + "epoch": 19.420935412026726, + "loss": 0.30788394808769226, + "loss_ce": 0.0001446868700440973, + "loss_iou": 0.1259765625, + "loss_num": 0.0113525390625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 488587032, + "step": 8720 + }, + { + "epoch": 19.42316258351893, + "grad_norm": 18.742013931274414, + "learning_rate": 1e-06, + "loss": 0.6236, + "num_input_tokens_seen": 488643196, + "step": 8721 + }, + { + "epoch": 19.42316258351893, + "loss": 0.742760181427002, + "loss_ce": 8.442148100584745e-05, + "loss_iou": 0.298828125, + "loss_num": 0.029052734375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 488643196, + "step": 8721 + }, + { + "epoch": 19.425389755011135, + "grad_norm": 20.891321182250977, + "learning_rate": 1e-06, + "loss": 0.4322, + "num_input_tokens_seen": 488699628, + "step": 8722 + }, + { + "epoch": 19.425389755011135, + "loss": 0.44647514820098877, + "loss_ce": 6.399239646270871e-05, + "loss_iou": 0.20703125, + "loss_num": 0.006561279296875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 488699628, + "step": 8722 + }, + { + "epoch": 19.42761692650334, + "grad_norm": 11.5533447265625, + "learning_rate": 1e-06, + "loss": 0.4345, + "num_input_tokens_seen": 488756332, + "step": 8723 + }, + { + "epoch": 19.42761692650334, + "loss": 0.4649144411087036, + "loss_ce": 7.067194383125752e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 488756332, + "step": 8723 + }, + { + "epoch": 19.429844097995545, + "grad_norm": 27.625247955322266, + "learning_rate": 1e-06, + "loss": 0.3431, + "num_input_tokens_seen": 488812856, + "step": 8724 + }, + { + "epoch": 19.429844097995545, + "loss": 0.4067014753818512, + "loss_ce": 8.526656165486202e-05, + "loss_iou": 0.17578125, + "loss_num": 0.01080322265625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 488812856, + "step": 8724 + }, + { + "epoch": 19.43207126948775, + "grad_norm": 17.141159057617188, + "learning_rate": 1e-06, + "loss": 0.3232, + "num_input_tokens_seen": 488866352, + "step": 8725 + }, + { + "epoch": 19.43207126948775, + "loss": 0.34912025928497314, + "loss_ce": 6.021084118401632e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00982666015625, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 488866352, + "step": 8725 + }, + { + "epoch": 19.434298440979955, + "grad_norm": 18.241275787353516, + "learning_rate": 1e-06, + "loss": 0.36, + "num_input_tokens_seen": 488925556, + "step": 8726 + }, + { + "epoch": 19.434298440979955, + "loss": 0.3274751305580139, + "loss_ce": 8.253618580056354e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.004791259765625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 488925556, + "step": 8726 + }, + { + "epoch": 19.43652561247216, + "grad_norm": 61.344459533691406, + "learning_rate": 1e-06, + "loss": 0.3267, + "num_input_tokens_seen": 488983256, + "step": 8727 + }, + { + "epoch": 19.43652561247216, + "loss": 0.4625932276248932, + "loss_ce": 6.8813213147223e-05, + "loss_iou": 0.197265625, + "loss_num": 0.01336669921875, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 488983256, + "step": 8727 + }, + { + "epoch": 19.438752783964365, + "grad_norm": 24.339759826660156, + "learning_rate": 1e-06, + "loss": 0.4207, + "num_input_tokens_seen": 489039964, + "step": 8728 + }, + { + "epoch": 19.438752783964365, + "loss": 0.4445319175720215, + "loss_ce": 7.392082625301555e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.0047607421875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 489039964, + "step": 8728 + }, + { + "epoch": 19.44097995545657, + "grad_norm": 14.037641525268555, + "learning_rate": 1e-06, + "loss": 0.4557, + "num_input_tokens_seen": 489095200, + "step": 8729 + }, + { + "epoch": 19.44097995545657, + "loss": 0.36377742886543274, + "loss_ce": 6.892348756082356e-05, + "loss_iou": 0.140625, + "loss_num": 0.016357421875, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 489095200, + "step": 8729 + }, + { + "epoch": 19.443207126948774, + "grad_norm": 30.27907943725586, + "learning_rate": 1e-06, + "loss": 0.3208, + "num_input_tokens_seen": 489150700, + "step": 8730 + }, + { + "epoch": 19.443207126948774, + "loss": 0.3462543785572052, + "loss_ce": 6.298153311945498e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.006317138671875, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 489150700, + "step": 8730 + }, + { + "epoch": 19.44543429844098, + "grad_norm": 13.437280654907227, + "learning_rate": 1e-06, + "loss": 0.512, + "num_input_tokens_seen": 489206188, + "step": 8731 + }, + { + "epoch": 19.44543429844098, + "loss": 0.36061540246009827, + "loss_ce": 8.075297955656424e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.01007080078125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 489206188, + "step": 8731 + }, + { + "epoch": 19.447661469933184, + "grad_norm": 23.991952896118164, + "learning_rate": 1e-06, + "loss": 0.3874, + "num_input_tokens_seen": 489260872, + "step": 8732 + }, + { + "epoch": 19.447661469933184, + "loss": 0.3317917287349701, + "loss_ce": 6.565573858097196e-05, + "loss_iou": 0.14453125, + "loss_num": 0.00848388671875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 489260872, + "step": 8732 + }, + { + "epoch": 19.44988864142539, + "grad_norm": 14.908693313598633, + "learning_rate": 1e-06, + "loss": 0.3623, + "num_input_tokens_seen": 489317248, + "step": 8733 + }, + { + "epoch": 19.44988864142539, + "loss": 0.30453062057495117, + "loss_ce": 8.725294173927978e-05, + "loss_iou": 0.134765625, + "loss_num": 0.007080078125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 489317248, + "step": 8733 + }, + { + "epoch": 19.452115812917594, + "grad_norm": 72.13795471191406, + "learning_rate": 1e-06, + "loss": 0.2718, + "num_input_tokens_seen": 489374240, + "step": 8734 + }, + { + "epoch": 19.452115812917594, + "loss": 0.3349186182022095, + "loss_ce": 7.976653432706371e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.004302978515625, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 489374240, + "step": 8734 + }, + { + "epoch": 19.4543429844098, + "grad_norm": 15.769805908203125, + "learning_rate": 1e-06, + "loss": 0.3651, + "num_input_tokens_seen": 489432756, + "step": 8735 + }, + { + "epoch": 19.4543429844098, + "loss": 0.3257424235343933, + "loss_ce": 5.8816825912799686e-05, + "loss_iou": 0.1328125, + "loss_num": 0.01202392578125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 489432756, + "step": 8735 + }, + { + "epoch": 19.456570155902003, + "grad_norm": 134.32081604003906, + "learning_rate": 1e-06, + "loss": 0.3294, + "num_input_tokens_seen": 489488532, + "step": 8736 + }, + { + "epoch": 19.456570155902003, + "loss": 0.30285748839378357, + "loss_ce": 6.207545811776072e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.0068359375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 489488532, + "step": 8736 + }, + { + "epoch": 19.45879732739421, + "grad_norm": 19.14018440246582, + "learning_rate": 1e-06, + "loss": 0.4107, + "num_input_tokens_seen": 489541320, + "step": 8737 + }, + { + "epoch": 19.45879732739421, + "loss": 0.36432066559791565, + "loss_ce": 6.283754191827029e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.00836181640625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 489541320, + "step": 8737 + }, + { + "epoch": 19.461024498886413, + "grad_norm": 17.099512100219727, + "learning_rate": 1e-06, + "loss": 0.3985, + "num_input_tokens_seen": 489597048, + "step": 8738 + }, + { + "epoch": 19.461024498886413, + "loss": 0.45394012331962585, + "loss_ce": 8.270963735412806e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.01544189453125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 489597048, + "step": 8738 + }, + { + "epoch": 19.463251670378618, + "grad_norm": 17.23607063293457, + "learning_rate": 1e-06, + "loss": 0.44, + "num_input_tokens_seen": 489653544, + "step": 8739 + }, + { + "epoch": 19.463251670378618, + "loss": 0.47176241874694824, + "loss_ce": 8.272690320154652e-05, + "loss_iou": 0.212890625, + "loss_num": 0.009033203125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 489653544, + "step": 8739 + }, + { + "epoch": 19.465478841870823, + "grad_norm": 25.947065353393555, + "learning_rate": 1e-06, + "loss": 0.5785, + "num_input_tokens_seen": 489710516, + "step": 8740 + }, + { + "epoch": 19.465478841870823, + "loss": 0.40167826414108276, + "loss_ce": 6.691856833640486e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.00811767578125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 489710516, + "step": 8740 + }, + { + "epoch": 19.467706013363028, + "grad_norm": 15.991206169128418, + "learning_rate": 1e-06, + "loss": 0.2839, + "num_input_tokens_seen": 489766984, + "step": 8741 + }, + { + "epoch": 19.467706013363028, + "loss": 0.25690385699272156, + "loss_ce": 6.792375643271953e-05, + "loss_iou": 0.1142578125, + "loss_num": 0.0057373046875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 489766984, + "step": 8741 + }, + { + "epoch": 19.469933184855233, + "grad_norm": 12.759182929992676, + "learning_rate": 1e-06, + "loss": 0.3062, + "num_input_tokens_seen": 489823144, + "step": 8742 + }, + { + "epoch": 19.469933184855233, + "loss": 0.3527446389198303, + "loss_ce": 0.0001140242675319314, + "loss_iou": 0.15625, + "loss_num": 0.0079345703125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 489823144, + "step": 8742 + }, + { + "epoch": 19.472160356347437, + "grad_norm": 18.29888343811035, + "learning_rate": 1e-06, + "loss": 0.336, + "num_input_tokens_seen": 489878156, + "step": 8743 + }, + { + "epoch": 19.472160356347437, + "loss": 0.3941093683242798, + "loss_ce": 6.639507773797959e-05, + "loss_iou": 0.1796875, + "loss_num": 0.007232666015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 489878156, + "step": 8743 + }, + { + "epoch": 19.474387527839642, + "grad_norm": 27.575145721435547, + "learning_rate": 1e-06, + "loss": 0.3742, + "num_input_tokens_seen": 489932784, + "step": 8744 + }, + { + "epoch": 19.474387527839642, + "loss": 0.45381438732147217, + "loss_ce": 7.904722588136792e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01214599609375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 489932784, + "step": 8744 + }, + { + "epoch": 19.476614699331847, + "grad_norm": 16.64528465270996, + "learning_rate": 1e-06, + "loss": 0.3247, + "num_input_tokens_seen": 489988008, + "step": 8745 + }, + { + "epoch": 19.476614699331847, + "loss": 0.37039196491241455, + "loss_ce": 7.641837873961776e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.024658203125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 489988008, + "step": 8745 + }, + { + "epoch": 19.478841870824052, + "grad_norm": 12.457158088684082, + "learning_rate": 1e-06, + "loss": 0.4706, + "num_input_tokens_seen": 490045656, + "step": 8746 + }, + { + "epoch": 19.478841870824052, + "loss": 0.576514482498169, + "loss_ce": 9.850240166997537e-05, + "loss_iou": 0.212890625, + "loss_num": 0.0301513671875, + "loss_xval": 0.578125, + "num_input_tokens_seen": 490045656, + "step": 8746 + }, + { + "epoch": 19.481069042316257, + "grad_norm": 29.269418716430664, + "learning_rate": 1e-06, + "loss": 0.5014, + "num_input_tokens_seen": 490100304, + "step": 8747 + }, + { + "epoch": 19.481069042316257, + "loss": 0.4481244385242462, + "loss_ce": 6.533482519444078e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01025390625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 490100304, + "step": 8747 + }, + { + "epoch": 19.48329621380846, + "grad_norm": 17.77977752685547, + "learning_rate": 1e-06, + "loss": 0.3385, + "num_input_tokens_seen": 490155632, + "step": 8748 + }, + { + "epoch": 19.48329621380846, + "loss": 0.24175959825515747, + "loss_ce": 6.037494677002542e-05, + "loss_iou": 0.1015625, + "loss_num": 0.00762939453125, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 490155632, + "step": 8748 + }, + { + "epoch": 19.485523385300667, + "grad_norm": 16.200801849365234, + "learning_rate": 1e-06, + "loss": 0.3294, + "num_input_tokens_seen": 490213296, + "step": 8749 + }, + { + "epoch": 19.485523385300667, + "loss": 0.3568875789642334, + "loss_ce": 7.605206337757409e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.0047607421875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 490213296, + "step": 8749 + }, + { + "epoch": 19.48775055679287, + "grad_norm": 15.862672805786133, + "learning_rate": 1e-06, + "loss": 0.3418, + "num_input_tokens_seen": 490266248, + "step": 8750 + }, + { + "epoch": 19.48775055679287, + "eval_seeclick_web_CIoU": 0.5831195414066315, + "eval_seeclick_web_GIoU": 0.5824593603610992, + "eval_seeclick_web_IoU": 0.6025447845458984, + "eval_seeclick_web_MAE_all": 0.0153358387760818, + "eval_seeclick_web_MAE_h": 0.0069596245884895325, + "eval_seeclick_web_MAE_w": 0.015038002282381058, + "eval_seeclick_web_MAE_x_boxes": 0.008386612171307206, + "eval_seeclick_web_MAE_y_boxes": 0.02156838239170611, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.9198112487792969, + "eval_seeclick_web_loss_ce": 0.00012025472460663877, + "eval_seeclick_web_loss_iou": 0.4251708984375, + "eval_seeclick_web_loss_num": 0.012319564819335938, + "eval_seeclick_web_loss_xval": 0.912109375, + "eval_seeclick_web_runtime": 21.1555, + "eval_seeclick_web_samples_per_second": 2.363, + "eval_seeclick_web_steps_per_second": 0.095, + "num_input_tokens_seen": 490266248, + "step": 8750 + }, + { + "epoch": 19.48775055679287, + "eval_icons_CIoU": 0.26400046050548553, + "eval_icons_GIoU": 0.299440860748291, + "eval_icons_IoU": 0.34618473052978516, + "eval_icons_MAE_all": 0.062128059566020966, + "eval_icons_MAE_h": 0.029708989895880222, + "eval_icons_MAE_w": 0.06808132492005825, + "eval_icons_MAE_x_boxes": 0.06109212152659893, + "eval_icons_MAE_y_boxes": 0.03715028055012226, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7176454067230225, + "eval_icons_loss_ce": 0.00013828581722918898, + "eval_icons_loss_iou": 0.673095703125, + "eval_icons_loss_num": 0.06012725830078125, + "eval_icons_loss_xval": 1.6474609375, + "eval_icons_runtime": 18.377, + "eval_icons_samples_per_second": 2.721, + "eval_icons_steps_per_second": 0.109, + "num_input_tokens_seen": 490266248, + "step": 8750 + }, + { + "epoch": 19.48775055679287, + "eval_screenspot_CIoU": 0.37087904413541156, + "eval_screenspot_GIoU": 0.3902689814567566, + "eval_screenspot_IoU": 0.4464185933272044, + "eval_screenspot_MAE_all": 0.058752829829851784, + "eval_screenspot_MAE_h": 0.039602997402350106, + "eval_screenspot_MAE_w": 0.06322793414195378, + "eval_screenspot_MAE_x_boxes": 0.0708691483984391, + "eval_screenspot_MAE_y_boxes": 0.04212713334709406, + "eval_screenspot_inside_bbox": 0.7041666706403097, + "eval_screenspot_loss": 1.5793300867080688, + "eval_screenspot_loss_ce": 0.00015701642648006478, + "eval_screenspot_loss_iou": 0.6509602864583334, + "eval_screenspot_loss_num": 0.06783040364583333, + "eval_screenspot_loss_xval": 1.640625, + "eval_screenspot_runtime": 31.0289, + "eval_screenspot_samples_per_second": 2.868, + "eval_screenspot_steps_per_second": 0.097, + "num_input_tokens_seen": 490266248, + "step": 8750 + }, + { + "epoch": 19.48775055679287, + "eval_compot_CIoU": 0.34746964275836945, + "eval_compot_GIoU": 0.3572629541158676, + "eval_compot_IoU": 0.40467870235443115, + "eval_compot_MAE_all": 0.018142362125217915, + "eval_compot_MAE_h": 0.009449589531868696, + "eval_compot_MAE_w": 0.020473646000027657, + "eval_compot_MAE_x_boxes": 0.030018706806004047, + "eval_compot_MAE_y_boxes": 0.006989206187427044, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3744853734970093, + "eval_compot_loss_ce": 0.00010999454752891324, + "eval_compot_loss_iou": 0.6365966796875, + "eval_compot_loss_num": 0.016666412353515625, + "eval_compot_loss_xval": 1.356689453125, + "eval_compot_runtime": 19.0377, + "eval_compot_samples_per_second": 2.626, + "eval_compot_steps_per_second": 0.105, + "num_input_tokens_seen": 490266248, + "step": 8750 + }, + { + "epoch": 19.48775055679287, + "eval_custom_ui_val_CIoU": 0.4767077672812674, + "eval_custom_ui_val_GIoU": 0.481793655289544, + "eval_custom_ui_val_IoU": 0.5382914178901248, + "eval_custom_ui_val_MAE_all": 0.026706857451548178, + "eval_custom_ui_val_MAE_h": 0.013321074657142162, + "eval_custom_ui_val_MAE_w": 0.03640370096804367, + "eval_custom_ui_val_MAE_x_boxes": 0.032115884642634124, + "eval_custom_ui_val_MAE_y_boxes": 0.013279020553454757, + "eval_custom_ui_val_inside_bbox": 0.7754629651705424, + "eval_custom_ui_val_loss": 1.1639938354492188, + "eval_custom_ui_val_loss_ce": 0.00012037252761527068, + "eval_custom_ui_val_loss_iou": 0.5000542534722222, + "eval_custom_ui_val_loss_num": 0.023473739624023438, + "eval_custom_ui_val_loss_xval": 1.1174858940972223, + "eval_custom_ui_val_runtime": 57.2523, + "eval_custom_ui_val_samples_per_second": 4.629, + "eval_custom_ui_val_steps_per_second": 0.157, + "num_input_tokens_seen": 490266248, + "step": 8750 + }, + { + "epoch": 19.48775055679287, + "loss": 0.8289517164230347, + "loss_ce": 9.427011536899954e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0164794921875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 490266248, + "step": 8750 + }, + { + "epoch": 19.489977728285076, + "grad_norm": 16.564908981323242, + "learning_rate": 1e-06, + "loss": 0.3765, + "num_input_tokens_seen": 490324400, + "step": 8751 + }, + { + "epoch": 19.489977728285076, + "loss": 0.39094218611717224, + "loss_ce": 7.304349128389731e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0068359375, + "loss_xval": 0.390625, + "num_input_tokens_seen": 490324400, + "step": 8751 + }, + { + "epoch": 19.49220489977728, + "grad_norm": 18.350833892822266, + "learning_rate": 1e-06, + "loss": 0.4225, + "num_input_tokens_seen": 490380904, + "step": 8752 + }, + { + "epoch": 19.49220489977728, + "loss": 0.43350207805633545, + "loss_ce": 6.09189628448803e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.01361083984375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 490380904, + "step": 8752 + }, + { + "epoch": 19.494432071269486, + "grad_norm": 21.1101016998291, + "learning_rate": 1e-06, + "loss": 0.5412, + "num_input_tokens_seen": 490436388, + "step": 8753 + }, + { + "epoch": 19.494432071269486, + "loss": 0.5999248027801514, + "loss_ce": 7.128326979000121e-05, + "loss_iou": 0.25, + "loss_num": 0.0196533203125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 490436388, + "step": 8753 + }, + { + "epoch": 19.49665924276169, + "grad_norm": 20.924692153930664, + "learning_rate": 1e-06, + "loss": 0.3887, + "num_input_tokens_seen": 490491856, + "step": 8754 + }, + { + "epoch": 19.49665924276169, + "loss": 0.5576558113098145, + "loss_ce": 0.00028280686819925904, + "loss_iou": 0.2470703125, + "loss_num": 0.0128173828125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 490491856, + "step": 8754 + }, + { + "epoch": 19.498886414253896, + "grad_norm": 16.07146644592285, + "learning_rate": 1e-06, + "loss": 0.3163, + "num_input_tokens_seen": 490547508, + "step": 8755 + }, + { + "epoch": 19.498886414253896, + "loss": 0.35533058643341064, + "loss_ce": 7.546801498392597e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.0059814453125, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 490547508, + "step": 8755 + }, + { + "epoch": 19.501113585746104, + "grad_norm": 17.8480167388916, + "learning_rate": 1e-06, + "loss": 0.3092, + "num_input_tokens_seen": 490602244, + "step": 8756 + }, + { + "epoch": 19.501113585746104, + "loss": 0.3217475414276123, + "loss_ce": 9.226159454556182e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.007598876953125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 490602244, + "step": 8756 + }, + { + "epoch": 19.50334075723831, + "grad_norm": 20.913305282592773, + "learning_rate": 1e-06, + "loss": 0.4527, + "num_input_tokens_seen": 490659044, + "step": 8757 + }, + { + "epoch": 19.50334075723831, + "loss": 0.49548420310020447, + "loss_ce": 6.180736818350852e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0224609375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 490659044, + "step": 8757 + }, + { + "epoch": 19.505567928730514, + "grad_norm": 18.693899154663086, + "learning_rate": 1e-06, + "loss": 0.2973, + "num_input_tokens_seen": 490715388, + "step": 8758 + }, + { + "epoch": 19.505567928730514, + "loss": 0.19214215874671936, + "loss_ce": 6.452086381614208e-05, + "loss_iou": 0.08056640625, + "loss_num": 0.006256103515625, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 490715388, + "step": 8758 + }, + { + "epoch": 19.50779510022272, + "grad_norm": 22.483903884887695, + "learning_rate": 1e-06, + "loss": 0.4002, + "num_input_tokens_seen": 490771780, + "step": 8759 + }, + { + "epoch": 19.50779510022272, + "loss": 0.40626299381256104, + "loss_ce": 7.403266499750316e-05, + "loss_iou": 0.181640625, + "loss_num": 0.00860595703125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 490771780, + "step": 8759 + }, + { + "epoch": 19.510022271714924, + "grad_norm": 14.928621292114258, + "learning_rate": 1e-06, + "loss": 0.3059, + "num_input_tokens_seen": 490829264, + "step": 8760 + }, + { + "epoch": 19.510022271714924, + "loss": 0.32245999574661255, + "loss_ce": 7.230706978589296e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.00927734375, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 490829264, + "step": 8760 + }, + { + "epoch": 19.51224944320713, + "grad_norm": 40.57841491699219, + "learning_rate": 1e-06, + "loss": 0.4451, + "num_input_tokens_seen": 490885112, + "step": 8761 + }, + { + "epoch": 19.51224944320713, + "loss": 0.2985847294330597, + "loss_ce": 6.179253978189081e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.00830078125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 490885112, + "step": 8761 + }, + { + "epoch": 19.514476614699333, + "grad_norm": 26.560928344726562, + "learning_rate": 1e-06, + "loss": 0.376, + "num_input_tokens_seen": 490940464, + "step": 8762 + }, + { + "epoch": 19.514476614699333, + "loss": 0.35675495862960815, + "loss_ce": 6.548444798681885e-05, + "loss_iou": 0.1640625, + "loss_num": 0.00579833984375, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 490940464, + "step": 8762 + }, + { + "epoch": 19.51670378619154, + "grad_norm": 14.288774490356445, + "learning_rate": 1e-06, + "loss": 0.4027, + "num_input_tokens_seen": 490995184, + "step": 8763 + }, + { + "epoch": 19.51670378619154, + "loss": 0.2923039197921753, + "loss_ce": 6.761118129361421e-05, + "loss_iou": 0.12890625, + "loss_num": 0.00677490234375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 490995184, + "step": 8763 + }, + { + "epoch": 19.518930957683743, + "grad_norm": 23.1356201171875, + "learning_rate": 1e-06, + "loss": 0.3756, + "num_input_tokens_seen": 491051012, + "step": 8764 + }, + { + "epoch": 19.518930957683743, + "loss": 0.41393959522247314, + "loss_ce": 6.0199585277587175e-05, + "loss_iou": 0.16796875, + "loss_num": 0.015380859375, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 491051012, + "step": 8764 + }, + { + "epoch": 19.521158129175948, + "grad_norm": 18.624935150146484, + "learning_rate": 1e-06, + "loss": 0.3166, + "num_input_tokens_seen": 491107176, + "step": 8765 + }, + { + "epoch": 19.521158129175948, + "loss": 0.34882843494415283, + "loss_ce": 7.35686844564043e-05, + "loss_iou": 0.16015625, + "loss_num": 0.005462646484375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 491107176, + "step": 8765 + }, + { + "epoch": 19.523385300668153, + "grad_norm": 20.93644905090332, + "learning_rate": 1e-06, + "loss": 0.4879, + "num_input_tokens_seen": 491162216, + "step": 8766 + }, + { + "epoch": 19.523385300668153, + "loss": 0.4118029773235321, + "loss_ce": 5.981113645248115e-05, + "loss_iou": 0.1875, + "loss_num": 0.0074462890625, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 491162216, + "step": 8766 + }, + { + "epoch": 19.525612472160358, + "grad_norm": 15.544754028320312, + "learning_rate": 1e-06, + "loss": 0.4786, + "num_input_tokens_seen": 491217424, + "step": 8767 + }, + { + "epoch": 19.525612472160358, + "loss": 0.5115818381309509, + "loss_ce": 0.00010719275451265275, + "loss_iou": 0.2080078125, + "loss_num": 0.0189208984375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 491217424, + "step": 8767 + }, + { + "epoch": 19.527839643652563, + "grad_norm": 16.272380828857422, + "learning_rate": 1e-06, + "loss": 0.3691, + "num_input_tokens_seen": 491269164, + "step": 8768 + }, + { + "epoch": 19.527839643652563, + "loss": 0.326735258102417, + "loss_ce": 7.508813723688945e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0078125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 491269164, + "step": 8768 + }, + { + "epoch": 19.530066815144767, + "grad_norm": 19.87215232849121, + "learning_rate": 1e-06, + "loss": 0.276, + "num_input_tokens_seen": 491328140, + "step": 8769 + }, + { + "epoch": 19.530066815144767, + "loss": 0.25873589515686035, + "loss_ce": 6.88969885231927e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.002838134765625, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 491328140, + "step": 8769 + }, + { + "epoch": 19.532293986636972, + "grad_norm": 16.369861602783203, + "learning_rate": 1e-06, + "loss": 0.5755, + "num_input_tokens_seen": 491381900, + "step": 8770 + }, + { + "epoch": 19.532293986636972, + "loss": 0.7968376874923706, + "loss_ce": 8.477165829390287e-05, + "loss_iou": 0.310546875, + "loss_num": 0.035400390625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 491381900, + "step": 8770 + }, + { + "epoch": 19.534521158129177, + "grad_norm": 29.245031356811523, + "learning_rate": 1e-06, + "loss": 0.4296, + "num_input_tokens_seen": 491437320, + "step": 8771 + }, + { + "epoch": 19.534521158129177, + "loss": 0.2833980321884155, + "loss_ce": 7.282086880877614e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.00396728515625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 491437320, + "step": 8771 + }, + { + "epoch": 19.536748329621382, + "grad_norm": 27.237791061401367, + "learning_rate": 1e-06, + "loss": 0.4367, + "num_input_tokens_seen": 491493612, + "step": 8772 + }, + { + "epoch": 19.536748329621382, + "loss": 0.3653065860271454, + "loss_ce": 7.220754923764616e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00543212890625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 491493612, + "step": 8772 + }, + { + "epoch": 19.538975501113587, + "grad_norm": 52.4660758972168, + "learning_rate": 1e-06, + "loss": 0.3087, + "num_input_tokens_seen": 491546616, + "step": 8773 + }, + { + "epoch": 19.538975501113587, + "loss": 0.2954094111919403, + "loss_ce": 6.026863775332458e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00537109375, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 491546616, + "step": 8773 + }, + { + "epoch": 19.54120267260579, + "grad_norm": 20.992326736450195, + "learning_rate": 1e-06, + "loss": 0.4035, + "num_input_tokens_seen": 491598516, + "step": 8774 + }, + { + "epoch": 19.54120267260579, + "loss": 0.37043464183807373, + "loss_ce": 7.328739593503997e-05, + "loss_iou": 0.16015625, + "loss_num": 0.010009765625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 491598516, + "step": 8774 + }, + { + "epoch": 19.543429844097997, + "grad_norm": 17.51434898376465, + "learning_rate": 1e-06, + "loss": 0.3259, + "num_input_tokens_seen": 491655988, + "step": 8775 + }, + { + "epoch": 19.543429844097997, + "loss": 0.3194010257720947, + "loss_ce": 6.507511716336012e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.00555419921875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 491655988, + "step": 8775 + }, + { + "epoch": 19.5456570155902, + "grad_norm": 19.03451919555664, + "learning_rate": 1e-06, + "loss": 0.3281, + "num_input_tokens_seen": 491711688, + "step": 8776 + }, + { + "epoch": 19.5456570155902, + "loss": 0.26713547110557556, + "loss_ce": 7.614593778271228e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.0045166015625, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 491711688, + "step": 8776 + }, + { + "epoch": 19.547884187082406, + "grad_norm": 26.791309356689453, + "learning_rate": 1e-06, + "loss": 0.4318, + "num_input_tokens_seen": 491765368, + "step": 8777 + }, + { + "epoch": 19.547884187082406, + "loss": 0.45965927839279175, + "loss_ce": 6.455044058384374e-05, + "loss_iou": 0.201171875, + "loss_num": 0.011474609375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 491765368, + "step": 8777 + }, + { + "epoch": 19.55011135857461, + "grad_norm": 39.71892166137695, + "learning_rate": 1e-06, + "loss": 0.4227, + "num_input_tokens_seen": 491822976, + "step": 8778 + }, + { + "epoch": 19.55011135857461, + "loss": 0.439960241317749, + "loss_ce": 7.986063428688794e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 491822976, + "step": 8778 + }, + { + "epoch": 19.552338530066816, + "grad_norm": 14.03285026550293, + "learning_rate": 1e-06, + "loss": 0.3264, + "num_input_tokens_seen": 491879560, + "step": 8779 + }, + { + "epoch": 19.552338530066816, + "loss": 0.28052568435668945, + "loss_ce": 6.915038829902187e-05, + "loss_iou": 0.12451171875, + "loss_num": 0.006256103515625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 491879560, + "step": 8779 + }, + { + "epoch": 19.55456570155902, + "grad_norm": 18.03692626953125, + "learning_rate": 1e-06, + "loss": 0.6105, + "num_input_tokens_seen": 491932316, + "step": 8780 + }, + { + "epoch": 19.55456570155902, + "loss": 0.5739569067955017, + "loss_ce": 0.00010437377204652876, + "loss_iou": 0.23046875, + "loss_num": 0.022705078125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 491932316, + "step": 8780 + }, + { + "epoch": 19.556792873051226, + "grad_norm": 32.511112213134766, + "learning_rate": 1e-06, + "loss": 0.4244, + "num_input_tokens_seen": 491988108, + "step": 8781 + }, + { + "epoch": 19.556792873051226, + "loss": 0.32041823863983154, + "loss_ce": 7.521534280385822e-05, + "loss_iou": 0.138671875, + "loss_num": 0.008544921875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 491988108, + "step": 8781 + }, + { + "epoch": 19.55902004454343, + "grad_norm": 16.41408920288086, + "learning_rate": 1e-06, + "loss": 0.3266, + "num_input_tokens_seen": 492045888, + "step": 8782 + }, + { + "epoch": 19.55902004454343, + "loss": 0.24403327703475952, + "loss_ce": 7.575724157504737e-05, + "loss_iou": 0.11083984375, + "loss_num": 0.004486083984375, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 492045888, + "step": 8782 + }, + { + "epoch": 19.561247216035635, + "grad_norm": 19.35165786743164, + "learning_rate": 1e-06, + "loss": 0.4494, + "num_input_tokens_seen": 492102372, + "step": 8783 + }, + { + "epoch": 19.561247216035635, + "loss": 0.42511671781539917, + "loss_ce": 6.788804603274912e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.01129150390625, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 492102372, + "step": 8783 + }, + { + "epoch": 19.56347438752784, + "grad_norm": 28.635347366333008, + "learning_rate": 1e-06, + "loss": 0.4119, + "num_input_tokens_seen": 492158548, + "step": 8784 + }, + { + "epoch": 19.56347438752784, + "loss": 0.319175660610199, + "loss_ce": 8.385031105717644e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00823974609375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 492158548, + "step": 8784 + }, + { + "epoch": 19.565701559020045, + "grad_norm": 20.49407386779785, + "learning_rate": 1e-06, + "loss": 0.3606, + "num_input_tokens_seen": 492214608, + "step": 8785 + }, + { + "epoch": 19.565701559020045, + "loss": 0.32773077487945557, + "loss_ce": 9.405831951880828e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.00482177734375, + "loss_xval": 0.328125, + "num_input_tokens_seen": 492214608, + "step": 8785 + }, + { + "epoch": 19.56792873051225, + "grad_norm": 19.137483596801758, + "learning_rate": 1e-06, + "loss": 0.6018, + "num_input_tokens_seen": 492271240, + "step": 8786 + }, + { + "epoch": 19.56792873051225, + "loss": 0.5808820128440857, + "loss_ce": 7.148716395022348e-05, + "loss_iou": 0.244140625, + "loss_num": 0.0185546875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 492271240, + "step": 8786 + }, + { + "epoch": 19.570155902004455, + "grad_norm": 13.811071395874023, + "learning_rate": 1e-06, + "loss": 0.3109, + "num_input_tokens_seen": 492323760, + "step": 8787 + }, + { + "epoch": 19.570155902004455, + "loss": 0.32258814573287964, + "loss_ce": 7.836205622879788e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.006988525390625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 492323760, + "step": 8787 + }, + { + "epoch": 19.57238307349666, + "grad_norm": 17.646892547607422, + "learning_rate": 1e-06, + "loss": 0.3744, + "num_input_tokens_seen": 492377292, + "step": 8788 + }, + { + "epoch": 19.57238307349666, + "loss": 0.48769521713256836, + "loss_ce": 8.538780093658715e-05, + "loss_iou": 0.203125, + "loss_num": 0.0164794921875, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 492377292, + "step": 8788 + }, + { + "epoch": 19.574610244988865, + "grad_norm": 32.08982467651367, + "learning_rate": 1e-06, + "loss": 0.339, + "num_input_tokens_seen": 492432440, + "step": 8789 + }, + { + "epoch": 19.574610244988865, + "loss": 0.32432878017425537, + "loss_ce": 6.424468301702291e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0054931640625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 492432440, + "step": 8789 + }, + { + "epoch": 19.57683741648107, + "grad_norm": 10.528325080871582, + "learning_rate": 1e-06, + "loss": 0.2973, + "num_input_tokens_seen": 492490824, + "step": 8790 + }, + { + "epoch": 19.57683741648107, + "loss": 0.3674495816230774, + "loss_ce": 7.898532930994406e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0130615234375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 492490824, + "step": 8790 + }, + { + "epoch": 19.579064587973274, + "grad_norm": 18.546995162963867, + "learning_rate": 1e-06, + "loss": 0.3658, + "num_input_tokens_seen": 492549284, + "step": 8791 + }, + { + "epoch": 19.579064587973274, + "loss": 0.2795357406139374, + "loss_ce": 5.5770447943359613e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0091552734375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 492549284, + "step": 8791 + }, + { + "epoch": 19.58129175946548, + "grad_norm": 19.887773513793945, + "learning_rate": 1e-06, + "loss": 0.376, + "num_input_tokens_seen": 492605324, + "step": 8792 + }, + { + "epoch": 19.58129175946548, + "loss": 0.35456758737564087, + "loss_ce": 7.540988008258864e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0081787109375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 492605324, + "step": 8792 + }, + { + "epoch": 19.583518930957684, + "grad_norm": 19.090816497802734, + "learning_rate": 1e-06, + "loss": 0.3357, + "num_input_tokens_seen": 492661580, + "step": 8793 + }, + { + "epoch": 19.583518930957684, + "loss": 0.36834782361984253, + "loss_ce": 6.167573155835271e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0125732421875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 492661580, + "step": 8793 + }, + { + "epoch": 19.58574610244989, + "grad_norm": 18.095928192138672, + "learning_rate": 1e-06, + "loss": 0.3908, + "num_input_tokens_seen": 492719140, + "step": 8794 + }, + { + "epoch": 19.58574610244989, + "loss": 0.2904747724533081, + "loss_ce": 6.948962982278317e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.007720947265625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 492719140, + "step": 8794 + }, + { + "epoch": 19.587973273942094, + "grad_norm": 15.511141777038574, + "learning_rate": 1e-06, + "loss": 0.4641, + "num_input_tokens_seen": 492775612, + "step": 8795 + }, + { + "epoch": 19.587973273942094, + "loss": 0.42014381289482117, + "loss_ce": 9.987192606786266e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.00775146484375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 492775612, + "step": 8795 + }, + { + "epoch": 19.5902004454343, + "grad_norm": 19.146825790405273, + "learning_rate": 1e-06, + "loss": 0.4842, + "num_input_tokens_seen": 492833836, + "step": 8796 + }, + { + "epoch": 19.5902004454343, + "loss": 0.5711435079574585, + "loss_ce": 9.86100931186229e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0177001953125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 492833836, + "step": 8796 + }, + { + "epoch": 19.592427616926503, + "grad_norm": 14.747178077697754, + "learning_rate": 1e-06, + "loss": 0.444, + "num_input_tokens_seen": 492887604, + "step": 8797 + }, + { + "epoch": 19.592427616926503, + "loss": 0.29975390434265137, + "loss_ce": 7.127322896849364e-05, + "loss_iou": 0.140625, + "loss_num": 0.00384521484375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 492887604, + "step": 8797 + }, + { + "epoch": 19.59465478841871, + "grad_norm": 18.54738426208496, + "learning_rate": 1e-06, + "loss": 0.4126, + "num_input_tokens_seen": 492944056, + "step": 8798 + }, + { + "epoch": 19.59465478841871, + "loss": 0.259499728679657, + "loss_ce": 6.978299643378705e-05, + "loss_iou": 0.1171875, + "loss_num": 0.00494384765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 492944056, + "step": 8798 + }, + { + "epoch": 19.596881959910913, + "grad_norm": 22.788713455200195, + "learning_rate": 1e-06, + "loss": 0.4753, + "num_input_tokens_seen": 493001668, + "step": 8799 + }, + { + "epoch": 19.596881959910913, + "loss": 0.5575045347213745, + "loss_ce": 7.041562639642507e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 493001668, + "step": 8799 + }, + { + "epoch": 19.599109131403118, + "grad_norm": 16.851640701293945, + "learning_rate": 1e-06, + "loss": 0.2476, + "num_input_tokens_seen": 493060028, + "step": 8800 + }, + { + "epoch": 19.599109131403118, + "loss": 0.2806518077850342, + "loss_ce": 7.319905853364617e-05, + "loss_iou": 0.125, + "loss_num": 0.00628662109375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 493060028, + "step": 8800 + }, + { + "epoch": 19.601336302895323, + "grad_norm": 17.70904541015625, + "learning_rate": 1e-06, + "loss": 0.3181, + "num_input_tokens_seen": 493117108, + "step": 8801 + }, + { + "epoch": 19.601336302895323, + "loss": 0.2125520408153534, + "loss_ce": 5.813151801703498e-05, + "loss_iou": 0.095703125, + "loss_num": 0.004150390625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 493117108, + "step": 8801 + }, + { + "epoch": 19.603563474387528, + "grad_norm": 15.63475513458252, + "learning_rate": 1e-06, + "loss": 0.2397, + "num_input_tokens_seen": 493170576, + "step": 8802 + }, + { + "epoch": 19.603563474387528, + "loss": 0.2356223165988922, + "loss_ce": 5.7125496823573485e-05, + "loss_iou": 0.1044921875, + "loss_num": 0.005218505859375, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 493170576, + "step": 8802 + }, + { + "epoch": 19.605790645879733, + "grad_norm": 12.706445693969727, + "learning_rate": 1e-06, + "loss": 0.5809, + "num_input_tokens_seen": 493227004, + "step": 8803 + }, + { + "epoch": 19.605790645879733, + "loss": 0.6226192712783813, + "loss_ce": 6.07035071880091e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.0264892578125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 493227004, + "step": 8803 + }, + { + "epoch": 19.608017817371937, + "grad_norm": 12.90841293334961, + "learning_rate": 1e-06, + "loss": 0.5149, + "num_input_tokens_seen": 493284640, + "step": 8804 + }, + { + "epoch": 19.608017817371937, + "loss": 0.36311158537864685, + "loss_ce": 7.448208634741604e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00738525390625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 493284640, + "step": 8804 + }, + { + "epoch": 19.610244988864142, + "grad_norm": 20.88025665283203, + "learning_rate": 1e-06, + "loss": 0.5289, + "num_input_tokens_seen": 493343304, + "step": 8805 + }, + { + "epoch": 19.610244988864142, + "loss": 0.4120563268661499, + "loss_ce": 6.90398010192439e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 493343304, + "step": 8805 + }, + { + "epoch": 19.612472160356347, + "grad_norm": 18.58097267150879, + "learning_rate": 1e-06, + "loss": 0.3529, + "num_input_tokens_seen": 493401548, + "step": 8806 + }, + { + "epoch": 19.612472160356347, + "loss": 0.3655468821525574, + "loss_ce": 6.83803009451367e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.01513671875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 493401548, + "step": 8806 + }, + { + "epoch": 19.614699331848552, + "grad_norm": 23.805185317993164, + "learning_rate": 1e-06, + "loss": 0.3137, + "num_input_tokens_seen": 493456364, + "step": 8807 + }, + { + "epoch": 19.614699331848552, + "loss": 0.414080947637558, + "loss_ce": 7.948270649649203e-05, + "loss_iou": 0.1796875, + "loss_num": 0.0108642578125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 493456364, + "step": 8807 + }, + { + "epoch": 19.616926503340757, + "grad_norm": 14.647207260131836, + "learning_rate": 1e-06, + "loss": 0.3448, + "num_input_tokens_seen": 493514088, + "step": 8808 + }, + { + "epoch": 19.616926503340757, + "loss": 0.3253961503505707, + "loss_ce": 7.876554445829242e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.005950927734375, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 493514088, + "step": 8808 + }, + { + "epoch": 19.619153674832962, + "grad_norm": 16.693212509155273, + "learning_rate": 1e-06, + "loss": 0.3789, + "num_input_tokens_seen": 493571508, + "step": 8809 + }, + { + "epoch": 19.619153674832962, + "loss": 0.20014190673828125, + "loss_ce": 6.866748299216852e-05, + "loss_iou": 0.08251953125, + "loss_num": 0.00701904296875, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 493571508, + "step": 8809 + }, + { + "epoch": 19.621380846325167, + "grad_norm": 24.3143367767334, + "learning_rate": 1e-06, + "loss": 0.3707, + "num_input_tokens_seen": 493625268, + "step": 8810 + }, + { + "epoch": 19.621380846325167, + "loss": 0.4873785972595215, + "loss_ce": 7.392482802970335e-05, + "loss_iou": 0.208984375, + "loss_num": 0.01373291015625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 493625268, + "step": 8810 + }, + { + "epoch": 19.62360801781737, + "grad_norm": 23.970718383789062, + "learning_rate": 1e-06, + "loss": 0.5419, + "num_input_tokens_seen": 493682744, + "step": 8811 + }, + { + "epoch": 19.62360801781737, + "loss": 0.5781893730163574, + "loss_ce": 6.439335993491113e-05, + "loss_iou": 0.24609375, + "loss_num": 0.0172119140625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 493682744, + "step": 8811 + }, + { + "epoch": 19.625835189309576, + "grad_norm": 16.08030128479004, + "learning_rate": 1e-06, + "loss": 0.3674, + "num_input_tokens_seen": 493738420, + "step": 8812 + }, + { + "epoch": 19.625835189309576, + "loss": 0.36957836151123047, + "loss_ce": 7.152724720072001e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.013916015625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 493738420, + "step": 8812 + }, + { + "epoch": 19.62806236080178, + "grad_norm": 16.537208557128906, + "learning_rate": 1e-06, + "loss": 0.4184, + "num_input_tokens_seen": 493792444, + "step": 8813 + }, + { + "epoch": 19.62806236080178, + "loss": 0.27265116572380066, + "loss_ce": 6.81565361446701e-05, + "loss_iou": 0.1142578125, + "loss_num": 0.0087890625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 493792444, + "step": 8813 + }, + { + "epoch": 19.630289532293986, + "grad_norm": 16.97813606262207, + "learning_rate": 1e-06, + "loss": 0.3923, + "num_input_tokens_seen": 493851536, + "step": 8814 + }, + { + "epoch": 19.630289532293986, + "loss": 0.22918465733528137, + "loss_ce": 5.868993321200833e-05, + "loss_iou": 0.10205078125, + "loss_num": 0.004974365234375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 493851536, + "step": 8814 + }, + { + "epoch": 19.63251670378619, + "grad_norm": 18.847007751464844, + "learning_rate": 1e-06, + "loss": 0.4566, + "num_input_tokens_seen": 493909260, + "step": 8815 + }, + { + "epoch": 19.63251670378619, + "loss": 0.3699719309806824, + "loss_ce": 9.890169894788414e-05, + "loss_iou": 0.1640625, + "loss_num": 0.0084228515625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 493909260, + "step": 8815 + }, + { + "epoch": 19.634743875278396, + "grad_norm": 12.239165306091309, + "learning_rate": 1e-06, + "loss": 0.2959, + "num_input_tokens_seen": 493964284, + "step": 8816 + }, + { + "epoch": 19.634743875278396, + "loss": 0.36634206771850586, + "loss_ce": 7.009244291111827e-05, + "loss_iou": 0.15625, + "loss_num": 0.01080322265625, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 493964284, + "step": 8816 + }, + { + "epoch": 19.6369710467706, + "grad_norm": 20.558364868164062, + "learning_rate": 1e-06, + "loss": 0.4118, + "num_input_tokens_seen": 494020864, + "step": 8817 + }, + { + "epoch": 19.6369710467706, + "loss": 0.4873617887496948, + "loss_ce": 5.70821066503413e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0120849609375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 494020864, + "step": 8817 + }, + { + "epoch": 19.639198218262806, + "grad_norm": 20.281597137451172, + "learning_rate": 1e-06, + "loss": 0.3563, + "num_input_tokens_seen": 494076352, + "step": 8818 + }, + { + "epoch": 19.639198218262806, + "loss": 0.2713017463684082, + "loss_ce": 6.150568515295163e-05, + "loss_iou": 0.125, + "loss_num": 0.00408935546875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 494076352, + "step": 8818 + }, + { + "epoch": 19.64142538975501, + "grad_norm": 11.944673538208008, + "learning_rate": 1e-06, + "loss": 0.2912, + "num_input_tokens_seen": 494132992, + "step": 8819 + }, + { + "epoch": 19.64142538975501, + "loss": 0.2255510687828064, + "loss_ce": 8.72044765856117e-05, + "loss_iou": 0.09912109375, + "loss_num": 0.0054931640625, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 494132992, + "step": 8819 + }, + { + "epoch": 19.643652561247215, + "grad_norm": 17.89927864074707, + "learning_rate": 1e-06, + "loss": 0.4362, + "num_input_tokens_seen": 494187992, + "step": 8820 + }, + { + "epoch": 19.643652561247215, + "loss": 0.3235388398170471, + "loss_ce": 6.778095848858356e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.0086669921875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 494187992, + "step": 8820 + }, + { + "epoch": 19.64587973273942, + "grad_norm": 22.77928924560547, + "learning_rate": 1e-06, + "loss": 0.3405, + "num_input_tokens_seen": 494243788, + "step": 8821 + }, + { + "epoch": 19.64587973273942, + "loss": 0.24566447734832764, + "loss_ce": 5.902632256038487e-05, + "loss_iou": 0.10888671875, + "loss_num": 0.005645751953125, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 494243788, + "step": 8821 + }, + { + "epoch": 19.648106904231625, + "grad_norm": 17.578380584716797, + "learning_rate": 1e-06, + "loss": 0.4421, + "num_input_tokens_seen": 494300640, + "step": 8822 + }, + { + "epoch": 19.648106904231625, + "loss": 0.6269207000732422, + "loss_ce": 8.963636355474591e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.625, + "num_input_tokens_seen": 494300640, + "step": 8822 + }, + { + "epoch": 19.65033407572383, + "grad_norm": 18.126558303833008, + "learning_rate": 1e-06, + "loss": 0.3305, + "num_input_tokens_seen": 494357432, + "step": 8823 + }, + { + "epoch": 19.65033407572383, + "loss": 0.2599582076072693, + "loss_ce": 7.050028216326609e-05, + "loss_iou": 0.11474609375, + "loss_num": 0.006134033203125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 494357432, + "step": 8823 + }, + { + "epoch": 19.652561247216035, + "grad_norm": 14.803417205810547, + "learning_rate": 1e-06, + "loss": 0.6651, + "num_input_tokens_seen": 494412312, + "step": 8824 + }, + { + "epoch": 19.652561247216035, + "loss": 0.7808718681335449, + "loss_ce": 0.00011010345770046115, + "loss_iou": 0.3125, + "loss_num": 0.031494140625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 494412312, + "step": 8824 + }, + { + "epoch": 19.65478841870824, + "grad_norm": 28.97988510131836, + "learning_rate": 1e-06, + "loss": 0.5272, + "num_input_tokens_seen": 494465128, + "step": 8825 + }, + { + "epoch": 19.65478841870824, + "loss": 0.629551112651825, + "loss_ce": 0.0005227710935287178, + "loss_iou": 0.287109375, + "loss_num": 0.0113525390625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 494465128, + "step": 8825 + }, + { + "epoch": 19.657015590200444, + "grad_norm": 14.80961799621582, + "learning_rate": 1e-06, + "loss": 0.3384, + "num_input_tokens_seen": 494520212, + "step": 8826 + }, + { + "epoch": 19.657015590200444, + "loss": 0.37103530764579773, + "loss_ce": 6.362018757499754e-05, + "loss_iou": 0.1640625, + "loss_num": 0.0084228515625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 494520212, + "step": 8826 + }, + { + "epoch": 19.65924276169265, + "grad_norm": 11.278867721557617, + "learning_rate": 1e-06, + "loss": 0.4748, + "num_input_tokens_seen": 494576024, + "step": 8827 + }, + { + "epoch": 19.65924276169265, + "loss": 0.3697161376476288, + "loss_ce": 8.720482583157718e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0067138671875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 494576024, + "step": 8827 + }, + { + "epoch": 19.661469933184854, + "grad_norm": 21.07610321044922, + "learning_rate": 1e-06, + "loss": 0.4067, + "num_input_tokens_seen": 494631812, + "step": 8828 + }, + { + "epoch": 19.661469933184854, + "loss": 0.46012747287750244, + "loss_ce": 7.498678314732388e-05, + "loss_iou": 0.1796875, + "loss_num": 0.02001953125, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 494631812, + "step": 8828 + }, + { + "epoch": 19.66369710467706, + "grad_norm": 29.130268096923828, + "learning_rate": 1e-06, + "loss": 0.5162, + "num_input_tokens_seen": 494688008, + "step": 8829 + }, + { + "epoch": 19.66369710467706, + "loss": 0.34423893690109253, + "loss_ce": 6.168825348140672e-05, + "loss_iou": 0.146484375, + "loss_num": 0.0103759765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 494688008, + "step": 8829 + }, + { + "epoch": 19.665924276169264, + "grad_norm": 15.006963729858398, + "learning_rate": 1e-06, + "loss": 0.2145, + "num_input_tokens_seen": 494743180, + "step": 8830 + }, + { + "epoch": 19.665924276169264, + "loss": 0.2853913903236389, + "loss_ce": 0.0004792909894604236, + "loss_iou": 0.1220703125, + "loss_num": 0.008056640625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 494743180, + "step": 8830 + }, + { + "epoch": 19.66815144766147, + "grad_norm": 18.46776580810547, + "learning_rate": 1e-06, + "loss": 0.3155, + "num_input_tokens_seen": 494800836, + "step": 8831 + }, + { + "epoch": 19.66815144766147, + "loss": 0.3410201072692871, + "loss_ce": 7.773660036036745e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0072021484375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 494800836, + "step": 8831 + }, + { + "epoch": 19.670378619153674, + "grad_norm": 23.41823387145996, + "learning_rate": 1e-06, + "loss": 0.4599, + "num_input_tokens_seen": 494858464, + "step": 8832 + }, + { + "epoch": 19.670378619153674, + "loss": 0.5210896730422974, + "loss_ce": 9.359928662888706e-05, + "loss_iou": 0.224609375, + "loss_num": 0.01422119140625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 494858464, + "step": 8832 + }, + { + "epoch": 19.67260579064588, + "grad_norm": 17.398420333862305, + "learning_rate": 1e-06, + "loss": 0.3252, + "num_input_tokens_seen": 494912524, + "step": 8833 + }, + { + "epoch": 19.67260579064588, + "loss": 0.5025025606155396, + "loss_ce": 6.115515134297311e-05, + "loss_iou": 0.19921875, + "loss_num": 0.0208740234375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 494912524, + "step": 8833 + }, + { + "epoch": 19.674832962138083, + "grad_norm": 12.045059204101562, + "learning_rate": 1e-06, + "loss": 0.4081, + "num_input_tokens_seen": 494968308, + "step": 8834 + }, + { + "epoch": 19.674832962138083, + "loss": 0.38177552819252014, + "loss_ce": 6.166419188957661e-05, + "loss_iou": 0.173828125, + "loss_num": 0.0067138671875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 494968308, + "step": 8834 + }, + { + "epoch": 19.677060133630288, + "grad_norm": 24.122774124145508, + "learning_rate": 1e-06, + "loss": 0.4293, + "num_input_tokens_seen": 495022460, + "step": 8835 + }, + { + "epoch": 19.677060133630288, + "loss": 0.5443927049636841, + "loss_ce": 8.117854304146022e-05, + "loss_iou": 0.220703125, + "loss_num": 0.0206298828125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 495022460, + "step": 8835 + }, + { + "epoch": 19.679287305122493, + "grad_norm": 30.17648696899414, + "learning_rate": 1e-06, + "loss": 0.3063, + "num_input_tokens_seen": 495079416, + "step": 8836 + }, + { + "epoch": 19.679287305122493, + "loss": 0.363957941532135, + "loss_ce": 6.632816075580195e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.007720947265625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 495079416, + "step": 8836 + }, + { + "epoch": 19.681514476614698, + "grad_norm": 28.630796432495117, + "learning_rate": 1e-06, + "loss": 0.4824, + "num_input_tokens_seen": 495136332, + "step": 8837 + }, + { + "epoch": 19.681514476614698, + "loss": 0.3295344114303589, + "loss_ce": 6.66461419314146e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 495136332, + "step": 8837 + }, + { + "epoch": 19.683741648106903, + "grad_norm": 16.524477005004883, + "learning_rate": 1e-06, + "loss": 0.3898, + "num_input_tokens_seen": 495192408, + "step": 8838 + }, + { + "epoch": 19.683741648106903, + "loss": 0.47682133316993713, + "loss_ce": 7.57328380132094e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.017578125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 495192408, + "step": 8838 + }, + { + "epoch": 19.685968819599108, + "grad_norm": 15.386173248291016, + "learning_rate": 1e-06, + "loss": 0.487, + "num_input_tokens_seen": 495248052, + "step": 8839 + }, + { + "epoch": 19.685968819599108, + "loss": 0.6375049948692322, + "loss_ce": 0.0001758981088642031, + "loss_iou": 0.251953125, + "loss_num": 0.027099609375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 495248052, + "step": 8839 + }, + { + "epoch": 19.688195991091312, + "grad_norm": 13.274645805358887, + "learning_rate": 1e-06, + "loss": 0.4217, + "num_input_tokens_seen": 495304220, + "step": 8840 + }, + { + "epoch": 19.688195991091312, + "loss": 0.40014198422431946, + "loss_ce": 5.652975960401818e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.01177978515625, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 495304220, + "step": 8840 + }, + { + "epoch": 19.690423162583517, + "grad_norm": 19.099824905395508, + "learning_rate": 1e-06, + "loss": 0.4126, + "num_input_tokens_seen": 495360180, + "step": 8841 + }, + { + "epoch": 19.690423162583517, + "loss": 0.408029705286026, + "loss_ce": 7.075037865433842e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.007171630859375, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 495360180, + "step": 8841 + }, + { + "epoch": 19.692650334075722, + "grad_norm": 14.30420207977295, + "learning_rate": 1e-06, + "loss": 0.3627, + "num_input_tokens_seen": 495417216, + "step": 8842 + }, + { + "epoch": 19.692650334075722, + "loss": 0.5124090909957886, + "loss_ce": 7.99886038294062e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.01458740234375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 495417216, + "step": 8842 + }, + { + "epoch": 19.694877505567927, + "grad_norm": 16.062725067138672, + "learning_rate": 1e-06, + "loss": 0.4026, + "num_input_tokens_seen": 495473692, + "step": 8843 + }, + { + "epoch": 19.694877505567927, + "loss": 0.44783011078834534, + "loss_ce": 0.0001982695539481938, + "loss_iou": 0.1728515625, + "loss_num": 0.0205078125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 495473692, + "step": 8843 + }, + { + "epoch": 19.697104677060132, + "grad_norm": 13.419224739074707, + "learning_rate": 1e-06, + "loss": 0.3158, + "num_input_tokens_seen": 495529692, + "step": 8844 + }, + { + "epoch": 19.697104677060132, + "loss": 0.41748589277267456, + "loss_ce": 6.646426481893286e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.021484375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 495529692, + "step": 8844 + }, + { + "epoch": 19.69933184855234, + "grad_norm": 20.578535079956055, + "learning_rate": 1e-06, + "loss": 0.4155, + "num_input_tokens_seen": 495583708, + "step": 8845 + }, + { + "epoch": 19.69933184855234, + "loss": 0.42976486682891846, + "loss_ce": 7.74070795159787e-05, + "loss_iou": 0.19140625, + "loss_num": 0.00946044921875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 495583708, + "step": 8845 + }, + { + "epoch": 19.70155902004454, + "grad_norm": 16.558269500732422, + "learning_rate": 1e-06, + "loss": 0.4006, + "num_input_tokens_seen": 495636772, + "step": 8846 + }, + { + "epoch": 19.70155902004454, + "loss": 0.49481481313705444, + "loss_ce": 6.387023313436657e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0247802734375, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 495636772, + "step": 8846 + }, + { + "epoch": 19.70378619153675, + "grad_norm": 18.609947204589844, + "learning_rate": 1e-06, + "loss": 0.3799, + "num_input_tokens_seen": 495690168, + "step": 8847 + }, + { + "epoch": 19.70378619153675, + "loss": 0.3060356378555298, + "loss_ce": 6.63895916659385e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.006927490234375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 495690168, + "step": 8847 + }, + { + "epoch": 19.706013363028955, + "grad_norm": 19.02481460571289, + "learning_rate": 1e-06, + "loss": 0.4229, + "num_input_tokens_seen": 495748480, + "step": 8848 + }, + { + "epoch": 19.706013363028955, + "loss": 0.520011305809021, + "loss_ce": 0.00011385796096874401, + "loss_iou": 0.220703125, + "loss_num": 0.01556396484375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 495748480, + "step": 8848 + }, + { + "epoch": 19.70824053452116, + "grad_norm": 13.990358352661133, + "learning_rate": 1e-06, + "loss": 0.3866, + "num_input_tokens_seen": 495805712, + "step": 8849 + }, + { + "epoch": 19.70824053452116, + "loss": 0.49600470066070557, + "loss_ce": 9.406625758856535e-05, + "loss_iou": 0.203125, + "loss_num": 0.017822265625, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 495805712, + "step": 8849 + }, + { + "epoch": 19.710467706013365, + "grad_norm": 15.833937644958496, + "learning_rate": 1e-06, + "loss": 0.4138, + "num_input_tokens_seen": 495862696, + "step": 8850 + }, + { + "epoch": 19.710467706013365, + "loss": 0.44215184450149536, + "loss_ce": 7.422211638186127e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.00921630859375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 495862696, + "step": 8850 + }, + { + "epoch": 19.71269487750557, + "grad_norm": 24.503477096557617, + "learning_rate": 1e-06, + "loss": 0.412, + "num_input_tokens_seen": 495913924, + "step": 8851 + }, + { + "epoch": 19.71269487750557, + "loss": 0.329414427280426, + "loss_ce": 6.872846279293299e-05, + "loss_iou": 0.146484375, + "loss_num": 0.007354736328125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 495913924, + "step": 8851 + }, + { + "epoch": 19.714922048997774, + "grad_norm": 20.467147827148438, + "learning_rate": 1e-06, + "loss": 0.4762, + "num_input_tokens_seen": 495969596, + "step": 8852 + }, + { + "epoch": 19.714922048997774, + "loss": 0.4412245452404022, + "loss_ce": 6.245089753065258e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01068115234375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 495969596, + "step": 8852 + }, + { + "epoch": 19.71714922048998, + "grad_norm": 16.596433639526367, + "learning_rate": 1e-06, + "loss": 0.3338, + "num_input_tokens_seen": 496022692, + "step": 8853 + }, + { + "epoch": 19.71714922048998, + "loss": 0.3814437985420227, + "loss_ce": 6.563542410731316e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.007781982421875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 496022692, + "step": 8853 + }, + { + "epoch": 19.719376391982184, + "grad_norm": 14.351813316345215, + "learning_rate": 1e-06, + "loss": 0.2733, + "num_input_tokens_seen": 496077624, + "step": 8854 + }, + { + "epoch": 19.719376391982184, + "loss": 0.30036044120788574, + "loss_ce": 6.749367457814515e-05, + "loss_iou": 0.130859375, + "loss_num": 0.00775146484375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 496077624, + "step": 8854 + }, + { + "epoch": 19.72160356347439, + "grad_norm": 21.87047004699707, + "learning_rate": 1e-06, + "loss": 0.5035, + "num_input_tokens_seen": 496133564, + "step": 8855 + }, + { + "epoch": 19.72160356347439, + "loss": 0.36122041940689087, + "loss_ce": 7.540646765846759e-05, + "loss_iou": 0.16015625, + "loss_num": 0.0081787109375, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 496133564, + "step": 8855 + }, + { + "epoch": 19.723830734966594, + "grad_norm": 18.896873474121094, + "learning_rate": 1e-06, + "loss": 0.4041, + "num_input_tokens_seen": 496190924, + "step": 8856 + }, + { + "epoch": 19.723830734966594, + "loss": 0.38140565156936646, + "loss_ce": 5.800734652439132e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0135498046875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 496190924, + "step": 8856 + }, + { + "epoch": 19.7260579064588, + "grad_norm": 20.9360408782959, + "learning_rate": 1e-06, + "loss": 0.3515, + "num_input_tokens_seen": 496247316, + "step": 8857 + }, + { + "epoch": 19.7260579064588, + "loss": 0.49714395403862, + "loss_ce": 7.367074431385845e-05, + "loss_iou": 0.224609375, + "loss_num": 0.009521484375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 496247316, + "step": 8857 + }, + { + "epoch": 19.728285077951004, + "grad_norm": 22.762691497802734, + "learning_rate": 1e-06, + "loss": 0.3882, + "num_input_tokens_seen": 496305148, + "step": 8858 + }, + { + "epoch": 19.728285077951004, + "loss": 0.37469351291656494, + "loss_ce": 5.972578583168797e-05, + "loss_iou": 0.154296875, + "loss_num": 0.013427734375, + "loss_xval": 0.375, + "num_input_tokens_seen": 496305148, + "step": 8858 + }, + { + "epoch": 19.73051224944321, + "grad_norm": 17.930313110351562, + "learning_rate": 1e-06, + "loss": 0.4452, + "num_input_tokens_seen": 496360372, + "step": 8859 + }, + { + "epoch": 19.73051224944321, + "loss": 0.4869701862335205, + "loss_ce": 6.224210665095598e-05, + "loss_iou": 0.228515625, + "loss_num": 0.005859375, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 496360372, + "step": 8859 + }, + { + "epoch": 19.732739420935413, + "grad_norm": 20.98902702331543, + "learning_rate": 1e-06, + "loss": 0.3769, + "num_input_tokens_seen": 496418148, + "step": 8860 + }, + { + "epoch": 19.732739420935413, + "loss": 0.367613285779953, + "loss_ce": 5.957091343589127e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00927734375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 496418148, + "step": 8860 + }, + { + "epoch": 19.734966592427618, + "grad_norm": 27.783918380737305, + "learning_rate": 1e-06, + "loss": 0.3618, + "num_input_tokens_seen": 496475176, + "step": 8861 + }, + { + "epoch": 19.734966592427618, + "loss": 0.31055858731269836, + "loss_ce": 7.274634845089167e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.012451171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 496475176, + "step": 8861 + }, + { + "epoch": 19.737193763919823, + "grad_norm": 59.55124282836914, + "learning_rate": 1e-06, + "loss": 0.3794, + "num_input_tokens_seen": 496530672, + "step": 8862 + }, + { + "epoch": 19.737193763919823, + "loss": 0.5371950268745422, + "loss_ce": 8.565981988795102e-05, + "loss_iou": 0.251953125, + "loss_num": 0.00640869140625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 496530672, + "step": 8862 + }, + { + "epoch": 19.739420935412028, + "grad_norm": 20.648197174072266, + "learning_rate": 1e-06, + "loss": 0.4529, + "num_input_tokens_seen": 496587424, + "step": 8863 + }, + { + "epoch": 19.739420935412028, + "loss": 0.3034108877182007, + "loss_ce": 6.618791667278856e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.00653076171875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 496587424, + "step": 8863 + }, + { + "epoch": 19.741648106904233, + "grad_norm": 20.388151168823242, + "learning_rate": 1e-06, + "loss": 0.5532, + "num_input_tokens_seen": 496645436, + "step": 8864 + }, + { + "epoch": 19.741648106904233, + "loss": 0.7695181369781494, + "loss_ce": 0.00010891164129134268, + "loss_iou": 0.2890625, + "loss_num": 0.037841796875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 496645436, + "step": 8864 + }, + { + "epoch": 19.743875278396438, + "grad_norm": 13.303028106689453, + "learning_rate": 1e-06, + "loss": 0.3442, + "num_input_tokens_seen": 496701972, + "step": 8865 + }, + { + "epoch": 19.743875278396438, + "loss": 0.2887694835662842, + "loss_ce": 7.317646668525413e-05, + "loss_iou": 0.12890625, + "loss_num": 0.006439208984375, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 496701972, + "step": 8865 + }, + { + "epoch": 19.746102449888642, + "grad_norm": 63.60599899291992, + "learning_rate": 1e-06, + "loss": 0.3638, + "num_input_tokens_seen": 496755828, + "step": 8866 + }, + { + "epoch": 19.746102449888642, + "loss": 0.4621775150299072, + "loss_ce": 8.034885104279965e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 496755828, + "step": 8866 + }, + { + "epoch": 19.748329621380847, + "grad_norm": 22.44493293762207, + "learning_rate": 1e-06, + "loss": 0.3002, + "num_input_tokens_seen": 496812708, + "step": 8867 + }, + { + "epoch": 19.748329621380847, + "loss": 0.3059879541397095, + "loss_ce": 7.977118366397917e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.00921630859375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 496812708, + "step": 8867 + }, + { + "epoch": 19.750556792873052, + "grad_norm": 21.83323860168457, + "learning_rate": 1e-06, + "loss": 0.385, + "num_input_tokens_seen": 496868560, + "step": 8868 + }, + { + "epoch": 19.750556792873052, + "loss": 0.30378103256225586, + "loss_ce": 7.010510307736695e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.004364013671875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 496868560, + "step": 8868 + }, + { + "epoch": 19.752783964365257, + "grad_norm": 32.918434143066406, + "learning_rate": 1e-06, + "loss": 0.4902, + "num_input_tokens_seen": 496922692, + "step": 8869 + }, + { + "epoch": 19.752783964365257, + "loss": 0.5124045014381409, + "loss_ce": 7.53938511479646e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.0091552734375, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 496922692, + "step": 8869 + }, + { + "epoch": 19.755011135857462, + "grad_norm": 25.483102798461914, + "learning_rate": 1e-06, + "loss": 0.3786, + "num_input_tokens_seen": 496980516, + "step": 8870 + }, + { + "epoch": 19.755011135857462, + "loss": 0.3339354991912842, + "loss_ce": 7.317691051866859e-05, + "loss_iou": 0.154296875, + "loss_num": 0.004913330078125, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 496980516, + "step": 8870 + }, + { + "epoch": 19.757238307349667, + "grad_norm": 15.504528045654297, + "learning_rate": 1e-06, + "loss": 0.4814, + "num_input_tokens_seen": 497037204, + "step": 8871 + }, + { + "epoch": 19.757238307349667, + "loss": 0.6138589382171631, + "loss_ce": 7.417285814881325e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0145263671875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 497037204, + "step": 8871 + }, + { + "epoch": 19.75946547884187, + "grad_norm": 25.176443099975586, + "learning_rate": 1e-06, + "loss": 0.3316, + "num_input_tokens_seen": 497089592, + "step": 8872 + }, + { + "epoch": 19.75946547884187, + "loss": 0.2530931532382965, + "loss_ce": 7.191546319518238e-05, + "loss_iou": 0.1123046875, + "loss_num": 0.005584716796875, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 497089592, + "step": 8872 + }, + { + "epoch": 19.761692650334076, + "grad_norm": 15.639261245727539, + "learning_rate": 1e-06, + "loss": 0.3225, + "num_input_tokens_seen": 497143680, + "step": 8873 + }, + { + "epoch": 19.761692650334076, + "loss": 0.2651139497756958, + "loss_ce": 9.929385123541579e-05, + "loss_iou": 0.11767578125, + "loss_num": 0.005950927734375, + "loss_xval": 0.265625, + "num_input_tokens_seen": 497143680, + "step": 8873 + }, + { + "epoch": 19.76391982182628, + "grad_norm": 23.403366088867188, + "learning_rate": 1e-06, + "loss": 0.3117, + "num_input_tokens_seen": 497198980, + "step": 8874 + }, + { + "epoch": 19.76391982182628, + "loss": 0.3706684112548828, + "loss_ce": 6.295710045378655e-05, + "loss_iou": 0.171875, + "loss_num": 0.005462646484375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 497198980, + "step": 8874 + }, + { + "epoch": 19.766146993318486, + "grad_norm": 14.233652114868164, + "learning_rate": 1e-06, + "loss": 0.3257, + "num_input_tokens_seen": 497257808, + "step": 8875 + }, + { + "epoch": 19.766146993318486, + "loss": 0.41550278663635254, + "loss_ce": 0.0003416564140934497, + "loss_iou": 0.1669921875, + "loss_num": 0.0164794921875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 497257808, + "step": 8875 + }, + { + "epoch": 19.76837416481069, + "grad_norm": 13.950034141540527, + "learning_rate": 1e-06, + "loss": 0.3255, + "num_input_tokens_seen": 497314976, + "step": 8876 + }, + { + "epoch": 19.76837416481069, + "loss": 0.2907865047454834, + "loss_ce": 7.603740959893912e-05, + "loss_iou": 0.12890625, + "loss_num": 0.006561279296875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 497314976, + "step": 8876 + }, + { + "epoch": 19.770601336302896, + "grad_norm": 12.77400016784668, + "learning_rate": 1e-06, + "loss": 0.3122, + "num_input_tokens_seen": 497371316, + "step": 8877 + }, + { + "epoch": 19.770601336302896, + "loss": 0.302242249250412, + "loss_ce": 5.7184963225154206e-05, + "loss_iou": 0.1328125, + "loss_num": 0.00732421875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 497371316, + "step": 8877 + }, + { + "epoch": 19.7728285077951, + "grad_norm": 16.136669158935547, + "learning_rate": 1e-06, + "loss": 0.4607, + "num_input_tokens_seen": 497428012, + "step": 8878 + }, + { + "epoch": 19.7728285077951, + "loss": 0.7113887667655945, + "loss_ce": 8.501914271619171e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 497428012, + "step": 8878 + }, + { + "epoch": 19.775055679287306, + "grad_norm": 26.28825569152832, + "learning_rate": 1e-06, + "loss": 0.2925, + "num_input_tokens_seen": 497485468, + "step": 8879 + }, + { + "epoch": 19.775055679287306, + "loss": 0.346199095249176, + "loss_ce": 6.87175925122574e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.00469970703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 497485468, + "step": 8879 + }, + { + "epoch": 19.77728285077951, + "grad_norm": 17.712745666503906, + "learning_rate": 1e-06, + "loss": 0.3412, + "num_input_tokens_seen": 497543128, + "step": 8880 + }, + { + "epoch": 19.77728285077951, + "loss": 0.32294753193855286, + "loss_ce": 7.154815102694556e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.00537109375, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 497543128, + "step": 8880 + }, + { + "epoch": 19.779510022271715, + "grad_norm": 16.877283096313477, + "learning_rate": 1e-06, + "loss": 0.3093, + "num_input_tokens_seen": 497601184, + "step": 8881 + }, + { + "epoch": 19.779510022271715, + "loss": 0.26191645860671997, + "loss_ce": 7.561576785519719e-05, + "loss_iou": 0.10791015625, + "loss_num": 0.0091552734375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 497601184, + "step": 8881 + }, + { + "epoch": 19.78173719376392, + "grad_norm": 15.67979621887207, + "learning_rate": 1e-06, + "loss": 0.3683, + "num_input_tokens_seen": 497658448, + "step": 8882 + }, + { + "epoch": 19.78173719376392, + "loss": 0.4387954771518707, + "loss_ce": 7.479546184185892e-05, + "loss_iou": 0.171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 497658448, + "step": 8882 + }, + { + "epoch": 19.783964365256125, + "grad_norm": 15.870912551879883, + "learning_rate": 1e-06, + "loss": 0.4725, + "num_input_tokens_seen": 497714064, + "step": 8883 + }, + { + "epoch": 19.783964365256125, + "loss": 0.47480326890945435, + "loss_ce": 7.185728463809937e-05, + "loss_iou": 0.212890625, + "loss_num": 0.0096435546875, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 497714064, + "step": 8883 + }, + { + "epoch": 19.78619153674833, + "grad_norm": 22.020280838012695, + "learning_rate": 1e-06, + "loss": 0.5039, + "num_input_tokens_seen": 497768280, + "step": 8884 + }, + { + "epoch": 19.78619153674833, + "loss": 0.38886767625808716, + "loss_ce": 7.375521818175912e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.0032501220703125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 497768280, + "step": 8884 + }, + { + "epoch": 19.788418708240535, + "grad_norm": 28.759016036987305, + "learning_rate": 1e-06, + "loss": 0.4566, + "num_input_tokens_seen": 497822296, + "step": 8885 + }, + { + "epoch": 19.788418708240535, + "loss": 0.5814878940582275, + "loss_ce": 6.704413681291044e-05, + "loss_iou": 0.236328125, + "loss_num": 0.021728515625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 497822296, + "step": 8885 + }, + { + "epoch": 19.79064587973274, + "grad_norm": 19.10303497314453, + "learning_rate": 1e-06, + "loss": 0.6282, + "num_input_tokens_seen": 497880436, + "step": 8886 + }, + { + "epoch": 19.79064587973274, + "loss": 0.586006760597229, + "loss_ce": 6.921241583768278e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.0263671875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 497880436, + "step": 8886 + }, + { + "epoch": 19.792873051224944, + "grad_norm": 24.136104583740234, + "learning_rate": 1e-06, + "loss": 0.3318, + "num_input_tokens_seen": 497934040, + "step": 8887 + }, + { + "epoch": 19.792873051224944, + "loss": 0.3225743770599365, + "loss_ce": 6.460573058575392e-05, + "loss_iou": 0.140625, + "loss_num": 0.00830078125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 497934040, + "step": 8887 + }, + { + "epoch": 19.79510022271715, + "grad_norm": 15.562583923339844, + "learning_rate": 1e-06, + "loss": 0.4173, + "num_input_tokens_seen": 497989992, + "step": 8888 + }, + { + "epoch": 19.79510022271715, + "loss": 0.4295352101325989, + "loss_ce": 6.133929127827287e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0185546875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 497989992, + "step": 8888 + }, + { + "epoch": 19.797327394209354, + "grad_norm": 12.647050857543945, + "learning_rate": 1e-06, + "loss": 0.3252, + "num_input_tokens_seen": 498045484, + "step": 8889 + }, + { + "epoch": 19.797327394209354, + "loss": 0.26227039098739624, + "loss_ce": 6.334840873023495e-05, + "loss_iou": 0.107421875, + "loss_num": 0.009521484375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 498045484, + "step": 8889 + }, + { + "epoch": 19.79955456570156, + "grad_norm": 17.201618194580078, + "learning_rate": 1e-06, + "loss": 0.3303, + "num_input_tokens_seen": 498102048, + "step": 8890 + }, + { + "epoch": 19.79955456570156, + "loss": 0.31794556975364685, + "loss_ce": 7.44800636311993e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.01220703125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 498102048, + "step": 8890 + }, + { + "epoch": 19.801781737193764, + "grad_norm": 22.599042892456055, + "learning_rate": 1e-06, + "loss": 0.3699, + "num_input_tokens_seen": 498159040, + "step": 8891 + }, + { + "epoch": 19.801781737193764, + "loss": 0.2396983951330185, + "loss_ce": 7.437313615810126e-05, + "loss_iou": 0.107421875, + "loss_num": 0.00494384765625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 498159040, + "step": 8891 + }, + { + "epoch": 19.80400890868597, + "grad_norm": 26.07795524597168, + "learning_rate": 1e-06, + "loss": 0.4677, + "num_input_tokens_seen": 498212312, + "step": 8892 + }, + { + "epoch": 19.80400890868597, + "loss": 0.5568862557411194, + "loss_ce": 0.00012353430793154985, + "loss_iou": 0.2333984375, + "loss_num": 0.017822265625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 498212312, + "step": 8892 + }, + { + "epoch": 19.806236080178174, + "grad_norm": 22.389713287353516, + "learning_rate": 1e-06, + "loss": 0.3661, + "num_input_tokens_seen": 498267900, + "step": 8893 + }, + { + "epoch": 19.806236080178174, + "loss": 0.4324456751346588, + "loss_ce": 7.261322753038257e-05, + "loss_iou": 0.201171875, + "loss_num": 0.005950927734375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 498267900, + "step": 8893 + }, + { + "epoch": 19.80846325167038, + "grad_norm": 16.400392532348633, + "learning_rate": 1e-06, + "loss": 0.2981, + "num_input_tokens_seen": 498324320, + "step": 8894 + }, + { + "epoch": 19.80846325167038, + "loss": 0.27405545115470886, + "loss_ce": 6.862355803605169e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.00360107421875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 498324320, + "step": 8894 + }, + { + "epoch": 19.810690423162583, + "grad_norm": 21.506378173828125, + "learning_rate": 1e-06, + "loss": 0.3227, + "num_input_tokens_seen": 498378944, + "step": 8895 + }, + { + "epoch": 19.810690423162583, + "loss": 0.3589167296886444, + "loss_ce": 6.0540056438185275e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.01043701171875, + "loss_xval": 0.359375, + "num_input_tokens_seen": 498378944, + "step": 8895 + }, + { + "epoch": 19.812917594654788, + "grad_norm": 23.304874420166016, + "learning_rate": 1e-06, + "loss": 0.2775, + "num_input_tokens_seen": 498432504, + "step": 8896 + }, + { + "epoch": 19.812917594654788, + "loss": 0.27460119128227234, + "loss_ce": 6.50603833491914e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.005218505859375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 498432504, + "step": 8896 + }, + { + "epoch": 19.815144766146993, + "grad_norm": 15.28256893157959, + "learning_rate": 1e-06, + "loss": 0.334, + "num_input_tokens_seen": 498489036, + "step": 8897 + }, + { + "epoch": 19.815144766146993, + "loss": 0.3256245255470276, + "loss_ce": 6.301122630247846e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00518798828125, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 498489036, + "step": 8897 + }, + { + "epoch": 19.817371937639198, + "grad_norm": 37.541343688964844, + "learning_rate": 1e-06, + "loss": 0.556, + "num_input_tokens_seen": 498544072, + "step": 8898 + }, + { + "epoch": 19.817371937639198, + "loss": 0.7319257259368896, + "loss_ce": 0.0001142127366620116, + "loss_iou": 0.26953125, + "loss_num": 0.038818359375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 498544072, + "step": 8898 + }, + { + "epoch": 19.819599109131403, + "grad_norm": 35.13840866088867, + "learning_rate": 1e-06, + "loss": 0.4129, + "num_input_tokens_seen": 498600592, + "step": 8899 + }, + { + "epoch": 19.819599109131403, + "loss": 0.2181350737810135, + "loss_ce": 5.645793498842977e-05, + "loss_iou": 0.08837890625, + "loss_num": 0.00823974609375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 498600592, + "step": 8899 + }, + { + "epoch": 19.821826280623608, + "grad_norm": 18.837566375732422, + "learning_rate": 1e-06, + "loss": 0.3815, + "num_input_tokens_seen": 498655692, + "step": 8900 + }, + { + "epoch": 19.821826280623608, + "loss": 0.21076907217502594, + "loss_ce": 6.046070120646618e-05, + "loss_iou": 0.09521484375, + "loss_num": 0.004058837890625, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 498655692, + "step": 8900 + }, + { + "epoch": 19.824053452115812, + "grad_norm": 14.442410469055176, + "learning_rate": 1e-06, + "loss": 0.4286, + "num_input_tokens_seen": 498710648, + "step": 8901 + }, + { + "epoch": 19.824053452115812, + "loss": 0.48480677604675293, + "loss_ce": 6.559376924997196e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0150146484375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 498710648, + "step": 8901 + }, + { + "epoch": 19.826280623608017, + "grad_norm": 15.92866039276123, + "learning_rate": 1e-06, + "loss": 0.3134, + "num_input_tokens_seen": 498766436, + "step": 8902 + }, + { + "epoch": 19.826280623608017, + "loss": 0.35358044505119324, + "loss_ce": 6.481447053374723e-05, + "loss_iou": 0.1328125, + "loss_num": 0.017578125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 498766436, + "step": 8902 + }, + { + "epoch": 19.828507795100222, + "grad_norm": 18.898021697998047, + "learning_rate": 1e-06, + "loss": 0.5045, + "num_input_tokens_seen": 498821800, + "step": 8903 + }, + { + "epoch": 19.828507795100222, + "loss": 0.49323803186416626, + "loss_ce": 7.400984031846747e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.01385498046875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 498821800, + "step": 8903 + }, + { + "epoch": 19.830734966592427, + "grad_norm": 11.007697105407715, + "learning_rate": 1e-06, + "loss": 0.2899, + "num_input_tokens_seen": 498876356, + "step": 8904 + }, + { + "epoch": 19.830734966592427, + "loss": 0.2739826440811157, + "loss_ce": 0.00017892879259306937, + "loss_iou": 0.11669921875, + "loss_num": 0.0081787109375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 498876356, + "step": 8904 + }, + { + "epoch": 19.832962138084632, + "grad_norm": 11.861038208007812, + "learning_rate": 1e-06, + "loss": 0.388, + "num_input_tokens_seen": 498934096, + "step": 8905 + }, + { + "epoch": 19.832962138084632, + "loss": 0.34637463092803955, + "loss_ce": 6.114893039921299e-05, + "loss_iou": 0.154296875, + "loss_num": 0.00738525390625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 498934096, + "step": 8905 + }, + { + "epoch": 19.835189309576837, + "grad_norm": 18.919658660888672, + "learning_rate": 1e-06, + "loss": 0.3537, + "num_input_tokens_seen": 498989812, + "step": 8906 + }, + { + "epoch": 19.835189309576837, + "loss": 0.30976182222366333, + "loss_ce": 6.946048233658075e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.004669189453125, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 498989812, + "step": 8906 + }, + { + "epoch": 19.83741648106904, + "grad_norm": 15.281343460083008, + "learning_rate": 1e-06, + "loss": 0.3773, + "num_input_tokens_seen": 499047512, + "step": 8907 + }, + { + "epoch": 19.83741648106904, + "loss": 0.3993797302246094, + "loss_ce": 8.777561015449464e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.01385498046875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 499047512, + "step": 8907 + }, + { + "epoch": 19.839643652561247, + "grad_norm": 14.046529769897461, + "learning_rate": 1e-06, + "loss": 0.431, + "num_input_tokens_seen": 499105152, + "step": 8908 + }, + { + "epoch": 19.839643652561247, + "loss": 0.6798744201660156, + "loss_ce": 6.483346078312024e-05, + "loss_iou": 0.279296875, + "loss_num": 0.024169921875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 499105152, + "step": 8908 + }, + { + "epoch": 19.84187082405345, + "grad_norm": 19.07868766784668, + "learning_rate": 1e-06, + "loss": 0.362, + "num_input_tokens_seen": 499160952, + "step": 8909 + }, + { + "epoch": 19.84187082405345, + "loss": 0.5317122936248779, + "loss_ce": 9.61277837632224e-05, + "loss_iou": 0.205078125, + "loss_num": 0.024169921875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 499160952, + "step": 8909 + }, + { + "epoch": 19.844097995545656, + "grad_norm": 17.901517868041992, + "learning_rate": 1e-06, + "loss": 0.4618, + "num_input_tokens_seen": 499217672, + "step": 8910 + }, + { + "epoch": 19.844097995545656, + "loss": 0.3117324709892273, + "loss_ce": 8.69435680215247e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 499217672, + "step": 8910 + }, + { + "epoch": 19.84632516703786, + "grad_norm": 28.3020076751709, + "learning_rate": 1e-06, + "loss": 0.433, + "num_input_tokens_seen": 499274752, + "step": 8911 + }, + { + "epoch": 19.84632516703786, + "loss": 0.3874030113220215, + "loss_ce": 7.39007446100004e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0059814453125, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 499274752, + "step": 8911 + }, + { + "epoch": 19.848552338530066, + "grad_norm": 29.61766242980957, + "learning_rate": 1e-06, + "loss": 0.3846, + "num_input_tokens_seen": 499327256, + "step": 8912 + }, + { + "epoch": 19.848552338530066, + "loss": 0.24732181429862976, + "loss_ce": 6.841085996711627e-05, + "loss_iou": 0.111328125, + "loss_num": 0.004974365234375, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 499327256, + "step": 8912 + }, + { + "epoch": 19.85077951002227, + "grad_norm": 25.11189079284668, + "learning_rate": 1e-06, + "loss": 0.3364, + "num_input_tokens_seen": 499379248, + "step": 8913 + }, + { + "epoch": 19.85077951002227, + "loss": 0.40423035621643066, + "loss_ce": 5.5550888646394014e-05, + "loss_iou": 0.1640625, + "loss_num": 0.01513671875, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 499379248, + "step": 8913 + }, + { + "epoch": 19.853006681514476, + "grad_norm": 18.35516929626465, + "learning_rate": 1e-06, + "loss": 0.4902, + "num_input_tokens_seen": 499436244, + "step": 8914 + }, + { + "epoch": 19.853006681514476, + "loss": 0.41144680976867676, + "loss_ce": 6.984162610024214e-05, + "loss_iou": 0.18359375, + "loss_num": 0.00885009765625, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 499436244, + "step": 8914 + }, + { + "epoch": 19.85523385300668, + "grad_norm": 14.630391120910645, + "learning_rate": 1e-06, + "loss": 0.3681, + "num_input_tokens_seen": 499493472, + "step": 8915 + }, + { + "epoch": 19.85523385300668, + "loss": 0.3745167851448059, + "loss_ce": 6.610387936234474e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0078125, + "loss_xval": 0.375, + "num_input_tokens_seen": 499493472, + "step": 8915 + }, + { + "epoch": 19.857461024498885, + "grad_norm": 15.143143653869629, + "learning_rate": 1e-06, + "loss": 0.4017, + "num_input_tokens_seen": 499548600, + "step": 8916 + }, + { + "epoch": 19.857461024498885, + "loss": 0.5689959526062012, + "loss_ce": 5.67264505662024e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.026123046875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 499548600, + "step": 8916 + }, + { + "epoch": 19.85968819599109, + "grad_norm": 25.465965270996094, + "learning_rate": 1e-06, + "loss": 0.3066, + "num_input_tokens_seen": 499601500, + "step": 8917 + }, + { + "epoch": 19.85968819599109, + "loss": 0.31936201453208923, + "loss_ce": 8.711648843018338e-05, + "loss_iou": 0.1328125, + "loss_num": 0.01055908203125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 499601500, + "step": 8917 + }, + { + "epoch": 19.861915367483295, + "grad_norm": 21.30182647705078, + "learning_rate": 1e-06, + "loss": 0.4865, + "num_input_tokens_seen": 499657012, + "step": 8918 + }, + { + "epoch": 19.861915367483295, + "loss": 0.38416028022766113, + "loss_ce": 6.603718065889552e-05, + "loss_iou": 0.154296875, + "loss_num": 0.0152587890625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 499657012, + "step": 8918 + }, + { + "epoch": 19.8641425389755, + "grad_norm": 19.05086898803711, + "learning_rate": 1e-06, + "loss": 0.5037, + "num_input_tokens_seen": 499713108, + "step": 8919 + }, + { + "epoch": 19.8641425389755, + "loss": 0.5627080202102661, + "loss_ce": 8.592565427534282e-05, + "loss_iou": 0.244140625, + "loss_num": 0.01513671875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 499713108, + "step": 8919 + }, + { + "epoch": 19.866369710467705, + "grad_norm": 21.24616813659668, + "learning_rate": 1e-06, + "loss": 0.3316, + "num_input_tokens_seen": 499771624, + "step": 8920 + }, + { + "epoch": 19.866369710467705, + "loss": 0.3205050528049469, + "loss_ce": 7.047764665912837e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.0089111328125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 499771624, + "step": 8920 + }, + { + "epoch": 19.86859688195991, + "grad_norm": 18.13334846496582, + "learning_rate": 1e-06, + "loss": 0.3272, + "num_input_tokens_seen": 499825620, + "step": 8921 + }, + { + "epoch": 19.86859688195991, + "loss": 0.3275926113128662, + "loss_ce": 7.797064608894289e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.0087890625, + "loss_xval": 0.328125, + "num_input_tokens_seen": 499825620, + "step": 8921 + }, + { + "epoch": 19.870824053452115, + "grad_norm": 13.680047988891602, + "learning_rate": 1e-06, + "loss": 0.7128, + "num_input_tokens_seen": 499881464, + "step": 8922 + }, + { + "epoch": 19.870824053452115, + "loss": 0.5840435028076172, + "loss_ce": 5.915117799304426e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.0194091796875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 499881464, + "step": 8922 + }, + { + "epoch": 19.87305122494432, + "grad_norm": 17.974990844726562, + "learning_rate": 1e-06, + "loss": 0.3335, + "num_input_tokens_seen": 499935500, + "step": 8923 + }, + { + "epoch": 19.87305122494432, + "loss": 0.39403825998306274, + "loss_ce": 5.633125692838803e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.017333984375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 499935500, + "step": 8923 + }, + { + "epoch": 19.875278396436524, + "grad_norm": 16.1640625, + "learning_rate": 1e-06, + "loss": 0.5226, + "num_input_tokens_seen": 499991676, + "step": 8924 + }, + { + "epoch": 19.875278396436524, + "loss": 0.3693428039550781, + "loss_ce": 0.00020217550627421588, + "loss_iou": 0.1669921875, + "loss_num": 0.0069580078125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 499991676, + "step": 8924 + }, + { + "epoch": 19.87750556792873, + "grad_norm": 17.915504455566406, + "learning_rate": 1e-06, + "loss": 0.4869, + "num_input_tokens_seen": 500049156, + "step": 8925 + }, + { + "epoch": 19.87750556792873, + "loss": 0.44209641218185425, + "loss_ce": 7.978252688189968e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.00811767578125, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 500049156, + "step": 8925 + }, + { + "epoch": 19.879732739420934, + "grad_norm": 14.218931198120117, + "learning_rate": 1e-06, + "loss": 0.3029, + "num_input_tokens_seen": 500105296, + "step": 8926 + }, + { + "epoch": 19.879732739420934, + "loss": 0.33782947063446045, + "loss_ce": 6.0907594161108136e-05, + "loss_iou": 0.154296875, + "loss_num": 0.005950927734375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 500105296, + "step": 8926 + }, + { + "epoch": 19.88195991091314, + "grad_norm": 14.327959060668945, + "learning_rate": 1e-06, + "loss": 0.3458, + "num_input_tokens_seen": 500161776, + "step": 8927 + }, + { + "epoch": 19.88195991091314, + "loss": 0.3156734108924866, + "loss_ce": 6.0611731896642596e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00897216796875, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 500161776, + "step": 8927 + }, + { + "epoch": 19.884187082405344, + "grad_norm": 19.522199630737305, + "learning_rate": 1e-06, + "loss": 0.5647, + "num_input_tokens_seen": 500216288, + "step": 8928 + }, + { + "epoch": 19.884187082405344, + "loss": 0.6144946813583374, + "loss_ce": 5.3751304221805185e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.03662109375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 500216288, + "step": 8928 + }, + { + "epoch": 19.88641425389755, + "grad_norm": 17.130699157714844, + "learning_rate": 1e-06, + "loss": 0.3437, + "num_input_tokens_seen": 500271968, + "step": 8929 + }, + { + "epoch": 19.88641425389755, + "loss": 0.2988947629928589, + "loss_ce": 6.664128886768594e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.006866455078125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 500271968, + "step": 8929 + }, + { + "epoch": 19.888641425389753, + "grad_norm": 29.030588150024414, + "learning_rate": 1e-06, + "loss": 0.3789, + "num_input_tokens_seen": 500329792, + "step": 8930 + }, + { + "epoch": 19.888641425389753, + "loss": 0.40844833850860596, + "loss_ce": 6.208484410308301e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.013427734375, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 500329792, + "step": 8930 + }, + { + "epoch": 19.89086859688196, + "grad_norm": 20.700912475585938, + "learning_rate": 1e-06, + "loss": 0.3864, + "num_input_tokens_seen": 500384792, + "step": 8931 + }, + { + "epoch": 19.89086859688196, + "loss": 0.46894556283950806, + "loss_ce": 7.346599886659533e-05, + "loss_iou": 0.203125, + "loss_num": 0.0126953125, + "loss_xval": 0.46875, + "num_input_tokens_seen": 500384792, + "step": 8931 + }, + { + "epoch": 19.893095768374163, + "grad_norm": 20.326370239257812, + "learning_rate": 1e-06, + "loss": 0.2839, + "num_input_tokens_seen": 500439812, + "step": 8932 + }, + { + "epoch": 19.893095768374163, + "loss": 0.25635194778442383, + "loss_ce": 6.53410388622433e-05, + "loss_iou": 0.1162109375, + "loss_num": 0.004791259765625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 500439812, + "step": 8932 + }, + { + "epoch": 19.895322939866368, + "grad_norm": 18.51610565185547, + "learning_rate": 1e-06, + "loss": 0.4605, + "num_input_tokens_seen": 500497212, + "step": 8933 + }, + { + "epoch": 19.895322939866368, + "loss": 0.3897075355052948, + "loss_ce": 5.907195736654103e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.01123046875, + "loss_xval": 0.390625, + "num_input_tokens_seen": 500497212, + "step": 8933 + }, + { + "epoch": 19.897550111358576, + "grad_norm": 15.211495399475098, + "learning_rate": 1e-06, + "loss": 0.3421, + "num_input_tokens_seen": 500552316, + "step": 8934 + }, + { + "epoch": 19.897550111358576, + "loss": 0.3298904299736023, + "loss_ce": 5.644091288559139e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.01080322265625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 500552316, + "step": 8934 + }, + { + "epoch": 19.899777282850778, + "grad_norm": 20.869205474853516, + "learning_rate": 1e-06, + "loss": 0.2746, + "num_input_tokens_seen": 500606956, + "step": 8935 + }, + { + "epoch": 19.899777282850778, + "loss": 0.28754234313964844, + "loss_ce": 6.67754648020491e-05, + "loss_iou": 0.126953125, + "loss_num": 0.006866455078125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 500606956, + "step": 8935 + }, + { + "epoch": 19.902004454342986, + "grad_norm": 24.170188903808594, + "learning_rate": 1e-06, + "loss": 0.389, + "num_input_tokens_seen": 500662136, + "step": 8936 + }, + { + "epoch": 19.902004454342986, + "loss": 0.32582342624664307, + "loss_ce": 7.880674093030393e-05, + "loss_iou": 0.15234375, + "loss_num": 0.004119873046875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 500662136, + "step": 8936 + }, + { + "epoch": 19.90423162583519, + "grad_norm": 26.785816192626953, + "learning_rate": 1e-06, + "loss": 0.3826, + "num_input_tokens_seen": 500716584, + "step": 8937 + }, + { + "epoch": 19.90423162583519, + "loss": 0.41937169432640076, + "loss_ce": 6.017117266310379e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0125732421875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 500716584, + "step": 8937 + }, + { + "epoch": 19.906458797327396, + "grad_norm": 15.786996841430664, + "learning_rate": 1e-06, + "loss": 0.3471, + "num_input_tokens_seen": 500772304, + "step": 8938 + }, + { + "epoch": 19.906458797327396, + "loss": 0.26117777824401855, + "loss_ce": 6.9360678025987e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.004364013671875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 500772304, + "step": 8938 + }, + { + "epoch": 19.9086859688196, + "grad_norm": 32.816749572753906, + "learning_rate": 1e-06, + "loss": 0.542, + "num_input_tokens_seen": 500825560, + "step": 8939 + }, + { + "epoch": 19.9086859688196, + "loss": 0.6478848457336426, + "loss_ce": 5.768300252384506e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0189208984375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 500825560, + "step": 8939 + }, + { + "epoch": 19.910913140311806, + "grad_norm": 23.55243492126465, + "learning_rate": 1e-06, + "loss": 0.3671, + "num_input_tokens_seen": 500881272, + "step": 8940 + }, + { + "epoch": 19.910913140311806, + "loss": 0.28195321559906006, + "loss_ce": 9.285648411605507e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.004364013671875, + "loss_xval": 0.28125, + "num_input_tokens_seen": 500881272, + "step": 8940 + }, + { + "epoch": 19.91314031180401, + "grad_norm": 19.66559600830078, + "learning_rate": 1e-06, + "loss": 0.49, + "num_input_tokens_seen": 500937444, + "step": 8941 + }, + { + "epoch": 19.91314031180401, + "loss": 0.621654748916626, + "loss_ce": 7.26881917216815e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0177001953125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 500937444, + "step": 8941 + }, + { + "epoch": 19.915367483296215, + "grad_norm": 18.567909240722656, + "learning_rate": 1e-06, + "loss": 0.4054, + "num_input_tokens_seen": 500995428, + "step": 8942 + }, + { + "epoch": 19.915367483296215, + "loss": 0.36586493253707886, + "loss_ce": 0.0001422582136001438, + "loss_iou": 0.1630859375, + "loss_num": 0.00799560546875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 500995428, + "step": 8942 + }, + { + "epoch": 19.91759465478842, + "grad_norm": 37.899871826171875, + "learning_rate": 1e-06, + "loss": 0.3518, + "num_input_tokens_seen": 501051864, + "step": 8943 + }, + { + "epoch": 19.91759465478842, + "loss": 0.374164342880249, + "loss_ce": 7.989482401171699e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.00653076171875, + "loss_xval": 0.375, + "num_input_tokens_seen": 501051864, + "step": 8943 + }, + { + "epoch": 19.919821826280625, + "grad_norm": 14.561594009399414, + "learning_rate": 1e-06, + "loss": 0.3799, + "num_input_tokens_seen": 501107716, + "step": 8944 + }, + { + "epoch": 19.919821826280625, + "loss": 0.3280653953552246, + "loss_ce": 6.246700650081038e-05, + "loss_iou": 0.146484375, + "loss_num": 0.006866455078125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 501107716, + "step": 8944 + }, + { + "epoch": 19.92204899777283, + "grad_norm": 23.4508056640625, + "learning_rate": 1e-06, + "loss": 0.5593, + "num_input_tokens_seen": 501162352, + "step": 8945 + }, + { + "epoch": 19.92204899777283, + "loss": 0.4726704955101013, + "loss_ce": 7.527978596044704e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.02197265625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 501162352, + "step": 8945 + }, + { + "epoch": 19.924276169265035, + "grad_norm": 17.324832916259766, + "learning_rate": 1e-06, + "loss": 0.3169, + "num_input_tokens_seen": 501219728, + "step": 8946 + }, + { + "epoch": 19.924276169265035, + "loss": 0.36799356341362, + "loss_ce": 7.363170152530074e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00592041015625, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 501219728, + "step": 8946 + }, + { + "epoch": 19.92650334075724, + "grad_norm": 21.649171829223633, + "learning_rate": 1e-06, + "loss": 0.244, + "num_input_tokens_seen": 501272792, + "step": 8947 + }, + { + "epoch": 19.92650334075724, + "loss": 0.23100095987319946, + "loss_ce": 0.00016598933143541217, + "loss_iou": 0.103515625, + "loss_num": 0.00482177734375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 501272792, + "step": 8947 + }, + { + "epoch": 19.928730512249444, + "grad_norm": 14.748282432556152, + "learning_rate": 1e-06, + "loss": 0.326, + "num_input_tokens_seen": 501330904, + "step": 8948 + }, + { + "epoch": 19.928730512249444, + "loss": 0.3653227686882019, + "loss_ce": 8.841823728289455e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.006134033203125, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 501330904, + "step": 8948 + }, + { + "epoch": 19.93095768374165, + "grad_norm": 30.326812744140625, + "learning_rate": 1e-06, + "loss": 0.4558, + "num_input_tokens_seen": 501387276, + "step": 8949 + }, + { + "epoch": 19.93095768374165, + "loss": 0.4978707432746887, + "loss_ce": 6.799891707487404e-05, + "loss_iou": 0.17578125, + "loss_num": 0.029052734375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 501387276, + "step": 8949 + }, + { + "epoch": 19.933184855233854, + "grad_norm": 25.002771377563477, + "learning_rate": 1e-06, + "loss": 0.291, + "num_input_tokens_seen": 501445204, + "step": 8950 + }, + { + "epoch": 19.933184855233854, + "loss": 0.24606147408485413, + "loss_ce": 5.927299935137853e-05, + "loss_iou": 0.10595703125, + "loss_num": 0.0068359375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 501445204, + "step": 8950 + }, + { + "epoch": 19.93541202672606, + "grad_norm": 15.425994873046875, + "learning_rate": 1e-06, + "loss": 0.3445, + "num_input_tokens_seen": 501501064, + "step": 8951 + }, + { + "epoch": 19.93541202672606, + "loss": 0.29687991738319397, + "loss_ce": 6.595243030460551e-05, + "loss_iou": 0.1328125, + "loss_num": 0.006072998046875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 501501064, + "step": 8951 + }, + { + "epoch": 19.937639198218264, + "grad_norm": 18.998098373413086, + "learning_rate": 1e-06, + "loss": 0.526, + "num_input_tokens_seen": 501558712, + "step": 8952 + }, + { + "epoch": 19.937639198218264, + "loss": 0.7495875358581543, + "loss_ce": 7.577068754471838e-05, + "loss_iou": 0.302734375, + "loss_num": 0.028564453125, + "loss_xval": 0.75, + "num_input_tokens_seen": 501558712, + "step": 8952 + }, + { + "epoch": 19.93986636971047, + "grad_norm": 13.607953071594238, + "learning_rate": 1e-06, + "loss": 0.2764, + "num_input_tokens_seen": 501614280, + "step": 8953 + }, + { + "epoch": 19.93986636971047, + "loss": 0.32002097368240356, + "loss_ce": 7.468256808351725e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.00750732421875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 501614280, + "step": 8953 + }, + { + "epoch": 19.942093541202674, + "grad_norm": 17.92012596130371, + "learning_rate": 1e-06, + "loss": 0.5354, + "num_input_tokens_seen": 501667972, + "step": 8954 + }, + { + "epoch": 19.942093541202674, + "loss": 0.3914256989955902, + "loss_ce": 6.828343612141907e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.0078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 501667972, + "step": 8954 + }, + { + "epoch": 19.94432071269488, + "grad_norm": 15.410776138305664, + "learning_rate": 1e-06, + "loss": 0.6504, + "num_input_tokens_seen": 501724172, + "step": 8955 + }, + { + "epoch": 19.94432071269488, + "loss": 0.7376750707626343, + "loss_ce": 0.00012627900287043303, + "loss_iou": 0.31640625, + "loss_num": 0.0205078125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 501724172, + "step": 8955 + }, + { + "epoch": 19.946547884187083, + "grad_norm": 14.882509231567383, + "learning_rate": 1e-06, + "loss": 0.2864, + "num_input_tokens_seen": 501778848, + "step": 8956 + }, + { + "epoch": 19.946547884187083, + "loss": 0.330025315284729, + "loss_ce": 6.925308844074607e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.0140380859375, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 501778848, + "step": 8956 + }, + { + "epoch": 19.948775055679288, + "grad_norm": 26.640323638916016, + "learning_rate": 1e-06, + "loss": 0.2967, + "num_input_tokens_seen": 501833784, + "step": 8957 + }, + { + "epoch": 19.948775055679288, + "loss": 0.23518048226833344, + "loss_ce": 7.305956387426704e-05, + "loss_iou": 0.0986328125, + "loss_num": 0.00750732421875, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 501833784, + "step": 8957 + }, + { + "epoch": 19.951002227171493, + "grad_norm": 23.93553352355957, + "learning_rate": 1e-06, + "loss": 0.4451, + "num_input_tokens_seen": 501889872, + "step": 8958 + }, + { + "epoch": 19.951002227171493, + "loss": 0.29492634534835815, + "loss_ce": 6.549932732013986e-05, + "loss_iou": 0.1328125, + "loss_num": 0.006011962890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 501889872, + "step": 8958 + }, + { + "epoch": 19.953229398663698, + "grad_norm": 26.314388275146484, + "learning_rate": 1e-06, + "loss": 0.3367, + "num_input_tokens_seen": 501946172, + "step": 8959 + }, + { + "epoch": 19.953229398663698, + "loss": 0.3797050416469574, + "loss_ce": 6.637527258135378e-05, + "loss_iou": 0.17578125, + "loss_num": 0.00543212890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 501946172, + "step": 8959 + }, + { + "epoch": 19.955456570155903, + "grad_norm": 12.816929817199707, + "learning_rate": 1e-06, + "loss": 0.4444, + "num_input_tokens_seen": 502002296, + "step": 8960 + }, + { + "epoch": 19.955456570155903, + "loss": 0.5041437149047852, + "loss_ce": 6.582704372704029e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0244140625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 502002296, + "step": 8960 + }, + { + "epoch": 19.957683741648108, + "grad_norm": 22.86423110961914, + "learning_rate": 1e-06, + "loss": 0.2911, + "num_input_tokens_seen": 502056424, + "step": 8961 + }, + { + "epoch": 19.957683741648108, + "loss": 0.2608652412891388, + "loss_ce": 6.202118674991652e-05, + "loss_iou": 0.1025390625, + "loss_num": 0.01116943359375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 502056424, + "step": 8961 + }, + { + "epoch": 19.959910913140313, + "grad_norm": 14.811808586120605, + "learning_rate": 1e-06, + "loss": 0.3608, + "num_input_tokens_seen": 502112304, + "step": 8962 + }, + { + "epoch": 19.959910913140313, + "loss": 0.4184660315513611, + "loss_ce": 7.004072540439665e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.01519775390625, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 502112304, + "step": 8962 + }, + { + "epoch": 19.962138084632517, + "grad_norm": 19.64674949645996, + "learning_rate": 1e-06, + "loss": 0.2859, + "num_input_tokens_seen": 502167776, + "step": 8963 + }, + { + "epoch": 19.962138084632517, + "loss": 0.2639721632003784, + "loss_ce": 5.613587563857436e-05, + "loss_iou": 0.10546875, + "loss_num": 0.01055908203125, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 502167776, + "step": 8963 + }, + { + "epoch": 19.964365256124722, + "grad_norm": 14.231873512268066, + "learning_rate": 1e-06, + "loss": 0.3257, + "num_input_tokens_seen": 502224072, + "step": 8964 + }, + { + "epoch": 19.964365256124722, + "loss": 0.29962459206581116, + "loss_ce": 6.405658496078104e-05, + "loss_iou": 0.1328125, + "loss_num": 0.00689697265625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 502224072, + "step": 8964 + }, + { + "epoch": 19.966592427616927, + "grad_norm": 14.061171531677246, + "learning_rate": 1e-06, + "loss": 0.3948, + "num_input_tokens_seen": 502281656, + "step": 8965 + }, + { + "epoch": 19.966592427616927, + "loss": 0.36503365635871887, + "loss_ce": 0.0002570544893387705, + "loss_iou": 0.146484375, + "loss_num": 0.01416015625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 502281656, + "step": 8965 + }, + { + "epoch": 19.968819599109132, + "grad_norm": 39.687034606933594, + "learning_rate": 1e-06, + "loss": 0.2981, + "num_input_tokens_seen": 502338012, + "step": 8966 + }, + { + "epoch": 19.968819599109132, + "loss": 0.26977571845054626, + "loss_ce": 6.136279262136668e-05, + "loss_iou": 0.11865234375, + "loss_num": 0.006622314453125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 502338012, + "step": 8966 + }, + { + "epoch": 19.971046770601337, + "grad_norm": 17.423816680908203, + "learning_rate": 1e-06, + "loss": 0.3524, + "num_input_tokens_seen": 502393216, + "step": 8967 + }, + { + "epoch": 19.971046770601337, + "loss": 0.3833910822868347, + "loss_ce": 0.00015138789603952318, + "loss_iou": 0.16015625, + "loss_num": 0.01263427734375, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 502393216, + "step": 8967 + }, + { + "epoch": 19.97327394209354, + "grad_norm": 15.628920555114746, + "learning_rate": 1e-06, + "loss": 0.6525, + "num_input_tokens_seen": 502450296, + "step": 8968 + }, + { + "epoch": 19.97327394209354, + "loss": 0.8214820027351379, + "loss_ce": 7.082229421939701e-05, + "loss_iou": 0.3046875, + "loss_num": 0.04248046875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 502450296, + "step": 8968 + }, + { + "epoch": 19.975501113585747, + "grad_norm": 19.731292724609375, + "learning_rate": 1e-06, + "loss": 0.3995, + "num_input_tokens_seen": 502504404, + "step": 8969 + }, + { + "epoch": 19.975501113585747, + "loss": 0.3412543833255768, + "loss_ce": 6.785897130612284e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0107421875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 502504404, + "step": 8969 + }, + { + "epoch": 19.97772828507795, + "grad_norm": 17.818803787231445, + "learning_rate": 1e-06, + "loss": 0.5062, + "num_input_tokens_seen": 502561012, + "step": 8970 + }, + { + "epoch": 19.97772828507795, + "loss": 0.46796897053718567, + "loss_ce": 7.346458733081818e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.0166015625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 502561012, + "step": 8970 + }, + { + "epoch": 19.979955456570156, + "grad_norm": 22.212177276611328, + "learning_rate": 1e-06, + "loss": 0.6205, + "num_input_tokens_seen": 502615788, + "step": 8971 + }, + { + "epoch": 19.979955456570156, + "loss": 0.680034875869751, + "loss_ce": 0.00022534003073815256, + "loss_iou": 0.28515625, + "loss_num": 0.0220947265625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 502615788, + "step": 8971 + }, + { + "epoch": 19.98218262806236, + "grad_norm": 28.786479949951172, + "learning_rate": 1e-06, + "loss": 0.4148, + "num_input_tokens_seen": 502669668, + "step": 8972 + }, + { + "epoch": 19.98218262806236, + "loss": 0.234740749001503, + "loss_ce": 6.057398422854021e-05, + "loss_iou": 0.1083984375, + "loss_num": 0.0035400390625, + "loss_xval": 0.234375, + "num_input_tokens_seen": 502669668, + "step": 8972 + }, + { + "epoch": 19.984409799554566, + "grad_norm": 22.56906509399414, + "learning_rate": 1e-06, + "loss": 0.3357, + "num_input_tokens_seen": 502727112, + "step": 8973 + }, + { + "epoch": 19.984409799554566, + "loss": 0.32478001713752747, + "loss_ce": 7.298550917766988e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.007415771484375, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 502727112, + "step": 8973 + }, + { + "epoch": 19.98663697104677, + "grad_norm": 12.906547546386719, + "learning_rate": 1e-06, + "loss": 0.6115, + "num_input_tokens_seen": 502781448, + "step": 8974 + }, + { + "epoch": 19.98663697104677, + "loss": 0.7082492113113403, + "loss_ce": 5.82421307626646e-05, + "loss_iou": 0.265625, + "loss_num": 0.035400390625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 502781448, + "step": 8974 + }, + { + "epoch": 19.988864142538976, + "grad_norm": 20.916061401367188, + "learning_rate": 1e-06, + "loss": 0.5555, + "num_input_tokens_seen": 502834424, + "step": 8975 + }, + { + "epoch": 19.988864142538976, + "loss": 0.562214195728302, + "loss_ce": 8.040100510697812e-05, + "loss_iou": 0.251953125, + "loss_num": 0.01190185546875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 502834424, + "step": 8975 + }, + { + "epoch": 19.99109131403118, + "grad_norm": 27.778152465820312, + "learning_rate": 1e-06, + "loss": 0.5195, + "num_input_tokens_seen": 502888060, + "step": 8976 + }, + { + "epoch": 19.99109131403118, + "loss": 0.708865761756897, + "loss_ce": 6.44870669930242e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 502888060, + "step": 8976 + }, + { + "epoch": 19.993318485523385, + "grad_norm": 16.12858772277832, + "learning_rate": 1e-06, + "loss": 0.3678, + "num_input_tokens_seen": 502947760, + "step": 8977 + }, + { + "epoch": 19.993318485523385, + "loss": 0.4400678873062134, + "loss_ce": 6.544531788676977e-05, + "loss_iou": 0.189453125, + "loss_num": 0.01251220703125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 502947760, + "step": 8977 + }, + { + "epoch": 19.99554565701559, + "grad_norm": 14.300686836242676, + "learning_rate": 1e-06, + "loss": 0.4194, + "num_input_tokens_seen": 503003892, + "step": 8978 + }, + { + "epoch": 19.99554565701559, + "loss": 0.554030179977417, + "loss_ce": 7.511145668104291e-05, + "loss_iou": 0.240234375, + "loss_num": 0.0147705078125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 503003892, + "step": 8978 + }, + { + "epoch": 19.997772828507795, + "grad_norm": 48.166603088378906, + "learning_rate": 1e-06, + "loss": 0.3181, + "num_input_tokens_seen": 503060696, + "step": 8979 + }, + { + "epoch": 19.997772828507795, + "loss": 0.36199867725372314, + "loss_ce": 6.0221587773412466e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.01116943359375, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 503060696, + "step": 8979 + }, + { + "epoch": 20.0, + "grad_norm": 19.31943130493164, + "learning_rate": 1e-06, + "loss": 0.3377, + "num_input_tokens_seen": 503116608, + "step": 8980 + }, + { + "epoch": 20.0, + "loss": 0.40692806243896484, + "loss_ce": 6.770974141545594e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00946044921875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 503116608, + "step": 8980 + }, + { + "epoch": 20.002227171492205, + "grad_norm": 19.004310607910156, + "learning_rate": 1e-06, + "loss": 0.3899, + "num_input_tokens_seen": 503171700, + "step": 8981 + }, + { + "epoch": 20.002227171492205, + "loss": 0.31977057456970215, + "loss_ce": 6.842036236776039e-05, + "loss_iou": 0.150390625, + "loss_num": 0.0036163330078125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 503171700, + "step": 8981 + }, + { + "epoch": 20.00445434298441, + "grad_norm": 16.16302490234375, + "learning_rate": 1e-06, + "loss": 0.3615, + "num_input_tokens_seen": 503227328, + "step": 8982 + }, + { + "epoch": 20.00445434298441, + "loss": 0.43536823987960815, + "loss_ce": 6.547638622578233e-05, + "loss_iou": 0.193359375, + "loss_num": 0.00970458984375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 503227328, + "step": 8982 + }, + { + "epoch": 20.006681514476615, + "grad_norm": 28.54294204711914, + "learning_rate": 1e-06, + "loss": 0.4652, + "num_input_tokens_seen": 503280804, + "step": 8983 + }, + { + "epoch": 20.006681514476615, + "loss": 0.3693348169326782, + "loss_ce": 7.21029209671542e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00982666015625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 503280804, + "step": 8983 + }, + { + "epoch": 20.00890868596882, + "grad_norm": 13.636581420898438, + "learning_rate": 1e-06, + "loss": 0.3736, + "num_input_tokens_seen": 503337904, + "step": 8984 + }, + { + "epoch": 20.00890868596882, + "loss": 0.45746809244155884, + "loss_ce": 7.06176069797948e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01312255859375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 503337904, + "step": 8984 + }, + { + "epoch": 20.011135857461024, + "grad_norm": 23.064783096313477, + "learning_rate": 1e-06, + "loss": 0.4764, + "num_input_tokens_seen": 503393424, + "step": 8985 + }, + { + "epoch": 20.011135857461024, + "loss": 0.5955382585525513, + "loss_ce": 7.930370338726789e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0169677734375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 503393424, + "step": 8985 + }, + { + "epoch": 20.01336302895323, + "grad_norm": 20.41514015197754, + "learning_rate": 1e-06, + "loss": 0.3805, + "num_input_tokens_seen": 503448520, + "step": 8986 + }, + { + "epoch": 20.01336302895323, + "loss": 0.3952462673187256, + "loss_ce": 0.00010465873492648825, + "loss_iou": 0.1787109375, + "loss_num": 0.007537841796875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 503448520, + "step": 8986 + }, + { + "epoch": 20.015590200445434, + "grad_norm": 24.895496368408203, + "learning_rate": 1e-06, + "loss": 0.4096, + "num_input_tokens_seen": 503504024, + "step": 8987 + }, + { + "epoch": 20.015590200445434, + "loss": 0.43148207664489746, + "loss_ce": 8.557141700293869e-05, + "loss_iou": 0.189453125, + "loss_num": 0.01031494140625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 503504024, + "step": 8987 + }, + { + "epoch": 20.01781737193764, + "grad_norm": 23.964984893798828, + "learning_rate": 1e-06, + "loss": 0.3723, + "num_input_tokens_seen": 503561476, + "step": 8988 + }, + { + "epoch": 20.01781737193764, + "loss": 0.49469244480133057, + "loss_ce": 6.352404307108372e-05, + "loss_iou": 0.22265625, + "loss_num": 0.010009765625, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 503561476, + "step": 8988 + }, + { + "epoch": 20.020044543429844, + "grad_norm": 23.97158432006836, + "learning_rate": 1e-06, + "loss": 0.6079, + "num_input_tokens_seen": 503618636, + "step": 8989 + }, + { + "epoch": 20.020044543429844, + "loss": 0.6602572202682495, + "loss_ce": 0.00010094826575368643, + "loss_iou": 0.255859375, + "loss_num": 0.02978515625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 503618636, + "step": 8989 + }, + { + "epoch": 20.02227171492205, + "grad_norm": 28.734207153320312, + "learning_rate": 1e-06, + "loss": 0.3533, + "num_input_tokens_seen": 503674208, + "step": 8990 + }, + { + "epoch": 20.02227171492205, + "loss": 0.34147197008132935, + "loss_ce": 0.00019389839144423604, + "loss_iou": 0.138671875, + "loss_num": 0.01287841796875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 503674208, + "step": 8990 + }, + { + "epoch": 20.024498886414253, + "grad_norm": 24.618515014648438, + "learning_rate": 1e-06, + "loss": 0.4778, + "num_input_tokens_seen": 503728444, + "step": 8991 + }, + { + "epoch": 20.024498886414253, + "loss": 0.24243876338005066, + "loss_ce": 6.816858513047919e-05, + "loss_iou": 0.10205078125, + "loss_num": 0.00762939453125, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 503728444, + "step": 8991 + }, + { + "epoch": 20.02672605790646, + "grad_norm": 17.8887996673584, + "learning_rate": 1e-06, + "loss": 0.3836, + "num_input_tokens_seen": 503784444, + "step": 8992 + }, + { + "epoch": 20.02672605790646, + "loss": 0.4087654948234558, + "loss_ce": 7.405409269267693e-05, + "loss_iou": 0.177734375, + "loss_num": 0.01043701171875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 503784444, + "step": 8992 + }, + { + "epoch": 20.028953229398663, + "grad_norm": 19.861093521118164, + "learning_rate": 1e-06, + "loss": 0.5203, + "num_input_tokens_seen": 503837876, + "step": 8993 + }, + { + "epoch": 20.028953229398663, + "loss": 0.7142700552940369, + "loss_ce": 6.721451791236177e-05, + "loss_iou": 0.265625, + "loss_num": 0.036865234375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 503837876, + "step": 8993 + }, + { + "epoch": 20.031180400890868, + "grad_norm": 37.61943817138672, + "learning_rate": 1e-06, + "loss": 0.4567, + "num_input_tokens_seen": 503894880, + "step": 8994 + }, + { + "epoch": 20.031180400890868, + "loss": 0.4224729835987091, + "loss_ce": 0.00010970777657348663, + "loss_iou": 0.181640625, + "loss_num": 0.01177978515625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 503894880, + "step": 8994 + }, + { + "epoch": 20.033407572383073, + "grad_norm": 21.20833969116211, + "learning_rate": 1e-06, + "loss": 0.4741, + "num_input_tokens_seen": 503952424, + "step": 8995 + }, + { + "epoch": 20.033407572383073, + "loss": 0.4196312725543976, + "loss_ce": 7.561713573522866e-05, + "loss_iou": 0.1640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 503952424, + "step": 8995 + }, + { + "epoch": 20.035634743875278, + "grad_norm": 22.4307861328125, + "learning_rate": 1e-06, + "loss": 0.4745, + "num_input_tokens_seen": 504008156, + "step": 8996 + }, + { + "epoch": 20.035634743875278, + "loss": 0.4712446331977844, + "loss_ce": 5.3244577429722995e-05, + "loss_iou": 0.21875, + "loss_num": 0.00677490234375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 504008156, + "step": 8996 + }, + { + "epoch": 20.037861915367483, + "grad_norm": 35.790035247802734, + "learning_rate": 1e-06, + "loss": 0.3054, + "num_input_tokens_seen": 504066160, + "step": 8997 + }, + { + "epoch": 20.037861915367483, + "loss": 0.4005992114543915, + "loss_ce": 8.653838449390605e-05, + "loss_iou": 0.18359375, + "loss_num": 0.006683349609375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 504066160, + "step": 8997 + }, + { + "epoch": 20.040089086859687, + "grad_norm": 30.35886001586914, + "learning_rate": 1e-06, + "loss": 0.4581, + "num_input_tokens_seen": 504123536, + "step": 8998 + }, + { + "epoch": 20.040089086859687, + "loss": 0.4870605170726776, + "loss_ce": 0.00012202416110085323, + "loss_iou": 0.1865234375, + "loss_num": 0.022705078125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 504123536, + "step": 8998 + }, + { + "epoch": 20.042316258351892, + "grad_norm": 11.284721374511719, + "learning_rate": 1e-06, + "loss": 0.3595, + "num_input_tokens_seen": 504179652, + "step": 8999 + }, + { + "epoch": 20.042316258351892, + "loss": 0.37006866931915283, + "loss_ce": 7.358190487138927e-05, + "loss_iou": 0.158203125, + "loss_num": 0.01092529296875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 504179652, + "step": 8999 + }, + { + "epoch": 20.044543429844097, + "grad_norm": 14.649779319763184, + "learning_rate": 1e-06, + "loss": 0.4503, + "num_input_tokens_seen": 504234948, + "step": 9000 + }, + { + "epoch": 20.044543429844097, + "eval_seeclick_web_CIoU": 0.5833241045475006, + "eval_seeclick_web_GIoU": 0.5827835202217102, + "eval_seeclick_web_IoU": 0.602398693561554, + "eval_seeclick_web_MAE_all": 0.015278099570423365, + "eval_seeclick_web_MAE_h": 0.007092589279636741, + "eval_seeclick_web_MAE_w": 0.015399211086332798, + "eval_seeclick_web_MAE_x_boxes": 0.007994883926585317, + "eval_seeclick_web_MAE_y_boxes": 0.021222305251285434, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.9251190423965454, + "eval_seeclick_web_loss_ce": 0.00011575160169741139, + "eval_seeclick_web_loss_iou": 0.428955078125, + "eval_seeclick_web_loss_num": 0.012242317199707031, + "eval_seeclick_web_loss_xval": 0.9189453125, + "eval_seeclick_web_runtime": 24.8776, + "eval_seeclick_web_samples_per_second": 2.01, + "eval_seeclick_web_steps_per_second": 0.08, + "num_input_tokens_seen": 504234948, + "step": 9000 + }, + { + "epoch": 20.044543429844097, + "eval_icons_CIoU": 0.26180562376976013, + "eval_icons_GIoU": 0.2955218553543091, + "eval_icons_IoU": 0.3424162417650223, + "eval_icons_MAE_all": 0.057900117710232735, + "eval_icons_MAE_h": 0.029276233166456223, + "eval_icons_MAE_w": 0.06031078938394785, + "eval_icons_MAE_x_boxes": 0.058521781116724014, + "eval_icons_MAE_y_boxes": 0.03748843166977167, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7113921642303467, + "eval_icons_loss_ce": 0.0001341397837677505, + "eval_icons_loss_iou": 0.6754150390625, + "eval_icons_loss_num": 0.057514190673828125, + "eval_icons_loss_xval": 1.6396484375, + "eval_icons_runtime": 24.3524, + "eval_icons_samples_per_second": 2.053, + "eval_icons_steps_per_second": 0.082, + "num_input_tokens_seen": 504234948, + "step": 9000 + }, + { + "epoch": 20.044543429844097, + "eval_screenspot_CIoU": 0.3642461995283763, + "eval_screenspot_GIoU": 0.38147173325220746, + "eval_screenspot_IoU": 0.4438290496667226, + "eval_screenspot_MAE_all": 0.058075872560342155, + "eval_screenspot_MAE_h": 0.039860475808382034, + "eval_screenspot_MAE_w": 0.060762856155633926, + "eval_screenspot_MAE_x_boxes": 0.07081380176047485, + "eval_screenspot_MAE_y_boxes": 0.04531909463306268, + "eval_screenspot_inside_bbox": 0.7041666706403097, + "eval_screenspot_loss": 1.5968748331069946, + "eval_screenspot_loss_ce": 0.00014579095780694237, + "eval_screenspot_loss_iou": 0.6578776041666666, + "eval_screenspot_loss_num": 0.06808090209960938, + "eval_screenspot_loss_xval": 1.6564127604166667, + "eval_screenspot_runtime": 41.1503, + "eval_screenspot_samples_per_second": 2.163, + "eval_screenspot_steps_per_second": 0.073, + "num_input_tokens_seen": 504234948, + "step": 9000 + }, + { + "epoch": 20.044543429844097, + "eval_compot_CIoU": 0.3458597809076309, + "eval_compot_GIoU": 0.35780732333660126, + "eval_compot_IoU": 0.40467947721481323, + "eval_compot_MAE_all": 0.01937184017151594, + "eval_compot_MAE_h": 0.01292520109564066, + "eval_compot_MAE_w": 0.02040032297372818, + "eval_compot_MAE_x_boxes": 0.030146288685500622, + "eval_compot_MAE_y_boxes": 0.006630459800362587, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.361356258392334, + "eval_compot_loss_ce": 0.0001064865973603446, + "eval_compot_loss_iou": 0.6224365234375, + "eval_compot_loss_num": 0.017818450927734375, + "eval_compot_loss_xval": 1.33349609375, + "eval_compot_runtime": 25.482, + "eval_compot_samples_per_second": 1.962, + "eval_compot_steps_per_second": 0.078, + "num_input_tokens_seen": 504234948, + "step": 9000 + }, + { + "epoch": 20.044543429844097, + "eval_custom_ui_val_CIoU": 0.4760332836045159, + "eval_custom_ui_val_GIoU": 0.48059673772917855, + "eval_custom_ui_val_IoU": 0.5367237561278873, + "eval_custom_ui_val_MAE_all": 0.026100213070296578, + "eval_custom_ui_val_MAE_h": 0.013512549611429373, + "eval_custom_ui_val_MAE_w": 0.036708953945587076, + "eval_custom_ui_val_MAE_x_boxes": 0.03128902590833604, + "eval_custom_ui_val_MAE_y_boxes": 0.012070265650335286, + "eval_custom_ui_val_inside_bbox": 0.7719907429483202, + "eval_custom_ui_val_loss": 1.1528477668762207, + "eval_custom_ui_val_loss_ce": 0.00011129721032274474, + "eval_custom_ui_val_loss_iou": 0.4967041015625, + "eval_custom_ui_val_loss_num": 0.022805955674913194, + "eval_custom_ui_val_loss_xval": 1.1072048611111112, + "eval_custom_ui_val_runtime": 76.4297, + "eval_custom_ui_val_samples_per_second": 3.467, + "eval_custom_ui_val_steps_per_second": 0.118, + "num_input_tokens_seen": 504234948, + "step": 9000 + }, + { + "epoch": 20.044543429844097, + "loss": 0.8328539729118347, + "loss_ce": 9.030340879689902e-05, + "loss_iou": 0.375, + "loss_num": 0.0166015625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 504234948, + "step": 9000 + }, + { + "epoch": 20.046770601336302, + "grad_norm": 13.546182632446289, + "learning_rate": 1e-06, + "loss": 0.3243, + "num_input_tokens_seen": 504290540, + "step": 9001 + }, + { + "epoch": 20.046770601336302, + "loss": 0.26092007756233215, + "loss_ce": 5.582950689131394e-05, + "loss_iou": 0.1171875, + "loss_num": 0.005340576171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 504290540, + "step": 9001 + }, + { + "epoch": 20.048997772828507, + "grad_norm": 11.733550071716309, + "learning_rate": 1e-06, + "loss": 0.3229, + "num_input_tokens_seen": 504346220, + "step": 9002 + }, + { + "epoch": 20.048997772828507, + "loss": 0.3032246530056, + "loss_ce": 6.304614362306893e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.01251220703125, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 504346220, + "step": 9002 + }, + { + "epoch": 20.051224944320712, + "grad_norm": 22.342601776123047, + "learning_rate": 1e-06, + "loss": 0.3593, + "num_input_tokens_seen": 504402204, + "step": 9003 + }, + { + "epoch": 20.051224944320712, + "loss": 0.26656997203826904, + "loss_ce": 9.050035441759974e-05, + "loss_iou": 0.119140625, + "loss_num": 0.005706787109375, + "loss_xval": 0.265625, + "num_input_tokens_seen": 504402204, + "step": 9003 + }, + { + "epoch": 20.053452115812917, + "grad_norm": 17.765623092651367, + "learning_rate": 1e-06, + "loss": 0.5938, + "num_input_tokens_seen": 504458700, + "step": 9004 + }, + { + "epoch": 20.053452115812917, + "loss": 0.5125864148139954, + "loss_ce": 7.418861787300557e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0201416015625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 504458700, + "step": 9004 + }, + { + "epoch": 20.05567928730512, + "grad_norm": 18.589075088500977, + "learning_rate": 1e-06, + "loss": 0.3828, + "num_input_tokens_seen": 504514400, + "step": 9005 + }, + { + "epoch": 20.05567928730512, + "loss": 0.3567635416984558, + "loss_ce": 7.408991950796917e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0135498046875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 504514400, + "step": 9005 + }, + { + "epoch": 20.057906458797326, + "grad_norm": 19.300148010253906, + "learning_rate": 1e-06, + "loss": 0.5256, + "num_input_tokens_seen": 504570592, + "step": 9006 + }, + { + "epoch": 20.057906458797326, + "loss": 0.47822603583335876, + "loss_ce": 7.661766721867025e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 504570592, + "step": 9006 + }, + { + "epoch": 20.06013363028953, + "grad_norm": 17.056005477905273, + "learning_rate": 1e-06, + "loss": 0.5382, + "num_input_tokens_seen": 504623976, + "step": 9007 + }, + { + "epoch": 20.06013363028953, + "loss": 0.6177802085876465, + "loss_ce": 0.00010440404003020376, + "loss_iou": 0.267578125, + "loss_num": 0.0166015625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 504623976, + "step": 9007 + }, + { + "epoch": 20.062360801781736, + "grad_norm": 12.326020240783691, + "learning_rate": 1e-06, + "loss": 0.2692, + "num_input_tokens_seen": 504680516, + "step": 9008 + }, + { + "epoch": 20.062360801781736, + "loss": 0.2412145882844925, + "loss_ce": 6.467088678618893e-05, + "loss_iou": 0.109375, + "loss_num": 0.00439453125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 504680516, + "step": 9008 + }, + { + "epoch": 20.06458797327394, + "grad_norm": 15.348241806030273, + "learning_rate": 1e-06, + "loss": 0.2844, + "num_input_tokens_seen": 504732616, + "step": 9009 + }, + { + "epoch": 20.06458797327394, + "loss": 0.3364948332309723, + "loss_ce": 6.905219197506085e-05, + "loss_iou": 0.1328125, + "loss_num": 0.01416015625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 504732616, + "step": 9009 + }, + { + "epoch": 20.066815144766146, + "grad_norm": 274.50628662109375, + "learning_rate": 1e-06, + "loss": 0.3171, + "num_input_tokens_seen": 504789792, + "step": 9010 + }, + { + "epoch": 20.066815144766146, + "loss": 0.3943229615688324, + "loss_ce": 6.636053149122745e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00946044921875, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 504789792, + "step": 9010 + }, + { + "epoch": 20.06904231625835, + "grad_norm": 12.248230934143066, + "learning_rate": 1e-06, + "loss": 0.2955, + "num_input_tokens_seen": 504845860, + "step": 9011 + }, + { + "epoch": 20.06904231625835, + "loss": 0.2785142660140991, + "loss_ce": 7.186854054452851e-05, + "loss_iou": 0.10595703125, + "loss_num": 0.0133056640625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 504845860, + "step": 9011 + }, + { + "epoch": 20.071269487750556, + "grad_norm": 16.346586227416992, + "learning_rate": 1e-06, + "loss": 0.3333, + "num_input_tokens_seen": 504897560, + "step": 9012 + }, + { + "epoch": 20.071269487750556, + "loss": 0.25494903326034546, + "loss_ce": 6.623686931561679e-05, + "loss_iou": 0.10546875, + "loss_num": 0.0086669921875, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 504897560, + "step": 9012 + }, + { + "epoch": 20.07349665924276, + "grad_norm": 14.807296752929688, + "learning_rate": 1e-06, + "loss": 0.2808, + "num_input_tokens_seen": 504953152, + "step": 9013 + }, + { + "epoch": 20.07349665924276, + "loss": 0.27936726808547974, + "loss_ce": 7.039310003165156e-05, + "loss_iou": 0.11865234375, + "loss_num": 0.00836181640625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 504953152, + "step": 9013 + }, + { + "epoch": 20.075723830734965, + "grad_norm": 17.577438354492188, + "learning_rate": 1e-06, + "loss": 0.3015, + "num_input_tokens_seen": 505011868, + "step": 9014 + }, + { + "epoch": 20.075723830734965, + "loss": 0.23732471466064453, + "loss_ce": 5.0541042583063245e-05, + "loss_iou": 0.10546875, + "loss_num": 0.005279541015625, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 505011868, + "step": 9014 + }, + { + "epoch": 20.07795100222717, + "grad_norm": 14.317474365234375, + "learning_rate": 1e-06, + "loss": 0.44, + "num_input_tokens_seen": 505068340, + "step": 9015 + }, + { + "epoch": 20.07795100222717, + "loss": 0.4671054482460022, + "loss_ce": 6.44456158624962e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.017822265625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 505068340, + "step": 9015 + }, + { + "epoch": 20.080178173719375, + "grad_norm": 16.756528854370117, + "learning_rate": 1e-06, + "loss": 0.2364, + "num_input_tokens_seen": 505126536, + "step": 9016 + }, + { + "epoch": 20.080178173719375, + "loss": 0.22590871155261993, + "loss_ce": 7.862491474952549e-05, + "loss_iou": 0.10205078125, + "loss_num": 0.004302978515625, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 505126536, + "step": 9016 + }, + { + "epoch": 20.08240534521158, + "grad_norm": 13.3543062210083, + "learning_rate": 1e-06, + "loss": 0.3849, + "num_input_tokens_seen": 505183652, + "step": 9017 + }, + { + "epoch": 20.08240534521158, + "loss": 0.4101763963699341, + "loss_ce": 8.116650133160874e-05, + "loss_iou": 0.185546875, + "loss_num": 0.007720947265625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 505183652, + "step": 9017 + }, + { + "epoch": 20.084632516703785, + "grad_norm": 15.893471717834473, + "learning_rate": 1e-06, + "loss": 0.3159, + "num_input_tokens_seen": 505238776, + "step": 9018 + }, + { + "epoch": 20.084632516703785, + "loss": 0.3582761287689209, + "loss_ce": 6.079444574424997e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0067138671875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 505238776, + "step": 9018 + }, + { + "epoch": 20.08685968819599, + "grad_norm": 26.983814239501953, + "learning_rate": 1e-06, + "loss": 0.502, + "num_input_tokens_seen": 505295268, + "step": 9019 + }, + { + "epoch": 20.08685968819599, + "loss": 0.48191970586776733, + "loss_ce": 0.00010815928544616327, + "loss_iou": 0.2158203125, + "loss_num": 0.009765625, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 505295268, + "step": 9019 + }, + { + "epoch": 20.089086859688194, + "grad_norm": 13.190807342529297, + "learning_rate": 1e-06, + "loss": 0.327, + "num_input_tokens_seen": 505351820, + "step": 9020 + }, + { + "epoch": 20.089086859688194, + "loss": 0.3194577693939209, + "loss_ce": 6.07890251558274e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.007110595703125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 505351820, + "step": 9020 + }, + { + "epoch": 20.0913140311804, + "grad_norm": 18.049283981323242, + "learning_rate": 1e-06, + "loss": 0.3301, + "num_input_tokens_seen": 505409524, + "step": 9021 + }, + { + "epoch": 20.0913140311804, + "loss": 0.3330759108066559, + "loss_ce": 6.808601028751582e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.006072998046875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 505409524, + "step": 9021 + }, + { + "epoch": 20.093541202672604, + "grad_norm": 20.265382766723633, + "learning_rate": 1e-06, + "loss": 0.3425, + "num_input_tokens_seen": 505466160, + "step": 9022 + }, + { + "epoch": 20.093541202672604, + "loss": 0.2662951648235321, + "loss_ce": 5.9811318351421505e-05, + "loss_iou": 0.1171875, + "loss_num": 0.006439208984375, + "loss_xval": 0.265625, + "num_input_tokens_seen": 505466160, + "step": 9022 + }, + { + "epoch": 20.09576837416481, + "grad_norm": 18.630359649658203, + "learning_rate": 1e-06, + "loss": 0.459, + "num_input_tokens_seen": 505521684, + "step": 9023 + }, + { + "epoch": 20.09576837416481, + "loss": 0.34796595573425293, + "loss_ce": 6.557940650964156e-05, + "loss_iou": 0.1640625, + "loss_num": 0.004180908203125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 505521684, + "step": 9023 + }, + { + "epoch": 20.097995545657014, + "grad_norm": 18.471445083618164, + "learning_rate": 1e-06, + "loss": 0.44, + "num_input_tokens_seen": 505579020, + "step": 9024 + }, + { + "epoch": 20.097995545657014, + "loss": 0.4241413474082947, + "loss_ce": 6.90808956278488e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0107421875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 505579020, + "step": 9024 + }, + { + "epoch": 20.100222717149222, + "grad_norm": 13.083913803100586, + "learning_rate": 1e-06, + "loss": 0.4995, + "num_input_tokens_seen": 505636064, + "step": 9025 + }, + { + "epoch": 20.100222717149222, + "loss": 0.3323401212692261, + "loss_ce": 6.473006214946508e-05, + "loss_iou": 0.15234375, + "loss_num": 0.00537109375, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 505636064, + "step": 9025 + }, + { + "epoch": 20.102449888641427, + "grad_norm": 13.375629425048828, + "learning_rate": 1e-06, + "loss": 0.3271, + "num_input_tokens_seen": 505694548, + "step": 9026 + }, + { + "epoch": 20.102449888641427, + "loss": 0.261178195476532, + "loss_ce": 6.979504541959614e-05, + "loss_iou": 0.12255859375, + "loss_num": 0.003265380859375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 505694548, + "step": 9026 + }, + { + "epoch": 20.104677060133632, + "grad_norm": 15.511109352111816, + "learning_rate": 1e-06, + "loss": 0.4012, + "num_input_tokens_seen": 505752036, + "step": 9027 + }, + { + "epoch": 20.104677060133632, + "loss": 0.5342494249343872, + "loss_ce": 6.970732647459954e-05, + "loss_iou": 0.240234375, + "loss_num": 0.0106201171875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 505752036, + "step": 9027 + }, + { + "epoch": 20.106904231625837, + "grad_norm": 18.12278175354004, + "learning_rate": 1e-06, + "loss": 0.3609, + "num_input_tokens_seen": 505806088, + "step": 9028 + }, + { + "epoch": 20.106904231625837, + "loss": 0.21917679905891418, + "loss_ce": 6.0579972341656685e-05, + "loss_iou": 0.0859375, + "loss_num": 0.0093994140625, + "loss_xval": 0.21875, + "num_input_tokens_seen": 505806088, + "step": 9028 + }, + { + "epoch": 20.10913140311804, + "grad_norm": 19.46178436279297, + "learning_rate": 1e-06, + "loss": 0.3676, + "num_input_tokens_seen": 505863524, + "step": 9029 + }, + { + "epoch": 20.10913140311804, + "loss": 0.2694757878780365, + "loss_ce": 6.661626684945077e-05, + "loss_iou": 0.109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 505863524, + "step": 9029 + }, + { + "epoch": 20.111358574610247, + "grad_norm": 25.316984176635742, + "learning_rate": 1e-06, + "loss": 0.3969, + "num_input_tokens_seen": 505919056, + "step": 9030 + }, + { + "epoch": 20.111358574610247, + "loss": 0.39313435554504395, + "loss_ce": 6.794696673750877e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00909423828125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 505919056, + "step": 9030 + }, + { + "epoch": 20.11358574610245, + "grad_norm": 17.775178909301758, + "learning_rate": 1e-06, + "loss": 0.5433, + "num_input_tokens_seen": 505973188, + "step": 9031 + }, + { + "epoch": 20.11358574610245, + "loss": 0.5688655972480774, + "loss_ce": 0.00014000045484863222, + "loss_iou": 0.24609375, + "loss_num": 0.0152587890625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 505973188, + "step": 9031 + }, + { + "epoch": 20.115812917594656, + "grad_norm": 18.306743621826172, + "learning_rate": 1e-06, + "loss": 0.3148, + "num_input_tokens_seen": 506026908, + "step": 9032 + }, + { + "epoch": 20.115812917594656, + "loss": 0.25726398825645447, + "loss_ce": 6.184067751746625e-05, + "loss_iou": 0.107421875, + "loss_num": 0.00848388671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 506026908, + "step": 9032 + }, + { + "epoch": 20.11804008908686, + "grad_norm": 18.657054901123047, + "learning_rate": 1e-06, + "loss": 0.4274, + "num_input_tokens_seen": 506084560, + "step": 9033 + }, + { + "epoch": 20.11804008908686, + "loss": 0.4233429729938507, + "loss_ce": 6.415171810658649e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.0101318359375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 506084560, + "step": 9033 + }, + { + "epoch": 20.120267260579066, + "grad_norm": 20.036205291748047, + "learning_rate": 1e-06, + "loss": 0.3367, + "num_input_tokens_seen": 506141540, + "step": 9034 + }, + { + "epoch": 20.120267260579066, + "loss": 0.2808334231376648, + "loss_ce": 7.170556636992842e-05, + "loss_iou": 0.130859375, + "loss_num": 0.0036773681640625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 506141540, + "step": 9034 + }, + { + "epoch": 20.12249443207127, + "grad_norm": 14.675041198730469, + "learning_rate": 1e-06, + "loss": 0.4575, + "num_input_tokens_seen": 506195448, + "step": 9035 + }, + { + "epoch": 20.12249443207127, + "loss": 0.43460237979888916, + "loss_ce": 9.311149187851697e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.00439453125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 506195448, + "step": 9035 + }, + { + "epoch": 20.124721603563476, + "grad_norm": 22.29570770263672, + "learning_rate": 1e-06, + "loss": 0.4673, + "num_input_tokens_seen": 506249464, + "step": 9036 + }, + { + "epoch": 20.124721603563476, + "loss": 0.30451393127441406, + "loss_ce": 7.058979826979339e-05, + "loss_iou": 0.130859375, + "loss_num": 0.008544921875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 506249464, + "step": 9036 + }, + { + "epoch": 20.12694877505568, + "grad_norm": 18.38787841796875, + "learning_rate": 1e-06, + "loss": 0.3275, + "num_input_tokens_seen": 506305972, + "step": 9037 + }, + { + "epoch": 20.12694877505568, + "loss": 0.2641042470932007, + "loss_ce": 6.616647442569956e-05, + "loss_iou": 0.11328125, + "loss_num": 0.007476806640625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 506305972, + "step": 9037 + }, + { + "epoch": 20.129175946547885, + "grad_norm": 24.06353759765625, + "learning_rate": 1e-06, + "loss": 0.3628, + "num_input_tokens_seen": 506363376, + "step": 9038 + }, + { + "epoch": 20.129175946547885, + "loss": 0.3895137310028076, + "loss_ce": 0.00010943982488242909, + "loss_iou": 0.177734375, + "loss_num": 0.006561279296875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 506363376, + "step": 9038 + }, + { + "epoch": 20.13140311804009, + "grad_norm": 24.388277053833008, + "learning_rate": 1e-06, + "loss": 0.2923, + "num_input_tokens_seen": 506419872, + "step": 9039 + }, + { + "epoch": 20.13140311804009, + "loss": 0.33582985401153564, + "loss_ce": 0.00013651110930368304, + "loss_iou": 0.15234375, + "loss_num": 0.00634765625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 506419872, + "step": 9039 + }, + { + "epoch": 20.133630289532295, + "grad_norm": 12.003957748413086, + "learning_rate": 1e-06, + "loss": 0.3221, + "num_input_tokens_seen": 506474320, + "step": 9040 + }, + { + "epoch": 20.133630289532295, + "loss": 0.19839268922805786, + "loss_ce": 5.894018613616936e-05, + "loss_iou": 0.09130859375, + "loss_num": 0.0030670166015625, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 506474320, + "step": 9040 + }, + { + "epoch": 20.1358574610245, + "grad_norm": 20.20316505432129, + "learning_rate": 1e-06, + "loss": 0.4102, + "num_input_tokens_seen": 506531432, + "step": 9041 + }, + { + "epoch": 20.1358574610245, + "loss": 0.3831365704536438, + "loss_ce": 6.466210470534861e-05, + "loss_iou": 0.169921875, + "loss_num": 0.008544921875, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 506531432, + "step": 9041 + }, + { + "epoch": 20.138084632516705, + "grad_norm": 19.908742904663086, + "learning_rate": 1e-06, + "loss": 0.4735, + "num_input_tokens_seen": 506586268, + "step": 9042 + }, + { + "epoch": 20.138084632516705, + "loss": 0.5597177743911743, + "loss_ce": 0.0001474099699407816, + "loss_iou": 0.24609375, + "loss_num": 0.0135498046875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 506586268, + "step": 9042 + }, + { + "epoch": 20.14031180400891, + "grad_norm": 15.677102088928223, + "learning_rate": 1e-06, + "loss": 0.3571, + "num_input_tokens_seen": 506642036, + "step": 9043 + }, + { + "epoch": 20.14031180400891, + "loss": 0.41627195477485657, + "loss_ce": 7.322065357584506e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 506642036, + "step": 9043 + }, + { + "epoch": 20.142538975501115, + "grad_norm": 21.689987182617188, + "learning_rate": 1e-06, + "loss": 0.4071, + "num_input_tokens_seen": 506698484, + "step": 9044 + }, + { + "epoch": 20.142538975501115, + "loss": 0.40728476643562317, + "loss_ce": 5.823103856528178e-05, + "loss_iou": 0.181640625, + "loss_num": 0.008544921875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 506698484, + "step": 9044 + }, + { + "epoch": 20.14476614699332, + "grad_norm": 27.1544132232666, + "learning_rate": 1e-06, + "loss": 0.3355, + "num_input_tokens_seen": 506752068, + "step": 9045 + }, + { + "epoch": 20.14476614699332, + "loss": 0.3848227858543396, + "loss_ce": 5.7126497267745435e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0133056640625, + "loss_xval": 0.384765625, + "num_input_tokens_seen": 506752068, + "step": 9045 + }, + { + "epoch": 20.146993318485524, + "grad_norm": 15.306382179260254, + "learning_rate": 1e-06, + "loss": 0.4094, + "num_input_tokens_seen": 506810520, + "step": 9046 + }, + { + "epoch": 20.146993318485524, + "loss": 0.44698041677474976, + "loss_ce": 8.10260244179517e-05, + "loss_iou": 0.18359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 506810520, + "step": 9046 + }, + { + "epoch": 20.14922048997773, + "grad_norm": 17.21749496459961, + "learning_rate": 1e-06, + "loss": 0.5436, + "num_input_tokens_seen": 506865016, + "step": 9047 + }, + { + "epoch": 20.14922048997773, + "loss": 0.689301609992981, + "loss_ce": 9.259382932214066e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0216064453125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 506865016, + "step": 9047 + }, + { + "epoch": 20.151447661469934, + "grad_norm": 28.808147430419922, + "learning_rate": 1e-06, + "loss": 0.477, + "num_input_tokens_seen": 506918928, + "step": 9048 + }, + { + "epoch": 20.151447661469934, + "loss": 0.46686720848083496, + "loss_ce": 7.035446469672024e-05, + "loss_iou": 0.19140625, + "loss_num": 0.016845703125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 506918928, + "step": 9048 + }, + { + "epoch": 20.15367483296214, + "grad_norm": 32.62353515625, + "learning_rate": 1e-06, + "loss": 0.3442, + "num_input_tokens_seen": 506975208, + "step": 9049 + }, + { + "epoch": 20.15367483296214, + "loss": 0.505931556224823, + "loss_ce": 7.217634993139654e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0157470703125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 506975208, + "step": 9049 + }, + { + "epoch": 20.155902004454344, + "grad_norm": 21.908447265625, + "learning_rate": 1e-06, + "loss": 0.4323, + "num_input_tokens_seen": 507031872, + "step": 9050 + }, + { + "epoch": 20.155902004454344, + "loss": 0.5479224324226379, + "loss_ce": 7.087649282766506e-05, + "loss_iou": 0.21875, + "loss_num": 0.0220947265625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 507031872, + "step": 9050 + }, + { + "epoch": 20.15812917594655, + "grad_norm": 20.468429565429688, + "learning_rate": 1e-06, + "loss": 0.3044, + "num_input_tokens_seen": 507089620, + "step": 9051 + }, + { + "epoch": 20.15812917594655, + "loss": 0.3056070804595947, + "loss_ce": 6.508109072456136e-05, + "loss_iou": 0.142578125, + "loss_num": 0.003997802734375, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 507089620, + "step": 9051 + }, + { + "epoch": 20.160356347438753, + "grad_norm": 22.60247802734375, + "learning_rate": 1e-06, + "loss": 0.3752, + "num_input_tokens_seen": 507147488, + "step": 9052 + }, + { + "epoch": 20.160356347438753, + "loss": 0.27081358432769775, + "loss_ce": 6.161894270917401e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.006317138671875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 507147488, + "step": 9052 + }, + { + "epoch": 20.16258351893096, + "grad_norm": 27.35213851928711, + "learning_rate": 1e-06, + "loss": 0.6975, + "num_input_tokens_seen": 507203384, + "step": 9053 + }, + { + "epoch": 20.16258351893096, + "loss": 0.46700623631477356, + "loss_ce": 8.729432738618925e-05, + "loss_iou": 0.20703125, + "loss_num": 0.0103759765625, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 507203384, + "step": 9053 + }, + { + "epoch": 20.164810690423163, + "grad_norm": 23.201065063476562, + "learning_rate": 1e-06, + "loss": 0.2998, + "num_input_tokens_seen": 507256920, + "step": 9054 + }, + { + "epoch": 20.164810690423163, + "loss": 0.1558593213558197, + "loss_ce": 6.707788270432502e-05, + "loss_iou": 0.06640625, + "loss_num": 0.004638671875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 507256920, + "step": 9054 + }, + { + "epoch": 20.167037861915368, + "grad_norm": 17.185827255249023, + "learning_rate": 1e-06, + "loss": 0.2036, + "num_input_tokens_seen": 507311420, + "step": 9055 + }, + { + "epoch": 20.167037861915368, + "loss": 0.24561916291713715, + "loss_ce": 7.47331214370206e-05, + "loss_iou": 0.11474609375, + "loss_num": 0.003173828125, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 507311420, + "step": 9055 + }, + { + "epoch": 20.169265033407573, + "grad_norm": 12.79721450805664, + "learning_rate": 1e-06, + "loss": 0.3995, + "num_input_tokens_seen": 507364720, + "step": 9056 + }, + { + "epoch": 20.169265033407573, + "loss": 0.397408664226532, + "loss_ce": 6.977266457397491e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.014892578125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 507364720, + "step": 9056 + }, + { + "epoch": 20.171492204899778, + "grad_norm": 21.816648483276367, + "learning_rate": 1e-06, + "loss": 0.4521, + "num_input_tokens_seen": 507420248, + "step": 9057 + }, + { + "epoch": 20.171492204899778, + "loss": 0.3353777527809143, + "loss_ce": 6.583896174561232e-05, + "loss_iou": 0.154296875, + "loss_num": 0.005523681640625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 507420248, + "step": 9057 + }, + { + "epoch": 20.173719376391983, + "grad_norm": 18.13716697692871, + "learning_rate": 1e-06, + "loss": 0.2414, + "num_input_tokens_seen": 507477140, + "step": 9058 + }, + { + "epoch": 20.173719376391983, + "loss": 0.22375068068504333, + "loss_ce": 5.682064875145443e-05, + "loss_iou": 0.09375, + "loss_num": 0.0072021484375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 507477140, + "step": 9058 + }, + { + "epoch": 20.175946547884188, + "grad_norm": 15.739980697631836, + "learning_rate": 1e-06, + "loss": 0.3639, + "num_input_tokens_seen": 507534024, + "step": 9059 + }, + { + "epoch": 20.175946547884188, + "loss": 0.436111718416214, + "loss_ce": 7.6580501627177e-05, + "loss_iou": 0.201171875, + "loss_num": 0.00665283203125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 507534024, + "step": 9059 + }, + { + "epoch": 20.178173719376392, + "grad_norm": 25.104169845581055, + "learning_rate": 1e-06, + "loss": 0.5474, + "num_input_tokens_seen": 507590760, + "step": 9060 + }, + { + "epoch": 20.178173719376392, + "loss": 0.6292134523391724, + "loss_ce": 0.00018516569980420172, + "loss_iou": 0.2265625, + "loss_num": 0.03515625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 507590760, + "step": 9060 + }, + { + "epoch": 20.180400890868597, + "grad_norm": 14.639026641845703, + "learning_rate": 1e-06, + "loss": 0.3374, + "num_input_tokens_seen": 507647528, + "step": 9061 + }, + { + "epoch": 20.180400890868597, + "loss": 0.26568013429641724, + "loss_ce": 5.5125230574049056e-05, + "loss_iou": 0.1142578125, + "loss_num": 0.007415771484375, + "loss_xval": 0.265625, + "num_input_tokens_seen": 507647528, + "step": 9061 + }, + { + "epoch": 20.182628062360802, + "grad_norm": 25.42153549194336, + "learning_rate": 1e-06, + "loss": 0.4638, + "num_input_tokens_seen": 507699664, + "step": 9062 + }, + { + "epoch": 20.182628062360802, + "loss": 0.4531900882720947, + "loss_ce": 6.505924829980358e-05, + "loss_iou": 0.208984375, + "loss_num": 0.007110595703125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 507699664, + "step": 9062 + }, + { + "epoch": 20.184855233853007, + "grad_norm": 20.597827911376953, + "learning_rate": 1e-06, + "loss": 0.2994, + "num_input_tokens_seen": 507755092, + "step": 9063 + }, + { + "epoch": 20.184855233853007, + "loss": 0.3056027889251709, + "loss_ce": 6.079179729567841e-05, + "loss_iou": 0.1328125, + "loss_num": 0.0079345703125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 507755092, + "step": 9063 + }, + { + "epoch": 20.187082405345212, + "grad_norm": 20.325477600097656, + "learning_rate": 1e-06, + "loss": 0.4699, + "num_input_tokens_seen": 507810656, + "step": 9064 + }, + { + "epoch": 20.187082405345212, + "loss": 0.5754441022872925, + "loss_ce": 0.00012668846466112882, + "loss_iou": 0.232421875, + "loss_num": 0.0220947265625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 507810656, + "step": 9064 + }, + { + "epoch": 20.189309576837417, + "grad_norm": 14.4949369430542, + "learning_rate": 1e-06, + "loss": 0.2323, + "num_input_tokens_seen": 507868900, + "step": 9065 + }, + { + "epoch": 20.189309576837417, + "loss": 0.29057085514068604, + "loss_ce": 0.0002876527141779661, + "loss_iou": 0.1201171875, + "loss_num": 0.010009765625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 507868900, + "step": 9065 + }, + { + "epoch": 20.19153674832962, + "grad_norm": 20.822650909423828, + "learning_rate": 1e-06, + "loss": 0.5267, + "num_input_tokens_seen": 507927360, + "step": 9066 + }, + { + "epoch": 20.19153674832962, + "loss": 0.4537545144557953, + "loss_ce": 0.0001412302954122424, + "loss_iou": 0.2109375, + "loss_num": 0.00616455078125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 507927360, + "step": 9066 + }, + { + "epoch": 20.193763919821826, + "grad_norm": 25.914817810058594, + "learning_rate": 1e-06, + "loss": 0.4188, + "num_input_tokens_seen": 507981276, + "step": 9067 + }, + { + "epoch": 20.193763919821826, + "loss": 0.4693218767642975, + "loss_ce": 8.358562627108768e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.020263671875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 507981276, + "step": 9067 + }, + { + "epoch": 20.19599109131403, + "grad_norm": 21.08448028564453, + "learning_rate": 1e-06, + "loss": 0.2869, + "num_input_tokens_seen": 508037936, + "step": 9068 + }, + { + "epoch": 20.19599109131403, + "loss": 0.3300858736038208, + "loss_ce": 6.879281136207283e-05, + "loss_iou": 0.142578125, + "loss_num": 0.00885009765625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 508037936, + "step": 9068 + }, + { + "epoch": 20.198218262806236, + "grad_norm": 28.94850730895996, + "learning_rate": 1e-06, + "loss": 0.3374, + "num_input_tokens_seen": 508094604, + "step": 9069 + }, + { + "epoch": 20.198218262806236, + "loss": 0.311705619096756, + "loss_ce": 6.010363722452894e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.00555419921875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 508094604, + "step": 9069 + }, + { + "epoch": 20.20044543429844, + "grad_norm": 27.5219783782959, + "learning_rate": 1e-06, + "loss": 0.491, + "num_input_tokens_seen": 508152580, + "step": 9070 + }, + { + "epoch": 20.20044543429844, + "loss": 0.6100476384162903, + "loss_ce": 6.228317215573043e-05, + "loss_iou": 0.25390625, + "loss_num": 0.02001953125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 508152580, + "step": 9070 + }, + { + "epoch": 20.202672605790646, + "grad_norm": 23.4427433013916, + "learning_rate": 1e-06, + "loss": 0.2314, + "num_input_tokens_seen": 508206192, + "step": 9071 + }, + { + "epoch": 20.202672605790646, + "loss": 0.2371232807636261, + "loss_ce": 6.273827602853999e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.006103515625, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 508206192, + "step": 9071 + }, + { + "epoch": 20.20489977728285, + "grad_norm": 17.554397583007812, + "learning_rate": 1e-06, + "loss": 0.3968, + "num_input_tokens_seen": 508262040, + "step": 9072 + }, + { + "epoch": 20.20489977728285, + "loss": 0.4413501024246216, + "loss_ce": 6.59393408568576e-05, + "loss_iou": 0.19921875, + "loss_num": 0.00872802734375, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 508262040, + "step": 9072 + }, + { + "epoch": 20.207126948775056, + "grad_norm": 15.516806602478027, + "learning_rate": 1e-06, + "loss": 0.3465, + "num_input_tokens_seen": 508317992, + "step": 9073 + }, + { + "epoch": 20.207126948775056, + "loss": 0.41605615615844727, + "loss_ce": 0.0001626217272132635, + "loss_iou": 0.1923828125, + "loss_num": 0.00616455078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 508317992, + "step": 9073 + }, + { + "epoch": 20.20935412026726, + "grad_norm": 26.189729690551758, + "learning_rate": 1e-06, + "loss": 0.3646, + "num_input_tokens_seen": 508373568, + "step": 9074 + }, + { + "epoch": 20.20935412026726, + "loss": 0.3610216975212097, + "loss_ce": 5.978911212878302e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0111083984375, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 508373568, + "step": 9074 + }, + { + "epoch": 20.211581291759465, + "grad_norm": 16.33150863647461, + "learning_rate": 1e-06, + "loss": 0.4738, + "num_input_tokens_seen": 508429440, + "step": 9075 + }, + { + "epoch": 20.211581291759465, + "loss": 0.3367946445941925, + "loss_ce": 6.368501635733992e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.013916015625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 508429440, + "step": 9075 + }, + { + "epoch": 20.21380846325167, + "grad_norm": 16.59500503540039, + "learning_rate": 1e-06, + "loss": 0.5425, + "num_input_tokens_seen": 508486396, + "step": 9076 + }, + { + "epoch": 20.21380846325167, + "loss": 0.4951022267341614, + "loss_ce": 0.00010707708133850247, + "loss_iou": 0.2158203125, + "loss_num": 0.0125732421875, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 508486396, + "step": 9076 + }, + { + "epoch": 20.216035634743875, + "grad_norm": 16.87767791748047, + "learning_rate": 1e-06, + "loss": 0.6097, + "num_input_tokens_seen": 508540776, + "step": 9077 + }, + { + "epoch": 20.216035634743875, + "loss": 0.6077404022216797, + "loss_ce": 7.441250636475161e-05, + "loss_iou": 0.248046875, + "loss_num": 0.0224609375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 508540776, + "step": 9077 + }, + { + "epoch": 20.21826280623608, + "grad_norm": 12.694762229919434, + "learning_rate": 1e-06, + "loss": 0.3561, + "num_input_tokens_seen": 508597792, + "step": 9078 + }, + { + "epoch": 20.21826280623608, + "loss": 0.22015753388404846, + "loss_ce": 6.474574183812365e-05, + "loss_iou": 0.099609375, + "loss_num": 0.004119873046875, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 508597792, + "step": 9078 + }, + { + "epoch": 20.220489977728285, + "grad_norm": 20.9544734954834, + "learning_rate": 1e-06, + "loss": 0.4849, + "num_input_tokens_seen": 508655992, + "step": 9079 + }, + { + "epoch": 20.220489977728285, + "loss": 0.7146784067153931, + "loss_ce": 7.875564915593714e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0147705078125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 508655992, + "step": 9079 + }, + { + "epoch": 20.22271714922049, + "grad_norm": 9.648091316223145, + "learning_rate": 1e-06, + "loss": 0.401, + "num_input_tokens_seen": 508711672, + "step": 9080 + }, + { + "epoch": 20.22271714922049, + "loss": 0.2577533423900604, + "loss_ce": 6.290937017183751e-05, + "loss_iou": 0.1162109375, + "loss_num": 0.005035400390625, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 508711672, + "step": 9080 + }, + { + "epoch": 20.224944320712694, + "grad_norm": 21.80451202392578, + "learning_rate": 1e-06, + "loss": 0.2682, + "num_input_tokens_seen": 508769188, + "step": 9081 + }, + { + "epoch": 20.224944320712694, + "loss": 0.2104550302028656, + "loss_ce": 6.68463617330417e-05, + "loss_iou": 0.087890625, + "loss_num": 0.006866455078125, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 508769188, + "step": 9081 + }, + { + "epoch": 20.2271714922049, + "grad_norm": 21.215343475341797, + "learning_rate": 1e-06, + "loss": 0.3385, + "num_input_tokens_seen": 508824548, + "step": 9082 + }, + { + "epoch": 20.2271714922049, + "loss": 0.4378107190132141, + "loss_ce": 6.65808329358697e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.01031494140625, + "loss_xval": 0.4375, + "num_input_tokens_seen": 508824548, + "step": 9082 + }, + { + "epoch": 20.229398663697104, + "grad_norm": 17.058391571044922, + "learning_rate": 1e-06, + "loss": 0.2433, + "num_input_tokens_seen": 508879452, + "step": 9083 + }, + { + "epoch": 20.229398663697104, + "loss": 0.23303891718387604, + "loss_ce": 6.771212792955339e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.00543212890625, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 508879452, + "step": 9083 + }, + { + "epoch": 20.23162583518931, + "grad_norm": 16.908109664916992, + "learning_rate": 1e-06, + "loss": 0.5078, + "num_input_tokens_seen": 508935480, + "step": 9084 + }, + { + "epoch": 20.23162583518931, + "loss": 0.29596656560897827, + "loss_ce": 6.8145505792927e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.00433349609375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 508935480, + "step": 9084 + }, + { + "epoch": 20.233853006681514, + "grad_norm": 20.34218978881836, + "learning_rate": 1e-06, + "loss": 0.3869, + "num_input_tokens_seen": 508993000, + "step": 9085 + }, + { + "epoch": 20.233853006681514, + "loss": 0.3470456302165985, + "loss_ce": 6.076861609471962e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0089111328125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 508993000, + "step": 9085 + }, + { + "epoch": 20.23608017817372, + "grad_norm": 23.703197479248047, + "learning_rate": 1e-06, + "loss": 0.3962, + "num_input_tokens_seen": 509047736, + "step": 9086 + }, + { + "epoch": 20.23608017817372, + "loss": 0.3868297338485718, + "loss_ce": 0.00011096424714196473, + "loss_iou": 0.1748046875, + "loss_num": 0.007568359375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 509047736, + "step": 9086 + }, + { + "epoch": 20.238307349665924, + "grad_norm": 17.429872512817383, + "learning_rate": 1e-06, + "loss": 0.3348, + "num_input_tokens_seen": 509102768, + "step": 9087 + }, + { + "epoch": 20.238307349665924, + "loss": 0.4313143193721771, + "loss_ce": 0.00010093137825606391, + "loss_iou": 0.1953125, + "loss_num": 0.00823974609375, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 509102768, + "step": 9087 + }, + { + "epoch": 20.24053452115813, + "grad_norm": 24.075519561767578, + "learning_rate": 1e-06, + "loss": 0.3229, + "num_input_tokens_seen": 509159036, + "step": 9088 + }, + { + "epoch": 20.24053452115813, + "loss": 0.3190591335296631, + "loss_ce": 5.887117004022002e-05, + "loss_iou": 0.140625, + "loss_num": 0.007568359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 509159036, + "step": 9088 + }, + { + "epoch": 20.242761692650333, + "grad_norm": 15.556453704833984, + "learning_rate": 1e-06, + "loss": 0.2766, + "num_input_tokens_seen": 509215604, + "step": 9089 + }, + { + "epoch": 20.242761692650333, + "loss": 0.2935373783111572, + "loss_ce": 8.033675840124488e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.005035400390625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 509215604, + "step": 9089 + }, + { + "epoch": 20.244988864142538, + "grad_norm": 26.041152954101562, + "learning_rate": 1e-06, + "loss": 0.4012, + "num_input_tokens_seen": 509271624, + "step": 9090 + }, + { + "epoch": 20.244988864142538, + "loss": 0.28717702627182007, + "loss_ce": 6.765717989765108e-05, + "loss_iou": 0.1103515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 509271624, + "step": 9090 + }, + { + "epoch": 20.247216035634743, + "grad_norm": 15.528565406799316, + "learning_rate": 1e-06, + "loss": 0.5706, + "num_input_tokens_seen": 509327216, + "step": 9091 + }, + { + "epoch": 20.247216035634743, + "loss": 0.7028372287750244, + "loss_ce": 7.844123319955543e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 509327216, + "step": 9091 + }, + { + "epoch": 20.249443207126948, + "grad_norm": 31.673660278320312, + "learning_rate": 1e-06, + "loss": 0.2802, + "num_input_tokens_seen": 509384560, + "step": 9092 + }, + { + "epoch": 20.249443207126948, + "loss": 0.2221912145614624, + "loss_ce": 5.375898035708815e-05, + "loss_iou": 0.09814453125, + "loss_num": 0.005157470703125, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 509384560, + "step": 9092 + }, + { + "epoch": 20.251670378619153, + "grad_norm": 17.960433959960938, + "learning_rate": 1e-06, + "loss": 0.2977, + "num_input_tokens_seen": 509439108, + "step": 9093 + }, + { + "epoch": 20.251670378619153, + "loss": 0.3416157364845276, + "loss_ce": 6.299982487689704e-05, + "loss_iou": 0.14453125, + "loss_num": 0.010498046875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 509439108, + "step": 9093 + }, + { + "epoch": 20.253897550111358, + "grad_norm": 15.661357879638672, + "learning_rate": 1e-06, + "loss": 0.3546, + "num_input_tokens_seen": 509495464, + "step": 9094 + }, + { + "epoch": 20.253897550111358, + "loss": 0.4557662904262543, + "loss_ce": 7.779739098623395e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01373291015625, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 509495464, + "step": 9094 + }, + { + "epoch": 20.256124721603562, + "grad_norm": 20.898950576782227, + "learning_rate": 1e-06, + "loss": 0.3629, + "num_input_tokens_seen": 509552144, + "step": 9095 + }, + { + "epoch": 20.256124721603562, + "loss": 0.3690887987613678, + "loss_ce": 7.023525540716946e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.006195068359375, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 509552144, + "step": 9095 + }, + { + "epoch": 20.258351893095767, + "grad_norm": 14.072615623474121, + "learning_rate": 1e-06, + "loss": 0.3833, + "num_input_tokens_seen": 509607736, + "step": 9096 + }, + { + "epoch": 20.258351893095767, + "loss": 0.41329166293144226, + "loss_ce": 5.3150470193941146e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.0087890625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 509607736, + "step": 9096 + }, + { + "epoch": 20.260579064587972, + "grad_norm": 23.365612030029297, + "learning_rate": 1e-06, + "loss": 0.3804, + "num_input_tokens_seen": 509662676, + "step": 9097 + }, + { + "epoch": 20.260579064587972, + "loss": 0.42209792137145996, + "loss_ce": 7.031915447441861e-05, + "loss_iou": 0.158203125, + "loss_num": 0.0211181640625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 509662676, + "step": 9097 + }, + { + "epoch": 20.262806236080177, + "grad_norm": 29.634624481201172, + "learning_rate": 1e-06, + "loss": 0.3842, + "num_input_tokens_seen": 509720920, + "step": 9098 + }, + { + "epoch": 20.262806236080177, + "loss": 0.4519707262516022, + "loss_ce": 6.642582593485713e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.01904296875, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 509720920, + "step": 9098 + }, + { + "epoch": 20.265033407572382, + "grad_norm": 15.21391773223877, + "learning_rate": 1e-06, + "loss": 0.4699, + "num_input_tokens_seen": 509777720, + "step": 9099 + }, + { + "epoch": 20.265033407572382, + "loss": 0.5803358554840088, + "loss_ce": 7.468961121048778e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.0185546875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 509777720, + "step": 9099 + }, + { + "epoch": 20.267260579064587, + "grad_norm": 19.919464111328125, + "learning_rate": 1e-06, + "loss": 0.2832, + "num_input_tokens_seen": 509834440, + "step": 9100 + }, + { + "epoch": 20.267260579064587, + "loss": 0.2911492586135864, + "loss_ce": 7.257152174133807e-05, + "loss_iou": 0.12060546875, + "loss_num": 0.01007080078125, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 509834440, + "step": 9100 + }, + { + "epoch": 20.26948775055679, + "grad_norm": 17.073530197143555, + "learning_rate": 1e-06, + "loss": 0.4515, + "num_input_tokens_seen": 509891612, + "step": 9101 + }, + { + "epoch": 20.26948775055679, + "loss": 0.4402480721473694, + "loss_ce": 6.253210449358448e-05, + "loss_iou": 0.169921875, + "loss_num": 0.02001953125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 509891612, + "step": 9101 + }, + { + "epoch": 20.271714922048996, + "grad_norm": 14.897684097290039, + "learning_rate": 1e-06, + "loss": 0.376, + "num_input_tokens_seen": 509949728, + "step": 9102 + }, + { + "epoch": 20.271714922048996, + "loss": 0.4393659234046936, + "loss_ce": 6.541327456943691e-05, + "loss_iou": 0.203125, + "loss_num": 0.00640869140625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 509949728, + "step": 9102 + }, + { + "epoch": 20.2739420935412, + "grad_norm": 17.08948516845703, + "learning_rate": 1e-06, + "loss": 0.2489, + "num_input_tokens_seen": 510003660, + "step": 9103 + }, + { + "epoch": 20.2739420935412, + "loss": 0.2572115659713745, + "loss_ce": 7.0465452154167e-05, + "loss_iou": 0.10009765625, + "loss_num": 0.0113525390625, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 510003660, + "step": 9103 + }, + { + "epoch": 20.276169265033406, + "grad_norm": 19.830888748168945, + "learning_rate": 1e-06, + "loss": 0.3028, + "num_input_tokens_seen": 510059112, + "step": 9104 + }, + { + "epoch": 20.276169265033406, + "loss": 0.4022126793861389, + "loss_ce": 0.00011306728993076831, + "loss_iou": 0.1875, + "loss_num": 0.0054931640625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 510059112, + "step": 9104 + }, + { + "epoch": 20.27839643652561, + "grad_norm": 21.524456024169922, + "learning_rate": 1e-06, + "loss": 0.3969, + "num_input_tokens_seen": 510113856, + "step": 9105 + }, + { + "epoch": 20.27839643652561, + "loss": 0.2504650354385376, + "loss_ce": 5.308254185365513e-05, + "loss_iou": 0.10791015625, + "loss_num": 0.006988525390625, + "loss_xval": 0.25, + "num_input_tokens_seen": 510113856, + "step": 9105 + }, + { + "epoch": 20.280623608017816, + "grad_norm": 20.933208465576172, + "learning_rate": 1e-06, + "loss": 0.5396, + "num_input_tokens_seen": 510171268, + "step": 9106 + }, + { + "epoch": 20.280623608017816, + "loss": 0.566622257232666, + "loss_ce": 9.393271466251463e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0111083984375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 510171268, + "step": 9106 + }, + { + "epoch": 20.28285077951002, + "grad_norm": 20.48546028137207, + "learning_rate": 1e-06, + "loss": 0.3572, + "num_input_tokens_seen": 510224736, + "step": 9107 + }, + { + "epoch": 20.28285077951002, + "loss": 0.33814841508865356, + "loss_ce": 6.32288574706763e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00897216796875, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 510224736, + "step": 9107 + }, + { + "epoch": 20.285077951002226, + "grad_norm": 11.746217727661133, + "learning_rate": 1e-06, + "loss": 0.3038, + "num_input_tokens_seen": 510279584, + "step": 9108 + }, + { + "epoch": 20.285077951002226, + "loss": 0.21575751900672913, + "loss_ce": 5.92783690080978e-05, + "loss_iou": 0.08740234375, + "loss_num": 0.00811767578125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 510279584, + "step": 9108 + }, + { + "epoch": 20.28730512249443, + "grad_norm": 14.660599708557129, + "learning_rate": 1e-06, + "loss": 0.3486, + "num_input_tokens_seen": 510337512, + "step": 9109 + }, + { + "epoch": 20.28730512249443, + "loss": 0.3341745138168335, + "loss_ce": 6.808016041759402e-05, + "loss_iou": 0.150390625, + "loss_num": 0.006591796875, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 510337512, + "step": 9109 + }, + { + "epoch": 20.289532293986635, + "grad_norm": 13.905699729919434, + "learning_rate": 1e-06, + "loss": 0.3323, + "num_input_tokens_seen": 510393672, + "step": 9110 + }, + { + "epoch": 20.289532293986635, + "loss": 0.4626014828681946, + "loss_ce": 7.70436818129383e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.02099609375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 510393672, + "step": 9110 + }, + { + "epoch": 20.29175946547884, + "grad_norm": 22.228431701660156, + "learning_rate": 1e-06, + "loss": 0.3862, + "num_input_tokens_seen": 510449288, + "step": 9111 + }, + { + "epoch": 20.29175946547884, + "loss": 0.3664637804031372, + "loss_ce": 6.975741416681558e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.0081787109375, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 510449288, + "step": 9111 + }, + { + "epoch": 20.293986636971045, + "grad_norm": 21.240266799926758, + "learning_rate": 1e-06, + "loss": 0.323, + "num_input_tokens_seen": 510504764, + "step": 9112 + }, + { + "epoch": 20.293986636971045, + "loss": 0.31800100207328796, + "loss_ce": 6.887991185067222e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.0084228515625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 510504764, + "step": 9112 + }, + { + "epoch": 20.29621380846325, + "grad_norm": 13.980697631835938, + "learning_rate": 1e-06, + "loss": 0.4736, + "num_input_tokens_seen": 510560660, + "step": 9113 + }, + { + "epoch": 20.29621380846325, + "loss": 0.4085160195827484, + "loss_ce": 6.87609426677227e-05, + "loss_iou": 0.181640625, + "loss_num": 0.00909423828125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 510560660, + "step": 9113 + }, + { + "epoch": 20.29844097995546, + "grad_norm": 13.33857536315918, + "learning_rate": 1e-06, + "loss": 0.3764, + "num_input_tokens_seen": 510613008, + "step": 9114 + }, + { + "epoch": 20.29844097995546, + "loss": 0.36590009927749634, + "loss_ce": 8.587302727391943e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.0111083984375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 510613008, + "step": 9114 + }, + { + "epoch": 20.30066815144766, + "grad_norm": 18.67300033569336, + "learning_rate": 1e-06, + "loss": 0.2536, + "num_input_tokens_seen": 510670700, + "step": 9115 + }, + { + "epoch": 20.30066815144766, + "loss": 0.20209243893623352, + "loss_ce": 6.605993257835507e-05, + "loss_iou": 0.08935546875, + "loss_num": 0.004608154296875, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 510670700, + "step": 9115 + }, + { + "epoch": 20.302895322939868, + "grad_norm": 21.35281753540039, + "learning_rate": 1e-06, + "loss": 0.5114, + "num_input_tokens_seen": 510721468, + "step": 9116 + }, + { + "epoch": 20.302895322939868, + "loss": 0.388251394033432, + "loss_ce": 6.779546674806625e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.00848388671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 510721468, + "step": 9116 + }, + { + "epoch": 20.305122494432073, + "grad_norm": 17.087766647338867, + "learning_rate": 1e-06, + "loss": 0.5297, + "num_input_tokens_seen": 510776316, + "step": 9117 + }, + { + "epoch": 20.305122494432073, + "loss": 0.5188660621643066, + "loss_ce": 6.725148705299944e-05, + "loss_iou": 0.228515625, + "loss_num": 0.01251220703125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 510776316, + "step": 9117 + }, + { + "epoch": 20.307349665924278, + "grad_norm": 14.230613708496094, + "learning_rate": 1e-06, + "loss": 0.3324, + "num_input_tokens_seen": 510833816, + "step": 9118 + }, + { + "epoch": 20.307349665924278, + "loss": 0.2973046898841858, + "loss_ce": 6.350380135700107e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00390625, + "loss_xval": 0.296875, + "num_input_tokens_seen": 510833816, + "step": 9118 + }, + { + "epoch": 20.309576837416483, + "grad_norm": 25.817169189453125, + "learning_rate": 1e-06, + "loss": 0.3926, + "num_input_tokens_seen": 510890252, + "step": 9119 + }, + { + "epoch": 20.309576837416483, + "loss": 0.3149741291999817, + "loss_ce": 6.32612791378051e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.007110595703125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 510890252, + "step": 9119 + }, + { + "epoch": 20.311804008908688, + "grad_norm": 17.687280654907227, + "learning_rate": 1e-06, + "loss": 0.4531, + "num_input_tokens_seen": 510946368, + "step": 9120 + }, + { + "epoch": 20.311804008908688, + "loss": 0.5778282284736633, + "loss_ce": 6.946115900063887e-05, + "loss_iou": 0.234375, + "loss_num": 0.02197265625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 510946368, + "step": 9120 + }, + { + "epoch": 20.314031180400892, + "grad_norm": 17.3331356048584, + "learning_rate": 1e-06, + "loss": 0.5977, + "num_input_tokens_seen": 511002084, + "step": 9121 + }, + { + "epoch": 20.314031180400892, + "loss": 0.4655322730541229, + "loss_ce": 7.816165452823043e-05, + "loss_iou": 0.201171875, + "loss_num": 0.01275634765625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 511002084, + "step": 9121 + }, + { + "epoch": 20.316258351893097, + "grad_norm": 28.032072067260742, + "learning_rate": 1e-06, + "loss": 0.3289, + "num_input_tokens_seen": 511060036, + "step": 9122 + }, + { + "epoch": 20.316258351893097, + "loss": 0.4448232352733612, + "loss_ce": 6.0018668591510504e-05, + "loss_iou": 0.19921875, + "loss_num": 0.00933837890625, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 511060036, + "step": 9122 + }, + { + "epoch": 20.318485523385302, + "grad_norm": 31.768814086914062, + "learning_rate": 1e-06, + "loss": 0.407, + "num_input_tokens_seen": 511115920, + "step": 9123 + }, + { + "epoch": 20.318485523385302, + "loss": 0.32061654329299927, + "loss_ce": 5.989862256683409e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0067138671875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 511115920, + "step": 9123 + }, + { + "epoch": 20.320712694877507, + "grad_norm": 20.859983444213867, + "learning_rate": 1e-06, + "loss": 0.3574, + "num_input_tokens_seen": 511173348, + "step": 9124 + }, + { + "epoch": 20.320712694877507, + "loss": 0.36715149879455566, + "loss_ce": 8.607155177742243e-05, + "loss_iou": 0.15234375, + "loss_num": 0.01239013671875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 511173348, + "step": 9124 + }, + { + "epoch": 20.322939866369712, + "grad_norm": 14.776050567626953, + "learning_rate": 1e-06, + "loss": 0.4476, + "num_input_tokens_seen": 511230864, + "step": 9125 + }, + { + "epoch": 20.322939866369712, + "loss": 0.49763861298561096, + "loss_ce": 6.477968418039382e-05, + "loss_iou": 0.21875, + "loss_num": 0.01220703125, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 511230864, + "step": 9125 + }, + { + "epoch": 20.325167037861917, + "grad_norm": 25.55546760559082, + "learning_rate": 1e-06, + "loss": 0.3811, + "num_input_tokens_seen": 511285816, + "step": 9126 + }, + { + "epoch": 20.325167037861917, + "loss": 0.32178765535354614, + "loss_ce": 7.134034967748448e-05, + "loss_iou": 0.130859375, + "loss_num": 0.0118408203125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 511285816, + "step": 9126 + }, + { + "epoch": 20.32739420935412, + "grad_norm": 21.569284439086914, + "learning_rate": 1e-06, + "loss": 0.2597, + "num_input_tokens_seen": 511343148, + "step": 9127 + }, + { + "epoch": 20.32739420935412, + "loss": 0.262149453163147, + "loss_ce": 6.447016494348645e-05, + "loss_iou": 0.11767578125, + "loss_num": 0.00531005859375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 511343148, + "step": 9127 + }, + { + "epoch": 20.329621380846326, + "grad_norm": 14.305514335632324, + "learning_rate": 1e-06, + "loss": 0.3907, + "num_input_tokens_seen": 511400796, + "step": 9128 + }, + { + "epoch": 20.329621380846326, + "loss": 0.32069259881973267, + "loss_ce": 7.491336873499677e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0072021484375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 511400796, + "step": 9128 + }, + { + "epoch": 20.33184855233853, + "grad_norm": 15.63278865814209, + "learning_rate": 1e-06, + "loss": 0.2036, + "num_input_tokens_seen": 511456720, + "step": 9129 + }, + { + "epoch": 20.33184855233853, + "loss": 0.2301052063703537, + "loss_ce": 6.369603215716779e-05, + "loss_iou": 0.10693359375, + "loss_num": 0.0032501220703125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 511456720, + "step": 9129 + }, + { + "epoch": 20.334075723830736, + "grad_norm": 93.94779205322266, + "learning_rate": 1e-06, + "loss": 0.3359, + "num_input_tokens_seen": 511513624, + "step": 9130 + }, + { + "epoch": 20.334075723830736, + "loss": 0.24669964611530304, + "loss_ce": 5.657020301441662e-05, + "loss_iou": 0.11083984375, + "loss_num": 0.00506591796875, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 511513624, + "step": 9130 + }, + { + "epoch": 20.33630289532294, + "grad_norm": 33.43038558959961, + "learning_rate": 1e-06, + "loss": 0.3538, + "num_input_tokens_seen": 511570664, + "step": 9131 + }, + { + "epoch": 20.33630289532294, + "loss": 0.4840731620788574, + "loss_ce": 0.00012543509365059435, + "loss_iou": 0.21875, + "loss_num": 0.00921630859375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 511570664, + "step": 9131 + }, + { + "epoch": 20.338530066815146, + "grad_norm": 11.925836563110352, + "learning_rate": 1e-06, + "loss": 0.2765, + "num_input_tokens_seen": 511626512, + "step": 9132 + }, + { + "epoch": 20.338530066815146, + "loss": 0.27660778164863586, + "loss_ce": 5.7503631978761405e-05, + "loss_iou": 0.12451171875, + "loss_num": 0.00543212890625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 511626512, + "step": 9132 + }, + { + "epoch": 20.34075723830735, + "grad_norm": 19.892284393310547, + "learning_rate": 1e-06, + "loss": 0.2753, + "num_input_tokens_seen": 511684156, + "step": 9133 + }, + { + "epoch": 20.34075723830735, + "loss": 0.2991905212402344, + "loss_ce": 5.724587390432134e-05, + "loss_iou": 0.13671875, + "loss_num": 0.005279541015625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 511684156, + "step": 9133 + }, + { + "epoch": 20.342984409799556, + "grad_norm": 14.584566116333008, + "learning_rate": 1e-06, + "loss": 0.3131, + "num_input_tokens_seen": 511743004, + "step": 9134 + }, + { + "epoch": 20.342984409799556, + "loss": 0.31488001346588135, + "loss_ce": 6.067375943530351e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.005615234375, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 511743004, + "step": 9134 + }, + { + "epoch": 20.34521158129176, + "grad_norm": 24.601272583007812, + "learning_rate": 1e-06, + "loss": 0.5077, + "num_input_tokens_seen": 511794760, + "step": 9135 + }, + { + "epoch": 20.34521158129176, + "loss": 0.6481292247772217, + "loss_ce": 5.792171214125119e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0238037109375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 511794760, + "step": 9135 + }, + { + "epoch": 20.347438752783965, + "grad_norm": 13.779706954956055, + "learning_rate": 1e-06, + "loss": 0.4595, + "num_input_tokens_seen": 511852632, + "step": 9136 + }, + { + "epoch": 20.347438752783965, + "loss": 0.4468442499637604, + "loss_ce": 6.692019815091044e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.01409912109375, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 511852632, + "step": 9136 + }, + { + "epoch": 20.34966592427617, + "grad_norm": 17.18348503112793, + "learning_rate": 1e-06, + "loss": 0.3995, + "num_input_tokens_seen": 511910288, + "step": 9137 + }, + { + "epoch": 20.34966592427617, + "loss": 0.5219651460647583, + "loss_ce": 0.00011460046516731381, + "loss_iou": 0.1943359375, + "loss_num": 0.026611328125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 511910288, + "step": 9137 + }, + { + "epoch": 20.351893095768375, + "grad_norm": 16.120128631591797, + "learning_rate": 1e-06, + "loss": 0.434, + "num_input_tokens_seen": 511965500, + "step": 9138 + }, + { + "epoch": 20.351893095768375, + "loss": 0.4480791389942169, + "loss_ce": 8.108362089842558e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 511965500, + "step": 9138 + }, + { + "epoch": 20.35412026726058, + "grad_norm": 14.766048431396484, + "learning_rate": 1e-06, + "loss": 0.3227, + "num_input_tokens_seen": 512020400, + "step": 9139 + }, + { + "epoch": 20.35412026726058, + "loss": 0.17573511600494385, + "loss_ce": 6.067653885111213e-05, + "loss_iou": 0.07080078125, + "loss_num": 0.006805419921875, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 512020400, + "step": 9139 + }, + { + "epoch": 20.356347438752785, + "grad_norm": 23.5654354095459, + "learning_rate": 1e-06, + "loss": 0.3828, + "num_input_tokens_seen": 512079228, + "step": 9140 + }, + { + "epoch": 20.356347438752785, + "loss": 0.4413471221923828, + "loss_ce": 6.295171624515206e-05, + "loss_iou": 0.19140625, + "loss_num": 0.01177978515625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 512079228, + "step": 9140 + }, + { + "epoch": 20.35857461024499, + "grad_norm": 17.19645118713379, + "learning_rate": 1e-06, + "loss": 0.4033, + "num_input_tokens_seen": 512136932, + "step": 9141 + }, + { + "epoch": 20.35857461024499, + "loss": 0.3532071113586426, + "loss_ce": 5.770741336164065e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.01312255859375, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 512136932, + "step": 9141 + }, + { + "epoch": 20.360801781737194, + "grad_norm": 15.467480659484863, + "learning_rate": 1e-06, + "loss": 0.3074, + "num_input_tokens_seen": 512194392, + "step": 9142 + }, + { + "epoch": 20.360801781737194, + "loss": 0.2540320158004761, + "loss_ce": 0.00012579177564475685, + "loss_iou": 0.10791015625, + "loss_num": 0.007568359375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 512194392, + "step": 9142 + }, + { + "epoch": 20.3630289532294, + "grad_norm": 21.110055923461914, + "learning_rate": 1e-06, + "loss": 0.4215, + "num_input_tokens_seen": 512251156, + "step": 9143 + }, + { + "epoch": 20.3630289532294, + "loss": 0.4504120349884033, + "loss_ce": 0.00021672958973795176, + "loss_iou": 0.2099609375, + "loss_num": 0.00592041015625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 512251156, + "step": 9143 + }, + { + "epoch": 20.365256124721604, + "grad_norm": 13.63264274597168, + "learning_rate": 1e-06, + "loss": 0.356, + "num_input_tokens_seen": 512307892, + "step": 9144 + }, + { + "epoch": 20.365256124721604, + "loss": 0.4380814731121063, + "loss_ce": 6.268207653192803e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00921630859375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 512307892, + "step": 9144 + }, + { + "epoch": 20.36748329621381, + "grad_norm": 37.342010498046875, + "learning_rate": 1e-06, + "loss": 0.3701, + "num_input_tokens_seen": 512361460, + "step": 9145 + }, + { + "epoch": 20.36748329621381, + "loss": 0.4177306294441223, + "loss_ce": 6.703598774038255e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.013671875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 512361460, + "step": 9145 + }, + { + "epoch": 20.369710467706014, + "grad_norm": 17.764732360839844, + "learning_rate": 1e-06, + "loss": 0.3428, + "num_input_tokens_seen": 512418536, + "step": 9146 + }, + { + "epoch": 20.369710467706014, + "loss": 0.33477145433425903, + "loss_ce": 5.4668213124386966e-05, + "loss_iou": 0.150390625, + "loss_num": 0.0068359375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 512418536, + "step": 9146 + }, + { + "epoch": 20.37193763919822, + "grad_norm": 25.832767486572266, + "learning_rate": 1e-06, + "loss": 0.5058, + "num_input_tokens_seen": 512473128, + "step": 9147 + }, + { + "epoch": 20.37193763919822, + "loss": 0.4780276417732239, + "loss_ce": 6.131519330665469e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 512473128, + "step": 9147 + }, + { + "epoch": 20.374164810690424, + "grad_norm": 21.50016212463379, + "learning_rate": 1e-06, + "loss": 0.4721, + "num_input_tokens_seen": 512527572, + "step": 9148 + }, + { + "epoch": 20.374164810690424, + "loss": 0.42840418219566345, + "loss_ce": 5.943184805801138e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.0115966796875, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 512527572, + "step": 9148 + }, + { + "epoch": 20.37639198218263, + "grad_norm": 13.383490562438965, + "learning_rate": 1e-06, + "loss": 0.3591, + "num_input_tokens_seen": 512584280, + "step": 9149 + }, + { + "epoch": 20.37639198218263, + "loss": 0.345126748085022, + "loss_ce": 6.446812767535448e-05, + "loss_iou": 0.15625, + "loss_num": 0.006500244140625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 512584280, + "step": 9149 + }, + { + "epoch": 20.378619153674833, + "grad_norm": 17.600801467895508, + "learning_rate": 1e-06, + "loss": 0.3535, + "num_input_tokens_seen": 512640636, + "step": 9150 + }, + { + "epoch": 20.378619153674833, + "loss": 0.45195910334587097, + "loss_ce": 5.4796357289887965e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.01806640625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 512640636, + "step": 9150 + }, + { + "epoch": 20.380846325167038, + "grad_norm": 15.987733840942383, + "learning_rate": 1e-06, + "loss": 0.3294, + "num_input_tokens_seen": 512697364, + "step": 9151 + }, + { + "epoch": 20.380846325167038, + "loss": 0.42148369550704956, + "loss_ce": 9.698521898826584e-05, + "loss_iou": 0.18359375, + "loss_num": 0.0107421875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 512697364, + "step": 9151 + }, + { + "epoch": 20.383073496659243, + "grad_norm": 17.254024505615234, + "learning_rate": 1e-06, + "loss": 0.4233, + "num_input_tokens_seen": 512753732, + "step": 9152 + }, + { + "epoch": 20.383073496659243, + "loss": 0.43444541096687317, + "loss_ce": 0.00011922699195565656, + "loss_iou": 0.2001953125, + "loss_num": 0.00665283203125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 512753732, + "step": 9152 + }, + { + "epoch": 20.385300668151448, + "grad_norm": 32.762237548828125, + "learning_rate": 1e-06, + "loss": 0.5156, + "num_input_tokens_seen": 512808448, + "step": 9153 + }, + { + "epoch": 20.385300668151448, + "loss": 0.7034420371055603, + "loss_ce": 7.288139750016853e-05, + "loss_iou": 0.283203125, + "loss_num": 0.02734375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 512808448, + "step": 9153 + }, + { + "epoch": 20.387527839643653, + "grad_norm": 34.91286087036133, + "learning_rate": 1e-06, + "loss": 0.4527, + "num_input_tokens_seen": 512864528, + "step": 9154 + }, + { + "epoch": 20.387527839643653, + "loss": 0.48030704259872437, + "loss_ce": 8.245596836786717e-05, + "loss_iou": 0.181640625, + "loss_num": 0.023193359375, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 512864528, + "step": 9154 + }, + { + "epoch": 20.389755011135858, + "grad_norm": 16.32798194885254, + "learning_rate": 1e-06, + "loss": 0.3353, + "num_input_tokens_seen": 512921932, + "step": 9155 + }, + { + "epoch": 20.389755011135858, + "loss": 0.3125593066215515, + "loss_ce": 5.93056101934053e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.005279541015625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 512921932, + "step": 9155 + }, + { + "epoch": 20.391982182628063, + "grad_norm": 22.385419845581055, + "learning_rate": 1e-06, + "loss": 0.4722, + "num_input_tokens_seen": 512977824, + "step": 9156 + }, + { + "epoch": 20.391982182628063, + "loss": 0.4710025191307068, + "loss_ce": 5.522385981748812e-05, + "loss_iou": 0.2109375, + "loss_num": 0.00958251953125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 512977824, + "step": 9156 + }, + { + "epoch": 20.394209354120267, + "grad_norm": 22.299945831298828, + "learning_rate": 1e-06, + "loss": 0.332, + "num_input_tokens_seen": 513036572, + "step": 9157 + }, + { + "epoch": 20.394209354120267, + "loss": 0.27536553144454956, + "loss_ce": 6.644198583671823e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.00482177734375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 513036572, + "step": 9157 + }, + { + "epoch": 20.396436525612472, + "grad_norm": 18.22890853881836, + "learning_rate": 1e-06, + "loss": 0.5649, + "num_input_tokens_seen": 513092372, + "step": 9158 + }, + { + "epoch": 20.396436525612472, + "loss": 0.5173371434211731, + "loss_ce": 6.420467980206013e-05, + "loss_iou": 0.21875, + "loss_num": 0.0162353515625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 513092372, + "step": 9158 + }, + { + "epoch": 20.398663697104677, + "grad_norm": 13.26450252532959, + "learning_rate": 1e-06, + "loss": 0.2702, + "num_input_tokens_seen": 513146088, + "step": 9159 + }, + { + "epoch": 20.398663697104677, + "loss": 0.24542546272277832, + "loss_ce": 6.411702634068206e-05, + "loss_iou": 0.111328125, + "loss_num": 0.004547119140625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 513146088, + "step": 9159 + }, + { + "epoch": 20.400890868596882, + "grad_norm": 13.41020393371582, + "learning_rate": 1e-06, + "loss": 0.2748, + "num_input_tokens_seen": 513201040, + "step": 9160 + }, + { + "epoch": 20.400890868596882, + "loss": 0.32405906915664673, + "loss_ce": 8.446039282716811e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.007537841796875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 513201040, + "step": 9160 + }, + { + "epoch": 20.403118040089087, + "grad_norm": 26.860549926757812, + "learning_rate": 1e-06, + "loss": 0.4289, + "num_input_tokens_seen": 513259616, + "step": 9161 + }, + { + "epoch": 20.403118040089087, + "loss": 0.5175533294677734, + "loss_ce": 9.72962225205265e-05, + "loss_iou": 0.21484375, + "loss_num": 0.017822265625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 513259616, + "step": 9161 + }, + { + "epoch": 20.40534521158129, + "grad_norm": 16.444419860839844, + "learning_rate": 1e-06, + "loss": 0.3839, + "num_input_tokens_seen": 513315256, + "step": 9162 + }, + { + "epoch": 20.40534521158129, + "loss": 0.21068307757377625, + "loss_ce": 5.074591172160581e-05, + "loss_iou": 0.0888671875, + "loss_num": 0.006622314453125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 513315256, + "step": 9162 + }, + { + "epoch": 20.407572383073497, + "grad_norm": 118.25879669189453, + "learning_rate": 1e-06, + "loss": 0.3403, + "num_input_tokens_seen": 513371288, + "step": 9163 + }, + { + "epoch": 20.407572383073497, + "loss": 0.3745216727256775, + "loss_ce": 7.099170761648566e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0145263671875, + "loss_xval": 0.375, + "num_input_tokens_seen": 513371288, + "step": 9163 + }, + { + "epoch": 20.4097995545657, + "grad_norm": 23.073387145996094, + "learning_rate": 1e-06, + "loss": 0.298, + "num_input_tokens_seen": 513429104, + "step": 9164 + }, + { + "epoch": 20.4097995545657, + "loss": 0.2354094237089157, + "loss_ce": 8.837871428113431e-05, + "loss_iou": 0.1064453125, + "loss_num": 0.004364013671875, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 513429104, + "step": 9164 + }, + { + "epoch": 20.412026726057906, + "grad_norm": 16.92924690246582, + "learning_rate": 1e-06, + "loss": 0.4139, + "num_input_tokens_seen": 513486220, + "step": 9165 + }, + { + "epoch": 20.412026726057906, + "loss": 0.30585503578186035, + "loss_ce": 6.89055013936013e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.007476806640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 513486220, + "step": 9165 + }, + { + "epoch": 20.41425389755011, + "grad_norm": 19.239347457885742, + "learning_rate": 1e-06, + "loss": 0.2636, + "num_input_tokens_seen": 513540880, + "step": 9166 + }, + { + "epoch": 20.41425389755011, + "loss": 0.24502648413181305, + "loss_ce": 6.187942926771939e-05, + "loss_iou": 0.103515625, + "loss_num": 0.007598876953125, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 513540880, + "step": 9166 + }, + { + "epoch": 20.416481069042316, + "grad_norm": 18.908649444580078, + "learning_rate": 1e-06, + "loss": 0.2436, + "num_input_tokens_seen": 513596724, + "step": 9167 + }, + { + "epoch": 20.416481069042316, + "loss": 0.274596244096756, + "loss_ce": 6.010765355313197e-05, + "loss_iou": 0.12060546875, + "loss_num": 0.0067138671875, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 513596724, + "step": 9167 + }, + { + "epoch": 20.41870824053452, + "grad_norm": 15.285927772521973, + "learning_rate": 1e-06, + "loss": 0.3752, + "num_input_tokens_seen": 513652904, + "step": 9168 + }, + { + "epoch": 20.41870824053452, + "loss": 0.34748250246047974, + "loss_ce": 7.040443597361445e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0089111328125, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 513652904, + "step": 9168 + }, + { + "epoch": 20.420935412026726, + "grad_norm": 20.112258911132812, + "learning_rate": 1e-06, + "loss": 0.3643, + "num_input_tokens_seen": 513708628, + "step": 9169 + }, + { + "epoch": 20.420935412026726, + "loss": 0.43531090021133423, + "loss_ce": 6.918917642906308e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.00628662109375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 513708628, + "step": 9169 + }, + { + "epoch": 20.42316258351893, + "grad_norm": 12.775040626525879, + "learning_rate": 1e-06, + "loss": 0.357, + "num_input_tokens_seen": 513766376, + "step": 9170 + }, + { + "epoch": 20.42316258351893, + "loss": 0.4500158429145813, + "loss_ce": 6.470449443440884e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0263671875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 513766376, + "step": 9170 + }, + { + "epoch": 20.425389755011135, + "grad_norm": 22.044282913208008, + "learning_rate": 1e-06, + "loss": 0.413, + "num_input_tokens_seen": 513821636, + "step": 9171 + }, + { + "epoch": 20.425389755011135, + "loss": 0.5979858040809631, + "loss_ce": 8.540972339687869e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0184326171875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 513821636, + "step": 9171 + }, + { + "epoch": 20.42761692650334, + "grad_norm": 16.4615478515625, + "learning_rate": 1e-06, + "loss": 0.4864, + "num_input_tokens_seen": 513878332, + "step": 9172 + }, + { + "epoch": 20.42761692650334, + "loss": 0.3729918897151947, + "loss_ce": 6.709150329697877e-05, + "loss_iou": 0.17578125, + "loss_num": 0.004119873046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 513878332, + "step": 9172 + }, + { + "epoch": 20.429844097995545, + "grad_norm": 17.841854095458984, + "learning_rate": 1e-06, + "loss": 0.329, + "num_input_tokens_seen": 513934092, + "step": 9173 + }, + { + "epoch": 20.429844097995545, + "loss": 0.3353223204612732, + "loss_ce": 5.619797593681142e-05, + "loss_iou": 0.154296875, + "loss_num": 0.00537109375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 513934092, + "step": 9173 + }, + { + "epoch": 20.43207126948775, + "grad_norm": 18.19709587097168, + "learning_rate": 1e-06, + "loss": 0.3155, + "num_input_tokens_seen": 513989020, + "step": 9174 + }, + { + "epoch": 20.43207126948775, + "loss": 0.2181463986635208, + "loss_ce": 6.778672832297161e-05, + "loss_iou": 0.09375, + "loss_num": 0.00616455078125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 513989020, + "step": 9174 + }, + { + "epoch": 20.434298440979955, + "grad_norm": 35.7059440612793, + "learning_rate": 1e-06, + "loss": 0.5612, + "num_input_tokens_seen": 514040600, + "step": 9175 + }, + { + "epoch": 20.434298440979955, + "loss": 0.6007704734802246, + "loss_ce": 6.243239477043971e-05, + "loss_iou": 0.25390625, + "loss_num": 0.018310546875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 514040600, + "step": 9175 + }, + { + "epoch": 20.43652561247216, + "grad_norm": 17.803630828857422, + "learning_rate": 1e-06, + "loss": 0.2915, + "num_input_tokens_seen": 514095804, + "step": 9176 + }, + { + "epoch": 20.43652561247216, + "loss": 0.27951860427856445, + "loss_ce": 9.964508353732526e-05, + "loss_iou": 0.10791015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 514095804, + "step": 9176 + }, + { + "epoch": 20.438752783964365, + "grad_norm": 15.95635986328125, + "learning_rate": 1e-06, + "loss": 0.3912, + "num_input_tokens_seen": 514152436, + "step": 9177 + }, + { + "epoch": 20.438752783964365, + "loss": 0.45222991704940796, + "loss_ce": 8.150480425683782e-05, + "loss_iou": 0.16796875, + "loss_num": 0.0234375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 514152436, + "step": 9177 + }, + { + "epoch": 20.44097995545657, + "grad_norm": 30.868133544921875, + "learning_rate": 1e-06, + "loss": 0.5745, + "num_input_tokens_seen": 514210436, + "step": 9178 + }, + { + "epoch": 20.44097995545657, + "loss": 0.45710474252700806, + "loss_ce": 7.350958185270429e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.00421142578125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 514210436, + "step": 9178 + }, + { + "epoch": 20.443207126948774, + "grad_norm": 28.9227352142334, + "learning_rate": 1e-06, + "loss": 0.3003, + "num_input_tokens_seen": 514268436, + "step": 9179 + }, + { + "epoch": 20.443207126948774, + "loss": 0.27464067935943604, + "loss_ce": 4.350075323600322e-05, + "loss_iou": 0.1181640625, + "loss_num": 0.00762939453125, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 514268436, + "step": 9179 + }, + { + "epoch": 20.44543429844098, + "grad_norm": 18.41109848022461, + "learning_rate": 1e-06, + "loss": 0.2609, + "num_input_tokens_seen": 514326408, + "step": 9180 + }, + { + "epoch": 20.44543429844098, + "loss": 0.2086607664823532, + "loss_ce": 0.00010363436012994498, + "loss_iou": 0.08642578125, + "loss_num": 0.007080078125, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 514326408, + "step": 9180 + }, + { + "epoch": 20.447661469933184, + "grad_norm": 22.37228012084961, + "learning_rate": 1e-06, + "loss": 0.2734, + "num_input_tokens_seen": 514381776, + "step": 9181 + }, + { + "epoch": 20.447661469933184, + "loss": 0.2675103545188904, + "loss_ce": 5.431023964774795e-05, + "loss_iou": 0.111328125, + "loss_num": 0.00897216796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 514381776, + "step": 9181 + }, + { + "epoch": 20.44988864142539, + "grad_norm": 20.06549072265625, + "learning_rate": 1e-06, + "loss": 0.2565, + "num_input_tokens_seen": 514435556, + "step": 9182 + }, + { + "epoch": 20.44988864142539, + "loss": 0.2752624452114105, + "loss_ce": 0.00011597707634791732, + "loss_iou": 0.10546875, + "loss_num": 0.012939453125, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 514435556, + "step": 9182 + }, + { + "epoch": 20.452115812917594, + "grad_norm": 14.442758560180664, + "learning_rate": 1e-06, + "loss": 0.4209, + "num_input_tokens_seen": 514491956, + "step": 9183 + }, + { + "epoch": 20.452115812917594, + "loss": 0.4563639163970947, + "loss_ce": 6.508714432129636e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0155029296875, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 514491956, + "step": 9183 + }, + { + "epoch": 20.4543429844098, + "grad_norm": 18.204957962036133, + "learning_rate": 1e-06, + "loss": 0.3314, + "num_input_tokens_seen": 514548608, + "step": 9184 + }, + { + "epoch": 20.4543429844098, + "loss": 0.2957186698913574, + "loss_ce": 6.437343108700588e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.00408935546875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 514548608, + "step": 9184 + }, + { + "epoch": 20.456570155902003, + "grad_norm": 19.535131454467773, + "learning_rate": 1e-06, + "loss": 0.3625, + "num_input_tokens_seen": 514604368, + "step": 9185 + }, + { + "epoch": 20.456570155902003, + "loss": 0.4076521098613739, + "loss_ce": 5.9345908084651455e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00946044921875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 514604368, + "step": 9185 + }, + { + "epoch": 20.45879732739421, + "grad_norm": 19.040164947509766, + "learning_rate": 1e-06, + "loss": 0.3052, + "num_input_tokens_seen": 514661100, + "step": 9186 + }, + { + "epoch": 20.45879732739421, + "loss": 0.29315710067749023, + "loss_ce": 6.629424751736224e-05, + "loss_iou": 0.130859375, + "loss_num": 0.006103515625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 514661100, + "step": 9186 + }, + { + "epoch": 20.461024498886413, + "grad_norm": 26.235673904418945, + "learning_rate": 1e-06, + "loss": 0.6357, + "num_input_tokens_seen": 514715804, + "step": 9187 + }, + { + "epoch": 20.461024498886413, + "loss": 0.4242648184299469, + "loss_ce": 7.046835526125506e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.00927734375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 514715804, + "step": 9187 + }, + { + "epoch": 20.463251670378618, + "grad_norm": 20.59581756591797, + "learning_rate": 1e-06, + "loss": 0.2953, + "num_input_tokens_seen": 514772648, + "step": 9188 + }, + { + "epoch": 20.463251670378618, + "loss": 0.297730028629303, + "loss_ce": 6.155186565592885e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.0030364990234375, + "loss_xval": 0.296875, + "num_input_tokens_seen": 514772648, + "step": 9188 + }, + { + "epoch": 20.465478841870823, + "grad_norm": 21.08500099182129, + "learning_rate": 1e-06, + "loss": 0.4619, + "num_input_tokens_seen": 514827056, + "step": 9189 + }, + { + "epoch": 20.465478841870823, + "loss": 0.5222786664962769, + "loss_ce": 6.190245039761066e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.01251220703125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 514827056, + "step": 9189 + }, + { + "epoch": 20.467706013363028, + "grad_norm": 24.425458908081055, + "learning_rate": 1e-06, + "loss": 0.4498, + "num_input_tokens_seen": 514878540, + "step": 9190 + }, + { + "epoch": 20.467706013363028, + "loss": 0.27809709310531616, + "loss_ce": 8.197416900657117e-05, + "loss_iou": 0.115234375, + "loss_num": 0.00933837890625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 514878540, + "step": 9190 + }, + { + "epoch": 20.469933184855233, + "grad_norm": 20.29898452758789, + "learning_rate": 1e-06, + "loss": 0.3158, + "num_input_tokens_seen": 514933080, + "step": 9191 + }, + { + "epoch": 20.469933184855233, + "loss": 0.2902224063873291, + "loss_ce": 6.12850344623439e-05, + "loss_iou": 0.130859375, + "loss_num": 0.005889892578125, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 514933080, + "step": 9191 + }, + { + "epoch": 20.472160356347437, + "grad_norm": 23.6481876373291, + "learning_rate": 1e-06, + "loss": 0.2375, + "num_input_tokens_seen": 514987504, + "step": 9192 + }, + { + "epoch": 20.472160356347437, + "loss": 0.16320902109146118, + "loss_ce": 9.255468467017636e-05, + "loss_iou": 0.06982421875, + "loss_num": 0.004638671875, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 514987504, + "step": 9192 + }, + { + "epoch": 20.474387527839642, + "grad_norm": 16.435392379760742, + "learning_rate": 1e-06, + "loss": 0.3885, + "num_input_tokens_seen": 515043248, + "step": 9193 + }, + { + "epoch": 20.474387527839642, + "loss": 0.4120558798313141, + "loss_ce": 6.857035623397678e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.01092529296875, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 515043248, + "step": 9193 + }, + { + "epoch": 20.476614699331847, + "grad_norm": 18.2564697265625, + "learning_rate": 1e-06, + "loss": 0.3537, + "num_input_tokens_seen": 515098600, + "step": 9194 + }, + { + "epoch": 20.476614699331847, + "loss": 0.3193051218986511, + "loss_ce": 9.127189696300775e-05, + "loss_iou": 0.1484375, + "loss_num": 0.004547119140625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 515098600, + "step": 9194 + }, + { + "epoch": 20.478841870824052, + "grad_norm": 47.8402099609375, + "learning_rate": 1e-06, + "loss": 0.2802, + "num_input_tokens_seen": 515157696, + "step": 9195 + }, + { + "epoch": 20.478841870824052, + "loss": 0.31334012746810913, + "loss_ce": 0.00010771671804832295, + "loss_iou": 0.1435546875, + "loss_num": 0.005035400390625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 515157696, + "step": 9195 + }, + { + "epoch": 20.481069042316257, + "grad_norm": 16.30150604248047, + "learning_rate": 1e-06, + "loss": 0.2492, + "num_input_tokens_seen": 515213144, + "step": 9196 + }, + { + "epoch": 20.481069042316257, + "loss": 0.20818519592285156, + "loss_ce": 5.53104946448002e-05, + "loss_iou": 0.080078125, + "loss_num": 0.00946044921875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 515213144, + "step": 9196 + }, + { + "epoch": 20.48329621380846, + "grad_norm": 12.833916664123535, + "learning_rate": 1e-06, + "loss": 0.2618, + "num_input_tokens_seen": 515271576, + "step": 9197 + }, + { + "epoch": 20.48329621380846, + "loss": 0.3264831006526947, + "loss_ce": 6.709274020977318e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.01092529296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 515271576, + "step": 9197 + }, + { + "epoch": 20.485523385300667, + "grad_norm": 124.69937896728516, + "learning_rate": 1e-06, + "loss": 0.3763, + "num_input_tokens_seen": 515327292, + "step": 9198 + }, + { + "epoch": 20.485523385300667, + "loss": 0.25111645460128784, + "loss_ce": 7.887819083407521e-05, + "loss_iou": 0.10595703125, + "loss_num": 0.0079345703125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 515327292, + "step": 9198 + }, + { + "epoch": 20.48775055679287, + "grad_norm": 30.976198196411133, + "learning_rate": 1e-06, + "loss": 0.5357, + "num_input_tokens_seen": 515382716, + "step": 9199 + }, + { + "epoch": 20.48775055679287, + "loss": 0.6795259118080139, + "loss_ce": 9.781922562979162e-05, + "loss_iou": 0.298828125, + "loss_num": 0.016357421875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 515382716, + "step": 9199 + }, + { + "epoch": 20.489977728285076, + "grad_norm": 37.82209396362305, + "learning_rate": 1e-06, + "loss": 0.3135, + "num_input_tokens_seen": 515438296, + "step": 9200 + }, + { + "epoch": 20.489977728285076, + "loss": 0.38873451948165894, + "loss_ce": 6.264903640840203e-05, + "loss_iou": 0.177734375, + "loss_num": 0.006622314453125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 515438296, + "step": 9200 + }, + { + "epoch": 20.49220489977728, + "grad_norm": 15.100608825683594, + "learning_rate": 1e-06, + "loss": 0.2894, + "num_input_tokens_seen": 515495380, + "step": 9201 + }, + { + "epoch": 20.49220489977728, + "loss": 0.3832995295524597, + "loss_ce": 5.9795893321279436e-05, + "loss_iou": 0.17578125, + "loss_num": 0.00653076171875, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 515495380, + "step": 9201 + }, + { + "epoch": 20.494432071269486, + "grad_norm": 26.354969024658203, + "learning_rate": 1e-06, + "loss": 0.3353, + "num_input_tokens_seen": 515552492, + "step": 9202 + }, + { + "epoch": 20.494432071269486, + "loss": 0.3944174647331238, + "loss_ce": 6.930494419066235e-05, + "loss_iou": 0.162109375, + "loss_num": 0.01409912109375, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 515552492, + "step": 9202 + }, + { + "epoch": 20.49665924276169, + "grad_norm": 19.706466674804688, + "learning_rate": 1e-06, + "loss": 0.3413, + "num_input_tokens_seen": 515607424, + "step": 9203 + }, + { + "epoch": 20.49665924276169, + "loss": 0.3399580419063568, + "loss_ce": 5.32609956280794e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 515607424, + "step": 9203 + }, + { + "epoch": 20.498886414253896, + "grad_norm": 24.811115264892578, + "learning_rate": 1e-06, + "loss": 0.4147, + "num_input_tokens_seen": 515661964, + "step": 9204 + }, + { + "epoch": 20.498886414253896, + "loss": 0.4160168766975403, + "loss_ce": 9.280447557102889e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.0107421875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 515661964, + "step": 9204 + }, + { + "epoch": 20.501113585746104, + "grad_norm": 27.92363929748535, + "learning_rate": 1e-06, + "loss": 0.3021, + "num_input_tokens_seen": 515717596, + "step": 9205 + }, + { + "epoch": 20.501113585746104, + "loss": 0.39837294816970825, + "loss_ce": 5.751060962211341e-05, + "loss_iou": 0.169921875, + "loss_num": 0.01171875, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 515717596, + "step": 9205 + }, + { + "epoch": 20.50334075723831, + "grad_norm": 14.264836311340332, + "learning_rate": 1e-06, + "loss": 0.4384, + "num_input_tokens_seen": 515773496, + "step": 9206 + }, + { + "epoch": 20.50334075723831, + "loss": 0.35908016562461853, + "loss_ce": 7.139322406146675e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.0120849609375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 515773496, + "step": 9206 + }, + { + "epoch": 20.505567928730514, + "grad_norm": 17.64116668701172, + "learning_rate": 1e-06, + "loss": 0.432, + "num_input_tokens_seen": 515827180, + "step": 9207 + }, + { + "epoch": 20.505567928730514, + "loss": 0.4668075442314148, + "loss_ce": 7.171083416324109e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01495361328125, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 515827180, + "step": 9207 + }, + { + "epoch": 20.50779510022272, + "grad_norm": 19.3404541015625, + "learning_rate": 1e-06, + "loss": 0.3338, + "num_input_tokens_seen": 515881772, + "step": 9208 + }, + { + "epoch": 20.50779510022272, + "loss": 0.31878164410591125, + "loss_ce": 5.6054937886074185e-05, + "loss_iou": 0.146484375, + "loss_num": 0.0050048828125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 515881772, + "step": 9208 + }, + { + "epoch": 20.510022271714924, + "grad_norm": 19.894765853881836, + "learning_rate": 1e-06, + "loss": 0.3582, + "num_input_tokens_seen": 515938532, + "step": 9209 + }, + { + "epoch": 20.510022271714924, + "loss": 0.3078060746192932, + "loss_ce": 6.68221473461017e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 515938532, + "step": 9209 + }, + { + "epoch": 20.51224944320713, + "grad_norm": 20.052772521972656, + "learning_rate": 1e-06, + "loss": 0.4125, + "num_input_tokens_seen": 515996532, + "step": 9210 + }, + { + "epoch": 20.51224944320713, + "loss": 0.5830047130584717, + "loss_ce": 5.793360833195038e-05, + "loss_iou": 0.248046875, + "loss_num": 0.017333984375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 515996532, + "step": 9210 + }, + { + "epoch": 20.514476614699333, + "grad_norm": 24.21659278869629, + "learning_rate": 1e-06, + "loss": 0.313, + "num_input_tokens_seen": 516052064, + "step": 9211 + }, + { + "epoch": 20.514476614699333, + "loss": 0.323422372341156, + "loss_ce": 5.810863513033837e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.00872802734375, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 516052064, + "step": 9211 + }, + { + "epoch": 20.51670378619154, + "grad_norm": 18.89595603942871, + "learning_rate": 1e-06, + "loss": 0.2474, + "num_input_tokens_seen": 516109116, + "step": 9212 + }, + { + "epoch": 20.51670378619154, + "loss": 0.18618465960025787, + "loss_ce": 5.79521874897182e-05, + "loss_iou": 0.08203125, + "loss_num": 0.004364013671875, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 516109116, + "step": 9212 + }, + { + "epoch": 20.518930957683743, + "grad_norm": 22.48140525817871, + "learning_rate": 1e-06, + "loss": 0.3139, + "num_input_tokens_seen": 516162824, + "step": 9213 + }, + { + "epoch": 20.518930957683743, + "loss": 0.28497621417045593, + "loss_ce": 6.412078801076859e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.0067138671875, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 516162824, + "step": 9213 + }, + { + "epoch": 20.521158129175948, + "grad_norm": 20.399208068847656, + "learning_rate": 1e-06, + "loss": 0.4476, + "num_input_tokens_seen": 516216872, + "step": 9214 + }, + { + "epoch": 20.521158129175948, + "loss": 0.6123678684234619, + "loss_ce": 6.322791159618646e-05, + "loss_iou": 0.2734375, + "loss_num": 0.012939453125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 516216872, + "step": 9214 + }, + { + "epoch": 20.523385300668153, + "grad_norm": 23.317928314208984, + "learning_rate": 1e-06, + "loss": 0.4519, + "num_input_tokens_seen": 516269896, + "step": 9215 + }, + { + "epoch": 20.523385300668153, + "loss": 0.4913950562477112, + "loss_ce": 6.203198427101597e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.01434326171875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 516269896, + "step": 9215 + }, + { + "epoch": 20.525612472160358, + "grad_norm": 24.348512649536133, + "learning_rate": 1e-06, + "loss": 0.5282, + "num_input_tokens_seen": 516324752, + "step": 9216 + }, + { + "epoch": 20.525612472160358, + "loss": 0.30804580450057983, + "loss_ce": 6.23964297119528e-05, + "loss_iou": 0.146484375, + "loss_num": 0.0029449462890625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 516324752, + "step": 9216 + }, + { + "epoch": 20.527839643652563, + "grad_norm": 19.019367218017578, + "learning_rate": 1e-06, + "loss": 0.3523, + "num_input_tokens_seen": 516381168, + "step": 9217 + }, + { + "epoch": 20.527839643652563, + "loss": 0.34173130989074707, + "loss_ce": 5.6528966524638236e-05, + "loss_iou": 0.15625, + "loss_num": 0.005767822265625, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 516381168, + "step": 9217 + }, + { + "epoch": 20.530066815144767, + "grad_norm": 14.334078788757324, + "learning_rate": 1e-06, + "loss": 0.4507, + "num_input_tokens_seen": 516438516, + "step": 9218 + }, + { + "epoch": 20.530066815144767, + "loss": 0.6355735063552856, + "loss_ce": 7.543759420514107e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 516438516, + "step": 9218 + }, + { + "epoch": 20.532293986636972, + "grad_norm": 24.67681312561035, + "learning_rate": 1e-06, + "loss": 0.5983, + "num_input_tokens_seen": 516492580, + "step": 9219 + }, + { + "epoch": 20.532293986636972, + "loss": 0.5057117938995361, + "loss_ce": 0.00015756976790726185, + "loss_iou": 0.1962890625, + "loss_num": 0.0224609375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 516492580, + "step": 9219 + }, + { + "epoch": 20.534521158129177, + "grad_norm": 25.80486297607422, + "learning_rate": 1e-06, + "loss": 0.4122, + "num_input_tokens_seen": 516546048, + "step": 9220 + }, + { + "epoch": 20.534521158129177, + "loss": 0.5275466442108154, + "loss_ce": 8.086032175924629e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.01214599609375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 516546048, + "step": 9220 + }, + { + "epoch": 20.536748329621382, + "grad_norm": 19.290876388549805, + "learning_rate": 1e-06, + "loss": 0.3358, + "num_input_tokens_seen": 516600172, + "step": 9221 + }, + { + "epoch": 20.536748329621382, + "loss": 0.40956592559814453, + "loss_ce": 6.581825437024236e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.005767822265625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 516600172, + "step": 9221 + }, + { + "epoch": 20.538975501113587, + "grad_norm": 14.446426391601562, + "learning_rate": 1e-06, + "loss": 0.3625, + "num_input_tokens_seen": 516655008, + "step": 9222 + }, + { + "epoch": 20.538975501113587, + "loss": 0.4629659950733185, + "loss_ce": 7.537858618889004e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0162353515625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 516655008, + "step": 9222 + }, + { + "epoch": 20.54120267260579, + "grad_norm": 20.61759376525879, + "learning_rate": 1e-06, + "loss": 0.3554, + "num_input_tokens_seen": 516713012, + "step": 9223 + }, + { + "epoch": 20.54120267260579, + "loss": 0.2745492458343506, + "loss_ce": 7.413580169668421e-05, + "loss_iou": 0.12109375, + "loss_num": 0.006561279296875, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 516713012, + "step": 9223 + }, + { + "epoch": 20.543429844097997, + "grad_norm": 15.4387788772583, + "learning_rate": 1e-06, + "loss": 0.3891, + "num_input_tokens_seen": 516771928, + "step": 9224 + }, + { + "epoch": 20.543429844097997, + "loss": 0.21673524379730225, + "loss_ce": 6.0440983361331746e-05, + "loss_iou": 0.09912109375, + "loss_num": 0.003662109375, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 516771928, + "step": 9224 + }, + { + "epoch": 20.5456570155902, + "grad_norm": 73.51461791992188, + "learning_rate": 1e-06, + "loss": 0.3232, + "num_input_tokens_seen": 516827052, + "step": 9225 + }, + { + "epoch": 20.5456570155902, + "loss": 0.37042248249053955, + "loss_ce": 6.116791337262839e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0037994384765625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 516827052, + "step": 9225 + }, + { + "epoch": 20.547884187082406, + "grad_norm": 18.046146392822266, + "learning_rate": 1e-06, + "loss": 0.3615, + "num_input_tokens_seen": 516883920, + "step": 9226 + }, + { + "epoch": 20.547884187082406, + "loss": 0.335880845785141, + "loss_ce": 6.541566108353436e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0064697265625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 516883920, + "step": 9226 + }, + { + "epoch": 20.55011135857461, + "grad_norm": 24.70758628845215, + "learning_rate": 1e-06, + "loss": 0.3517, + "num_input_tokens_seen": 516939448, + "step": 9227 + }, + { + "epoch": 20.55011135857461, + "loss": 0.3225664794445038, + "loss_ce": 5.6713775848038495e-05, + "loss_iou": 0.134765625, + "loss_num": 0.01055908203125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 516939448, + "step": 9227 + }, + { + "epoch": 20.552338530066816, + "grad_norm": 18.065723419189453, + "learning_rate": 1e-06, + "loss": 0.4389, + "num_input_tokens_seen": 516994964, + "step": 9228 + }, + { + "epoch": 20.552338530066816, + "loss": 0.35278570652008057, + "loss_ce": 6.352636410156265e-05, + "loss_iou": 0.16015625, + "loss_num": 0.0064697265625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 516994964, + "step": 9228 + }, + { + "epoch": 20.55456570155902, + "grad_norm": 17.124879837036133, + "learning_rate": 1e-06, + "loss": 0.4484, + "num_input_tokens_seen": 517051364, + "step": 9229 + }, + { + "epoch": 20.55456570155902, + "loss": 0.5044651031494141, + "loss_ce": 7.060293137328699e-05, + "loss_iou": 0.2265625, + "loss_num": 0.01043701171875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 517051364, + "step": 9229 + }, + { + "epoch": 20.556792873051226, + "grad_norm": 19.566333770751953, + "learning_rate": 1e-06, + "loss": 0.3415, + "num_input_tokens_seen": 517106692, + "step": 9230 + }, + { + "epoch": 20.556792873051226, + "loss": 0.3661907911300659, + "loss_ce": 7.13969930075109e-05, + "loss_iou": 0.1640625, + "loss_num": 0.007415771484375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 517106692, + "step": 9230 + }, + { + "epoch": 20.55902004454343, + "grad_norm": 16.50565528869629, + "learning_rate": 1e-06, + "loss": 0.3832, + "num_input_tokens_seen": 517162332, + "step": 9231 + }, + { + "epoch": 20.55902004454343, + "loss": 0.3715935945510864, + "loss_ce": 7.258763071149588e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00933837890625, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 517162332, + "step": 9231 + }, + { + "epoch": 20.561247216035635, + "grad_norm": 26.36023712158203, + "learning_rate": 1e-06, + "loss": 0.3381, + "num_input_tokens_seen": 517219120, + "step": 9232 + }, + { + "epoch": 20.561247216035635, + "loss": 0.26105207204818726, + "loss_ce": 6.5757536503952e-05, + "loss_iou": 0.119140625, + "loss_num": 0.004547119140625, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 517219120, + "step": 9232 + }, + { + "epoch": 20.56347438752784, + "grad_norm": 40.432926177978516, + "learning_rate": 1e-06, + "loss": 0.5472, + "num_input_tokens_seen": 517273304, + "step": 9233 + }, + { + "epoch": 20.56347438752784, + "loss": 0.3474746346473694, + "loss_ce": 6.25449392828159e-05, + "loss_iou": 0.162109375, + "loss_num": 0.00482177734375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 517273304, + "step": 9233 + }, + { + "epoch": 20.565701559020045, + "grad_norm": 37.93396759033203, + "learning_rate": 1e-06, + "loss": 0.3976, + "num_input_tokens_seen": 517327196, + "step": 9234 + }, + { + "epoch": 20.565701559020045, + "loss": 0.4561777412891388, + "loss_ce": 6.202464282978326e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01263427734375, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 517327196, + "step": 9234 + }, + { + "epoch": 20.56792873051225, + "grad_norm": 16.984933853149414, + "learning_rate": 1e-06, + "loss": 0.3733, + "num_input_tokens_seen": 517384080, + "step": 9235 + }, + { + "epoch": 20.56792873051225, + "loss": 0.4556872844696045, + "loss_ce": 5.983790470054373e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 517384080, + "step": 9235 + }, + { + "epoch": 20.570155902004455, + "grad_norm": 9.283608436584473, + "learning_rate": 1e-06, + "loss": 0.3774, + "num_input_tokens_seen": 517438320, + "step": 9236 + }, + { + "epoch": 20.570155902004455, + "loss": 0.4296599328517914, + "loss_ce": 0.00011740654008463025, + "loss_iou": 0.18359375, + "loss_num": 0.01220703125, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 517438320, + "step": 9236 + }, + { + "epoch": 20.57238307349666, + "grad_norm": 16.504417419433594, + "learning_rate": 1e-06, + "loss": 0.3546, + "num_input_tokens_seen": 517496136, + "step": 9237 + }, + { + "epoch": 20.57238307349666, + "loss": 0.3683554530143738, + "loss_ce": 6.933379336260259e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00958251953125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 517496136, + "step": 9237 + }, + { + "epoch": 20.574610244988865, + "grad_norm": 19.25970458984375, + "learning_rate": 1e-06, + "loss": 0.2776, + "num_input_tokens_seen": 517553096, + "step": 9238 + }, + { + "epoch": 20.574610244988865, + "loss": 0.3049623966217041, + "loss_ce": 6.127238884801045e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0030517578125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 517553096, + "step": 9238 + }, + { + "epoch": 20.57683741648107, + "grad_norm": 19.565448760986328, + "learning_rate": 1e-06, + "loss": 0.3708, + "num_input_tokens_seen": 517608736, + "step": 9239 + }, + { + "epoch": 20.57683741648107, + "loss": 0.23846621811389923, + "loss_ce": 6.2903571233619e-05, + "loss_iou": 0.09814453125, + "loss_num": 0.0084228515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 517608736, + "step": 9239 + }, + { + "epoch": 20.579064587973274, + "grad_norm": 15.315186500549316, + "learning_rate": 1e-06, + "loss": 0.4186, + "num_input_tokens_seen": 517664492, + "step": 9240 + }, + { + "epoch": 20.579064587973274, + "loss": 0.5499961972236633, + "loss_ce": 6.946978101041168e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0277099609375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 517664492, + "step": 9240 + }, + { + "epoch": 20.58129175946548, + "grad_norm": 19.74601173400879, + "learning_rate": 1e-06, + "loss": 0.4442, + "num_input_tokens_seen": 517723296, + "step": 9241 + }, + { + "epoch": 20.58129175946548, + "loss": 0.39636969566345215, + "loss_ce": 6.84402184560895e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.0169677734375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 517723296, + "step": 9241 + }, + { + "epoch": 20.583518930957684, + "grad_norm": 13.046077728271484, + "learning_rate": 1e-06, + "loss": 0.2947, + "num_input_tokens_seen": 517783636, + "step": 9242 + }, + { + "epoch": 20.583518930957684, + "loss": 0.2621099352836609, + "loss_ce": 5.5477234127465636e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.004302978515625, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 517783636, + "step": 9242 + }, + { + "epoch": 20.58574610244989, + "grad_norm": 15.847582817077637, + "learning_rate": 1e-06, + "loss": 0.2867, + "num_input_tokens_seen": 517837348, + "step": 9243 + }, + { + "epoch": 20.58574610244989, + "loss": 0.36224034428596497, + "loss_ce": 5.771276482846588e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.00860595703125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 517837348, + "step": 9243 + }, + { + "epoch": 20.587973273942094, + "grad_norm": 18.27277374267578, + "learning_rate": 1e-06, + "loss": 0.4209, + "num_input_tokens_seen": 517892936, + "step": 9244 + }, + { + "epoch": 20.587973273942094, + "loss": 0.3434508144855499, + "loss_ce": 6.703373946947977e-05, + "loss_iou": 0.158203125, + "loss_num": 0.005523681640625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 517892936, + "step": 9244 + }, + { + "epoch": 20.5902004454343, + "grad_norm": 18.745811462402344, + "learning_rate": 1e-06, + "loss": 0.3061, + "num_input_tokens_seen": 517948732, + "step": 9245 + }, + { + "epoch": 20.5902004454343, + "loss": 0.3197273313999176, + "loss_ce": 0.0001472493022447452, + "loss_iou": 0.134765625, + "loss_num": 0.010009765625, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 517948732, + "step": 9245 + }, + { + "epoch": 20.592427616926503, + "grad_norm": 25.77054214477539, + "learning_rate": 1e-06, + "loss": 0.5403, + "num_input_tokens_seen": 518005172, + "step": 9246 + }, + { + "epoch": 20.592427616926503, + "loss": 0.6770814657211304, + "loss_ce": 7.9466997704003e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 518005172, + "step": 9246 + }, + { + "epoch": 20.59465478841871, + "grad_norm": 14.424224853515625, + "learning_rate": 1e-06, + "loss": 0.5092, + "num_input_tokens_seen": 518059824, + "step": 9247 + }, + { + "epoch": 20.59465478841871, + "loss": 0.6334977746009827, + "loss_ce": 7.492607983294874e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 518059824, + "step": 9247 + }, + { + "epoch": 20.596881959910913, + "grad_norm": 22.720426559448242, + "learning_rate": 1e-06, + "loss": 0.3461, + "num_input_tokens_seen": 518116568, + "step": 9248 + }, + { + "epoch": 20.596881959910913, + "loss": 0.3957892954349518, + "loss_ce": 9.837795369094238e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.01470947265625, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 518116568, + "step": 9248 + }, + { + "epoch": 20.599109131403118, + "grad_norm": 24.825748443603516, + "learning_rate": 1e-06, + "loss": 0.4304, + "num_input_tokens_seen": 518171416, + "step": 9249 + }, + { + "epoch": 20.599109131403118, + "loss": 0.44866153597831726, + "loss_ce": 5.314835289027542e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0108642578125, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 518171416, + "step": 9249 + }, + { + "epoch": 20.601336302895323, + "grad_norm": 16.588918685913086, + "learning_rate": 1e-06, + "loss": 0.2945, + "num_input_tokens_seen": 518230356, + "step": 9250 + }, + { + "epoch": 20.601336302895323, + "eval_seeclick_web_CIoU": 0.583829253911972, + "eval_seeclick_web_GIoU": 0.5839782357215881, + "eval_seeclick_web_IoU": 0.6033029854297638, + "eval_seeclick_web_MAE_all": 0.015166566707193851, + "eval_seeclick_web_MAE_h": 0.006818295689299703, + "eval_seeclick_web_MAE_w": 0.015144134871661663, + "eval_seeclick_web_MAE_x_boxes": 0.008378135273233056, + "eval_seeclick_web_MAE_y_boxes": 0.021190070547163486, + "eval_seeclick_web_inside_bbox": 0.9010416567325592, + "eval_seeclick_web_loss": 0.9226139783859253, + "eval_seeclick_web_loss_ce": 0.00011066030492656864, + "eval_seeclick_web_loss_iou": 0.42724609375, + "eval_seeclick_web_loss_num": 0.0122528076171875, + "eval_seeclick_web_loss_xval": 0.9150390625, + "eval_seeclick_web_runtime": 20.8356, + "eval_seeclick_web_samples_per_second": 2.4, + "eval_seeclick_web_steps_per_second": 0.096, + "num_input_tokens_seen": 518230356, + "step": 9250 + }, + { + "epoch": 20.601336302895323, + "eval_icons_CIoU": 0.25548499822616577, + "eval_icons_GIoU": 0.2894558012485504, + "eval_icons_IoU": 0.33956870436668396, + "eval_icons_MAE_all": 0.062356239184737206, + "eval_icons_MAE_h": 0.029669910669326782, + "eval_icons_MAE_w": 0.07730034179985523, + "eval_icons_MAE_x_boxes": 0.05503878928720951, + "eval_icons_MAE_y_boxes": 0.037660352885723114, + "eval_icons_inside_bbox": 0.59375, + "eval_icons_loss": 1.7357631921768188, + "eval_icons_loss_ce": 0.00013154443513485603, + "eval_icons_loss_iou": 0.6806640625, + "eval_icons_loss_num": 0.06060218811035156, + "eval_icons_loss_xval": 1.6640625, + "eval_icons_runtime": 20.933, + "eval_icons_samples_per_second": 2.389, + "eval_icons_steps_per_second": 0.096, + "num_input_tokens_seen": 518230356, + "step": 9250 + }, + { + "epoch": 20.601336302895323, + "eval_screenspot_CIoU": 0.3744088610013326, + "eval_screenspot_GIoU": 0.3934180239836375, + "eval_screenspot_IoU": 0.44850159684817, + "eval_screenspot_MAE_all": 0.058480776846408844, + "eval_screenspot_MAE_h": 0.03973913627366225, + "eval_screenspot_MAE_w": 0.06190794085462888, + "eval_screenspot_MAE_x_boxes": 0.06778114537398021, + "eval_screenspot_MAE_y_boxes": 0.043080691869060196, + "eval_screenspot_inside_bbox": 0.7041666706403097, + "eval_screenspot_loss": 1.5582458972930908, + "eval_screenspot_loss_ce": 0.00013941208211084208, + "eval_screenspot_loss_iou": 0.640625, + "eval_screenspot_loss_num": 0.06737772623697917, + "eval_screenspot_loss_xval": 1.6188151041666667, + "eval_screenspot_runtime": 29.263, + "eval_screenspot_samples_per_second": 3.041, + "eval_screenspot_steps_per_second": 0.103, + "num_input_tokens_seen": 518230356, + "step": 9250 + }, + { + "epoch": 20.601336302895323, + "eval_compot_CIoU": 0.3450569659471512, + "eval_compot_GIoU": 0.3522755652666092, + "eval_compot_IoU": 0.40288856625556946, + "eval_compot_MAE_all": 0.018017619848251343, + "eval_compot_MAE_h": 0.008548983139917254, + "eval_compot_MAE_w": 0.020043439231812954, + "eval_compot_MAE_x_boxes": 0.03034346178174019, + "eval_compot_MAE_y_boxes": 0.007265899330377579, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 1.3769763708114624, + "eval_compot_loss_ce": 0.00010162792386836372, + "eval_compot_loss_iou": 0.633544921875, + "eval_compot_loss_num": 0.016551971435546875, + "eval_compot_loss_xval": 1.34912109375, + "eval_compot_runtime": 22.9916, + "eval_compot_samples_per_second": 2.175, + "eval_compot_steps_per_second": 0.087, + "num_input_tokens_seen": 518230356, + "step": 9250 + }, + { + "epoch": 20.601336302895323, + "eval_custom_ui_val_CIoU": 0.47359417213333976, + "eval_custom_ui_val_GIoU": 0.4787476923730638, + "eval_custom_ui_val_IoU": 0.5349448025226593, + "eval_custom_ui_val_MAE_all": 0.02652052231132984, + "eval_custom_ui_val_MAE_h": 0.013495955590365661, + "eval_custom_ui_val_MAE_w": 0.036366066274543606, + "eval_custom_ui_val_MAE_x_boxes": 0.03154980060127047, + "eval_custom_ui_val_MAE_y_boxes": 0.012846555560827255, + "eval_custom_ui_val_inside_bbox": 0.7596450646718343, + "eval_custom_ui_val_loss": 1.1579389572143555, + "eval_custom_ui_val_loss_ce": 0.00010260268269727628, + "eval_custom_ui_val_loss_iou": 0.4972601996527778, + "eval_custom_ui_val_loss_num": 0.023241043090820312, + "eval_custom_ui_val_loss_xval": 1.1111111111111112, + "eval_custom_ui_val_runtime": 58.4246, + "eval_custom_ui_val_samples_per_second": 4.536, + "eval_custom_ui_val_steps_per_second": 0.154, + "num_input_tokens_seen": 518230356, + "step": 9250 + } + ], + "logging_steps": 1.0, + "max_steps": 22450, + "num_input_tokens_seen": 518230356, + "num_train_epochs": 50, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.410904613476211e+19, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}