diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 16.0, + "epoch": 160.0, "eval_steps": 500, - "global_step": 2000, + "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -389,19 +389,3439 @@ "step": 2000 }, { - "epoch": 16.0, - "step": 2000, - "total_flos": 2.80447288344576e+17, - "train_loss": 0.1418005952835083, - "train_runtime": 624.6204, - "train_samples_per_second": 25.616, - "train_steps_per_second": 3.202 + "epoch": 16.8, + "grad_norm": 1.944307565689087, + "learning_rate": 5.37105e-05, + "loss": 0.1072, + "step": 2100 + }, + { + "epoch": 17.0, + "eval_accuracy_no_text": 0.9776347749308564, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9776347749308564, + "eval_iou_text": 0.0, + "eval_loss": 0.10060456395149231, + "eval_mean_accuracy": 0.9776347749308564, + "eval_mean_iou": 0.4888173874654282, + "eval_overall_accuracy": 0.9776347749308564, + "eval_runtime": 8.4116, + "eval_samples_per_second": 30.196, + "eval_steps_per_second": 3.804, + "step": 2125 + }, + { + "epoch": 17.6, + "grad_norm": 0.7363667488098145, + "learning_rate": 5.3411e-05, + "loss": 0.1046, + "step": 2200 + }, + { + "epoch": 18.0, + "eval_accuracy_no_text": 0.9800149224455696, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9800149224455696, + "eval_iou_text": 0.0, + "eval_loss": 0.09380830079317093, + "eval_mean_accuracy": 0.9800149224455696, + "eval_mean_iou": 0.4900074612227848, + "eval_overall_accuracy": 0.9800149224455696, + "eval_runtime": 8.0943, + "eval_samples_per_second": 31.38, + "eval_steps_per_second": 3.953, + "step": 2250 + }, + { + "epoch": 18.4, + "grad_norm": 1.9069602489471436, + "learning_rate": 5.31115e-05, + "loss": 0.1072, + "step": 2300 + }, + { + "epoch": 19.0, + "eval_accuracy_no_text": 0.9799700770156704, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9799700770156704, + "eval_iou_text": 0.0, + "eval_loss": 0.09618178755044937, + "eval_mean_accuracy": 0.9799700770156704, + "eval_mean_iou": 0.4899850385078352, + "eval_overall_accuracy": 0.9799700770156704, + "eval_runtime": 8.171, + "eval_samples_per_second": 31.086, + "eval_steps_per_second": 3.916, + "step": 2375 + }, + { + "epoch": 19.2, + "grad_norm": 0.44612982869148254, + "learning_rate": 5.2812e-05, + "loss": 0.1062, + "step": 2400 + }, + { + "epoch": 20.0, + "grad_norm": 1.044952392578125, + "learning_rate": 5.25125e-05, + "loss": 0.1127, + "step": 2500 + }, + { + "epoch": 20.0, + "eval_accuracy_no_text": 0.98404203872741, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.98404203872741, + "eval_iou_text": 0.0, + "eval_loss": 0.09183745831251144, + "eval_mean_accuracy": 0.98404203872741, + "eval_mean_iou": 0.492021019363705, + "eval_overall_accuracy": 0.98404203872741, + "eval_runtime": 8.4621, + "eval_samples_per_second": 30.016, + "eval_steps_per_second": 3.782, + "step": 2500 + }, + { + "epoch": 20.8, + "grad_norm": 3.0239295959472656, + "learning_rate": 5.2213e-05, + "loss": 0.1017, + "step": 2600 + }, + { + "epoch": 21.0, + "eval_accuracy_no_text": 0.9781973615966435, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9781973615966435, + "eval_iou_text": 0.0, + "eval_loss": 0.09400010854005814, + "eval_mean_accuracy": 0.9781973615966435, + "eval_mean_iou": 0.48909868079832175, + "eval_overall_accuracy": 0.9781973615966435, + "eval_runtime": 8.2162, + "eval_samples_per_second": 30.915, + "eval_steps_per_second": 3.895, + "step": 2625 + }, + { + "epoch": 21.6, + "grad_norm": 1.0735955238342285, + "learning_rate": 5.1913500000000004e-05, + "loss": 0.0961, + "step": 2700 + }, + { + "epoch": 22.0, + "eval_accuracy_no_text": 0.9784010685142297, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9784010685142297, + "eval_iou_text": 0.0, + "eval_loss": 0.09638506919145584, + "eval_mean_accuracy": 0.9784010685142297, + "eval_mean_iou": 0.48920053425711485, + "eval_overall_accuracy": 0.9784010685142297, + "eval_runtime": 8.6739, + "eval_samples_per_second": 29.283, + "eval_steps_per_second": 3.689, + "step": 2750 + }, + { + "epoch": 22.4, + "grad_norm": 0.7935839891433716, + "learning_rate": 5.1614000000000004e-05, + "loss": 0.0951, + "step": 2800 + }, + { + "epoch": 23.0, + "eval_accuracy_no_text": 0.9820980998348329, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9820980998348329, + "eval_iou_text": 0.0, + "eval_loss": 0.09395471215248108, + "eval_mean_accuracy": 0.9820980998348329, + "eval_mean_iou": 0.49104904991741644, + "eval_overall_accuracy": 0.9820980998348329, + "eval_runtime": 8.6006, + "eval_samples_per_second": 29.533, + "eval_steps_per_second": 3.721, + "step": 2875 + }, + { + "epoch": 23.2, + "grad_norm": 1.0572441816329956, + "learning_rate": 5.13145e-05, + "loss": 0.101, + "step": 2900 + }, + { + "epoch": 24.0, + "grad_norm": 0.9966760873794556, + "learning_rate": 5.1015e-05, + "loss": 0.0938, + "step": 3000 + }, + { + "epoch": 24.0, + "eval_accuracy_no_text": 0.983619405027922, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.983619405027922, + "eval_iou_text": 0.0, + "eval_loss": 0.10053908079862595, + "eval_mean_accuracy": 0.983619405027922, + "eval_mean_iou": 0.491809702513961, + "eval_overall_accuracy": 0.983619405027922, + "eval_runtime": 8.5467, + "eval_samples_per_second": 29.719, + "eval_steps_per_second": 3.744, + "step": 3000 + }, + { + "epoch": 24.8, + "grad_norm": 1.7212049961090088, + "learning_rate": 5.07155e-05, + "loss": 0.0949, + "step": 3100 + }, + { + "epoch": 25.0, + "eval_accuracy_no_text": 0.9802790768155358, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9802790768155358, + "eval_iou_text": 0.0, + "eval_loss": 0.10030657052993774, + "eval_mean_accuracy": 0.9802790768155358, + "eval_mean_iou": 0.4901395384077679, + "eval_overall_accuracy": 0.9802790768155358, + "eval_runtime": 8.5434, + "eval_samples_per_second": 29.731, + "eval_steps_per_second": 3.746, + "step": 3125 + }, + { + "epoch": 25.6, + "grad_norm": 0.9479594230651855, + "learning_rate": 5.0416e-05, + "loss": 0.0949, + "step": 3200 + }, + { + "epoch": 26.0, + "eval_accuracy_no_text": 0.9815445520404297, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9815445520404297, + "eval_iou_text": 0.0, + "eval_loss": 0.10147000104188919, + "eval_mean_accuracy": 0.9815445520404297, + "eval_mean_iou": 0.49077227602021484, + "eval_overall_accuracy": 0.9815445520404297, + "eval_runtime": 8.3101, + "eval_samples_per_second": 30.565, + "eval_steps_per_second": 3.851, + "step": 3250 + }, + { + "epoch": 26.4, + "grad_norm": 2.8663785457611084, + "learning_rate": 5.01165e-05, + "loss": 0.0949, + "step": 3300 + }, + { + "epoch": 27.0, + "eval_accuracy_no_text": 0.9779668869919259, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9779668869919259, + "eval_iou_text": 0.0, + "eval_loss": 0.0970182940363884, + "eval_mean_accuracy": 0.9779668869919259, + "eval_mean_iou": 0.48898344349596295, + "eval_overall_accuracy": 0.9779668869919259, + "eval_runtime": 8.507, + "eval_samples_per_second": 29.858, + "eval_steps_per_second": 3.762, + "step": 3375 + }, + { + "epoch": 27.2, + "grad_norm": 0.29338690638542175, + "learning_rate": 5.15e-05, + "loss": 0.0977, + "step": 3400 + }, + { + "epoch": 28.0, + "grad_norm": 0.5164900422096252, + "learning_rate": 5.125e-05, + "loss": 0.0883, + "step": 3500 + }, + { + "epoch": 28.0, + "eval_accuracy_no_text": 0.9779090481844123, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9779090481844123, + "eval_iou_text": 0.0, + "eval_loss": 0.09671156853437424, + "eval_mean_accuracy": 0.9779090481844123, + "eval_mean_iou": 0.48895452409220613, + "eval_overall_accuracy": 0.9779090481844123, + "eval_runtime": 8.4255, + "eval_samples_per_second": 30.147, + "eval_steps_per_second": 3.798, + "step": 3500 + }, + { + "epoch": 28.8, + "grad_norm": 0.9270356893539429, + "learning_rate": 5.1e-05, + "loss": 0.0846, + "step": 3600 + }, + { + "epoch": 29.0, + "eval_accuracy_no_text": 0.984867849291109, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.984867849291109, + "eval_iou_text": 0.0, + "eval_loss": 0.09728837013244629, + "eval_mean_accuracy": 0.984867849291109, + "eval_mean_iou": 0.4924339246455545, + "eval_overall_accuracy": 0.984867849291109, + "eval_runtime": 8.3583, + "eval_samples_per_second": 30.389, + "eval_steps_per_second": 3.829, + "step": 3625 + }, + { + "epoch": 29.6, + "grad_norm": 54.255714416503906, + "learning_rate": 5.075000000000001e-05, + "loss": 0.0842, + "step": 3700 + }, + { + "epoch": 30.0, + "eval_accuracy_no_text": 0.9820055344810097, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9820055344810097, + "eval_iou_text": 0.0, + "eval_loss": 0.0946170911192894, + "eval_mean_accuracy": 0.9820055344810097, + "eval_mean_iou": 0.49100276724050484, + "eval_overall_accuracy": 0.9820055344810097, + "eval_runtime": 8.1044, + "eval_samples_per_second": 31.341, + "eval_steps_per_second": 3.948, + "step": 3750 + }, + { + "epoch": 30.4, + "grad_norm": 0.4858163297176361, + "learning_rate": 5.050000000000001e-05, + "loss": 0.0814, + "step": 3800 + }, + { + "epoch": 31.0, + "eval_accuracy_no_text": 0.9818622916313758, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9818622916313758, + "eval_iou_text": 0.0, + "eval_loss": 0.09355071932077408, + "eval_mean_accuracy": 0.9818622916313758, + "eval_mean_iou": 0.4909311458156879, + "eval_overall_accuracy": 0.9818622916313758, + "eval_runtime": 8.3948, + "eval_samples_per_second": 30.257, + "eval_steps_per_second": 3.812, + "step": 3875 + }, + { + "epoch": 31.2, + "grad_norm": 0.7054563760757446, + "learning_rate": 5.0249999999999995e-05, + "loss": 0.082, + "step": 3900 + }, + { + "epoch": 32.0, + "grad_norm": 1.9450538158416748, + "learning_rate": 5e-05, + "loss": 0.0813, + "step": 4000 + }, + { + "epoch": 32.0, + "eval_accuracy_no_text": 0.9812941719799806, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9812941719799806, + "eval_iou_text": 0.0, + "eval_loss": 0.09379926323890686, + "eval_mean_accuracy": 0.9812941719799806, + "eval_mean_iou": 0.4906470859899903, + "eval_overall_accuracy": 0.9812941719799806, + "eval_runtime": 8.3235, + "eval_samples_per_second": 30.516, + "eval_steps_per_second": 3.845, + "step": 4000 + }, + { + "epoch": 32.8, + "grad_norm": 0.48973309993743896, + "learning_rate": 4.975e-05, + "loss": 0.0817, + "step": 4100 + }, + { + "epoch": 33.0, + "eval_accuracy_no_text": 0.9812250511988094, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9812250511988094, + "eval_iou_text": 0.0, + "eval_loss": 0.09455174952745438, + "eval_mean_accuracy": 0.9812250511988094, + "eval_mean_iou": 0.4906125255994047, + "eval_overall_accuracy": 0.9812250511988094, + "eval_runtime": 8.2561, + "eval_samples_per_second": 30.765, + "eval_steps_per_second": 3.876, + "step": 4125 + }, + { + "epoch": 33.6, + "grad_norm": 1.3802294731140137, + "learning_rate": 4.95e-05, + "loss": 0.0836, + "step": 4200 + }, + { + "epoch": 34.0, + "eval_accuracy_no_text": 0.9775148603448678, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9775148603448678, + "eval_iou_text": 0.0, + "eval_loss": 0.0939713567495346, + "eval_mean_accuracy": 0.9775148603448678, + "eval_mean_iou": 0.4887574301724339, + "eval_overall_accuracy": 0.9775148603448678, + "eval_runtime": 8.2502, + "eval_samples_per_second": 30.787, + "eval_steps_per_second": 3.879, + "step": 4250 + }, + { + "epoch": 34.4, + "grad_norm": 3.26483416557312, + "learning_rate": 4.925e-05, + "loss": 0.0836, + "step": 4300 + }, + { + "epoch": 35.0, + "eval_accuracy_no_text": 0.9811469413929715, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9811469413929715, + "eval_iou_text": 0.0, + "eval_loss": 0.09151175618171692, + "eval_mean_accuracy": 0.9811469413929715, + "eval_mean_iou": 0.49057347069648577, + "eval_overall_accuracy": 0.9811469413929715, + "eval_runtime": 8.2647, + "eval_samples_per_second": 30.733, + "eval_steps_per_second": 3.872, + "step": 4375 + }, + { + "epoch": 35.2, + "grad_norm": 1.5455337762832642, + "learning_rate": 4.9e-05, + "loss": 0.0784, + "step": 4400 + }, + { + "epoch": 36.0, + "grad_norm": 3.040316581726074, + "learning_rate": 4.875e-05, + "loss": 0.0785, + "step": 4500 + }, + { + "epoch": 36.0, + "eval_accuracy_no_text": 0.9815795942826148, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9815795942826148, + "eval_iou_text": 0.0, + "eval_loss": 0.09513239562511444, + "eval_mean_accuracy": 0.9815795942826148, + "eval_mean_iou": 0.4907897971413074, + "eval_overall_accuracy": 0.9815795942826148, + "eval_runtime": 8.2374, + "eval_samples_per_second": 30.835, + "eval_steps_per_second": 3.885, + "step": 4500 + }, + { + "epoch": 36.8, + "grad_norm": 2.6754534244537354, + "learning_rate": 4.85e-05, + "loss": 0.0746, + "step": 4600 + }, + { + "epoch": 37.0, + "eval_accuracy_no_text": 0.9757280549204483, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9757280549204483, + "eval_iou_text": 0.0, + "eval_loss": 0.0951041430234909, + "eval_mean_accuracy": 0.9757280549204483, + "eval_mean_iou": 0.48786402746022417, + "eval_overall_accuracy": 0.9757280549204483, + "eval_runtime": 8.2295, + "eval_samples_per_second": 30.864, + "eval_steps_per_second": 3.888, + "step": 4625 + }, + { + "epoch": 37.6, + "grad_norm": 0.629497766494751, + "learning_rate": 4.825000000000001e-05, + "loss": 0.0819, + "step": 4700 + }, + { + "epoch": 38.0, + "eval_accuracy_no_text": 0.9800142910538185, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9800142910538185, + "eval_iou_text": 0.0, + "eval_loss": 0.09518762677907944, + "eval_mean_accuracy": 0.9800142910538185, + "eval_mean_iou": 0.49000714552690927, + "eval_overall_accuracy": 0.9800142910538185, + "eval_runtime": 8.1766, + "eval_samples_per_second": 31.064, + "eval_steps_per_second": 3.914, + "step": 4750 + }, + { + "epoch": 38.4, + "grad_norm": 0.9247422218322754, + "learning_rate": 4.7999999999999994e-05, + "loss": 0.0731, + "step": 4800 + }, + { + "epoch": 39.0, + "eval_accuracy_no_text": 0.9797100599232311, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9797100599232311, + "eval_iou_text": 0.0, + "eval_loss": 0.09217899292707443, + "eval_mean_accuracy": 0.9797100599232311, + "eval_mean_iou": 0.48985502996161556, + "eval_overall_accuracy": 0.9797100599232311, + "eval_runtime": 8.1339, + "eval_samples_per_second": 31.227, + "eval_steps_per_second": 3.934, + "step": 4875 + }, + { + "epoch": 39.2, + "grad_norm": 0.5285217761993408, + "learning_rate": 4.775e-05, + "loss": 0.0826, + "step": 4900 + }, + { + "epoch": 40.0, + "grad_norm": 0.7555281519889832, + "learning_rate": 4.75e-05, + "loss": 0.0745, + "step": 5000 + }, + { + "epoch": 40.0, + "eval_accuracy_no_text": 0.9798135915548363, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9798135915548363, + "eval_iou_text": 0.0, + "eval_loss": 0.09387236088514328, + "eval_mean_accuracy": 0.9798135915548363, + "eval_mean_iou": 0.48990679577741814, + "eval_overall_accuracy": 0.9798135915548363, + "eval_runtime": 8.2225, + "eval_samples_per_second": 30.891, + "eval_steps_per_second": 3.892, + "step": 5000 + }, + { + "epoch": 40.8, + "grad_norm": 0.6379430294036865, + "learning_rate": 4.7249999999999997e-05, + "loss": 0.0755, + "step": 5100 + }, + { + "epoch": 41.0, + "eval_accuracy_no_text": 0.980224079270902, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.980224079270902, + "eval_iou_text": 0.0, + "eval_loss": 0.094584159553051, + "eval_mean_accuracy": 0.980224079270902, + "eval_mean_iou": 0.490112039635451, + "eval_overall_accuracy": 0.980224079270902, + "eval_runtime": 8.2271, + "eval_samples_per_second": 30.874, + "eval_steps_per_second": 3.89, + "step": 5125 + }, + { + "epoch": 41.6, + "grad_norm": 0.8154006004333496, + "learning_rate": 4.7000000000000004e-05, + "loss": 0.0692, + "step": 5200 + }, + { + "epoch": 42.0, + "eval_accuracy_no_text": 0.975735365772303, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.975735365772303, + "eval_iou_text": 0.0, + "eval_loss": 0.09759368002414703, + "eval_mean_accuracy": 0.975735365772303, + "eval_mean_iou": 0.4878676828861515, + "eval_overall_accuracy": 0.975735365772303, + "eval_runtime": 8.4223, + "eval_samples_per_second": 30.158, + "eval_steps_per_second": 3.799, + "step": 5250 + }, + { + "epoch": 42.4, + "grad_norm": 0.7524943351745605, + "learning_rate": 4.6750000000000005e-05, + "loss": 0.0798, + "step": 5300 + }, + { + "epoch": 43.0, + "eval_accuracy_no_text": 0.9804404638702271, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9804404638702271, + "eval_iou_text": 0.0, + "eval_loss": 0.09877783805131912, + "eval_mean_accuracy": 0.9804404638702271, + "eval_mean_iou": 0.49022023193511355, + "eval_overall_accuracy": 0.9804404638702271, + "eval_runtime": 8.2237, + "eval_samples_per_second": 30.886, + "eval_steps_per_second": 3.891, + "step": 5375 + }, + { + "epoch": 43.2, + "grad_norm": 0.3496994972229004, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.072, + "step": 5400 + }, + { + "epoch": 44.0, + "grad_norm": 1.7848222255706787, + "learning_rate": 4.625e-05, + "loss": 0.076, + "step": 5500 + }, + { + "epoch": 44.0, + "eval_accuracy_no_text": 0.9797547724285511, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9797547724285511, + "eval_iou_text": 0.0, + "eval_loss": 0.09654530137777328, + "eval_mean_accuracy": 0.9797547724285511, + "eval_mean_iou": 0.48987738621427557, + "eval_overall_accuracy": 0.9797547724285511, + "eval_runtime": 8.1971, + "eval_samples_per_second": 30.987, + "eval_steps_per_second": 3.904, + "step": 5500 + }, + { + "epoch": 44.8, + "grad_norm": 0.5874524712562561, + "learning_rate": 4.600000000000001e-05, + "loss": 0.0757, + "step": 5600 + }, + { + "epoch": 45.0, + "eval_accuracy_no_text": 0.9823000122706003, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9823000122706003, + "eval_iou_text": 0.0, + "eval_loss": 0.09140286594629288, + "eval_mean_accuracy": 0.9823000122706003, + "eval_mean_iou": 0.49115000613530013, + "eval_overall_accuracy": 0.9823000122706003, + "eval_runtime": 8.2597, + "eval_samples_per_second": 30.752, + "eval_steps_per_second": 3.874, + "step": 5625 + }, + { + "epoch": 45.6, + "grad_norm": 0.46248531341552734, + "learning_rate": 4.575000000000001e-05, + "loss": 0.0702, + "step": 5700 + }, + { + "epoch": 46.0, + "eval_accuracy_no_text": 0.9780766660787523, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9780766660787523, + "eval_iou_text": 0.0, + "eval_loss": 0.09352780878543854, + "eval_mean_accuracy": 0.9780766660787523, + "eval_mean_iou": 0.48903833303937616, + "eval_overall_accuracy": 0.9780766660787523, + "eval_runtime": 8.3183, + "eval_samples_per_second": 30.535, + "eval_steps_per_second": 3.847, + "step": 5750 + }, + { + "epoch": 46.4, + "grad_norm": 1.1901108026504517, + "learning_rate": 4.55e-05, + "loss": 0.0765, + "step": 5800 + }, + { + "epoch": 47.0, + "eval_accuracy_no_text": 0.9809388645798449, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9809388645798449, + "eval_iou_text": 0.0, + "eval_loss": 0.09659279137849808, + "eval_mean_accuracy": 0.9809388645798449, + "eval_mean_iou": 0.49046943228992246, + "eval_overall_accuracy": 0.9809388645798449, + "eval_runtime": 8.247, + "eval_samples_per_second": 30.799, + "eval_steps_per_second": 3.88, + "step": 5875 + }, + { + "epoch": 47.2, + "grad_norm": 0.2969698905944824, + "learning_rate": 4.525000000000001e-05, + "loss": 0.0722, + "step": 5900 + }, + { + "epoch": 48.0, + "grad_norm": 5.192142486572266, + "learning_rate": 4.5e-05, + "loss": 0.0724, + "step": 6000 + }, + { + "epoch": 48.0, + "eval_accuracy_no_text": 0.9832674872045554, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9832674872045554, + "eval_iou_text": 0.0, + "eval_loss": 0.09365521371364594, + "eval_mean_accuracy": 0.9832674872045554, + "eval_mean_iou": 0.4916337436022777, + "eval_overall_accuracy": 0.9832674872045554, + "eval_runtime": 8.1901, + "eval_samples_per_second": 31.013, + "eval_steps_per_second": 3.907, + "step": 6000 + }, + { + "epoch": 48.8, + "grad_norm": 0.8341678380966187, + "learning_rate": 4.4750000000000004e-05, + "loss": 0.0713, + "step": 6100 + }, + { + "epoch": 49.0, + "eval_accuracy_no_text": 0.9761614388952771, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9761614388952771, + "eval_iou_text": 0.0, + "eval_loss": 0.10168309509754181, + "eval_mean_accuracy": 0.9761614388952771, + "eval_mean_iou": 0.48808071944763853, + "eval_overall_accuracy": 0.9761614388952771, + "eval_runtime": 8.2425, + "eval_samples_per_second": 30.816, + "eval_steps_per_second": 3.882, + "step": 6125 + }, + { + "epoch": 49.6, + "grad_norm": 1.4235390424728394, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.0677, + "step": 6200 + }, + { + "epoch": 50.0, + "eval_accuracy_no_text": 0.9803571201590842, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9803571201590842, + "eval_iou_text": 0.0, + "eval_loss": 0.09322305768728256, + "eval_mean_accuracy": 0.9803571201590842, + "eval_mean_iou": 0.4901785600795421, + "eval_overall_accuracy": 0.9803571201590842, + "eval_runtime": 8.1322, + "eval_samples_per_second": 31.234, + "eval_steps_per_second": 3.935, + "step": 6250 + }, + { + "epoch": 50.4, + "grad_norm": 4.009937763214111, + "learning_rate": 4.4250000000000005e-05, + "loss": 0.0715, + "step": 6300 + }, + { + "epoch": 51.0, + "eval_accuracy_no_text": 0.9781054442501438, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9781054442501438, + "eval_iou_text": 0.0, + "eval_loss": 0.09748787432909012, + "eval_mean_accuracy": 0.9781054442501438, + "eval_mean_iou": 0.4890527221250719, + "eval_overall_accuracy": 0.9781054442501438, + "eval_runtime": 8.3284, + "eval_samples_per_second": 30.498, + "eval_steps_per_second": 3.842, + "step": 6375 + }, + { + "epoch": 51.2, + "grad_norm": 0.7396467328071594, + "learning_rate": 4.4e-05, + "loss": 0.07, + "step": 6400 + }, + { + "epoch": 52.0, + "grad_norm": 0.4652855694293976, + "learning_rate": 4.3750000000000006e-05, + "loss": 0.0713, + "step": 6500 + }, + { + "epoch": 52.0, + "eval_accuracy_no_text": 0.9833075639651767, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9833075639651767, + "eval_iou_text": 0.0, + "eval_loss": 0.09451069682836533, + "eval_mean_accuracy": 0.9833075639651767, + "eval_mean_iou": 0.49165378198258836, + "eval_overall_accuracy": 0.9833075639651767, + "eval_runtime": 8.2755, + "eval_samples_per_second": 30.693, + "eval_steps_per_second": 3.867, + "step": 6500 + }, + { + "epoch": 52.8, + "grad_norm": 0.32952389121055603, + "learning_rate": 4.35e-05, + "loss": 0.0695, + "step": 6600 + }, + { + "epoch": 53.0, + "eval_accuracy_no_text": 0.9819285545340948, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9819285545340948, + "eval_iou_text": 0.0, + "eval_loss": 0.09514930099248886, + "eval_mean_accuracy": 0.9819285545340948, + "eval_mean_iou": 0.4909642772670474, + "eval_overall_accuracy": 0.9819285545340948, + "eval_runtime": 8.5513, + "eval_samples_per_second": 29.703, + "eval_steps_per_second": 3.742, + "step": 6625 + }, + { + "epoch": 53.6, + "grad_norm": 1.479300856590271, + "learning_rate": 4.325e-05, + "loss": 0.0648, + "step": 6700 + }, + { + "epoch": 54.0, + "eval_accuracy_no_text": 0.9824639248922958, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9824639248922958, + "eval_iou_text": 0.0, + "eval_loss": 0.09646683931350708, + "eval_mean_accuracy": 0.9824639248922958, + "eval_mean_iou": 0.4912319624461479, + "eval_overall_accuracy": 0.9824639248922958, + "eval_runtime": 8.3734, + "eval_samples_per_second": 30.334, + "eval_steps_per_second": 3.822, + "step": 6750 + }, + { + "epoch": 54.4, + "grad_norm": 0.37164923548698425, + "learning_rate": 4.3e-05, + "loss": 0.0694, + "step": 6800 + }, + { + "epoch": 55.0, + "eval_accuracy_no_text": 0.9809235284065225, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9809235284065225, + "eval_iou_text": 0.0, + "eval_loss": 0.09463436901569366, + "eval_mean_accuracy": 0.9809235284065225, + "eval_mean_iou": 0.49046176420326126, + "eval_overall_accuracy": 0.9809235284065225, + "eval_runtime": 8.3659, + "eval_samples_per_second": 30.361, + "eval_steps_per_second": 3.825, + "step": 6875 + }, + { + "epoch": 55.2, + "grad_norm": 0.3387889862060547, + "learning_rate": 4.275e-05, + "loss": 0.0658, + "step": 6900 + }, + { + "epoch": 56.0, + "grad_norm": 0.8153048157691956, + "learning_rate": 4.25e-05, + "loss": 0.0665, + "step": 7000 + }, + { + "epoch": 56.0, + "eval_accuracy_no_text": 0.9824437868185507, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9824437868185507, + "eval_iou_text": 0.0, + "eval_loss": 0.1007036343216896, + "eval_mean_accuracy": 0.9824437868185507, + "eval_mean_iou": 0.49122189340927536, + "eval_overall_accuracy": 0.9824437868185507, + "eval_runtime": 8.3927, + "eval_samples_per_second": 30.265, + "eval_steps_per_second": 3.813, + "step": 7000 + }, + { + "epoch": 56.8, + "grad_norm": 0.8299281001091003, + "learning_rate": 4.225e-05, + "loss": 0.0635, + "step": 7100 + }, + { + "epoch": 57.0, + "eval_accuracy_no_text": 0.9831196252257952, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9831196252257952, + "eval_iou_text": 0.0, + "eval_loss": 0.09709285199642181, + "eval_mean_accuracy": 0.9831196252257952, + "eval_mean_iou": 0.4915598126128976, + "eval_overall_accuracy": 0.9831196252257952, + "eval_runtime": 8.2632, + "eval_samples_per_second": 30.739, + "eval_steps_per_second": 3.873, + "step": 7125 + }, + { + "epoch": 57.6, + "grad_norm": 0.6725646257400513, + "learning_rate": 4.2000000000000004e-05, + "loss": 0.0628, + "step": 7200 + }, + { + "epoch": 58.0, + "eval_accuracy_no_text": 0.9785253031490414, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9785253031490414, + "eval_iou_text": 0.0, + "eval_loss": 0.10017971694469452, + "eval_mean_accuracy": 0.9785253031490414, + "eval_mean_iou": 0.4892626515745207, + "eval_overall_accuracy": 0.9785253031490414, + "eval_runtime": 8.2012, + "eval_samples_per_second": 30.971, + "eval_steps_per_second": 3.902, + "step": 7250 + }, + { + "epoch": 58.4, + "grad_norm": 0.5249119400978088, + "learning_rate": 4.175e-05, + "loss": 0.0668, + "step": 7300 + }, + { + "epoch": 59.0, + "eval_accuracy_no_text": 0.9812511708786174, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9812511708786174, + "eval_iou_text": 0.0, + "eval_loss": 0.09598462283611298, + "eval_mean_accuracy": 0.9812511708786174, + "eval_mean_iou": 0.4906255854393087, + "eval_overall_accuracy": 0.9812511708786174, + "eval_runtime": 8.1994, + "eval_samples_per_second": 30.978, + "eval_steps_per_second": 3.903, + "step": 7375 + }, + { + "epoch": 59.2, + "grad_norm": 0.7958447933197021, + "learning_rate": 4.15e-05, + "loss": 0.0688, + "step": 7400 + }, + { + "epoch": 60.0, + "grad_norm": 0.3779708743095398, + "learning_rate": 4.125e-05, + "loss": 0.0648, + "step": 7500 + }, + { + "epoch": 60.0, + "eval_accuracy_no_text": 0.9796383969594832, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9796383969594832, + "eval_iou_text": 0.0, + "eval_loss": 0.0938754603266716, + "eval_mean_accuracy": 0.9796383969594832, + "eval_mean_iou": 0.4898191984797416, + "eval_overall_accuracy": 0.9796383969594832, + "eval_runtime": 8.2596, + "eval_samples_per_second": 30.752, + "eval_steps_per_second": 3.874, + "step": 7500 + }, + { + "epoch": 60.8, + "grad_norm": 0.30460646748542786, + "learning_rate": 4.1e-05, + "loss": 0.064, + "step": 7600 + }, + { + "epoch": 61.0, + "eval_accuracy_no_text": 0.9785537157778401, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9785537157778401, + "eval_iou_text": 0.0, + "eval_loss": 0.09466074407100677, + "eval_mean_accuracy": 0.9785537157778401, + "eval_mean_iou": 0.48927685788892006, + "eval_overall_accuracy": 0.9785537157778401, + "eval_runtime": 8.2318, + "eval_samples_per_second": 30.856, + "eval_steps_per_second": 3.887, + "step": 7625 + }, + { + "epoch": 61.6, + "grad_norm": 1.3763538599014282, + "learning_rate": 4.0749999999999994e-05, + "loss": 0.0636, + "step": 7700 + }, + { + "epoch": 62.0, + "eval_accuracy_no_text": 0.9788354161922574, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9788354161922574, + "eval_iou_text": 0.0, + "eval_loss": 0.09854825586080551, + "eval_mean_accuracy": 0.9788354161922574, + "eval_mean_iou": 0.4894177080961287, + "eval_overall_accuracy": 0.9788354161922574, + "eval_runtime": 8.1667, + "eval_samples_per_second": 31.102, + "eval_steps_per_second": 3.918, + "step": 7750 + }, + { + "epoch": 62.4, + "grad_norm": 0.5202354788780212, + "learning_rate": 4.05e-05, + "loss": 0.0653, + "step": 7800 + }, + { + "epoch": 63.0, + "eval_accuracy_no_text": 0.9812044645046096, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9812044645046096, + "eval_iou_text": 0.0, + "eval_loss": 0.0914328396320343, + "eval_mean_accuracy": 0.9812044645046096, + "eval_mean_iou": 0.4906022322523048, + "eval_overall_accuracy": 0.9812044645046096, + "eval_runtime": 8.3443, + "eval_samples_per_second": 30.44, + "eval_steps_per_second": 3.835, + "step": 7875 + }, + { + "epoch": 63.2, + "grad_norm": 0.6701187491416931, + "learning_rate": 4.0249999999999996e-05, + "loss": 0.061, + "step": 7900 + }, + { + "epoch": 64.0, + "grad_norm": 0.389220654964447, + "learning_rate": 3.9999999999999996e-05, + "loss": 0.0594, + "step": 8000 + }, + { + "epoch": 64.0, + "eval_accuracy_no_text": 0.9782057192295591, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9782057192295591, + "eval_iou_text": 0.0, + "eval_loss": 0.09664972871541977, + "eval_mean_accuracy": 0.9782057192295591, + "eval_mean_iou": 0.48910285961477956, + "eval_overall_accuracy": 0.9782057192295591, + "eval_runtime": 8.3923, + "eval_samples_per_second": 30.266, + "eval_steps_per_second": 3.813, + "step": 8000 + }, + { + "epoch": 64.8, + "grad_norm": 0.408395379781723, + "learning_rate": 3.975e-05, + "loss": 0.0608, + "step": 8100 + }, + { + "epoch": 65.0, + "eval_accuracy_no_text": 0.9794022730601506, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9794022730601506, + "eval_iou_text": 0.0, + "eval_loss": 0.09610570222139359, + "eval_mean_accuracy": 0.9794022730601506, + "eval_mean_iou": 0.4897011365300753, + "eval_overall_accuracy": 0.9794022730601506, + "eval_runtime": 8.4338, + "eval_samples_per_second": 30.117, + "eval_steps_per_second": 3.794, + "step": 8125 + }, + { + "epoch": 65.6, + "grad_norm": 0.7247848510742188, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0625, + "step": 8200 + }, + { + "epoch": 66.0, + "eval_accuracy_no_text": 0.9813712682359023, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9813712682359023, + "eval_iou_text": 0.0, + "eval_loss": 0.09536296874284744, + "eval_mean_accuracy": 0.9813712682359023, + "eval_mean_iou": 0.49068563411795113, + "eval_overall_accuracy": 0.9813712682359023, + "eval_runtime": 8.3567, + "eval_samples_per_second": 30.395, + "eval_steps_per_second": 3.829, + "step": 8250 + }, + { + "epoch": 66.4, + "grad_norm": 0.7799036502838135, + "learning_rate": 3.924999999999999e-05, + "loss": 0.0646, + "step": 8300 + }, + { + "epoch": 67.0, + "eval_accuracy_no_text": 0.9800713489294296, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9800713489294296, + "eval_iou_text": 0.0, + "eval_loss": 0.09814047813415527, + "eval_mean_accuracy": 0.9800713489294296, + "eval_mean_iou": 0.4900356744647148, + "eval_overall_accuracy": 0.9800713489294296, + "eval_runtime": 8.317, + "eval_samples_per_second": 30.54, + "eval_steps_per_second": 3.848, + "step": 8375 + }, + { + "epoch": 67.2, + "grad_norm": 0.9421979188919067, + "learning_rate": 3.9e-05, + "loss": 0.0616, + "step": 8400 + }, + { + "epoch": 68.0, + "grad_norm": 0.7759032249450684, + "learning_rate": 3.874999999999999e-05, + "loss": 0.0634, + "step": 8500 + }, + { + "epoch": 68.0, + "eval_accuracy_no_text": 0.9822642223276573, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9822642223276573, + "eval_iou_text": 0.0, + "eval_loss": 0.09961166232824326, + "eval_mean_accuracy": 0.9822642223276573, + "eval_mean_iou": 0.49113211116382866, + "eval_overall_accuracy": 0.9822642223276573, + "eval_runtime": 8.4898, + "eval_samples_per_second": 29.918, + "eval_steps_per_second": 3.769, + "step": 8500 + }, + { + "epoch": 68.8, + "grad_norm": 0.9563459753990173, + "learning_rate": 3.850000000000001e-05, + "loss": 0.0611, + "step": 8600 + }, + { + "epoch": 69.0, + "eval_accuracy_no_text": 0.981046533488977, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.981046533488977, + "eval_iou_text": 0.0, + "eval_loss": 0.10069240629673004, + "eval_mean_accuracy": 0.981046533488977, + "eval_mean_iou": 0.4905232667444885, + "eval_overall_accuracy": 0.981046533488977, + "eval_runtime": 8.3357, + "eval_samples_per_second": 30.472, + "eval_steps_per_second": 3.839, + "step": 8625 + }, + { + "epoch": 69.6, + "grad_norm": 0.3676619231700897, + "learning_rate": 3.825e-05, + "loss": 0.0599, + "step": 8700 + }, + { + "epoch": 70.0, + "eval_accuracy_no_text": 0.9792718242012625, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9792718242012625, + "eval_iou_text": 0.0, + "eval_loss": 0.09285294264554977, + "eval_mean_accuracy": 0.9792718242012625, + "eval_mean_iou": 0.48963591210063123, + "eval_overall_accuracy": 0.9792718242012625, + "eval_runtime": 8.2408, + "eval_samples_per_second": 30.822, + "eval_steps_per_second": 3.883, + "step": 8750 + }, + { + "epoch": 70.4, + "grad_norm": 0.6337301135063171, + "learning_rate": 3.800000000000001e-05, + "loss": 0.0583, + "step": 8800 + }, + { + "epoch": 71.0, + "eval_accuracy_no_text": 0.9825129740620114, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9825129740620114, + "eval_iou_text": 0.0, + "eval_loss": 0.09883657097816467, + "eval_mean_accuracy": 0.9825129740620114, + "eval_mean_iou": 0.4912564870310057, + "eval_overall_accuracy": 0.9825129740620114, + "eval_runtime": 8.1683, + "eval_samples_per_second": 31.096, + "eval_steps_per_second": 3.918, + "step": 8875 + }, + { + "epoch": 71.2, + "grad_norm": 1.0939313173294067, + "learning_rate": 3.7749999999999996e-05, + "loss": 0.0618, + "step": 8900 + }, + { + "epoch": 72.0, + "grad_norm": 0.729832112789154, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0596, + "step": 9000 + }, + { + "epoch": 72.0, + "eval_accuracy_no_text": 0.9790404523556354, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9790404523556354, + "eval_iou_text": 0.0, + "eval_loss": 0.09549351781606674, + "eval_mean_accuracy": 0.9790404523556354, + "eval_mean_iou": 0.4895202261778177, + "eval_overall_accuracy": 0.9790404523556354, + "eval_runtime": 8.3006, + "eval_samples_per_second": 30.6, + "eval_steps_per_second": 3.855, + "step": 9000 + }, + { + "epoch": 72.8, + "grad_norm": 0.31644830107688904, + "learning_rate": 3.725e-05, + "loss": 0.0598, + "step": 9100 + }, + { + "epoch": 73.0, + "eval_accuracy_no_text": 0.9799757761570025, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9799757761570025, + "eval_iou_text": 0.0, + "eval_loss": 0.10248930752277374, + "eval_mean_accuracy": 0.9799757761570025, + "eval_mean_iou": 0.48998788807850124, + "eval_overall_accuracy": 0.9799757761570025, + "eval_runtime": 8.2995, + "eval_samples_per_second": 30.604, + "eval_steps_per_second": 3.856, + "step": 9125 + }, + { + "epoch": 73.6, + "grad_norm": 0.7597707509994507, + "learning_rate": 3.7000000000000005e-05, + "loss": 0.0623, + "step": 9200 + }, + { + "epoch": 74.0, + "eval_accuracy_no_text": 0.9836081064386921, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9836081064386921, + "eval_iou_text": 0.0, + "eval_loss": 0.09970316290855408, + "eval_mean_accuracy": 0.9836081064386921, + "eval_mean_iou": 0.49180405321934606, + "eval_overall_accuracy": 0.9836081064386921, + "eval_runtime": 8.2563, + "eval_samples_per_second": 30.764, + "eval_steps_per_second": 3.876, + "step": 9250 + }, + { + "epoch": 74.4, + "grad_norm": 1.544348955154419, + "learning_rate": 3.675e-05, + "loss": 0.0637, + "step": 9300 + }, + { + "epoch": 75.0, + "eval_accuracy_no_text": 0.9782330019994349, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9782330019994349, + "eval_iou_text": 0.0, + "eval_loss": 0.09712851047515869, + "eval_mean_accuracy": 0.9782330019994349, + "eval_mean_iou": 0.48911650099971743, + "eval_overall_accuracy": 0.9782330019994349, + "eval_runtime": 8.2372, + "eval_samples_per_second": 30.836, + "eval_steps_per_second": 3.885, + "step": 9375 + }, + { + "epoch": 75.2, + "grad_norm": 1.103569507598877, + "learning_rate": 3.6500000000000006e-05, + "loss": 0.0577, + "step": 9400 + }, + { + "epoch": 76.0, + "grad_norm": 3.0431878566741943, + "learning_rate": 3.625e-05, + "loss": 0.0627, + "step": 9500 + }, + { + "epoch": 76.0, + "eval_accuracy_no_text": 0.9806131328985758, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9806131328985758, + "eval_iou_text": 0.0, + "eval_loss": 0.09336179494857788, + "eval_mean_accuracy": 0.9806131328985758, + "eval_mean_iou": 0.4903065664492879, + "eval_overall_accuracy": 0.9806131328985758, + "eval_runtime": 8.3291, + "eval_samples_per_second": 30.496, + "eval_steps_per_second": 3.842, + "step": 9500 + }, + { + "epoch": 76.8, + "grad_norm": 0.6158745288848877, + "learning_rate": 3.6e-05, + "loss": 0.0566, + "step": 9600 + }, + { + "epoch": 77.0, + "eval_accuracy_no_text": 0.9830253983147323, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9830253983147323, + "eval_iou_text": 0.0, + "eval_loss": 0.1015550047159195, + "eval_mean_accuracy": 0.9830253983147323, + "eval_mean_iou": 0.4915126991573662, + "eval_overall_accuracy": 0.9830253983147323, + "eval_runtime": 8.1139, + "eval_samples_per_second": 31.304, + "eval_steps_per_second": 3.944, + "step": 9625 + }, + { + "epoch": 77.6, + "grad_norm": 0.6829688549041748, + "learning_rate": 3.575e-05, + "loss": 0.0585, + "step": 9700 + }, + { + "epoch": 78.0, + "eval_accuracy_no_text": 0.9816801849579057, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9816801849579057, + "eval_iou_text": 0.0, + "eval_loss": 0.09153465181589127, + "eval_mean_accuracy": 0.9816801849579057, + "eval_mean_iou": 0.4908400924789528, + "eval_overall_accuracy": 0.9816801849579057, + "eval_runtime": 8.1774, + "eval_samples_per_second": 31.061, + "eval_steps_per_second": 3.913, + "step": 9750 + }, + { + "epoch": 78.4, + "grad_norm": 0.5731038451194763, + "learning_rate": 3.55e-05, + "loss": 0.0574, + "step": 9800 + }, + { + "epoch": 79.0, + "eval_accuracy_no_text": 0.981351246471164, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.981351246471164, + "eval_iou_text": 0.0, + "eval_loss": 0.09391193091869354, + "eval_mean_accuracy": 0.981351246471164, + "eval_mean_iou": 0.490675623235582, + "eval_overall_accuracy": 0.981351246471164, + "eval_runtime": 8.219, + "eval_samples_per_second": 30.904, + "eval_steps_per_second": 3.893, + "step": 9875 + }, + { + "epoch": 79.2, + "grad_norm": 0.2290593832731247, + "learning_rate": 3.5249999999999996e-05, + "loss": 0.0561, + "step": 9900 + }, + { + "epoch": 80.0, + "grad_norm": 0.826732337474823, + "learning_rate": 3.5000000000000004e-05, + "loss": 0.0579, + "step": 10000 + }, + { + "epoch": 80.0, + "eval_accuracy_no_text": 0.9797084980594257, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9797084980594257, + "eval_iou_text": 0.0, + "eval_loss": 0.0996052473783493, + "eval_mean_accuracy": 0.9797084980594257, + "eval_mean_iou": 0.48985424902971286, + "eval_overall_accuracy": 0.9797084980594257, + "eval_runtime": 8.2761, + "eval_samples_per_second": 30.691, + "eval_steps_per_second": 3.867, + "step": 10000 + }, + { + "epoch": 80.8, + "grad_norm": 0.6924729943275452, + "learning_rate": 3.4750000000000004e-05, + "loss": 0.0564, + "step": 10100 + }, + { + "epoch": 81.0, + "eval_accuracy_no_text": 0.9801340560996555, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9801340560996555, + "eval_iou_text": 0.0, + "eval_loss": 0.09884705394506454, + "eval_mean_accuracy": 0.9801340560996555, + "eval_mean_iou": 0.49006702804982777, + "eval_overall_accuracy": 0.9801340560996555, + "eval_runtime": 8.235, + "eval_samples_per_second": 30.844, + "eval_steps_per_second": 3.886, + "step": 10125 + }, + { + "epoch": 81.6, + "grad_norm": 0.6362507939338684, + "learning_rate": 3.45e-05, + "loss": 0.0614, + "step": 10200 + }, + { + "epoch": 82.0, + "eval_accuracy_no_text": 0.9835959936864148, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9835959936864148, + "eval_iou_text": 0.0, + "eval_loss": 0.1010938361287117, + "eval_mean_accuracy": 0.9835959936864148, + "eval_mean_iou": 0.4917979968432074, + "eval_overall_accuracy": 0.9835959936864148, + "eval_runtime": 8.1531, + "eval_samples_per_second": 31.154, + "eval_steps_per_second": 3.925, + "step": 10250 + }, + { + "epoch": 82.4, + "grad_norm": 0.2358619123697281, + "learning_rate": 3.4250000000000006e-05, + "loss": 0.0556, + "step": 10300 + }, + { + "epoch": 83.0, + "eval_accuracy_no_text": 0.9816712457799561, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9816712457799561, + "eval_iou_text": 0.0, + "eval_loss": 0.09844139963388443, + "eval_mean_accuracy": 0.9816712457799561, + "eval_mean_iou": 0.49083562288997806, + "eval_overall_accuracy": 0.9816712457799561, + "eval_runtime": 8.4411, + "eval_samples_per_second": 30.091, + "eval_steps_per_second": 3.791, + "step": 10375 + }, + { + "epoch": 83.2, + "grad_norm": 0.4608982801437378, + "learning_rate": 3.4e-05, + "loss": 0.0585, + "step": 10400 + }, + { + "epoch": 84.0, + "grad_norm": 0.537953794002533, + "learning_rate": 3.3749999999999994e-05, + "loss": 0.0582, + "step": 10500 + }, + { + "epoch": 84.0, + "eval_accuracy_no_text": 0.9811251916087039, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9811251916087039, + "eval_iou_text": 0.0, + "eval_loss": 0.0963573008775711, + "eval_mean_accuracy": 0.9811251916087039, + "eval_mean_iou": 0.49056259580435196, + "eval_overall_accuracy": 0.9811251916087039, + "eval_runtime": 8.3426, + "eval_samples_per_second": 30.446, + "eval_steps_per_second": 3.836, + "step": 10500 + }, + { + "epoch": 84.8, + "grad_norm": 0.3407799303531647, + "learning_rate": 3.35e-05, + "loss": 0.057, + "step": 10600 + }, + { + "epoch": 85.0, + "eval_accuracy_no_text": 0.9821431280360285, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9821431280360285, + "eval_iou_text": 0.0, + "eval_loss": 0.09560161828994751, + "eval_mean_accuracy": 0.9821431280360285, + "eval_mean_iou": 0.4910715640180143, + "eval_overall_accuracy": 0.9821431280360285, + "eval_runtime": 8.3368, + "eval_samples_per_second": 30.467, + "eval_steps_per_second": 3.838, + "step": 10625 + }, + { + "epoch": 85.6, + "grad_norm": 0.7103127241134644, + "learning_rate": 3.325e-05, + "loss": 0.0552, + "step": 10700 + }, + { + "epoch": 86.0, + "eval_accuracy_no_text": 0.9803550099813897, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9803550099813897, + "eval_iou_text": 0.0, + "eval_loss": 0.10003025084733963, + "eval_mean_accuracy": 0.9803550099813897, + "eval_mean_iou": 0.49017750499069485, + "eval_overall_accuracy": 0.9803550099813897, + "eval_runtime": 8.3658, + "eval_samples_per_second": 30.362, + "eval_steps_per_second": 3.825, + "step": 10750 + }, + { + "epoch": 86.4, + "grad_norm": 0.22427453100681305, + "learning_rate": 3.2999999999999996e-05, + "loss": 0.059, + "step": 10800 + }, + { + "epoch": 87.0, + "eval_accuracy_no_text": 0.9828363297164279, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9828363297164279, + "eval_iou_text": 0.0, + "eval_loss": 0.09896524995565414, + "eval_mean_accuracy": 0.9828363297164279, + "eval_mean_iou": 0.49141816485821394, + "eval_overall_accuracy": 0.9828363297164279, + "eval_runtime": 8.1925, + "eval_samples_per_second": 31.004, + "eval_steps_per_second": 3.906, + "step": 10875 + }, + { + "epoch": 87.2, + "grad_norm": 0.7796413898468018, + "learning_rate": 3.2749999999999996e-05, + "loss": 0.0596, + "step": 10900 + }, + { + "epoch": 88.0, + "grad_norm": 0.44246774911880493, + "learning_rate": 3.25e-05, + "loss": 0.0547, + "step": 11000 + }, + { + "epoch": 88.0, + "eval_accuracy_no_text": 0.981057084377449, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.981057084377449, + "eval_iou_text": 0.0, + "eval_loss": 0.09589246660470963, + "eval_mean_accuracy": 0.981057084377449, + "eval_mean_iou": 0.4905285421887245, + "eval_overall_accuracy": 0.981057084377449, + "eval_runtime": 8.2104, + "eval_samples_per_second": 30.936, + "eval_steps_per_second": 3.898, + "step": 11000 + }, + { + "epoch": 88.8, + "grad_norm": 0.19626279175281525, + "learning_rate": 3.225e-05, + "loss": 0.0532, + "step": 11100 + }, + { + "epoch": 89.0, + "eval_accuracy_no_text": 0.9818968520219614, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9818968520219614, + "eval_iou_text": 0.0, + "eval_loss": 0.09797363728284836, + "eval_mean_accuracy": 0.9818968520219614, + "eval_mean_iou": 0.4909484260109807, + "eval_overall_accuracy": 0.9818968520219614, + "eval_runtime": 8.1435, + "eval_samples_per_second": 31.19, + "eval_steps_per_second": 3.929, + "step": 11125 + }, + { + "epoch": 89.6, + "grad_norm": 1.401407241821289, + "learning_rate": 3.2e-05, + "loss": 0.0578, + "step": 11200 + }, + { + "epoch": 90.0, + "eval_accuracy_no_text": 0.9829336138928118, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9829336138928118, + "eval_iou_text": 0.0, + "eval_loss": 0.09540460258722305, + "eval_mean_accuracy": 0.9829336138928118, + "eval_mean_iou": 0.4914668069464059, + "eval_overall_accuracy": 0.9829336138928118, + "eval_runtime": 7.9565, + "eval_samples_per_second": 31.924, + "eval_steps_per_second": 4.022, + "step": 11250 + }, + { + "epoch": 90.4, + "grad_norm": 0.3004220426082611, + "learning_rate": 3.1750000000000006e-05, + "loss": 0.0552, + "step": 11300 + }, + { + "epoch": 91.0, + "eval_accuracy_no_text": 0.9817473783326788, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9817473783326788, + "eval_iou_text": 0.0, + "eval_loss": 0.10131113231182098, + "eval_mean_accuracy": 0.9817473783326788, + "eval_mean_iou": 0.4908736891663394, + "eval_overall_accuracy": 0.9817473783326788, + "eval_runtime": 7.9524, + "eval_samples_per_second": 31.94, + "eval_steps_per_second": 4.024, + "step": 11375 + }, + { + "epoch": 91.2, + "grad_norm": 0.3821699321269989, + "learning_rate": 3.15e-05, + "loss": 0.0555, + "step": 11400 + }, + { + "epoch": 92.0, + "grad_norm": 0.6809713840484619, + "learning_rate": 3.1249999999999994e-05, + "loss": 0.0584, + "step": 11500 + }, + { + "epoch": 92.0, + "eval_accuracy_no_text": 0.9802022796399172, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9802022796399172, + "eval_iou_text": 0.0, + "eval_loss": 0.09861049801111221, + "eval_mean_accuracy": 0.9802022796399172, + "eval_mean_iou": 0.4901011398199586, + "eval_overall_accuracy": 0.9802022796399172, + "eval_runtime": 8.4024, + "eval_samples_per_second": 30.23, + "eval_steps_per_second": 3.808, + "step": 11500 + }, + { + "epoch": 92.8, + "grad_norm": 2.616292953491211, + "learning_rate": 3.1e-05, + "loss": 0.0528, + "step": 11600 + }, + { + "epoch": 93.0, + "eval_accuracy_no_text": 0.9805875116859397, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9805875116859397, + "eval_iou_text": 0.0, + "eval_loss": 0.10085444152355194, + "eval_mean_accuracy": 0.9805875116859397, + "eval_mean_iou": 0.49029375584296986, + "eval_overall_accuracy": 0.9805875116859397, + "eval_runtime": 8.1295, + "eval_samples_per_second": 31.244, + "eval_steps_per_second": 3.936, + "step": 11625 + }, + { + "epoch": 93.6, + "grad_norm": 0.6591981053352356, + "learning_rate": 3.075e-05, + "loss": 0.0566, + "step": 11700 + }, + { + "epoch": 94.0, + "eval_accuracy_no_text": 0.9802413096194776, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9802413096194776, + "eval_iou_text": 0.0, + "eval_loss": 0.098316490650177, + "eval_mean_accuracy": 0.9802413096194776, + "eval_mean_iou": 0.4901206548097388, + "eval_overall_accuracy": 0.9802413096194776, + "eval_runtime": 8.5005, + "eval_samples_per_second": 29.881, + "eval_steps_per_second": 3.764, + "step": 11750 + }, + { + "epoch": 94.4, + "grad_norm": 1.771093487739563, + "learning_rate": 3.05e-05, + "loss": 0.0541, + "step": 11800 + }, + { + "epoch": 95.0, + "eval_accuracy_no_text": 0.9805775257269292, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9805775257269292, + "eval_iou_text": 0.0, + "eval_loss": 0.10317344218492508, + "eval_mean_accuracy": 0.9805775257269292, + "eval_mean_iou": 0.4902887628634646, + "eval_overall_accuracy": 0.9805775257269292, + "eval_runtime": 8.3723, + "eval_samples_per_second": 30.338, + "eval_steps_per_second": 3.822, + "step": 11875 + }, + { + "epoch": 95.2, + "grad_norm": 1.3780484199523926, + "learning_rate": 3.025e-05, + "loss": 0.0534, + "step": 11900 + }, + { + "epoch": 96.0, + "grad_norm": 0.5944967865943909, + "learning_rate": 3e-05, + "loss": 0.0577, + "step": 12000 + }, + { + "epoch": 96.0, + "eval_accuracy_no_text": 0.9799943357513698, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9799943357513698, + "eval_iou_text": 0.0, + "eval_loss": 0.10296148806810379, + "eval_mean_accuracy": 0.9799943357513698, + "eval_mean_iou": 0.4899971678756849, + "eval_overall_accuracy": 0.9799943357513698, + "eval_runtime": 8.2647, + "eval_samples_per_second": 30.733, + "eval_steps_per_second": 3.872, + "step": 12000 + }, + { + "epoch": 96.8, + "grad_norm": 0.368669718503952, + "learning_rate": 2.975e-05, + "loss": 0.0567, + "step": 12100 + }, + { + "epoch": 97.0, + "eval_accuracy_no_text": 0.9795809070789898, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9795809070789898, + "eval_iou_text": 0.0, + "eval_loss": 0.10392536222934723, + "eval_mean_accuracy": 0.9795809070789898, + "eval_mean_iou": 0.4897904535394949, + "eval_overall_accuracy": 0.9795809070789898, + "eval_runtime": 8.318, + "eval_samples_per_second": 30.536, + "eval_steps_per_second": 3.847, + "step": 12125 + }, + { + "epoch": 97.6, + "grad_norm": 0.6572363376617432, + "learning_rate": 2.95e-05, + "loss": 0.056, + "step": 12200 + }, + { + "epoch": 98.0, + "eval_accuracy_no_text": 0.9788773871281321, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9788773871281321, + "eval_iou_text": 0.0, + "eval_loss": 0.10201127082109451, + "eval_mean_accuracy": 0.9788773871281321, + "eval_mean_iou": 0.48943869356406605, + "eval_overall_accuracy": 0.9788773871281321, + "eval_runtime": 8.2133, + "eval_samples_per_second": 30.925, + "eval_steps_per_second": 3.896, + "step": 12250 + }, + { + "epoch": 98.4, + "grad_norm": 0.1991199553012848, + "learning_rate": 2.9249999999999996e-05, + "loss": 0.0517, + "step": 12300 + }, + { + "epoch": 99.0, + "eval_accuracy_no_text": 0.981906671825248, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.981906671825248, + "eval_iou_text": 0.0, + "eval_loss": 0.10037293285131454, + "eval_mean_accuracy": 0.981906671825248, + "eval_mean_iou": 0.490953335912624, + "eval_overall_accuracy": 0.981906671825248, + "eval_runtime": 8.1469, + "eval_samples_per_second": 31.178, + "eval_steps_per_second": 3.928, + "step": 12375 + }, + { + "epoch": 99.2, + "grad_norm": 0.3791349530220032, + "learning_rate": 2.9e-05, + "loss": 0.0562, + "step": 12400 + }, + { + "epoch": 100.0, + "grad_norm": 0.8666055798530579, + "learning_rate": 2.875e-05, + "loss": 0.051, + "step": 12500 + }, + { + "epoch": 100.0, + "eval_accuracy_no_text": 0.9826272227378127, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9826272227378127, + "eval_iou_text": 0.0, + "eval_loss": 0.09898550808429718, + "eval_mean_accuracy": 0.9826272227378127, + "eval_mean_iou": 0.49131361136890633, + "eval_overall_accuracy": 0.9826272227378127, + "eval_runtime": 8.2239, + "eval_samples_per_second": 30.886, + "eval_steps_per_second": 3.891, + "step": 12500 + }, + { + "epoch": 100.8, + "grad_norm": 1.307895541191101, + "learning_rate": 2.8500000000000005e-05, + "loss": 0.0523, + "step": 12600 + }, + { + "epoch": 101.0, + "eval_accuracy_no_text": 0.9826056889559862, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9826056889559862, + "eval_iou_text": 0.0, + "eval_loss": 0.09835489839315414, + "eval_mean_accuracy": 0.9826056889559862, + "eval_mean_iou": 0.4913028444779931, + "eval_overall_accuracy": 0.9826056889559862, + "eval_runtime": 8.309, + "eval_samples_per_second": 30.569, + "eval_steps_per_second": 3.851, + "step": 12625 + }, + { + "epoch": 101.6, + "grad_norm": 0.38712045550346375, + "learning_rate": 2.825e-05, + "loss": 0.0521, + "step": 12700 + }, + { + "epoch": 102.0, + "eval_accuracy_no_text": 0.9799104769574827, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9799104769574827, + "eval_iou_text": 0.0, + "eval_loss": 0.098680280148983, + "eval_mean_accuracy": 0.9799104769574827, + "eval_mean_iou": 0.48995523847874134, + "eval_overall_accuracy": 0.9799104769574827, + "eval_runtime": 8.3754, + "eval_samples_per_second": 30.327, + "eval_steps_per_second": 3.821, + "step": 12750 + }, + { + "epoch": 102.4, + "grad_norm": 0.2037675976753235, + "learning_rate": 2.7999999999999996e-05, + "loss": 0.0518, + "step": 12800 + }, + { + "epoch": 103.0, + "eval_accuracy_no_text": 0.9818977160317262, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9818977160317262, + "eval_iou_text": 0.0, + "eval_loss": 0.10650473833084106, + "eval_mean_accuracy": 0.9818977160317262, + "eval_mean_iou": 0.4909488580158631, + "eval_overall_accuracy": 0.9818977160317262, + "eval_runtime": 8.2263, + "eval_samples_per_second": 30.876, + "eval_steps_per_second": 3.89, + "step": 12875 + }, + { + "epoch": 103.2, + "grad_norm": 0.6227338314056396, + "learning_rate": 2.7750000000000004e-05, + "loss": 0.0551, + "step": 12900 + }, + { + "epoch": 104.0, + "grad_norm": 0.6191376447677612, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0521, + "step": 13000 + }, + { + "epoch": 104.0, + "eval_accuracy_no_text": 0.9808840830376523, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9808840830376523, + "eval_iou_text": 0.0, + "eval_loss": 0.10519281029701233, + "eval_mean_accuracy": 0.9808840830376523, + "eval_mean_iou": 0.49044204151882614, + "eval_overall_accuracy": 0.9808840830376523, + "eval_runtime": 8.3514, + "eval_samples_per_second": 30.414, + "eval_steps_per_second": 3.832, + "step": 13000 + }, + { + "epoch": 104.8, + "grad_norm": 0.30404117703437805, + "learning_rate": 2.725e-05, + "loss": 0.0556, + "step": 13100 + }, + { + "epoch": 105.0, + "eval_accuracy_no_text": 0.9817842316722551, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9817842316722551, + "eval_iou_text": 0.0, + "eval_loss": 0.10063216090202332, + "eval_mean_accuracy": 0.9817842316722551, + "eval_mean_iou": 0.4908921158361276, + "eval_overall_accuracy": 0.9817842316722551, + "eval_runtime": 8.2778, + "eval_samples_per_second": 30.685, + "eval_steps_per_second": 3.866, + "step": 13125 + }, + { + "epoch": 105.6, + "grad_norm": 0.7706215977668762, + "learning_rate": 2.7e-05, + "loss": 0.0544, + "step": 13200 + }, + { + "epoch": 106.0, + "eval_accuracy_no_text": 0.9808936702229253, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9808936702229253, + "eval_iou_text": 0.0, + "eval_loss": 0.10449624061584473, + "eval_mean_accuracy": 0.9808936702229253, + "eval_mean_iou": 0.49044683511146264, + "eval_overall_accuracy": 0.9808936702229253, + "eval_runtime": 8.4473, + "eval_samples_per_second": 30.069, + "eval_steps_per_second": 3.788, + "step": 13250 + }, + { + "epoch": 106.4, + "grad_norm": 0.3159944713115692, + "learning_rate": 2.675e-05, + "loss": 0.0549, + "step": 13300 + }, + { + "epoch": 107.0, + "eval_accuracy_no_text": 0.9823117428647125, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9823117428647125, + "eval_iou_text": 0.0, + "eval_loss": 0.101432666182518, + "eval_mean_accuracy": 0.9823117428647125, + "eval_mean_iou": 0.49115587143235623, + "eval_overall_accuracy": 0.9823117428647125, + "eval_runtime": 8.2997, + "eval_samples_per_second": 30.604, + "eval_steps_per_second": 3.856, + "step": 13375 + }, + { + "epoch": 107.2, + "grad_norm": 0.7384445667266846, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0506, + "step": 13400 + }, + { + "epoch": 108.0, + "grad_norm": 1.998416543006897, + "learning_rate": 2.625e-05, + "loss": 0.054, + "step": 13500 + }, + { + "epoch": 108.0, + "eval_accuracy_no_text": 0.9808719370542301, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9808719370542301, + "eval_iou_text": 0.0, + "eval_loss": 0.10262825340032578, + "eval_mean_accuracy": 0.9808719370542301, + "eval_mean_iou": 0.4904359685271151, + "eval_overall_accuracy": 0.9808719370542301, + "eval_runtime": 8.2241, + "eval_samples_per_second": 30.885, + "eval_steps_per_second": 3.891, + "step": 13500 + }, + { + "epoch": 108.8, + "grad_norm": 0.29045435786247253, + "learning_rate": 2.5999999999999995e-05, + "loss": 0.0526, + "step": 13600 + }, + { + "epoch": 109.0, + "eval_accuracy_no_text": 0.9836949560356109, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9836949560356109, + "eval_iou_text": 0.0, + "eval_loss": 0.10518956929445267, + "eval_mean_accuracy": 0.9836949560356109, + "eval_mean_iou": 0.49184747801780543, + "eval_overall_accuracy": 0.9836949560356109, + "eval_runtime": 8.301, + "eval_samples_per_second": 30.599, + "eval_steps_per_second": 3.855, + "step": 13625 + }, + { + "epoch": 109.6, + "grad_norm": 0.6301660537719727, + "learning_rate": 2.575e-05, + "loss": 0.0524, + "step": 13700 + }, + { + "epoch": 110.0, + "eval_accuracy_no_text": 0.9830132357157378, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9830132357157378, + "eval_iou_text": 0.0, + "eval_loss": 0.09869378805160522, + "eval_mean_accuracy": 0.9830132357157378, + "eval_mean_iou": 0.4915066178578689, + "eval_overall_accuracy": 0.9830132357157378, + "eval_runtime": 8.2194, + "eval_samples_per_second": 30.903, + "eval_steps_per_second": 3.893, + "step": 13750 + }, + { + "epoch": 110.4, + "grad_norm": 5.493782043457031, + "learning_rate": 2.55e-05, + "loss": 0.0487, + "step": 13800 + }, + { + "epoch": 111.0, + "eval_accuracy_no_text": 0.9800823152072116, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9800823152072116, + "eval_iou_text": 0.0, + "eval_loss": 0.10276078432798386, + "eval_mean_accuracy": 0.9800823152072116, + "eval_mean_iou": 0.4900411576036058, + "eval_overall_accuracy": 0.9800823152072116, + "eval_runtime": 11.9277, + "eval_samples_per_second": 21.295, + "eval_steps_per_second": 2.683, + "step": 13875 + }, + { + "epoch": 111.2, + "grad_norm": 0.2871710956096649, + "learning_rate": 2.5250000000000004e-05, + "loss": 0.0536, + "step": 13900 + }, + { + "epoch": 112.0, + "grad_norm": 0.9292570948600769, + "learning_rate": 2.5000000000000005e-05, + "loss": 0.054, + "step": 14000 + }, + { + "epoch": 112.0, + "eval_accuracy_no_text": 0.9829168155491185, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9829168155491185, + "eval_iou_text": 0.0, + "eval_loss": 0.1070096418261528, + "eval_mean_accuracy": 0.9829168155491185, + "eval_mean_iou": 0.49145840777455924, + "eval_overall_accuracy": 0.9829168155491185, + "eval_runtime": 8.2923, + "eval_samples_per_second": 30.631, + "eval_steps_per_second": 3.859, + "step": 14000 + }, + { + "epoch": 112.8, + "grad_norm": 0.38912442326545715, + "learning_rate": 2.4750000000000002e-05, + "loss": 0.0531, + "step": 14100 + }, + { + "epoch": 113.0, + "eval_accuracy_no_text": 0.9806236837870479, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9806236837870479, + "eval_iou_text": 0.0, + "eval_loss": 0.10462525486946106, + "eval_mean_accuracy": 0.9806236837870479, + "eval_mean_iou": 0.49031184189352395, + "eval_overall_accuracy": 0.9806236837870479, + "eval_runtime": 8.2827, + "eval_samples_per_second": 30.666, + "eval_steps_per_second": 3.863, + "step": 14125 + }, + { + "epoch": 113.6, + "grad_norm": 0.5761541128158569, + "learning_rate": 2.45e-05, + "loss": 0.0478, + "step": 14200 + }, + { + "epoch": 114.0, + "eval_accuracy_no_text": 0.9830612214888201, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9830612214888201, + "eval_iou_text": 0.0, + "eval_loss": 0.10361193120479584, + "eval_mean_accuracy": 0.9830612214888201, + "eval_mean_iou": 0.49153061074441007, + "eval_overall_accuracy": 0.9830612214888201, + "eval_runtime": 8.3565, + "eval_samples_per_second": 30.396, + "eval_steps_per_second": 3.829, + "step": 14250 + }, + { + "epoch": 114.4, + "grad_norm": 0.43338820338249207, + "learning_rate": 2.425e-05, + "loss": 0.0511, + "step": 14300 + }, + { + "epoch": 115.0, + "eval_accuracy_no_text": 0.9807316019297658, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9807316019297658, + "eval_iou_text": 0.0, + "eval_loss": 0.10400665551424026, + "eval_mean_accuracy": 0.9807316019297658, + "eval_mean_iou": 0.4903658009648829, + "eval_overall_accuracy": 0.9807316019297658, + "eval_runtime": 8.1898, + "eval_samples_per_second": 31.014, + "eval_steps_per_second": 3.907, + "step": 14375 + }, + { + "epoch": 115.2, + "grad_norm": 0.4137003421783447, + "learning_rate": 2.4000000000000004e-05, + "loss": 0.0516, + "step": 14400 + }, + { + "epoch": 116.0, + "grad_norm": 0.34559884667396545, + "learning_rate": 2.375e-05, + "loss": 0.05, + "step": 14500 + }, + { + "epoch": 116.0, + "eval_accuracy_no_text": 0.9825900370867884, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9825900370867884, + "eval_iou_text": 0.0, + "eval_loss": 0.10383593291044235, + "eval_mean_accuracy": 0.9825900370867884, + "eval_mean_iou": 0.4912950185433942, + "eval_overall_accuracy": 0.9825900370867884, + "eval_runtime": 8.3311, + "eval_samples_per_second": 30.488, + "eval_steps_per_second": 3.841, + "step": 14500 + }, + { + "epoch": 116.8, + "grad_norm": 0.5693605542182922, + "learning_rate": 2.3500000000000002e-05, + "loss": 0.0522, + "step": 14600 + }, + { + "epoch": 117.0, + "eval_accuracy_no_text": 0.9813530907997, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9813530907997, + "eval_iou_text": 0.0, + "eval_loss": 0.10510838031768799, + "eval_mean_accuracy": 0.9813530907997, + "eval_mean_iou": 0.49067654539985, + "eval_overall_accuracy": 0.9813530907997, + "eval_runtime": 8.3739, + "eval_samples_per_second": 30.332, + "eval_steps_per_second": 3.821, + "step": 14625 + }, + { + "epoch": 117.6, + "grad_norm": 3.1022820472717285, + "learning_rate": 2.3250000000000003e-05, + "loss": 0.0492, + "step": 14700 + }, + { + "epoch": 118.0, + "eval_accuracy_no_text": 0.9816835745346747, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9816835745346747, + "eval_iou_text": 0.0, + "eval_loss": 0.10122588276863098, + "eval_mean_accuracy": 0.9816835745346747, + "eval_mean_iou": 0.49084178726733735, + "eval_overall_accuracy": 0.9816835745346747, + "eval_runtime": 8.251, + "eval_samples_per_second": 30.784, + "eval_steps_per_second": 3.878, + "step": 14750 + }, + { + "epoch": 118.4, + "grad_norm": 6.673212051391602, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0526, + "step": 14800 + }, + { + "epoch": 119.0, + "eval_accuracy_no_text": 0.9810688315871336, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9810688315871336, + "eval_iou_text": 0.0, + "eval_loss": 0.10411898046731949, + "eval_mean_accuracy": 0.9810688315871336, + "eval_mean_iou": 0.4905344157935668, + "eval_overall_accuracy": 0.9810688315871336, + "eval_runtime": 8.5831, + "eval_samples_per_second": 29.593, + "eval_steps_per_second": 3.728, + "step": 14875 + }, + { + "epoch": 119.2, + "grad_norm": 1.0424220561981201, + "learning_rate": 2.275e-05, + "loss": 0.0483, + "step": 14900 + }, + { + "epoch": 120.0, + "grad_norm": 0.8458845019340515, + "learning_rate": 2.25e-05, + "loss": 0.0483, + "step": 15000 + }, + { + "epoch": 120.0, + "eval_accuracy_no_text": 0.9836277958919825, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9836277958919825, + "eval_iou_text": 0.0, + "eval_loss": 0.10483555495738983, + "eval_mean_accuracy": 0.9836277958919825, + "eval_mean_iou": 0.49181389794599123, + "eval_overall_accuracy": 0.9836277958919825, + "eval_runtime": 8.2865, + "eval_samples_per_second": 30.652, + "eval_steps_per_second": 3.862, + "step": 15000 + }, + { + "epoch": 120.8, + "grad_norm": 0.9494897127151489, + "learning_rate": 2.2250000000000002e-05, + "loss": 0.0496, + "step": 15100 + }, + { + "epoch": 121.0, + "eval_accuracy_no_text": 0.9807214830461761, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9807214830461761, + "eval_iou_text": 0.0, + "eval_loss": 0.10666216909885406, + "eval_mean_accuracy": 0.9807214830461761, + "eval_mean_iou": 0.49036074152308806, + "eval_overall_accuracy": 0.9807214830461761, + "eval_runtime": 8.3274, + "eval_samples_per_second": 30.502, + "eval_steps_per_second": 3.843, + "step": 15125 + }, + { + "epoch": 121.6, + "grad_norm": 0.33741095662117004, + "learning_rate": 2.2e-05, + "loss": 0.0486, + "step": 15200 + }, + { + "epoch": 122.0, + "eval_accuracy_no_text": 0.9799435585621249, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9799435585621249, + "eval_iou_text": 0.0, + "eval_loss": 0.10895564407110214, + "eval_mean_accuracy": 0.9799435585621249, + "eval_mean_iou": 0.48997177928106245, + "eval_overall_accuracy": 0.9799435585621249, + "eval_runtime": 8.4498, + "eval_samples_per_second": 30.06, + "eval_steps_per_second": 3.787, + "step": 15250 + }, + { + "epoch": 122.4, + "grad_norm": 0.38935738801956177, + "learning_rate": 2.175e-05, + "loss": 0.0539, + "step": 15300 + }, + { + "epoch": 123.0, + "eval_accuracy_no_text": 0.9796546303737149, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9796546303737149, + "eval_iou_text": 0.0, + "eval_loss": 0.10292521864175797, + "eval_mean_accuracy": 0.9796546303737149, + "eval_mean_iou": 0.48982731518685746, + "eval_overall_accuracy": 0.9796546303737149, + "eval_runtime": 8.2096, + "eval_samples_per_second": 30.939, + "eval_steps_per_second": 3.898, + "step": 15375 + }, + { + "epoch": 123.2, + "grad_norm": 0.4297466278076172, + "learning_rate": 2.15e-05, + "loss": 0.0498, + "step": 15400 + }, + { + "epoch": 124.0, + "grad_norm": 0.7218087911605835, + "learning_rate": 2.1249999999999998e-05, + "loss": 0.0507, + "step": 15500 + }, + { + "epoch": 124.0, + "eval_accuracy_no_text": 0.9803565552296226, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9803565552296226, + "eval_iou_text": 0.0, + "eval_loss": 0.10427888482809067, + "eval_mean_accuracy": 0.9803565552296226, + "eval_mean_iou": 0.4901782776148113, + "eval_overall_accuracy": 0.9803565552296226, + "eval_runtime": 8.4272, + "eval_samples_per_second": 30.14, + "eval_steps_per_second": 3.797, + "step": 15500 + }, + { + "epoch": 124.8, + "grad_norm": 0.3873758316040039, + "learning_rate": 2.1e-05, + "loss": 0.0482, + "step": 15600 + }, + { + "epoch": 125.0, + "eval_accuracy_no_text": 0.9791164519837788, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9791164519837788, + "eval_iou_text": 0.0, + "eval_loss": 0.1063632071018219, + "eval_mean_accuracy": 0.9791164519837788, + "eval_mean_iou": 0.4895582259918894, + "eval_overall_accuracy": 0.9791164519837788, + "eval_runtime": 8.3041, + "eval_samples_per_second": 30.587, + "eval_steps_per_second": 3.854, + "step": 15625 + }, + { + "epoch": 125.6, + "grad_norm": 0.4336509704589844, + "learning_rate": 2.075e-05, + "loss": 0.0487, + "step": 15700 + }, + { + "epoch": 126.0, + "eval_accuracy_no_text": 0.981322318759621, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.981322318759621, + "eval_iou_text": 0.0, + "eval_loss": 0.10696995258331299, + "eval_mean_accuracy": 0.981322318759621, + "eval_mean_iou": 0.4906611593798105, + "eval_overall_accuracy": 0.981322318759621, + "eval_runtime": 8.3258, + "eval_samples_per_second": 30.508, + "eval_steps_per_second": 3.843, + "step": 15750 + }, + { + "epoch": 126.4, + "grad_norm": 0.6076303124427795, + "learning_rate": 2.0499999999999997e-05, + "loss": 0.0492, + "step": 15800 + }, + { + "epoch": 127.0, + "eval_accuracy_no_text": 0.9836348408946788, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9836348408946788, + "eval_iou_text": 0.0, + "eval_loss": 0.11010294407606125, + "eval_mean_accuracy": 0.9836348408946788, + "eval_mean_iou": 0.4918174204473394, + "eval_overall_accuracy": 0.9836348408946788, + "eval_runtime": 8.4835, + "eval_samples_per_second": 29.941, + "eval_steps_per_second": 3.772, + "step": 15875 + }, + { + "epoch": 127.2, + "grad_norm": 0.2879132032394409, + "learning_rate": 2.0249999999999998e-05, + "loss": 0.0507, + "step": 15900 + }, + { + "epoch": 128.0, + "grad_norm": 0.3296537697315216, + "learning_rate": 1.9999999999999998e-05, + "loss": 0.0479, + "step": 16000 + }, + { + "epoch": 128.0, + "eval_accuracy_no_text": 0.9799868587437913, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9799868587437913, + "eval_iou_text": 0.0, + "eval_loss": 0.10447646677494049, + "eval_mean_accuracy": 0.9799868587437913, + "eval_mean_iou": 0.48999342937189566, + "eval_overall_accuracy": 0.9799868587437913, + "eval_runtime": 8.3118, + "eval_samples_per_second": 30.559, + "eval_steps_per_second": 3.85, + "step": 16000 + }, + { + "epoch": 128.8, + "grad_norm": 0.43510717153549194, + "learning_rate": 1.9749999999999996e-05, + "loss": 0.0514, + "step": 16100 + }, + { + "epoch": 129.0, + "eval_accuracy_no_text": 0.9819989048676233, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9819989048676233, + "eval_iou_text": 0.0, + "eval_loss": 0.10433124750852585, + "eval_mean_accuracy": 0.9819989048676233, + "eval_mean_iou": 0.49099945243381166, + "eval_overall_accuracy": 0.9819989048676233, + "eval_runtime": 8.1845, + "eval_samples_per_second": 31.034, + "eval_steps_per_second": 3.91, + "step": 16125 + }, + { + "epoch": 129.6, + "grad_norm": 0.9808353781700134, + "learning_rate": 1.9499999999999996e-05, + "loss": 0.0505, + "step": 16200 + }, + { + "epoch": 130.0, + "eval_accuracy_no_text": 0.9821372295078277, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9821372295078277, + "eval_iou_text": 0.0, + "eval_loss": 0.1069905087351799, + "eval_mean_accuracy": 0.9821372295078277, + "eval_mean_iou": 0.49106861475391383, + "eval_overall_accuracy": 0.9821372295078277, + "eval_runtime": 8.336, + "eval_samples_per_second": 30.47, + "eval_steps_per_second": 3.839, + "step": 16250 + }, + { + "epoch": 130.4, + "grad_norm": 0.44676822423934937, + "learning_rate": 1.9250000000000004e-05, + "loss": 0.0491, + "step": 16300 + }, + { + "epoch": 131.0, + "eval_accuracy_no_text": 0.9810748464243413, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9810748464243413, + "eval_iou_text": 0.0, + "eval_loss": 0.10192025452852249, + "eval_mean_accuracy": 0.9810748464243413, + "eval_mean_iou": 0.49053742321217064, + "eval_overall_accuracy": 0.9810748464243413, + "eval_runtime": 8.1762, + "eval_samples_per_second": 31.066, + "eval_steps_per_second": 3.914, + "step": 16375 + }, + { + "epoch": 131.2, + "grad_norm": 0.2674311101436615, + "learning_rate": 1.9000000000000008e-05, + "loss": 0.0479, + "step": 16400 + }, + { + "epoch": 132.0, + "grad_norm": 0.18870976567268372, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.0477, + "step": 16500 + }, + { + "epoch": 132.0, + "eval_accuracy_no_text": 0.9808076347890541, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9808076347890541, + "eval_iou_text": 0.0, + "eval_loss": 0.10085348039865494, + "eval_mean_accuracy": 0.9808076347890541, + "eval_mean_iou": 0.49040381739452704, + "eval_overall_accuracy": 0.9808076347890541, + "eval_runtime": 8.2705, + "eval_samples_per_second": 30.712, + "eval_steps_per_second": 3.869, + "step": 16500 + }, + { + "epoch": 132.8, + "grad_norm": 1.6924952268600464, + "learning_rate": 1.8500000000000006e-05, + "loss": 0.0476, + "step": 16600 + }, + { + "epoch": 133.0, + "eval_accuracy_no_text": 0.9818020435658646, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9818020435658646, + "eval_iou_text": 0.0, + "eval_loss": 0.10147301107645035, + "eval_mean_accuracy": 0.9818020435658646, + "eval_mean_iou": 0.4909010217829323, + "eval_overall_accuracy": 0.9818020435658646, + "eval_runtime": 8.2767, + "eval_samples_per_second": 30.688, + "eval_steps_per_second": 3.866, + "step": 16625 + }, + { + "epoch": 133.6, + "grad_norm": 0.26980453729629517, + "learning_rate": 1.8250000000000003e-05, + "loss": 0.0462, + "step": 16700 + }, + { + "epoch": 134.0, + "eval_accuracy_no_text": 0.9804314416144155, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9804314416144155, + "eval_iou_text": 0.0, + "eval_loss": 0.10597039759159088, + "eval_mean_accuracy": 0.9804314416144155, + "eval_mean_iou": 0.49021572080720777, + "eval_overall_accuracy": 0.9804314416144155, + "eval_runtime": 8.2302, + "eval_samples_per_second": 30.862, + "eval_steps_per_second": 3.888, + "step": 16750 + }, + { + "epoch": 134.4, + "grad_norm": 0.5653632283210754, + "learning_rate": 1.8000000000000004e-05, + "loss": 0.0485, + "step": 16800 + }, + { + "epoch": 135.0, + "eval_accuracy_no_text": 0.9795444691287234, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9795444691287234, + "eval_iou_text": 0.0, + "eval_loss": 0.10182661563158035, + "eval_mean_accuracy": 0.9795444691287234, + "eval_mean_iou": 0.4897722345643617, + "eval_overall_accuracy": 0.9795444691287234, + "eval_runtime": 8.1917, + "eval_samples_per_second": 31.007, + "eval_steps_per_second": 3.906, + "step": 16875 + }, + { + "epoch": 135.2, + "grad_norm": 0.603354275226593, + "learning_rate": 1.775e-05, + "loss": 0.0518, + "step": 16900 + }, + { + "epoch": 136.0, + "grad_norm": 0.7962479591369629, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.0483, + "step": 17000 + }, + { + "epoch": 136.0, + "eval_accuracy_no_text": 0.9795505836593654, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9795505836593654, + "eval_iou_text": 0.0, + "eval_loss": 0.10557083040475845, + "eval_mean_accuracy": 0.9795505836593654, + "eval_mean_iou": 0.4897752918296827, + "eval_overall_accuracy": 0.9795505836593654, + "eval_runtime": 8.2516, + "eval_samples_per_second": 30.782, + "eval_steps_per_second": 3.878, + "step": 17000 + }, + { + "epoch": 136.8, + "grad_norm": 0.2562846839427948, + "learning_rate": 1.725e-05, + "loss": 0.0503, + "step": 17100 + }, + { + "epoch": 137.0, + "eval_accuracy_no_text": 0.9820201229535742, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9820201229535742, + "eval_iou_text": 0.0, + "eval_loss": 0.10440485179424286, + "eval_mean_accuracy": 0.9820201229535742, + "eval_mean_iou": 0.4910100614767871, + "eval_overall_accuracy": 0.9820201229535742, + "eval_runtime": 8.1927, + "eval_samples_per_second": 31.003, + "eval_steps_per_second": 3.906, + "step": 17125 + }, + { + "epoch": 137.6, + "grad_norm": 0.45571181178092957, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0514, + "step": 17200 + }, + { + "epoch": 138.0, + "eval_accuracy_no_text": 0.9812987744935345, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9812987744935345, + "eval_iou_text": 0.0, + "eval_loss": 0.10530504584312439, + "eval_mean_accuracy": 0.9812987744935345, + "eval_mean_iou": 0.49064938724676727, + "eval_overall_accuracy": 0.9812987744935345, + "eval_runtime": 8.2797, + "eval_samples_per_second": 30.678, + "eval_steps_per_second": 3.865, + "step": 17250 + }, + { + "epoch": 138.4, + "grad_norm": 0.21188737452030182, + "learning_rate": 1.675e-05, + "loss": 0.0446, + "step": 17300 + }, + { + "epoch": 139.0, + "eval_accuracy_no_text": 0.9807978980636295, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9807978980636295, + "eval_iou_text": 0.0, + "eval_loss": 0.10513133555650711, + "eval_mean_accuracy": 0.9807978980636295, + "eval_mean_iou": 0.49039894903181475, + "eval_overall_accuracy": 0.9807978980636295, + "eval_runtime": 8.251, + "eval_samples_per_second": 30.784, + "eval_steps_per_second": 3.878, + "step": 17375 + }, + { + "epoch": 139.2, + "grad_norm": 0.8840853571891785, + "learning_rate": 1.65e-05, + "loss": 0.0504, + "step": 17400 + }, + { + "epoch": 140.0, + "grad_norm": 0.943657398223877, + "learning_rate": 1.625e-05, + "loss": 0.047, + "step": 17500 + }, + { + "epoch": 140.0, + "eval_accuracy_no_text": 0.9806808579716656, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9806808579716656, + "eval_iou_text": 0.0, + "eval_loss": 0.1071048378944397, + "eval_mean_accuracy": 0.9806808579716656, + "eval_mean_iou": 0.4903404289858328, + "eval_overall_accuracy": 0.9806808579716656, + "eval_runtime": 8.5372, + "eval_samples_per_second": 29.752, + "eval_steps_per_second": 3.748, + "step": 17500 + }, + { + "epoch": 140.8, + "grad_norm": 0.7239159941673279, + "learning_rate": 1.6e-05, + "loss": 0.0467, + "step": 17600 + }, + { + "epoch": 141.0, + "eval_accuracy_no_text": 0.9828105755792126, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9828105755792126, + "eval_iou_text": 0.0, + "eval_loss": 0.10849156975746155, + "eval_mean_accuracy": 0.9828105755792126, + "eval_mean_iou": 0.4914052877896063, + "eval_overall_accuracy": 0.9828105755792126, + "eval_runtime": 8.3714, + "eval_samples_per_second": 30.341, + "eval_steps_per_second": 3.823, + "step": 17625 + }, + { + "epoch": 141.6, + "grad_norm": 0.34657156467437744, + "learning_rate": 1.5749999999999997e-05, + "loss": 0.0476, + "step": 17700 + }, + { + "epoch": 142.0, + "eval_accuracy_no_text": 0.9832416998361954, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9832416998361954, + "eval_iou_text": 0.0, + "eval_loss": 0.1076694130897522, + "eval_mean_accuracy": 0.9832416998361954, + "eval_mean_iou": 0.4916208499180977, + "eval_overall_accuracy": 0.9832416998361954, + "eval_runtime": 8.3324, + "eval_samples_per_second": 30.483, + "eval_steps_per_second": 3.84, + "step": 17750 + }, + { + "epoch": 142.4, + "grad_norm": 1.23021399974823, + "learning_rate": 1.55e-05, + "loss": 0.0472, + "step": 17800 + }, + { + "epoch": 143.0, + "eval_accuracy_no_text": 0.9818141397025696, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9818141397025696, + "eval_iou_text": 0.0, + "eval_loss": 0.11224336177110672, + "eval_mean_accuracy": 0.9818141397025696, + "eval_mean_iou": 0.4909070698512848, + "eval_overall_accuracy": 0.9818141397025696, + "eval_runtime": 8.346, + "eval_samples_per_second": 30.434, + "eval_steps_per_second": 3.834, + "step": 17875 + }, + { + "epoch": 143.2, + "grad_norm": 0.43515679240226746, + "learning_rate": 1.525e-05, + "loss": 0.0474, + "step": 17900 + }, + { + "epoch": 144.0, + "grad_norm": 0.26305532455444336, + "learning_rate": 1.5e-05, + "loss": 0.0477, + "step": 18000 + }, + { + "epoch": 144.0, + "eval_accuracy_no_text": 0.9808217912567363, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9808217912567363, + "eval_iou_text": 0.0, + "eval_loss": 0.10432812571525574, + "eval_mean_accuracy": 0.9808217912567363, + "eval_mean_iou": 0.49041089562836815, + "eval_overall_accuracy": 0.9808217912567363, + "eval_runtime": 8.2922, + "eval_samples_per_second": 30.631, + "eval_steps_per_second": 3.859, + "step": 18000 + }, + { + "epoch": 144.8, + "grad_norm": 0.24621394276618958, + "learning_rate": 1.4749999999999998e-05, + "loss": 0.0467, + "step": 18100 + }, + { + "epoch": 145.0, + "eval_accuracy_no_text": 0.9796973988570646, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9796973988570646, + "eval_iou_text": 0.0, + "eval_loss": 0.10507868975400925, + "eval_mean_accuracy": 0.9796973988570646, + "eval_mean_iou": 0.4898486994285323, + "eval_overall_accuracy": 0.9796973988570646, + "eval_runtime": 8.3989, + "eval_samples_per_second": 30.242, + "eval_steps_per_second": 3.81, + "step": 18125 + }, + { + "epoch": 145.6, + "grad_norm": 0.8053872585296631, + "learning_rate": 1.45e-05, + "loss": 0.0493, + "step": 18200 + }, + { + "epoch": 146.0, + "eval_accuracy_no_text": 0.9794659273180032, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9794659273180032, + "eval_iou_text": 0.0, + "eval_loss": 0.1048658937215805, + "eval_mean_accuracy": 0.9794659273180032, + "eval_mean_iou": 0.4897329636590016, + "eval_overall_accuracy": 0.9794659273180032, + "eval_runtime": 8.2794, + "eval_samples_per_second": 30.678, + "eval_steps_per_second": 3.865, + "step": 18250 + }, + { + "epoch": 146.4, + "grad_norm": 0.3059195876121521, + "learning_rate": 1.4249999999999999e-05, + "loss": 0.0485, + "step": 18300 + }, + { + "epoch": 147.0, + "eval_accuracy_no_text": 0.9809577398700878, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9809577398700878, + "eval_iou_text": 0.0, + "eval_loss": 0.10593844205141068, + "eval_mean_accuracy": 0.9809577398700878, + "eval_mean_iou": 0.4904788699350439, + "eval_overall_accuracy": 0.9809577398700878, + "eval_runtime": 8.3317, + "eval_samples_per_second": 30.486, + "eval_steps_per_second": 3.841, + "step": 18375 + }, + { + "epoch": 147.2, + "grad_norm": 0.41579416394233704, + "learning_rate": 1.3999999999999998e-05, + "loss": 0.0469, + "step": 18400 + }, + { + "epoch": 148.0, + "grad_norm": 0.320431113243103, + "learning_rate": 1.375e-05, + "loss": 0.0462, + "step": 18500 + }, + { + "epoch": 148.0, + "eval_accuracy_no_text": 0.9786900963960741, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9786900963960741, + "eval_iou_text": 0.0, + "eval_loss": 0.10566265136003494, + "eval_mean_accuracy": 0.9786900963960741, + "eval_mean_iou": 0.48934504819803704, + "eval_overall_accuracy": 0.9786900963960741, + "eval_runtime": 8.2787, + "eval_samples_per_second": 30.681, + "eval_steps_per_second": 3.865, + "step": 18500 + }, + { + "epoch": 148.8, + "grad_norm": 0.44983476400375366, + "learning_rate": 1.35e-05, + "loss": 0.0474, + "step": 18600 + }, + { + "epoch": 149.0, + "eval_accuracy_no_text": 0.9800260382635032, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9800260382635032, + "eval_iou_text": 0.0, + "eval_loss": 0.10372522473335266, + "eval_mean_accuracy": 0.9800260382635032, + "eval_mean_iou": 0.4900130191317516, + "eval_overall_accuracy": 0.9800260382635032, + "eval_runtime": 8.1632, + "eval_samples_per_second": 31.115, + "eval_steps_per_second": 3.92, + "step": 18625 + }, + { + "epoch": 149.6, + "grad_norm": 0.1674959510564804, + "learning_rate": 1.3249999999999999e-05, + "loss": 0.0506, + "step": 18700 + }, + { + "epoch": 150.0, + "eval_accuracy_no_text": 0.9814382456082342, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9814382456082342, + "eval_iou_text": 0.0, + "eval_loss": 0.10518930852413177, + "eval_mean_accuracy": 0.9814382456082342, + "eval_mean_iou": 0.4907191228041171, + "eval_overall_accuracy": 0.9814382456082342, + "eval_runtime": 8.3279, + "eval_samples_per_second": 30.5, + "eval_steps_per_second": 3.843, + "step": 18750 + }, + { + "epoch": 150.4, + "grad_norm": 0.31716248393058777, + "learning_rate": 1.3000000000000004e-05, + "loss": 0.0479, + "step": 18800 + }, + { + "epoch": 151.0, + "eval_accuracy_no_text": 0.9805025396487019, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9805025396487019, + "eval_iou_text": 0.0, + "eval_loss": 0.10686381161212921, + "eval_mean_accuracy": 0.9805025396487019, + "eval_mean_iou": 0.49025126982435097, + "eval_overall_accuracy": 0.9805025396487019, + "eval_runtime": 8.3922, + "eval_samples_per_second": 30.266, + "eval_steps_per_second": 3.813, + "step": 18875 + }, + { + "epoch": 151.2, + "grad_norm": 0.9992444515228271, + "learning_rate": 1.2750000000000003e-05, + "loss": 0.0464, + "step": 18900 + }, + { + "epoch": 152.0, + "grad_norm": 0.19730743765830994, + "learning_rate": 1.2500000000000002e-05, + "loss": 0.0439, + "step": 19000 + }, + { + "epoch": 152.0, + "eval_accuracy_no_text": 0.98159488060922, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.98159488060922, + "eval_iou_text": 0.0, + "eval_loss": 0.1079552099108696, + "eval_mean_accuracy": 0.98159488060922, + "eval_mean_iou": 0.49079744030461, + "eval_overall_accuracy": 0.98159488060922, + "eval_runtime": 8.3262, + "eval_samples_per_second": 30.506, + "eval_steps_per_second": 3.843, + "step": 19000 + }, + { + "epoch": 152.8, + "grad_norm": 0.3917344808578491, + "learning_rate": 1.2250000000000003e-05, + "loss": 0.0492, + "step": 19100 + }, + { + "epoch": 153.0, + "eval_accuracy_no_text": 0.9807935613992339, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9807935613992339, + "eval_iou_text": 0.0, + "eval_loss": 0.10192196071147919, + "eval_mean_accuracy": 0.9807935613992339, + "eval_mean_iou": 0.49039678069961695, + "eval_overall_accuracy": 0.9807935613992339, + "eval_runtime": 8.236, + "eval_samples_per_second": 30.84, + "eval_steps_per_second": 3.885, + "step": 19125 + }, + { + "epoch": 153.6, + "grad_norm": 0.3044929504394531, + "learning_rate": 1.2000000000000002e-05, + "loss": 0.0442, + "step": 19200 + }, + { + "epoch": 154.0, + "eval_accuracy_no_text": 0.9820950259539395, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9820950259539395, + "eval_iou_text": 0.0, + "eval_loss": 0.1053338274359703, + "eval_mean_accuracy": 0.9820950259539395, + "eval_mean_iou": 0.49104751297696975, + "eval_overall_accuracy": 0.9820950259539395, + "eval_runtime": 8.244, + "eval_samples_per_second": 30.81, + "eval_steps_per_second": 3.882, + "step": 19250 + }, + { + "epoch": 154.4, + "grad_norm": 0.5873416662216187, + "learning_rate": 1.1750000000000003e-05, + "loss": 0.0484, + "step": 19300 + }, + { + "epoch": 155.0, + "eval_accuracy_no_text": 0.9818892088586589, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9818892088586589, + "eval_iou_text": 0.0, + "eval_loss": 0.10322853177785873, + "eval_mean_accuracy": 0.9818892088586589, + "eval_mean_iou": 0.49094460442932947, + "eval_overall_accuracy": 0.9818892088586589, + "eval_runtime": 8.2069, + "eval_samples_per_second": 30.95, + "eval_steps_per_second": 3.899, + "step": 19375 + }, + { + "epoch": 155.2, + "grad_norm": 1.538596272468567, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0456, + "step": 19400 + }, + { + "epoch": 156.0, + "grad_norm": 1.1352208852767944, + "learning_rate": 1.1250000000000002e-05, + "loss": 0.0466, + "step": 19500 + }, + { + "epoch": 156.0, + "eval_accuracy_no_text": 0.9812151483176609, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9812151483176609, + "eval_iou_text": 0.0, + "eval_loss": 0.10393723100423813, + "eval_mean_accuracy": 0.9812151483176609, + "eval_mean_iou": 0.49060757415883044, + "eval_overall_accuracy": 0.9812151483176609, + "eval_runtime": 8.309, + "eval_samples_per_second": 30.569, + "eval_steps_per_second": 3.851, + "step": 19500 + }, + { + "epoch": 156.8, + "grad_norm": 0.30518653988838196, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0444, + "step": 19600 + }, + { + "epoch": 157.0, + "eval_accuracy_no_text": 0.9808679825479997, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9808679825479997, + "eval_iou_text": 0.0, + "eval_loss": 0.103802390396595, + "eval_mean_accuracy": 0.9808679825479997, + "eval_mean_iou": 0.49043399127399984, + "eval_overall_accuracy": 0.9808679825479997, + "eval_runtime": 8.2717, + "eval_samples_per_second": 30.707, + "eval_steps_per_second": 3.869, + "step": 19625 + }, + { + "epoch": 157.6, + "grad_norm": 0.371191143989563, + "learning_rate": 1.075e-05, + "loss": 0.0463, + "step": 19700 + }, + { + "epoch": 158.0, + "eval_accuracy_no_text": 0.9814478494090796, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9814478494090796, + "eval_iou_text": 0.0, + "eval_loss": 0.10381000488996506, + "eval_mean_accuracy": 0.9814478494090796, + "eval_mean_iou": 0.4907239247045398, + "eval_overall_accuracy": 0.9814478494090796, + "eval_runtime": 8.5122, + "eval_samples_per_second": 29.839, + "eval_steps_per_second": 3.759, + "step": 19750 + }, + { + "epoch": 158.4, + "grad_norm": 0.6957194209098816, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.0465, + "step": 19800 + }, + { + "epoch": 159.0, + "eval_accuracy_no_text": 0.9814518703775997, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9814518703775997, + "eval_iou_text": 0.0, + "eval_loss": 0.10540702193975449, + "eval_mean_accuracy": 0.9814518703775997, + "eval_mean_iou": 0.49072593518879987, + "eval_overall_accuracy": 0.9814518703775997, + "eval_runtime": 8.1775, + "eval_samples_per_second": 31.061, + "eval_steps_per_second": 3.913, + "step": 19875 + }, + { + "epoch": 159.2, + "grad_norm": 0.21183647215366364, + "learning_rate": 1.025e-05, + "loss": 0.0464, + "step": 19900 + }, + { + "epoch": 160.0, + "grad_norm": 0.3967491090297699, + "learning_rate": 9.999999999999999e-06, + "loss": 0.046, + "step": 20000 + }, + { + "epoch": 160.0, + "eval_accuracy_no_text": 0.9804176340737537, + "eval_accuracy_text": NaN, + "eval_iou_no_text": 0.9804176340737537, + "eval_iou_text": 0.0, + "eval_loss": 0.10416771471500397, + "eval_mean_accuracy": 0.9804176340737537, + "eval_mean_iou": 0.49020881703687685, + "eval_overall_accuracy": 0.9804176340737537, + "eval_runtime": 8.2603, + "eval_samples_per_second": 30.75, + "eval_steps_per_second": 3.874, + "step": 20000 + }, + { + "epoch": 160.0, + "step": 20000, + "total_flos": 2.80447288344576e+18, + "train_loss": 0.01519945946931839, + "train_runtime": 1952.9271, + "train_samples_per_second": 81.928, + "train_steps_per_second": 10.241 } ], "logging_steps": 100, - "max_steps": 2000, + "max_steps": 20000, "num_input_tokens_seen": 0, - "num_train_epochs": 16, + "num_train_epochs": 160, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { @@ -415,7 +3835,7 @@ "attributes": {} } }, - "total_flos": 2.80447288344576e+17, + "total_flos": 2.80447288344576e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null