oodeh's picture
Add files using upload-large-folder tool
94d2ab9 verified
{"current_steps": 1, "total_steps": 1200, "loss": 0.5857, "lr": 4.999991432639962e-05, "epoch": 0.016614745586708203, "percentage": 0.08, "elapsed_time": "0:00:21", "remaining_time": "7:01:54", "throughput": 3334.79, "total_tokens": 70408}
{"current_steps": 2, "total_steps": 1200, "loss": 0.5331, "lr": 4.999965730618567e-05, "epoch": 0.033229491173416406, "percentage": 0.17, "elapsed_time": "0:00:39", "remaining_time": "6:34:27", "throughput": 3534.07, "total_tokens": 139640}
{"current_steps": 3, "total_steps": 1200, "loss": 0.5852, "lr": 4.9999228941119745e-05, "epoch": 0.04984423676012461, "percentage": 0.25, "elapsed_time": "0:01:06", "remaining_time": "7:25:16", "throughput": 3340.24, "total_tokens": 223656}
{"current_steps": 4, "total_steps": 1200, "loss": 0.5384, "lr": 4.999862923413781e-05, "epoch": 0.06645898234683281, "percentage": 0.33, "elapsed_time": "0:01:29", "remaining_time": "7:24:44", "throughput": 3369.17, "total_tokens": 300688}
{"current_steps": 5, "total_steps": 1200, "loss": 0.5273, "lr": 4.999785818935018e-05, "epoch": 0.08307372793354102, "percentage": 0.42, "elapsed_time": "0:01:46", "remaining_time": "7:04:05", "throughput": 3441.11, "total_tokens": 366368}
{"current_steps": 6, "total_steps": 1200, "loss": 0.5145, "lr": 4.999691581204152e-05, "epoch": 0.09968847352024922, "percentage": 0.5, "elapsed_time": "0:02:10", "remaining_time": "7:13:04", "throughput": 3414.15, "total_tokens": 445808}
{"current_steps": 7, "total_steps": 1200, "loss": 0.5301, "lr": 4.9995802108670775e-05, "epoch": 0.11630321910695743, "percentage": 0.58, "elapsed_time": "0:02:36", "remaining_time": "7:23:40", "throughput": 3347.05, "total_tokens": 522800}
{"current_steps": 8, "total_steps": 1200, "loss": 0.5552, "lr": 4.999451708687114e-05, "epoch": 0.13291796469366562, "percentage": 0.67, "elapsed_time": "0:02:57", "remaining_time": "7:20:11", "throughput": 3382.74, "total_tokens": 599608}
{"current_steps": 9, "total_steps": 1200, "loss": 0.5999, "lr": 4.9993060755450015e-05, "epoch": 0.14953271028037382, "percentage": 0.75, "elapsed_time": "0:03:26", "remaining_time": "7:34:26", "throughput": 3307.16, "total_tokens": 681424}
{"current_steps": 10, "total_steps": 1200, "loss": 0.4535, "lr": 4.999143312438893e-05, "epoch": 0.16614745586708204, "percentage": 0.83, "elapsed_time": "0:03:46", "remaining_time": "7:29:55", "throughput": 3335.85, "total_tokens": 756744}
{"current_steps": 11, "total_steps": 1200, "loss": 0.4674, "lr": 4.998963420484349e-05, "epoch": 0.18276220145379024, "percentage": 0.92, "elapsed_time": "0:04:16", "remaining_time": "7:41:41", "throughput": 3287.71, "total_tokens": 842576}
{"current_steps": 12, "total_steps": 1200, "loss": 0.4703, "lr": 4.998766400914329e-05, "epoch": 0.19937694704049844, "percentage": 1.0, "elapsed_time": "0:04:40", "remaining_time": "7:42:34", "throughput": 3271.8, "total_tokens": 917232}
{"current_steps": 13, "total_steps": 1200, "loss": 0.3725, "lr": 4.9985522550791825e-05, "epoch": 0.21599169262720663, "percentage": 1.08, "elapsed_time": "0:05:15", "remaining_time": "8:00:18", "throughput": 3189.86, "total_tokens": 1006800}
{"current_steps": 14, "total_steps": 1200, "loss": 0.4653, "lr": 4.998320984446641e-05, "epoch": 0.23260643821391486, "percentage": 1.17, "elapsed_time": "0:05:42", "remaining_time": "8:02:59", "throughput": 3174.18, "total_tokens": 1085824}
{"current_steps": 15, "total_steps": 1200, "loss": 0.5026, "lr": 4.9980725906018074e-05, "epoch": 0.24922118380062305, "percentage": 1.25, "elapsed_time": "0:06:02", "remaining_time": "7:57:56", "throughput": 3207.08, "total_tokens": 1164160}
{"current_steps": 16, "total_steps": 1200, "loss": 0.5401, "lr": 4.997807075247146e-05, "epoch": 0.26583592938733125, "percentage": 1.33, "elapsed_time": "0:06:30", "remaining_time": "8:02:11", "throughput": 3177.42, "total_tokens": 1242264}
{"current_steps": 17, "total_steps": 1200, "loss": 0.4713, "lr": 4.997524440202469e-05, "epoch": 0.2824506749740395, "percentage": 1.42, "elapsed_time": "0:06:56", "remaining_time": "8:02:34", "throughput": 3186.66, "total_tokens": 1325904}
{"current_steps": 18, "total_steps": 1200, "loss": 0.5834, "lr": 4.9972246874049254e-05, "epoch": 0.29906542056074764, "percentage": 1.5, "elapsed_time": "0:07:12", "remaining_time": "7:53:40", "throughput": 3201.52, "total_tokens": 1385632}
{"current_steps": 19, "total_steps": 1200, "loss": 0.4045, "lr": 4.996907818908987e-05, "epoch": 0.31568016614745587, "percentage": 1.58, "elapsed_time": "0:07:42", "remaining_time": "7:58:51", "throughput": 3181.54, "total_tokens": 1470632}
{"current_steps": 20, "total_steps": 1200, "loss": 0.5283, "lr": 4.996573836886435e-05, "epoch": 0.3322949117341641, "percentage": 1.67, "elapsed_time": "0:08:07", "remaining_time": "7:59:23", "throughput": 3174.29, "total_tokens": 1547536}
{"current_steps": 21, "total_steps": 1200, "loss": 0.4199, "lr": 4.9962227436263453e-05, "epoch": 0.34890965732087226, "percentage": 1.75, "elapsed_time": "0:08:31", "remaining_time": "7:58:28", "throughput": 3159.38, "total_tokens": 1615528}
{"current_steps": 22, "total_steps": 1200, "loss": 0.4775, "lr": 4.995854541535071e-05, "epoch": 0.3655244029075805, "percentage": 1.83, "elapsed_time": "0:08:56", "remaining_time": "7:58:22", "throughput": 3160.84, "total_tokens": 1694352}
{"current_steps": 23, "total_steps": 1200, "loss": 0.4871, "lr": 4.9954692331362294e-05, "epoch": 0.3821391484942887, "percentage": 1.92, "elapsed_time": "0:09:13", "remaining_time": "7:52:16", "throughput": 3167.21, "total_tokens": 1753776}
{"current_steps": 24, "total_steps": 1200, "loss": 0.4871, "lr": 4.995066821070679e-05, "epoch": 0.3987538940809969, "percentage": 2.0, "elapsed_time": "0:09:28", "remaining_time": "7:44:30", "throughput": 3180.53, "total_tokens": 1809048}
{"current_steps": 25, "total_steps": 1200, "loss": 0.5461, "lr": 4.994647308096509e-05, "epoch": 0.4153686396677051, "percentage": 2.08, "elapsed_time": "0:09:49", "remaining_time": "7:42:02", "throughput": 3194.58, "total_tokens": 1884264}
{"current_steps": 26, "total_steps": 1200, "loss": 0.405, "lr": 4.994210697089014e-05, "epoch": 0.43198338525441327, "percentage": 2.17, "elapsed_time": "0:10:24", "remaining_time": "7:50:19", "throughput": 3170.87, "total_tokens": 1981704}
{"current_steps": 27, "total_steps": 1200, "loss": 0.4487, "lr": 4.9937569910406756e-05, "epoch": 0.4485981308411215, "percentage": 2.25, "elapsed_time": "0:10:42", "remaining_time": "7:45:26", "throughput": 3179.96, "total_tokens": 2044144}
{"current_steps": 28, "total_steps": 1200, "loss": 0.3946, "lr": 4.9932861930611454e-05, "epoch": 0.4652128764278297, "percentage": 2.33, "elapsed_time": "0:10:58", "remaining_time": "7:39:27", "throughput": 3200.05, "total_tokens": 2107584}
{"current_steps": 29, "total_steps": 1200, "loss": 0.4257, "lr": 4.9927983063772196e-05, "epoch": 0.4818276220145379, "percentage": 2.42, "elapsed_time": "0:11:15", "remaining_time": "7:34:36", "throughput": 3211.32, "total_tokens": 2169248}
{"current_steps": 30, "total_steps": 1200, "loss": 0.3911, "lr": 4.99229333433282e-05, "epoch": 0.4984423676012461, "percentage": 2.5, "elapsed_time": "0:11:34", "remaining_time": "7:31:13", "throughput": 3212.84, "total_tokens": 2230344}
{"current_steps": 31, "total_steps": 1200, "loss": 0.3749, "lr": 4.9917712803889674e-05, "epoch": 0.5150571131879543, "percentage": 2.58, "elapsed_time": "0:11:58", "remaining_time": "7:31:45", "throughput": 3203.13, "total_tokens": 2302368}
{"current_steps": 32, "total_steps": 1200, "loss": 0.4761, "lr": 4.991232148123761e-05, "epoch": 0.5316718587746625, "percentage": 2.67, "elapsed_time": "0:12:17", "remaining_time": "7:28:25", "throughput": 3215.14, "total_tokens": 2369984}
{"current_steps": 33, "total_steps": 1200, "loss": 0.4328, "lr": 4.990675941232353e-05, "epoch": 0.5482866043613707, "percentage": 2.75, "elapsed_time": "0:12:40", "remaining_time": "7:28:23", "throughput": 3224.46, "total_tokens": 2453032}
{"current_steps": 34, "total_steps": 1200, "loss": 0.417, "lr": 4.990102663526924e-05, "epoch": 0.564901349948079, "percentage": 2.83, "elapsed_time": "0:13:04", "remaining_time": "7:28:22", "throughput": 3221.86, "total_tokens": 2527464}
{"current_steps": 35, "total_steps": 1200, "loss": 0.4097, "lr": 4.989512318936655e-05, "epoch": 0.5815160955347871, "percentage": 2.92, "elapsed_time": "0:13:25", "remaining_time": "7:26:55", "throughput": 3223.65, "total_tokens": 2597032}
{"current_steps": 36, "total_steps": 1200, "loss": 0.4065, "lr": 4.9889049115077005e-05, "epoch": 0.5981308411214953, "percentage": 3.0, "elapsed_time": "0:13:45", "remaining_time": "7:24:51", "throughput": 3236.48, "total_tokens": 2671704}
{"current_steps": 37, "total_steps": 1200, "loss": 0.4136, "lr": 4.988280445403164e-05, "epoch": 0.6147455867082036, "percentage": 3.08, "elapsed_time": "0:14:19", "remaining_time": "7:30:26", "throughput": 3218.88, "total_tokens": 2767640}
{"current_steps": 38, "total_steps": 1200, "loss": 0.4125, "lr": 4.987638924903067e-05, "epoch": 0.6313603322949117, "percentage": 3.17, "elapsed_time": "0:14:41", "remaining_time": "7:29:27", "throughput": 3224.49, "total_tokens": 2843720}
{"current_steps": 39, "total_steps": 1200, "loss": 0.3135, "lr": 4.9869803544043166e-05, "epoch": 0.6479750778816199, "percentage": 3.25, "elapsed_time": "0:15:04", "remaining_time": "7:28:59", "throughput": 3228.31, "total_tokens": 2921472}
{"current_steps": 40, "total_steps": 1200, "loss": 0.4549, "lr": 4.9863047384206835e-05, "epoch": 0.6645898234683282, "percentage": 3.33, "elapsed_time": "0:15:28", "remaining_time": "7:28:58", "throughput": 3227.82, "total_tokens": 2998400}
{"current_steps": 41, "total_steps": 1200, "loss": 0.4135, "lr": 4.985612081582764e-05, "epoch": 0.6812045690550363, "percentage": 3.42, "elapsed_time": "0:15:45", "remaining_time": "7:25:18", "throughput": 3237.09, "total_tokens": 3059648}
{"current_steps": 42, "total_steps": 1200, "loss": 0.3538, "lr": 4.98490238863795e-05, "epoch": 0.6978193146417445, "percentage": 3.5, "elapsed_time": "0:16:07", "remaining_time": "7:24:27", "throughput": 3246.58, "total_tokens": 3140184}
{"current_steps": 43, "total_steps": 1200, "loss": 0.3644, "lr": 4.984175664450397e-05, "epoch": 0.7144340602284528, "percentage": 3.58, "elapsed_time": "0:16:28", "remaining_time": "7:23:04", "throughput": 3246.03, "total_tokens": 3207184}
{"current_steps": 44, "total_steps": 1200, "loss": 0.4019, "lr": 4.983431914000991e-05, "epoch": 0.731048805815161, "percentage": 3.67, "elapsed_time": "0:16:53", "remaining_time": "7:23:49", "throughput": 3248.26, "total_tokens": 3292344}
{"current_steps": 45, "total_steps": 1200, "loss": 0.4238, "lr": 4.982671142387316e-05, "epoch": 0.7476635514018691, "percentage": 3.75, "elapsed_time": "0:17:15", "remaining_time": "7:23:09", "throughput": 3248.61, "total_tokens": 3365384}
{"current_steps": 46, "total_steps": 1200, "loss": 0.3702, "lr": 4.981893354823614e-05, "epoch": 0.7642782969885774, "percentage": 3.83, "elapsed_time": "0:17:37", "remaining_time": "7:22:21", "throughput": 3252.17, "total_tokens": 3440720}
{"current_steps": 47, "total_steps": 1200, "loss": 0.3354, "lr": 4.9810985566407544e-05, "epoch": 0.7808930425752856, "percentage": 3.92, "elapsed_time": "0:18:10", "remaining_time": "7:26:00", "throughput": 3239.35, "total_tokens": 3533576}
{"current_steps": 48, "total_steps": 1200, "loss": 0.4981, "lr": 4.980286753286195e-05, "epoch": 0.7975077881619937, "percentage": 4.0, "elapsed_time": "0:18:30", "remaining_time": "7:24:08", "throughput": 3241.93, "total_tokens": 3599744}
{"current_steps": 49, "total_steps": 1200, "loss": 0.4016, "lr": 4.979457950323945e-05, "epoch": 0.814122533748702, "percentage": 4.08, "elapsed_time": "0:19:00", "remaining_time": "7:26:35", "throughput": 3234.41, "total_tokens": 3689520}
{"current_steps": 50, "total_steps": 1200, "loss": 0.388, "lr": 4.9786121534345265e-05, "epoch": 0.8307372793354102, "percentage": 4.17, "elapsed_time": "0:19:17", "remaining_time": "7:23:34", "throughput": 3242.27, "total_tokens": 3751808}
{"current_steps": 51, "total_steps": 1200, "loss": 0.3674, "lr": 4.9777493684149375e-05, "epoch": 0.8473520249221184, "percentage": 4.25, "elapsed_time": "0:19:46", "remaining_time": "7:25:23", "throughput": 3236.55, "total_tokens": 3839096}
{"current_steps": 52, "total_steps": 1200, "loss": 0.4147, "lr": 4.976869601178609e-05, "epoch": 0.8639667705088265, "percentage": 4.33, "elapsed_time": "0:20:11", "remaining_time": "7:25:43", "throughput": 3235.87, "total_tokens": 3919824}
{"current_steps": 53, "total_steps": 1200, "loss": 0.2978, "lr": 4.975972857755369e-05, "epoch": 0.8805815160955348, "percentage": 4.42, "elapsed_time": "0:20:33", "remaining_time": "7:24:48", "throughput": 3234.9, "total_tokens": 3989312}
{"current_steps": 54, "total_steps": 1200, "loss": 0.3923, "lr": 4.975059144291394e-05, "epoch": 0.897196261682243, "percentage": 4.5, "elapsed_time": "0:20:51", "remaining_time": "7:22:31", "throughput": 3245.55, "total_tokens": 4060528}
{"current_steps": 55, "total_steps": 1200, "loss": 0.3282, "lr": 4.974128467049176e-05, "epoch": 0.9138110072689511, "percentage": 4.58, "elapsed_time": "0:21:10", "remaining_time": "7:20:54", "throughput": 3249.52, "total_tokens": 4129368}
{"current_steps": 56, "total_steps": 1200, "loss": 0.3403, "lr": 4.9731808324074717e-05, "epoch": 0.9304257528556594, "percentage": 4.67, "elapsed_time": "0:21:22", "remaining_time": "7:16:42", "throughput": 3255.18, "total_tokens": 4175208}
{"current_steps": 57, "total_steps": 1200, "loss": 0.3191, "lr": 4.972216246861262e-05, "epoch": 0.9470404984423676, "percentage": 4.75, "elapsed_time": "0:21:33", "remaining_time": "7:12:18", "throughput": 3260.97, "total_tokens": 4218096}
{"current_steps": 58, "total_steps": 1200, "loss": 0.3745, "lr": 4.971234717021709e-05, "epoch": 0.9636552440290758, "percentage": 4.83, "elapsed_time": "0:21:48", "remaining_time": "7:09:33", "throughput": 3266.62, "total_tokens": 4275968}
{"current_steps": 59, "total_steps": 1200, "loss": 0.3129, "lr": 4.9702362496161085e-05, "epoch": 0.980269989615784, "percentage": 4.92, "elapsed_time": "0:22:08", "remaining_time": "7:08:10", "throughput": 3271.97, "total_tokens": 4346616}
{"current_steps": 60, "total_steps": 1200, "loss": 0.3324, "lr": 4.9692208514878444e-05, "epoch": 0.9968847352024922, "percentage": 5.0, "elapsed_time": "0:22:36", "remaining_time": "7:09:41", "throughput": 3261.13, "total_tokens": 4425064}
{"current_steps": 61, "total_steps": 1200, "loss": 0.2814, "lr": 4.968188529596342e-05, "epoch": 1.0, "percentage": 5.08, "elapsed_time": "0:22:41", "remaining_time": "7:03:32", "throughput": 3258.86, "total_tokens": 4435328}
{"current_steps": 62, "total_steps": 1200, "loss": 0.3467, "lr": 4.9671392910170185e-05, "epoch": 1.0166147455867083, "percentage": 5.17, "elapsed_time": "0:23:02", "remaining_time": "7:03:02", "throughput": 3254.18, "total_tokens": 4500104}
{"current_steps": 63, "total_steps": 1200, "loss": 0.3892, "lr": 4.966073142941239e-05, "epoch": 1.0332294911734163, "percentage": 5.25, "elapsed_time": "0:23:30", "remaining_time": "7:04:08", "throughput": 3249.5, "total_tokens": 4581976}
{"current_steps": 64, "total_steps": 1200, "loss": 0.3354, "lr": 4.964990092676263e-05, "epoch": 1.0498442367601246, "percentage": 5.33, "elapsed_time": "0:23:50", "remaining_time": "7:03:07", "throughput": 3252.6, "total_tokens": 4652160}
{"current_steps": 65, "total_steps": 1200, "loss": 0.3457, "lr": 4.9638901476451946e-05, "epoch": 1.066458982346833, "percentage": 5.42, "elapsed_time": "0:24:05", "remaining_time": "7:00:39", "throughput": 3258.09, "total_tokens": 4709368}
{"current_steps": 66, "total_steps": 1200, "loss": 0.3672, "lr": 4.962773315386935e-05, "epoch": 1.083073727933541, "percentage": 5.5, "elapsed_time": "0:24:40", "remaining_time": "7:03:55", "throughput": 3241.26, "total_tokens": 4798256}
{"current_steps": 67, "total_steps": 1200, "loss": 0.3157, "lr": 4.961639603556127e-05, "epoch": 1.0996884735202492, "percentage": 5.58, "elapsed_time": "0:24:56", "remaining_time": "7:01:43", "throughput": 3247.49, "total_tokens": 4859200}
{"current_steps": 68, "total_steps": 1200, "loss": 0.3968, "lr": 4.960489019923105e-05, "epoch": 1.1163032191069575, "percentage": 5.67, "elapsed_time": "0:25:14", "remaining_time": "7:00:17", "throughput": 3251.88, "total_tokens": 4925992}
{"current_steps": 69, "total_steps": 1200, "loss": 0.329, "lr": 4.9593215723738404e-05, "epoch": 1.1329179646936656, "percentage": 5.75, "elapsed_time": "0:25:36", "remaining_time": "6:59:42", "throughput": 3253.72, "total_tokens": 4998808}
{"current_steps": 70, "total_steps": 1200, "loss": 0.2856, "lr": 4.958137268909887e-05, "epoch": 1.1495327102803738, "percentage": 5.83, "elapsed_time": "0:26:10", "remaining_time": "7:02:27", "throughput": 3241.45, "total_tokens": 5089672}
{"current_steps": 71, "total_steps": 1200, "loss": 0.3512, "lr": 4.9569361176483286e-05, "epoch": 1.1661474558670821, "percentage": 5.92, "elapsed_time": "0:26:35", "remaining_time": "7:02:50", "throughput": 3238.33, "total_tokens": 5166744}
{"current_steps": 72, "total_steps": 1200, "loss": 0.4263, "lr": 4.9557181268217227e-05, "epoch": 1.1827622014537902, "percentage": 6.0, "elapsed_time": "0:26:52", "remaining_time": "7:01:10", "throughput": 3241.36, "total_tokens": 5228264}
{"current_steps": 73, "total_steps": 1200, "loss": 0.3126, "lr": 4.9544833047780394e-05, "epoch": 1.1993769470404985, "percentage": 6.08, "elapsed_time": "0:27:31", "remaining_time": "7:04:56", "throughput": 3232.37, "total_tokens": 5338224}
{"current_steps": 74, "total_steps": 1200, "loss": 0.3473, "lr": 4.9532316599806124e-05, "epoch": 1.2159916926272065, "percentage": 6.17, "elapsed_time": "0:27:49", "remaining_time": "7:03:24", "throughput": 3234.31, "total_tokens": 5399848}
{"current_steps": 75, "total_steps": 1200, "loss": 0.3275, "lr": 4.951963201008076e-05, "epoch": 1.2326064382139148, "percentage": 6.25, "elapsed_time": "0:28:09", "remaining_time": "7:02:16", "throughput": 3237.57, "total_tokens": 5468624}
{"current_steps": 76, "total_steps": 1200, "loss": 0.3296, "lr": 4.9506779365543046e-05, "epoch": 1.249221183800623, "percentage": 6.33, "elapsed_time": "0:28:28", "remaining_time": "7:01:09", "throughput": 3240.53, "total_tokens": 5536776}
{"current_steps": 77, "total_steps": 1200, "loss": 0.3605, "lr": 4.949375875428357e-05, "epoch": 1.2658359293873311, "percentage": 6.42, "elapsed_time": "0:28:47", "remaining_time": "6:59:56", "throughput": 3246.83, "total_tokens": 5609296}
{"current_steps": 78, "total_steps": 1200, "loss": 0.3133, "lr": 4.9480570265544144e-05, "epoch": 1.2824506749740394, "percentage": 6.5, "elapsed_time": "0:29:03", "remaining_time": "6:57:56", "throughput": 3248.87, "total_tokens": 5663824}
{"current_steps": 79, "total_steps": 1200, "loss": 0.3464, "lr": 4.94672139897172e-05, "epoch": 1.2990654205607477, "percentage": 6.58, "elapsed_time": "0:29:25", "remaining_time": "6:57:32", "throughput": 3252.38, "total_tokens": 5742032}
{"current_steps": 80, "total_steps": 1200, "loss": 0.3346, "lr": 4.9453690018345144e-05, "epoch": 1.3156801661474558, "percentage": 6.67, "elapsed_time": "0:29:47", "remaining_time": "6:56:59", "throughput": 3254.88, "total_tokens": 5816864}
{"current_steps": 81, "total_steps": 1200, "loss": 0.3102, "lr": 4.943999844411977e-05, "epoch": 1.332294911734164, "percentage": 6.75, "elapsed_time": "0:30:04", "remaining_time": "6:55:35", "throughput": 3258.61, "total_tokens": 5881624}
{"current_steps": 82, "total_steps": 1200, "loss": 0.2853, "lr": 4.94261393608816e-05, "epoch": 1.3489096573208723, "percentage": 6.83, "elapsed_time": "0:30:38", "remaining_time": "6:57:46", "throughput": 3247.33, "total_tokens": 5970272}
{"current_steps": 83, "total_steps": 1200, "loss": 0.3038, "lr": 4.941211286361922e-05, "epoch": 1.3655244029075804, "percentage": 6.92, "elapsed_time": "0:31:03", "remaining_time": "6:58:00", "throughput": 3251.08, "total_tokens": 6058752}
{"current_steps": 84, "total_steps": 1200, "loss": 0.3283, "lr": 4.939791904846869e-05, "epoch": 1.3821391484942886, "percentage": 7.0, "elapsed_time": "0:31:19", "remaining_time": "6:56:10", "throughput": 3256.24, "total_tokens": 6120064}
{"current_steps": 85, "total_steps": 1200, "loss": 0.321, "lr": 4.938355801271282e-05, "epoch": 1.398753894080997, "percentage": 7.08, "elapsed_time": "0:31:38", "remaining_time": "6:55:02", "throughput": 3256.5, "total_tokens": 6182072}
{"current_steps": 86, "total_steps": 1200, "loss": 0.3296, "lr": 4.936902985478055e-05, "epoch": 1.415368639667705, "percentage": 7.17, "elapsed_time": "0:32:04", "remaining_time": "6:55:26", "throughput": 3258.16, "total_tokens": 6269680}
{"current_steps": 87, "total_steps": 1200, "loss": 0.3225, "lr": 4.935433467424624e-05, "epoch": 1.4319833852544133, "percentage": 7.25, "elapsed_time": "0:32:27", "remaining_time": "6:55:15", "throughput": 3259.11, "total_tokens": 6347424}
{"current_steps": 88, "total_steps": 1200, "loss": 0.3479, "lr": 4.933947257182901e-05, "epoch": 1.4485981308411215, "percentage": 7.33, "elapsed_time": "0:32:48", "remaining_time": "6:54:30", "throughput": 3258.12, "total_tokens": 6412584}
{"current_steps": 89, "total_steps": 1200, "loss": 0.3322, "lr": 4.932444364939205e-05, "epoch": 1.4652128764278296, "percentage": 7.42, "elapsed_time": "0:33:07", "remaining_time": "6:53:33", "throughput": 3261.31, "total_tokens": 6482728}
{"current_steps": 90, "total_steps": 1200, "loss": 0.3802, "lr": 4.9309248009941914e-05, "epoch": 1.4818276220145379, "percentage": 7.5, "elapsed_time": "0:33:32", "remaining_time": "6:53:39", "throughput": 3260.86, "total_tokens": 6562104}
{"current_steps": 91, "total_steps": 1200, "loss": 0.3371, "lr": 4.929388575762782e-05, "epoch": 1.4984423676012462, "percentage": 7.58, "elapsed_time": "0:34:00", "remaining_time": "6:54:32", "throughput": 3261.52, "total_tokens": 6656552}
{"current_steps": 92, "total_steps": 1200, "loss": 0.293, "lr": 4.9278356997740904e-05, "epoch": 1.5150571131879542, "percentage": 7.67, "elapsed_time": "0:34:15", "remaining_time": "6:52:39", "throughput": 3265.95, "total_tokens": 6714184}
{"current_steps": 93, "total_steps": 1200, "loss": 0.3127, "lr": 4.9262661836713564e-05, "epoch": 1.5316718587746625, "percentage": 7.75, "elapsed_time": "0:34:41", "remaining_time": "6:53:01", "throughput": 3263.19, "total_tokens": 6793552}
{"current_steps": 94, "total_steps": 1200, "loss": 0.3263, "lr": 4.924680038211867e-05, "epoch": 1.5482866043613708, "percentage": 7.83, "elapsed_time": "0:35:04", "remaining_time": "6:52:42", "throughput": 3262.05, "total_tokens": 6865256}
{"current_steps": 95, "total_steps": 1200, "loss": 0.3204, "lr": 4.9230772742668866e-05, "epoch": 1.5649013499480788, "percentage": 7.92, "elapsed_time": "0:35:24", "remaining_time": "6:51:48", "throughput": 3262.87, "total_tokens": 6931152}
{"current_steps": 96, "total_steps": 1200, "loss": 0.326, "lr": 4.9214579028215776e-05, "epoch": 1.5815160955347871, "percentage": 8.0, "elapsed_time": "0:35:42", "remaining_time": "6:50:42", "throughput": 3266.01, "total_tokens": 6998408}
{"current_steps": 97, "total_steps": 1200, "loss": 0.2814, "lr": 4.919821934974933e-05, "epoch": 1.5981308411214954, "percentage": 8.08, "elapsed_time": "0:35:56", "remaining_time": "6:48:46", "throughput": 3269.95, "total_tokens": 7053008}
{"current_steps": 98, "total_steps": 1200, "loss": 0.3254, "lr": 4.918169381939692e-05, "epoch": 1.6147455867082035, "percentage": 8.17, "elapsed_time": "0:36:10", "remaining_time": "6:46:44", "throughput": 3274.5, "total_tokens": 7106440}
{"current_steps": 99, "total_steps": 1200, "loss": 0.3228, "lr": 4.916500255042268e-05, "epoch": 1.6313603322949117, "percentage": 8.25, "elapsed_time": "0:36:27", "remaining_time": "6:45:27", "throughput": 3276.4, "total_tokens": 7167032}
{"current_steps": 100, "total_steps": 1200, "loss": 0.2729, "lr": 4.914814565722671e-05, "epoch": 1.64797507788162, "percentage": 8.33, "elapsed_time": "0:36:55", "remaining_time": "6:46:09", "throughput": 3270.63, "total_tokens": 7245720}
{"current_steps": 101, "total_steps": 1200, "loss": 0.3462, "lr": 4.913112325534426e-05, "epoch": 1.664589823468328, "percentage": 8.42, "elapsed_time": "0:37:19", "remaining_time": "6:46:05", "throughput": 3271.76, "total_tokens": 7326320}
{"current_steps": 102, "total_steps": 1200, "loss": 0.3096, "lr": 4.9113935461444955e-05, "epoch": 1.6812045690550363, "percentage": 8.5, "elapsed_time": "0:38:03", "remaining_time": "6:49:43", "throughput": 3258.77, "total_tokens": 7442232}
{"current_steps": 103, "total_steps": 1200, "loss": 0.3014, "lr": 4.9096582393332025e-05, "epoch": 1.6978193146417446, "percentage": 8.58, "elapsed_time": "0:38:20", "remaining_time": "6:48:20", "throughput": 3261.35, "total_tokens": 7502496}
{"current_steps": 104, "total_steps": 1200, "loss": 0.3336, "lr": 4.907906416994146e-05, "epoch": 1.7144340602284527, "percentage": 8.67, "elapsed_time": "0:38:39", "remaining_time": "6:47:27", "throughput": 3261.68, "total_tokens": 7566496}
{"current_steps": 105, "total_steps": 1200, "loss": 0.3911, "lr": 4.906138091134118e-05, "epoch": 1.731048805815161, "percentage": 8.75, "elapsed_time": "0:38:56", "remaining_time": "6:46:09", "throughput": 3264.7, "total_tokens": 7629056}
{"current_steps": 106, "total_steps": 1200, "loss": 0.3201, "lr": 4.9043532738730284e-05, "epoch": 1.7476635514018692, "percentage": 8.83, "elapsed_time": "0:39:21", "remaining_time": "6:46:11", "throughput": 3263.4, "total_tokens": 7706096}
{"current_steps": 107, "total_steps": 1200, "loss": 0.2783, "lr": 4.9025519774438136e-05, "epoch": 1.7642782969885773, "percentage": 8.92, "elapsed_time": "0:39:43", "remaining_time": "6:45:48", "throughput": 3263.98, "total_tokens": 7780072}
{"current_steps": 108, "total_steps": 1200, "loss": 0.3044, "lr": 4.900734214192358e-05, "epoch": 1.7808930425752856, "percentage": 9.0, "elapsed_time": "0:40:06", "remaining_time": "6:45:30", "throughput": 3265.41, "total_tokens": 7857712}
{"current_steps": 109, "total_steps": 1200, "loss": 0.3009, "lr": 4.898899996577407e-05, "epoch": 1.7975077881619939, "percentage": 9.08, "elapsed_time": "0:40:22", "remaining_time": "6:44:10", "throughput": 3267.54, "total_tokens": 7916832}
{"current_steps": 110, "total_steps": 1200, "loss": 0.3229, "lr": 4.8970493371704826e-05, "epoch": 1.814122533748702, "percentage": 9.17, "elapsed_time": "0:40:41", "remaining_time": "6:43:16", "throughput": 3273.39, "total_tokens": 7993056}
{"current_steps": 111, "total_steps": 1200, "loss": 0.3414, "lr": 4.8951822486557986e-05, "epoch": 1.8307372793354102, "percentage": 9.25, "elapsed_time": "0:41:14", "remaining_time": "6:44:33", "throughput": 3269.76, "total_tokens": 8090056}
{"current_steps": 112, "total_steps": 1200, "loss": 0.2907, "lr": 4.893298743830168e-05, "epoch": 1.8473520249221185, "percentage": 9.33, "elapsed_time": "0:41:37", "remaining_time": "6:44:23", "throughput": 3268.97, "total_tokens": 8164808}
{"current_steps": 113, "total_steps": 1200, "loss": 0.2901, "lr": 4.891398835602925e-05, "epoch": 1.8639667705088265, "percentage": 9.42, "elapsed_time": "0:41:53", "remaining_time": "6:42:54", "throughput": 3272.33, "total_tokens": 8223568}
{"current_steps": 114, "total_steps": 1200, "loss": 0.2918, "lr": 4.8894825369958255e-05, "epoch": 1.8805815160955348, "percentage": 9.5, "elapsed_time": "0:42:07", "remaining_time": "6:41:16", "throughput": 3274.59, "total_tokens": 8276160}
{"current_steps": 115, "total_steps": 1200, "loss": 0.3074, "lr": 4.8875498611429674e-05, "epoch": 1.897196261682243, "percentage": 9.58, "elapsed_time": "0:42:29", "remaining_time": "6:40:57", "throughput": 3276.64, "total_tokens": 8354904}
{"current_steps": 116, "total_steps": 1200, "loss": 0.3461, "lr": 4.8856008212906925e-05, "epoch": 1.9138110072689511, "percentage": 9.67, "elapsed_time": "0:42:53", "remaining_time": "6:40:51", "throughput": 3280.17, "total_tokens": 8442584}
{"current_steps": 117, "total_steps": 1200, "loss": 0.3078, "lr": 4.8836354307975026e-05, "epoch": 1.9304257528556594, "percentage": 9.75, "elapsed_time": "0:43:11", "remaining_time": "6:39:45", "throughput": 3282.81, "total_tokens": 8506688}
{"current_steps": 118, "total_steps": 1200, "loss": 0.2432, "lr": 4.881653703133966e-05, "epoch": 1.9470404984423677, "percentage": 9.83, "elapsed_time": "0:43:45", "remaining_time": "6:41:13", "throughput": 3279.76, "total_tokens": 8610712}
{"current_steps": 119, "total_steps": 1200, "loss": 0.2915, "lr": 4.87965565188262e-05, "epoch": 1.9636552440290758, "percentage": 9.92, "elapsed_time": "0:44:09", "remaining_time": "6:41:08", "throughput": 3280.83, "total_tokens": 8692624}
{"current_steps": 120, "total_steps": 1200, "loss": 0.3006, "lr": 4.877641290737884e-05, "epoch": 1.980269989615784, "percentage": 10.0, "elapsed_time": "0:44:34", "remaining_time": "6:41:08", "throughput": 3280.24, "total_tokens": 8772208}
{"current_steps": 121, "total_steps": 1200, "loss": 0.2774, "lr": 4.8756106335059646e-05, "epoch": 1.9968847352024923, "percentage": 10.08, "elapsed_time": "0:44:58", "remaining_time": "6:41:02", "throughput": 3281.6, "total_tokens": 8854904}
{"current_steps": 122, "total_steps": 1200, "loss": 0.2786, "lr": 4.87356369410476e-05, "epoch": 2.0, "percentage": 10.17, "elapsed_time": "0:45:05", "remaining_time": "6:38:27", "throughput": 3279.34, "total_tokens": 8872656}
{"current_steps": 123, "total_steps": 1200, "loss": 0.294, "lr": 4.8715004865637614e-05, "epoch": 2.016614745586708, "percentage": 10.25, "elapsed_time": "0:45:27", "remaining_time": "6:38:04", "throughput": 3279.85, "total_tokens": 8946480}
{"current_steps": 124, "total_steps": 1200, "loss": 0.2931, "lr": 4.869421025023965e-05, "epoch": 2.0332294911734166, "percentage": 10.33, "elapsed_time": "0:45:48", "remaining_time": "6:37:34", "throughput": 3282.41, "total_tokens": 9023328}
{"current_steps": 125, "total_steps": 1200, "loss": 0.2887, "lr": 4.867325323737765e-05, "epoch": 2.0498442367601246, "percentage": 10.42, "elapsed_time": "0:46:02", "remaining_time": "6:35:56", "throughput": 3284.96, "total_tokens": 9074320}
{"current_steps": 126, "total_steps": 1200, "loss": 0.2776, "lr": 4.8652133970688636e-05, "epoch": 2.0664589823468327, "percentage": 10.5, "elapsed_time": "0:46:27", "remaining_time": "6:35:57", "throughput": 3282.43, "total_tokens": 9148784}
{"current_steps": 127, "total_steps": 1200, "loss": 0.3091, "lr": 4.8630852594921706e-05, "epoch": 2.083073727933541, "percentage": 10.58, "elapsed_time": "0:47:00", "remaining_time": "6:37:11", "throughput": 3278.14, "total_tokens": 9246624}
{"current_steps": 128, "total_steps": 1200, "loss": 0.3354, "lr": 4.860940925593703e-05, "epoch": 2.0996884735202492, "percentage": 10.67, "elapsed_time": "0:47:24", "remaining_time": "6:37:02", "throughput": 3279.46, "total_tokens": 9328176}
{"current_steps": 129, "total_steps": 1200, "loss": 0.282, "lr": 4.8587804100704845e-05, "epoch": 2.1163032191069573, "percentage": 10.75, "elapsed_time": "0:47:39", "remaining_time": "6:35:43", "throughput": 3283.0, "total_tokens": 9388936}
{"current_steps": 130, "total_steps": 1200, "loss": 0.2801, "lr": 4.856603727730447e-05, "epoch": 2.132917964693666, "percentage": 10.83, "elapsed_time": "0:48:03", "remaining_time": "6:35:35", "throughput": 3281.0, "total_tokens": 9461664}
{"current_steps": 131, "total_steps": 1200, "loss": 0.2927, "lr": 4.854410893492326e-05, "epoch": 2.149532710280374, "percentage": 10.92, "elapsed_time": "0:48:23", "remaining_time": "6:34:55", "throughput": 3283.64, "total_tokens": 9535000}
{"current_steps": 132, "total_steps": 1200, "loss": 0.2807, "lr": 4.852201922385564e-05, "epoch": 2.166147455867082, "percentage": 11.0, "elapsed_time": "0:48:40", "remaining_time": "6:33:52", "throughput": 3286.79, "total_tokens": 9600296}
{"current_steps": 133, "total_steps": 1200, "loss": 0.2765, "lr": 4.8499768295502004e-05, "epoch": 2.1827622014537904, "percentage": 11.08, "elapsed_time": "0:49:06", "remaining_time": "6:34:00", "throughput": 3287.27, "total_tokens": 9686784}
{"current_steps": 134, "total_steps": 1200, "loss": 0.3068, "lr": 4.847735630236773e-05, "epoch": 2.1993769470404985, "percentage": 11.17, "elapsed_time": "0:49:37", "remaining_time": "6:34:47", "throughput": 3284.86, "total_tokens": 9781112}
{"current_steps": 135, "total_steps": 1200, "loss": 0.2737, "lr": 4.8454783398062106e-05, "epoch": 2.2159916926272065, "percentage": 11.25, "elapsed_time": "0:49:58", "remaining_time": "6:34:13", "throughput": 3284.94, "total_tokens": 9849528}
{"current_steps": 136, "total_steps": 1200, "loss": 0.2831, "lr": 4.843204973729729e-05, "epoch": 2.232606438213915, "percentage": 11.33, "elapsed_time": "0:50:24", "remaining_time": "6:34:25", "throughput": 3283.05, "total_tokens": 9931080}
{"current_steps": 137, "total_steps": 1200, "loss": 0.3047, "lr": 4.840915547588725e-05, "epoch": 2.249221183800623, "percentage": 11.42, "elapsed_time": "0:50:52", "remaining_time": "6:34:46", "throughput": 3279.48, "total_tokens": 10011176}
{"current_steps": 138, "total_steps": 1200, "loss": 0.2759, "lr": 4.838610077074669e-05, "epoch": 2.265835929387331, "percentage": 11.5, "elapsed_time": "0:51:15", "remaining_time": "6:34:27", "throughput": 3278.87, "total_tokens": 10084128}
{"current_steps": 139, "total_steps": 1200, "loss": 0.298, "lr": 4.836288577988996e-05, "epoch": 2.2824506749740396, "percentage": 11.58, "elapsed_time": "0:51:35", "remaining_time": "6:33:44", "throughput": 3281.25, "total_tokens": 10155536}
{"current_steps": 140, "total_steps": 1200, "loss": 0.255, "lr": 4.8339510662430046e-05, "epoch": 2.2990654205607477, "percentage": 11.67, "elapsed_time": "0:52:07", "remaining_time": "6:34:41", "throughput": 3277.54, "total_tokens": 10251160}
{"current_steps": 141, "total_steps": 1200, "loss": 0.2566, "lr": 4.8315975578577355e-05, "epoch": 2.3156801661474558, "percentage": 11.75, "elapsed_time": "0:52:42", "remaining_time": "6:35:53", "throughput": 3271.29, "total_tokens": 10345864}
{"current_steps": 142, "total_steps": 1200, "loss": 0.4367, "lr": 4.8292280689638725e-05, "epoch": 2.3322949117341643, "percentage": 11.83, "elapsed_time": "0:53:03", "remaining_time": "6:35:19", "throughput": 3272.33, "total_tokens": 10417616}
{"current_steps": 143, "total_steps": 1200, "loss": 0.2954, "lr": 4.826842615801628e-05, "epoch": 2.3489096573208723, "percentage": 11.92, "elapsed_time": "0:53:21", "remaining_time": "6:34:23", "throughput": 3274.19, "total_tokens": 10481816}
{"current_steps": 144, "total_steps": 1200, "loss": 0.341, "lr": 4.8244412147206284e-05, "epoch": 2.3655244029075804, "percentage": 12.0, "elapsed_time": "0:53:49", "remaining_time": "6:34:39", "throughput": 3270.97, "total_tokens": 10562056}
{"current_steps": 145, "total_steps": 1200, "loss": 0.2716, "lr": 4.822023882179811e-05, "epoch": 2.382139148494289, "percentage": 12.08, "elapsed_time": "0:54:04", "remaining_time": "6:33:23", "throughput": 3271.46, "total_tokens": 10612808}
{"current_steps": 146, "total_steps": 1200, "loss": 0.2716, "lr": 4.8195906347473e-05, "epoch": 2.398753894080997, "percentage": 12.17, "elapsed_time": "0:54:23", "remaining_time": "6:32:38", "throughput": 3273.48, "total_tokens": 10682328}
{"current_steps": 147, "total_steps": 1200, "loss": 0.2829, "lr": 4.817141489100302e-05, "epoch": 2.415368639667705, "percentage": 12.25, "elapsed_time": "0:54:51", "remaining_time": "6:32:54", "throughput": 3273.07, "total_tokens": 10771912}
{"current_steps": 148, "total_steps": 1200, "loss": 0.3038, "lr": 4.814676462024988e-05, "epoch": 2.431983385254413, "percentage": 12.33, "elapsed_time": "0:55:13", "remaining_time": "6:32:32", "throughput": 3272.22, "total_tokens": 10842232}
{"current_steps": 149, "total_steps": 1200, "loss": 0.2792, "lr": 4.8121955704163745e-05, "epoch": 2.4485981308411215, "percentage": 12.42, "elapsed_time": "0:55:28", "remaining_time": "6:31:20", "throughput": 3275.1, "total_tokens": 10902264}
{"current_steps": 150, "total_steps": 1200, "loss": 0.2403, "lr": 4.8096988312782174e-05, "epoch": 2.4652128764278296, "percentage": 12.5, "elapsed_time": "0:56:02", "remaining_time": "6:32:14", "throughput": 3269.58, "total_tokens": 10992744}
{"current_steps": 151, "total_steps": 1200, "loss": 0.1605, "lr": 4.8071862617228855e-05, "epoch": 2.4818276220145377, "percentage": 12.58, "elapsed_time": "0:56:35", "remaining_time": "6:33:12", "throughput": 3265.63, "total_tokens": 11090064}
{"current_steps": 152, "total_steps": 1200, "loss": 0.2547, "lr": 4.8046578789712515e-05, "epoch": 2.498442367601246, "percentage": 12.67, "elapsed_time": "0:56:59", "remaining_time": "6:32:58", "throughput": 3264.25, "total_tokens": 11162864}
{"current_steps": 153, "total_steps": 1200, "loss": 0.2676, "lr": 4.8021137003525664e-05, "epoch": 2.515057113187954, "percentage": 12.75, "elapsed_time": "0:57:15", "remaining_time": "6:31:50", "throughput": 3267.01, "total_tokens": 11224368}
{"current_steps": 154, "total_steps": 1200, "loss": 0.2898, "lr": 4.7995537433043446e-05, "epoch": 2.5316718587746623, "percentage": 12.83, "elapsed_time": "0:57:33", "remaining_time": "6:30:55", "throughput": 3269.67, "total_tokens": 11291056}
{"current_steps": 155, "total_steps": 1200, "loss": 0.2546, "lr": 4.796978025372246e-05, "epoch": 2.5482866043613708, "percentage": 12.92, "elapsed_time": "0:57:46", "remaining_time": "6:29:31", "throughput": 3272.82, "total_tokens": 11345464}
{"current_steps": 156, "total_steps": 1200, "loss": 0.3134, "lr": 4.794386564209953e-05, "epoch": 2.564901349948079, "percentage": 13.0, "elapsed_time": "0:58:08", "remaining_time": "6:29:04", "throughput": 3273.58, "total_tokens": 11418912}
{"current_steps": 157, "total_steps": 1200, "loss": 0.2689, "lr": 4.79177937757905e-05, "epoch": 2.581516095534787, "percentage": 13.08, "elapsed_time": "0:58:28", "remaining_time": "6:28:30", "throughput": 3274.84, "total_tokens": 11491216}
{"current_steps": 158, "total_steps": 1200, "loss": 0.2359, "lr": 4.7891564833489035e-05, "epoch": 2.5981308411214954, "percentage": 13.17, "elapsed_time": "0:58:47", "remaining_time": "6:27:42", "throughput": 3276.67, "total_tokens": 11558016}
{"current_steps": 159, "total_steps": 1200, "loss": 0.2735, "lr": 4.7865178994965344e-05, "epoch": 2.6147455867082035, "percentage": 13.25, "elapsed_time": "0:59:07", "remaining_time": "6:27:07", "throughput": 3278.24, "total_tokens": 11630432}
{"current_steps": 160, "total_steps": 1200, "loss": 0.254, "lr": 4.783863644106502e-05, "epoch": 2.6313603322949115, "percentage": 13.33, "elapsed_time": "0:59:22", "remaining_time": "6:25:54", "throughput": 3280.12, "total_tokens": 11684624}
{"current_steps": 161, "total_steps": 1200, "loss": 0.2763, "lr": 4.781193735370777e-05, "epoch": 2.64797507788162, "percentage": 13.42, "elapsed_time": "0:59:50", "remaining_time": "6:26:11", "throughput": 3278.14, "total_tokens": 11770232}
{"current_steps": 162, "total_steps": 1200, "loss": 0.2663, "lr": 4.7785081915886134e-05, "epoch": 2.664589823468328, "percentage": 13.5, "elapsed_time": "1:00:05", "remaining_time": "6:25:04", "throughput": 3280.21, "total_tokens": 11828360}
{"current_steps": 163, "total_steps": 1200, "loss": 0.2625, "lr": 4.775807031166428e-05, "epoch": 2.681204569055036, "percentage": 13.58, "elapsed_time": "1:00:34", "remaining_time": "6:25:21", "throughput": 3278.65, "total_tokens": 11915944}
{"current_steps": 164, "total_steps": 1200, "loss": 0.2615, "lr": 4.773090272617672e-05, "epoch": 2.6978193146417446, "percentage": 13.67, "elapsed_time": "1:00:53", "remaining_time": "6:24:39", "throughput": 3279.54, "total_tokens": 11981792}
{"current_steps": 165, "total_steps": 1200, "loss": 0.3453, "lr": 4.7703579345627035e-05, "epoch": 2.7144340602284527, "percentage": 13.75, "elapsed_time": "1:01:11", "remaining_time": "6:23:53", "throughput": 3280.01, "total_tokens": 12044024}
{"current_steps": 166, "total_steps": 1200, "loss": 0.3036, "lr": 4.7676100357286624e-05, "epoch": 2.7310488058151607, "percentage": 13.83, "elapsed_time": "1:01:26", "remaining_time": "6:22:42", "throughput": 3280.57, "total_tokens": 12093424}
{"current_steps": 167, "total_steps": 1200, "loss": 0.2523, "lr": 4.76484659494934e-05, "epoch": 2.7476635514018692, "percentage": 13.92, "elapsed_time": "1:01:47", "remaining_time": "6:22:16", "throughput": 3281.5, "total_tokens": 12167792}
{"current_steps": 168, "total_steps": 1200, "loss": 0.2791, "lr": 4.762067631165049e-05, "epoch": 2.7642782969885773, "percentage": 14.0, "elapsed_time": "1:02:05", "remaining_time": "6:21:23", "throughput": 3284.08, "total_tokens": 12233712}
{"current_steps": 169, "total_steps": 1200, "loss": 0.2291, "lr": 4.7592731634224966e-05, "epoch": 2.7808930425752854, "percentage": 14.08, "elapsed_time": "1:02:33", "remaining_time": "6:21:36", "throughput": 3280.11, "total_tokens": 12310544}
{"current_steps": 170, "total_steps": 1200, "loss": 0.2628, "lr": 4.756463210874652e-05, "epoch": 2.797507788161994, "percentage": 14.17, "elapsed_time": "1:03:02", "remaining_time": "6:21:57", "throughput": 3278.33, "total_tokens": 12400160}
{"current_steps": 171, "total_steps": 1200, "loss": 0.2824, "lr": 4.753637792780614e-05, "epoch": 2.814122533748702, "percentage": 14.25, "elapsed_time": "1:03:25", "remaining_time": "6:21:37", "throughput": 3279.93, "total_tokens": 12480432}
{"current_steps": 172, "total_steps": 1200, "loss": 0.2663, "lr": 4.7507969285054845e-05, "epoch": 2.83073727933541, "percentage": 14.33, "elapsed_time": "1:03:50", "remaining_time": "6:21:32", "throughput": 3281.25, "total_tokens": 12568064}
{"current_steps": 173, "total_steps": 1200, "loss": 0.28, "lr": 4.7479406375202264e-05, "epoch": 2.8473520249221185, "percentage": 14.42, "elapsed_time": "1:04:14", "remaining_time": "6:21:23", "throughput": 3281.04, "total_tokens": 12647400}
{"current_steps": 174, "total_steps": 1200, "loss": 0.2424, "lr": 4.745068939401539e-05, "epoch": 2.8639667705088265, "percentage": 14.5, "elapsed_time": "1:04:27", "remaining_time": "6:20:03", "throughput": 3283.58, "total_tokens": 12698208}
{"current_steps": 175, "total_steps": 1200, "loss": 0.2518, "lr": 4.742181853831721e-05, "epoch": 2.8805815160955346, "percentage": 14.58, "elapsed_time": "1:04:41", "remaining_time": "6:18:55", "throughput": 3286.81, "total_tokens": 12758528}
{"current_steps": 176, "total_steps": 1200, "loss": 0.259, "lr": 4.7392794005985326e-05, "epoch": 2.897196261682243, "percentage": 14.67, "elapsed_time": "1:05:09", "remaining_time": "6:19:06", "throughput": 3283.58, "total_tokens": 12837264}
{"current_steps": 177, "total_steps": 1200, "loss": 0.247, "lr": 4.7363615995950626e-05, "epoch": 2.913811007268951, "percentage": 14.75, "elapsed_time": "1:05:29", "remaining_time": "6:18:33", "throughput": 3283.15, "total_tokens": 12902368}
{"current_steps": 178, "total_steps": 1200, "loss": 0.2438, "lr": 4.733428470819594e-05, "epoch": 2.930425752855659, "percentage": 14.83, "elapsed_time": "1:05:51", "remaining_time": "6:18:09", "throughput": 3283.07, "total_tokens": 12974296}
{"current_steps": 179, "total_steps": 1200, "loss": 0.2708, "lr": 4.730480034375462e-05, "epoch": 2.9470404984423677, "percentage": 14.92, "elapsed_time": "1:06:18", "remaining_time": "6:18:10", "throughput": 3282.33, "total_tokens": 13057280}
{"current_steps": 180, "total_steps": 1200, "loss": 0.3171, "lr": 4.72751631047092e-05, "epoch": 2.9636552440290758, "percentage": 15.0, "elapsed_time": "1:06:49", "remaining_time": "6:18:40", "throughput": 3281.82, "total_tokens": 13158232}
{"current_steps": 181, "total_steps": 1200, "loss": 0.24, "lr": 4.7245373194189994e-05, "epoch": 2.980269989615784, "percentage": 15.08, "elapsed_time": "1:07:11", "remaining_time": "6:18:14", "throughput": 3281.98, "total_tokens": 13229840}
{"current_steps": 182, "total_steps": 1200, "loss": 0.2674, "lr": 4.7215430816373726e-05, "epoch": 2.9968847352024923, "percentage": 15.17, "elapsed_time": "1:07:29", "remaining_time": "6:17:29", "throughput": 3283.62, "total_tokens": 13296520}
{"current_steps": 183, "total_steps": 1200, "loss": 0.1677, "lr": 4.718533617648209e-05, "epoch": 3.0, "percentage": 15.25, "elapsed_time": "1:07:33", "remaining_time": "6:15:25", "throughput": 3283.73, "total_tokens": 13309672}
{"current_steps": 184, "total_steps": 1200, "loss": 0.2272, "lr": 4.715508948078037e-05, "epoch": 3.016614745586708, "percentage": 15.33, "elapsed_time": "1:07:49", "remaining_time": "6:14:30", "throughput": 3285.86, "total_tokens": 13371544}
{"current_steps": 185, "total_steps": 1200, "loss": 0.2133, "lr": 4.712469093657605e-05, "epoch": 3.0332294911734166, "percentage": 15.42, "elapsed_time": "1:08:06", "remaining_time": "6:13:39", "throughput": 3287.26, "total_tokens": 13432984}
{"current_steps": 186, "total_steps": 1200, "loss": 0.2695, "lr": 4.709414075221734e-05, "epoch": 3.0498442367601246, "percentage": 15.5, "elapsed_time": "1:08:26", "remaining_time": "6:13:08", "throughput": 3287.24, "total_tokens": 13500016}
{"current_steps": 187, "total_steps": 1200, "loss": 0.2524, "lr": 4.706343913709178e-05, "epoch": 3.0664589823468327, "percentage": 15.58, "elapsed_time": "1:08:49", "remaining_time": "6:12:51", "throughput": 3288.16, "total_tokens": 13579672}
{"current_steps": 188, "total_steps": 1200, "loss": 0.2396, "lr": 4.70325863016248e-05, "epoch": 3.083073727933541, "percentage": 15.67, "elapsed_time": "1:09:04", "remaining_time": "6:11:48", "throughput": 3289.01, "total_tokens": 13630704}
{"current_steps": 189, "total_steps": 1200, "loss": 0.3057, "lr": 4.7001582457278304e-05, "epoch": 3.0996884735202492, "percentage": 15.75, "elapsed_time": "1:09:24", "remaining_time": "6:11:18", "throughput": 3288.33, "total_tokens": 13695472}
{"current_steps": 190, "total_steps": 1200, "loss": 0.2436, "lr": 4.697042781654913e-05, "epoch": 3.1163032191069573, "percentage": 15.83, "elapsed_time": "1:09:47", "remaining_time": "6:11:01", "throughput": 3287.65, "total_tokens": 13767792}
{"current_steps": 191, "total_steps": 1200, "loss": 0.2974, "lr": 4.693912259296773e-05, "epoch": 3.132917964693666, "percentage": 15.92, "elapsed_time": "1:10:13", "remaining_time": "6:10:56", "throughput": 3289.11, "total_tokens": 13857352}
{"current_steps": 192, "total_steps": 1200, "loss": 0.2457, "lr": 4.690766700109659e-05, "epoch": 3.149532710280374, "percentage": 16.0, "elapsed_time": "1:10:38", "remaining_time": "6:10:54", "throughput": 3288.52, "total_tokens": 13939928}
{"current_steps": 193, "total_steps": 1200, "loss": 0.2205, "lr": 4.687606125652882e-05, "epoch": 3.166147455867082, "percentage": 16.08, "elapsed_time": "1:11:06", "remaining_time": "6:11:00", "throughput": 3285.63, "total_tokens": 14017936}
{"current_steps": 194, "total_steps": 1200, "loss": 0.2116, "lr": 4.684430557588664e-05, "epoch": 3.1827622014537904, "percentage": 16.17, "elapsed_time": "1:11:22", "remaining_time": "6:10:08", "throughput": 3286.25, "total_tokens": 14074176}
{"current_steps": 195, "total_steps": 1200, "loss": 0.2839, "lr": 4.681240017681993e-05, "epoch": 3.1993769470404985, "percentage": 16.25, "elapsed_time": "1:11:51", "remaining_time": "6:10:20", "throughput": 3286.1, "total_tokens": 14167656}
{"current_steps": 196, "total_steps": 1200, "loss": 0.2115, "lr": 4.678034527800474e-05, "epoch": 3.2159916926272065, "percentage": 16.33, "elapsed_time": "1:12:15", "remaining_time": "6:10:07", "throughput": 3283.63, "total_tokens": 14235800}
{"current_steps": 197, "total_steps": 1200, "loss": 0.1982, "lr": 4.674814109914174e-05, "epoch": 3.232606438213915, "percentage": 16.42, "elapsed_time": "1:12:35", "remaining_time": "6:09:36", "throughput": 3283.37, "total_tokens": 14301272}
{"current_steps": 198, "total_steps": 1200, "loss": 0.2494, "lr": 4.671578786095478e-05, "epoch": 3.249221183800623, "percentage": 16.5, "elapsed_time": "1:12:47", "remaining_time": "6:08:20", "throughput": 3285.29, "total_tokens": 14347352}
{"current_steps": 199, "total_steps": 1200, "loss": 0.3186, "lr": 4.668328578518933e-05, "epoch": 3.265835929387331, "percentage": 16.58, "elapsed_time": "1:13:15", "remaining_time": "6:08:30", "throughput": 3283.91, "total_tokens": 14434600}
{"current_steps": 200, "total_steps": 1200, "loss": 0.2361, "lr": 4.665063509461097e-05, "epoch": 3.2824506749740396, "percentage": 16.67, "elapsed_time": "1:13:28", "remaining_time": "6:07:21", "throughput": 3285.72, "total_tokens": 14484104}
{"current_steps": 201, "total_steps": 1200, "loss": 0.2457, "lr": 4.661783601300388e-05, "epoch": 3.2990654205607477, "percentage": 16.75, "elapsed_time": "1:13:56", "remaining_time": "6:07:30", "throughput": 3283.39, "total_tokens": 14567152}
{"current_steps": 202, "total_steps": 1200, "loss": 0.2885, "lr": 4.6584888765169296e-05, "epoch": 3.3156801661474558, "percentage": 16.83, "elapsed_time": "1:14:19", "remaining_time": "6:07:10", "throughput": 3284.71, "total_tokens": 14647040}
{"current_steps": 203, "total_steps": 1200, "loss": 0.259, "lr": 4.6551793576923964e-05, "epoch": 3.3322949117341643, "percentage": 16.92, "elapsed_time": "1:14:46", "remaining_time": "6:07:16", "throughput": 3284.76, "total_tokens": 14738216}
{"current_steps": 204, "total_steps": 1200, "loss": 0.2102, "lr": 4.65185506750986e-05, "epoch": 3.3489096573208723, "percentage": 17.0, "elapsed_time": "1:15:07", "remaining_time": "6:06:48", "throughput": 3285.68, "total_tokens": 14811216}
{"current_steps": 205, "total_steps": 1200, "loss": 0.1858, "lr": 4.648516028753632e-05, "epoch": 3.3655244029075804, "percentage": 17.08, "elapsed_time": "1:15:32", "remaining_time": "6:06:38", "throughput": 3284.39, "total_tokens": 14885992}
{"current_steps": 206, "total_steps": 1200, "loss": 0.272, "lr": 4.645162264309112e-05, "epoch": 3.382139148494289, "percentage": 17.17, "elapsed_time": "1:15:57", "remaining_time": "6:06:30", "throughput": 3282.97, "total_tokens": 14961984}
{"current_steps": 207, "total_steps": 1200, "loss": 0.2036, "lr": 4.6417937971626245e-05, "epoch": 3.398753894080997, "percentage": 17.25, "elapsed_time": "1:16:14", "remaining_time": "6:05:43", "throughput": 3283.85, "total_tokens": 15021240}
{"current_steps": 208, "total_steps": 1200, "loss": 0.2011, "lr": 4.638410650401267e-05, "epoch": 3.415368639667705, "percentage": 17.33, "elapsed_time": "1:16:34", "remaining_time": "6:05:12", "throughput": 3284.72, "total_tokens": 15092016}
{"current_steps": 209, "total_steps": 1200, "loss": 0.2007, "lr": 4.635012847212748e-05, "epoch": 3.431983385254413, "percentage": 17.42, "elapsed_time": "1:17:05", "remaining_time": "6:05:33", "throughput": 3285.57, "total_tokens": 15198192}
{"current_steps": 210, "total_steps": 1200, "loss": 0.2139, "lr": 4.6316004108852305e-05, "epoch": 3.4485981308411215, "percentage": 17.5, "elapsed_time": "1:17:22", "remaining_time": "6:04:45", "throughput": 3286.73, "total_tokens": 15258432}
{"current_steps": 211, "total_steps": 1200, "loss": 0.2441, "lr": 4.628173364807171e-05, "epoch": 3.4652128764278296, "percentage": 17.58, "elapsed_time": "1:17:41", "remaining_time": "6:04:09", "throughput": 3288.54, "total_tokens": 15329600}
{"current_steps": 212, "total_steps": 1200, "loss": 0.2368, "lr": 4.6247317324671605e-05, "epoch": 3.4818276220145377, "percentage": 17.67, "elapsed_time": "1:18:04", "remaining_time": "6:03:53", "throughput": 3288.83, "total_tokens": 15407920}
{"current_steps": 213, "total_steps": 1200, "loss": 0.231, "lr": 4.6212755374537596e-05, "epoch": 3.498442367601246, "percentage": 17.75, "elapsed_time": "1:18:24", "remaining_time": "6:03:20", "throughput": 3290.3, "total_tokens": 15479640}
{"current_steps": 214, "total_steps": 1200, "loss": 0.2336, "lr": 4.617804803455344e-05, "epoch": 3.515057113187954, "percentage": 17.83, "elapsed_time": "1:18:50", "remaining_time": "6:03:15", "throughput": 3289.74, "total_tokens": 15561960}
{"current_steps": 215, "total_steps": 1200, "loss": 0.2638, "lr": 4.614319554259934e-05, "epoch": 3.5316718587746623, "percentage": 17.92, "elapsed_time": "1:19:11", "remaining_time": "6:02:47", "throughput": 3292.07, "total_tokens": 15641440}
{"current_steps": 216, "total_steps": 1200, "loss": 0.2646, "lr": 4.610819813755038e-05, "epoch": 3.5482866043613708, "percentage": 18.0, "elapsed_time": "1:19:40", "remaining_time": "6:02:57", "throughput": 3290.21, "total_tokens": 15728872}
{"current_steps": 217, "total_steps": 1200, "loss": 0.2211, "lr": 4.607305605927487e-05, "epoch": 3.564901349948079, "percentage": 18.08, "elapsed_time": "1:20:00", "remaining_time": "6:02:24", "throughput": 3291.13, "total_tokens": 15798112}
{"current_steps": 218, "total_steps": 1200, "loss": 0.2901, "lr": 4.6037769548632656e-05, "epoch": 3.581516095534787, "percentage": 18.17, "elapsed_time": "1:20:22", "remaining_time": "6:02:05", "throughput": 3289.66, "total_tokens": 15865936}
{"current_steps": 219, "total_steps": 1200, "loss": 0.2713, "lr": 4.600233884747355e-05, "epoch": 3.5981308411214954, "percentage": 18.25, "elapsed_time": "1:20:43", "remaining_time": "6:01:38", "throughput": 3291.02, "total_tokens": 15941368}
{"current_steps": 220, "total_steps": 1200, "loss": 0.2047, "lr": 4.5966764198635606e-05, "epoch": 3.6147455867082035, "percentage": 18.33, "elapsed_time": "1:21:14", "remaining_time": "6:01:55", "throughput": 3287.95, "total_tokens": 16028208}
{"current_steps": 221, "total_steps": 1200, "loss": 0.1872, "lr": 4.5931045845943474e-05, "epoch": 3.6313603322949115, "percentage": 18.42, "elapsed_time": "1:21:39", "remaining_time": "6:01:42", "throughput": 3287.21, "total_tokens": 16104408}
{"current_steps": 222, "total_steps": 1200, "loss": 0.3526, "lr": 4.5895184034206765e-05, "epoch": 3.64797507788162, "percentage": 18.5, "elapsed_time": "1:21:52", "remaining_time": "6:00:40", "throughput": 3289.02, "total_tokens": 16156800}
{"current_steps": 223, "total_steps": 1200, "loss": 0.2905, "lr": 4.585917900921829e-05, "epoch": 3.664589823468328, "percentage": 18.58, "elapsed_time": "1:22:26", "remaining_time": "6:01:10", "throughput": 3286.62, "total_tokens": 16256712}
{"current_steps": 224, "total_steps": 1200, "loss": 0.2014, "lr": 4.5823031017752485e-05, "epoch": 3.681204569055036, "percentage": 18.67, "elapsed_time": "1:22:50", "remaining_time": "6:00:57", "throughput": 3285.45, "total_tokens": 16330344}
{"current_steps": 225, "total_steps": 1200, "loss": 0.2178, "lr": 4.5786740307563636e-05, "epoch": 3.6978193146417446, "percentage": 18.75, "elapsed_time": "1:23:11", "remaining_time": "6:00:28", "throughput": 3285.76, "total_tokens": 16399792}
{"current_steps": 226, "total_steps": 1200, "loss": 0.2149, "lr": 4.575030712738419e-05, "epoch": 3.7144340602284527, "percentage": 18.83, "elapsed_time": "1:23:30", "remaining_time": "5:59:53", "throughput": 3286.4, "total_tokens": 16466368}
{"current_steps": 227, "total_steps": 1200, "loss": 0.2164, "lr": 4.571373172692309e-05, "epoch": 3.7310488058151607, "percentage": 18.92, "elapsed_time": "1:23:46", "remaining_time": "5:59:06", "throughput": 3288.54, "total_tokens": 16530976}
{"current_steps": 228, "total_steps": 1200, "loss": 0.2197, "lr": 4.567701435686404e-05, "epoch": 3.7476635514018692, "percentage": 19.0, "elapsed_time": "1:24:08", "remaining_time": "5:58:43", "throughput": 3287.98, "total_tokens": 16600216}
{"current_steps": 229, "total_steps": 1200, "loss": 0.2527, "lr": 4.5640155268863796e-05, "epoch": 3.7642782969885773, "percentage": 19.08, "elapsed_time": "1:24:29", "remaining_time": "5:58:17", "throughput": 3288.64, "total_tokens": 16673192}
{"current_steps": 230, "total_steps": 1200, "loss": 0.1974, "lr": 4.5603154715550386e-05, "epoch": 3.7808930425752854, "percentage": 19.17, "elapsed_time": "1:24:54", "remaining_time": "5:58:04", "throughput": 3285.96, "total_tokens": 16739912}
{"current_steps": 231, "total_steps": 1200, "loss": 0.2098, "lr": 4.55660129505215e-05, "epoch": 3.797507788161994, "percentage": 19.25, "elapsed_time": "1:25:21", "remaining_time": "5:58:04", "throughput": 3286.93, "total_tokens": 16834632}
{"current_steps": 232, "total_steps": 1200, "loss": 0.2109, "lr": 4.5528730228342605e-05, "epoch": 3.814122533748702, "percentage": 19.33, "elapsed_time": "1:25:45", "remaining_time": "5:57:47", "throughput": 3287.49, "total_tokens": 16914728}
{"current_steps": 233, "total_steps": 1200, "loss": 0.2492, "lr": 4.549130680454532e-05, "epoch": 3.83073727933541, "percentage": 19.42, "elapsed_time": "1:26:18", "remaining_time": "5:58:10", "throughput": 3285.81, "total_tokens": 17014304}
{"current_steps": 234, "total_steps": 1200, "loss": 0.2415, "lr": 4.545374293562559e-05, "epoch": 3.8473520249221185, "percentage": 19.5, "elapsed_time": "1:26:46", "remaining_time": "5:58:13", "throughput": 3285.59, "total_tokens": 17106664}
{"current_steps": 235, "total_steps": 1200, "loss": 0.2311, "lr": 4.541603887904198e-05, "epoch": 3.8639667705088265, "percentage": 19.58, "elapsed_time": "1:27:15", "remaining_time": "5:58:17", "throughput": 3284.34, "total_tokens": 17193744}
{"current_steps": 236, "total_steps": 1200, "loss": 0.2309, "lr": 4.537819489321386e-05, "epoch": 3.8805815160955346, "percentage": 19.67, "elapsed_time": "1:27:31", "remaining_time": "5:57:30", "throughput": 3285.79, "total_tokens": 17254656}
{"current_steps": 237, "total_steps": 1200, "loss": 0.2334, "lr": 4.534021123751968e-05, "epoch": 3.897196261682243, "percentage": 19.75, "elapsed_time": "1:27:50", "remaining_time": "5:56:55", "throughput": 3287.29, "total_tokens": 17325896}
{"current_steps": 238, "total_steps": 1200, "loss": 0.2598, "lr": 4.5302088172295156e-05, "epoch": 3.913811007268951, "percentage": 19.83, "elapsed_time": "1:28:10", "remaining_time": "5:56:23", "throughput": 3287.91, "total_tokens": 17394424}
{"current_steps": 239, "total_steps": 1200, "loss": 0.2132, "lr": 4.526382595883152e-05, "epoch": 3.930425752855659, "percentage": 19.92, "elapsed_time": "1:28:26", "remaining_time": "5:55:36", "throughput": 3289.76, "total_tokens": 17456352}
{"current_steps": 240, "total_steps": 1200, "loss": 0.2139, "lr": 4.522542485937369e-05, "epoch": 3.9470404984423677, "percentage": 20.0, "elapsed_time": "1:28:49", "remaining_time": "5:55:16", "throughput": 3287.48, "total_tokens": 17519168}
{"current_steps": 241, "total_steps": 1200, "loss": 0.2358, "lr": 4.51868851371185e-05, "epoch": 3.9636552440290758, "percentage": 20.08, "elapsed_time": "1:29:08", "remaining_time": "5:54:44", "throughput": 3287.59, "total_tokens": 17585144}
{"current_steps": 242, "total_steps": 1200, "loss": 0.1937, "lr": 4.5148207056212896e-05, "epoch": 3.980269989615784, "percentage": 20.17, "elapsed_time": "1:29:30", "remaining_time": "5:54:18", "throughput": 3288.88, "total_tokens": 17662024}
{"current_steps": 243, "total_steps": 1200, "loss": 0.222, "lr": 4.5109390881752114e-05, "epoch": 3.9968847352024923, "percentage": 20.25, "elapsed_time": "1:29:47", "remaining_time": "5:53:38", "throughput": 3289.67, "total_tokens": 17724360}
{"current_steps": 244, "total_steps": 1200, "loss": 0.2036, "lr": 4.5070436879777865e-05, "epoch": 4.0, "percentage": 20.33, "elapsed_time": "1:29:54", "remaining_time": "5:52:14", "throughput": 3289.87, "total_tokens": 17746200}
{"current_steps": 245, "total_steps": 1200, "loss": 0.189, "lr": 4.503134531727652e-05, "epoch": 4.0166147455867085, "percentage": 20.42, "elapsed_time": "1:30:22", "remaining_time": "5:52:15", "throughput": 3288.52, "total_tokens": 17830760}
{"current_steps": 246, "total_steps": 1200, "loss": 0.2027, "lr": 4.499211646217727e-05, "epoch": 4.033229491173416, "percentage": 20.5, "elapsed_time": "1:30:42", "remaining_time": "5:51:45", "throughput": 3289.82, "total_tokens": 17903840}
{"current_steps": 247, "total_steps": 1200, "loss": 0.2018, "lr": 4.495275058335029e-05, "epoch": 4.049844236760125, "percentage": 20.58, "elapsed_time": "1:31:10", "remaining_time": "5:51:47", "throughput": 3288.5, "total_tokens": 17990448}
{"current_steps": 248, "total_steps": 1200, "loss": 0.2182, "lr": 4.491324795060491e-05, "epoch": 4.066458982346833, "percentage": 20.67, "elapsed_time": "1:31:33", "remaining_time": "5:51:29", "throughput": 3289.02, "total_tokens": 18069520}
{"current_steps": 249, "total_steps": 1200, "loss": 0.1997, "lr": 4.487360883468775e-05, "epoch": 4.083073727933541, "percentage": 20.75, "elapsed_time": "1:31:50", "remaining_time": "5:50:46", "throughput": 3289.83, "total_tokens": 18129128}
{"current_steps": 250, "total_steps": 1200, "loss": 0.2237, "lr": 4.4833833507280884e-05, "epoch": 4.099688473520249, "percentage": 20.83, "elapsed_time": "1:32:10", "remaining_time": "5:50:15", "throughput": 3291.39, "total_tokens": 18202472}
{"current_steps": 251, "total_steps": 1200, "loss": 0.2012, "lr": 4.4793922240999933e-05, "epoch": 4.116303219106958, "percentage": 20.92, "elapsed_time": "1:32:30", "remaining_time": "5:49:44", "throughput": 3291.27, "total_tokens": 18267232}
{"current_steps": 252, "total_steps": 1200, "loss": 0.2189, "lr": 4.4753875309392266e-05, "epoch": 4.132917964693665, "percentage": 21.0, "elapsed_time": "1:32:47", "remaining_time": "5:49:05", "throughput": 3291.22, "total_tokens": 18325216}
{"current_steps": 253, "total_steps": 1200, "loss": 0.2333, "lr": 4.471369298693505e-05, "epoch": 4.149532710280374, "percentage": 21.08, "elapsed_time": "1:33:14", "remaining_time": "5:49:01", "throughput": 3289.99, "total_tokens": 18406184}
{"current_steps": 254, "total_steps": 1200, "loss": 0.191, "lr": 4.467337554903344e-05, "epoch": 4.166147455867082, "percentage": 21.17, "elapsed_time": "1:33:35", "remaining_time": "5:48:35", "throughput": 3290.94, "total_tokens": 18481056}
{"current_steps": 255, "total_steps": 1200, "loss": 0.1707, "lr": 4.463292327201862e-05, "epoch": 4.18276220145379, "percentage": 21.25, "elapsed_time": "1:33:57", "remaining_time": "5:48:13", "throughput": 3291.12, "total_tokens": 18554864}
{"current_steps": 256, "total_steps": 1200, "loss": 0.2362, "lr": 4.4592336433146e-05, "epoch": 4.1993769470404985, "percentage": 21.33, "elapsed_time": "1:34:14", "remaining_time": "5:47:32", "throughput": 3291.38, "total_tokens": 18612120}
{"current_steps": 257, "total_steps": 1200, "loss": 0.2385, "lr": 4.4551615310593195e-05, "epoch": 4.215991692627207, "percentage": 21.42, "elapsed_time": "1:34:48", "remaining_time": "5:47:53", "throughput": 3288.97, "total_tokens": 18710408}
{"current_steps": 258, "total_steps": 1200, "loss": 0.2154, "lr": 4.451076018345825e-05, "epoch": 4.232606438213915, "percentage": 21.5, "elapsed_time": "1:35:04", "remaining_time": "5:47:08", "throughput": 3290.21, "total_tokens": 18769400}
{"current_steps": 259, "total_steps": 1200, "loss": 0.2021, "lr": 4.4469771331757604e-05, "epoch": 4.249221183800623, "percentage": 21.58, "elapsed_time": "1:35:28", "remaining_time": "5:46:51", "throughput": 3290.71, "total_tokens": 18849704}
{"current_steps": 260, "total_steps": 1200, "loss": 0.2245, "lr": 4.442864903642428e-05, "epoch": 4.265835929387332, "percentage": 21.67, "elapsed_time": "1:36:01", "remaining_time": "5:47:10", "throughput": 3287.86, "total_tokens": 18943328}
{"current_steps": 261, "total_steps": 1200, "loss": 0.2279, "lr": 4.4387393579305865e-05, "epoch": 4.282450674974039, "percentage": 21.75, "elapsed_time": "1:36:25", "remaining_time": "5:46:53", "throughput": 3288.16, "total_tokens": 19022536}
{"current_steps": 262, "total_steps": 1200, "loss": 0.1913, "lr": 4.434600524316266e-05, "epoch": 4.299065420560748, "percentage": 21.83, "elapsed_time": "1:36:46", "remaining_time": "5:46:28", "throughput": 3287.55, "total_tokens": 19089200}
{"current_steps": 263, "total_steps": 1200, "loss": 0.3056, "lr": 4.430448431166567e-05, "epoch": 4.315680166147456, "percentage": 21.92, "elapsed_time": "1:37:11", "remaining_time": "5:46:16", "throughput": 3287.48, "total_tokens": 19171216}
{"current_steps": 264, "total_steps": 1200, "loss": 0.1719, "lr": 4.426283106939474e-05, "epoch": 4.332294911734164, "percentage": 22.0, "elapsed_time": "1:37:44", "remaining_time": "5:46:33", "throughput": 3285.94, "total_tokens": 19271872}
{"current_steps": 265, "total_steps": 1200, "loss": 0.2856, "lr": 4.4221045801836494e-05, "epoch": 4.348909657320872, "percentage": 22.08, "elapsed_time": "1:38:05", "remaining_time": "5:46:06", "throughput": 3286.5, "total_tokens": 19342984}
{"current_steps": 266, "total_steps": 1200, "loss": 0.2079, "lr": 4.41791287953825e-05, "epoch": 4.365524402907581, "percentage": 22.17, "elapsed_time": "1:38:17", "remaining_time": "5:45:06", "throughput": 3288.32, "total_tokens": 19391640}
{"current_steps": 267, "total_steps": 1200, "loss": 0.2321, "lr": 4.4137080337327205e-05, "epoch": 4.382139148494288, "percentage": 22.25, "elapsed_time": "1:38:38", "remaining_time": "5:44:42", "throughput": 3288.44, "total_tokens": 19463232}
{"current_steps": 268, "total_steps": 1200, "loss": 0.2147, "lr": 4.4094900715866064e-05, "epoch": 4.398753894080997, "percentage": 22.33, "elapsed_time": "1:38:55", "remaining_time": "5:44:00", "throughput": 3289.5, "total_tokens": 19523728}
{"current_steps": 269, "total_steps": 1200, "loss": 0.2283, "lr": 4.4052590220093446e-05, "epoch": 4.415368639667705, "percentage": 22.42, "elapsed_time": "1:39:16", "remaining_time": "5:43:36", "throughput": 3290.15, "total_tokens": 19598960}
{"current_steps": 270, "total_steps": 1200, "loss": 0.2041, "lr": 4.401014914000078e-05, "epoch": 4.431983385254413, "percentage": 22.5, "elapsed_time": "1:39:36", "remaining_time": "5:43:06", "throughput": 3290.44, "total_tokens": 19666136}
{"current_steps": 271, "total_steps": 1200, "loss": 0.21, "lr": 4.3967577766474455e-05, "epoch": 4.4485981308411215, "percentage": 22.58, "elapsed_time": "1:39:53", "remaining_time": "5:42:26", "throughput": 3291.63, "total_tokens": 19728600}
{"current_steps": 272, "total_steps": 1200, "loss": 0.2471, "lr": 4.3924876391293915e-05, "epoch": 4.46521287642783, "percentage": 22.67, "elapsed_time": "1:40:18", "remaining_time": "5:42:12", "throughput": 3290.21, "total_tokens": 19801032}
{"current_steps": 273, "total_steps": 1200, "loss": 0.2173, "lr": 4.3882045307129594e-05, "epoch": 4.481827622014538, "percentage": 22.75, "elapsed_time": "1:40:42", "remaining_time": "5:41:57", "throughput": 3290.98, "total_tokens": 19885496}
{"current_steps": 274, "total_steps": 1200, "loss": 0.2104, "lr": 4.383908480754095e-05, "epoch": 4.498442367601246, "percentage": 22.83, "elapsed_time": "1:41:01", "remaining_time": "5:41:25", "throughput": 3291.52, "total_tokens": 19952072}
{"current_steps": 275, "total_steps": 1200, "loss": 0.1908, "lr": 4.379599518697444e-05, "epoch": 4.515057113187955, "percentage": 22.92, "elapsed_time": "1:41:21", "remaining_time": "5:40:56", "throughput": 3292.89, "total_tokens": 20026536}
{"current_steps": 276, "total_steps": 1200, "loss": 0.1778, "lr": 4.375277674076149e-05, "epoch": 4.531671858774662, "percentage": 23.0, "elapsed_time": "1:41:36", "remaining_time": "5:40:08", "throughput": 3293.75, "total_tokens": 20079112}
{"current_steps": 277, "total_steps": 1200, "loss": 0.302, "lr": 4.3709429765116504e-05, "epoch": 4.548286604361371, "percentage": 23.08, "elapsed_time": "1:41:54", "remaining_time": "5:39:35", "throughput": 3294.35, "total_tokens": 20144264}
{"current_steps": 278, "total_steps": 1200, "loss": 0.2113, "lr": 4.366595455713479e-05, "epoch": 4.564901349948079, "percentage": 23.17, "elapsed_time": "1:42:11", "remaining_time": "5:38:54", "throughput": 3295.88, "total_tokens": 20207568}
{"current_steps": 279, "total_steps": 1200, "loss": 0.2519, "lr": 4.3622351414790554e-05, "epoch": 4.581516095534787, "percentage": 23.25, "elapsed_time": "1:42:35", "remaining_time": "5:38:39", "throughput": 3296.61, "total_tokens": 20292376}
{"current_steps": 280, "total_steps": 1200, "loss": 0.1628, "lr": 4.357862063693486e-05, "epoch": 4.598130841121495, "percentage": 23.33, "elapsed_time": "1:43:11", "remaining_time": "5:39:02", "throughput": 3292.19, "total_tokens": 20383048}
{"current_steps": 281, "total_steps": 1200, "loss": 0.1923, "lr": 4.353476252329356e-05, "epoch": 4.614745586708204, "percentage": 23.42, "elapsed_time": "1:43:33", "remaining_time": "5:38:41", "throughput": 3293.23, "total_tokens": 20463376}
{"current_steps": 282, "total_steps": 1200, "loss": 0.1745, "lr": 4.349077737446525e-05, "epoch": 4.6313603322949115, "percentage": 23.5, "elapsed_time": "1:43:55", "remaining_time": "5:38:17", "throughput": 3293.8, "total_tokens": 20537808}
{"current_steps": 283, "total_steps": 1200, "loss": 0.207, "lr": 4.344666549191921e-05, "epoch": 4.64797507788162, "percentage": 23.58, "elapsed_time": "1:44:13", "remaining_time": "5:37:44", "throughput": 3294.79, "total_tokens": 20605496}
{"current_steps": 284, "total_steps": 1200, "loss": 0.2412, "lr": 4.3402427177993366e-05, "epoch": 4.6645898234683285, "percentage": 23.67, "elapsed_time": "1:44:45", "remaining_time": "5:37:53", "throughput": 3291.87, "total_tokens": 20692096}
{"current_steps": 285, "total_steps": 1200, "loss": 0.2219, "lr": 4.335806273589214e-05, "epoch": 4.681204569055036, "percentage": 23.75, "elapsed_time": "1:45:08", "remaining_time": "5:37:32", "throughput": 3291.42, "total_tokens": 20762800}
{"current_steps": 286, "total_steps": 1200, "loss": 0.1831, "lr": 4.3313572469684474e-05, "epoch": 4.697819314641745, "percentage": 23.83, "elapsed_time": "1:45:28", "remaining_time": "5:37:03", "throughput": 3291.86, "total_tokens": 20831584}
{"current_steps": 287, "total_steps": 1200, "loss": 0.1457, "lr": 4.326895668430166e-05, "epoch": 4.714434060228453, "percentage": 23.92, "elapsed_time": "1:45:51", "remaining_time": "5:36:46", "throughput": 3289.91, "total_tokens": 20897320}
{"current_steps": 288, "total_steps": 1200, "loss": 0.193, "lr": 4.3224215685535294e-05, "epoch": 4.731048805815161, "percentage": 24.0, "elapsed_time": "1:46:11", "remaining_time": "5:36:15", "throughput": 3290.69, "total_tokens": 20966136}
{"current_steps": 289, "total_steps": 1200, "loss": 0.1868, "lr": 4.317934978003517e-05, "epoch": 4.747663551401869, "percentage": 24.08, "elapsed_time": "1:46:35", "remaining_time": "5:35:58", "throughput": 3289.22, "total_tokens": 21034800}
{"current_steps": 290, "total_steps": 1200, "loss": 0.2251, "lr": 4.313435927530719e-05, "epoch": 4.764278296988578, "percentage": 24.17, "elapsed_time": "1:46:54", "remaining_time": "5:35:29", "throughput": 3289.09, "total_tokens": 21098672}
{"current_steps": 291, "total_steps": 1200, "loss": 0.1853, "lr": 4.3089244479711236e-05, "epoch": 4.780893042575285, "percentage": 24.25, "elapsed_time": "1:47:22", "remaining_time": "5:35:24", "throughput": 3287.22, "total_tokens": 21177632}
{"current_steps": 292, "total_steps": 1200, "loss": 0.2135, "lr": 4.304400570245906e-05, "epoch": 4.797507788161994, "percentage": 24.33, "elapsed_time": "1:47:39", "remaining_time": "5:34:45", "throughput": 3288.54, "total_tokens": 21240896}
{"current_steps": 293, "total_steps": 1200, "loss": 0.177, "lr": 4.299864325361217e-05, "epoch": 4.814122533748702, "percentage": 24.42, "elapsed_time": "1:48:06", "remaining_time": "5:34:40", "throughput": 3287.12, "total_tokens": 21322984}
{"current_steps": 294, "total_steps": 1200, "loss": 0.1877, "lr": 4.295315744407972e-05, "epoch": 4.83073727933541, "percentage": 24.5, "elapsed_time": "1:48:26", "remaining_time": "5:34:10", "throughput": 3287.35, "total_tokens": 21389128}
{"current_steps": 295, "total_steps": 1200, "loss": 0.2124, "lr": 4.290754858561637e-05, "epoch": 4.8473520249221185, "percentage": 24.58, "elapsed_time": "1:48:50", "remaining_time": "5:33:55", "throughput": 3287.43, "total_tokens": 21469912}
{"current_steps": 296, "total_steps": 1200, "loss": 0.1833, "lr": 4.2861816990820084e-05, "epoch": 4.863966770508826, "percentage": 24.67, "elapsed_time": "1:49:09", "remaining_time": "5:33:22", "throughput": 3288.87, "total_tokens": 21540320}
{"current_steps": 297, "total_steps": 1200, "loss": 0.2134, "lr": 4.281596297313013e-05, "epoch": 4.880581516095535, "percentage": 24.75, "elapsed_time": "1:49:34", "remaining_time": "5:33:08", "throughput": 3289.47, "total_tokens": 21626312}
{"current_steps": 298, "total_steps": 1200, "loss": 0.1912, "lr": 4.2769986846824815e-05, "epoch": 4.897196261682243, "percentage": 24.83, "elapsed_time": "1:49:55", "remaining_time": "5:32:44", "throughput": 3290.47, "total_tokens": 21702792}
{"current_steps": 299, "total_steps": 1200, "loss": 0.2051, "lr": 4.272388892701934e-05, "epoch": 4.913811007268951, "percentage": 24.92, "elapsed_time": "1:50:14", "remaining_time": "5:32:11", "throughput": 3291.67, "total_tokens": 21771880}
{"current_steps": 300, "total_steps": 1200, "loss": 0.1926, "lr": 4.267766952966369e-05, "epoch": 4.930425752855659, "percentage": 25.0, "elapsed_time": "1:50:35", "remaining_time": "5:31:45", "throughput": 3292.22, "total_tokens": 21844024}
{"current_steps": 301, "total_steps": 1200, "loss": 0.2039, "lr": 4.2631328971540444e-05, "epoch": 4.947040498442368, "percentage": 25.08, "elapsed_time": "1:51:01", "remaining_time": "5:31:34", "throughput": 3291.57, "total_tokens": 21925632}
{"current_steps": 302, "total_steps": 1200, "loss": 0.2077, "lr": 4.2584867570262597e-05, "epoch": 4.963655244029075, "percentage": 25.17, "elapsed_time": "1:51:14", "remaining_time": "5:30:47", "throughput": 3293.27, "total_tokens": 21981952}
{"current_steps": 303, "total_steps": 1200, "loss": 0.174, "lr": 4.25382856442714e-05, "epoch": 4.980269989615784, "percentage": 25.25, "elapsed_time": "1:51:46", "remaining_time": "5:30:52", "throughput": 3291.07, "total_tokens": 22070440}
{"current_steps": 304, "total_steps": 1200, "loss": 0.204, "lr": 4.249158351283414e-05, "epoch": 4.996884735202492, "percentage": 25.33, "elapsed_time": "1:52:19", "remaining_time": "5:31:03", "throughput": 3289.63, "total_tokens": 22170184}
{"current_steps": 305, "total_steps": 1200, "loss": 0.2849, "lr": 4.244476149604201e-05, "epoch": 5.0, "percentage": 25.42, "elapsed_time": "1:52:22", "remaining_time": "5:29:45", "throughput": 3289.83, "total_tokens": 22181856}
{"current_steps": 306, "total_steps": 1200, "loss": 0.1879, "lr": 4.2397819914807856e-05, "epoch": 5.0166147455867085, "percentage": 25.5, "elapsed_time": "1:52:43", "remaining_time": "5:29:20", "throughput": 3290.63, "total_tokens": 22256808}
{"current_steps": 307, "total_steps": 1200, "loss": 0.2158, "lr": 4.2350759090864046e-05, "epoch": 5.033229491173416, "percentage": 25.58, "elapsed_time": "1:53:02", "remaining_time": "5:28:50", "throughput": 3291.37, "total_tokens": 22325224}
{"current_steps": 308, "total_steps": 1200, "loss": 0.1685, "lr": 4.230357934676017e-05, "epoch": 5.049844236760125, "percentage": 25.67, "elapsed_time": "1:53:21", "remaining_time": "5:28:18", "throughput": 3291.74, "total_tokens": 22389624}
{"current_steps": 309, "total_steps": 1200, "loss": 0.2253, "lr": 4.225628100586093e-05, "epoch": 5.066458982346833, "percentage": 25.75, "elapsed_time": "1:53:48", "remaining_time": "5:28:09", "throughput": 3289.83, "total_tokens": 22463872}
{"current_steps": 310, "total_steps": 1200, "loss": 0.1762, "lr": 4.220886439234385e-05, "epoch": 5.083073727933541, "percentage": 25.83, "elapsed_time": "1:54:01", "remaining_time": "5:27:22", "throughput": 3290.93, "total_tokens": 22515824}
{"current_steps": 311, "total_steps": 1200, "loss": 0.1772, "lr": 4.2161329831197095e-05, "epoch": 5.099688473520249, "percentage": 25.92, "elapsed_time": "1:54:29", "remaining_time": "5:27:16", "throughput": 3290.26, "total_tokens": 22602336}
{"current_steps": 312, "total_steps": 1200, "loss": 0.1729, "lr": 4.211367764821722e-05, "epoch": 5.116303219106958, "percentage": 26.0, "elapsed_time": "1:54:43", "remaining_time": "5:26:30", "throughput": 3291.46, "total_tokens": 22655176}
{"current_steps": 313, "total_steps": 1200, "loss": 0.1857, "lr": 4.2065908170006955e-05, "epoch": 5.132917964693665, "percentage": 26.08, "elapsed_time": "1:55:03", "remaining_time": "5:26:03", "throughput": 3292.34, "total_tokens": 22728680}
{"current_steps": 314, "total_steps": 1200, "loss": 0.176, "lr": 4.201802172397295e-05, "epoch": 5.149532710280374, "percentage": 26.17, "elapsed_time": "1:55:25", "remaining_time": "5:25:41", "throughput": 3293.22, "total_tokens": 22806784}
{"current_steps": 315, "total_steps": 1200, "loss": 0.1903, "lr": 4.197001863832355e-05, "epoch": 5.166147455867082, "percentage": 26.25, "elapsed_time": "1:55:46", "remaining_time": "5:25:16", "throughput": 3293.77, "total_tokens": 22880648}
{"current_steps": 316, "total_steps": 1200, "loss": 0.1706, "lr": 4.192189924206652e-05, "epoch": 5.18276220145379, "percentage": 26.33, "elapsed_time": "1:56:04", "remaining_time": "5:24:43", "throughput": 3295.63, "total_tokens": 22953184}
{"current_steps": 317, "total_steps": 1200, "loss": 0.2127, "lr": 4.187366386500683e-05, "epoch": 5.1993769470404985, "percentage": 26.42, "elapsed_time": "1:56:29", "remaining_time": "5:24:29", "throughput": 3295.86, "total_tokens": 23037392}
{"current_steps": 318, "total_steps": 1200, "loss": 0.293, "lr": 4.182531283774434e-05, "epoch": 5.215991692627207, "percentage": 26.5, "elapsed_time": "1:56:43", "remaining_time": "5:23:43", "throughput": 3296.66, "total_tokens": 23086552}
{"current_steps": 319, "total_steps": 1200, "loss": 0.1843, "lr": 4.177684649167158e-05, "epoch": 5.232606438213915, "percentage": 26.58, "elapsed_time": "1:56:59", "remaining_time": "5:23:05", "throughput": 3298.52, "total_tokens": 23153152}
{"current_steps": 320, "total_steps": 1200, "loss": 0.1945, "lr": 4.172826515897146e-05, "epoch": 5.249221183800623, "percentage": 26.67, "elapsed_time": "1:57:24", "remaining_time": "5:22:51", "throughput": 3299.28, "total_tokens": 23240928}
{"current_steps": 321, "total_steps": 1200, "loss": 0.1825, "lr": 4.1679569172614996e-05, "epoch": 5.265835929387332, "percentage": 26.75, "elapsed_time": "1:57:50", "remaining_time": "5:22:40", "throughput": 3299.25, "total_tokens": 23325912}
{"current_steps": 322, "total_steps": 1200, "loss": 0.2044, "lr": 4.163075886635902e-05, "epoch": 5.282450674974039, "percentage": 26.83, "elapsed_time": "1:58:10", "remaining_time": "5:22:14", "throughput": 3300.25, "total_tokens": 23401952}
{"current_steps": 323, "total_steps": 1200, "loss": 0.1718, "lr": 4.1581834574743915e-05, "epoch": 5.299065420560748, "percentage": 26.92, "elapsed_time": "1:58:26", "remaining_time": "5:21:35", "throughput": 3301.78, "total_tokens": 23463760}
{"current_steps": 324, "total_steps": 1200, "loss": 0.1768, "lr": 4.1532796633091296e-05, "epoch": 5.315680166147456, "percentage": 27.0, "elapsed_time": "1:58:47", "remaining_time": "5:21:10", "throughput": 3302.01, "total_tokens": 23535272}
{"current_steps": 325, "total_steps": 1200, "loss": 0.1609, "lr": 4.148364537750172e-05, "epoch": 5.332294911734164, "percentage": 27.08, "elapsed_time": "1:59:11", "remaining_time": "5:20:54", "throughput": 3300.97, "total_tokens": 23607752}
{"current_steps": 326, "total_steps": 1200, "loss": 0.1769, "lr": 4.14343811448524e-05, "epoch": 5.348909657320872, "percentage": 27.17, "elapsed_time": "1:59:28", "remaining_time": "5:20:18", "throughput": 3302.7, "total_tokens": 23674872}
{"current_steps": 327, "total_steps": 1200, "loss": 0.2087, "lr": 4.138500427279485e-05, "epoch": 5.365524402907581, "percentage": 27.25, "elapsed_time": "1:59:43", "remaining_time": "5:19:37", "throughput": 3304.35, "total_tokens": 23736384}
{"current_steps": 328, "total_steps": 1200, "loss": 0.162, "lr": 4.133551509975264e-05, "epoch": 5.382139148494288, "percentage": 27.33, "elapsed_time": "2:00:19", "remaining_time": "5:19:52", "throughput": 3301.57, "total_tokens": 23835000}
{"current_steps": 329, "total_steps": 1200, "loss": 0.1614, "lr": 4.128591396491901e-05, "epoch": 5.398753894080997, "percentage": 27.42, "elapsed_time": "2:00:44", "remaining_time": "5:19:40", "throughput": 3300.57, "total_tokens": 23912552}
{"current_steps": 330, "total_steps": 1200, "loss": 0.2, "lr": 4.123620120825459e-05, "epoch": 5.415368639667705, "percentage": 27.5, "elapsed_time": "2:01:08", "remaining_time": "5:19:21", "throughput": 3300.3, "total_tokens": 23987368}
{"current_steps": 331, "total_steps": 1200, "loss": 0.1719, "lr": 4.118637717048506e-05, "epoch": 5.431983385254413, "percentage": 27.58, "elapsed_time": "2:01:24", "remaining_time": "5:18:43", "throughput": 3301.79, "total_tokens": 24050848}
{"current_steps": 332, "total_steps": 1200, "loss": 0.1678, "lr": 4.113644219309877e-05, "epoch": 5.4485981308411215, "percentage": 27.67, "elapsed_time": "2:01:54", "remaining_time": "5:18:42", "throughput": 3301.25, "total_tokens": 24146104}
{"current_steps": 333, "total_steps": 1200, "loss": 0.176, "lr": 4.1086396618344476e-05, "epoch": 5.46521287642783, "percentage": 27.75, "elapsed_time": "2:02:06", "remaining_time": "5:17:54", "throughput": 3302.41, "total_tokens": 24194184}
{"current_steps": 334, "total_steps": 1200, "loss": 0.1844, "lr": 4.1036240789228954e-05, "epoch": 5.481827622014538, "percentage": 27.83, "elapsed_time": "2:02:33", "remaining_time": "5:17:45", "throughput": 3301.27, "total_tokens": 24275368}
{"current_steps": 335, "total_steps": 1200, "loss": 0.1901, "lr": 4.098597504951462e-05, "epoch": 5.498442367601246, "percentage": 27.92, "elapsed_time": "2:02:46", "remaining_time": "5:16:59", "throughput": 3302.9, "total_tokens": 24329192}
{"current_steps": 336, "total_steps": 1200, "loss": 0.1751, "lr": 4.093559974371725e-05, "epoch": 5.515057113187955, "percentage": 28.0, "elapsed_time": "2:03:11", "remaining_time": "5:16:47", "throughput": 3304.49, "total_tokens": 24426696}
{"current_steps": 337, "total_steps": 1200, "loss": 0.171, "lr": 4.088511521710352e-05, "epoch": 5.531671858774662, "percentage": 28.08, "elapsed_time": "2:03:38", "remaining_time": "5:16:37", "throughput": 3304.5, "total_tokens": 24514344}
{"current_steps": 338, "total_steps": 1200, "loss": 0.1766, "lr": 4.083452181568875e-05, "epoch": 5.548286604361371, "percentage": 28.17, "elapsed_time": "2:03:56", "remaining_time": "5:16:05", "throughput": 3305.8, "total_tokens": 24584464}
{"current_steps": 339, "total_steps": 1200, "loss": 0.1584, "lr": 4.0783819886234445e-05, "epoch": 5.564901349948079, "percentage": 28.25, "elapsed_time": "2:04:19", "remaining_time": "5:15:45", "throughput": 3306.04, "total_tokens": 24660600}
{"current_steps": 340, "total_steps": 1200, "loss": 0.1711, "lr": 4.073300977624594e-05, "epoch": 5.581516095534787, "percentage": 28.33, "elapsed_time": "2:04:34", "remaining_time": "5:15:05", "throughput": 3306.87, "total_tokens": 24717088}
{"current_steps": 341, "total_steps": 1200, "loss": 0.1798, "lr": 4.068209183397004e-05, "epoch": 5.598130841121495, "percentage": 28.42, "elapsed_time": "2:04:48", "remaining_time": "5:14:24", "throughput": 3308.34, "total_tokens": 24775352}
{"current_steps": 342, "total_steps": 1200, "loss": 0.1666, "lr": 4.063106640839264e-05, "epoch": 5.614745586708204, "percentage": 28.5, "elapsed_time": "2:05:19", "remaining_time": "5:14:25", "throughput": 3305.87, "total_tokens": 24860072}
{"current_steps": 343, "total_steps": 1200, "loss": 0.168, "lr": 4.057993384923626e-05, "epoch": 5.6313603322949115, "percentage": 28.58, "elapsed_time": "2:05:53", "remaining_time": "5:14:32", "throughput": 3302.82, "total_tokens": 24947856}
{"current_steps": 344, "total_steps": 1200, "loss": 0.1801, "lr": 4.052869450695776e-05, "epoch": 5.64797507788162, "percentage": 28.67, "elapsed_time": "2:06:19", "remaining_time": "5:14:21", "throughput": 3301.47, "total_tokens": 25024992}
{"current_steps": 345, "total_steps": 1200, "loss": 0.183, "lr": 4.047734873274586e-05, "epoch": 5.6645898234683285, "percentage": 28.75, "elapsed_time": "2:06:41", "remaining_time": "5:13:58", "throughput": 3301.02, "total_tokens": 25092248}
{"current_steps": 346, "total_steps": 1200, "loss": 0.1752, "lr": 4.042589687851872e-05, "epoch": 5.681204569055036, "percentage": 28.83, "elapsed_time": "2:07:06", "remaining_time": "5:13:43", "throughput": 3300.41, "total_tokens": 25170496}
{"current_steps": 347, "total_steps": 1200, "loss": 0.1798, "lr": 4.037433929692161e-05, "epoch": 5.697819314641745, "percentage": 28.92, "elapsed_time": "2:07:39", "remaining_time": "5:13:49", "throughput": 3298.94, "total_tokens": 25268720}
{"current_steps": 348, "total_steps": 1200, "loss": 0.1793, "lr": 4.0322676341324415e-05, "epoch": 5.714434060228453, "percentage": 29.0, "elapsed_time": "2:07:57", "remaining_time": "5:13:15", "throughput": 3299.77, "total_tokens": 25332688}
{"current_steps": 349, "total_steps": 1200, "loss": 0.2022, "lr": 4.027090836581925e-05, "epoch": 5.731048805815161, "percentage": 29.08, "elapsed_time": "2:08:21", "remaining_time": "5:13:00", "throughput": 3299.7, "total_tokens": 25413904}
{"current_steps": 350, "total_steps": 1200, "loss": 0.1848, "lr": 4.021903572521802e-05, "epoch": 5.747663551401869, "percentage": 29.17, "elapsed_time": "2:08:52", "remaining_time": "5:12:58", "throughput": 3298.34, "total_tokens": 25503720}
{"current_steps": 351, "total_steps": 1200, "loss": 0.1931, "lr": 4.0167058775049996e-05, "epoch": 5.764278296988578, "percentage": 29.25, "elapsed_time": "2:09:08", "remaining_time": "5:12:21", "throughput": 3299.9, "total_tokens": 25568560}
{"current_steps": 352, "total_steps": 1200, "loss": 0.1675, "lr": 4.011497787155938e-05, "epoch": 5.780893042575285, "percentage": 29.33, "elapsed_time": "2:09:28", "remaining_time": "5:11:55", "throughput": 3299.8, "total_tokens": 25635184}
{"current_steps": 353, "total_steps": 1200, "loss": 0.176, "lr": 4.006279337170283e-05, "epoch": 5.797507788161994, "percentage": 29.42, "elapsed_time": "2:09:56", "remaining_time": "5:11:46", "throughput": 3298.94, "total_tokens": 25719768}
{"current_steps": 354, "total_steps": 1200, "loss": 0.1705, "lr": 4.0010505633147106e-05, "epoch": 5.814122533748702, "percentage": 29.5, "elapsed_time": "2:10:18", "remaining_time": "5:11:25", "throughput": 3299.15, "total_tokens": 25795016}
{"current_steps": 355, "total_steps": 1200, "loss": 0.153, "lr": 3.995811501426648e-05, "epoch": 5.83073727933541, "percentage": 29.58, "elapsed_time": "2:10:40", "remaining_time": "5:11:02", "throughput": 3298.64, "total_tokens": 25863184}
{"current_steps": 356, "total_steps": 1200, "loss": 0.1795, "lr": 3.99056218741404e-05, "epoch": 5.8473520249221185, "percentage": 29.67, "elapsed_time": "2:10:59", "remaining_time": "5:10:33", "throughput": 3299.86, "total_tokens": 25935752}
{"current_steps": 357, "total_steps": 1200, "loss": 0.1744, "lr": 3.985302657255097e-05, "epoch": 5.863966770508826, "percentage": 29.75, "elapsed_time": "2:11:14", "remaining_time": "5:09:55", "throughput": 3301.05, "total_tokens": 25995760}
{"current_steps": 358, "total_steps": 1200, "loss": 0.1538, "lr": 3.980032946998049e-05, "epoch": 5.880581516095535, "percentage": 29.83, "elapsed_time": "2:11:35", "remaining_time": "5:09:28", "throughput": 3300.94, "total_tokens": 26061240}
{"current_steps": 359, "total_steps": 1200, "loss": 0.172, "lr": 3.974753092760901e-05, "epoch": 5.897196261682243, "percentage": 29.92, "elapsed_time": "2:11:54", "remaining_time": "5:09:00", "throughput": 3301.68, "total_tokens": 26131024}
{"current_steps": 360, "total_steps": 1200, "loss": 0.197, "lr": 3.969463130731183e-05, "epoch": 5.913811007268951, "percentage": 30.0, "elapsed_time": "2:12:30", "remaining_time": "5:09:10", "throughput": 3299.78, "total_tokens": 26233672}
{"current_steps": 361, "total_steps": 1200, "loss": 0.1359, "lr": 3.964163097165702e-05, "epoch": 5.930425752855659, "percentage": 30.08, "elapsed_time": "2:12:54", "remaining_time": "5:08:53", "throughput": 3298.48, "total_tokens": 26303488}
{"current_steps": 362, "total_steps": 1200, "loss": 0.1622, "lr": 3.958853028390294e-05, "epoch": 5.947040498442368, "percentage": 30.17, "elapsed_time": "2:13:17", "remaining_time": "5:08:34", "throughput": 3298.11, "total_tokens": 26377768}
{"current_steps": 363, "total_steps": 1200, "loss": 0.3039, "lr": 3.953532960799577e-05, "epoch": 5.963655244029075, "percentage": 30.25, "elapsed_time": "2:13:32", "remaining_time": "5:07:55", "throughput": 3299.22, "total_tokens": 26435984}
{"current_steps": 364, "total_steps": 1200, "loss": 0.185, "lr": 3.948202930856697e-05, "epoch": 5.980269989615784, "percentage": 30.33, "elapsed_time": "2:13:56", "remaining_time": "5:07:38", "throughput": 3299.05, "total_tokens": 26513960}
{"current_steps": 365, "total_steps": 1200, "loss": 0.1671, "lr": 3.942862975093085e-05, "epoch": 5.996884735202492, "percentage": 30.42, "elapsed_time": "2:14:22", "remaining_time": "5:07:23", "throughput": 3299.23, "total_tokens": 26599064}
{"current_steps": 366, "total_steps": 1200, "loss": 0.2127, "lr": 3.937513130108197e-05, "epoch": 6.0, "percentage": 30.5, "elapsed_time": "2:14:28", "remaining_time": "5:06:26", "throughput": 3298.8, "total_tokens": 26617264}
{"current_steps": 367, "total_steps": 1200, "loss": 0.1736, "lr": 3.9321534325692726e-05, "epoch": 6.0166147455867085, "percentage": 30.58, "elapsed_time": "2:15:02", "remaining_time": "5:06:30", "throughput": 3297.33, "total_tokens": 26717024}
{"current_steps": 368, "total_steps": 1200, "loss": 0.166, "lr": 3.92678391921108e-05, "epoch": 6.033229491173416, "percentage": 30.67, "elapsed_time": "2:15:26", "remaining_time": "5:06:13", "throughput": 3296.31, "total_tokens": 26788120}
{"current_steps": 369, "total_steps": 1200, "loss": 0.182, "lr": 3.92140462683566e-05, "epoch": 6.049844236760125, "percentage": 30.75, "elapsed_time": "2:15:44", "remaining_time": "5:05:41", "throughput": 3297.2, "total_tokens": 26853760}
{"current_steps": 370, "total_steps": 1200, "loss": 0.1621, "lr": 3.916015592312082e-05, "epoch": 6.066458982346833, "percentage": 30.83, "elapsed_time": "2:16:05", "remaining_time": "5:05:16", "throughput": 3297.39, "total_tokens": 26923848}
{"current_steps": 371, "total_steps": 1200, "loss": 0.1488, "lr": 3.9106168525761855e-05, "epoch": 6.083073727933541, "percentage": 30.92, "elapsed_time": "2:16:19", "remaining_time": "5:04:36", "throughput": 3298.08, "total_tokens": 26976184}
{"current_steps": 372, "total_steps": 1200, "loss": 0.1554, "lr": 3.905208444630327e-05, "epoch": 6.099688473520249, "percentage": 31.0, "elapsed_time": "2:16:46", "remaining_time": "5:04:26", "throughput": 3298.03, "total_tokens": 27065712}
{"current_steps": 373, "total_steps": 1200, "loss": 0.1653, "lr": 3.899790405543129e-05, "epoch": 6.116303219106958, "percentage": 31.08, "elapsed_time": "2:17:12", "remaining_time": "5:04:13", "throughput": 3297.2, "total_tokens": 27145472}
{"current_steps": 374, "total_steps": 1200, "loss": 0.1503, "lr": 3.894362772449226e-05, "epoch": 6.132917964693665, "percentage": 31.17, "elapsed_time": "2:17:43", "remaining_time": "5:04:10", "throughput": 3295.67, "total_tokens": 27233904}
{"current_steps": 375, "total_steps": 1200, "loss": 0.1358, "lr": 3.888925582549006e-05, "epoch": 6.149532710280374, "percentage": 31.25, "elapsed_time": "2:18:06", "remaining_time": "5:03:49", "throughput": 3295.99, "total_tokens": 27311512}
{"current_steps": 376, "total_steps": 1200, "loss": 0.15, "lr": 3.883478873108361e-05, "epoch": 6.166147455867082, "percentage": 31.33, "elapsed_time": "2:18:26", "remaining_time": "5:03:24", "throughput": 3297.0, "total_tokens": 27387400}
{"current_steps": 377, "total_steps": 1200, "loss": 0.1588, "lr": 3.878022681458426e-05, "epoch": 6.18276220145379, "percentage": 31.42, "elapsed_time": "2:18:47", "remaining_time": "5:02:59", "throughput": 3297.54, "total_tokens": 27461280}
{"current_steps": 378, "total_steps": 1200, "loss": 0.143, "lr": 3.87255704499533e-05, "epoch": 6.1993769470404985, "percentage": 31.5, "elapsed_time": "2:19:20", "remaining_time": "5:03:00", "throughput": 3296.01, "total_tokens": 27556400}
{"current_steps": 379, "total_steps": 1200, "loss": 0.1713, "lr": 3.8670820011799315e-05, "epoch": 6.215991692627207, "percentage": 31.58, "elapsed_time": "2:19:35", "remaining_time": "5:02:23", "throughput": 3296.85, "total_tokens": 27613664}
{"current_steps": 380, "total_steps": 1200, "loss": 0.1893, "lr": 3.861597587537568e-05, "epoch": 6.232606438213915, "percentage": 31.67, "elapsed_time": "2:19:56", "remaining_time": "5:01:59", "throughput": 3296.68, "total_tokens": 27681024}
{"current_steps": 381, "total_steps": 1200, "loss": 0.156, "lr": 3.856103841657797e-05, "epoch": 6.249221183800623, "percentage": 31.75, "elapsed_time": "2:20:18", "remaining_time": "5:01:36", "throughput": 3297.41, "total_tokens": 27759536}
{"current_steps": 382, "total_steps": 1200, "loss": 0.1247, "lr": 3.850600801194138e-05, "epoch": 6.265835929387332, "percentage": 31.83, "elapsed_time": "2:20:51", "remaining_time": "5:01:38", "throughput": 3296.07, "total_tokens": 27857288}
{"current_steps": 383, "total_steps": 1200, "loss": 0.172, "lr": 3.8450885038638127e-05, "epoch": 6.282450674974039, "percentage": 31.92, "elapsed_time": "2:21:18", "remaining_time": "5:01:25", "throughput": 3295.52, "total_tokens": 27940528}
{"current_steps": 384, "total_steps": 1200, "loss": 0.166, "lr": 3.8395669874474915e-05, "epoch": 6.299065420560748, "percentage": 32.0, "elapsed_time": "2:21:47", "remaining_time": "5:01:18", "throughput": 3295.24, "total_tokens": 28033824}
{"current_steps": 385, "total_steps": 1200, "loss": 0.1415, "lr": 3.834036289789029e-05, "epoch": 6.315680166147456, "percentage": 32.08, "elapsed_time": "2:22:06", "remaining_time": "5:00:49", "throughput": 3295.23, "total_tokens": 28096192}
{"current_steps": 386, "total_steps": 1200, "loss": 0.1369, "lr": 3.828496448795207e-05, "epoch": 6.332294911734164, "percentage": 32.17, "elapsed_time": "2:22:37", "remaining_time": "5:00:47", "throughput": 3292.99, "total_tokens": 28181256}
{"current_steps": 387, "total_steps": 1200, "loss": 0.1465, "lr": 3.822947502435477e-05, "epoch": 6.348909657320872, "percentage": 32.25, "elapsed_time": "2:22:56", "remaining_time": "5:00:16", "throughput": 3293.51, "total_tokens": 28245480}
{"current_steps": 388, "total_steps": 1200, "loss": 0.1456, "lr": 3.8173894887416945e-05, "epoch": 6.365524402907581, "percentage": 32.33, "elapsed_time": "2:23:12", "remaining_time": "4:59:43", "throughput": 3294.23, "total_tokens": 28307200}
{"current_steps": 389, "total_steps": 1200, "loss": 0.1752, "lr": 3.811822445807863e-05, "epoch": 6.382139148494288, "percentage": 32.42, "elapsed_time": "2:23:36", "remaining_time": "4:59:23", "throughput": 3294.24, "total_tokens": 28384640}
{"current_steps": 390, "total_steps": 1200, "loss": 0.1344, "lr": 3.8062464117898724e-05, "epoch": 6.398753894080997, "percentage": 32.5, "elapsed_time": "2:23:53", "remaining_time": "4:58:51", "throughput": 3295.0, "total_tokens": 28447992}
{"current_steps": 391, "total_steps": 1200, "loss": 0.1506, "lr": 3.800661424905235e-05, "epoch": 6.415368639667705, "percentage": 32.58, "elapsed_time": "2:24:11", "remaining_time": "4:58:19", "throughput": 3295.98, "total_tokens": 28513856}
{"current_steps": 392, "total_steps": 1200, "loss": 0.1648, "lr": 3.795067523432826e-05, "epoch": 6.431983385254413, "percentage": 32.67, "elapsed_time": "2:24:40", "remaining_time": "4:58:13", "throughput": 3294.24, "total_tokens": 28596584}
{"current_steps": 393, "total_steps": 1200, "loss": 0.1762, "lr": 3.789464745712619e-05, "epoch": 6.4485981308411215, "percentage": 32.75, "elapsed_time": "2:25:01", "remaining_time": "4:57:48", "throughput": 3294.08, "total_tokens": 28664560}
{"current_steps": 394, "total_steps": 1200, "loss": 0.14, "lr": 3.7838531301454254e-05, "epoch": 6.46521287642783, "percentage": 32.83, "elapsed_time": "2:25:22", "remaining_time": "4:57:23", "throughput": 3294.82, "total_tokens": 28739512}
{"current_steps": 395, "total_steps": 1200, "loss": 0.1591, "lr": 3.77823271519263e-05, "epoch": 6.481827622014538, "percentage": 32.92, "elapsed_time": "2:25:55", "remaining_time": "4:57:24", "throughput": 3292.84, "total_tokens": 28831848}
{"current_steps": 396, "total_steps": 1200, "loss": 0.163, "lr": 3.7726035393759285e-05, "epoch": 6.498442367601246, "percentage": 33.0, "elapsed_time": "2:26:13", "remaining_time": "4:56:53", "throughput": 3293.73, "total_tokens": 28898408}
{"current_steps": 397, "total_steps": 1200, "loss": 0.1558, "lr": 3.76696564127706e-05, "epoch": 6.515057113187955, "percentage": 33.08, "elapsed_time": "2:26:31", "remaining_time": "4:56:22", "throughput": 3294.09, "total_tokens": 28960224}
{"current_steps": 398, "total_steps": 1200, "loss": 0.1906, "lr": 3.761319059537548e-05, "epoch": 6.531671858774662, "percentage": 33.17, "elapsed_time": "2:26:47", "remaining_time": "4:55:47", "throughput": 3295.04, "total_tokens": 29020568}
{"current_steps": 399, "total_steps": 1200, "loss": 0.1399, "lr": 3.755663832858432e-05, "epoch": 6.548286604361371, "percentage": 33.25, "elapsed_time": "2:27:10", "remaining_time": "4:55:27", "throughput": 3294.88, "total_tokens": 29095448}
{"current_steps": 400, "total_steps": 1200, "loss": 0.1554, "lr": 3.7500000000000003e-05, "epoch": 6.564901349948079, "percentage": 33.33, "elapsed_time": "2:27:36", "remaining_time": "4:55:12", "throughput": 3295.64, "total_tokens": 29186600}
{"current_steps": 401, "total_steps": 1200, "loss": 0.2448, "lr": 3.744327599781531e-05, "epoch": 6.581516095534787, "percentage": 33.42, "elapsed_time": "2:28:00", "remaining_time": "4:54:53", "throughput": 3294.88, "total_tokens": 29258552}
{"current_steps": 402, "total_steps": 1200, "loss": 0.1853, "lr": 3.7386466710810194e-05, "epoch": 6.598130841121495, "percentage": 33.5, "elapsed_time": "2:28:29", "remaining_time": "4:54:46", "throughput": 3293.6, "total_tokens": 29344848}
{"current_steps": 403, "total_steps": 1200, "loss": 0.2248, "lr": 3.7329572528349146e-05, "epoch": 6.614745586708204, "percentage": 33.58, "elapsed_time": "2:28:48", "remaining_time": "4:54:17", "throughput": 3293.95, "total_tokens": 29410184}
{"current_steps": 404, "total_steps": 1200, "loss": 0.1647, "lr": 3.727259384037852e-05, "epoch": 6.6313603322949115, "percentage": 33.67, "elapsed_time": "2:29:13", "remaining_time": "4:54:01", "throughput": 3293.02, "total_tokens": 29484928}
{"current_steps": 405, "total_steps": 1200, "loss": 0.1622, "lr": 3.721553103742388e-05, "epoch": 6.64797507788162, "percentage": 33.75, "elapsed_time": "2:29:40", "remaining_time": "4:53:48", "throughput": 3292.24, "total_tokens": 29566432}
{"current_steps": 406, "total_steps": 1200, "loss": 0.1509, "lr": 3.715838451058726e-05, "epoch": 6.6645898234683285, "percentage": 33.83, "elapsed_time": "2:30:01", "remaining_time": "4:53:22", "throughput": 3292.3, "total_tokens": 29634032}
{"current_steps": 407, "total_steps": 1200, "loss": 0.1719, "lr": 3.7101154651544584e-05, "epoch": 6.681204569055036, "percentage": 33.92, "elapsed_time": "2:30:13", "remaining_time": "4:52:42", "throughput": 3292.94, "total_tokens": 29681424}
{"current_steps": 408, "total_steps": 1200, "loss": 0.1478, "lr": 3.704384185254288e-05, "epoch": 6.697819314641745, "percentage": 34.0, "elapsed_time": "2:30:37", "remaining_time": "4:52:24", "throughput": 3293.05, "total_tokens": 29762208}
{"current_steps": 409, "total_steps": 1200, "loss": 0.1563, "lr": 3.6986446506397666e-05, "epoch": 6.714434060228453, "percentage": 34.08, "elapsed_time": "2:30:52", "remaining_time": "4:51:47", "throughput": 3293.76, "total_tokens": 29816280}
{"current_steps": 410, "total_steps": 1200, "loss": 0.1456, "lr": 3.692896900649021e-05, "epoch": 6.731048805815161, "percentage": 34.17, "elapsed_time": "2:31:14", "remaining_time": "4:51:25", "throughput": 3294.18, "total_tokens": 29893040}
{"current_steps": 411, "total_steps": 1200, "loss": 0.1617, "lr": 3.6871409746764865e-05, "epoch": 6.747663551401869, "percentage": 34.25, "elapsed_time": "2:31:37", "remaining_time": "4:51:04", "throughput": 3294.45, "total_tokens": 29971688}
{"current_steps": 412, "total_steps": 1200, "loss": 0.1417, "lr": 3.681376912172636e-05, "epoch": 6.764278296988578, "percentage": 34.33, "elapsed_time": "2:32:00", "remaining_time": "4:50:43", "throughput": 3295.14, "total_tokens": 30051784}
{"current_steps": 413, "total_steps": 1200, "loss": 0.1527, "lr": 3.675604752643706e-05, "epoch": 6.780893042575285, "percentage": 34.42, "elapsed_time": "2:32:35", "remaining_time": "4:50:45", "throughput": 3292.8, "total_tokens": 30146048}
{"current_steps": 414, "total_steps": 1200, "loss": 0.1498, "lr": 3.6698245356514335e-05, "epoch": 6.797507788161994, "percentage": 34.5, "elapsed_time": "2:32:57", "remaining_time": "4:50:23", "throughput": 3293.15, "total_tokens": 30221296}
{"current_steps": 415, "total_steps": 1200, "loss": 0.1594, "lr": 3.6640363008127784e-05, "epoch": 6.814122533748702, "percentage": 34.58, "elapsed_time": "2:33:18", "remaining_time": "4:49:58", "throughput": 3292.81, "total_tokens": 30287664}
{"current_steps": 416, "total_steps": 1200, "loss": 0.1691, "lr": 3.6582400877996546e-05, "epoch": 6.83073727933541, "percentage": 34.67, "elapsed_time": "2:33:35", "remaining_time": "4:49:27", "throughput": 3293.74, "total_tokens": 30352816}
{"current_steps": 417, "total_steps": 1200, "loss": 0.1556, "lr": 3.652435936338656e-05, "epoch": 6.8473520249221185, "percentage": 34.75, "elapsed_time": "2:34:04", "remaining_time": "4:49:18", "throughput": 3292.66, "total_tokens": 30439688}
{"current_steps": 418, "total_steps": 1200, "loss": 0.1729, "lr": 3.646623886210788e-05, "epoch": 6.863966770508826, "percentage": 34.83, "elapsed_time": "2:34:23", "remaining_time": "4:48:50", "throughput": 3293.16, "total_tokens": 30506856}
{"current_steps": 419, "total_steps": 1200, "loss": 0.1459, "lr": 3.64080397725119e-05, "epoch": 6.880581516095535, "percentage": 34.92, "elapsed_time": "2:34:39", "remaining_time": "4:48:16", "throughput": 3294.0, "total_tokens": 30565848}
{"current_steps": 420, "total_steps": 1200, "loss": 0.1753, "lr": 3.634976249348867e-05, "epoch": 6.897196261682243, "percentage": 35.0, "elapsed_time": "2:35:00", "remaining_time": "4:47:52", "throughput": 3293.84, "total_tokens": 30633944}
{"current_steps": 421, "total_steps": 1200, "loss": 0.1538, "lr": 3.629140742446414e-05, "epoch": 6.913811007268951, "percentage": 35.08, "elapsed_time": "2:35:24", "remaining_time": "4:47:33", "throughput": 3292.9, "total_tokens": 30704760}
{"current_steps": 422, "total_steps": 1200, "loss": 0.1475, "lr": 3.623297496539741e-05, "epoch": 6.930425752855659, "percentage": 35.17, "elapsed_time": "2:35:44", "remaining_time": "4:47:06", "throughput": 3293.36, "total_tokens": 30773792}
{"current_steps": 423, "total_steps": 1200, "loss": 0.16, "lr": 3.6174465516778035e-05, "epoch": 6.947040498442368, "percentage": 35.25, "elapsed_time": "2:36:04", "remaining_time": "4:46:42", "throughput": 3294.07, "total_tokens": 30848672}
{"current_steps": 424, "total_steps": 1200, "loss": 0.1505, "lr": 3.611587947962319e-05, "epoch": 6.963655244029075, "percentage": 35.33, "elapsed_time": "2:36:20", "remaining_time": "4:46:07", "throughput": 3294.83, "total_tokens": 30906064}
{"current_steps": 425, "total_steps": 1200, "loss": 0.1796, "lr": 3.6057217255475034e-05, "epoch": 6.980269989615784, "percentage": 35.42, "elapsed_time": "2:36:36", "remaining_time": "4:45:35", "throughput": 3295.28, "total_tokens": 30964720}
{"current_steps": 426, "total_steps": 1200, "loss": 0.159, "lr": 3.599847924639788e-05, "epoch": 6.996884735202492, "percentage": 35.5, "elapsed_time": "2:37:01", "remaining_time": "4:45:17", "throughput": 3294.99, "total_tokens": 31043152}
{"current_steps": 427, "total_steps": 1200, "loss": 0.1275, "lr": 3.593966585497547e-05, "epoch": 7.0, "percentage": 35.58, "elapsed_time": "2:37:04", "remaining_time": "4:44:21", "throughput": 3295.17, "total_tokens": 31056056}
{"current_steps": 428, "total_steps": 1200, "loss": 0.1349, "lr": 3.588077748430819e-05, "epoch": 7.0166147455867085, "percentage": 35.67, "elapsed_time": "2:37:30", "remaining_time": "4:44:06", "throughput": 3294.54, "total_tokens": 31135304}
{"current_steps": 429, "total_steps": 1200, "loss": 0.148, "lr": 3.582181453801036e-05, "epoch": 7.033229491173416, "percentage": 35.75, "elapsed_time": "2:37:44", "remaining_time": "4:43:30", "throughput": 3294.87, "total_tokens": 31185600}
{"current_steps": 430, "total_steps": 1200, "loss": 0.1439, "lr": 3.576277742020738e-05, "epoch": 7.049844236760125, "percentage": 35.83, "elapsed_time": "2:38:05", "remaining_time": "4:43:05", "throughput": 3295.02, "total_tokens": 31254312}
{"current_steps": 431, "total_steps": 1200, "loss": 0.1345, "lr": 3.570366653553307e-05, "epoch": 7.066458982346833, "percentage": 35.92, "elapsed_time": "2:38:37", "remaining_time": "4:43:00", "throughput": 3292.87, "total_tokens": 31339112}
{"current_steps": 432, "total_steps": 1200, "loss": 0.1196, "lr": 3.564448228912682e-05, "epoch": 7.083073727933541, "percentage": 36.0, "elapsed_time": "2:39:06", "remaining_time": "4:42:50", "throughput": 3291.84, "total_tokens": 31424024}
{"current_steps": 433, "total_steps": 1200, "loss": 0.1533, "lr": 3.558522508663081e-05, "epoch": 7.099688473520249, "percentage": 36.08, "elapsed_time": "2:39:23", "remaining_time": "4:42:20", "throughput": 3293.13, "total_tokens": 31494656}
{"current_steps": 434, "total_steps": 1200, "loss": 0.1404, "lr": 3.552589533418728e-05, "epoch": 7.116303219106958, "percentage": 36.17, "elapsed_time": "2:39:52", "remaining_time": "4:42:09", "throughput": 3293.17, "total_tokens": 31588536}
{"current_steps": 435, "total_steps": 1200, "loss": 0.2437, "lr": 3.54664934384357e-05, "epoch": 7.132917964693665, "percentage": 36.25, "elapsed_time": "2:40:13", "remaining_time": "4:41:45", "throughput": 3293.16, "total_tokens": 31657560}
{"current_steps": 436, "total_steps": 1200, "loss": 0.15, "lr": 3.540701980651003e-05, "epoch": 7.149532710280374, "percentage": 36.33, "elapsed_time": "2:40:39", "remaining_time": "4:41:31", "throughput": 3292.97, "total_tokens": 31743992}
{"current_steps": 437, "total_steps": 1200, "loss": 0.1375, "lr": 3.534747484603587e-05, "epoch": 7.166147455867082, "percentage": 36.42, "elapsed_time": "2:40:56", "remaining_time": "4:40:59", "throughput": 3293.83, "total_tokens": 31806520}
{"current_steps": 438, "total_steps": 1200, "loss": 0.1388, "lr": 3.528785896512772e-05, "epoch": 7.18276220145379, "percentage": 36.5, "elapsed_time": "2:41:13", "remaining_time": "4:40:28", "throughput": 3293.68, "total_tokens": 31860464}
{"current_steps": 439, "total_steps": 1200, "loss": 0.2926, "lr": 3.5228172572386146e-05, "epoch": 7.1993769470404985, "percentage": 36.58, "elapsed_time": "2:41:31", "remaining_time": "4:39:59", "throughput": 3293.91, "total_tokens": 31921424}
{"current_steps": 440, "total_steps": 1200, "loss": 0.1454, "lr": 3.516841607689501e-05, "epoch": 7.215991692627207, "percentage": 36.67, "elapsed_time": "2:41:44", "remaining_time": "4:39:22", "throughput": 3295.39, "total_tokens": 31981064}
{"current_steps": 441, "total_steps": 1200, "loss": 0.1388, "lr": 3.510858988821863e-05, "epoch": 7.232606438213915, "percentage": 36.75, "elapsed_time": "2:42:03", "remaining_time": "4:38:54", "throughput": 3296.26, "total_tokens": 32050648}
{"current_steps": 442, "total_steps": 1200, "loss": 0.1248, "lr": 3.504869441639901e-05, "epoch": 7.249221183800623, "percentage": 36.83, "elapsed_time": "2:42:23", "remaining_time": "4:38:30", "throughput": 3296.25, "total_tokens": 32118584}
{"current_steps": 443, "total_steps": 1200, "loss": 0.1252, "lr": 3.4988730071953004e-05, "epoch": 7.265835929387332, "percentage": 36.92, "elapsed_time": "2:42:49", "remaining_time": "4:38:13", "throughput": 3296.74, "total_tokens": 32206384}
{"current_steps": 444, "total_steps": 1200, "loss": 0.1101, "lr": 3.4928697265869515e-05, "epoch": 7.282450674974039, "percentage": 37.0, "elapsed_time": "2:43:18", "remaining_time": "4:38:04", "throughput": 3296.28, "total_tokens": 32299040}
{"current_steps": 445, "total_steps": 1200, "loss": 0.1351, "lr": 3.486859640960668e-05, "epoch": 7.299065420560748, "percentage": 37.08, "elapsed_time": "2:43:33", "remaining_time": "4:37:29", "throughput": 3297.03, "total_tokens": 32355624}
{"current_steps": 446, "total_steps": 1200, "loss": 0.1513, "lr": 3.480842791508904e-05, "epoch": 7.315680166147456, "percentage": 37.17, "elapsed_time": "2:43:53", "remaining_time": "4:37:04", "throughput": 3297.57, "total_tokens": 32427792}
{"current_steps": 447, "total_steps": 1200, "loss": 0.1342, "lr": 3.474819219470471e-05, "epoch": 7.332294911734164, "percentage": 37.25, "elapsed_time": "2:44:18", "remaining_time": "4:36:47", "throughput": 3297.4, "total_tokens": 32508696}
{"current_steps": 448, "total_steps": 1200, "loss": 0.136, "lr": 3.4687889661302576e-05, "epoch": 7.348909657320872, "percentage": 37.33, "elapsed_time": "2:44:49", "remaining_time": "4:36:39", "throughput": 3296.66, "total_tokens": 32601312}
{"current_steps": 449, "total_steps": 1200, "loss": 0.1183, "lr": 3.4627520728189456e-05, "epoch": 7.365524402907581, "percentage": 37.42, "elapsed_time": "2:45:13", "remaining_time": "4:36:20", "throughput": 3296.69, "total_tokens": 32680256}
{"current_steps": 450, "total_steps": 1200, "loss": 0.1375, "lr": 3.456708580912725e-05, "epoch": 7.382139148494288, "percentage": 37.5, "elapsed_time": "2:45:31", "remaining_time": "4:35:52", "throughput": 3296.37, "total_tokens": 32738816}
{"current_steps": 451, "total_steps": 1200, "loss": 0.1265, "lr": 3.4506585318330125e-05, "epoch": 7.398753894080997, "percentage": 37.58, "elapsed_time": "2:46:01", "remaining_time": "4:35:42", "throughput": 3294.13, "total_tokens": 32813240}
{"current_steps": 452, "total_steps": 1200, "loss": 0.1422, "lr": 3.444601967046168e-05, "epoch": 7.415368639667705, "percentage": 37.67, "elapsed_time": "2:46:23", "remaining_time": "4:35:21", "throughput": 3294.42, "total_tokens": 32889680}
{"current_steps": 453, "total_steps": 1200, "loss": 0.1534, "lr": 3.438538928063208e-05, "epoch": 7.431983385254413, "percentage": 37.75, "elapsed_time": "2:46:44", "remaining_time": "4:34:57", "throughput": 3295.01, "total_tokens": 32964760}
{"current_steps": 454, "total_steps": 1200, "loss": 0.1517, "lr": 3.432469456439523e-05, "epoch": 7.4485981308411215, "percentage": 37.83, "elapsed_time": "2:47:08", "remaining_time": "4:34:38", "throughput": 3295.43, "total_tokens": 33048992}
{"current_steps": 455, "total_steps": 1200, "loss": 0.1345, "lr": 3.426393593774591e-05, "epoch": 7.46521287642783, "percentage": 37.92, "elapsed_time": "2:47:37", "remaining_time": "4:34:26", "throughput": 3294.24, "total_tokens": 33130200}
{"current_steps": 456, "total_steps": 1200, "loss": 0.133, "lr": 3.4203113817116957e-05, "epoch": 7.481827622014538, "percentage": 38.0, "elapsed_time": "2:48:06", "remaining_time": "4:34:16", "throughput": 3293.89, "total_tokens": 33223024}
{"current_steps": 457, "total_steps": 1200, "loss": 0.1394, "lr": 3.414222861937636e-05, "epoch": 7.498442367601246, "percentage": 38.08, "elapsed_time": "2:48:32", "remaining_time": "4:34:01", "throughput": 3293.27, "total_tokens": 33303120}
{"current_steps": 458, "total_steps": 1200, "loss": 0.1474, "lr": 3.408128076182446e-05, "epoch": 7.515057113187955, "percentage": 38.17, "elapsed_time": "2:48:49", "remaining_time": "4:33:31", "throughput": 3293.73, "total_tokens": 33364984}
{"current_steps": 459, "total_steps": 1200, "loss": 0.1585, "lr": 3.402027066219105e-05, "epoch": 7.531671858774662, "percentage": 38.25, "elapsed_time": "2:49:06", "remaining_time": "4:33:00", "throughput": 3294.5, "total_tokens": 33427352}
{"current_steps": 460, "total_steps": 1200, "loss": 0.1315, "lr": 3.39591987386325e-05, "epoch": 7.548286604361371, "percentage": 38.33, "elapsed_time": "2:49:22", "remaining_time": "4:32:27", "throughput": 3294.7, "total_tokens": 33481272}
{"current_steps": 461, "total_steps": 1200, "loss": 0.1252, "lr": 3.389806540972898e-05, "epoch": 7.564901349948079, "percentage": 38.42, "elapsed_time": "2:49:36", "remaining_time": "4:31:53", "throughput": 3295.75, "total_tokens": 33538904}
{"current_steps": 462, "total_steps": 1200, "loss": 0.1375, "lr": 3.383687109448143e-05, "epoch": 7.581516095534787, "percentage": 38.5, "elapsed_time": "2:50:05", "remaining_time": "4:31:42", "throughput": 3295.76, "total_tokens": 33635976}
{"current_steps": 463, "total_steps": 1200, "loss": 0.137, "lr": 3.377561621230887e-05, "epoch": 7.598130841121495, "percentage": 38.58, "elapsed_time": "2:50:27", "remaining_time": "4:31:20", "throughput": 3296.11, "total_tokens": 33711184}
{"current_steps": 464, "total_steps": 1200, "loss": 0.1133, "lr": 3.3714301183045385e-05, "epoch": 7.614745586708204, "percentage": 38.67, "elapsed_time": "2:50:51", "remaining_time": "4:31:01", "throughput": 3295.01, "total_tokens": 33778848}
{"current_steps": 465, "total_steps": 1200, "loss": 0.129, "lr": 3.365292642693732e-05, "epoch": 7.6313603322949115, "percentage": 38.75, "elapsed_time": "2:51:17", "remaining_time": "4:30:44", "throughput": 3295.31, "total_tokens": 33866024}
{"current_steps": 466, "total_steps": 1200, "loss": 0.1453, "lr": 3.359149236464041e-05, "epoch": 7.64797507788162, "percentage": 38.83, "elapsed_time": "2:51:53", "remaining_time": "4:30:44", "throughput": 3294.51, "total_tokens": 33978144}
{"current_steps": 467, "total_steps": 1200, "loss": 0.1642, "lr": 3.35299994172168e-05, "epoch": 7.6645898234683285, "percentage": 38.92, "elapsed_time": "2:52:12", "remaining_time": "4:30:17", "throughput": 3295.29, "total_tokens": 34047480}
{"current_steps": 468, "total_steps": 1200, "loss": 0.1296, "lr": 3.346844800613229e-05, "epoch": 7.681204569055036, "percentage": 39.0, "elapsed_time": "2:52:40", "remaining_time": "4:30:04", "throughput": 3294.73, "total_tokens": 34134480}
{"current_steps": 469, "total_steps": 1200, "loss": 0.1299, "lr": 3.340683855325335e-05, "epoch": 7.697819314641745, "percentage": 39.08, "elapsed_time": "2:52:54", "remaining_time": "4:29:30", "throughput": 3295.53, "total_tokens": 34190176}
{"current_steps": 470, "total_steps": 1200, "loss": 0.155, "lr": 3.3345171480844275e-05, "epoch": 7.714434060228453, "percentage": 39.17, "elapsed_time": "2:53:21", "remaining_time": "4:29:14", "throughput": 3294.58, "total_tokens": 34267336}
{"current_steps": 471, "total_steps": 1200, "loss": 0.1439, "lr": 3.3283447211564276e-05, "epoch": 7.731048805815161, "percentage": 39.25, "elapsed_time": "2:53:36", "remaining_time": "4:28:42", "throughput": 3295.95, "total_tokens": 34333616}
{"current_steps": 472, "total_steps": 1200, "loss": 0.1451, "lr": 3.322166616846458e-05, "epoch": 7.747663551401869, "percentage": 39.33, "elapsed_time": "2:53:55", "remaining_time": "4:28:14", "throughput": 3296.92, "total_tokens": 34404000}
{"current_steps": 473, "total_steps": 1200, "loss": 0.1403, "lr": 3.315982877498555e-05, "epoch": 7.764278296988578, "percentage": 39.42, "elapsed_time": "2:54:10", "remaining_time": "4:27:42", "throughput": 3298.0, "total_tokens": 34466048}
{"current_steps": 474, "total_steps": 1200, "loss": 0.1355, "lr": 3.309793545495374e-05, "epoch": 7.780893042575285, "percentage": 39.5, "elapsed_time": "2:54:34", "remaining_time": "4:27:22", "throughput": 3298.32, "total_tokens": 34547312}
{"current_steps": 475, "total_steps": 1200, "loss": 0.1295, "lr": 3.303598663257904e-05, "epoch": 7.797507788161994, "percentage": 39.58, "elapsed_time": "2:54:48", "remaining_time": "4:26:48", "throughput": 3298.89, "total_tokens": 34600544}
{"current_steps": 476, "total_steps": 1200, "loss": 0.1405, "lr": 3.2973982732451755e-05, "epoch": 7.814122533748702, "percentage": 39.67, "elapsed_time": "2:55:06", "remaining_time": "4:26:20", "throughput": 3299.05, "total_tokens": 34660792}
{"current_steps": 477, "total_steps": 1200, "loss": 0.1858, "lr": 3.2911924179539656e-05, "epoch": 7.83073727933541, "percentage": 39.75, "elapsed_time": "2:55:46", "remaining_time": "4:26:25", "throughput": 3297.69, "total_tokens": 34778440}
{"current_steps": 478, "total_steps": 1200, "loss": 0.1454, "lr": 3.284981139918513e-05, "epoch": 7.8473520249221185, "percentage": 39.83, "elapsed_time": "2:56:05", "remaining_time": "4:25:58", "throughput": 3298.53, "total_tokens": 34849760}
{"current_steps": 479, "total_steps": 1200, "loss": 0.1177, "lr": 3.278764481710221e-05, "epoch": 7.863966770508826, "percentage": 39.92, "elapsed_time": "2:56:35", "remaining_time": "4:25:48", "throughput": 3297.74, "total_tokens": 34940776}
{"current_steps": 480, "total_steps": 1200, "loss": 0.1369, "lr": 3.272542485937369e-05, "epoch": 7.880581516095535, "percentage": 40.0, "elapsed_time": "2:56:59", "remaining_time": "4:25:28", "throughput": 3297.59, "total_tokens": 35018104}
{"current_steps": 481, "total_steps": 1200, "loss": 0.1267, "lr": 3.26631519524482e-05, "epoch": 7.897196261682243, "percentage": 40.08, "elapsed_time": "2:57:18", "remaining_time": "4:25:03", "throughput": 3297.31, "total_tokens": 35079744}
{"current_steps": 482, "total_steps": 1200, "loss": 0.1236, "lr": 3.260082652313726e-05, "epoch": 7.913811007268951, "percentage": 40.17, "elapsed_time": "2:57:33", "remaining_time": "4:24:30", "throughput": 3297.68, "total_tokens": 35132808}
{"current_steps": 483, "total_steps": 1200, "loss": 0.131, "lr": 3.253844899861239e-05, "epoch": 7.930425752855659, "percentage": 40.25, "elapsed_time": "2:57:51", "remaining_time": "4:24:01", "throughput": 3298.41, "total_tokens": 35197816}
{"current_steps": 484, "total_steps": 1200, "loss": 0.1129, "lr": 3.247601980640217e-05, "epoch": 7.947040498442368, "percentage": 40.33, "elapsed_time": "2:58:16", "remaining_time": "4:23:44", "throughput": 3297.71, "total_tokens": 35275528}
{"current_steps": 485, "total_steps": 1200, "loss": 0.1448, "lr": 3.241353937438927e-05, "epoch": 7.963655244029075, "percentage": 40.42, "elapsed_time": "2:58:31", "remaining_time": "4:23:11", "throughput": 3298.53, "total_tokens": 35333280}
{"current_steps": 486, "total_steps": 1200, "loss": 0.1488, "lr": 3.23510081308076e-05, "epoch": 7.980269989615784, "percentage": 40.5, "elapsed_time": "2:58:56", "remaining_time": "4:22:54", "throughput": 3298.24, "total_tokens": 35412944}
{"current_steps": 487, "total_steps": 1200, "loss": 0.148, "lr": 3.228842650423929e-05, "epoch": 7.996884735202492, "percentage": 40.58, "elapsed_time": "2:59:17", "remaining_time": "4:22:29", "throughput": 3298.66, "total_tokens": 35485056}
{"current_steps": 488, "total_steps": 1200, "loss": 0.1484, "lr": 3.222579492361179e-05, "epoch": 8.0, "percentage": 40.67, "elapsed_time": "2:59:20", "remaining_time": "4:21:39", "throughput": 3298.72, "total_tokens": 35494824}
{"current_steps": 489, "total_steps": 1200, "loss": 0.124, "lr": 3.2163113818194964e-05, "epoch": 8.016614745586708, "percentage": 40.75, "elapsed_time": "2:59:36", "remaining_time": "4:21:08", "throughput": 3299.67, "total_tokens": 35557768}
{"current_steps": 490, "total_steps": 1200, "loss": 0.124, "lr": 3.210038361759807e-05, "epoch": 8.033229491173417, "percentage": 40.83, "elapsed_time": "2:59:49", "remaining_time": "4:20:33", "throughput": 3300.69, "total_tokens": 35613120}
{"current_steps": 491, "total_steps": 1200, "loss": 0.1214, "lr": 3.2037604751766885e-05, "epoch": 8.049844236760125, "percentage": 40.92, "elapsed_time": "3:00:08", "remaining_time": "4:20:07", "throughput": 3300.57, "total_tokens": 35674176}
{"current_steps": 492, "total_steps": 1200, "loss": 0.1295, "lr": 3.1974777650980735e-05, "epoch": 8.066458982346832, "percentage": 41.0, "elapsed_time": "3:00:48", "remaining_time": "4:20:10", "throughput": 3298.91, "total_tokens": 35786664}
{"current_steps": 493, "total_steps": 1200, "loss": 0.1376, "lr": 3.191190274584952e-05, "epoch": 8.083073727933542, "percentage": 41.08, "elapsed_time": "3:01:02", "remaining_time": "4:19:38", "throughput": 3299.36, "total_tokens": 35840720}
{"current_steps": 494, "total_steps": 1200, "loss": 0.1087, "lr": 3.184898046731082e-05, "epoch": 8.09968847352025, "percentage": 41.17, "elapsed_time": "3:01:35", "remaining_time": "4:19:31", "throughput": 3298.28, "total_tokens": 35936736}
{"current_steps": 495, "total_steps": 1200, "loss": 0.1227, "lr": 3.178601124662686e-05, "epoch": 8.116303219106957, "percentage": 41.25, "elapsed_time": "3:01:57", "remaining_time": "4:19:09", "throughput": 3298.58, "total_tokens": 36013800}
{"current_steps": 496, "total_steps": 1200, "loss": 0.1221, "lr": 3.172299551538164e-05, "epoch": 8.132917964693666, "percentage": 41.33, "elapsed_time": "3:02:25", "remaining_time": "4:18:55", "throughput": 3297.96, "total_tokens": 36097904}
{"current_steps": 497, "total_steps": 1200, "loss": 0.0985, "lr": 3.165993370547794e-05, "epoch": 8.149532710280374, "percentage": 41.42, "elapsed_time": "3:02:54", "remaining_time": "4:18:43", "throughput": 3298.01, "total_tokens": 36195544}
{"current_steps": 498, "total_steps": 1200, "loss": 0.1529, "lr": 3.1596826249134324e-05, "epoch": 8.166147455867081, "percentage": 41.5, "elapsed_time": "3:03:14", "remaining_time": "4:18:18", "throughput": 3298.12, "total_tokens": 36261256}
{"current_steps": 499, "total_steps": 1200, "loss": 0.1489, "lr": 3.153367357888224e-05, "epoch": 8.18276220145379, "percentage": 41.58, "elapsed_time": "3:03:31", "remaining_time": "4:17:48", "throughput": 3298.98, "total_tokens": 36325024}
{"current_steps": 500, "total_steps": 1200, "loss": 0.1288, "lr": 3.147047612756302e-05, "epoch": 8.199376947040498, "percentage": 41.67, "elapsed_time": "3:03:44", "remaining_time": "4:17:14", "throughput": 3299.54, "total_tokens": 36377368}
{"current_steps": 501, "total_steps": 1200, "loss": 0.1153, "lr": 3.140723432832492e-05, "epoch": 8.215991692627206, "percentage": 41.75, "elapsed_time": "3:04:09", "remaining_time": "4:16:56", "throughput": 3299.68, "total_tokens": 36459240}
{"current_steps": 502, "total_steps": 1200, "loss": 0.1305, "lr": 3.1343948614620145e-05, "epoch": 8.232606438213915, "percentage": 41.83, "elapsed_time": "3:04:38", "remaining_time": "4:16:43", "throughput": 3299.51, "total_tokens": 36553088}
{"current_steps": 503, "total_steps": 1200, "loss": 0.2824, "lr": 3.128061942020189e-05, "epoch": 8.249221183800623, "percentage": 41.92, "elapsed_time": "3:04:53", "remaining_time": "4:16:12", "throughput": 3300.27, "total_tokens": 36611464}
{"current_steps": 504, "total_steps": 1200, "loss": 0.1033, "lr": 3.121724717912138e-05, "epoch": 8.26583592938733, "percentage": 42.0, "elapsed_time": "3:05:28", "remaining_time": "4:16:07", "throughput": 3298.48, "total_tokens": 36705696}
{"current_steps": 505, "total_steps": 1200, "loss": 0.1124, "lr": 3.115383232572483e-05, "epoch": 8.28245067497404, "percentage": 42.08, "elapsed_time": "3:05:43", "remaining_time": "4:15:36", "throughput": 3298.9, "total_tokens": 36762744}
{"current_steps": 506, "total_steps": 1200, "loss": 0.1297, "lr": 3.109037529465056e-05, "epoch": 8.299065420560748, "percentage": 42.17, "elapsed_time": "3:06:03", "remaining_time": "4:15:11", "throughput": 3298.86, "total_tokens": 36827816}
{"current_steps": 507, "total_steps": 1200, "loss": 0.1158, "lr": 3.102687652082597e-05, "epoch": 8.315680166147455, "percentage": 42.25, "elapsed_time": "3:06:36", "remaining_time": "4:15:04", "throughput": 3298.47, "total_tokens": 36931424}
{"current_steps": 508, "total_steps": 1200, "loss": 0.1146, "lr": 3.0963336439464526e-05, "epoch": 8.332294911734165, "percentage": 42.33, "elapsed_time": "3:06:54", "remaining_time": "4:14:36", "throughput": 3298.51, "total_tokens": 36991464}
{"current_steps": 509, "total_steps": 1200, "loss": 0.1044, "lr": 3.089975548606283e-05, "epoch": 8.348909657320872, "percentage": 42.42, "elapsed_time": "3:07:27", "remaining_time": "4:14:29", "throughput": 3297.79, "total_tokens": 37092928}
{"current_steps": 510, "total_steps": 1200, "loss": 0.1192, "lr": 3.083613409639764e-05, "epoch": 8.36552440290758, "percentage": 42.5, "elapsed_time": "3:07:46", "remaining_time": "4:14:03", "throughput": 3298.95, "total_tokens": 37168792}
{"current_steps": 511, "total_steps": 1200, "loss": 0.1197, "lr": 3.0772472706522806e-05, "epoch": 8.38213914849429, "percentage": 42.58, "elapsed_time": "3:08:20", "remaining_time": "4:13:56", "throughput": 3297.1, "total_tokens": 37258864}
{"current_steps": 512, "total_steps": 1200, "loss": 0.1351, "lr": 3.0708771752766394e-05, "epoch": 8.398753894080997, "percentage": 42.67, "elapsed_time": "3:08:42", "remaining_time": "4:13:34", "throughput": 3298.09, "total_tokens": 37343224}
{"current_steps": 513, "total_steps": 1200, "loss": 0.1336, "lr": 3.06450316717276e-05, "epoch": 8.415368639667705, "percentage": 42.75, "elapsed_time": "3:08:56", "remaining_time": "4:13:02", "throughput": 3298.6, "total_tokens": 37395488}
{"current_steps": 514, "total_steps": 1200, "loss": 0.1057, "lr": 3.0581252900273786e-05, "epoch": 8.431983385254414, "percentage": 42.83, "elapsed_time": "3:09:21", "remaining_time": "4:12:43", "throughput": 3298.28, "total_tokens": 37473248}
{"current_steps": 515, "total_steps": 1200, "loss": 0.1101, "lr": 3.0517435875537536e-05, "epoch": 8.448598130841122, "percentage": 42.92, "elapsed_time": "3:09:35", "remaining_time": "4:12:09", "throughput": 3299.51, "total_tokens": 37532096}
{"current_steps": 516, "total_steps": 1200, "loss": 0.1079, "lr": 3.045358103491357e-05, "epoch": 8.46521287642783, "percentage": 43.0, "elapsed_time": "3:10:04", "remaining_time": "4:11:57", "throughput": 3298.96, "total_tokens": 37622328}
{"current_steps": 517, "total_steps": 1200, "loss": 0.1245, "lr": 3.038968881605583e-05, "epoch": 8.481827622014539, "percentage": 43.08, "elapsed_time": "3:10:22", "remaining_time": "4:11:29", "throughput": 3299.42, "total_tokens": 37686304}
{"current_steps": 518, "total_steps": 1200, "loss": 0.1275, "lr": 3.0325759656874418e-05, "epoch": 8.498442367601246, "percentage": 43.17, "elapsed_time": "3:10:49", "remaining_time": "4:11:14", "throughput": 3298.97, "total_tokens": 37770856}
{"current_steps": 519, "total_steps": 1200, "loss": 0.1123, "lr": 3.026179399553264e-05, "epoch": 8.515057113187954, "percentage": 43.25, "elapsed_time": "3:11:09", "remaining_time": "4:10:49", "throughput": 3298.77, "total_tokens": 37834072}
{"current_steps": 520, "total_steps": 1200, "loss": 0.112, "lr": 3.0197792270443982e-05, "epoch": 8.531671858774663, "percentage": 43.33, "elapsed_time": "3:11:24", "remaining_time": "4:10:17", "throughput": 3299.33, "total_tokens": 37889928}
{"current_steps": 521, "total_steps": 1200, "loss": 0.2376, "lr": 3.0133754920269103e-05, "epoch": 8.54828660436137, "percentage": 43.42, "elapsed_time": "3:11:50", "remaining_time": "4:10:01", "throughput": 3298.7, "total_tokens": 37971296}
{"current_steps": 522, "total_steps": 1200, "loss": 0.123, "lr": 3.0069682383912813e-05, "epoch": 8.564901349948078, "percentage": 43.5, "elapsed_time": "3:12:14", "remaining_time": "4:09:42", "throughput": 3298.61, "total_tokens": 38049288}
{"current_steps": 523, "total_steps": 1200, "loss": 0.1386, "lr": 3.0005575100521118e-05, "epoch": 8.581516095534788, "percentage": 43.58, "elapsed_time": "3:12:40", "remaining_time": "4:09:24", "throughput": 3297.75, "total_tokens": 38123392}
{"current_steps": 524, "total_steps": 1200, "loss": 0.1194, "lr": 2.9941433509478156e-05, "epoch": 8.598130841121495, "percentage": 43.67, "elapsed_time": "3:13:06", "remaining_time": "4:09:07", "throughput": 3297.63, "total_tokens": 38208264}
{"current_steps": 525, "total_steps": 1200, "loss": 0.126, "lr": 2.9877258050403212e-05, "epoch": 8.614745586708203, "percentage": 43.75, "elapsed_time": "3:13:20", "remaining_time": "4:08:35", "throughput": 3297.84, "total_tokens": 38258192}
{"current_steps": 526, "total_steps": 1200, "loss": 0.1295, "lr": 2.9813049163147688e-05, "epoch": 8.631360332294912, "percentage": 43.83, "elapsed_time": "3:13:41", "remaining_time": "4:08:11", "throughput": 3298.31, "total_tokens": 38332408}
{"current_steps": 527, "total_steps": 1200, "loss": 0.1035, "lr": 2.974880728779212e-05, "epoch": 8.64797507788162, "percentage": 43.92, "elapsed_time": "3:14:08", "remaining_time": "4:07:54", "throughput": 3297.12, "total_tokens": 38404960}
{"current_steps": 528, "total_steps": 1200, "loss": 0.1347, "lr": 2.9684532864643122e-05, "epoch": 8.664589823468328, "percentage": 44.0, "elapsed_time": "3:14:26", "remaining_time": "4:07:28", "throughput": 3298.39, "total_tokens": 38481704}
{"current_steps": 529, "total_steps": 1200, "loss": 0.1076, "lr": 2.9620226334230388e-05, "epoch": 8.681204569055037, "percentage": 44.08, "elapsed_time": "3:14:44", "remaining_time": "4:07:01", "throughput": 3298.85, "total_tokens": 38546304}
{"current_steps": 530, "total_steps": 1200, "loss": 0.1514, "lr": 2.9555888137303695e-05, "epoch": 8.697819314641745, "percentage": 44.17, "elapsed_time": "3:15:08", "remaining_time": "4:06:41", "throughput": 3298.58, "total_tokens": 38621024}
{"current_steps": 531, "total_steps": 1200, "loss": 0.1119, "lr": 2.949151871482982e-05, "epoch": 8.714434060228452, "percentage": 44.25, "elapsed_time": "3:15:23", "remaining_time": "4:06:09", "throughput": 3299.38, "total_tokens": 38679368}
{"current_steps": 532, "total_steps": 1200, "loss": 0.1331, "lr": 2.9427118507989586e-05, "epoch": 8.731048805815162, "percentage": 44.33, "elapsed_time": "3:15:46", "remaining_time": "4:05:48", "throughput": 3299.31, "total_tokens": 38753984}
{"current_steps": 533, "total_steps": 1200, "loss": 0.1158, "lr": 2.93626879581748e-05, "epoch": 8.74766355140187, "percentage": 44.42, "elapsed_time": "3:16:00", "remaining_time": "4:05:17", "throughput": 3299.92, "total_tokens": 38808336}
{"current_steps": 534, "total_steps": 1200, "loss": 0.2268, "lr": 2.929822750698524e-05, "epoch": 8.764278296988577, "percentage": 44.5, "elapsed_time": "3:16:23", "remaining_time": "4:04:56", "throughput": 3299.25, "total_tokens": 38876624}
{"current_steps": 535, "total_steps": 1200, "loss": 0.1155, "lr": 2.9233737596225613e-05, "epoch": 8.780893042575286, "percentage": 44.58, "elapsed_time": "3:16:38", "remaining_time": "4:04:25", "throughput": 3299.89, "total_tokens": 38933576}
{"current_steps": 536, "total_steps": 1200, "loss": 0.114, "lr": 2.916921866790256e-05, "epoch": 8.797507788161994, "percentage": 44.67, "elapsed_time": "3:17:14", "remaining_time": "4:04:20", "throughput": 3299.85, "total_tokens": 39050816}
{"current_steps": 537, "total_steps": 1200, "loss": 0.119, "lr": 2.9104671164221576e-05, "epoch": 8.814122533748701, "percentage": 44.75, "elapsed_time": "3:17:27", "remaining_time": "4:03:47", "throughput": 3300.33, "total_tokens": 39101856}
{"current_steps": 538, "total_steps": 1200, "loss": 0.115, "lr": 2.9040095527584032e-05, "epoch": 8.83073727933541, "percentage": 44.83, "elapsed_time": "3:17:42", "remaining_time": "4:03:16", "throughput": 3301.39, "total_tokens": 39161928}
{"current_steps": 539, "total_steps": 1200, "loss": 0.1312, "lr": 2.897549220058411e-05, "epoch": 8.847352024922118, "percentage": 44.92, "elapsed_time": "3:17:56", "remaining_time": "4:02:45", "throughput": 3301.9, "total_tokens": 39216048}
{"current_steps": 540, "total_steps": 1200, "loss": 0.1107, "lr": 2.8910861626005776e-05, "epoch": 8.863966770508826, "percentage": 45.0, "elapsed_time": "3:18:29", "remaining_time": "4:02:36", "throughput": 3301.26, "total_tokens": 39317320}
{"current_steps": 541, "total_steps": 1200, "loss": 0.119, "lr": 2.884620424681976e-05, "epoch": 8.880581516095535, "percentage": 45.08, "elapsed_time": "3:18:50", "remaining_time": "4:02:12", "throughput": 3301.06, "total_tokens": 39383120}
{"current_steps": 542, "total_steps": 1200, "loss": 0.1212, "lr": 2.8781520506180486e-05, "epoch": 8.897196261682243, "percentage": 45.17, "elapsed_time": "3:19:14", "remaining_time": "4:01:53", "throughput": 3300.59, "total_tokens": 39458584}
{"current_steps": 543, "total_steps": 1200, "loss": 0.0999, "lr": 2.871681084742308e-05, "epoch": 8.91381100726895, "percentage": 45.25, "elapsed_time": "3:19:37", "remaining_time": "4:01:32", "throughput": 3300.79, "total_tokens": 39535152}
{"current_steps": 544, "total_steps": 1200, "loss": 0.1297, "lr": 2.8652075714060295e-05, "epoch": 8.93042575285566, "percentage": 45.33, "elapsed_time": "3:19:51", "remaining_time": "4:01:00", "throughput": 3301.56, "total_tokens": 39590360}
{"current_steps": 545, "total_steps": 1200, "loss": 0.1159, "lr": 2.858731554977948e-05, "epoch": 8.947040498442368, "percentage": 45.42, "elapsed_time": "3:20:16", "remaining_time": "4:00:41", "throughput": 3301.36, "total_tokens": 39669984}
{"current_steps": 546, "total_steps": 1200, "loss": 0.1139, "lr": 2.8522530798439567e-05, "epoch": 8.963655244029075, "percentage": 45.5, "elapsed_time": "3:20:42", "remaining_time": "4:00:24", "throughput": 3301.48, "total_tokens": 39757392}
{"current_steps": 547, "total_steps": 1200, "loss": 0.1266, "lr": 2.845772190406798e-05, "epoch": 8.980269989615785, "percentage": 45.58, "elapsed_time": "3:21:14", "remaining_time": "4:00:14", "throughput": 3300.19, "total_tokens": 39848064}
{"current_steps": 548, "total_steps": 1200, "loss": 0.1199, "lr": 2.8392889310857612e-05, "epoch": 8.996884735202492, "percentage": 45.67, "elapsed_time": "3:21:36", "remaining_time": "3:59:52", "throughput": 3300.19, "total_tokens": 39922288}
{"current_steps": 549, "total_steps": 1200, "loss": 0.1246, "lr": 2.832803346316381e-05, "epoch": 9.0, "percentage": 45.75, "elapsed_time": "3:21:39", "remaining_time": "3:59:07", "throughput": 3300.27, "total_tokens": 39932640}
{"current_steps": 550, "total_steps": 1200, "loss": 0.0949, "lr": 2.8263154805501297e-05, "epoch": 9.016614745586708, "percentage": 45.83, "elapsed_time": "3:21:59", "remaining_time": "3:58:43", "throughput": 3300.89, "total_tokens": 40005688}
{"current_steps": 551, "total_steps": 1200, "loss": 0.1086, "lr": 2.819825378254111e-05, "epoch": 9.033229491173417, "percentage": 45.92, "elapsed_time": "3:22:13", "remaining_time": "3:58:11", "throughput": 3301.32, "total_tokens": 40057120}
{"current_steps": 552, "total_steps": 1200, "loss": 0.1098, "lr": 2.8133330839107608e-05, "epoch": 9.049844236760125, "percentage": 46.0, "elapsed_time": "3:22:39", "remaining_time": "3:57:54", "throughput": 3300.71, "total_tokens": 40135992}
{"current_steps": 553, "total_steps": 1200, "loss": 0.1297, "lr": 2.8068386420175375e-05, "epoch": 9.066458982346832, "percentage": 46.08, "elapsed_time": "3:22:53", "remaining_time": "3:57:22", "throughput": 3302.03, "total_tokens": 40196928}
{"current_steps": 554, "total_steps": 1200, "loss": 0.1113, "lr": 2.8003420970866177e-05, "epoch": 9.083073727933542, "percentage": 46.17, "elapsed_time": "3:23:15", "remaining_time": "3:57:00", "throughput": 3302.04, "total_tokens": 40269392}
{"current_steps": 555, "total_steps": 1200, "loss": 0.1145, "lr": 2.7938434936445945e-05, "epoch": 9.09968847352025, "percentage": 46.25, "elapsed_time": "3:23:37", "remaining_time": "3:56:38", "throughput": 3302.65, "total_tokens": 40350080}
{"current_steps": 556, "total_steps": 1200, "loss": 0.123, "lr": 2.787342876232167e-05, "epoch": 9.116303219106957, "percentage": 46.33, "elapsed_time": "3:23:55", "remaining_time": "3:56:12", "throughput": 3303.11, "total_tokens": 40416360}
{"current_steps": 557, "total_steps": 1200, "loss": 0.0915, "lr": 2.780840289403839e-05, "epoch": 9.132917964693666, "percentage": 46.42, "elapsed_time": "3:24:15", "remaining_time": "3:55:47", "throughput": 3303.99, "total_tokens": 40490432}
{"current_steps": 558, "total_steps": 1200, "loss": 0.1149, "lr": 2.774335777727613e-05, "epoch": 9.149532710280374, "percentage": 46.5, "elapsed_time": "3:24:35", "remaining_time": "3:55:23", "throughput": 3304.32, "total_tokens": 40561784}
{"current_steps": 559, "total_steps": 1200, "loss": 0.109, "lr": 2.7678293857846844e-05, "epoch": 9.166147455867081, "percentage": 46.58, "elapsed_time": "3:24:57", "remaining_time": "3:55:01", "throughput": 3304.87, "total_tokens": 40641728}
{"current_steps": 560, "total_steps": 1200, "loss": 0.1246, "lr": 2.761321158169134e-05, "epoch": 9.18276220145379, "percentage": 46.67, "elapsed_time": "3:25:12", "remaining_time": "3:54:30", "throughput": 3305.75, "total_tokens": 40700744}
{"current_steps": 561, "total_steps": 1200, "loss": 0.1251, "lr": 2.754811139487625e-05, "epoch": 9.199376947040498, "percentage": 46.75, "elapsed_time": "3:25:24", "remaining_time": "3:53:57", "throughput": 3306.32, "total_tokens": 40748048}
{"current_steps": 562, "total_steps": 1200, "loss": 0.0937, "lr": 2.7482993743590978e-05, "epoch": 9.215991692627206, "percentage": 46.83, "elapsed_time": "3:25:41", "remaining_time": "3:53:30", "throughput": 3306.79, "total_tokens": 40810104}
{"current_steps": 563, "total_steps": 1200, "loss": 0.1241, "lr": 2.7417859074144604e-05, "epoch": 9.232606438213915, "percentage": 46.92, "elapsed_time": "3:26:12", "remaining_time": "3:53:19", "throughput": 3305.59, "total_tokens": 40899480}
{"current_steps": 564, "total_steps": 1200, "loss": 0.103, "lr": 2.7352707832962865e-05, "epoch": 9.249221183800623, "percentage": 47.0, "elapsed_time": "3:26:41", "remaining_time": "3:53:04", "throughput": 3305.49, "total_tokens": 40993536}
{"current_steps": 565, "total_steps": 1200, "loss": 0.0912, "lr": 2.7287540466585065e-05, "epoch": 9.26583592938733, "percentage": 47.08, "elapsed_time": "3:26:59", "remaining_time": "3:52:38", "throughput": 3306.16, "total_tokens": 41060848}
{"current_steps": 566, "total_steps": 1200, "loss": 0.119, "lr": 2.7222357421661042e-05, "epoch": 9.28245067497404, "percentage": 47.17, "elapsed_time": "3:27:22", "remaining_time": "3:52:16", "throughput": 3306.39, "total_tokens": 41138352}
{"current_steps": 567, "total_steps": 1200, "loss": 0.1273, "lr": 2.7157159144948092e-05, "epoch": 9.299065420560748, "percentage": 47.25, "elapsed_time": "3:27:44", "remaining_time": "3:51:55", "throughput": 3306.41, "total_tokens": 41212624}
{"current_steps": 568, "total_steps": 1200, "loss": 0.1002, "lr": 2.7091946083307896e-05, "epoch": 9.315680166147455, "percentage": 47.33, "elapsed_time": "3:28:03", "remaining_time": "3:51:30", "throughput": 3306.67, "total_tokens": 41279472}
{"current_steps": 569, "total_steps": 1200, "loss": 0.118, "lr": 2.7026718683703473e-05, "epoch": 9.332294911734165, "percentage": 47.42, "elapsed_time": "3:28:30", "remaining_time": "3:51:13", "throughput": 3305.46, "total_tokens": 41353544}
{"current_steps": 570, "total_steps": 1200, "loss": 0.0949, "lr": 2.6961477393196126e-05, "epoch": 9.348909657320872, "percentage": 47.5, "elapsed_time": "3:28:56", "remaining_time": "3:50:56", "throughput": 3305.81, "total_tokens": 41444896}
{"current_steps": 571, "total_steps": 1200, "loss": 0.1177, "lr": 2.6896222658942348e-05, "epoch": 9.36552440290758, "percentage": 47.58, "elapsed_time": "3:29:13", "remaining_time": "3:50:28", "throughput": 3306.23, "total_tokens": 41505152}
{"current_steps": 572, "total_steps": 1200, "loss": 0.1371, "lr": 2.6830954928190794e-05, "epoch": 9.38213914849429, "percentage": 47.67, "elapsed_time": "3:29:31", "remaining_time": "3:50:02", "throughput": 3306.31, "total_tokens": 41566696}
{"current_steps": 573, "total_steps": 1200, "loss": 0.1241, "lr": 2.6765674648279172e-05, "epoch": 9.398753894080997, "percentage": 47.75, "elapsed_time": "3:29:55", "remaining_time": "3:49:41", "throughput": 3306.21, "total_tokens": 41641736}
{"current_steps": 574, "total_steps": 1200, "loss": 0.0956, "lr": 2.6700382266631206e-05, "epoch": 9.415368639667705, "percentage": 47.83, "elapsed_time": "3:30:32", "remaining_time": "3:49:36", "throughput": 3304.2, "total_tokens": 41740008}
{"current_steps": 575, "total_steps": 1200, "loss": 0.1068, "lr": 2.663507823075358e-05, "epoch": 9.431983385254414, "percentage": 47.92, "elapsed_time": "3:31:01", "remaining_time": "3:49:22", "throughput": 3304.63, "total_tokens": 41842808}
{"current_steps": 576, "total_steps": 1200, "loss": 0.1019, "lr": 2.656976298823284e-05, "epoch": 9.448598130841122, "percentage": 48.0, "elapsed_time": "3:31:19", "remaining_time": "3:48:56", "throughput": 3305.9, "total_tokens": 41917128}
{"current_steps": 577, "total_steps": 1200, "loss": 0.1116, "lr": 2.6504436986732338e-05, "epoch": 9.46521287642783, "percentage": 48.08, "elapsed_time": "3:31:41", "remaining_time": "3:48:33", "throughput": 3305.56, "total_tokens": 41984232}
{"current_steps": 578, "total_steps": 1200, "loss": 0.11, "lr": 2.6439100673989187e-05, "epoch": 9.481827622014539, "percentage": 48.17, "elapsed_time": "3:31:57", "remaining_time": "3:48:05", "throughput": 3306.23, "total_tokens": 42047216}
{"current_steps": 579, "total_steps": 1200, "loss": 0.1044, "lr": 2.637375449781115e-05, "epoch": 9.498442367601246, "percentage": 48.25, "elapsed_time": "3:32:18", "remaining_time": "3:47:42", "throughput": 3306.65, "total_tokens": 42122072}
{"current_steps": 580, "total_steps": 1200, "loss": 0.0985, "lr": 2.63083989060736e-05, "epoch": 9.515057113187954, "percentage": 48.33, "elapsed_time": "3:32:38", "remaining_time": "3:47:18", "throughput": 3307.36, "total_tokens": 42198480}
{"current_steps": 581, "total_steps": 1200, "loss": 0.1046, "lr": 2.624303434671645e-05, "epoch": 9.531671858774663, "percentage": 48.42, "elapsed_time": "3:33:09", "remaining_time": "3:47:06", "throughput": 3306.55, "total_tokens": 42289336}
{"current_steps": 582, "total_steps": 1200, "loss": 0.114, "lr": 2.6177661267741065e-05, "epoch": 9.54828660436137, "percentage": 48.5, "elapsed_time": "3:33:28", "remaining_time": "3:46:40", "throughput": 3306.56, "total_tokens": 42352288}
{"current_steps": 583, "total_steps": 1200, "loss": 0.1117, "lr": 2.611228011720722e-05, "epoch": 9.564901349948078, "percentage": 48.58, "elapsed_time": "3:33:59", "remaining_time": "3:46:28", "throughput": 3306.57, "total_tokens": 42453832}
{"current_steps": 584, "total_steps": 1200, "loss": 0.1148, "lr": 2.604689134322999e-05, "epoch": 9.581516095534788, "percentage": 48.67, "elapsed_time": "3:34:15", "remaining_time": "3:46:00", "throughput": 3307.1, "total_tokens": 42514704}
{"current_steps": 585, "total_steps": 1200, "loss": 0.2187, "lr": 2.598149539397672e-05, "epoch": 9.598130841121495, "percentage": 48.75, "elapsed_time": "3:34:33", "remaining_time": "3:45:33", "throughput": 3307.22, "total_tokens": 42576344}
{"current_steps": 586, "total_steps": 1200, "loss": 0.1046, "lr": 2.591609271766391e-05, "epoch": 9.614745586708203, "percentage": 48.83, "elapsed_time": "3:35:04", "remaining_time": "3:45:21", "throughput": 3305.98, "total_tokens": 42662824}
{"current_steps": 587, "total_steps": 1200, "loss": 0.0848, "lr": 2.5850683762554184e-05, "epoch": 9.631360332294912, "percentage": 48.92, "elapsed_time": "3:35:38", "remaining_time": "3:45:11", "throughput": 3304.23, "total_tokens": 42752496}
{"current_steps": 588, "total_steps": 1200, "loss": 0.0886, "lr": 2.578526897695321e-05, "epoch": 9.64797507788162, "percentage": 49.0, "elapsed_time": "3:36:01", "remaining_time": "3:44:50", "throughput": 3304.19, "total_tokens": 42826064}
{"current_steps": 589, "total_steps": 1200, "loss": 0.1989, "lr": 2.5719848809206586e-05, "epoch": 9.664589823468328, "percentage": 49.08, "elapsed_time": "3:36:19", "remaining_time": "3:44:24", "throughput": 3304.82, "total_tokens": 42895808}
{"current_steps": 590, "total_steps": 1200, "loss": 0.1156, "lr": 2.5654423707696833e-05, "epoch": 9.681204569055037, "percentage": 49.17, "elapsed_time": "3:36:34", "remaining_time": "3:43:54", "throughput": 3305.54, "total_tokens": 42952408}
{"current_steps": 591, "total_steps": 1200, "loss": 0.1053, "lr": 2.558899412084026e-05, "epoch": 9.697819314641745, "percentage": 49.25, "elapsed_time": "3:36:57", "remaining_time": "3:43:33", "throughput": 3305.28, "total_tokens": 43025536}
{"current_steps": 592, "total_steps": 1200, "loss": 0.084, "lr": 2.5523560497083926e-05, "epoch": 9.714434060228452, "percentage": 49.33, "elapsed_time": "3:37:26", "remaining_time": "3:43:19", "throughput": 3304.95, "total_tokens": 43118024}
{"current_steps": 593, "total_steps": 1200, "loss": 0.1033, "lr": 2.5458123284902573e-05, "epoch": 9.731048805815162, "percentage": 49.42, "elapsed_time": "3:37:53", "remaining_time": "3:43:02", "throughput": 3304.3, "total_tokens": 43198360}
{"current_steps": 594, "total_steps": 1200, "loss": 0.0881, "lr": 2.539268293279552e-05, "epoch": 9.74766355140187, "percentage": 49.5, "elapsed_time": "3:38:17", "remaining_time": "3:42:41", "throughput": 3303.32, "total_tokens": 43264072}
{"current_steps": 595, "total_steps": 1200, "loss": 0.1286, "lr": 2.5327239889283612e-05, "epoch": 9.764278296988577, "percentage": 49.58, "elapsed_time": "3:38:39", "remaining_time": "3:42:20", "throughput": 3303.37, "total_tokens": 43339600}
{"current_steps": 596, "total_steps": 1200, "loss": 0.1113, "lr": 2.5261794602906145e-05, "epoch": 9.780893042575286, "percentage": 49.67, "elapsed_time": "3:38:58", "remaining_time": "3:41:54", "throughput": 3303.46, "total_tokens": 43401136}
{"current_steps": 597, "total_steps": 1200, "loss": 0.1126, "lr": 2.5196347522217784e-05, "epoch": 9.797507788161994, "percentage": 49.75, "elapsed_time": "3:39:23", "remaining_time": "3:41:36", "throughput": 3302.83, "total_tokens": 43477528}
{"current_steps": 598, "total_steps": 1200, "loss": 0.11, "lr": 2.513089909578549e-05, "epoch": 9.814122533748701, "percentage": 49.83, "elapsed_time": "3:39:48", "remaining_time": "3:41:16", "throughput": 3302.62, "total_tokens": 43557352}
{"current_steps": 599, "total_steps": 1200, "loss": 0.1259, "lr": 2.5065449772185456e-05, "epoch": 9.83073727933541, "percentage": 49.92, "elapsed_time": "3:40:23", "remaining_time": "3:41:07", "throughput": 3300.46, "total_tokens": 43643104}
{"current_steps": 600, "total_steps": 1200, "loss": 0.1101, "lr": 2.5e-05, "epoch": 9.847352024922118, "percentage": 50.0, "elapsed_time": "3:40:45", "remaining_time": "3:40:45", "throughput": 3300.8, "total_tokens": 43721408}
{"current_steps": 601, "total_steps": 1200, "loss": 0.0984, "lr": 2.4934550227814553e-05, "epoch": 9.863966770508826, "percentage": 50.08, "elapsed_time": "3:41:14", "remaining_time": "3:40:30", "throughput": 3299.67, "total_tokens": 43802136}
{"current_steps": 602, "total_steps": 1200, "loss": 0.1168, "lr": 2.486910090421451e-05, "epoch": 9.880581516095535, "percentage": 50.17, "elapsed_time": "3:41:31", "remaining_time": "3:40:03", "throughput": 3300.04, "total_tokens": 43862904}
{"current_steps": 603, "total_steps": 1200, "loss": 0.1232, "lr": 2.480365247778223e-05, "epoch": 9.897196261682243, "percentage": 50.25, "elapsed_time": "3:41:56", "remaining_time": "3:39:44", "throughput": 3299.7, "total_tokens": 43942056}
{"current_steps": 604, "total_steps": 1200, "loss": 0.182, "lr": 2.4738205397093864e-05, "epoch": 9.91381100726895, "percentage": 50.33, "elapsed_time": "3:42:16", "remaining_time": "3:39:20", "throughput": 3300.33, "total_tokens": 44016096}
{"current_steps": 605, "total_steps": 1200, "loss": 0.1335, "lr": 2.4672760110716394e-05, "epoch": 9.93042575285566, "percentage": 50.42, "elapsed_time": "3:42:29", "remaining_time": "3:38:48", "throughput": 3300.98, "total_tokens": 44065504}
{"current_steps": 606, "total_steps": 1200, "loss": 0.1064, "lr": 2.460731706720449e-05, "epoch": 9.947040498442368, "percentage": 50.5, "elapsed_time": "3:42:41", "remaining_time": "3:38:16", "throughput": 3301.75, "total_tokens": 44114776}
{"current_steps": 607, "total_steps": 1200, "loss": 0.1359, "lr": 2.4541876715097432e-05, "epoch": 9.963655244029075, "percentage": 50.58, "elapsed_time": "3:42:57", "remaining_time": "3:37:48", "throughput": 3302.29, "total_tokens": 44175184}
{"current_steps": 608, "total_steps": 1200, "loss": 0.0915, "lr": 2.447643950291608e-05, "epoch": 9.980269989615785, "percentage": 50.67, "elapsed_time": "3:43:30", "remaining_time": "3:37:37", "throughput": 3300.68, "total_tokens": 44265024}
{"current_steps": 609, "total_steps": 1200, "loss": 0.1024, "lr": 2.4411005879159753e-05, "epoch": 9.996884735202492, "percentage": 50.75, "elapsed_time": "3:44:00", "remaining_time": "3:37:23", "throughput": 3300.05, "total_tokens": 44355400}
{"current_steps": 610, "total_steps": 1200, "loss": 0.087, "lr": 2.4345576292303176e-05, "epoch": 10.0, "percentage": 50.83, "elapsed_time": "3:44:04", "remaining_time": "3:36:43", "throughput": 3300.32, "total_tokens": 44370360}
{"current_steps": 611, "total_steps": 1200, "loss": 0.0902, "lr": 2.4280151190793417e-05, "epoch": 10.016614745586708, "percentage": 50.92, "elapsed_time": "3:44:30", "remaining_time": "3:36:25", "throughput": 3299.69, "total_tokens": 44446816}
{"current_steps": 612, "total_steps": 1200, "loss": 0.1038, "lr": 2.4214731023046793e-05, "epoch": 10.033229491173417, "percentage": 51.0, "elapsed_time": "3:44:44", "remaining_time": "3:35:55", "throughput": 3300.39, "total_tokens": 44503632}
{"current_steps": 613, "total_steps": 1200, "loss": 0.1036, "lr": 2.4149316237445812e-05, "epoch": 10.049844236760125, "percentage": 51.08, "elapsed_time": "3:45:11", "remaining_time": "3:35:38", "throughput": 3300.2, "total_tokens": 44590320}
{"current_steps": 614, "total_steps": 1200, "loss": 0.1014, "lr": 2.408390728233609e-05, "epoch": 10.066458982346832, "percentage": 51.17, "elapsed_time": "3:45:27", "remaining_time": "3:35:10", "throughput": 3301.03, "total_tokens": 44655224}
{"current_steps": 615, "total_steps": 1200, "loss": 0.1802, "lr": 2.4018504606023293e-05, "epoch": 10.083073727933542, "percentage": 51.25, "elapsed_time": "3:45:53", "remaining_time": "3:34:52", "throughput": 3300.63, "total_tokens": 44736200}
{"current_steps": 616, "total_steps": 1200, "loss": 0.0907, "lr": 2.3953108656770016e-05, "epoch": 10.09968847352025, "percentage": 51.33, "elapsed_time": "3:46:15", "remaining_time": "3:34:30", "throughput": 3300.44, "total_tokens": 44804416}
{"current_steps": 617, "total_steps": 1200, "loss": 0.0943, "lr": 2.3887719882792785e-05, "epoch": 10.116303219106957, "percentage": 51.42, "elapsed_time": "3:46:34", "remaining_time": "3:34:05", "throughput": 3300.81, "total_tokens": 44873864}
{"current_steps": 618, "total_steps": 1200, "loss": 0.1052, "lr": 2.3822338732258937e-05, "epoch": 10.132917964693666, "percentage": 51.5, "elapsed_time": "3:46:51", "remaining_time": "3:33:39", "throughput": 3301.26, "total_tokens": 44936736}
{"current_steps": 619, "total_steps": 1200, "loss": 0.0812, "lr": 2.3756965653283557e-05, "epoch": 10.149532710280374, "percentage": 51.58, "elapsed_time": "3:47:18", "remaining_time": "3:33:21", "throughput": 3301.38, "total_tokens": 45026952}
{"current_steps": 620, "total_steps": 1200, "loss": 0.1003, "lr": 2.3691601093926404e-05, "epoch": 10.166147455867081, "percentage": 51.67, "elapsed_time": "3:47:45", "remaining_time": "3:33:04", "throughput": 3300.57, "total_tokens": 45104816}
{"current_steps": 621, "total_steps": 1200, "loss": 0.0952, "lr": 2.3626245502188864e-05, "epoch": 10.18276220145379, "percentage": 51.75, "elapsed_time": "3:48:10", "remaining_time": "3:32:44", "throughput": 3300.02, "total_tokens": 45177392}
{"current_steps": 622, "total_steps": 1200, "loss": 0.1141, "lr": 2.3560899326010822e-05, "epoch": 10.199376947040498, "percentage": 51.83, "elapsed_time": "3:48:29", "remaining_time": "3:32:19", "throughput": 3299.64, "total_tokens": 45237200}
{"current_steps": 623, "total_steps": 1200, "loss": 0.1083, "lr": 2.3495563013267664e-05, "epoch": 10.215991692627206, "percentage": 51.92, "elapsed_time": "3:48:45", "remaining_time": "3:31:51", "throughput": 3300.01, "total_tokens": 45293376}
{"current_steps": 624, "total_steps": 1200, "loss": 0.1032, "lr": 2.3430237011767167e-05, "epoch": 10.232606438213915, "percentage": 52.0, "elapsed_time": "3:49:08", "remaining_time": "3:31:30", "throughput": 3299.98, "total_tokens": 45369376}
{"current_steps": 625, "total_steps": 1200, "loss": 0.0921, "lr": 2.3364921769246423e-05, "epoch": 10.249221183800623, "percentage": 52.08, "elapsed_time": "3:49:33", "remaining_time": "3:31:11", "throughput": 3299.08, "total_tokens": 45439920}