|
{"current_steps": 2, "total_steps": 1320, "loss": 2.4685, "learning_rate": 4e-07, "epoch": 0.0030303030303030303, "percentage": 0.15, "elapsed_time": "0:00:08", "remaining_time": "1:35:23", "throughput": "1185.43", "total_tokens": 10296} |
|
{"current_steps": 4, "total_steps": 1320, "loss": 2.4399, "learning_rate": 8e-07, "epoch": 0.006060606060606061, "percentage": 0.3, "elapsed_time": "0:00:15", "remaining_time": "1:25:43", "throughput": "1303.29", "total_tokens": 20376} |
|
{"current_steps": 6, "total_steps": 1320, "loss": 2.3101, "learning_rate": 1.2e-06, "epoch": 0.00909090909090909, "percentage": 0.45, "elapsed_time": "0:00:24", "remaining_time": "1:27:39", "throughput": "1360.20", "total_tokens": 32664} |
|
{"current_steps": 8, "total_steps": 1320, "loss": 2.2743, "learning_rate": 1.6e-06, "epoch": 0.012121212121212121, "percentage": 0.61, "elapsed_time": "0:00:30", "remaining_time": "1:23:14", "throughput": "1376.04", "total_tokens": 41904} |
|
{"current_steps": 10, "total_steps": 1320, "loss": 2.355, "learning_rate": 2e-06, "epoch": 0.015151515151515152, "percentage": 0.76, "elapsed_time": "0:00:37", "remaining_time": "1:22:52", "throughput": "1390.44", "total_tokens": 52776} |
|
{"current_steps": 12, "total_steps": 1320, "loss": 2.633, "learning_rate": 1.999990798125535e-06, "epoch": 0.01818181818181818, "percentage": 0.91, "elapsed_time": "0:00:44", "remaining_time": "1:19:58", "throughput": "1396.29", "total_tokens": 61464} |
|
{"current_steps": 14, "total_steps": 1320, "loss": 2.3386, "learning_rate": 1.9999631927138275e-06, "epoch": 0.021212121212121213, "percentage": 1.06, "elapsed_time": "0:00:51", "remaining_time": "1:20:22", "throughput": "1404.84", "total_tokens": 72624} |
|
{"current_steps": 16, "total_steps": 1320, "loss": 2.3536, "learning_rate": 1.9999171843999306e-06, "epoch": 0.024242424242424242, "percentage": 1.21, "elapsed_time": "0:00:58", "remaining_time": "1:18:58", "throughput": "1407.68", "total_tokens": 81840} |
|
{"current_steps": 18, "total_steps": 1320, "loss": 2.2979, "learning_rate": 1.9998527742422515e-06, "epoch": 0.02727272727272727, "percentage": 1.36, "elapsed_time": "0:01:05", "remaining_time": "1:18:38", "throughput": "1409.73", "total_tokens": 91968} |
|
{"current_steps": 20, "total_steps": 1320, "loss": 2.3755, "learning_rate": 1.9997699637225253e-06, "epoch": 0.030303030303030304, "percentage": 1.52, "elapsed_time": "0:01:12", "remaining_time": "1:18:53", "throughput": "1414.29", "total_tokens": 102984} |
|
{"current_steps": 20, "total_steps": 1320, "eval_loss": 2.3641138076782227, "epoch": 0.030303030303030304, "percentage": 1.52, "elapsed_time": "0:01:18", "remaining_time": "1:25:11", "throughput": "1309.62", "total_tokens": 102984} |
|
{"current_steps": 22, "total_steps": 1320, "loss": 2.286, "learning_rate": 1.9996687547457825e-06, "epoch": 0.03333333333333333, "percentage": 1.67, "elapsed_time": "0:01:25", "remaining_time": "1:24:26", "throughput": "1320.13", "total_tokens": 113352} |
|
{"current_steps": 24, "total_steps": 1320, "loss": 2.3933, "learning_rate": 1.999549149640303e-06, "epoch": 0.03636363636363636, "percentage": 1.82, "elapsed_time": "0:01:34", "remaining_time": "1:24:39", "throughput": "1330.73", "total_tokens": 125184} |
|
{"current_steps": 26, "total_steps": 1320, "loss": 2.1378, "learning_rate": 1.9994111511575657e-06, "epoch": 0.03939393939393939, "percentage": 1.97, "elapsed_time": "0:01:41", "remaining_time": "1:23:58", "throughput": "1338.34", "total_tokens": 135480} |
|
{"current_steps": 28, "total_steps": 1320, "loss": 2.2551, "learning_rate": 1.999254762472182e-06, "epoch": 0.04242424242424243, "percentage": 2.12, "elapsed_time": "0:01:49", "remaining_time": "1:24:11", "throughput": "1346.35", "total_tokens": 147384} |
|
{"current_steps": 30, "total_steps": 1320, "loss": 2.506, "learning_rate": 1.999079987181824e-06, "epoch": 0.045454545454545456, "percentage": 2.27, "elapsed_time": "0:01:56", "remaining_time": "1:23:13", "throughput": "1351.29", "total_tokens": 156912} |
|
{"current_steps": 32, "total_steps": 1320, "loss": 2.4742, "learning_rate": 1.9988868293071435e-06, "epoch": 0.048484848484848485, "percentage": 2.42, "elapsed_time": "0:02:03", "remaining_time": "1:22:49", "throughput": "1357.12", "total_tokens": 167568} |
|
{"current_steps": 34, "total_steps": 1320, "loss": 2.5007, "learning_rate": 1.998675293291676e-06, "epoch": 0.051515151515151514, "percentage": 2.58, "elapsed_time": "0:02:09", "remaining_time": "1:21:48", "throughput": "1360.82", "total_tokens": 176616} |
|
{"current_steps": 36, "total_steps": 1320, "loss": 2.4632, "learning_rate": 1.998445384001741e-06, "epoch": 0.05454545454545454, "percentage": 2.73, "elapsed_time": "0:02:17", "remaining_time": "1:21:35", "throughput": "1364.52", "total_tokens": 187272} |
|
{"current_steps": 38, "total_steps": 1320, "loss": 2.4556, "learning_rate": 1.99819710672633e-06, "epoch": 0.05757575757575758, "percentage": 2.88, "elapsed_time": "0:02:24", "remaining_time": "1:21:00", "throughput": "1367.19", "total_tokens": 196992} |
|
{"current_steps": 40, "total_steps": 1320, "loss": 2.5355, "learning_rate": 1.9979304671769838e-06, "epoch": 0.06060606060606061, "percentage": 3.03, "elapsed_time": "0:02:32", "remaining_time": "1:21:13", "throughput": "1371.22", "total_tokens": 208824} |
|
{"current_steps": 40, "total_steps": 1320, "eval_loss": 2.361894130706787, "epoch": 0.06060606060606061, "percentage": 3.03, "elapsed_time": "0:02:38", "remaining_time": "1:24:19", "throughput": "1320.83", "total_tokens": 208824} |
|
{"current_steps": 42, "total_steps": 1320, "loss": 2.497, "learning_rate": 1.997645471487661e-06, "epoch": 0.06363636363636363, "percentage": 3.18, "elapsed_time": "0:02:44", "remaining_time": "1:23:11", "throughput": "1324.45", "total_tokens": 217272} |
|
{"current_steps": 44, "total_steps": 1320, "loss": 2.4371, "learning_rate": 1.9973421262145992e-06, "epoch": 0.06666666666666667, "percentage": 3.33, "elapsed_time": "0:02:52", "remaining_time": "1:23:24", "throughput": "1330.36", "total_tokens": 229560} |
|
{"current_steps": 46, "total_steps": 1320, "loss": 2.4757, "learning_rate": 1.99702043833616e-06, "epoch": 0.0696969696969697, "percentage": 3.48, "elapsed_time": "0:02:59", "remaining_time": "1:22:52", "throughput": "1334.23", "total_tokens": 239568} |
|
{"current_steps": 48, "total_steps": 1320, "loss": 2.4514, "learning_rate": 1.9966804152526726e-06, "epoch": 0.07272727272727272, "percentage": 3.64, "elapsed_time": "0:03:07", "remaining_time": "1:22:59", "throughput": "1339.28", "total_tokens": 251664} |
|
{"current_steps": 50, "total_steps": 1320, "loss": 2.3474, "learning_rate": 1.996322064786261e-06, "epoch": 0.07575757575757576, "percentage": 3.79, "elapsed_time": "0:03:15", "remaining_time": "1:22:51", "throughput": "1343.86", "total_tokens": 263040} |
|
{"current_steps": 52, "total_steps": 1320, "loss": 2.4297, "learning_rate": 1.9959453951806656e-06, "epoch": 0.07878787878787878, "percentage": 3.94, "elapsed_time": "0:03:21", "remaining_time": "1:21:52", "throughput": "1345.69", "total_tokens": 271080} |
|
{"current_steps": 54, "total_steps": 1320, "loss": 2.6676, "learning_rate": 1.995550415101052e-06, "epoch": 0.08181818181818182, "percentage": 4.09, "elapsed_time": "0:03:28", "remaining_time": "1:21:39", "throughput": "1349.37", "total_tokens": 282000} |
|
{"current_steps": 56, "total_steps": 1320, "loss": 2.1799, "learning_rate": 1.9951371336338145e-06, "epoch": 0.08484848484848485, "percentage": 4.24, "elapsed_time": "0:03:34", "remaining_time": "1:20:52", "throughput": "1351.62", "total_tokens": 290568} |
|
{"current_steps": 58, "total_steps": 1320, "loss": 2.5315, "learning_rate": 1.994705560286361e-06, "epoch": 0.08787878787878788, "percentage": 4.39, "elapsed_time": "0:03:40", "remaining_time": "1:20:03", "throughput": "1353.89", "total_tokens": 298920} |
|
{"current_steps": 60, "total_steps": 1320, "loss": 2.4679, "learning_rate": 1.994255704986903e-06, "epoch": 0.09090909090909091, "percentage": 4.55, "elapsed_time": "0:03:48", "remaining_time": "1:19:53", "throughput": "1356.89", "total_tokens": 309744} |
|
{"current_steps": 60, "total_steps": 1320, "eval_loss": 2.360027551651001, "epoch": 0.09090909090909091, "percentage": 4.55, "elapsed_time": "0:03:54", "remaining_time": "1:21:55", "throughput": "1323.16", "total_tokens": 309744} |
|
{"current_steps": 62, "total_steps": 1320, "loss": 2.4576, "learning_rate": 1.993787578084219e-06, "epoch": 0.09393939393939393, "percentage": 4.7, "elapsed_time": "0:04:02", "remaining_time": "1:21:54", "throughput": "1326.91", "total_tokens": 321360} |
|
{"current_steps": 64, "total_steps": 1320, "loss": 2.275, "learning_rate": 1.9933011903474228e-06, "epoch": 0.09696969696969697, "percentage": 4.85, "elapsed_time": "0:04:10", "remaining_time": "1:21:47", "throughput": "1330.64", "total_tokens": 332736} |
|
{"current_steps": 66, "total_steps": 1320, "loss": 2.2761, "learning_rate": 1.992796552965711e-06, "epoch": 0.1, "percentage": 5.0, "elapsed_time": "0:04:18", "remaining_time": "1:21:47", "throughput": "1334.05", "total_tokens": 344568} |
|
{"current_steps": 68, "total_steps": 1320, "loss": 2.3385, "learning_rate": 1.9922736775481083e-06, "epoch": 0.10303030303030303, "percentage": 5.15, "elapsed_time": "0:04:26", "remaining_time": "1:21:49", "throughput": "1337.34", "total_tokens": 356616} |
|
{"current_steps": 70, "total_steps": 1320, "loss": 2.3342, "learning_rate": 1.991732576123199e-06, "epoch": 0.10606060606060606, "percentage": 5.3, "elapsed_time": "0:04:34", "remaining_time": "1:21:38", "throughput": "1340.34", "total_tokens": 367680} |
|
{"current_steps": 72, "total_steps": 1320, "loss": 2.3604, "learning_rate": 1.9911732611388524e-06, "epoch": 0.10909090909090909, "percentage": 5.45, "elapsed_time": "0:04:41", "remaining_time": "1:21:12", "throughput": "1342.46", "total_tokens": 377376} |
|
{"current_steps": 74, "total_steps": 1320, "loss": 2.2869, "learning_rate": 1.9905957454619343e-06, "epoch": 0.11212121212121212, "percentage": 5.61, "elapsed_time": "0:04:48", "remaining_time": "1:20:51", "throughput": "1344.63", "total_tokens": 387432} |
|
{"current_steps": 76, "total_steps": 1320, "loss": 2.601, "learning_rate": 1.9900000423780104e-06, "epoch": 0.11515151515151516, "percentage": 5.76, "elapsed_time": "0:04:53", "remaining_time": "1:20:11", "throughput": "1346.48", "total_tokens": 395808} |
|
{"current_steps": 78, "total_steps": 1320, "loss": 2.3741, "learning_rate": 1.9893861655910444e-06, "epoch": 0.11818181818181818, "percentage": 5.91, "elapsed_time": "0:05:02", "remaining_time": "1:20:09", "throughput": "1349.26", "total_tokens": 407568} |
|
{"current_steps": 80, "total_steps": 1320, "loss": 2.3929, "learning_rate": 1.988754129223079e-06, "epoch": 0.12121212121212122, "percentage": 6.06, "elapsed_time": "0:05:09", "remaining_time": "1:19:50", "throughput": "1351.36", "total_tokens": 417648} |
|
{"current_steps": 80, "total_steps": 1320, "eval_loss": 2.3575997352600098, "epoch": 0.12121212121212122, "percentage": 6.06, "elapsed_time": "0:05:14", "remaining_time": "1:21:20", "throughput": "1326.38", "total_tokens": 417648} |
|
{"current_steps": 82, "total_steps": 1320, "loss": 2.3717, "learning_rate": 1.9881039478139115e-06, "epoch": 0.12424242424242424, "percentage": 6.21, "elapsed_time": "0:05:22", "remaining_time": "1:21:07", "throughput": "1329.30", "total_tokens": 428568} |
|
{"current_steps": 84, "total_steps": 1320, "loss": 2.2728, "learning_rate": 1.9874356363207624e-06, "epoch": 0.12727272727272726, "percentage": 6.36, "elapsed_time": "0:05:28", "remaining_time": "1:20:38", "throughput": "1331.16", "total_tokens": 437688} |
|
{"current_steps": 86, "total_steps": 1320, "loss": 2.5347, "learning_rate": 1.986749210117927e-06, "epoch": 0.1303030303030303, "percentage": 6.52, "elapsed_time": "0:05:35", "remaining_time": "1:20:15", "throughput": "1333.15", "total_tokens": 447408} |
|
{"current_steps": 88, "total_steps": 1320, "loss": 2.4081, "learning_rate": 1.986044684996425e-06, "epoch": 0.13333333333333333, "percentage": 6.67, "elapsed_time": "0:05:41", "remaining_time": "1:19:45", "throughput": "1334.40", "total_tokens": 456120} |
|
{"current_steps": 90, "total_steps": 1320, "loss": 2.5697, "learning_rate": 1.985322077163636e-06, "epoch": 0.13636363636363635, "percentage": 6.82, "elapsed_time": "0:05:49", "remaining_time": "1:19:36", "throughput": "1336.88", "total_tokens": 467208} |
|
{"current_steps": 92, "total_steps": 1320, "loss": 2.3267, "learning_rate": 1.9845814032429257e-06, "epoch": 0.1393939393939394, "percentage": 6.97, "elapsed_time": "0:05:56", "remaining_time": "1:19:17", "throughput": "1338.71", "total_tokens": 477168} |
|
{"current_steps": 94, "total_steps": 1320, "loss": 2.5342, "learning_rate": 1.9838226802732656e-06, "epoch": 0.14242424242424243, "percentage": 7.12, "elapsed_time": "0:06:03", "remaining_time": "1:18:57", "throughput": "1340.43", "total_tokens": 486888} |
|
{"current_steps": 96, "total_steps": 1320, "loss": 2.5662, "learning_rate": 1.9830459257088395e-06, "epoch": 0.14545454545454545, "percentage": 7.27, "elapsed_time": "0:06:09", "remaining_time": "1:18:36", "throughput": "1342.29", "total_tokens": 496584} |
|
{"current_steps": 98, "total_steps": 1320, "loss": 2.3294, "learning_rate": 1.982251157418642e-06, "epoch": 0.1484848484848485, "percentage": 7.42, "elapsed_time": "0:06:14", "remaining_time": "1:17:55", "throughput": "1343.39", "total_tokens": 503736} |
|
{"current_steps": 100, "total_steps": 1320, "loss": 2.6105, "learning_rate": 1.981438393686069e-06, "epoch": 0.15151515151515152, "percentage": 7.58, "elapsed_time": "0:06:21", "remaining_time": "1:17:38", "throughput": "1345.05", "total_tokens": 513600} |
|
{"current_steps": 100, "total_steps": 1320, "eval_loss": 2.3544414043426514, "epoch": 0.15151515151515152, "percentage": 7.58, "elapsed_time": "0:06:27", "remaining_time": "1:18:49", "throughput": "1324.85", "total_tokens": 513600} |
|
{"current_steps": 102, "total_steps": 1320, "loss": 2.6435, "learning_rate": 1.980607653208495e-06, "epoch": 0.15454545454545454, "percentage": 7.73, "elapsed_time": "0:06:34", "remaining_time": "1:18:25", "throughput": "1319.48", "total_tokens": 519960} |
|
{"current_steps": 104, "total_steps": 1320, "loss": 2.4326, "learning_rate": 1.9797589550968434e-06, "epoch": 0.15757575757575756, "percentage": 7.88, "elapsed_time": "0:06:40", "remaining_time": "1:18:04", "throughput": "1321.20", "total_tokens": 529392} |
|
{"current_steps": 106, "total_steps": 1320, "loss": 2.5169, "learning_rate": 1.9788923188751478e-06, "epoch": 0.1606060606060606, "percentage": 8.03, "elapsed_time": "0:06:45", "remaining_time": "1:17:29", "throughput": "1322.68", "total_tokens": 537000} |
|
{"current_steps": 108, "total_steps": 1320, "loss": 2.3097, "learning_rate": 1.978007764480103e-06, "epoch": 0.16363636363636364, "percentage": 8.18, "elapsed_time": "0:06:52", "remaining_time": "1:17:12", "throughput": "1324.65", "total_tokens": 546864} |
|
{"current_steps": 110, "total_steps": 1320, "loss": 2.2565, "learning_rate": 1.977105312260605e-06, "epoch": 0.16666666666666666, "percentage": 8.33, "elapsed_time": "0:07:00", "remaining_time": "1:17:09", "throughput": "1326.82", "total_tokens": 558432} |
|
{"current_steps": 112, "total_steps": 1320, "loss": 2.3503, "learning_rate": 1.976184982977284e-06, "epoch": 0.1696969696969697, "percentage": 8.48, "elapsed_time": "0:07:08", "remaining_time": "1:16:57", "throughput": "1329.06", "total_tokens": 569016} |
|
{"current_steps": 114, "total_steps": 1320, "loss": 2.2801, "learning_rate": 1.975246797802026e-06, "epoch": 0.17272727272727273, "percentage": 8.64, "elapsed_time": "0:07:15", "remaining_time": "1:16:52", "throughput": "1331.24", "total_tokens": 580392} |
|
{"current_steps": 116, "total_steps": 1320, "loss": 2.4019, "learning_rate": 1.974290778317487e-06, "epoch": 0.17575757575757575, "percentage": 8.79, "elapsed_time": "0:07:23", "remaining_time": "1:16:38", "throughput": "1332.90", "total_tokens": 590568} |
|
{"current_steps": 118, "total_steps": 1320, "loss": 2.3779, "learning_rate": 1.973316946516595e-06, "epoch": 0.1787878787878788, "percentage": 8.94, "elapsed_time": "0:07:30", "remaining_time": "1:16:31", "throughput": "1334.94", "total_tokens": 601704} |
|
{"current_steps": 120, "total_steps": 1320, "loss": 2.2488, "learning_rate": 1.9723253248020455e-06, "epoch": 0.18181818181818182, "percentage": 9.09, "elapsed_time": "0:07:39", "remaining_time": "1:16:30", "throughput": "1336.76", "total_tokens": 613584} |
|
{"current_steps": 120, "total_steps": 1320, "eval_loss": 2.3512158393859863, "epoch": 0.18181818181818182, "percentage": 9.09, "elapsed_time": "0:07:44", "remaining_time": "1:17:28", "throughput": "1320.01", "total_tokens": 613584} |
|
{"current_steps": 122, "total_steps": 1320, "loss": 2.4906, "learning_rate": 1.9713159359857833e-06, "epoch": 0.18484848484848485, "percentage": 9.24, "elapsed_time": "0:07:52", "remaining_time": "1:17:20", "throughput": "1322.02", "total_tokens": 624792} |
|
{"current_steps": 124, "total_steps": 1320, "loss": 2.5957, "learning_rate": 1.9702888032884826e-06, "epoch": 0.18787878787878787, "percentage": 9.39, "elapsed_time": "0:08:00", "remaining_time": "1:17:12", "throughput": "1323.97", "total_tokens": 635832} |
|
{"current_steps": 126, "total_steps": 1320, "loss": 2.1759, "learning_rate": 1.969243950339009e-06, "epoch": 0.19090909090909092, "percentage": 9.55, "elapsed_time": "0:08:08", "remaining_time": "1:17:08", "throughput": "1325.92", "total_tokens": 647664} |
|
{"current_steps": 128, "total_steps": 1320, "loss": 2.5093, "learning_rate": 1.9681814011738758e-06, "epoch": 0.19393939393939394, "percentage": 9.7, "elapsed_time": "0:08:14", "remaining_time": "1:16:49", "throughput": "1327.37", "total_tokens": 656952} |
|
{"current_steps": 130, "total_steps": 1320, "loss": 2.5727, "learning_rate": 1.9671011802366934e-06, "epoch": 0.19696969696969696, "percentage": 9.85, "elapsed_time": "0:08:20", "remaining_time": "1:16:17", "throughput": "1328.17", "total_tokens": 664104} |
|
{"current_steps": 132, "total_steps": 1320, "loss": 2.3728, "learning_rate": 1.9660033123776056e-06, "epoch": 0.2, "percentage": 10.0, "elapsed_time": "0:08:26", "remaining_time": "1:16:02", "throughput": "1329.58", "total_tokens": 674016} |
|
{"current_steps": 134, "total_steps": 1320, "loss": 2.4271, "learning_rate": 1.964887822852718e-06, "epoch": 0.20303030303030303, "percentage": 10.15, "elapsed_time": "0:08:34", "remaining_time": "1:15:51", "throughput": "1331.16", "total_tokens": 684480} |
|
{"current_steps": 136, "total_steps": 1320, "loss": 2.5413, "learning_rate": 1.963754737323516e-06, "epoch": 0.20606060606060606, "percentage": 10.3, "elapsed_time": "0:08:40", "remaining_time": "1:15:34", "throughput": "1332.60", "total_tokens": 694056} |
|
{"current_steps": 138, "total_steps": 1320, "loss": 2.4513, "learning_rate": 1.9626040818562783e-06, "epoch": 0.20909090909090908, "percentage": 10.45, "elapsed_time": "0:08:48", "remaining_time": "1:15:24", "throughput": "1334.06", "total_tokens": 704640} |
|
{"current_steps": 140, "total_steps": 1320, "loss": 2.3866, "learning_rate": 1.9614358829214722e-06, "epoch": 0.21212121212121213, "percentage": 10.61, "elapsed_time": "0:08:54", "remaining_time": "1:15:04", "throughput": "1335.30", "total_tokens": 713640} |
|
{"current_steps": 140, "total_steps": 1320, "eval_loss": 2.349419355392456, "epoch": 0.21212121212121213, "percentage": 10.61, "elapsed_time": "0:09:00", "remaining_time": "1:15:53", "throughput": "1320.89", "total_tokens": 713640} |
|
{"current_steps": 142, "total_steps": 1320, "loss": 2.4217, "learning_rate": 1.960250167393147e-06, "epoch": 0.21515151515151515, "percentage": 10.76, "elapsed_time": "0:09:06", "remaining_time": "1:15:35", "throughput": "1322.24", "total_tokens": 722880} |
|
{"current_steps": 144, "total_steps": 1320, "loss": 2.5271, "learning_rate": 1.959046962548316e-06, "epoch": 0.21818181818181817, "percentage": 10.91, "elapsed_time": "0:09:13", "remaining_time": "1:15:23", "throughput": "1323.63", "total_tokens": 733104} |
|
{"current_steps": 146, "total_steps": 1320, "loss": 2.4228, "learning_rate": 1.9578262960663305e-06, "epoch": 0.22121212121212122, "percentage": 11.06, "elapsed_time": "0:09:22", "remaining_time": "1:15:21", "throughput": "1325.48", "total_tokens": 745392} |
|
{"current_steps": 148, "total_steps": 1320, "loss": 2.1895, "learning_rate": 1.9565881960282384e-06, "epoch": 0.22424242424242424, "percentage": 11.21, "elapsed_time": "0:09:29", "remaining_time": "1:15:10", "throughput": "1326.79", "total_tokens": 755736} |
|
{"current_steps": 150, "total_steps": 1320, "loss": 2.4702, "learning_rate": 1.9553326909161436e-06, "epoch": 0.22727272727272727, "percentage": 11.36, "elapsed_time": "0:09:37", "remaining_time": "1:15:03", "throughput": "1328.44", "total_tokens": 767040} |
|
{"current_steps": 152, "total_steps": 1320, "loss": 2.4535, "learning_rate": 1.954059809612546e-06, "epoch": 0.23030303030303031, "percentage": 11.52, "elapsed_time": "0:09:44", "remaining_time": "1:14:47", "throughput": "1329.61", "total_tokens": 776496} |
|
{"current_steps": 154, "total_steps": 1320, "loss": 2.3757, "learning_rate": 1.9527695813996817e-06, "epoch": 0.23333333333333334, "percentage": 11.67, "elapsed_time": "0:09:50", "remaining_time": "1:14:29", "throughput": "1330.65", "total_tokens": 785568} |
|
{"current_steps": 156, "total_steps": 1320, "loss": 2.3609, "learning_rate": 1.9514620359588454e-06, "epoch": 0.23636363636363636, "percentage": 11.82, "elapsed_time": "0:09:58", "remaining_time": "1:14:26", "throughput": "1332.19", "total_tokens": 797496} |
|
{"current_steps": 158, "total_steps": 1320, "loss": 2.4576, "learning_rate": 1.9501372033697097e-06, "epoch": 0.23939393939393938, "percentage": 11.97, "elapsed_time": "0:10:06", "remaining_time": "1:14:18", "throughput": "1333.69", "total_tokens": 808536} |
|
{"current_steps": 160, "total_steps": 1320, "loss": 2.2509, "learning_rate": 1.948795114109632e-06, "epoch": 0.24242424242424243, "percentage": 12.12, "elapsed_time": "0:10:13", "remaining_time": "1:14:06", "throughput": "1334.79", "total_tokens": 818592} |
|
{"current_steps": 160, "total_steps": 1320, "eval_loss": 2.3466238975524902, "epoch": 0.24242424242424243, "percentage": 12.12, "elapsed_time": "0:10:19", "remaining_time": "1:14:48", "throughput": "1322.24", "total_tokens": 818592} |
|
{"current_steps": 162, "total_steps": 1320, "loss": 2.4558, "learning_rate": 1.947435799052955e-06, "epoch": 0.24545454545454545, "percentage": 12.27, "elapsed_time": "0:10:25", "remaining_time": "1:14:33", "throughput": "1323.46", "total_tokens": 828336} |
|
{"current_steps": 164, "total_steps": 1320, "loss": 2.3038, "learning_rate": 1.9460592894702946e-06, "epoch": 0.24848484848484848, "percentage": 12.42, "elapsed_time": "0:10:32", "remaining_time": "1:14:19", "throughput": "1324.66", "total_tokens": 838080} |
|
{"current_steps": 166, "total_steps": 1320, "loss": 2.2954, "learning_rate": 1.944665617027823e-06, "epoch": 0.2515151515151515, "percentage": 12.58, "elapsed_time": "0:10:41", "remaining_time": "1:14:16", "throughput": "1326.15", "total_tokens": 850128} |
|
{"current_steps": 168, "total_steps": 1320, "loss": 2.2327, "learning_rate": 1.943254813786535e-06, "epoch": 0.2545454545454545, "percentage": 12.73, "elapsed_time": "0:10:49", "remaining_time": "1:14:14", "throughput": "1327.69", "total_tokens": 862416} |
|
{"current_steps": 170, "total_steps": 1320, "loss": 2.487, "learning_rate": 1.941826912201518e-06, "epoch": 0.25757575757575757, "percentage": 12.88, "elapsed_time": "0:10:57", "remaining_time": "1:14:08", "throughput": "1328.98", "total_tokens": 873936} |
|
{"current_steps": 172, "total_steps": 1320, "loss": 2.6736, "learning_rate": 1.9403819451212004e-06, "epoch": 0.2606060606060606, "percentage": 13.03, "elapsed_time": "0:11:04", "remaining_time": "1:13:53", "throughput": "1330.08", "total_tokens": 883584} |
|
{"current_steps": 174, "total_steps": 1320, "loss": 2.2313, "learning_rate": 1.938919945786595e-06, "epoch": 0.2636363636363636, "percentage": 13.18, "elapsed_time": "0:11:10", "remaining_time": "1:13:36", "throughput": "1331.04", "total_tokens": 892632} |
|
{"current_steps": 176, "total_steps": 1320, "loss": 2.4444, "learning_rate": 1.9374409478305385e-06, "epoch": 0.26666666666666666, "percentage": 13.33, "elapsed_time": "0:11:19", "remaining_time": "1:13:34", "throughput": "1332.39", "total_tokens": 904920} |
|
{"current_steps": 178, "total_steps": 1320, "loss": 2.5038, "learning_rate": 1.935944985276914e-06, "epoch": 0.2696969696969697, "percentage": 13.48, "elapsed_time": "0:11:25", "remaining_time": "1:13:17", "throughput": "1333.19", "total_tokens": 913752} |
|
{"current_steps": 180, "total_steps": 1320, "loss": 2.3807, "learning_rate": 1.9344320925398713e-06, "epoch": 0.2727272727272727, "percentage": 13.64, "elapsed_time": "0:11:30", "remaining_time": "1:12:52", "throughput": "1333.89", "total_tokens": 920952} |
|
{"current_steps": 180, "total_steps": 1320, "eval_loss": 2.3428144454956055, "epoch": 0.2727272727272727, "percentage": 13.64, "elapsed_time": "0:11:36", "remaining_time": "1:13:29", "throughput": "1322.74", "total_tokens": 920952} |
|
{"current_steps": 182, "total_steps": 1320, "loss": 2.5033, "learning_rate": 1.932902304423033e-06, "epoch": 0.27575757575757576, "percentage": 13.79, "elapsed_time": "0:11:44", "remaining_time": "1:13:22", "throughput": "1324.14", "total_tokens": 932280} |
|
{"current_steps": 184, "total_steps": 1320, "loss": 2.275, "learning_rate": 1.931355656118694e-06, "epoch": 0.2787878787878788, "percentage": 13.94, "elapsed_time": "0:11:52", "remaining_time": "1:13:19", "throughput": "1325.54", "total_tokens": 944568} |
|
{"current_steps": 186, "total_steps": 1320, "loss": 2.567, "learning_rate": 1.9297921832070134e-06, "epoch": 0.2818181818181818, "percentage": 14.09, "elapsed_time": "0:11:57", "remaining_time": "1:12:56", "throughput": "1326.26", "total_tokens": 952032} |
|
{"current_steps": 188, "total_steps": 1320, "loss": 2.5257, "learning_rate": 1.928211921655195e-06, "epoch": 0.28484848484848485, "percentage": 14.24, "elapsed_time": "0:12:05", "remaining_time": "1:12:51", "throughput": "1327.63", "total_tokens": 963840} |
|
{"current_steps": 190, "total_steps": 1320, "loss": 2.3678, "learning_rate": 1.9266149078166603e-06, "epoch": 0.2878787878787879, "percentage": 14.39, "elapsed_time": "0:12:13", "remaining_time": "1:12:44", "throughput": "1329.07", "total_tokens": 975288} |
|
{"current_steps": 192, "total_steps": 1320, "loss": 2.232, "learning_rate": 1.9250011784302106e-06, "epoch": 0.2909090909090909, "percentage": 14.55, "elapsed_time": "0:12:22", "remaining_time": "1:12:39", "throughput": "1330.36", "total_tokens": 987144} |
|
{"current_steps": 194, "total_steps": 1320, "loss": 2.1844, "learning_rate": 1.923370770619184e-06, "epoch": 0.29393939393939394, "percentage": 14.7, "elapsed_time": "0:12:30", "remaining_time": "1:12:33", "throughput": "1331.50", "total_tokens": 998664} |
|
{"current_steps": 196, "total_steps": 1320, "loss": 2.3571, "learning_rate": 1.921723721890602e-06, "epoch": 0.296969696969697, "percentage": 14.85, "elapsed_time": "0:12:36", "remaining_time": "1:12:20", "throughput": "1332.44", "total_tokens": 1008504} |
|
{"current_steps": 198, "total_steps": 1320, "loss": 2.472, "learning_rate": 1.920060070134301e-06, "epoch": 0.3, "percentage": 15.0, "elapsed_time": "0:12:42", "remaining_time": "1:12:01", "throughput": "1333.09", "total_tokens": 1016664} |
|
{"current_steps": 200, "total_steps": 1320, "loss": 2.4112, "learning_rate": 1.91837985362207e-06, "epoch": 0.30303030303030304, "percentage": 15.15, "elapsed_time": "0:12:49", "remaining_time": "1:11:47", "throughput": "1334.00", "total_tokens": 1026192} |
|
{"current_steps": 200, "total_steps": 1320, "eval_loss": 2.340877056121826, "epoch": 0.30303030303030304, "percentage": 15.15, "elapsed_time": "0:12:55", "remaining_time": "1:12:20", "throughput": "1323.98", "total_tokens": 1026192} |
|
{"current_steps": 202, "total_steps": 1320, "loss": 2.5731, "learning_rate": 1.9166831110067615e-06, "epoch": 0.30606060606060603, "percentage": 15.3, "elapsed_time": "0:13:04", "remaining_time": "1:12:21", "throughput": "1322.15", "total_tokens": 1037064} |
|
{"current_steps": 204, "total_steps": 1320, "loss": 2.5743, "learning_rate": 1.914969881321407e-06, "epoch": 0.3090909090909091, "percentage": 15.45, "elapsed_time": "0:13:12", "remaining_time": "1:12:17", "throughput": "1323.44", "total_tokens": 1049352} |
|
{"current_steps": 206, "total_steps": 1320, "loss": 2.4531, "learning_rate": 1.913240203978318e-06, "epoch": 0.31212121212121213, "percentage": 15.61, "elapsed_time": "0:13:18", "remaining_time": "1:11:58", "throughput": "1324.09", "total_tokens": 1057272} |
|
{"current_steps": 208, "total_steps": 1320, "loss": 2.5391, "learning_rate": 1.9114941187681783e-06, "epoch": 0.3151515151515151, "percentage": 15.76, "elapsed_time": "0:13:23", "remaining_time": "1:11:38", "throughput": "1324.83", "total_tokens": 1065120} |
|
{"current_steps": 210, "total_steps": 1320, "loss": 2.4156, "learning_rate": 1.9097316658591304e-06, "epoch": 0.3181818181818182, "percentage": 15.91, "elapsed_time": "0:13:30", "remaining_time": "1:11:22", "throughput": "1325.70", "total_tokens": 1074192} |
|
{"current_steps": 212, "total_steps": 1320, "loss": 2.5733, "learning_rate": 1.9079528857958504e-06, "epoch": 0.3212121212121212, "percentage": 16.06, "elapsed_time": "0:13:37", "remaining_time": "1:11:11", "throughput": "1326.69", "total_tokens": 1084416} |
|
{"current_steps": 214, "total_steps": 1320, "loss": 2.5264, "learning_rate": 1.906157819498616e-06, "epoch": 0.3242424242424242, "percentage": 16.21, "elapsed_time": "0:13:43", "remaining_time": "1:10:55", "throughput": "1327.36", "total_tokens": 1092888} |
|
{"current_steps": 216, "total_steps": 1320, "loss": 2.3912, "learning_rate": 1.904346508262363e-06, "epoch": 0.32727272727272727, "percentage": 16.36, "elapsed_time": "0:13:48", "remaining_time": "1:10:34", "throughput": "1327.88", "total_tokens": 1100160} |
|
{"current_steps": 218, "total_steps": 1320, "loss": 2.462, "learning_rate": 1.9025189937557386e-06, "epoch": 0.3303030303030303, "percentage": 16.52, "elapsed_time": "0:13:53", "remaining_time": "1:10:13", "throughput": "1328.49", "total_tokens": 1107360} |
|
{"current_steps": 220, "total_steps": 1320, "loss": 2.2447, "learning_rate": 1.90067531802014e-06, "epoch": 0.3333333333333333, "percentage": 16.67, "elapsed_time": "0:14:01", "remaining_time": "1:10:08", "throughput": "1329.53", "total_tokens": 1119096} |
|
{"current_steps": 220, "total_steps": 1320, "eval_loss": 2.339911937713623, "epoch": 0.3333333333333333, "percentage": 16.67, "elapsed_time": "0:14:07", "remaining_time": "1:10:37", "throughput": "1320.40", "total_tokens": 1119096} |
|
{"current_steps": 222, "total_steps": 1320, "loss": 2.4013, "learning_rate": 1.8988155234687495e-06, "epoch": 0.33636363636363636, "percentage": 16.82, "elapsed_time": "0:14:16", "remaining_time": "1:10:33", "throughput": "1321.63", "total_tokens": 1131384} |
|
{"current_steps": 224, "total_steps": 1320, "loss": 2.4763, "learning_rate": 1.8969396528855567e-06, "epoch": 0.3393939393939394, "percentage": 16.97, "elapsed_time": "0:14:23", "remaining_time": "1:10:26", "throughput": "1322.70", "total_tokens": 1142616} |
|
{"current_steps": 226, "total_steps": 1320, "loss": 2.3552, "learning_rate": 1.8950477494243762e-06, "epoch": 0.3424242424242424, "percentage": 17.12, "elapsed_time": "0:14:32", "remaining_time": "1:10:22", "throughput": "1323.88", "total_tokens": 1154904} |
|
{"current_steps": 228, "total_steps": 1320, "loss": 2.4198, "learning_rate": 1.8931398566078523e-06, "epoch": 0.34545454545454546, "percentage": 17.27, "elapsed_time": "0:14:38", "remaining_time": "1:10:09", "throughput": "1324.68", "total_tokens": 1164264} |
|
{"current_steps": 230, "total_steps": 1320, "loss": 2.5283, "learning_rate": 1.8912160183264612e-06, "epoch": 0.3484848484848485, "percentage": 17.42, "elapsed_time": "0:14:46", "remaining_time": "1:10:01", "throughput": "1325.78", "total_tokens": 1175472} |
|
{"current_steps": 232, "total_steps": 1320, "loss": 2.5246, "learning_rate": 1.8892762788374985e-06, "epoch": 0.3515151515151515, "percentage": 17.58, "elapsed_time": "0:14:53", "remaining_time": "1:09:49", "throughput": "1326.79", "total_tokens": 1185264} |
|
{"current_steps": 234, "total_steps": 1320, "loss": 2.3821, "learning_rate": 1.8873206827640624e-06, "epoch": 0.35454545454545455, "percentage": 17.73, "elapsed_time": "0:15:01", "remaining_time": "1:09:44", "throughput": "1328.02", "total_tokens": 1197408} |
|
{"current_steps": 236, "total_steps": 1320, "loss": 2.3593, "learning_rate": 1.8853492750940275e-06, "epoch": 0.3575757575757576, "percentage": 17.88, "elapsed_time": "0:15:08", "remaining_time": "1:09:34", "throughput": "1328.83", "total_tokens": 1207656} |
|
{"current_steps": 238, "total_steps": 1320, "loss": 2.2261, "learning_rate": 1.8833621011790078e-06, "epoch": 0.3606060606060606, "percentage": 18.03, "elapsed_time": "0:15:16", "remaining_time": "1:09:27", "throughput": "1329.78", "total_tokens": 1219080} |
|
{"current_steps": 240, "total_steps": 1320, "loss": 2.4046, "learning_rate": 1.8813592067333155e-06, "epoch": 0.36363636363636365, "percentage": 18.18, "elapsed_time": "0:15:24", "remaining_time": "1:09:19", "throughput": "1330.69", "total_tokens": 1230048} |
|
{"current_steps": 240, "total_steps": 1320, "eval_loss": 2.339547872543335, "epoch": 0.36363636363636365, "percentage": 18.18, "elapsed_time": "0:15:30", "remaining_time": "1:09:45", "throughput": "1322.36", "total_tokens": 1230048} |
|
{"current_steps": 242, "total_steps": 1320, "loss": 2.1956, "learning_rate": 1.8793406378329092e-06, "epoch": 0.36666666666666664, "percentage": 18.33, "elapsed_time": "0:15:36", "remaining_time": "1:09:32", "throughput": "1323.10", "total_tokens": 1239288} |
|
{"current_steps": 244, "total_steps": 1320, "loss": 2.3843, "learning_rate": 1.877306440914333e-06, "epoch": 0.3696969696969697, "percentage": 18.48, "elapsed_time": "0:15:41", "remaining_time": "1:09:13", "throughput": "1323.58", "total_tokens": 1246512} |
|
{"current_steps": 246, "total_steps": 1320, "loss": 2.2977, "learning_rate": 1.8752566627736477e-06, "epoch": 0.37272727272727274, "percentage": 18.64, "elapsed_time": "0:15:48", "remaining_time": "1:09:01", "throughput": "1324.43", "total_tokens": 1256256} |
|
{"current_steps": 248, "total_steps": 1320, "loss": 2.4575, "learning_rate": 1.8731913505653569e-06, "epoch": 0.37575757575757573, "percentage": 18.79, "elapsed_time": "0:15:55", "remaining_time": "1:08:48", "throughput": "1325.27", "total_tokens": 1265712} |
|
{"current_steps": 250, "total_steps": 1320, "loss": 2.3638, "learning_rate": 1.8711105518013199e-06, "epoch": 0.3787878787878788, "percentage": 18.94, "elapsed_time": "0:16:00", "remaining_time": "1:08:32", "throughput": "1325.88", "total_tokens": 1273848} |
|
{"current_steps": 252, "total_steps": 1320, "loss": 2.388, "learning_rate": 1.869014314349659e-06, "epoch": 0.38181818181818183, "percentage": 19.09, "elapsed_time": "0:16:07", "remaining_time": "1:08:20", "throughput": "1326.65", "total_tokens": 1283664} |
|
{"current_steps": 254, "total_steps": 1320, "loss": 2.3997, "learning_rate": 1.8669026864336591e-06, "epoch": 0.38484848484848483, "percentage": 19.24, "elapsed_time": "0:16:14", "remaining_time": "1:08:10", "throughput": "1327.47", "total_tokens": 1293768} |
|
{"current_steps": 256, "total_steps": 1320, "loss": 2.4629, "learning_rate": 1.8647757166306572e-06, "epoch": 0.3878787878787879, "percentage": 19.39, "elapsed_time": "0:16:21", "remaining_time": "1:07:58", "throughput": "1328.10", "total_tokens": 1303392} |
|
{"current_steps": 258, "total_steps": 1320, "loss": 2.3915, "learning_rate": 1.8626334538709263e-06, "epoch": 0.39090909090909093, "percentage": 19.55, "elapsed_time": "0:16:28", "remaining_time": "1:07:49", "throughput": "1328.88", "total_tokens": 1313784} |
|
{"current_steps": 260, "total_steps": 1320, "loss": 2.4396, "learning_rate": 1.8604759474365492e-06, "epoch": 0.3939393939393939, "percentage": 19.7, "elapsed_time": "0:16:37", "remaining_time": "1:07:45", "throughput": "1329.88", "total_tokens": 1326072} |
|
{"current_steps": 260, "total_steps": 1320, "eval_loss": 2.338432788848877, "epoch": 0.3939393939393939, "percentage": 19.7, "elapsed_time": "0:16:42", "remaining_time": "1:08:08", "throughput": "1322.17", "total_tokens": 1326072} |
|
{"current_steps": 262, "total_steps": 1320, "loss": 2.4028, "learning_rate": 1.858303246960284e-06, "epoch": 0.396969696969697, "percentage": 19.85, "elapsed_time": "0:16:49", "remaining_time": "1:07:57", "throughput": "1322.86", "total_tokens": 1335864} |
|
{"current_steps": 264, "total_steps": 1320, "loss": 2.434, "learning_rate": 1.856115402424423e-06, "epoch": 0.4, "percentage": 20.0, "elapsed_time": "0:16:57", "remaining_time": "1:07:51", "throughput": "1323.73", "total_tokens": 1347552} |
|
{"current_steps": 266, "total_steps": 1320, "loss": 2.3149, "learning_rate": 1.8539124641596437e-06, "epoch": 0.403030303030303, "percentage": 20.15, "elapsed_time": "0:17:04", "remaining_time": "1:07:39", "throughput": "1324.47", "total_tokens": 1356912} |
|
{"current_steps": 268, "total_steps": 1320, "loss": 2.5401, "learning_rate": 1.851694482843849e-06, "epoch": 0.40606060606060607, "percentage": 20.3, "elapsed_time": "0:17:12", "remaining_time": "1:07:32", "throughput": "1325.48", "total_tokens": 1368408} |
|
{"current_steps": 270, "total_steps": 1320, "loss": 2.3905, "learning_rate": 1.8494615095010037e-06, "epoch": 0.4090909090909091, "percentage": 20.45, "elapsed_time": "0:17:20", "remaining_time": "1:07:27", "throughput": "1326.43", "total_tokens": 1380696} |
|
{"current_steps": 272, "total_steps": 1320, "loss": 2.7022, "learning_rate": 1.8472135954999582e-06, "epoch": 0.4121212121212121, "percentage": 20.61, "elapsed_time": "0:17:26", "remaining_time": "1:07:13", "throughput": "1326.98", "total_tokens": 1389096} |
|
{"current_steps": 274, "total_steps": 1320, "loss": 2.5369, "learning_rate": 1.8449507925532685e-06, "epoch": 0.41515151515151516, "percentage": 20.76, "elapsed_time": "0:17:34", "remaining_time": "1:07:06", "throughput": "1327.95", "total_tokens": 1400784} |
|
{"current_steps": 276, "total_steps": 1320, "loss": 2.2525, "learning_rate": 1.8426731527160064e-06, "epoch": 0.41818181818181815, "percentage": 20.91, "elapsed_time": "0:17:43", "remaining_time": "1:07:02", "throughput": "1328.95", "total_tokens": 1413072} |
|
{"current_steps": 278, "total_steps": 1320, "loss": 2.3052, "learning_rate": 1.8403807283845616e-06, "epoch": 0.4212121212121212, "percentage": 21.06, "elapsed_time": "0:17:50", "remaining_time": "1:06:50", "throughput": "1329.68", "total_tokens": 1422888} |
|
{"current_steps": 280, "total_steps": 1320, "loss": 2.344, "learning_rate": 1.8380735722954367e-06, "epoch": 0.42424242424242425, "percentage": 21.21, "elapsed_time": "0:17:56", "remaining_time": "1:06:38", "throughput": "1330.23", "total_tokens": 1432128} |
|
{"current_steps": 280, "total_steps": 1320, "eval_loss": 2.3386666774749756, "epoch": 0.42424242424242425, "percentage": 21.21, "elapsed_time": "0:18:02", "remaining_time": "1:07:00", "throughput": "1323.07", "total_tokens": 1432128} |
|
{"current_steps": 282, "total_steps": 1320, "loss": 2.4498, "learning_rate": 1.835751737524033e-06, "epoch": 0.42727272727272725, "percentage": 21.36, "elapsed_time": "0:18:07", "remaining_time": "1:06:44", "throughput": "1323.62", "total_tokens": 1439928} |
|
{"current_steps": 284, "total_steps": 1320, "loss": 2.3934, "learning_rate": 1.8334152774834309e-06, "epoch": 0.4303030303030303, "percentage": 21.52, "elapsed_time": "0:18:14", "remaining_time": "1:06:32", "throughput": "1324.36", "total_tokens": 1449624} |
|
{"current_steps": 286, "total_steps": 1320, "loss": 2.52, "learning_rate": 1.83106424592316e-06, "epoch": 0.43333333333333335, "percentage": 21.67, "elapsed_time": "0:18:22", "remaining_time": "1:06:24", "throughput": "1325.13", "total_tokens": 1460520} |
|
{"current_steps": 288, "total_steps": 1320, "loss": 2.5132, "learning_rate": 1.8286986969279643e-06, "epoch": 0.43636363636363634, "percentage": 21.82, "elapsed_time": "0:18:28", "remaining_time": "1:06:12", "throughput": "1325.82", "total_tokens": 1469712} |
|
{"current_steps": 290, "total_steps": 1320, "loss": 2.403, "learning_rate": 1.8263186849165555e-06, "epoch": 0.4393939393939394, "percentage": 21.97, "elapsed_time": "0:18:36", "remaining_time": "1:06:04", "throughput": "1326.71", "total_tokens": 1480824} |
|
{"current_steps": 292, "total_steps": 1320, "loss": 2.5149, "learning_rate": 1.8239242646403628e-06, "epoch": 0.44242424242424244, "percentage": 22.12, "elapsed_time": "0:18:41", "remaining_time": "1:05:49", "throughput": "1327.17", "total_tokens": 1488816} |
|
{"current_steps": 294, "total_steps": 1320, "loss": 2.2043, "learning_rate": 1.8215154911822737e-06, "epoch": 0.44545454545454544, "percentage": 22.27, "elapsed_time": "0:18:48", "remaining_time": "1:05:36", "throughput": "1327.75", "total_tokens": 1497816} |
|
{"current_steps": 296, "total_steps": 1320, "loss": 2.439, "learning_rate": 1.8190924199553655e-06, "epoch": 0.4484848484848485, "percentage": 22.42, "elapsed_time": "0:18:55", "remaining_time": "1:05:28", "throughput": "1328.60", "total_tokens": 1508928} |
|
{"current_steps": 298, "total_steps": 1320, "loss": 2.4665, "learning_rate": 1.816655106701631e-06, "epoch": 0.45151515151515154, "percentage": 22.58, "elapsed_time": "0:19:03", "remaining_time": "1:05:20", "throughput": "1329.34", "total_tokens": 1519512} |
|
{"current_steps": 300, "total_steps": 1320, "loss": 2.4901, "learning_rate": 1.8142036074906968e-06, "epoch": 0.45454545454545453, "percentage": 22.73, "elapsed_time": "0:19:10", "remaining_time": "1:05:10", "throughput": "1329.99", "total_tokens": 1529520} |
|
{"current_steps": 300, "total_steps": 1320, "eval_loss": 2.337289333343506, "epoch": 0.45454545454545453, "percentage": 22.73, "elapsed_time": "0:19:15", "remaining_time": "1:05:29", "throughput": "1323.29", "total_tokens": 1529520} |
|
{"current_steps": 302, "total_steps": 1320, "loss": 2.1503, "learning_rate": 1.8117379787185333e-06, "epoch": 0.4575757575757576, "percentage": 22.88, "elapsed_time": "0:19:26", "remaining_time": "1:05:30", "throughput": "1322.27", "total_tokens": 1541808} |
|
{"current_steps": 304, "total_steps": 1320, "loss": 2.4026, "learning_rate": 1.809258277106156e-06, "epoch": 0.46060606060606063, "percentage": 23.03, "elapsed_time": "0:19:32", "remaining_time": "1:05:18", "throughput": "1322.81", "total_tokens": 1550952} |
|
{"current_steps": 306, "total_steps": 1320, "loss": 2.4195, "learning_rate": 1.8067645596983226e-06, "epoch": 0.4636363636363636, "percentage": 23.18, "elapsed_time": "0:19:40", "remaining_time": "1:05:10", "throughput": "1323.64", "total_tokens": 1562064} |
|
{"current_steps": 308, "total_steps": 1320, "loss": 2.6243, "learning_rate": 1.804256883862219e-06, "epoch": 0.4666666666666667, "percentage": 23.33, "elapsed_time": "0:19:45", "remaining_time": "1:04:54", "throughput": "1324.05", "total_tokens": 1569240} |
|
{"current_steps": 310, "total_steps": 1320, "loss": 2.3603, "learning_rate": 1.8017353072861416e-06, "epoch": 0.4696969696969697, "percentage": 23.48, "elapsed_time": "0:19:52", "remaining_time": "1:04:44", "throughput": "1324.79", "total_tokens": 1579560} |
|
{"current_steps": 312, "total_steps": 1320, "loss": 2.2741, "learning_rate": 1.7991998879781676e-06, "epoch": 0.4727272727272727, "percentage": 23.64, "elapsed_time": "0:20:00", "remaining_time": "1:04:38", "throughput": "1325.56", "total_tokens": 1591248} |
|
{"current_steps": 314, "total_steps": 1320, "loss": 2.5167, "learning_rate": 1.796650684264823e-06, "epoch": 0.47575757575757577, "percentage": 23.79, "elapsed_time": "0:20:08", "remaining_time": "1:04:31", "throughput": "1326.33", "total_tokens": 1602840} |
|
{"current_steps": 316, "total_steps": 1320, "loss": 2.404, "learning_rate": 1.7940877547897383e-06, "epoch": 0.47878787878787876, "percentage": 23.94, "elapsed_time": "0:20:13", "remaining_time": "1:04:16", "throughput": "1326.84", "total_tokens": 1610520} |
|
{"current_steps": 318, "total_steps": 1320, "loss": 2.3861, "learning_rate": 1.7915111585123026e-06, "epoch": 0.4818181818181818, "percentage": 24.09, "elapsed_time": "0:20:19", "remaining_time": "1:04:01", "throughput": "1327.13", "total_tokens": 1617936} |
|
{"current_steps": 320, "total_steps": 1320, "loss": 2.3335, "learning_rate": 1.7889209547063038e-06, "epoch": 0.48484848484848486, "percentage": 24.24, "elapsed_time": "0:20:26", "remaining_time": "1:03:52", "throughput": "1327.82", "total_tokens": 1628424} |
|
{"current_steps": 320, "total_steps": 1320, "eval_loss": 2.336883068084717, "epoch": 0.48484848484848486, "percentage": 24.24, "elapsed_time": "0:20:32", "remaining_time": "1:04:10", "throughput": "1321.54", "total_tokens": 1628424} |
|
{"current_steps": 322, "total_steps": 1320, "loss": 2.6274, "learning_rate": 1.7863172029585684e-06, "epoch": 0.48787878787878786, "percentage": 24.39, "elapsed_time": "0:20:37", "remaining_time": "1:03:56", "throughput": "1322.10", "total_tokens": 1636704} |
|
{"current_steps": 324, "total_steps": 1320, "loss": 2.2444, "learning_rate": 1.7836999631675877e-06, "epoch": 0.4909090909090909, "percentage": 24.55, "elapsed_time": "0:20:45", "remaining_time": "1:03:47", "throughput": "1322.60", "total_tokens": 1646760} |
|
{"current_steps": 326, "total_steps": 1320, "loss": 2.3407, "learning_rate": 1.7810692955421418e-06, "epoch": 0.49393939393939396, "percentage": 24.7, "elapsed_time": "0:20:52", "remaining_time": "1:03:39", "throughput": "1323.35", "total_tokens": 1657824} |
|
{"current_steps": 328, "total_steps": 1320, "loss": 2.5075, "learning_rate": 1.778425260599914e-06, "epoch": 0.49696969696969695, "percentage": 24.85, "elapsed_time": "0:21:01", "remaining_time": "1:03:33", "throughput": "1324.15", "total_tokens": 1669800} |
|
{"current_steps": 330, "total_steps": 1320, "loss": 2.4304, "learning_rate": 1.7757679191660974e-06, "epoch": 0.5, "percentage": 25.0, "elapsed_time": "0:21:07", "remaining_time": "1:03:22", "throughput": "1324.66", "total_tokens": 1678896} |
|
{"current_steps": 332, "total_steps": 1320, "loss": 2.3898, "learning_rate": 1.7730973323719996e-06, "epoch": 0.503030303030303, "percentage": 25.15, "elapsed_time": "0:21:12", "remaining_time": "1:03:07", "throughput": "1325.15", "total_tokens": 1686696} |
|
{"current_steps": 334, "total_steps": 1320, "loss": 2.1912, "learning_rate": 1.7704135616536297e-06, "epoch": 0.5060606060606061, "percentage": 25.3, "elapsed_time": "0:21:19", "remaining_time": "1:02:55", "throughput": "1325.71", "total_tokens": 1695648} |
|
{"current_steps": 336, "total_steps": 1320, "loss": 2.4971, "learning_rate": 1.767716668750292e-06, "epoch": 0.509090909090909, "percentage": 25.45, "elapsed_time": "0:21:24", "remaining_time": "1:02:41", "throughput": "1326.12", "total_tokens": 1703112} |
|
{"current_steps": 338, "total_steps": 1320, "loss": 2.3934, "learning_rate": 1.7650067157031607e-06, "epoch": 0.5121212121212121, "percentage": 25.61, "elapsed_time": "0:21:32", "remaining_time": "1:02:35", "throughput": "1326.95", "total_tokens": 1715400} |
|
{"current_steps": 340, "total_steps": 1320, "loss": 2.1842, "learning_rate": 1.7622837648538558e-06, "epoch": 0.5151515151515151, "percentage": 25.76, "elapsed_time": "0:21:40", "remaining_time": "1:02:27", "throughput": "1327.53", "total_tokens": 1725816} |
|
{"current_steps": 340, "total_steps": 1320, "eval_loss": 2.3365180492401123, "epoch": 0.5151515151515151, "percentage": 25.76, "elapsed_time": "0:21:45", "remaining_time": "1:02:43", "throughput": "1321.62", "total_tokens": 1725816} |
|
{"current_steps": 342, "total_steps": 1320, "loss": 2.2292, "learning_rate": 1.7595478788430067e-06, "epoch": 0.5181818181818182, "percentage": 25.91, "elapsed_time": "0:21:53", "remaining_time": "1:02:36", "throughput": "1322.38", "total_tokens": 1737240} |
|
{"current_steps": 344, "total_steps": 1320, "loss": 2.3013, "learning_rate": 1.7567991206088122e-06, "epoch": 0.5212121212121212, "percentage": 26.06, "elapsed_time": "0:21:58", "remaining_time": "1:02:20", "throughput": "1322.71", "total_tokens": 1743792} |
|
{"current_steps": 346, "total_steps": 1320, "loss": 2.5828, "learning_rate": 1.7540375533855931e-06, "epoch": 0.5242424242424243, "percentage": 26.21, "elapsed_time": "0:22:06", "remaining_time": "1:02:13", "throughput": "1323.51", "total_tokens": 1755192} |
|
{"current_steps": 348, "total_steps": 1320, "loss": 2.2834, "learning_rate": 1.751263240702337e-06, "epoch": 0.5272727272727272, "percentage": 26.36, "elapsed_time": "0:22:13", "remaining_time": "1:02:04", "throughput": "1324.16", "total_tokens": 1765656} |
|
{"current_steps": 350, "total_steps": 1320, "loss": 2.5502, "learning_rate": 1.7484762463812359e-06, "epoch": 0.5303030303030303, "percentage": 26.52, "elapsed_time": "0:22:18", "remaining_time": "1:01:50", "throughput": "1324.53", "total_tokens": 1773504} |
|
{"current_steps": 352, "total_steps": 1320, "loss": 2.2939, "learning_rate": 1.7456766345362195e-06, "epoch": 0.5333333333333333, "percentage": 26.67, "elapsed_time": "0:22:27", "remaining_time": "1:01:45", "throughput": "1325.29", "total_tokens": 1785792} |
|
{"current_steps": 354, "total_steps": 1320, "loss": 2.3919, "learning_rate": 1.7428644695714798e-06, "epoch": 0.5363636363636364, "percentage": 26.82, "elapsed_time": "0:22:32", "remaining_time": "1:01:30", "throughput": "1325.64", "total_tokens": 1792848} |
|
{"current_steps": 356, "total_steps": 1320, "loss": 2.4862, "learning_rate": 1.7400398161799901e-06, "epoch": 0.5393939393939394, "percentage": 26.97, "elapsed_time": "0:22:38", "remaining_time": "1:01:19", "throughput": "1326.19", "total_tokens": 1802256} |
|
{"current_steps": 358, "total_steps": 1320, "loss": 2.4536, "learning_rate": 1.7372027393420136e-06, "epoch": 0.5424242424242425, "percentage": 27.12, "elapsed_time": "0:22:46", "remaining_time": "1:01:11", "throughput": "1326.74", "total_tokens": 1812840} |
|
{"current_steps": 360, "total_steps": 1320, "loss": 2.2118, "learning_rate": 1.7343533043236135e-06, "epoch": 0.5454545454545454, "percentage": 27.27, "elapsed_time": "0:22:53", "remaining_time": "1:01:01", "throughput": "1327.23", "total_tokens": 1822440} |
|
{"current_steps": 360, "total_steps": 1320, "eval_loss": 2.335080623626709, "epoch": 0.5454545454545454, "percentage": 27.27, "elapsed_time": "0:22:58", "remaining_time": "1:01:17", "throughput": "1321.62", "total_tokens": 1822440} |
|
{"current_steps": 362, "total_steps": 1320, "loss": 2.3442, "learning_rate": 1.7314915766751482e-06, "epoch": 0.5484848484848485, "percentage": 27.42, "elapsed_time": "0:23:06", "remaining_time": "1:01:08", "throughput": "1322.25", "total_tokens": 1833168} |
|
{"current_steps": 364, "total_steps": 1320, "loss": 2.3881, "learning_rate": 1.7286176222297643e-06, "epoch": 0.5515151515151515, "percentage": 27.58, "elapsed_time": "0:23:14", "remaining_time": "1:01:02", "throughput": "1322.95", "total_tokens": 1845072} |
|
{"current_steps": 366, "total_steps": 1320, "loss": 2.2764, "learning_rate": 1.7257315071018814e-06, "epoch": 0.5545454545454546, "percentage": 27.73, "elapsed_time": "0:23:23", "remaining_time": "1:00:57", "throughput": "1323.60", "total_tokens": 1857168} |
|
{"current_steps": 368, "total_steps": 1320, "loss": 2.364, "learning_rate": 1.7228332976856717e-06, "epoch": 0.5575757575757576, "percentage": 27.88, "elapsed_time": "0:23:31", "remaining_time": "1:00:51", "throughput": "1324.29", "total_tokens": 1869456} |
|
{"current_steps": 370, "total_steps": 1320, "loss": 2.4654, "learning_rate": 1.7199230606535347e-06, "epoch": 0.5606060606060606, "percentage": 28.03, "elapsed_time": "0:23:37", "remaining_time": "1:00:40", "throughput": "1324.75", "total_tokens": 1878168} |
|
{"current_steps": 372, "total_steps": 1320, "loss": 2.4599, "learning_rate": 1.717000862954559e-06, "epoch": 0.5636363636363636, "percentage": 28.18, "elapsed_time": "0:23:44", "remaining_time": "1:00:31", "throughput": "1325.34", "total_tokens": 1888608} |
|
{"current_steps": 374, "total_steps": 1320, "loss": 2.2146, "learning_rate": 1.7140667718129853e-06, "epoch": 0.5666666666666667, "percentage": 28.33, "elapsed_time": "0:23:50", "remaining_time": "1:00:19", "throughput": "1325.78", "total_tokens": 1897008} |
|
{"current_steps": 376, "total_steps": 1320, "loss": 2.4951, "learning_rate": 1.7111208547266607e-06, "epoch": 0.5696969696969697, "percentage": 28.48, "elapsed_time": "0:23:57", "remaining_time": "1:00:09", "throughput": "1326.28", "total_tokens": 1906776} |
|
{"current_steps": 378, "total_steps": 1320, "loss": 2.1497, "learning_rate": 1.7081631794654818e-06, "epoch": 0.5727272727272728, "percentage": 28.64, "elapsed_time": "0:24:06", "remaining_time": "1:00:04", "throughput": "1326.95", "total_tokens": 1919064} |
|
{"current_steps": 380, "total_steps": 1320, "loss": 2.3233, "learning_rate": 1.7051938140698408e-06, "epoch": 0.5757575757575758, "percentage": 28.79, "elapsed_time": "0:24:12", "remaining_time": "0:59:54", "throughput": "1327.42", "total_tokens": 1928688} |
|
{"current_steps": 380, "total_steps": 1320, "eval_loss": 2.334742546081543, "epoch": 0.5757575757575758, "percentage": 28.79, "elapsed_time": "0:24:18", "remaining_time": "1:00:08", "throughput": "1322.12", "total_tokens": 1928688} |
|
{"current_steps": 382, "total_steps": 1320, "loss": 2.319, "learning_rate": 1.702212826849056e-06, "epoch": 0.5787878787878787, "percentage": 28.94, "elapsed_time": "0:24:27", "remaining_time": "1:00:02", "throughput": "1322.84", "total_tokens": 1940784} |
|
{"current_steps": 384, "total_steps": 1320, "loss": 2.5949, "learning_rate": 1.6992202863798037e-06, "epoch": 0.5818181818181818, "percentage": 29.09, "elapsed_time": "0:24:34", "remaining_time": "0:59:53", "throughput": "1323.34", "total_tokens": 1950840} |
|
{"current_steps": 386, "total_steps": 1320, "loss": 2.3292, "learning_rate": 1.6962162615045377e-06, "epoch": 0.5848484848484848, "percentage": 29.24, "elapsed_time": "0:24:42", "remaining_time": "0:59:47", "throughput": "1324.03", "total_tokens": 1963128} |
|
{"current_steps": 388, "total_steps": 1320, "loss": 2.5239, "learning_rate": 1.6932008213299071e-06, "epoch": 0.5878787878787879, "percentage": 29.39, "elapsed_time": "0:24:50", "remaining_time": "0:59:41", "throughput": "1324.71", "total_tokens": 1975008} |
|
{"current_steps": 390, "total_steps": 1320, "loss": 2.4818, "learning_rate": 1.6901740352251675e-06, "epoch": 0.5909090909090909, "percentage": 29.55, "elapsed_time": "0:24:57", "remaining_time": "0:59:30", "throughput": "1324.96", "total_tokens": 1983648} |
|
{"current_steps": 392, "total_steps": 1320, "loss": 2.1795, "learning_rate": 1.6871359728205828e-06, "epoch": 0.593939393939394, "percentage": 29.7, "elapsed_time": "0:25:04", "remaining_time": "0:59:20", "throughput": "1325.42", "total_tokens": 1993536} |
|
{"current_steps": 394, "total_steps": 1320, "loss": 2.3585, "learning_rate": 1.6840867040058254e-06, "epoch": 0.5969696969696969, "percentage": 29.85, "elapsed_time": "0:25:10", "remaining_time": "0:59:10", "throughput": "1325.86", "total_tokens": 2002872} |
|
{"current_steps": 396, "total_steps": 1320, "loss": 2.3718, "learning_rate": 1.6810262989283674e-06, "epoch": 0.6, "percentage": 30.0, "elapsed_time": "0:25:17", "remaining_time": "0:59:00", "throughput": "1326.25", "total_tokens": 2012400} |
|
{"current_steps": 398, "total_steps": 1320, "loss": 2.314, "learning_rate": 1.6779548279918671e-06, "epoch": 0.603030303030303, "percentage": 30.15, "elapsed_time": "0:25:24", "remaining_time": "0:58:52", "throughput": "1326.79", "total_tokens": 2022936} |
|
{"current_steps": 400, "total_steps": 1320, "loss": 2.6427, "learning_rate": 1.6748723618545496e-06, "epoch": 0.6060606060606061, "percentage": 30.3, "elapsed_time": "0:25:30", "remaining_time": "0:58:40", "throughput": "1327.19", "total_tokens": 2031480} |
|
{"current_steps": 400, "total_steps": 1320, "eval_loss": 2.334027051925659, "epoch": 0.6060606060606061, "percentage": 30.3, "elapsed_time": "0:25:36", "remaining_time": "0:58:53", "throughput": "1322.16", "total_tokens": 2031480} |
|
{"current_steps": 402, "total_steps": 1320, "loss": 2.2379, "learning_rate": 1.6717789714275808e-06, "epoch": 0.6090909090909091, "percentage": 30.45, "elapsed_time": "0:25:46", "remaining_time": "0:58:51", "throughput": "1321.40", "total_tokens": 2043768} |
|
{"current_steps": 404, "total_steps": 1320, "loss": 2.3286, "learning_rate": 1.6686747278734364e-06, "epoch": 0.6121212121212121, "percentage": 30.61, "elapsed_time": "0:25:52", "remaining_time": "0:58:40", "throughput": "1321.84", "total_tokens": 2052456} |
|
{"current_steps": 406, "total_steps": 1320, "loss": 2.3891, "learning_rate": 1.6655597026042654e-06, "epoch": 0.6151515151515151, "percentage": 30.76, "elapsed_time": "0:25:59", "remaining_time": "0:58:31", "throughput": "1322.35", "total_tokens": 2062608} |
|
{"current_steps": 408, "total_steps": 1320, "loss": 2.2766, "learning_rate": 1.6624339672802466e-06, "epoch": 0.6181818181818182, "percentage": 30.91, "elapsed_time": "0:26:05", "remaining_time": "0:58:19", "throughput": "1322.72", "total_tokens": 2070624} |
|
{"current_steps": 410, "total_steps": 1320, "loss": 2.3172, "learning_rate": 1.65929759380794e-06, "epoch": 0.6212121212121212, "percentage": 31.06, "elapsed_time": "0:26:13", "remaining_time": "0:58:11", "throughput": "1323.34", "total_tokens": 2082024} |
|
{"current_steps": 412, "total_steps": 1320, "loss": 2.2975, "learning_rate": 1.6561506543386332e-06, "epoch": 0.6242424242424243, "percentage": 31.21, "elapsed_time": "0:26:21", "remaining_time": "0:58:05", "throughput": "1323.97", "total_tokens": 2093928} |
|
{"current_steps": 414, "total_steps": 1320, "loss": 2.5706, "learning_rate": 1.6529932212666813e-06, "epoch": 0.6272727272727273, "percentage": 31.36, "elapsed_time": "0:26:27", "remaining_time": "0:57:54", "throughput": "1324.36", "total_tokens": 2102712} |
|
{"current_steps": 416, "total_steps": 1320, "loss": 2.4111, "learning_rate": 1.6498253672278403e-06, "epoch": 0.6303030303030303, "percentage": 31.52, "elapsed_time": "0:26:33", "remaining_time": "0:57:43", "throughput": "1324.79", "total_tokens": 2111352} |
|
{"current_steps": 418, "total_steps": 1320, "loss": 2.3655, "learning_rate": 1.6466471650975989e-06, "epoch": 0.6333333333333333, "percentage": 31.67, "elapsed_time": "0:26:41", "remaining_time": "0:57:36", "throughput": "1325.41", "total_tokens": 2123184} |
|
{"current_steps": 420, "total_steps": 1320, "loss": 2.1955, "learning_rate": 1.6434586879894994e-06, "epoch": 0.6363636363636364, "percentage": 31.82, "elapsed_time": "0:26:48", "remaining_time": "0:57:26", "throughput": "1325.86", "total_tokens": 2132520} |
|
{"current_steps": 420, "total_steps": 1320, "eval_loss": 2.3337419033050537, "epoch": 0.6363636363636364, "percentage": 31.82, "elapsed_time": "0:26:54", "remaining_time": "0:57:39", "throughput": "1321.07", "total_tokens": 2132520} |
|
{"current_steps": 422, "total_steps": 1320, "loss": 2.4297, "learning_rate": 1.6402600092534571e-06, "epoch": 0.6393939393939394, "percentage": 31.97, "elapsed_time": "0:26:59", "remaining_time": "0:57:26", "throughput": "1321.41", "total_tokens": 2140344} |
|
{"current_steps": 424, "total_steps": 1320, "loss": 2.3299, "learning_rate": 1.637051202474072e-06, "epoch": 0.6424242424242425, "percentage": 32.12, "elapsed_time": "0:27:06", "remaining_time": "0:57:17", "throughput": "1321.93", "total_tokens": 2150592} |
|
{"current_steps": 426, "total_steps": 1320, "loss": 2.4036, "learning_rate": 1.6338323414689384e-06, "epoch": 0.6454545454545455, "percentage": 32.27, "elapsed_time": "0:27:12", "remaining_time": "0:57:06", "throughput": "1322.31", "total_tokens": 2158848} |
|
{"current_steps": 428, "total_steps": 1320, "loss": 2.3709, "learning_rate": 1.6306035002869418e-06, "epoch": 0.6484848484848484, "percentage": 32.42, "elapsed_time": "0:27:17", "remaining_time": "0:56:53", "throughput": "1322.63", "total_tokens": 2166120} |
|
{"current_steps": 430, "total_steps": 1320, "loss": 2.6169, "learning_rate": 1.6273647532065615e-06, "epoch": 0.6515151515151515, "percentage": 32.58, "elapsed_time": "0:27:25", "remaining_time": "0:56:46", "throughput": "1323.21", "total_tokens": 2177760} |
|
{"current_steps": 432, "total_steps": 1320, "loss": 2.4805, "learning_rate": 1.6241161747341568e-06, "epoch": 0.6545454545454545, "percentage": 32.73, "elapsed_time": "0:27:31", "remaining_time": "0:56:34", "throughput": "1323.55", "total_tokens": 2185488} |
|
{"current_steps": 434, "total_steps": 1320, "loss": 2.1922, "learning_rate": 1.6208578396022566e-06, "epoch": 0.6575757575757576, "percentage": 32.88, "elapsed_time": "0:27:38", "remaining_time": "0:56:26", "throughput": "1324.04", "total_tokens": 2196336} |
|
{"current_steps": 436, "total_steps": 1320, "loss": 2.4529, "learning_rate": 1.6175898227678376e-06, "epoch": 0.6606060606060606, "percentage": 33.03, "elapsed_time": "0:27:44", "remaining_time": "0:56:14", "throughput": "1324.42", "total_tokens": 2204520} |
|
{"current_steps": 438, "total_steps": 1320, "loss": 2.3597, "learning_rate": 1.6143121994106012e-06, "epoch": 0.6636363636363637, "percentage": 33.18, "elapsed_time": "0:27:53", "remaining_time": "0:56:09", "throughput": "1324.99", "total_tokens": 2216808} |
|
{"current_steps": 440, "total_steps": 1320, "loss": 2.4364, "learning_rate": 1.611025044931245e-06, "epoch": 0.6666666666666666, "percentage": 33.33, "elapsed_time": "0:28:00", "remaining_time": "0:56:01", "throughput": "1325.52", "total_tokens": 2227752} |
|
{"current_steps": 440, "total_steps": 1320, "eval_loss": 2.3327877521514893, "epoch": 0.6666666666666666, "percentage": 33.33, "elapsed_time": "0:28:06", "remaining_time": "0:56:12", "throughput": "1320.94", "total_tokens": 2227752} |
|
{"current_steps": 442, "total_steps": 1320, "loss": 2.5148, "learning_rate": 1.6077284349497254e-06, "epoch": 0.6696969696969697, "percentage": 33.48, "elapsed_time": "0:28:13", "remaining_time": "0:56:04", "throughput": "1321.38", "total_tokens": 2237808} |
|
{"current_steps": 444, "total_steps": 1320, "loss": 2.1969, "learning_rate": 1.6044224453035203e-06, "epoch": 0.6727272727272727, "percentage": 33.64, "elapsed_time": "0:28:21", "remaining_time": "0:55:57", "throughput": "1321.95", "total_tokens": 2249304} |
|
{"current_steps": 446, "total_steps": 1320, "loss": 2.5604, "learning_rate": 1.6011071520458845e-06, "epoch": 0.6757575757575758, "percentage": 33.79, "elapsed_time": "0:28:29", "remaining_time": "0:55:49", "throughput": "1322.48", "total_tokens": 2260176} |
|
{"current_steps": 448, "total_steps": 1320, "loss": 2.2211, "learning_rate": 1.5977826314440987e-06, "epoch": 0.6787878787878788, "percentage": 33.94, "elapsed_time": "0:28:36", "remaining_time": "0:55:40", "throughput": "1322.93", "total_tokens": 2270184} |
|
{"current_steps": 450, "total_steps": 1320, "loss": 2.3477, "learning_rate": 1.5944489599777161e-06, "epoch": 0.6818181818181818, "percentage": 34.09, "elapsed_time": "0:28:43", "remaining_time": "0:55:32", "throughput": "1323.51", "total_tokens": 2281464} |
|
{"current_steps": 452, "total_steps": 1320, "loss": 2.4645, "learning_rate": 1.5911062143368027e-06, "epoch": 0.6848484848484848, "percentage": 34.24, "elapsed_time": "0:28:51", "remaining_time": "0:55:25", "throughput": "1324.04", "total_tokens": 2292720} |
|
{"current_steps": 454, "total_steps": 1320, "loss": 2.5217, "learning_rate": 1.5877544714201726e-06, "epoch": 0.6878787878787879, "percentage": 34.39, "elapsed_time": "0:28:59", "remaining_time": "0:55:17", "throughput": "1324.50", "total_tokens": 2303376} |
|
{"current_steps": 456, "total_steps": 1320, "loss": 2.5123, "learning_rate": 1.5843938083336194e-06, "epoch": 0.6909090909090909, "percentage": 34.55, "elapsed_time": "0:29:05", "remaining_time": "0:55:07", "throughput": "1324.92", "total_tokens": 2312544} |
|
{"current_steps": 458, "total_steps": 1320, "loss": 2.2975, "learning_rate": 1.5810243023881432e-06, "epoch": 0.693939393939394, "percentage": 34.7, "elapsed_time": "0:29:12", "remaining_time": "0:54:59", "throughput": "1325.38", "total_tokens": 2323344} |
|
{"current_steps": 460, "total_steps": 1320, "loss": 2.3568, "learning_rate": 1.5776460310981702e-06, "epoch": 0.696969696969697, "percentage": 34.85, "elapsed_time": "0:29:19", "remaining_time": "0:54:48", "throughput": "1325.73", "total_tokens": 2332056} |
|
{"current_steps": 460, "total_steps": 1320, "eval_loss": 2.332925319671631, "epoch": 0.696969696969697, "percentage": 34.85, "elapsed_time": "0:29:24", "remaining_time": "0:54:59", "throughput": "1321.36", "total_tokens": 2332056} |
|
{"current_steps": 462, "total_steps": 1320, "loss": 2.5328, "learning_rate": 1.5742590721797725e-06, "epoch": 0.7, "percentage": 35.0, "elapsed_time": "0:29:32", "remaining_time": "0:54:51", "throughput": "1321.82", "total_tokens": 2342400} |
|
{"current_steps": 464, "total_steps": 1320, "loss": 2.5166, "learning_rate": 1.5708635035488756e-06, "epoch": 0.703030303030303, "percentage": 35.15, "elapsed_time": "0:29:37", "remaining_time": "0:54:39", "throughput": "1322.16", "total_tokens": 2350536} |
|
{"current_steps": 466, "total_steps": 1320, "loss": 2.3471, "learning_rate": 1.5674594033194706e-06, "epoch": 0.706060606060606, "percentage": 35.3, "elapsed_time": "0:29:45", "remaining_time": "0:54:32", "throughput": "1322.66", "total_tokens": 2361528} |
|
{"current_steps": 468, "total_steps": 1320, "loss": 2.4315, "learning_rate": 1.5640468498018153e-06, "epoch": 0.7090909090909091, "percentage": 35.45, "elapsed_time": "0:29:51", "remaining_time": "0:54:21", "throughput": "1323.05", "total_tokens": 2370552} |
|
{"current_steps": 470, "total_steps": 1320, "loss": 2.5083, "learning_rate": 1.5606259215006325e-06, "epoch": 0.7121212121212122, "percentage": 35.61, "elapsed_time": "0:29:58", "remaining_time": "0:54:12", "throughput": "1323.48", "total_tokens": 2380368} |
|
{"current_steps": 472, "total_steps": 1320, "loss": 2.3308, "learning_rate": 1.5571966971133037e-06, "epoch": 0.7151515151515152, "percentage": 35.76, "elapsed_time": "0:30:04", "remaining_time": "0:54:02", "throughput": "1323.89", "total_tokens": 2389176} |
|
{"current_steps": 474, "total_steps": 1320, "loss": 2.3236, "learning_rate": 1.5537592555280594e-06, "epoch": 0.7181818181818181, "percentage": 35.91, "elapsed_time": "0:30:11", "remaining_time": "0:53:53", "throughput": "1324.29", "total_tokens": 2398944} |
|
{"current_steps": 476, "total_steps": 1320, "loss": 2.8391, "learning_rate": 1.5503136758221653e-06, "epoch": 0.7212121212121212, "percentage": 36.06, "elapsed_time": "0:30:15", "remaining_time": "0:53:39", "throughput": "1324.49", "total_tokens": 2404656} |
|
{"current_steps": 478, "total_steps": 1320, "loss": 2.6875, "learning_rate": 1.5468600372601009e-06, "epoch": 0.7242424242424242, "percentage": 36.21, "elapsed_time": "0:30:23", "remaining_time": "0:53:32", "throughput": "1324.98", "total_tokens": 2416392} |
|
{"current_steps": 480, "total_steps": 1320, "loss": 2.4508, "learning_rate": 1.543398419291737e-06, "epoch": 0.7272727272727273, "percentage": 36.36, "elapsed_time": "0:30:29", "remaining_time": "0:53:22", "throughput": "1325.31", "total_tokens": 2425032} |
|
{"current_steps": 480, "total_steps": 1320, "eval_loss": 2.332369089126587, "epoch": 0.7272727272727273, "percentage": 36.36, "elapsed_time": "0:30:35", "remaining_time": "0:53:32", "throughput": "1321.11", "total_tokens": 2425032} |
|
{"current_steps": 482, "total_steps": 1320, "loss": 2.4884, "learning_rate": 1.5399289015505096e-06, "epoch": 0.7303030303030303, "percentage": 36.52, "elapsed_time": "0:30:40", "remaining_time": "0:53:20", "throughput": "1321.40", "total_tokens": 2432280} |
|
{"current_steps": 484, "total_steps": 1320, "loss": 2.3159, "learning_rate": 1.536451563851584e-06, "epoch": 0.7333333333333333, "percentage": 36.67, "elapsed_time": "0:30:47", "remaining_time": "0:53:11", "throughput": "1321.92", "total_tokens": 2442576} |
|
{"current_steps": 486, "total_steps": 1320, "loss": 2.5522, "learning_rate": 1.5329664861900237e-06, "epoch": 0.7363636363636363, "percentage": 36.82, "elapsed_time": "0:30:53", "remaining_time": "0:53:00", "throughput": "1322.21", "total_tokens": 2450664} |
|
{"current_steps": 488, "total_steps": 1320, "loss": 2.4853, "learning_rate": 1.5294737487389462e-06, "epoch": 0.7393939393939394, "percentage": 36.97, "elapsed_time": "0:31:01", "remaining_time": "0:52:54", "throughput": "1322.73", "total_tokens": 2462568} |
|
{"current_steps": 490, "total_steps": 1320, "loss": 2.5841, "learning_rate": 1.5259734318476807e-06, "epoch": 0.7424242424242424, "percentage": 37.12, "elapsed_time": "0:31:08", "remaining_time": "0:52:45", "throughput": "1323.15", "total_tokens": 2472312} |
|
{"current_steps": 492, "total_steps": 1320, "loss": 2.4222, "learning_rate": 1.5224656160399186e-06, "epoch": 0.7454545454545455, "percentage": 37.27, "elapsed_time": "0:31:15", "remaining_time": "0:52:37", "throughput": "1323.60", "total_tokens": 2483016} |
|
{"current_steps": 494, "total_steps": 1320, "loss": 2.261, "learning_rate": 1.518950382011861e-06, "epoch": 0.7484848484848485, "percentage": 37.42, "elapsed_time": "0:31:22", "remaining_time": "0:52:28", "throughput": "1323.94", "total_tokens": 2492688} |
|
{"current_steps": 496, "total_steps": 1320, "loss": 2.3332, "learning_rate": 1.5154278106303649e-06, "epoch": 0.7515151515151515, "percentage": 37.58, "elapsed_time": "0:31:30", "remaining_time": "0:52:21", "throughput": "1324.44", "total_tokens": 2504472} |
|
{"current_steps": 498, "total_steps": 1320, "loss": 2.3521, "learning_rate": 1.511897982931078e-06, "epoch": 0.7545454545454545, "percentage": 37.73, "elapsed_time": "0:31:39", "remaining_time": "0:52:14", "throughput": "1324.94", "total_tokens": 2516160} |
|
{"current_steps": 500, "total_steps": 1320, "loss": 2.3093, "learning_rate": 1.50836098011658e-06, "epoch": 0.7575757575757576, "percentage": 37.88, "elapsed_time": "0:31:46", "remaining_time": "0:52:07", "throughput": "1325.46", "total_tokens": 2527320} |
|
{"current_steps": 500, "total_steps": 1320, "eval_loss": 2.3320088386535645, "epoch": 0.7575757575757576, "percentage": 37.88, "elapsed_time": "0:31:52", "remaining_time": "0:52:16", "throughput": "1321.42", "total_tokens": 2527320} |
|
{"current_steps": 502, "total_steps": 1320, "loss": 2.4031, "learning_rate": 1.5048168835545094e-06, "epoch": 0.7606060606060606, "percentage": 38.03, "elapsed_time": "0:32:00", "remaining_time": "0:52:08", "throughput": "1320.62", "total_tokens": 2535744} |
|
{"current_steps": 504, "total_steps": 1320, "loss": 2.2842, "learning_rate": 1.5012657747756961e-06, "epoch": 0.7636363636363637, "percentage": 38.18, "elapsed_time": "0:32:07", "remaining_time": "0:52:00", "throughput": "1321.08", "total_tokens": 2546376} |
|
{"current_steps": 506, "total_steps": 1320, "loss": 2.4888, "learning_rate": 1.4977077354722828e-06, "epoch": 0.7666666666666667, "percentage": 38.33, "elapsed_time": "0:32:12", "remaining_time": "0:51:48", "throughput": "1321.33", "total_tokens": 2553456} |
|
{"current_steps": 508, "total_steps": 1320, "loss": 2.3082, "learning_rate": 1.4941428474958469e-06, "epoch": 0.7696969696969697, "percentage": 38.48, "elapsed_time": "0:32:19", "remaining_time": "0:51:40", "throughput": "1321.73", "total_tokens": 2563632} |
|
{"current_steps": 510, "total_steps": 1320, "loss": 2.4127, "learning_rate": 1.4905711928555178e-06, "epoch": 0.7727272727272727, "percentage": 38.64, "elapsed_time": "0:32:26", "remaining_time": "0:51:31", "throughput": "1322.11", "total_tokens": 2573184} |
|
{"current_steps": 512, "total_steps": 1320, "loss": 2.1886, "learning_rate": 1.4869928537160892e-06, "epoch": 0.7757575757575758, "percentage": 38.79, "elapsed_time": "0:32:34", "remaining_time": "0:51:24", "throughput": "1322.61", "total_tokens": 2585472} |
|
{"current_steps": 514, "total_steps": 1320, "loss": 2.2753, "learning_rate": 1.4834079123961308e-06, "epoch": 0.7787878787878788, "percentage": 38.94, "elapsed_time": "0:32:41", "remaining_time": "0:51:15", "throughput": "1322.94", "total_tokens": 2594304} |
|
{"current_steps": 516, "total_steps": 1320, "loss": 2.4605, "learning_rate": 1.479816451366092e-06, "epoch": 0.7818181818181819, "percentage": 39.09, "elapsed_time": "0:32:46", "remaining_time": "0:51:03", "throughput": "1323.18", "total_tokens": 2601600} |
|
{"current_steps": 518, "total_steps": 1320, "loss": 2.4019, "learning_rate": 1.4762185532464057e-06, "epoch": 0.7848484848484848, "percentage": 39.24, "elapsed_time": "0:32:53", "remaining_time": "0:50:55", "throughput": "1323.52", "total_tokens": 2612280} |
|
{"current_steps": 520, "total_steps": 1320, "loss": 2.5201, "learning_rate": 1.472614300805591e-06, "epoch": 0.7878787878787878, "percentage": 39.39, "elapsed_time": "0:33:02", "remaining_time": "0:50:49", "throughput": "1324.01", "total_tokens": 2624280} |
|
{"current_steps": 520, "total_steps": 1320, "eval_loss": 2.3315682411193848, "epoch": 0.7878787878787878, "percentage": 39.39, "elapsed_time": "0:33:07", "remaining_time": "0:50:58", "throughput": "1320.13", "total_tokens": 2624280} |
|
{"current_steps": 522, "total_steps": 1320, "loss": 2.429, "learning_rate": 1.4690037769583428e-06, "epoch": 0.7909090909090909, "percentage": 39.55, "elapsed_time": "0:33:14", "remaining_time": "0:50:49", "throughput": "1320.50", "total_tokens": 2634072} |
|
{"current_steps": 524, "total_steps": 1320, "loss": 2.4341, "learning_rate": 1.4653870647636297e-06, "epoch": 0.793939393939394, "percentage": 39.7, "elapsed_time": "0:33:21", "remaining_time": "0:50:40", "throughput": "1320.88", "total_tokens": 2643864} |
|
{"current_steps": 526, "total_steps": 1320, "loss": 2.2926, "learning_rate": 1.4617642474227797e-06, "epoch": 0.796969696969697, "percentage": 39.85, "elapsed_time": "0:33:30", "remaining_time": "0:50:34", "throughput": "1321.39", "total_tokens": 2656152} |
|
{"current_steps": 528, "total_steps": 1320, "loss": 2.6445, "learning_rate": 1.45813540827757e-06, "epoch": 0.8, "percentage": 40.0, "elapsed_time": "0:33:36", "remaining_time": "0:50:25", "throughput": "1321.78", "total_tokens": 2665968} |
|
{"current_steps": 530, "total_steps": 1320, "loss": 2.2952, "learning_rate": 1.4545006308083055e-06, "epoch": 0.803030303030303, "percentage": 40.15, "elapsed_time": "0:33:45", "remaining_time": "0:50:18", "throughput": "1322.27", "total_tokens": 2677680} |
|
{"current_steps": 532, "total_steps": 1320, "loss": 2.4097, "learning_rate": 1.4508599986319015e-06, "epoch": 0.806060606060606, "percentage": 40.3, "elapsed_time": "0:33:51", "remaining_time": "0:50:09", "throughput": "1322.63", "total_tokens": 2687376} |
|
{"current_steps": 534, "total_steps": 1320, "loss": 2.2751, "learning_rate": 1.4472135954999578e-06, "epoch": 0.8090909090909091, "percentage": 40.45, "elapsed_time": "0:33:59", "remaining_time": "0:50:02", "throughput": "1323.11", "total_tokens": 2699112} |
|
{"current_steps": 536, "total_steps": 1320, "loss": 2.4527, "learning_rate": 1.4435615052968358e-06, "epoch": 0.8121212121212121, "percentage": 40.61, "elapsed_time": "0:34:07", "remaining_time": "0:49:54", "throughput": "1323.52", "total_tokens": 2710008} |
|
{"current_steps": 538, "total_steps": 1320, "loss": 2.3689, "learning_rate": 1.4399038120377224e-06, "epoch": 0.8151515151515152, "percentage": 40.76, "elapsed_time": "0:34:14", "remaining_time": "0:49:46", "throughput": "1323.84", "total_tokens": 2720136} |
|
{"current_steps": 540, "total_steps": 1320, "loss": 2.4758, "learning_rate": 1.4362405998667043e-06, "epoch": 0.8181818181818182, "percentage": 40.91, "elapsed_time": "0:34:21", "remaining_time": "0:49:37", "throughput": "1324.17", "total_tokens": 2729160} |
|
{"current_steps": 540, "total_steps": 1320, "eval_loss": 2.3316752910614014, "epoch": 0.8181818181818182, "percentage": 40.91, "elapsed_time": "0:34:26", "remaining_time": "0:49:45", "throughput": "1320.44", "total_tokens": 2729160} |
|
{"current_steps": 542, "total_steps": 1320, "loss": 2.3434, "learning_rate": 1.432571953054828e-06, "epoch": 0.8212121212121212, "percentage": 41.06, "elapsed_time": "0:34:33", "remaining_time": "0:49:36", "throughput": "1320.85", "total_tokens": 2739168} |
|
{"current_steps": 544, "total_steps": 1320, "loss": 2.364, "learning_rate": 1.4288979559981615e-06, "epoch": 0.8242424242424242, "percentage": 41.21, "elapsed_time": "0:34:39", "remaining_time": "0:49:26", "throughput": "1321.22", "total_tokens": 2747688} |
|
{"current_steps": 546, "total_steps": 1320, "loss": 2.4677, "learning_rate": 1.4252186932158546e-06, "epoch": 0.8272727272727273, "percentage": 41.36, "elapsed_time": "0:34:47", "remaining_time": "0:49:18", "throughput": "1321.57", "total_tokens": 2758488} |
|
{"current_steps": 548, "total_steps": 1320, "loss": 2.5121, "learning_rate": 1.421534249348192e-06, "epoch": 0.8303030303030303, "percentage": 41.52, "elapsed_time": "0:34:54", "remaining_time": "0:49:10", "throughput": "1322.00", "total_tokens": 2768832} |
|
{"current_steps": 550, "total_steps": 1320, "loss": 2.491, "learning_rate": 1.4178447091546497e-06, "epoch": 0.8333333333333334, "percentage": 41.67, "elapsed_time": "0:35:01", "remaining_time": "0:49:02", "throughput": "1322.36", "total_tokens": 2779584} |
|
{"current_steps": 552, "total_steps": 1320, "loss": 2.3513, "learning_rate": 1.414150157511941e-06, "epoch": 0.8363636363636363, "percentage": 41.82, "elapsed_time": "0:35:06", "remaining_time": "0:48:51", "throughput": "1322.58", "total_tokens": 2786232} |
|
{"current_steps": 554, "total_steps": 1320, "loss": 2.4317, "learning_rate": 1.410450679412067e-06, "epoch": 0.8393939393939394, "percentage": 41.97, "elapsed_time": "0:35:13", "remaining_time": "0:48:42", "throughput": "1322.96", "total_tokens": 2796216} |
|
{"current_steps": 556, "total_steps": 1320, "loss": 2.3216, "learning_rate": 1.406746359960361e-06, "epoch": 0.8424242424242424, "percentage": 42.12, "elapsed_time": "0:35:21", "remaining_time": "0:48:34", "throughput": "1323.44", "total_tokens": 2807352} |
|
{"current_steps": 558, "total_steps": 1320, "loss": 2.2947, "learning_rate": 1.403037284373529e-06, "epoch": 0.8454545454545455, "percentage": 42.27, "elapsed_time": "0:35:28", "remaining_time": "0:48:26", "throughput": "1323.81", "total_tokens": 2817936} |
|
{"current_steps": 560, "total_steps": 1320, "loss": 2.5013, "learning_rate": 1.3993235379776908e-06, "epoch": 0.8484848484848485, "percentage": 42.42, "elapsed_time": "0:35:35", "remaining_time": "0:48:17", "throughput": "1324.11", "total_tokens": 2827104} |
|
{"current_steps": 560, "total_steps": 1320, "eval_loss": 2.33099102973938, "epoch": 0.8484848484848485, "percentage": 42.42, "elapsed_time": "0:35:40", "remaining_time": "0:48:25", "throughput": "1320.51", "total_tokens": 2827104} |
|
{"current_steps": 562, "total_steps": 1320, "loss": 2.3728, "learning_rate": 1.395605206206417e-06, "epoch": 0.8515151515151516, "percentage": 42.58, "elapsed_time": "0:35:46", "remaining_time": "0:48:14", "throughput": "1320.75", "total_tokens": 2834520} |
|
{"current_steps": 564, "total_steps": 1320, "loss": 2.5102, "learning_rate": 1.3918823745987625e-06, "epoch": 0.8545454545454545, "percentage": 42.73, "elapsed_time": "0:35:53", "remaining_time": "0:48:07", "throughput": "1321.18", "total_tokens": 2845560} |
|
{"current_steps": 566, "total_steps": 1320, "loss": 2.4606, "learning_rate": 1.3881551287973006e-06, "epoch": 0.8575757575757575, "percentage": 42.88, "elapsed_time": "0:36:01", "remaining_time": "0:47:59", "throughput": "1321.53", "total_tokens": 2856168} |
|
{"current_steps": 568, "total_steps": 1320, "loss": 2.6367, "learning_rate": 1.384423554546151e-06, "epoch": 0.8606060606060606, "percentage": 43.03, "elapsed_time": "0:36:08", "remaining_time": "0:47:51", "throughput": "1321.95", "total_tokens": 2866872} |
|
{"current_steps": 570, "total_steps": 1320, "loss": 2.4952, "learning_rate": 1.3806877376890084e-06, "epoch": 0.8636363636363636, "percentage": 43.18, "elapsed_time": "0:36:16", "remaining_time": "0:47:43", "throughput": "1322.39", "total_tokens": 2878296} |
|
{"current_steps": 572, "total_steps": 1320, "loss": 2.4297, "learning_rate": 1.3769477641671668e-06, "epoch": 0.8666666666666667, "percentage": 43.33, "elapsed_time": "0:36:22", "remaining_time": "0:47:34", "throughput": "1322.66", "total_tokens": 2887056} |
|
{"current_steps": 574, "total_steps": 1320, "loss": 2.3496, "learning_rate": 1.373203720017544e-06, "epoch": 0.8696969696969697, "percentage": 43.48, "elapsed_time": "0:36:29", "remaining_time": "0:47:25", "throughput": "1322.97", "total_tokens": 2896152} |
|
{"current_steps": 576, "total_steps": 1320, "loss": 2.4491, "learning_rate": 1.3694556913706996e-06, "epoch": 0.8727272727272727, "percentage": 43.64, "elapsed_time": "0:36:35", "remaining_time": "0:47:16", "throughput": "1323.30", "total_tokens": 2905776} |
|
{"current_steps": 578, "total_steps": 1320, "loss": 2.1934, "learning_rate": 1.3657037644488574e-06, "epoch": 0.8757575757575757, "percentage": 43.79, "elapsed_time": "0:36:42", "remaining_time": "0:47:07", "throughput": "1323.62", "total_tokens": 2915568} |
|
{"current_steps": 580, "total_steps": 1320, "loss": 2.3654, "learning_rate": 1.361948025563918e-06, "epoch": 0.8787878787878788, "percentage": 43.94, "elapsed_time": "0:36:50", "remaining_time": "0:46:59", "throughput": "1324.03", "total_tokens": 2926128} |
|
{"current_steps": 580, "total_steps": 1320, "eval_loss": 2.33089542388916, "epoch": 0.8787878787878788, "percentage": 43.94, "elapsed_time": "0:36:55", "remaining_time": "0:47:07", "throughput": "1320.54", "total_tokens": 2926128} |
|
{"current_steps": 582, "total_steps": 1320, "loss": 2.4307, "learning_rate": 1.3581885611154759e-06, "epoch": 0.8818181818181818, "percentage": 44.09, "elapsed_time": "0:37:01", "remaining_time": "0:46:56", "throughput": "1320.78", "total_tokens": 2933568} |
|
{"current_steps": 584, "total_steps": 1320, "loss": 2.6203, "learning_rate": 1.3544254575888313e-06, "epoch": 0.8848484848484849, "percentage": 44.24, "elapsed_time": "0:37:07", "remaining_time": "0:46:47", "throughput": "1321.07", "total_tokens": 2942616} |
|
{"current_steps": 586, "total_steps": 1320, "loss": 2.4422, "learning_rate": 1.3506588015529994e-06, "epoch": 0.8878787878787879, "percentage": 44.39, "elapsed_time": "0:37:14", "remaining_time": "0:46:38", "throughput": "1321.41", "total_tokens": 2952480} |
|
{"current_steps": 588, "total_steps": 1320, "loss": 2.2622, "learning_rate": 1.3468886796587202e-06, "epoch": 0.8909090909090909, "percentage": 44.55, "elapsed_time": "0:37:21", "remaining_time": "0:46:30", "throughput": "1321.75", "total_tokens": 2962344} |
|
{"current_steps": 590, "total_steps": 1320, "loss": 2.3397, "learning_rate": 1.3431151786364647e-06, "epoch": 0.8939393939393939, "percentage": 44.7, "elapsed_time": "0:37:26", "remaining_time": "0:46:19", "throughput": "1321.93", "total_tokens": 2969832} |
|
{"current_steps": 592, "total_steps": 1320, "loss": 2.4768, "learning_rate": 1.33933838529444e-06, "epoch": 0.896969696969697, "percentage": 44.85, "elapsed_time": "0:37:33", "remaining_time": "0:46:10", "throughput": "1322.26", "total_tokens": 2979312} |
|
{"current_steps": 594, "total_steps": 1320, "loss": 2.3752, "learning_rate": 1.3355583865165912e-06, "epoch": 0.9, "percentage": 45.0, "elapsed_time": "0:37:40", "remaining_time": "0:46:03", "throughput": "1322.68", "total_tokens": 2990568} |
|
{"current_steps": 596, "total_steps": 1320, "loss": 2.3682, "learning_rate": 1.331775269260604e-06, "epoch": 0.9030303030303031, "percentage": 45.15, "elapsed_time": "0:37:46", "remaining_time": "0:45:53", "throughput": "1322.92", "total_tokens": 2998584} |
|
{"current_steps": 598, "total_steps": 1320, "loss": 2.4906, "learning_rate": 1.3279891205559034e-06, "epoch": 0.906060606060606, "percentage": 45.3, "elapsed_time": "0:37:51", "remaining_time": "0:45:42", "throughput": "1323.16", "total_tokens": 3005784} |
|
{"current_steps": 600, "total_steps": 1320, "loss": 2.4142, "learning_rate": 1.3242000275016527e-06, "epoch": 0.9090909090909091, "percentage": 45.45, "elapsed_time": "0:37:57", "remaining_time": "0:45:32", "throughput": "1323.43", "total_tokens": 3013968} |
|
{"current_steps": 600, "total_steps": 1320, "eval_loss": 2.3308167457580566, "epoch": 0.9090909090909091, "percentage": 45.45, "elapsed_time": "0:38:03", "remaining_time": "0:45:39", "throughput": "1320.05", "total_tokens": 3013968} |
|
{"current_steps": 602, "total_steps": 1320, "loss": 2.8198, "learning_rate": 1.3204080772647478e-06, "epoch": 0.9121212121212121, "percentage": 45.61, "elapsed_time": "0:38:10", "remaining_time": "0:45:31", "throughput": "1319.31", "total_tokens": 3021504} |
|
{"current_steps": 604, "total_steps": 1320, "loss": 2.4954, "learning_rate": 1.3166133570778143e-06, "epoch": 0.9151515151515152, "percentage": 45.76, "elapsed_time": "0:38:18", "remaining_time": "0:45:24", "throughput": "1319.76", "total_tokens": 3033264} |
|
{"current_steps": 606, "total_steps": 1320, "loss": 2.4191, "learning_rate": 1.3128159542371987e-06, "epoch": 0.9181818181818182, "percentage": 45.91, "elapsed_time": "0:38:26", "remaining_time": "0:45:17", "throughput": "1320.21", "total_tokens": 3044688} |
|
{"current_steps": 608, "total_steps": 1320, "loss": 2.2432, "learning_rate": 1.309015956100962e-06, "epoch": 0.9212121212121213, "percentage": 46.06, "elapsed_time": "0:38:34", "remaining_time": "0:45:10", "throughput": "1320.63", "total_tokens": 3056592} |
|
{"current_steps": 610, "total_steps": 1320, "loss": 2.4408, "learning_rate": 1.3052134500868686e-06, "epoch": 0.9242424242424242, "percentage": 46.21, "elapsed_time": "0:38:41", "remaining_time": "0:45:01", "throughput": "1320.97", "total_tokens": 3066048} |
|
{"current_steps": 612, "total_steps": 1320, "loss": 2.5248, "learning_rate": 1.301408523670376e-06, "epoch": 0.9272727272727272, "percentage": 46.36, "elapsed_time": "0:38:48", "remaining_time": "0:44:53", "throughput": "1321.33", "total_tokens": 3076128} |
|
{"current_steps": 614, "total_steps": 1320, "loss": 2.4202, "learning_rate": 1.297601264382622e-06, "epoch": 0.9303030303030303, "percentage": 46.52, "elapsed_time": "0:38:54", "remaining_time": "0:44:44", "throughput": "1321.63", "total_tokens": 3085464} |
|
{"current_steps": 616, "total_steps": 1320, "loss": 2.3525, "learning_rate": 1.2937917598084123e-06, "epoch": 0.9333333333333333, "percentage": 46.67, "elapsed_time": "0:39:00", "remaining_time": "0:44:35", "throughput": "1321.94", "total_tokens": 3094440} |
|
{"current_steps": 618, "total_steps": 1320, "loss": 2.3598, "learning_rate": 1.2899800975842038e-06, "epoch": 0.9363636363636364, "percentage": 46.82, "elapsed_time": "0:39:08", "remaining_time": "0:44:27", "throughput": "1322.37", "total_tokens": 3105720} |
|
{"current_steps": 620, "total_steps": 1320, "loss": 2.588, "learning_rate": 1.286166365396089e-06, "epoch": 0.9393939393939394, "percentage": 46.97, "elapsed_time": "0:39:14", "remaining_time": "0:44:18", "throughput": "1322.60", "total_tokens": 3113856} |
|
{"current_steps": 620, "total_steps": 1320, "eval_loss": 2.3307266235351562, "epoch": 0.9393939393939394, "percentage": 46.97, "elapsed_time": "0:39:20", "remaining_time": "0:44:24", "throughput": "1319.34", "total_tokens": 3113856} |
|
{"current_steps": 622, "total_steps": 1320, "loss": 2.4249, "learning_rate": 1.2823506509777807e-06, "epoch": 0.9424242424242424, "percentage": 47.12, "elapsed_time": "0:39:26", "remaining_time": "0:44:15", "throughput": "1319.65", "total_tokens": 3123288} |
|
{"current_steps": 624, "total_steps": 1320, "loss": 2.3551, "learning_rate": 1.2785330421085917e-06, "epoch": 0.9454545454545454, "percentage": 47.27, "elapsed_time": "0:39:32", "remaining_time": "0:44:06", "throughput": "1319.88", "total_tokens": 3131256} |
|
{"current_steps": 626, "total_steps": 1320, "loss": 2.1922, "learning_rate": 1.2747136266114156e-06, "epoch": 0.9484848484848485, "percentage": 47.42, "elapsed_time": "0:39:38", "remaining_time": "0:43:56", "throughput": "1320.18", "total_tokens": 3139656} |
|
{"current_steps": 628, "total_steps": 1320, "loss": 2.4905, "learning_rate": 1.270892492350707e-06, "epoch": 0.9515151515151515, "percentage": 47.58, "elapsed_time": "0:39:43", "remaining_time": "0:43:46", "throughput": "1320.42", "total_tokens": 3147744} |
|
{"current_steps": 630, "total_steps": 1320, "loss": 2.4588, "learning_rate": 1.267069727230461e-06, "epoch": 0.9545454545454546, "percentage": 47.73, "elapsed_time": "0:39:51", "remaining_time": "0:43:39", "throughput": "1320.76", "total_tokens": 3158376} |
|
{"current_steps": 632, "total_steps": 1320, "loss": 2.3059, "learning_rate": 1.2632454191921894e-06, "epoch": 0.9575757575757575, "percentage": 47.88, "elapsed_time": "0:39:58", "remaining_time": "0:43:30", "throughput": "1321.06", "total_tokens": 3168120} |
|
{"current_steps": 634, "total_steps": 1320, "loss": 2.5159, "learning_rate": 1.2594196562128978e-06, "epoch": 0.9606060606060606, "percentage": 48.03, "elapsed_time": "0:40:05", "remaining_time": "0:43:22", "throughput": "1321.42", "total_tokens": 3178176} |
|
{"current_steps": 636, "total_steps": 1320, "loss": 2.3614, "learning_rate": 1.2555925263030634e-06, "epoch": 0.9636363636363636, "percentage": 48.18, "elapsed_time": "0:40:13", "remaining_time": "0:43:15", "throughput": "1321.83", "total_tokens": 3189816} |
|
{"current_steps": 638, "total_steps": 1320, "loss": 2.6341, "learning_rate": 1.2517641175046078e-06, "epoch": 0.9666666666666667, "percentage": 48.33, "elapsed_time": "0:40:19", "remaining_time": "0:43:06", "throughput": "1322.09", "total_tokens": 3198528} |
|
{"current_steps": 640, "total_steps": 1320, "loss": 2.1493, "learning_rate": 1.2479345178888752e-06, "epoch": 0.9696969696969697, "percentage": 48.48, "elapsed_time": "0:40:27", "remaining_time": "0:42:58", "throughput": "1322.52", "total_tokens": 3209904} |
|
{"current_steps": 640, "total_steps": 1320, "eval_loss": 2.3306069374084473, "epoch": 0.9696969696969697, "percentage": 48.48, "elapsed_time": "0:40:32", "remaining_time": "0:43:04", "throughput": "1319.36", "total_tokens": 3209904} |
|
{"current_steps": 642, "total_steps": 1320, "loss": 2.5543, "learning_rate": 1.244103815554602e-06, "epoch": 0.9727272727272728, "percentage": 48.64, "elapsed_time": "0:40:40", "remaining_time": "0:42:57", "throughput": "1319.71", "total_tokens": 3220584} |
|
{"current_steps": 644, "total_steps": 1320, "loss": 2.3468, "learning_rate": 1.2402720986258936e-06, "epoch": 0.9757575757575757, "percentage": 48.79, "elapsed_time": "0:40:47", "remaining_time": "0:42:49", "throughput": "1320.11", "total_tokens": 3231576} |
|
{"current_steps": 646, "total_steps": 1320, "loss": 2.3648, "learning_rate": 1.2364394552501951e-06, "epoch": 0.9787878787878788, "percentage": 48.94, "elapsed_time": "0:40:53", "remaining_time": "0:42:39", "throughput": "1320.34", "total_tokens": 3239208} |
|
{"current_steps": 648, "total_steps": 1320, "loss": 2.5894, "learning_rate": 1.2326059735962648e-06, "epoch": 0.9818181818181818, "percentage": 49.09, "elapsed_time": "0:40:58", "remaining_time": "0:42:29", "throughput": "1320.52", "total_tokens": 3246072} |
|
{"current_steps": 650, "total_steps": 1320, "loss": 2.4484, "learning_rate": 1.228771741852145e-06, "epoch": 0.9848484848484849, "percentage": 49.24, "elapsed_time": "0:41:06", "remaining_time": "0:42:22", "throughput": "1320.92", "total_tokens": 3258000} |
|
{"current_steps": 652, "total_steps": 1320, "loss": 2.5076, "learning_rate": 1.2249368482231334e-06, "epoch": 0.9878787878787879, "percentage": 49.39, "elapsed_time": "0:41:11", "remaining_time": "0:42:12", "throughput": "1321.06", "total_tokens": 3264912} |
|
{"current_steps": 654, "total_steps": 1320, "loss": 2.3112, "learning_rate": 1.2211013809297546e-06, "epoch": 0.990909090909091, "percentage": 49.55, "elapsed_time": "0:41:17", "remaining_time": "0:42:02", "throughput": "1321.28", "total_tokens": 3272832} |
|
{"current_steps": 656, "total_steps": 1320, "loss": 2.3038, "learning_rate": 1.21726542820573e-06, "epoch": 0.9939393939393939, "percentage": 49.7, "elapsed_time": "0:41:24", "remaining_time": "0:41:55", "throughput": "1321.58", "total_tokens": 3283848} |
|
{"current_steps": 658, "total_steps": 1320, "loss": 2.3811, "learning_rate": 1.213429078295948e-06, "epoch": 0.996969696969697, "percentage": 49.85, "elapsed_time": "0:41:32", "remaining_time": "0:41:47", "throughput": "1321.93", "total_tokens": 3295272} |
|
{"current_steps": 660, "total_steps": 1320, "loss": 2.4287, "learning_rate": 1.2095924194544344e-06, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "0:41:40", "remaining_time": "0:41:40", "throughput": "1322.27", "total_tokens": 3305760} |
|
{"current_steps": 660, "total_steps": 1320, "eval_loss": 2.3300185203552246, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "0:41:45", "remaining_time": "0:41:45", "throughput": "1319.20", "total_tokens": 3305760} |
|
{"current_steps": 662, "total_steps": 1320, "loss": 2.4229, "learning_rate": 1.2057555399423218e-06, "epoch": 1.003030303030303, "percentage": 50.15, "elapsed_time": "0:41:53", "remaining_time": "0:41:38", "throughput": "1319.53", "total_tokens": 3316512} |
|
{"current_steps": 664, "total_steps": 1320, "loss": 2.3718, "learning_rate": 1.201918528025819e-06, "epoch": 1.006060606060606, "percentage": 50.3, "elapsed_time": "0:42:01", "remaining_time": "0:41:31", "throughput": "1319.94", "total_tokens": 3328800} |
|
{"current_steps": 666, "total_steps": 1320, "loss": 2.5418, "learning_rate": 1.1980814719741809e-06, "epoch": 1.009090909090909, "percentage": 50.45, "elapsed_time": "0:42:06", "remaining_time": "0:41:21", "throughput": "1320.11", "total_tokens": 3335424} |
|
{"current_steps": 668, "total_steps": 1320, "loss": 2.4076, "learning_rate": 1.1942444600576783e-06, "epoch": 1.0121212121212122, "percentage": 50.61, "elapsed_time": "0:42:13", "remaining_time": "0:41:12", "throughput": "1320.40", "total_tokens": 3344904} |
|
{"current_steps": 670, "total_steps": 1320, "loss": 2.3543, "learning_rate": 1.1904075805455657e-06, "epoch": 1.0151515151515151, "percentage": 50.76, "elapsed_time": "0:42:20", "remaining_time": "0:41:04", "throughput": "1320.71", "total_tokens": 3355176} |
|
{"current_steps": 672, "total_steps": 1320, "loss": 2.3537, "learning_rate": 1.186570921704052e-06, "epoch": 1.018181818181818, "percentage": 50.91, "elapsed_time": "0:42:27", "remaining_time": "0:40:56", "throughput": "1321.08", "total_tokens": 3366096} |
|
{"current_steps": 674, "total_steps": 1320, "loss": 2.1717, "learning_rate": 1.18273457179427e-06, "epoch": 1.0212121212121212, "percentage": 51.06, "elapsed_time": "0:42:34", "remaining_time": "0:40:48", "throughput": "1321.39", "total_tokens": 3375696} |
|
{"current_steps": 676, "total_steps": 1320, "loss": 2.408, "learning_rate": 1.1788986190702453e-06, "epoch": 1.0242424242424242, "percentage": 51.21, "elapsed_time": "0:42:40", "remaining_time": "0:40:39", "throughput": "1321.64", "total_tokens": 3384288} |
|
{"current_steps": 678, "total_steps": 1320, "loss": 2.3485, "learning_rate": 1.1750631517768667e-06, "epoch": 1.0272727272727273, "percentage": 51.36, "elapsed_time": "0:42:45", "remaining_time": "0:40:29", "throughput": "1321.81", "total_tokens": 3391128} |
|
{"current_steps": 680, "total_steps": 1320, "loss": 2.4617, "learning_rate": 1.1712282581478552e-06, "epoch": 1.0303030303030303, "percentage": 51.52, "elapsed_time": "0:42:52", "remaining_time": "0:40:21", "throughput": "1322.15", "total_tokens": 3401640} |
|
{"current_steps": 680, "total_steps": 1320, "eval_loss": 2.3293986320495605, "epoch": 1.0303030303030303, "percentage": 51.52, "elapsed_time": "0:42:58", "remaining_time": "0:40:26", "throughput": "1319.16", "total_tokens": 3401640} |
|
{"current_steps": 682, "total_steps": 1320, "loss": 2.3971, "learning_rate": 1.167394026403735e-06, "epoch": 1.0333333333333334, "percentage": 51.67, "elapsed_time": "0:43:05", "remaining_time": "0:40:18", "throughput": "1319.44", "total_tokens": 3411120} |
|
{"current_steps": 684, "total_steps": 1320, "loss": 2.3265, "learning_rate": 1.1635605447498048e-06, "epoch": 1.0363636363636364, "percentage": 51.82, "elapsed_time": "0:43:12", "remaining_time": "0:40:10", "throughput": "1319.74", "total_tokens": 3420912} |
|
{"current_steps": 686, "total_steps": 1320, "loss": 2.5114, "learning_rate": 1.1597279013741067e-06, "epoch": 1.0393939393939393, "percentage": 51.97, "elapsed_time": "0:43:18", "remaining_time": "0:40:01", "throughput": "1319.97", "total_tokens": 3429744} |
|
{"current_steps": 688, "total_steps": 1320, "loss": 2.5497, "learning_rate": 1.1558961844453978e-06, "epoch": 1.0424242424242425, "percentage": 52.12, "elapsed_time": "0:43:24", "remaining_time": "0:39:52", "throughput": "1320.24", "total_tokens": 3438936} |
|
{"current_steps": 690, "total_steps": 1320, "loss": 2.5458, "learning_rate": 1.152065482111125e-06, "epoch": 1.0454545454545454, "percentage": 52.27, "elapsed_time": "0:43:29", "remaining_time": "0:39:42", "throughput": "1320.38", "total_tokens": 3444912} |
|
{"current_steps": 692, "total_steps": 1320, "loss": 2.3622, "learning_rate": 1.1482358824953919e-06, "epoch": 1.0484848484848486, "percentage": 52.42, "elapsed_time": "0:43:37", "remaining_time": "0:39:35", "throughput": "1320.80", "total_tokens": 3456936} |
|
{"current_steps": 694, "total_steps": 1320, "loss": 2.221, "learning_rate": 1.144407473696937e-06, "epoch": 1.0515151515151515, "percentage": 52.58, "elapsed_time": "0:43:43", "remaining_time": "0:39:26", "throughput": "1321.07", "total_tokens": 3466344} |
|
{"current_steps": 696, "total_steps": 1320, "loss": 2.3708, "learning_rate": 1.1405803437871027e-06, "epoch": 1.0545454545454545, "percentage": 52.73, "elapsed_time": "0:43:52", "remaining_time": "0:39:20", "throughput": "1321.44", "total_tokens": 3478632} |
|
{"current_steps": 698, "total_steps": 1320, "loss": 2.5175, "learning_rate": 1.136754580807811e-06, "epoch": 1.0575757575757576, "percentage": 52.88, "elapsed_time": "0:43:57", "remaining_time": "0:39:10", "throughput": "1321.61", "total_tokens": 3485496} |
|
{"current_steps": 700, "total_steps": 1320, "loss": 2.2166, "learning_rate": 1.1329302727695389e-06, "epoch": 1.0606060606060606, "percentage": 53.03, "elapsed_time": "0:44:04", "remaining_time": "0:39:02", "throughput": "1321.90", "total_tokens": 3496272} |
|
{"current_steps": 700, "total_steps": 1320, "eval_loss": 2.329413890838623, "epoch": 1.0606060606060606, "percentage": 53.03, "elapsed_time": "0:44:10", "remaining_time": "0:39:07", "throughput": "1318.99", "total_tokens": 3496272} |
|
{"current_steps": 702, "total_steps": 1320, "loss": 2.5228, "learning_rate": 1.1291075076492928e-06, "epoch": 1.0636363636363637, "percentage": 53.18, "elapsed_time": "0:44:19", "remaining_time": "0:39:01", "throughput": "1318.50", "total_tokens": 3506712} |
|
{"current_steps": 704, "total_steps": 1320, "loss": 2.4304, "learning_rate": 1.1252863733885845e-06, "epoch": 1.0666666666666667, "percentage": 53.33, "elapsed_time": "0:44:28", "remaining_time": "0:38:54", "throughput": "1318.89", "total_tokens": 3518856} |
|
{"current_steps": 706, "total_steps": 1320, "loss": 2.0998, "learning_rate": 1.1214669578914087e-06, "epoch": 1.0696969696969698, "percentage": 53.48, "elapsed_time": "0:44:34", "remaining_time": "0:38:46", "throughput": "1319.16", "total_tokens": 3528456} |
|
{"current_steps": 708, "total_steps": 1320, "loss": 2.146, "learning_rate": 1.1176493490222192e-06, "epoch": 1.0727272727272728, "percentage": 53.64, "elapsed_time": "0:44:40", "remaining_time": "0:38:37", "throughput": "1319.39", "total_tokens": 3537048} |
|
{"current_steps": 710, "total_steps": 1320, "loss": 2.3275, "learning_rate": 1.1138336346039113e-06, "epoch": 1.0757575757575757, "percentage": 53.79, "elapsed_time": "0:44:46", "remaining_time": "0:38:27", "throughput": "1319.59", "total_tokens": 3544536} |
|
{"current_steps": 712, "total_steps": 1320, "loss": 2.3477, "learning_rate": 1.1100199024157966e-06, "epoch": 1.0787878787878789, "percentage": 53.94, "elapsed_time": "0:44:50", "remaining_time": "0:38:17", "throughput": "1319.76", "total_tokens": 3551472} |
|
{"current_steps": 714, "total_steps": 1320, "loss": 2.4356, "learning_rate": 1.1062082401915878e-06, "epoch": 1.0818181818181818, "percentage": 54.09, "elapsed_time": "0:44:57", "remaining_time": "0:38:09", "throughput": "1320.07", "total_tokens": 3561312} |
|
{"current_steps": 716, "total_steps": 1320, "loss": 2.5201, "learning_rate": 1.1023987356173782e-06, "epoch": 1.084848484848485, "percentage": 54.24, "elapsed_time": "0:45:04", "remaining_time": "0:38:01", "throughput": "1320.37", "total_tokens": 3570456} |
|
{"current_steps": 718, "total_steps": 1320, "loss": 2.0526, "learning_rate": 1.0985914763296245e-06, "epoch": 1.087878787878788, "percentage": 54.39, "elapsed_time": "0:45:12", "remaining_time": "0:37:54", "throughput": "1320.77", "total_tokens": 3582744} |
|
{"current_steps": 720, "total_steps": 1320, "loss": 2.2984, "learning_rate": 1.0947865499131315e-06, "epoch": 1.0909090909090908, "percentage": 54.55, "elapsed_time": "0:45:21", "remaining_time": "0:37:47", "throughput": "1321.14", "total_tokens": 3595032} |
|
{"current_steps": 720, "total_steps": 1320, "eval_loss": 2.328953504562378, "epoch": 1.0909090909090908, "percentage": 54.55, "elapsed_time": "0:45:26", "remaining_time": "0:37:52", "throughput": "1318.32", "total_tokens": 3595032} |
|
{"current_steps": 722, "total_steps": 1320, "loss": 1.7515, "learning_rate": 1.0909840438990383e-06, "epoch": 1.093939393939394, "percentage": 54.7, "elapsed_time": "0:45:34", "remaining_time": "0:37:44", "throughput": "1318.66", "total_tokens": 3606048} |
|
{"current_steps": 724, "total_steps": 1320, "loss": 2.3416, "learning_rate": 1.0871840457628012e-06, "epoch": 1.096969696969697, "percentage": 54.85, "elapsed_time": "0:45:41", "remaining_time": "0:37:37", "throughput": "1318.97", "total_tokens": 3616368} |
|
{"current_steps": 726, "total_steps": 1320, "loss": 2.3327, "learning_rate": 1.0833866429221858e-06, "epoch": 1.1, "percentage": 55.0, "elapsed_time": "0:45:50", "remaining_time": "0:37:30", "throughput": "1319.31", "total_tokens": 3628368} |
|
{"current_steps": 728, "total_steps": 1320, "loss": 2.401, "learning_rate": 1.0795919227352523e-06, "epoch": 1.103030303030303, "percentage": 55.15, "elapsed_time": "0:45:56", "remaining_time": "0:37:21", "throughput": "1319.60", "total_tokens": 3637848} |
|
{"current_steps": 730, "total_steps": 1320, "loss": 2.2503, "learning_rate": 1.0757999724983474e-06, "epoch": 1.106060606060606, "percentage": 55.3, "elapsed_time": "0:46:03", "remaining_time": "0:37:13", "throughput": "1319.84", "total_tokens": 3647640} |
|
{"current_steps": 732, "total_steps": 1320, "loss": 2.4449, "learning_rate": 1.0720108794440967e-06, "epoch": 1.1090909090909091, "percentage": 55.45, "elapsed_time": "0:46:11", "remaining_time": "0:37:06", "throughput": "1320.12", "total_tokens": 3658272} |
|
{"current_steps": 734, "total_steps": 1320, "loss": 2.2787, "learning_rate": 1.068224730739396e-06, "epoch": 1.112121212121212, "percentage": 55.61, "elapsed_time": "0:46:18", "remaining_time": "0:36:58", "throughput": "1320.38", "total_tokens": 3668760} |
|
{"current_steps": 736, "total_steps": 1320, "loss": 2.2226, "learning_rate": 1.064441613483409e-06, "epoch": 1.1151515151515152, "percentage": 55.76, "elapsed_time": "0:46:26", "remaining_time": "0:36:50", "throughput": "1320.67", "total_tokens": 3679608} |
|
{"current_steps": 738, "total_steps": 1320, "loss": 2.6116, "learning_rate": 1.0606616147055602e-06, "epoch": 1.1181818181818182, "percentage": 55.91, "elapsed_time": "0:46:33", "remaining_time": "0:36:42", "throughput": "1321.01", "total_tokens": 3689832} |
|
{"current_steps": 740, "total_steps": 1320, "loss": 2.1242, "learning_rate": 1.056884821363535e-06, "epoch": 1.121212121212121, "percentage": 56.06, "elapsed_time": "0:46:40", "remaining_time": "0:36:35", "throughput": "1321.27", "total_tokens": 3700392} |
|
{"current_steps": 740, "total_steps": 1320, "eval_loss": 2.3289198875427246, "epoch": 1.121212121212121, "percentage": 56.06, "elapsed_time": "0:46:46", "remaining_time": "0:36:39", "throughput": "1318.53", "total_tokens": 3700392} |
|
{"current_steps": 742, "total_steps": 1320, "loss": 2.3979, "learning_rate": 1.05311132034128e-06, "epoch": 1.1242424242424243, "percentage": 56.21, "elapsed_time": "0:46:52", "remaining_time": "0:36:31", "throughput": "1318.77", "total_tokens": 3709632} |
|
{"current_steps": 744, "total_steps": 1320, "loss": 2.4608, "learning_rate": 1.0493411984470007e-06, "epoch": 1.1272727272727272, "percentage": 56.36, "elapsed_time": "0:46:58", "remaining_time": "0:36:22", "throughput": "1318.98", "total_tokens": 3717720} |
|
{"current_steps": 746, "total_steps": 1320, "loss": 2.5028, "learning_rate": 1.0455745424111686e-06, "epoch": 1.1303030303030304, "percentage": 56.52, "elapsed_time": "0:47:05", "remaining_time": "0:36:14", "throughput": "1319.28", "total_tokens": 3728280} |
|
{"current_steps": 748, "total_steps": 1320, "loss": 2.5461, "learning_rate": 1.0418114388845242e-06, "epoch": 1.1333333333333333, "percentage": 56.67, "elapsed_time": "0:47:11", "remaining_time": "0:36:05", "throughput": "1319.45", "total_tokens": 3735888} |
|
{"current_steps": 750, "total_steps": 1320, "loss": 2.2596, "learning_rate": 1.038051974436082e-06, "epoch": 1.1363636363636362, "percentage": 56.82, "elapsed_time": "0:47:19", "remaining_time": "0:35:58", "throughput": "1319.83", "total_tokens": 3747720} |
|
{"current_steps": 752, "total_steps": 1320, "loss": 2.2801, "learning_rate": 1.034296235551143e-06, "epoch": 1.1393939393939394, "percentage": 56.97, "elapsed_time": "0:47:27", "remaining_time": "0:35:50", "throughput": "1320.17", "total_tokens": 3758640} |
|
{"current_steps": 754, "total_steps": 1320, "loss": 2.3337, "learning_rate": 1.0305443086293003e-06, "epoch": 1.1424242424242423, "percentage": 57.12, "elapsed_time": "0:47:34", "remaining_time": "0:35:42", "throughput": "1320.46", "total_tokens": 3769128} |
|
{"current_steps": 756, "total_steps": 1320, "loss": 2.6706, "learning_rate": 1.0267962799824562e-06, "epoch": 1.1454545454545455, "percentage": 57.27, "elapsed_time": "0:47:41", "remaining_time": "0:35:34", "throughput": "1320.72", "total_tokens": 3779304} |
|
{"current_steps": 758, "total_steps": 1320, "loss": 2.2422, "learning_rate": 1.0230522358328331e-06, "epoch": 1.1484848484848484, "percentage": 57.42, "elapsed_time": "0:47:48", "remaining_time": "0:35:26", "throughput": "1320.99", "total_tokens": 3789312} |
|
{"current_steps": 760, "total_steps": 1320, "loss": 2.4892, "learning_rate": 1.0193122623109917e-06, "epoch": 1.1515151515151516, "percentage": 57.58, "elapsed_time": "0:47:53", "remaining_time": "0:35:17", "throughput": "1321.16", "total_tokens": 3796848} |
|
{"current_steps": 760, "total_steps": 1320, "eval_loss": 2.3289122581481934, "epoch": 1.1515151515151516, "percentage": 57.58, "elapsed_time": "0:47:59", "remaining_time": "0:35:21", "throughput": "1318.49", "total_tokens": 3796848} |
|
{"current_steps": 762, "total_steps": 1320, "loss": 2.2826, "learning_rate": 1.015576445453849e-06, "epoch": 1.1545454545454545, "percentage": 57.73, "elapsed_time": "0:48:06", "remaining_time": "0:35:13", "throughput": "1318.78", "total_tokens": 3806640} |
|
{"current_steps": 764, "total_steps": 1320, "loss": 2.4358, "learning_rate": 1.0118448712026992e-06, "epoch": 1.1575757575757575, "percentage": 57.88, "elapsed_time": "0:48:14", "remaining_time": "0:35:06", "throughput": "1319.09", "total_tokens": 3817608} |
|
{"current_steps": 766, "total_steps": 1320, "loss": 2.421, "learning_rate": 1.0081176254012374e-06, "epoch": 1.1606060606060606, "percentage": 58.03, "elapsed_time": "0:48:21", "remaining_time": "0:34:58", "throughput": "1319.34", "total_tokens": 3827592} |
|
{"current_steps": 768, "total_steps": 1320, "loss": 2.3245, "learning_rate": 1.0043947937935832e-06, "epoch": 1.1636363636363636, "percentage": 58.18, "elapsed_time": "0:48:27", "remaining_time": "0:34:50", "throughput": "1319.63", "total_tokens": 3837264} |
|
{"current_steps": 770, "total_steps": 1320, "loss": 2.3388, "learning_rate": 1.0006764620223093e-06, "epoch": 1.1666666666666667, "percentage": 58.33, "elapsed_time": "0:48:35", "remaining_time": "0:34:42", "throughput": "1319.91", "total_tokens": 3847656} |
|
{"current_steps": 772, "total_steps": 1320, "loss": 2.3956, "learning_rate": 9.96962715626471e-07, "epoch": 1.1696969696969697, "percentage": 58.48, "elapsed_time": "0:48:42", "remaining_time": "0:34:34", "throughput": "1320.22", "total_tokens": 3858600} |
|
{"current_steps": 774, "total_steps": 1320, "loss": 2.3562, "learning_rate": 9.932536400396393e-07, "epoch": 1.1727272727272728, "percentage": 58.64, "elapsed_time": "0:48:50", "remaining_time": "0:34:27", "throughput": "1320.57", "total_tokens": 3870120} |
|
{"current_steps": 776, "total_steps": 1320, "loss": 2.5851, "learning_rate": 9.895493205879332e-07, "epoch": 1.1757575757575758, "percentage": 58.79, "elapsed_time": "0:48:57", "remaining_time": "0:34:19", "throughput": "1320.80", "total_tokens": 3879600} |
|
{"current_steps": 778, "total_steps": 1320, "loss": 2.7061, "learning_rate": 9.858498424880592e-07, "epoch": 1.1787878787878787, "percentage": 58.94, "elapsed_time": "0:49:03", "remaining_time": "0:34:10", "throughput": "1321.10", "total_tokens": 3889296} |
|
{"current_steps": 780, "total_steps": 1320, "loss": 2.4251, "learning_rate": 9.821552908453506e-07, "epoch": 1.1818181818181819, "percentage": 59.09, "elapsed_time": "0:49:12", "remaining_time": "0:34:04", "throughput": "1321.43", "total_tokens": 3901464} |
|
{"current_steps": 780, "total_steps": 1320, "eval_loss": 2.3287835121154785, "epoch": 1.1818181818181819, "percentage": 59.09, "elapsed_time": "0:49:18", "remaining_time": "0:34:08", "throughput": "1318.82", "total_tokens": 3901464} |
|
{"current_steps": 782, "total_steps": 1320, "loss": 2.6212, "learning_rate": 9.784657506518078e-07, "epoch": 1.1848484848484848, "percentage": 59.24, "elapsed_time": "0:49:24", "remaining_time": "0:33:59", "throughput": "1319.08", "total_tokens": 3910656} |
|
{"current_steps": 784, "total_steps": 1320, "loss": 2.3086, "learning_rate": 9.747813067841455e-07, "epoch": 1.187878787878788, "percentage": 59.39, "elapsed_time": "0:49:33", "remaining_time": "0:33:52", "throughput": "1319.43", "total_tokens": 3922944} |
|
{"current_steps": 786, "total_steps": 1320, "loss": 2.5027, "learning_rate": 9.711020440018384e-07, "epoch": 1.190909090909091, "percentage": 59.55, "elapsed_time": "0:49:39", "remaining_time": "0:33:44", "throughput": "1319.69", "total_tokens": 3931752} |
|
{"current_steps": 788, "total_steps": 1320, "loss": 2.4088, "learning_rate": 9.674280469451718e-07, "epoch": 1.1939393939393939, "percentage": 59.7, "elapsed_time": "0:49:46", "remaining_time": "0:33:36", "throughput": "1319.99", "total_tokens": 3942120} |
|
{"current_steps": 790, "total_steps": 1320, "loss": 2.3161, "learning_rate": 9.637594001332956e-07, "epoch": 1.196969696969697, "percentage": 59.85, "elapsed_time": "0:49:53", "remaining_time": "0:33:28", "throughput": "1320.22", "total_tokens": 3952248} |
|
{"current_steps": 792, "total_steps": 1320, "loss": 2.4837, "learning_rate": 9.600961879622777e-07, "epoch": 1.2, "percentage": 60.0, "elapsed_time": "0:49:59", "remaining_time": "0:33:19", "throughput": "1320.42", "total_tokens": 3960600} |
|
{"current_steps": 794, "total_steps": 1320, "loss": 2.3195, "learning_rate": 9.564384947031646e-07, "epoch": 1.2030303030303031, "percentage": 60.15, "elapsed_time": "0:50:07", "remaining_time": "0:33:12", "throughput": "1320.72", "total_tokens": 3971568} |
|
{"current_steps": 796, "total_steps": 1320, "loss": 2.5749, "learning_rate": 9.527864045000421e-07, "epoch": 1.206060606060606, "percentage": 60.3, "elapsed_time": "0:50:15", "remaining_time": "0:33:05", "throughput": "1321.06", "total_tokens": 3983592} |
|
{"current_steps": 798, "total_steps": 1320, "loss": 2.39, "learning_rate": 9.491400013680988e-07, "epoch": 1.209090909090909, "percentage": 60.45, "elapsed_time": "0:50:22", "remaining_time": "0:32:57", "throughput": "1321.34", "total_tokens": 3994008} |
|
{"current_steps": 800, "total_steps": 1320, "loss": 2.3579, "learning_rate": 9.454993691916948e-07, "epoch": 1.2121212121212122, "percentage": 60.61, "elapsed_time": "0:50:30", "remaining_time": "0:32:49", "throughput": "1321.61", "total_tokens": 4004496} |
|
{"current_steps": 800, "total_steps": 1320, "eval_loss": 2.3282077312469482, "epoch": 1.2121212121212122, "percentage": 60.61, "elapsed_time": "0:50:35", "remaining_time": "0:32:53", "throughput": "1319.07", "total_tokens": 4004496} |
|
{"current_steps": 802, "total_steps": 1320, "loss": 2.3152, "learning_rate": 9.418645917224303e-07, "epoch": 1.215151515151515, "percentage": 60.76, "elapsed_time": "0:50:45", "remaining_time": "0:32:47", "throughput": "1318.69", "total_tokens": 4016592} |
|
{"current_steps": 804, "total_steps": 1320, "loss": 2.4599, "learning_rate": 9.382357525772202e-07, "epoch": 1.2181818181818183, "percentage": 60.91, "elapsed_time": "0:50:51", "remaining_time": "0:32:38", "throughput": "1318.92", "total_tokens": 4024800} |
|
{"current_steps": 806, "total_steps": 1320, "loss": 2.2412, "learning_rate": 9.346129352363705e-07, "epoch": 1.2212121212121212, "percentage": 61.06, "elapsed_time": "0:50:58", "remaining_time": "0:32:30", "throughput": "1319.19", "total_tokens": 4035144} |
|
{"current_steps": 808, "total_steps": 1320, "loss": 2.4022, "learning_rate": 9.309962230416574e-07, "epoch": 1.2242424242424241, "percentage": 61.21, "elapsed_time": "0:51:04", "remaining_time": "0:32:21", "throughput": "1319.38", "total_tokens": 4042920} |
|
{"current_steps": 810, "total_steps": 1320, "loss": 2.4082, "learning_rate": 9.273856991944089e-07, "epoch": 1.2272727272727273, "percentage": 61.36, "elapsed_time": "0:51:11", "remaining_time": "0:32:13", "throughput": "1319.63", "total_tokens": 4053072} |
|
{"current_steps": 812, "total_steps": 1320, "loss": 2.3188, "learning_rate": 9.237814467535941e-07, "epoch": 1.2303030303030302, "percentage": 61.52, "elapsed_time": "0:51:18", "remaining_time": "0:32:05", "throughput": "1319.90", "total_tokens": 4063368} |
|
{"current_steps": 814, "total_steps": 1320, "loss": 2.4367, "learning_rate": 9.201835486339084e-07, "epoch": 1.2333333333333334, "percentage": 61.67, "elapsed_time": "0:51:24", "remaining_time": "0:31:57", "throughput": "1320.11", "total_tokens": 4072392} |
|
{"current_steps": 816, "total_steps": 1320, "loss": 2.3054, "learning_rate": 9.165920876038694e-07, "epoch": 1.2363636363636363, "percentage": 61.82, "elapsed_time": "0:51:32", "remaining_time": "0:31:49", "throughput": "1320.42", "total_tokens": 4083072} |
|
{"current_steps": 818, "total_steps": 1320, "loss": 2.4475, "learning_rate": 9.130071462839108e-07, "epoch": 1.2393939393939393, "percentage": 61.97, "elapsed_time": "0:51:39", "remaining_time": "0:31:42", "throughput": "1320.70", "total_tokens": 4093776} |
|
{"current_steps": 820, "total_steps": 1320, "loss": 2.4868, "learning_rate": 9.094288071444822e-07, "epoch": 1.2424242424242424, "percentage": 62.12, "elapsed_time": "0:51:48", "remaining_time": "0:31:35", "throughput": "1321.02", "total_tokens": 4106040} |
|
{"current_steps": 820, "total_steps": 1320, "eval_loss": 2.3283748626708984, "epoch": 1.2424242424242424, "percentage": 62.12, "elapsed_time": "0:51:54", "remaining_time": "0:31:38", "throughput": "1318.55", "total_tokens": 4106040} |
|
{"current_steps": 822, "total_steps": 1320, "loss": 2.4682, "learning_rate": 9.058571525041534e-07, "epoch": 1.2454545454545454, "percentage": 62.27, "elapsed_time": "0:52:01", "remaining_time": "0:31:31", "throughput": "1318.88", "total_tokens": 4117392} |
|
{"current_steps": 824, "total_steps": 1320, "loss": 2.372, "learning_rate": 9.022922645277176e-07, "epoch": 1.2484848484848485, "percentage": 62.42, "elapsed_time": "0:52:07", "remaining_time": "0:31:22", "throughput": "1319.03", "total_tokens": 4125696} |
|
{"current_steps": 826, "total_steps": 1320, "loss": 2.4424, "learning_rate": 8.987342252243042e-07, "epoch": 1.2515151515151515, "percentage": 62.58, "elapsed_time": "0:52:16", "remaining_time": "0:31:15", "throughput": "1319.35", "total_tokens": 4137816} |
|
{"current_steps": 828, "total_steps": 1320, "loss": 2.4164, "learning_rate": 8.951831164454908e-07, "epoch": 1.2545454545454544, "percentage": 62.73, "elapsed_time": "0:52:24", "remaining_time": "0:31:08", "throughput": "1319.68", "total_tokens": 4150104} |
|
{"current_steps": 830, "total_steps": 1320, "loss": 2.4451, "learning_rate": 8.916390198834203e-07, "epoch": 1.2575757575757576, "percentage": 62.88, "elapsed_time": "0:52:32", "remaining_time": "0:31:00", "throughput": "1319.95", "total_tokens": 4160832} |
|
{"current_steps": 832, "total_steps": 1320, "loss": 2.3256, "learning_rate": 8.88102017068922e-07, "epoch": 1.2606060606060607, "percentage": 63.03, "elapsed_time": "0:52:38", "remaining_time": "0:30:52", "throughput": "1320.18", "total_tokens": 4170216} |
|
{"current_steps": 834, "total_steps": 1320, "loss": 2.2176, "learning_rate": 8.845721893696354e-07, "epoch": 1.2636363636363637, "percentage": 63.18, "elapsed_time": "0:52:46", "remaining_time": "0:30:45", "throughput": "1320.49", "total_tokens": 4181256} |
|
{"current_steps": 836, "total_steps": 1320, "loss": 2.3812, "learning_rate": 8.810496179881387e-07, "epoch": 1.2666666666666666, "percentage": 63.33, "elapsed_time": "0:52:53", "remaining_time": "0:30:37", "throughput": "1320.79", "total_tokens": 4192128} |
|
{"current_steps": 838, "total_steps": 1320, "loss": 2.4275, "learning_rate": 8.775343839600816e-07, "epoch": 1.2696969696969698, "percentage": 63.48, "elapsed_time": "0:53:00", "remaining_time": "0:30:29", "throughput": "1321.04", "total_tokens": 4202208} |
|
{"current_steps": 840, "total_steps": 1320, "loss": 2.4706, "learning_rate": 8.740265681523195e-07, "epoch": 1.2727272727272727, "percentage": 63.64, "elapsed_time": "0:53:06", "remaining_time": "0:30:21", "throughput": "1321.20", "total_tokens": 4210464} |
|
{"current_steps": 840, "total_steps": 1320, "eval_loss": 2.3279545307159424, "epoch": 1.2727272727272727, "percentage": 63.64, "elapsed_time": "0:53:12", "remaining_time": "0:30:24", "throughput": "1318.79", "total_tokens": 4210464} |
|
{"current_steps": 842, "total_steps": 1320, "loss": 2.4054, "learning_rate": 8.705262512610539e-07, "epoch": 1.2757575757575759, "percentage": 63.79, "elapsed_time": "0:53:17", "remaining_time": "0:30:15", "throughput": "1318.94", "total_tokens": 4217928} |
|
{"current_steps": 844, "total_steps": 1320, "loss": 2.4653, "learning_rate": 8.670335138099765e-07, "epoch": 1.2787878787878788, "percentage": 63.94, "elapsed_time": "0:53:24", "remaining_time": "0:30:07", "throughput": "1319.15", "total_tokens": 4226904} |
|
{"current_steps": 846, "total_steps": 1320, "loss": 2.3184, "learning_rate": 8.635484361484158e-07, "epoch": 1.2818181818181817, "percentage": 64.09, "elapsed_time": "0:53:31", "remaining_time": "0:29:59", "throughput": "1319.44", "total_tokens": 4237656} |
|
{"current_steps": 848, "total_steps": 1320, "loss": 2.3415, "learning_rate": 8.600710984494909e-07, "epoch": 1.284848484848485, "percentage": 64.24, "elapsed_time": "0:53:39", "remaining_time": "0:29:51", "throughput": "1319.73", "total_tokens": 4248720} |
|
{"current_steps": 850, "total_steps": 1320, "loss": 2.6382, "learning_rate": 8.56601580708263e-07, "epoch": 1.2878787878787878, "percentage": 64.39, "elapsed_time": "0:53:42", "remaining_time": "0:29:42", "throughput": "1319.81", "total_tokens": 4253448} |
|
{"current_steps": 852, "total_steps": 1320, "loss": 2.5681, "learning_rate": 8.531399627398991e-07, "epoch": 1.290909090909091, "percentage": 64.55, "elapsed_time": "0:53:48", "remaining_time": "0:29:33", "throughput": "1320.04", "total_tokens": 4261488} |
|
{"current_steps": 854, "total_steps": 1320, "loss": 2.2039, "learning_rate": 8.496863241778346e-07, "epoch": 1.293939393939394, "percentage": 64.7, "elapsed_time": "0:53:56", "remaining_time": "0:29:25", "throughput": "1320.36", "total_tokens": 4273104} |
|
{"current_steps": 856, "total_steps": 1320, "loss": 2.3936, "learning_rate": 8.462407444719405e-07, "epoch": 1.2969696969696969, "percentage": 64.85, "elapsed_time": "0:54:03", "remaining_time": "0:29:18", "throughput": "1320.59", "total_tokens": 4283136} |
|
{"current_steps": 858, "total_steps": 1320, "loss": 2.3669, "learning_rate": 8.428033028866967e-07, "epoch": 1.3, "percentage": 65.0, "elapsed_time": "0:54:09", "remaining_time": "0:29:09", "throughput": "1320.78", "total_tokens": 4292208} |
|
{"current_steps": 860, "total_steps": 1320, "loss": 2.4704, "learning_rate": 8.393740784993677e-07, "epoch": 1.303030303030303, "percentage": 65.15, "elapsed_time": "0:54:16", "remaining_time": "0:29:01", "throughput": "1321.03", "total_tokens": 4302240} |
|
{"current_steps": 860, "total_steps": 1320, "eval_loss": 2.3276970386505127, "epoch": 1.303030303030303, "percentage": 65.15, "elapsed_time": "0:54:22", "remaining_time": "0:29:05", "throughput": "1318.67", "total_tokens": 4302240} |
|
{"current_steps": 862, "total_steps": 1320, "loss": 2.7633, "learning_rate": 8.359531501981846e-07, "epoch": 1.3060606060606061, "percentage": 65.3, "elapsed_time": "0:54:29", "remaining_time": "0:28:57", "throughput": "1318.92", "total_tokens": 4311888} |
|
{"current_steps": 864, "total_steps": 1320, "loss": 2.1671, "learning_rate": 8.325405966805295e-07, "epoch": 1.309090909090909, "percentage": 65.45, "elapsed_time": "0:54:36", "remaining_time": "0:28:49", "throughput": "1319.15", "total_tokens": 4321992} |
|
{"current_steps": 866, "total_steps": 1320, "loss": 2.4139, "learning_rate": 8.291364964511247e-07, "epoch": 1.312121212121212, "percentage": 65.61, "elapsed_time": "0:54:43", "remaining_time": "0:28:41", "throughput": "1319.45", "total_tokens": 4332408} |
|
{"current_steps": 868, "total_steps": 1320, "loss": 2.5621, "learning_rate": 8.25740927820228e-07, "epoch": 1.3151515151515152, "percentage": 65.76, "elapsed_time": "0:54:52", "remaining_time": "0:28:34", "throughput": "1319.76", "total_tokens": 4344696} |
|
{"current_steps": 870, "total_steps": 1320, "loss": 2.4142, "learning_rate": 8.223539689018299e-07, "epoch": 1.3181818181818181, "percentage": 65.91, "elapsed_time": "0:54:59", "remaining_time": "0:28:26", "throughput": "1320.08", "total_tokens": 4356168} |
|
{"current_steps": 872, "total_steps": 1320, "loss": 2.3459, "learning_rate": 8.189756976118568e-07, "epoch": 1.3212121212121213, "percentage": 66.06, "elapsed_time": "0:55:05", "remaining_time": "0:28:18", "throughput": "1320.25", "total_tokens": 4364568} |
|
{"current_steps": 874, "total_steps": 1320, "loss": 2.2973, "learning_rate": 8.156061916663807e-07, "epoch": 1.3242424242424242, "percentage": 66.21, "elapsed_time": "0:55:13", "remaining_time": "0:28:10", "throughput": "1320.52", "total_tokens": 4374984} |
|
{"current_steps": 876, "total_steps": 1320, "loss": 2.5294, "learning_rate": 8.12245528579828e-07, "epoch": 1.3272727272727272, "percentage": 66.36, "elapsed_time": "0:55:20", "remaining_time": "0:28:02", "throughput": "1320.78", "total_tokens": 4385424} |
|
{"current_steps": 878, "total_steps": 1320, "loss": 2.4239, "learning_rate": 8.088937856631974e-07, "epoch": 1.3303030303030303, "percentage": 66.52, "elapsed_time": "0:55:27", "remaining_time": "0:27:54", "throughput": "1321.00", "total_tokens": 4395192} |
|
{"current_steps": 880, "total_steps": 1320, "loss": 2.4403, "learning_rate": 8.055510400222836e-07, "epoch": 1.3333333333333333, "percentage": 66.67, "elapsed_time": "0:55:34", "remaining_time": "0:27:47", "throughput": "1321.29", "total_tokens": 4405608} |
|
{"current_steps": 880, "total_steps": 1320, "eval_loss": 2.328122138977051, "epoch": 1.3333333333333333, "percentage": 66.67, "elapsed_time": "0:55:40", "remaining_time": "0:27:50", "throughput": "1318.98", "total_tokens": 4405608} |
|
{"current_steps": 882, "total_steps": 1320, "loss": 2.389, "learning_rate": 8.022173685559011e-07, "epoch": 1.3363636363636364, "percentage": 66.82, "elapsed_time": "0:55:48", "remaining_time": "0:27:42", "throughput": "1319.30", "total_tokens": 4417896} |
|
{"current_steps": 884, "total_steps": 1320, "loss": 2.3811, "learning_rate": 7.988928479541154e-07, "epoch": 1.3393939393939394, "percentage": 66.97, "elapsed_time": "0:55:55", "remaining_time": "0:27:35", "throughput": "1319.52", "total_tokens": 4428000} |
|
{"current_steps": 886, "total_steps": 1320, "loss": 2.4351, "learning_rate": 7.955775546964797e-07, "epoch": 1.3424242424242423, "percentage": 67.12, "elapsed_time": "0:56:01", "remaining_time": "0:27:26", "throughput": "1319.74", "total_tokens": 4436736} |
|
{"current_steps": 888, "total_steps": 1320, "loss": 2.4343, "learning_rate": 7.922715650502746e-07, "epoch": 1.3454545454545455, "percentage": 67.27, "elapsed_time": "0:56:09", "remaining_time": "0:27:19", "throughput": "1319.98", "total_tokens": 4447488} |
|
{"current_steps": 890, "total_steps": 1320, "loss": 2.5435, "learning_rate": 7.889749550687552e-07, "epoch": 1.3484848484848486, "percentage": 67.42, "elapsed_time": "0:56:15", "remaining_time": "0:27:10", "throughput": "1320.17", "total_tokens": 4455840} |
|
{"current_steps": 892, "total_steps": 1320, "loss": 2.3398, "learning_rate": 7.856878005893988e-07, "epoch": 1.3515151515151516, "percentage": 67.58, "elapsed_time": "0:56:20", "remaining_time": "0:27:02", "throughput": "1320.31", "total_tokens": 4463568} |
|
{"current_steps": 894, "total_steps": 1320, "loss": 2.3618, "learning_rate": 7.824101772321625e-07, "epoch": 1.3545454545454545, "percentage": 67.73, "elapsed_time": "0:56:27", "remaining_time": "0:26:54", "throughput": "1320.55", "total_tokens": 4472904} |
|
{"current_steps": 896, "total_steps": 1320, "loss": 2.1904, "learning_rate": 7.791421603977435e-07, "epoch": 1.3575757575757577, "percentage": 67.88, "elapsed_time": "0:56:35", "remaining_time": "0:26:46", "throughput": "1320.85", "total_tokens": 4484400} |
|
{"current_steps": 898, "total_steps": 1320, "loss": 2.3122, "learning_rate": 7.758838252658433e-07, "epoch": 1.3606060606060606, "percentage": 68.03, "elapsed_time": "0:56:41", "remaining_time": "0:26:38", "throughput": "1321.06", "total_tokens": 4493592} |
|
{"current_steps": 900, "total_steps": 1320, "loss": 2.4964, "learning_rate": 7.726352467934386e-07, "epoch": 1.3636363636363638, "percentage": 68.18, "elapsed_time": "0:56:47", "remaining_time": "0:26:30", "throughput": "1321.28", "total_tokens": 4502664} |
|
{"current_steps": 900, "total_steps": 1320, "eval_loss": 2.327789783477783, "epoch": 1.3636363636363638, "percentage": 68.18, "elapsed_time": "0:56:53", "remaining_time": "0:26:33", "throughput": "1319.02", "total_tokens": 4502664} |
|
{"current_steps": 902, "total_steps": 1320, "loss": 2.4142, "learning_rate": 7.693964997130581e-07, "epoch": 1.3666666666666667, "percentage": 68.33, "elapsed_time": "0:57:01", "remaining_time": "0:26:25", "throughput": "1318.55", "total_tokens": 4510920} |
|
{"current_steps": 904, "total_steps": 1320, "loss": 2.3751, "learning_rate": 7.661676585310618e-07, "epoch": 1.3696969696969696, "percentage": 68.48, "elapsed_time": "0:57:09", "remaining_time": "0:26:18", "throughput": "1318.88", "total_tokens": 4523208} |
|
{"current_steps": 906, "total_steps": 1320, "loss": 2.5808, "learning_rate": 7.629487975259276e-07, "epoch": 1.3727272727272728, "percentage": 68.64, "elapsed_time": "0:57:16", "remaining_time": "0:26:10", "throughput": "1319.09", "total_tokens": 4532520} |
|
{"current_steps": 908, "total_steps": 1320, "loss": 2.3199, "learning_rate": 7.597399907465431e-07, "epoch": 1.3757575757575757, "percentage": 68.79, "elapsed_time": "0:57:24", "remaining_time": "0:26:02", "throughput": "1319.37", "total_tokens": 4544688} |
|
{"current_steps": 910, "total_steps": 1320, "loss": 2.3752, "learning_rate": 7.565413120105009e-07, "epoch": 1.378787878787879, "percentage": 68.94, "elapsed_time": "0:57:31", "remaining_time": "0:25:54", "throughput": "1319.56", "total_tokens": 4554000} |
|
{"current_steps": 912, "total_steps": 1320, "loss": 2.3512, "learning_rate": 7.533528349024014e-07, "epoch": 1.3818181818181818, "percentage": 69.09, "elapsed_time": "0:57:38", "remaining_time": "0:25:47", "throughput": "1319.81", "total_tokens": 4564368} |
|
{"current_steps": 914, "total_steps": 1320, "loss": 2.3772, "learning_rate": 7.5017463277216e-07, "epoch": 1.3848484848484848, "percentage": 69.24, "elapsed_time": "0:57:45", "remaining_time": "0:25:39", "throughput": "1320.03", "total_tokens": 4574448} |
|
{"current_steps": 916, "total_steps": 1320, "loss": 2.4036, "learning_rate": 7.470067787333188e-07, "epoch": 1.387878787878788, "percentage": 69.39, "elapsed_time": "0:57:51", "remaining_time": "0:25:30", "throughput": "1320.20", "total_tokens": 4582464} |
|
{"current_steps": 918, "total_steps": 1320, "loss": 2.3063, "learning_rate": 7.43849345661367e-07, "epoch": 1.3909090909090909, "percentage": 69.55, "elapsed_time": "0:57:58", "remaining_time": "0:25:23", "throughput": "1320.46", "total_tokens": 4592976} |
|
{"current_steps": 920, "total_steps": 1320, "loss": 2.4129, "learning_rate": 7.407024061920599e-07, "epoch": 1.393939393939394, "percentage": 69.7, "elapsed_time": "0:58:05", "remaining_time": "0:25:15", "throughput": "1320.73", "total_tokens": 4603920} |
|
{"current_steps": 920, "total_steps": 1320, "eval_loss": 2.32749080657959, "epoch": 1.393939393939394, "percentage": 69.7, "elapsed_time": "0:58:11", "remaining_time": "0:25:18", "throughput": "1318.52", "total_tokens": 4603920} |
|
{"current_steps": 922, "total_steps": 1320, "loss": 2.3207, "learning_rate": 7.375660327197534e-07, "epoch": 1.396969696969697, "percentage": 69.85, "elapsed_time": "0:58:18", "remaining_time": "0:25:10", "throughput": "1318.74", "total_tokens": 4614072} |
|
{"current_steps": 924, "total_steps": 1320, "loss": 2.4536, "learning_rate": 7.344402973957346e-07, "epoch": 1.4, "percentage": 70.0, "elapsed_time": "0:58:24", "remaining_time": "0:25:02", "throughput": "1318.93", "total_tokens": 4622640} |
|
{"current_steps": 926, "total_steps": 1320, "loss": 2.5495, "learning_rate": 7.313252721265638e-07, "epoch": 1.403030303030303, "percentage": 70.15, "elapsed_time": "0:58:32", "remaining_time": "0:24:54", "throughput": "1319.24", "total_tokens": 4634040} |
|
{"current_steps": 928, "total_steps": 1320, "loss": 2.4487, "learning_rate": 7.282210285724195e-07, "epoch": 1.406060606060606, "percentage": 70.3, "elapsed_time": "0:58:39", "remaining_time": "0:24:46", "throughput": "1319.50", "total_tokens": 4644192} |
|
{"current_steps": 930, "total_steps": 1320, "loss": 2.5896, "learning_rate": 7.251276381454506e-07, "epoch": 1.4090909090909092, "percentage": 70.45, "elapsed_time": "0:58:46", "remaining_time": "0:24:38", "throughput": "1319.71", "total_tokens": 4653720} |
|
{"current_steps": 932, "total_steps": 1320, "loss": 2.261, "learning_rate": 7.22045172008133e-07, "epoch": 1.412121212121212, "percentage": 70.61, "elapsed_time": "0:58:54", "remaining_time": "0:24:31", "throughput": "1320.00", "total_tokens": 4666008} |
|
{"current_steps": 934, "total_steps": 1320, "loss": 2.384, "learning_rate": 7.189737010716326e-07, "epoch": 1.415151515151515, "percentage": 70.76, "elapsed_time": "0:59:01", "remaining_time": "0:24:23", "throughput": "1320.20", "total_tokens": 4674936} |
|
{"current_steps": 936, "total_steps": 1320, "loss": 2.4542, "learning_rate": 7.159132959941745e-07, "epoch": 1.4181818181818182, "percentage": 70.91, "elapsed_time": "0:59:07", "remaining_time": "0:24:15", "throughput": "1320.41", "total_tokens": 4684272} |
|
{"current_steps": 938, "total_steps": 1320, "loss": 2.3937, "learning_rate": 7.128640271794171e-07, "epoch": 1.4212121212121211, "percentage": 71.06, "elapsed_time": "0:59:15", "remaining_time": "0:24:07", "throughput": "1320.70", "total_tokens": 4695576} |
|
{"current_steps": 940, "total_steps": 1320, "loss": 2.2943, "learning_rate": 7.098259647748328e-07, "epoch": 1.4242424242424243, "percentage": 71.21, "elapsed_time": "0:59:22", "remaining_time": "0:24:00", "throughput": "1320.92", "total_tokens": 4705800} |
|
{"current_steps": 940, "total_steps": 1320, "eval_loss": 2.3277194499969482, "epoch": 1.4242424242424243, "percentage": 71.21, "elapsed_time": "0:59:28", "remaining_time": "0:24:02", "throughput": "1318.76", "total_tokens": 4705800} |
|
{"current_steps": 942, "total_steps": 1320, "loss": 2.3552, "learning_rate": 7.067991786700929e-07, "epoch": 1.4272727272727272, "percentage": 71.36, "elapsed_time": "0:59:36", "remaining_time": "0:23:55", "throughput": "1319.05", "total_tokens": 4718088} |
|
{"current_steps": 944, "total_steps": 1320, "loss": 2.4507, "learning_rate": 7.037837384954625e-07, "epoch": 1.4303030303030302, "percentage": 71.52, "elapsed_time": "0:59:44", "remaining_time": "0:23:47", "throughput": "1319.31", "total_tokens": 4729536} |
|
{"current_steps": 946, "total_steps": 1320, "loss": 2.4813, "learning_rate": 7.007797136201966e-07, "epoch": 1.4333333333333333, "percentage": 71.67, "elapsed_time": "0:59:50", "remaining_time": "0:23:39", "throughput": "1319.50", "total_tokens": 4738272} |
|
{"current_steps": 948, "total_steps": 1320, "loss": 2.4679, "learning_rate": 6.977871731509438e-07, "epoch": 1.4363636363636363, "percentage": 71.82, "elapsed_time": "0:59:57", "remaining_time": "0:23:31", "throughput": "1319.72", "total_tokens": 4747488} |
|
{"current_steps": 950, "total_steps": 1320, "loss": 2.5084, "learning_rate": 6.948061859301593e-07, "epoch": 1.4393939393939394, "percentage": 71.97, "elapsed_time": "1:00:03", "remaining_time": "0:23:23", "throughput": "1319.89", "total_tokens": 4756032} |
|
{"current_steps": 952, "total_steps": 1320, "loss": 2.3797, "learning_rate": 6.918368205345182e-07, "epoch": 1.4424242424242424, "percentage": 72.12, "elapsed_time": "1:00:10", "remaining_time": "0:23:15", "throughput": "1320.15", "total_tokens": 4766904} |
|
{"current_steps": 954, "total_steps": 1320, "loss": 2.4923, "learning_rate": 6.888791452733397e-07, "epoch": 1.4454545454545453, "percentage": 72.27, "elapsed_time": "1:00:18", "remaining_time": "0:23:08", "throughput": "1320.41", "total_tokens": 4777680} |
|
{"current_steps": 956, "total_steps": 1320, "loss": 2.5362, "learning_rate": 6.859332281870147e-07, "epoch": 1.4484848484848485, "percentage": 72.42, "elapsed_time": "1:00:25", "remaining_time": "0:23:00", "throughput": "1320.65", "total_tokens": 4788432} |
|
{"current_steps": 958, "total_steps": 1320, "loss": 2.433, "learning_rate": 6.829991370454411e-07, "epoch": 1.4515151515151516, "percentage": 72.58, "elapsed_time": "1:00:33", "remaining_time": "0:22:53", "throughput": "1320.90", "total_tokens": 4799712} |
|
{"current_steps": 960, "total_steps": 1320, "loss": 2.362, "learning_rate": 6.800769393464656e-07, "epoch": 1.4545454545454546, "percentage": 72.73, "elapsed_time": "1:00:39", "remaining_time": "0:22:44", "throughput": "1321.09", "total_tokens": 4808688} |
|
{"current_steps": 960, "total_steps": 1320, "eval_loss": 2.3274452686309814, "epoch": 1.4545454545454546, "percentage": 72.73, "elapsed_time": "1:00:45", "remaining_time": "0:22:47", "throughput": "1318.98", "total_tokens": 4808688} |
|
{"current_steps": 962, "total_steps": 1320, "loss": 2.5027, "learning_rate": 6.771667023143284e-07, "epoch": 1.4575757575757575, "percentage": 72.88, "elapsed_time": "1:00:51", "remaining_time": "0:22:38", "throughput": "1319.12", "total_tokens": 4817136} |
|
{"current_steps": 964, "total_steps": 1320, "loss": 2.6941, "learning_rate": 6.742684928981188e-07, "epoch": 1.4606060606060607, "percentage": 73.03, "elapsed_time": "1:01:00", "remaining_time": "0:22:31", "throughput": "1319.42", "total_tokens": 4829112} |
|
{"current_steps": 966, "total_steps": 1320, "loss": 2.2785, "learning_rate": 6.713823777702359e-07, "epoch": 1.4636363636363636, "percentage": 73.18, "elapsed_time": "1:01:06", "remaining_time": "0:22:23", "throughput": "1319.62", "total_tokens": 4838664} |
|
{"current_steps": 968, "total_steps": 1320, "loss": 2.502, "learning_rate": 6.685084233248517e-07, "epoch": 1.4666666666666668, "percentage": 73.33, "elapsed_time": "1:01:12", "remaining_time": "0:22:15", "throughput": "1319.73", "total_tokens": 4846656} |
|
{"current_steps": 970, "total_steps": 1320, "loss": 2.4094, "learning_rate": 6.656466956763864e-07, "epoch": 1.4696969696969697, "percentage": 73.48, "elapsed_time": "1:01:18", "remaining_time": "0:22:07", "throughput": "1319.91", "total_tokens": 4855296} |
|
{"current_steps": 972, "total_steps": 1320, "loss": 2.3646, "learning_rate": 6.627972606579866e-07, "epoch": 1.4727272727272727, "percentage": 73.64, "elapsed_time": "1:01:26", "remaining_time": "0:22:00", "throughput": "1320.21", "total_tokens": 4867584} |
|
{"current_steps": 974, "total_steps": 1320, "loss": 2.3642, "learning_rate": 6.599601838200104e-07, "epoch": 1.4757575757575758, "percentage": 73.79, "elapsed_time": "1:01:35", "remaining_time": "0:21:52", "throughput": "1320.51", "total_tokens": 4879584} |
|
{"current_steps": 976, "total_steps": 1320, "loss": 2.571, "learning_rate": 6.571355304285202e-07, "epoch": 1.4787878787878788, "percentage": 73.94, "elapsed_time": "1:01:42", "remaining_time": "0:21:44", "throughput": "1320.73", "total_tokens": 4889976} |
|
{"current_steps": 978, "total_steps": 1320, "loss": 2.5749, "learning_rate": 6.543233654637804e-07, "epoch": 1.481818181818182, "percentage": 74.09, "elapsed_time": "1:01:48", "remaining_time": "0:21:36", "throughput": "1320.92", "total_tokens": 4899048} |
|
{"current_steps": 980, "total_steps": 1320, "loss": 2.2386, "learning_rate": 6.515237536187644e-07, "epoch": 1.4848484848484849, "percentage": 74.24, "elapsed_time": "1:01:56", "remaining_time": "0:21:29", "throughput": "1321.17", "total_tokens": 4910088} |
|
{"current_steps": 980, "total_steps": 1320, "eval_loss": 2.3277652263641357, "epoch": 1.4848484848484849, "percentage": 74.24, "elapsed_time": "1:02:02", "remaining_time": "0:21:31", "throughput": "1319.10", "total_tokens": 4910088} |
|
{"current_steps": 982, "total_steps": 1320, "loss": 2.5641, "learning_rate": 6.487367592976633e-07, "epoch": 1.4878787878787878, "percentage": 74.39, "elapsed_time": "1:02:10", "remaining_time": "0:21:24", "throughput": "1319.38", "total_tokens": 4922376} |
|
{"current_steps": 984, "total_steps": 1320, "loss": 2.298, "learning_rate": 6.459624466144067e-07, "epoch": 1.490909090909091, "percentage": 74.55, "elapsed_time": "1:02:19", "remaining_time": "0:21:16", "throughput": "1319.66", "total_tokens": 4934664} |
|
{"current_steps": 986, "total_steps": 1320, "loss": 2.3938, "learning_rate": 6.432008793911877e-07, "epoch": 1.493939393939394, "percentage": 74.7, "elapsed_time": "1:02:25", "remaining_time": "0:21:08", "throughput": "1319.83", "total_tokens": 4943352} |
|
{"current_steps": 988, "total_steps": 1320, "loss": 2.421, "learning_rate": 6.404521211569937e-07, "epoch": 1.496969696969697, "percentage": 74.85, "elapsed_time": "1:02:32", "remaining_time": "0:21:01", "throughput": "1320.06", "total_tokens": 4953888} |
|
{"current_steps": 990, "total_steps": 1320, "loss": 2.1273, "learning_rate": 6.377162351461442e-07, "epoch": 1.5, "percentage": 75.0, "elapsed_time": "1:02:40", "remaining_time": "0:20:53", "throughput": "1320.32", "total_tokens": 4965024} |
|
{"current_steps": 992, "total_steps": 1320, "loss": 2.3928, "learning_rate": 6.349932842968391e-07, "epoch": 1.503030303030303, "percentage": 75.15, "elapsed_time": "1:02:48", "remaining_time": "0:20:46", "throughput": "1320.57", "total_tokens": 4977216} |
|
{"current_steps": 994, "total_steps": 1320, "loss": 2.3595, "learning_rate": 6.322833312497082e-07, "epoch": 1.506060606060606, "percentage": 75.3, "elapsed_time": "1:02:55", "remaining_time": "0:20:38", "throughput": "1320.74", "total_tokens": 4986720} |
|
{"current_steps": 996, "total_steps": 1320, "loss": 2.5852, "learning_rate": 6.295864383463705e-07, "epoch": 1.509090909090909, "percentage": 75.45, "elapsed_time": "1:03:01", "remaining_time": "0:20:30", "throughput": "1320.93", "total_tokens": 4995072} |
|
{"current_steps": 998, "total_steps": 1320, "loss": 2.4611, "learning_rate": 6.269026676280008e-07, "epoch": 1.5121212121212122, "percentage": 75.61, "elapsed_time": "1:03:07", "remaining_time": "0:20:21", "throughput": "1321.09", "total_tokens": 5003256} |
|
{"current_steps": 1000, "total_steps": 1320, "loss": 2.0949, "learning_rate": 6.242320808339023e-07, "epoch": 1.5151515151515151, "percentage": 75.76, "elapsed_time": "1:03:12", "remaining_time": "0:20:13", "throughput": "1321.22", "total_tokens": 5010864} |
|
{"current_steps": 1000, "total_steps": 1320, "eval_loss": 2.3277881145477295, "epoch": 1.5151515151515151, "percentage": 75.76, "elapsed_time": "1:03:18", "remaining_time": "0:20:15", "throughput": "1319.19", "total_tokens": 5010864} |
|
{"current_steps": 1002, "total_steps": 1320, "loss": 2.2478, "learning_rate": 6.215747394000864e-07, "epoch": 1.518181818181818, "percentage": 75.91, "elapsed_time": "1:03:27", "remaining_time": "0:20:08", "throughput": "1318.78", "total_tokens": 5021400} |
|
{"current_steps": 1004, "total_steps": 1320, "loss": 2.1912, "learning_rate": 6.189307044578585e-07, "epoch": 1.5212121212121212, "percentage": 76.06, "elapsed_time": "1:03:34", "remaining_time": "0:20:00", "throughput": "1318.98", "total_tokens": 5031576} |
|
{"current_steps": 1006, "total_steps": 1320, "loss": 2.3441, "learning_rate": 6.163000368324124e-07, "epoch": 1.5242424242424244, "percentage": 76.21, "elapsed_time": "1:03:42", "remaining_time": "0:19:52", "throughput": "1319.20", "total_tokens": 5042136} |
|
{"current_steps": 1008, "total_steps": 1320, "loss": 2.3444, "learning_rate": 6.136827970414317e-07, "epoch": 1.5272727272727273, "percentage": 76.36, "elapsed_time": "1:03:49", "remaining_time": "0:19:45", "throughput": "1319.40", "total_tokens": 5052480} |
|
{"current_steps": 1010, "total_steps": 1320, "loss": 2.5014, "learning_rate": 6.11079045293696e-07, "epoch": 1.5303030303030303, "percentage": 76.52, "elapsed_time": "1:03:56", "remaining_time": "0:19:37", "throughput": "1319.62", "total_tokens": 5062872} |
|
{"current_steps": 1012, "total_steps": 1320, "loss": 2.2427, "learning_rate": 6.084888414876976e-07, "epoch": 1.5333333333333332, "percentage": 76.67, "elapsed_time": "1:04:04", "remaining_time": "0:19:29", "throughput": "1319.85", "total_tokens": 5073744} |
|
{"current_steps": 1014, "total_steps": 1320, "loss": 2.3813, "learning_rate": 6.059122452102618e-07, "epoch": 1.5363636363636364, "percentage": 76.82, "elapsed_time": "1:04:10", "remaining_time": "0:19:21", "throughput": "1320.05", "total_tokens": 5082432} |
|
{"current_steps": 1016, "total_steps": 1320, "loss": 2.6378, "learning_rate": 6.033493157351772e-07, "epoch": 1.5393939393939395, "percentage": 76.97, "elapsed_time": "1:04:17", "remaining_time": "0:19:14", "throughput": "1320.26", "total_tokens": 5092848} |
|
{"current_steps": 1018, "total_steps": 1320, "loss": 2.4006, "learning_rate": 6.008001120218322e-07, "epoch": 1.5424242424242425, "percentage": 77.12, "elapsed_time": "1:04:26", "remaining_time": "0:19:06", "throughput": "1320.52", "total_tokens": 5105136} |
|
{"current_steps": 1020, "total_steps": 1320, "loss": 2.5504, "learning_rate": 5.982646927138584e-07, "epoch": 1.5454545454545454, "percentage": 77.27, "elapsed_time": "1:04:32", "remaining_time": "0:18:58", "throughput": "1320.69", "total_tokens": 5114064} |
|
{"current_steps": 1020, "total_steps": 1320, "eval_loss": 2.3275692462921143, "epoch": 1.5454545454545454, "percentage": 77.27, "elapsed_time": "1:04:38", "remaining_time": "0:19:00", "throughput": "1318.70", "total_tokens": 5114064} |
|
{"current_steps": 1022, "total_steps": 1320, "loss": 2.4085, "learning_rate": 5.957431161377809e-07, "epoch": 1.5484848484848484, "percentage": 77.42, "elapsed_time": "1:04:46", "remaining_time": "0:18:53", "throughput": "1318.95", "total_tokens": 5125872} |
|
{"current_steps": 1024, "total_steps": 1320, "loss": 2.263, "learning_rate": 5.932354403016777e-07, "epoch": 1.5515151515151515, "percentage": 77.58, "elapsed_time": "1:04:52", "remaining_time": "0:18:45", "throughput": "1319.14", "total_tokens": 5135208} |
|
{"current_steps": 1026, "total_steps": 1320, "loss": 2.352, "learning_rate": 5.907417228938442e-07, "epoch": 1.5545454545454547, "percentage": 77.73, "elapsed_time": "1:05:00", "remaining_time": "0:18:37", "throughput": "1319.40", "total_tokens": 5146896} |
|
{"current_steps": 1028, "total_steps": 1320, "loss": 2.3172, "learning_rate": 5.88262021281467e-07, "epoch": 1.5575757575757576, "percentage": 77.88, "elapsed_time": "1:05:09", "remaining_time": "0:18:30", "throughput": "1319.66", "total_tokens": 5159184} |
|
{"current_steps": 1030, "total_steps": 1320, "loss": 2.4402, "learning_rate": 5.857963925093034e-07, "epoch": 1.5606060606060606, "percentage": 78.03, "elapsed_time": "1:05:15", "remaining_time": "0:18:22", "throughput": "1319.83", "total_tokens": 5167656} |
|
{"current_steps": 1032, "total_steps": 1320, "loss": 2.5926, "learning_rate": 5.833448932983693e-07, "epoch": 1.5636363636363635, "percentage": 78.18, "elapsed_time": "1:05:23", "remaining_time": "0:18:14", "throughput": "1320.09", "total_tokens": 5179680} |
|
{"current_steps": 1034, "total_steps": 1320, "loss": 2.5999, "learning_rate": 5.809075800446348e-07, "epoch": 1.5666666666666667, "percentage": 78.33, "elapsed_time": "1:05:31", "remaining_time": "0:18:07", "throughput": "1320.31", "total_tokens": 5190216} |
|
{"current_steps": 1036, "total_steps": 1320, "loss": 2.379, "learning_rate": 5.784845088177263e-07, "epoch": 1.5696969696969698, "percentage": 78.48, "elapsed_time": "1:05:38", "remaining_time": "0:17:59", "throughput": "1320.56", "total_tokens": 5201592} |
|
{"current_steps": 1038, "total_steps": 1320, "loss": 2.3246, "learning_rate": 5.760757353596371e-07, "epoch": 1.5727272727272728, "percentage": 78.64, "elapsed_time": "1:05:46", "remaining_time": "0:17:52", "throughput": "1320.80", "total_tokens": 5213040} |
|
{"current_steps": 1040, "total_steps": 1320, "loss": 2.4542, "learning_rate": 5.736813150834447e-07, "epoch": 1.5757575757575757, "percentage": 78.79, "elapsed_time": "1:05:54", "remaining_time": "0:17:44", "throughput": "1321.02", "total_tokens": 5223360} |
|
{"current_steps": 1040, "total_steps": 1320, "eval_loss": 2.3277275562286377, "epoch": 1.5757575757575757, "percentage": 78.79, "elapsed_time": "1:05:59", "remaining_time": "0:17:46", "throughput": "1319.07", "total_tokens": 5223360} |
|
{"current_steps": 1042, "total_steps": 1320, "loss": 2.3253, "learning_rate": 5.713013030720356e-07, "epoch": 1.5787878787878786, "percentage": 78.94, "elapsed_time": "1:06:08", "remaining_time": "0:17:38", "throughput": "1319.32", "total_tokens": 5235480} |
|
{"current_steps": 1044, "total_steps": 1320, "loss": 2.3232, "learning_rate": 5.6893575407684e-07, "epoch": 1.5818181818181818, "percentage": 79.09, "elapsed_time": "1:06:15", "remaining_time": "0:17:31", "throughput": "1319.54", "total_tokens": 5246280} |
|
{"current_steps": 1046, "total_steps": 1320, "loss": 2.323, "learning_rate": 5.665847225165695e-07, "epoch": 1.584848484848485, "percentage": 79.24, "elapsed_time": "1:06:23", "remaining_time": "0:17:23", "throughput": "1319.77", "total_tokens": 5257248} |
|
{"current_steps": 1048, "total_steps": 1320, "loss": 2.6128, "learning_rate": 5.642482624759672e-07, "epoch": 1.587878787878788, "percentage": 79.39, "elapsed_time": "1:06:31", "remaining_time": "0:17:15", "throughput": "1320.02", "total_tokens": 5268264} |
|
{"current_steps": 1050, "total_steps": 1320, "loss": 2.5484, "learning_rate": 5.619264277045634e-07, "epoch": 1.5909090909090908, "percentage": 79.55, "elapsed_time": "1:06:39", "remaining_time": "0:17:08", "throughput": "1320.26", "total_tokens": 5280432} |
|
{"current_steps": 1052, "total_steps": 1320, "loss": 2.5, "learning_rate": 5.596192716154385e-07, "epoch": 1.593939393939394, "percentage": 79.7, "elapsed_time": "1:06:46", "remaining_time": "0:17:00", "throughput": "1320.45", "total_tokens": 5290488} |
|
{"current_steps": 1054, "total_steps": 1320, "loss": 2.4814, "learning_rate": 5.573268472839937e-07, "epoch": 1.596969696969697, "percentage": 79.85, "elapsed_time": "1:06:52", "remaining_time": "0:16:52", "throughput": "1320.64", "total_tokens": 5299536} |
|
{"current_steps": 1056, "total_steps": 1320, "loss": 2.4972, "learning_rate": 5.550492074467317e-07, "epoch": 1.6, "percentage": 80.0, "elapsed_time": "1:06:59", "remaining_time": "0:16:44", "throughput": "1320.87", "total_tokens": 5309544} |
|
{"current_steps": 1058, "total_steps": 1320, "loss": 2.5041, "learning_rate": 5.527864045000421e-07, "epoch": 1.603030303030303, "percentage": 80.15, "elapsed_time": "1:07:06", "remaining_time": "0:16:37", "throughput": "1321.04", "total_tokens": 5319024} |
|
{"current_steps": 1060, "total_steps": 1320, "loss": 2.3262, "learning_rate": 5.505384904989965e-07, "epoch": 1.606060606060606, "percentage": 80.3, "elapsed_time": "1:07:13", "remaining_time": "0:16:29", "throughput": "1321.23", "total_tokens": 5329752} |
|
{"current_steps": 1060, "total_steps": 1320, "eval_loss": 2.327099323272705, "epoch": 1.606060606060606, "percentage": 80.3, "elapsed_time": "1:07:19", "remaining_time": "0:16:30", "throughput": "1319.32", "total_tokens": 5329752} |
|
{"current_steps": 1062, "total_steps": 1320, "loss": 2.2181, "learning_rate": 5.483055171561511e-07, "epoch": 1.6090909090909091, "percentage": 80.45, "elapsed_time": "1:07:27", "remaining_time": "0:16:23", "throughput": "1319.50", "total_tokens": 5340552} |
|
{"current_steps": 1064, "total_steps": 1320, "loss": 2.3349, "learning_rate": 5.460875358403565e-07, "epoch": 1.612121212121212, "percentage": 80.61, "elapsed_time": "1:07:34", "remaining_time": "0:16:15", "throughput": "1319.69", "total_tokens": 5350320} |
|
{"current_steps": 1066, "total_steps": 1320, "loss": 2.4784, "learning_rate": 5.438845975755772e-07, "epoch": 1.6151515151515152, "percentage": 80.76, "elapsed_time": "1:07:38", "remaining_time": "0:16:07", "throughput": "1319.79", "total_tokens": 5356608} |
|
{"current_steps": 1068, "total_steps": 1320, "loss": 2.2265, "learning_rate": 5.416967530397164e-07, "epoch": 1.6181818181818182, "percentage": 80.91, "elapsed_time": "1:07:45", "remaining_time": "0:15:59", "throughput": "1320.00", "total_tokens": 5366568} |
|
{"current_steps": 1070, "total_steps": 1320, "loss": 2.4877, "learning_rate": 5.395240525634511e-07, "epoch": 1.621212121212121, "percentage": 81.06, "elapsed_time": "1:07:54", "remaining_time": "0:15:51", "throughput": "1320.25", "total_tokens": 5378856} |
|
{"current_steps": 1072, "total_steps": 1320, "loss": 2.3169, "learning_rate": 5.37366546129074e-07, "epoch": 1.6242424242424243, "percentage": 81.21, "elapsed_time": "1:08:02", "remaining_time": "0:15:44", "throughput": "1320.50", "total_tokens": 5391120} |
|
{"current_steps": 1074, "total_steps": 1320, "loss": 2.4456, "learning_rate": 5.35224283369343e-07, "epoch": 1.6272727272727274, "percentage": 81.36, "elapsed_time": "1:08:07", "remaining_time": "0:15:36", "throughput": "1320.63", "total_tokens": 5398752} |
|
{"current_steps": 1076, "total_steps": 1320, "loss": 2.5053, "learning_rate": 5.330973135663411e-07, "epoch": 1.6303030303030304, "percentage": 81.52, "elapsed_time": "1:08:16", "remaining_time": "0:15:28", "throughput": "1320.88", "total_tokens": 5411040} |
|
{"current_steps": 1078, "total_steps": 1320, "loss": 2.4062, "learning_rate": 5.309856856503409e-07, "epoch": 1.6333333333333333, "percentage": 81.67, "elapsed_time": "1:08:24", "remaining_time": "0:15:21", "throughput": "1321.12", "total_tokens": 5422848} |
|
{"current_steps": 1080, "total_steps": 1320, "loss": 2.2278, "learning_rate": 5.2888944819868e-07, "epoch": 1.6363636363636362, "percentage": 81.82, "elapsed_time": "1:08:33", "remaining_time": "0:15:14", "throughput": "1321.36", "total_tokens": 5435136} |
|
{"current_steps": 1080, "total_steps": 1320, "eval_loss": 2.32759428024292, "epoch": 1.6363636363636362, "percentage": 81.82, "elapsed_time": "1:08:39", "remaining_time": "0:15:15", "throughput": "1319.49", "total_tokens": 5435136} |
|
{"current_steps": 1082, "total_steps": 1320, "loss": 2.3976, "learning_rate": 5.26808649434643e-07, "epoch": 1.6393939393939394, "percentage": 81.97, "elapsed_time": "1:08:46", "remaining_time": "0:15:07", "throughput": "1319.67", "total_tokens": 5445672} |
|
{"current_steps": 1084, "total_steps": 1320, "loss": 2.4648, "learning_rate": 5.247433372263522e-07, "epoch": 1.6424242424242426, "percentage": 82.12, "elapsed_time": "1:08:54", "remaining_time": "0:15:00", "throughput": "1319.89", "total_tokens": 5456640} |
|
{"current_steps": 1086, "total_steps": 1320, "loss": 2.2962, "learning_rate": 5.226935590856675e-07, "epoch": 1.6454545454545455, "percentage": 82.27, "elapsed_time": "1:09:00", "remaining_time": "0:14:52", "throughput": "1320.07", "total_tokens": 5465976} |
|
{"current_steps": 1088, "total_steps": 1320, "loss": 2.3107, "learning_rate": 5.20659362167091e-07, "epoch": 1.6484848484848484, "percentage": 82.42, "elapsed_time": "1:09:08", "remaining_time": "0:14:44", "throughput": "1320.29", "total_tokens": 5477016} |
|
{"current_steps": 1090, "total_steps": 1320, "loss": 2.2394, "learning_rate": 5.186407932666846e-07, "epoch": 1.6515151515151514, "percentage": 82.58, "elapsed_time": "1:09:15", "remaining_time": "0:14:36", "throughput": "1320.49", "total_tokens": 5487504} |
|
{"current_steps": 1092, "total_steps": 1320, "loss": 2.6481, "learning_rate": 5.166378988209924e-07, "epoch": 1.6545454545454545, "percentage": 82.73, "elapsed_time": "1:09:21", "remaining_time": "0:14:28", "throughput": "1320.67", "total_tokens": 5496600} |
|
{"current_steps": 1094, "total_steps": 1320, "loss": 2.5754, "learning_rate": 5.146507249059727e-07, "epoch": 1.6575757575757577, "percentage": 82.88, "elapsed_time": "1:09:28", "remaining_time": "0:14:21", "throughput": "1320.86", "total_tokens": 5506416} |
|
{"current_steps": 1096, "total_steps": 1320, "loss": 2.3295, "learning_rate": 5.126793172359373e-07, "epoch": 1.6606060606060606, "percentage": 83.03, "elapsed_time": "1:09:34", "remaining_time": "0:14:13", "throughput": "1321.01", "total_tokens": 5514600} |
|
{"current_steps": 1098, "total_steps": 1320, "loss": 2.3752, "learning_rate": 5.107237211625016e-07, "epoch": 1.6636363636363636, "percentage": 83.18, "elapsed_time": "1:09:40", "remaining_time": "0:14:05", "throughput": "1321.15", "total_tokens": 5522616} |
|
{"current_steps": 1100, "total_steps": 1320, "loss": 2.2484, "learning_rate": 5.087839816735391e-07, "epoch": 1.6666666666666665, "percentage": 83.33, "elapsed_time": "1:09:47", "remaining_time": "0:13:57", "throughput": "1321.36", "total_tokens": 5533488} |
|
{"current_steps": 1100, "total_steps": 1320, "eval_loss": 2.326948642730713, "epoch": 1.6666666666666665, "percentage": 83.33, "elapsed_time": "1:09:53", "remaining_time": "0:13:58", "throughput": "1319.52", "total_tokens": 5533488} |
|
{"current_steps": 1102, "total_steps": 1320, "loss": 2.392, "learning_rate": 5.068601433921479e-07, "epoch": 1.6696969696969697, "percentage": 83.48, "elapsed_time": "1:10:03", "remaining_time": "0:13:51", "throughput": "1319.20", "total_tokens": 5544864} |
|
{"current_steps": 1104, "total_steps": 1320, "loss": 2.4998, "learning_rate": 5.04952250575624e-07, "epoch": 1.6727272727272728, "percentage": 83.64, "elapsed_time": "1:10:10", "remaining_time": "0:13:43", "throughput": "1319.43", "total_tokens": 5555928} |
|
{"current_steps": 1106, "total_steps": 1320, "loss": 2.3944, "learning_rate": 5.030603471144432e-07, "epoch": 1.6757575757575758, "percentage": 83.79, "elapsed_time": "1:10:18", "remaining_time": "0:13:36", "throughput": "1319.65", "total_tokens": 5567088} |
|
{"current_steps": 1108, "total_steps": 1320, "loss": 2.487, "learning_rate": 5.011844765312504e-07, "epoch": 1.6787878787878787, "percentage": 83.94, "elapsed_time": "1:10:26", "remaining_time": "0:13:28", "throughput": "1319.88", "total_tokens": 5578128} |
|
{"current_steps": 1110, "total_steps": 1320, "loss": 2.5279, "learning_rate": 4.9932468197986e-07, "epoch": 1.6818181818181817, "percentage": 84.09, "elapsed_time": "1:10:33", "remaining_time": "0:13:20", "throughput": "1320.07", "total_tokens": 5588952} |
|
{"current_steps": 1112, "total_steps": 1320, "loss": 2.4558, "learning_rate": 4.974810062442615e-07, "epoch": 1.6848484848484848, "percentage": 84.24, "elapsed_time": "1:10:41", "remaining_time": "0:13:13", "throughput": "1320.29", "total_tokens": 5599992} |
|
{"current_steps": 1114, "total_steps": 1320, "loss": 2.3407, "learning_rate": 4.956534917376373e-07, "epoch": 1.687878787878788, "percentage": 84.39, "elapsed_time": "1:10:49", "remaining_time": "0:13:05", "throughput": "1320.53", "total_tokens": 5611752} |
|
{"current_steps": 1116, "total_steps": 1320, "loss": 2.4067, "learning_rate": 4.938421805013844e-07, "epoch": 1.690909090909091, "percentage": 84.55, "elapsed_time": "1:10:54", "remaining_time": "0:12:57", "throughput": "1320.64", "total_tokens": 5619072} |
|
{"current_steps": 1118, "total_steps": 1320, "loss": 2.2224, "learning_rate": 4.920471142041496e-07, "epoch": 1.6939393939393939, "percentage": 84.7, "elapsed_time": "1:11:02", "remaining_time": "0:12:50", "throughput": "1320.85", "total_tokens": 5629824} |
|
{"current_steps": 1120, "total_steps": 1320, "loss": 2.4764, "learning_rate": 4.902683341408698e-07, "epoch": 1.696969696969697, "percentage": 84.85, "elapsed_time": "1:11:08", "remaining_time": "0:12:42", "throughput": "1321.02", "total_tokens": 5639376} |
|
{"current_steps": 1120, "total_steps": 1320, "eval_loss": 2.327069044113159, "epoch": 1.696969696969697, "percentage": 84.85, "elapsed_time": "1:11:14", "remaining_time": "0:12:43", "throughput": "1319.22", "total_tokens": 5639376} |
|
{"current_steps": 1122, "total_steps": 1320, "loss": 2.4383, "learning_rate": 4.88505881231822e-07, "epoch": 1.7, "percentage": 85.0, "elapsed_time": "1:11:21", "remaining_time": "0:12:35", "throughput": "1319.42", "total_tokens": 5649624} |
|
{"current_steps": 1124, "total_steps": 1320, "loss": 2.3752, "learning_rate": 4.867597960216823e-07, "epoch": 1.7030303030303031, "percentage": 85.15, "elapsed_time": "1:11:28", "remaining_time": "0:12:27", "throughput": "1319.61", "total_tokens": 5659800} |
|
{"current_steps": 1126, "total_steps": 1320, "loss": 2.2427, "learning_rate": 4.85030118678593e-07, "epoch": 1.706060606060606, "percentage": 85.3, "elapsed_time": "1:11:35", "remaining_time": "0:12:19", "throughput": "1319.74", "total_tokens": 5668296} |
|
{"current_steps": 1128, "total_steps": 1320, "loss": 2.3027, "learning_rate": 4.833168889932384e-07, "epoch": 1.709090909090909, "percentage": 85.45, "elapsed_time": "1:11:42", "remaining_time": "0:12:12", "throughput": "1319.94", "total_tokens": 5678832} |
|
{"current_steps": 1130, "total_steps": 1320, "loss": 2.4966, "learning_rate": 4.816201463779299e-07, "epoch": 1.7121212121212122, "percentage": 85.61, "elapsed_time": "1:11:47", "remaining_time": "0:12:04", "throughput": "1320.07", "total_tokens": 5686872} |
|
{"current_steps": 1132, "total_steps": 1320, "loss": 2.5635, "learning_rate": 4.799399298656985e-07, "epoch": 1.7151515151515153, "percentage": 85.76, "elapsed_time": "1:11:55", "remaining_time": "0:11:56", "throughput": "1320.26", "total_tokens": 5697216} |
|
{"current_steps": 1134, "total_steps": 1320, "loss": 2.3295, "learning_rate": 4.782762781093983e-07, "epoch": 1.7181818181818183, "percentage": 85.91, "elapsed_time": "1:12:01", "remaining_time": "0:11:48", "throughput": "1320.43", "total_tokens": 5706840} |
|
{"current_steps": 1136, "total_steps": 1320, "loss": 2.3937, "learning_rate": 4.7662922938081575e-07, "epoch": 1.7212121212121212, "percentage": 86.06, "elapsed_time": "1:12:08", "remaining_time": "0:11:41", "throughput": "1320.58", "total_tokens": 5715816} |
|
{"current_steps": 1138, "total_steps": 1320, "loss": 2.3815, "learning_rate": 4.7499882156978934e-07, "epoch": 1.7242424242424241, "percentage": 86.21, "elapsed_time": "1:12:14", "remaining_time": "0:11:33", "throughput": "1320.73", "total_tokens": 5724456} |
|
{"current_steps": 1140, "total_steps": 1320, "loss": 2.3489, "learning_rate": 4.7338509218333966e-07, "epoch": 1.7272727272727273, "percentage": 86.36, "elapsed_time": "1:12:19", "remaining_time": "0:11:25", "throughput": "1320.87", "total_tokens": 5732496} |
|
{"current_steps": 1140, "total_steps": 1320, "eval_loss": 2.326911687850952, "epoch": 1.7272727272727273, "percentage": 86.36, "elapsed_time": "1:12:25", "remaining_time": "0:11:26", "throughput": "1319.10", "total_tokens": 5732496} |
|
{"current_steps": 1142, "total_steps": 1320, "loss": 2.2154, "learning_rate": 4.717880783448046e-07, "epoch": 1.7303030303030305, "percentage": 86.52, "elapsed_time": "1:12:34", "remaining_time": "0:11:18", "throughput": "1319.34", "total_tokens": 5744784} |
|
{"current_steps": 1144, "total_steps": 1320, "loss": 2.5799, "learning_rate": 4.7020781679298636e-07, "epoch": 1.7333333333333334, "percentage": 86.67, "elapsed_time": "1:12:39", "remaining_time": "0:11:10", "throughput": "1319.48", "total_tokens": 5752872} |
|
{"current_steps": 1146, "total_steps": 1320, "loss": 2.4051, "learning_rate": 4.6864434388130604e-07, "epoch": 1.7363636363636363, "percentage": 86.82, "elapsed_time": "1:12:46", "remaining_time": "0:11:03", "throughput": "1319.66", "total_tokens": 5762880} |
|
{"current_steps": 1148, "total_steps": 1320, "loss": 2.2532, "learning_rate": 4.6709769557696724e-07, "epoch": 1.7393939393939393, "percentage": 86.97, "elapsed_time": "1:12:54", "remaining_time": "0:10:55", "throughput": "1319.86", "total_tokens": 5773632} |
|
{"current_steps": 1150, "total_steps": 1320, "loss": 2.2363, "learning_rate": 4.6556790746012866e-07, "epoch": 1.7424242424242424, "percentage": 87.12, "elapsed_time": "1:13:02", "remaining_time": "0:10:47", "throughput": "1320.08", "total_tokens": 5784960} |
|
{"current_steps": 1152, "total_steps": 1320, "loss": 2.264, "learning_rate": 4.6405501472308593e-07, "epoch": 1.7454545454545456, "percentage": 87.27, "elapsed_time": "1:13:08", "remaining_time": "0:10:40", "throughput": "1320.25", "total_tokens": 5794392} |
|
{"current_steps": 1154, "total_steps": 1320, "loss": 2.6636, "learning_rate": 4.6255905216946174e-07, "epoch": 1.7484848484848485, "percentage": 87.42, "elapsed_time": "1:13:13", "remaining_time": "0:10:32", "throughput": "1320.36", "total_tokens": 5801088} |
|
{"current_steps": 1156, "total_steps": 1320, "loss": 2.3849, "learning_rate": 4.6108005421340517e-07, "epoch": 1.7515151515151515, "percentage": 87.58, "elapsed_time": "1:13:19", "remaining_time": "0:10:24", "throughput": "1320.53", "total_tokens": 5810232} |
|
{"current_steps": 1158, "total_steps": 1320, "loss": 2.1526, "learning_rate": 4.5961805487879993e-07, "epoch": 1.7545454545454544, "percentage": 87.73, "elapsed_time": "1:13:26", "remaining_time": "0:10:16", "throughput": "1320.69", "total_tokens": 5819976} |
|
{"current_steps": 1160, "total_steps": 1320, "loss": 2.3448, "learning_rate": 4.581730877984817e-07, "epoch": 1.7575757575757576, "percentage": 87.88, "elapsed_time": "1:13:34", "remaining_time": "0:10:08", "throughput": "1320.90", "total_tokens": 5831304} |
|
{"current_steps": 1160, "total_steps": 1320, "eval_loss": 2.326674699783325, "epoch": 1.7575757575757576, "percentage": 87.88, "elapsed_time": "1:13:40", "remaining_time": "0:10:09", "throughput": "1319.16", "total_tokens": 5831304} |
|
{"current_steps": 1162, "total_steps": 1320, "loss": 2.2982, "learning_rate": 4.567451862134651e-07, "epoch": 1.7606060606060607, "percentage": 88.03, "elapsed_time": "1:13:47", "remaining_time": "0:10:02", "throughput": "1319.36", "total_tokens": 5841792} |
|
{"current_steps": 1164, "total_steps": 1320, "loss": 2.296, "learning_rate": 4.553343829721776e-07, "epoch": 1.7636363636363637, "percentage": 88.18, "elapsed_time": "1:13:55", "remaining_time": "0:09:54", "throughput": "1319.57", "total_tokens": 5852640} |
|
{"current_steps": 1166, "total_steps": 1320, "loss": 2.3767, "learning_rate": 4.539407105297053e-07, "epoch": 1.7666666666666666, "percentage": 88.33, "elapsed_time": "1:14:03", "remaining_time": "0:09:46", "throughput": "1319.79", "total_tokens": 5864328} |
|
{"current_steps": 1168, "total_steps": 1320, "loss": 2.1989, "learning_rate": 4.5256420094704516e-07, "epoch": 1.7696969696969695, "percentage": 88.48, "elapsed_time": "1:14:09", "remaining_time": "0:09:39", "throughput": "1319.94", "total_tokens": 5873424} |
|
{"current_steps": 1170, "total_steps": 1320, "loss": 2.2727, "learning_rate": 4.5120488589036816e-07, "epoch": 1.7727272727272727, "percentage": 88.64, "elapsed_time": "1:14:15", "remaining_time": "0:09:31", "throughput": "1320.06", "total_tokens": 5881608} |
|
{"current_steps": 1172, "total_steps": 1320, "loss": 2.3122, "learning_rate": 4.498627966302905e-07, "epoch": 1.7757575757575759, "percentage": 88.79, "elapsed_time": "1:14:23", "remaining_time": "0:09:23", "throughput": "1320.28", "total_tokens": 5892984} |
|
{"current_steps": 1174, "total_steps": 1320, "loss": 2.607, "learning_rate": 4.485379640411545e-07, "epoch": 1.7787878787878788, "percentage": 88.94, "elapsed_time": "1:14:30", "remaining_time": "0:09:16", "throughput": "1320.50", "total_tokens": 5903832} |
|
{"current_steps": 1176, "total_steps": 1320, "loss": 2.444, "learning_rate": 4.4723041860031803e-07, "epoch": 1.7818181818181817, "percentage": 89.09, "elapsed_time": "1:14:38", "remaining_time": "0:09:08", "throughput": "1320.70", "total_tokens": 5914224} |
|
{"current_steps": 1178, "total_steps": 1320, "loss": 2.462, "learning_rate": 4.459401903874538e-07, "epoch": 1.7848484848484847, "percentage": 89.24, "elapsed_time": "1:14:46", "remaining_time": "0:09:00", "throughput": "1320.91", "total_tokens": 5925768} |
|
{"current_steps": 1180, "total_steps": 1320, "loss": 2.4997, "learning_rate": 4.4466730908385664e-07, "epoch": 1.7878787878787878, "percentage": 89.39, "elapsed_time": "1:14:52", "remaining_time": "0:08:52", "throughput": "1321.06", "total_tokens": 5934528} |
|
{"current_steps": 1180, "total_steps": 1320, "eval_loss": 2.3263440132141113, "epoch": 1.7878787878787878, "percentage": 89.39, "elapsed_time": "1:14:58", "remaining_time": "0:08:53", "throughput": "1319.35", "total_tokens": 5934528} |
|
{"current_steps": 1182, "total_steps": 1320, "loss": 2.2541, "learning_rate": 4.434118039717616e-07, "epoch": 1.790909090909091, "percentage": 89.55, "elapsed_time": "1:15:04", "remaining_time": "0:08:45", "throughput": "1319.54", "total_tokens": 5944224} |
|
{"current_steps": 1184, "total_steps": 1320, "loss": 2.2483, "learning_rate": 4.4217370393366995e-07, "epoch": 1.793939393939394, "percentage": 89.7, "elapsed_time": "1:15:12", "remaining_time": "0:08:38", "throughput": "1319.73", "total_tokens": 5954688} |
|
{"current_steps": 1186, "total_steps": 1320, "loss": 2.403, "learning_rate": 4.40953037451684e-07, "epoch": 1.7969696969696969, "percentage": 89.85, "elapsed_time": "1:15:18", "remaining_time": "0:08:30", "throughput": "1319.91", "total_tokens": 5964072} |
|
{"current_steps": 1188, "total_steps": 1320, "loss": 2.5772, "learning_rate": 4.3974983260685345e-07, "epoch": 1.8, "percentage": 90.0, "elapsed_time": "1:15:26", "remaining_time": "0:08:22", "throughput": "1320.13", "total_tokens": 5975184} |
|
{"current_steps": 1190, "total_steps": 1320, "loss": 2.3809, "learning_rate": 4.3856411707852814e-07, "epoch": 1.803030303030303, "percentage": 90.15, "elapsed_time": "1:15:30", "remaining_time": "0:08:14", "throughput": "1320.21", "total_tokens": 5981496} |
|
{"current_steps": 1192, "total_steps": 1320, "loss": 2.3452, "learning_rate": 4.373959181437216e-07, "epoch": 1.8060606060606061, "percentage": 90.3, "elapsed_time": "1:15:38", "remaining_time": "0:08:07", "throughput": "1320.42", "total_tokens": 5993088} |
|
{"current_steps": 1194, "total_steps": 1320, "loss": 2.2971, "learning_rate": 4.3624526267648363e-07, "epoch": 1.809090909090909, "percentage": 90.45, "elapsed_time": "1:15:46", "remaining_time": "0:07:59", "throughput": "1320.58", "total_tokens": 6003864} |
|
{"current_steps": 1196, "total_steps": 1320, "loss": 2.1717, "learning_rate": 4.351121771472823e-07, "epoch": 1.812121212121212, "percentage": 90.61, "elapsed_time": "1:15:53", "remaining_time": "0:07:52", "throughput": "1320.75", "total_tokens": 6013824} |
|
{"current_steps": 1198, "total_steps": 1320, "loss": 2.3326, "learning_rate": 4.3399668762239446e-07, "epoch": 1.8151515151515152, "percentage": 90.76, "elapsed_time": "1:16:00", "remaining_time": "0:07:44", "throughput": "1320.94", "total_tokens": 6024120} |
|
{"current_steps": 1200, "total_steps": 1320, "loss": 2.311, "learning_rate": 4.328988197633066e-07, "epoch": 1.8181818181818183, "percentage": 90.91, "elapsed_time": "1:16:08", "remaining_time": "0:07:36", "throughput": "1321.16", "total_tokens": 6035544} |
|
{"current_steps": 1200, "total_steps": 1320, "eval_loss": 2.326775550842285, "epoch": 1.8181818181818183, "percentage": 90.91, "elapsed_time": "1:16:14", "remaining_time": "0:07:37", "throughput": "1319.47", "total_tokens": 6035544} |
|
{"current_steps": 1202, "total_steps": 1320, "loss": 2.4867, "learning_rate": 4.3181859882612426e-07, "epoch": 1.8212121212121213, "percentage": 91.06, "elapsed_time": "1:16:24", "remaining_time": "0:07:30", "throughput": "1319.16", "total_tokens": 6047520} |
|
{"current_steps": 1204, "total_steps": 1320, "loss": 2.5415, "learning_rate": 4.307560496609911e-07, "epoch": 1.8242424242424242, "percentage": 91.21, "elapsed_time": "1:16:29", "remaining_time": "0:07:22", "throughput": "1319.29", "total_tokens": 6055488} |
|
{"current_steps": 1206, "total_steps": 1320, "loss": 2.3684, "learning_rate": 4.297111967115171e-07, "epoch": 1.8272727272727272, "percentage": 91.36, "elapsed_time": "1:16:35", "remaining_time": "0:07:14", "throughput": "1319.43", "total_tokens": 6063720} |
|
{"current_steps": 1208, "total_steps": 1320, "loss": 2.1882, "learning_rate": 4.286840640142166e-07, "epoch": 1.8303030303030303, "percentage": 91.52, "elapsed_time": "1:16:41", "remaining_time": "0:07:06", "throughput": "1319.56", "total_tokens": 6071664} |
|
{"current_steps": 1210, "total_steps": 1320, "loss": 2.4383, "learning_rate": 4.2767467519795497e-07, "epoch": 1.8333333333333335, "percentage": 91.67, "elapsed_time": "1:16:48", "remaining_time": "0:06:58", "throughput": "1319.74", "total_tokens": 6081360} |
|
{"current_steps": 1212, "total_steps": 1320, "loss": 2.2313, "learning_rate": 4.2668305348340495e-07, "epoch": 1.8363636363636364, "percentage": 91.82, "elapsed_time": "1:16:54", "remaining_time": "0:06:51", "throughput": "1319.91", "total_tokens": 6090624} |
|
{"current_steps": 1214, "total_steps": 1320, "loss": 2.3837, "learning_rate": 4.2570922168251294e-07, "epoch": 1.8393939393939394, "percentage": 91.97, "elapsed_time": "1:17:01", "remaining_time": "0:06:43", "throughput": "1320.10", "total_tokens": 6100944} |
|
{"current_steps": 1216, "total_steps": 1320, "loss": 2.3874, "learning_rate": 4.2475320219797406e-07, "epoch": 1.8424242424242423, "percentage": 92.12, "elapsed_time": "1:17:07", "remaining_time": "0:06:35", "throughput": "1320.21", "total_tokens": 6108792} |
|
{"current_steps": 1218, "total_steps": 1320, "loss": 2.3853, "learning_rate": 4.2381501702271623e-07, "epoch": 1.8454545454545455, "percentage": 92.27, "elapsed_time": "1:17:15", "remaining_time": "0:06:28", "throughput": "1320.45", "total_tokens": 6121080} |
|
{"current_steps": 1220, "total_steps": 1320, "loss": 2.3858, "learning_rate": 4.228946877393953e-07, "epoch": 1.8484848484848486, "percentage": 92.42, "elapsed_time": "1:17:22", "remaining_time": "0:06:20", "throughput": "1320.62", "total_tokens": 6131112} |
|
{"current_steps": 1220, "total_steps": 1320, "eval_loss": 2.3265769481658936, "epoch": 1.8484848484848486, "percentage": 92.42, "elapsed_time": "1:17:28", "remaining_time": "0:06:21", "throughput": "1318.97", "total_tokens": 6131112} |
|
{"current_steps": 1222, "total_steps": 1320, "loss": 2.3291, "learning_rate": 4.219922355198972e-07, "epoch": 1.8515151515151516, "percentage": 92.58, "elapsed_time": "1:17:35", "remaining_time": "0:06:13", "throughput": "1319.17", "total_tokens": 6141072} |
|
{"current_steps": 1224, "total_steps": 1320, "loss": 2.344, "learning_rate": 4.211076811248524e-07, "epoch": 1.8545454545454545, "percentage": 92.73, "elapsed_time": "1:17:42", "remaining_time": "0:06:05", "throughput": "1319.37", "total_tokens": 6152040} |
|
{"current_steps": 1226, "total_steps": 1320, "loss": 2.275, "learning_rate": 4.2024104490315696e-07, "epoch": 1.8575757575757574, "percentage": 92.88, "elapsed_time": "1:17:50", "remaining_time": "0:05:58", "throughput": "1319.57", "total_tokens": 6163368} |
|
{"current_steps": 1228, "total_steps": 1320, "loss": 2.4138, "learning_rate": 4.1939234679150516e-07, "epoch": 1.8606060606060606, "percentage": 93.03, "elapsed_time": "1:17:56", "remaining_time": "0:05:50", "throughput": "1319.70", "total_tokens": 6171072} |
|
{"current_steps": 1230, "total_steps": 1320, "loss": 2.2974, "learning_rate": 4.185616063139308e-07, "epoch": 1.8636363636363638, "percentage": 93.18, "elapsed_time": "1:18:04", "remaining_time": "0:05:42", "throughput": "1319.92", "total_tokens": 6183312} |
|
{"current_steps": 1232, "total_steps": 1320, "loss": 2.4087, "learning_rate": 4.177488425813578e-07, "epoch": 1.8666666666666667, "percentage": 93.33, "elapsed_time": "1:18:11", "remaining_time": "0:05:35", "throughput": "1320.10", "total_tokens": 6193104} |
|
{"current_steps": 1234, "total_steps": 1320, "loss": 2.4328, "learning_rate": 4.1695407429116063e-07, "epoch": 1.8696969696969696, "percentage": 93.48, "elapsed_time": "1:18:19", "remaining_time": "0:05:27", "throughput": "1320.32", "total_tokens": 6205392} |
|
{"current_steps": 1236, "total_steps": 1320, "loss": 2.4412, "learning_rate": 4.1617731972673466e-07, "epoch": 1.8727272727272726, "percentage": 93.64, "elapsed_time": "1:18:27", "remaining_time": "0:05:19", "throughput": "1320.50", "total_tokens": 6215808} |
|
{"current_steps": 1238, "total_steps": 1320, "loss": 2.2544, "learning_rate": 4.1541859675707454e-07, "epoch": 1.8757575757575757, "percentage": 93.79, "elapsed_time": "1:18:34", "remaining_time": "0:05:12", "throughput": "1320.68", "total_tokens": 6226224} |
|
{"current_steps": 1240, "total_steps": 1320, "loss": 2.3602, "learning_rate": 4.146779228363644e-07, "epoch": 1.878787878787879, "percentage": 93.94, "elapsed_time": "1:18:40", "remaining_time": "0:05:04", "throughput": "1320.84", "total_tokens": 6235464} |
|
{"current_steps": 1240, "total_steps": 1320, "eval_loss": 2.3264036178588867, "epoch": 1.878787878787879, "percentage": 93.94, "elapsed_time": "1:18:46", "remaining_time": "0:05:04", "throughput": "1319.21", "total_tokens": 6235464} |
|
{"current_steps": 1242, "total_steps": 1320, "loss": 2.439, "learning_rate": 4.139553150035751e-07, "epoch": 1.8818181818181818, "percentage": 94.09, "elapsed_time": "1:18:53", "remaining_time": "0:04:57", "throughput": "1319.38", "total_tokens": 6245400} |
|
{"current_steps": 1244, "total_steps": 1320, "loss": 2.466, "learning_rate": 4.1325078988207303e-07, "epoch": 1.8848484848484848, "percentage": 94.24, "elapsed_time": "1:18:58", "remaining_time": "0:04:49", "throughput": "1319.49", "total_tokens": 6252768} |
|
{"current_steps": 1246, "total_steps": 1320, "loss": 2.5193, "learning_rate": 4.1256436367923777e-07, "epoch": 1.887878787878788, "percentage": 94.39, "elapsed_time": "1:19:06", "remaining_time": "0:04:41", "throughput": "1319.70", "total_tokens": 6264432} |
|
{"current_steps": 1248, "total_steps": 1320, "loss": 2.4064, "learning_rate": 4.118960521860884e-07, "epoch": 1.8909090909090909, "percentage": 94.55, "elapsed_time": "1:19:13", "remaining_time": "0:04:34", "throughput": "1319.83", "total_tokens": 6273264} |
|
{"current_steps": 1250, "total_steps": 1320, "loss": 2.2931, "learning_rate": 4.1124587077692115e-07, "epoch": 1.893939393939394, "percentage": 94.7, "elapsed_time": "1:19:20", "remaining_time": "0:04:26", "throughput": "1320.00", "total_tokens": 6284112} |
|
{"current_steps": 1252, "total_steps": 1320, "loss": 2.4058, "learning_rate": 4.106138344089554e-07, "epoch": 1.896969696969697, "percentage": 94.85, "elapsed_time": "1:19:26", "remaining_time": "0:04:18", "throughput": "1320.13", "total_tokens": 6292248} |
|
{"current_steps": 1254, "total_steps": 1320, "loss": 2.4485, "learning_rate": 4.0999995762198936e-07, "epoch": 1.9, "percentage": 95.0, "elapsed_time": "1:19:33", "remaining_time": "0:04:11", "throughput": "1320.31", "total_tokens": 6302352} |
|
{"current_steps": 1256, "total_steps": 1320, "loss": 2.1889, "learning_rate": 4.094042545380659e-07, "epoch": 1.903030303030303, "percentage": 95.15, "elapsed_time": "1:19:39", "remaining_time": "0:04:03", "throughput": "1320.47", "total_tokens": 6311712} |
|
{"current_steps": 1258, "total_steps": 1320, "loss": 2.3617, "learning_rate": 4.088267388611474e-07, "epoch": 1.906060606060606, "percentage": 95.3, "elapsed_time": "1:19:47", "remaining_time": "0:03:55", "throughput": "1320.67", "total_tokens": 6323304} |
|
{"current_steps": 1260, "total_steps": 1320, "loss": 2.2347, "learning_rate": 4.082674238768009e-07, "epoch": 1.9090909090909092, "percentage": 95.45, "elapsed_time": "1:19:56", "remaining_time": "0:03:48", "throughput": "1320.88", "total_tokens": 6335304} |
|
{"current_steps": 1260, "total_steps": 1320, "eval_loss": 2.326760768890381, "epoch": 1.9090909090909092, "percentage": 95.45, "elapsed_time": "1:20:02", "remaining_time": "0:03:48", "throughput": "1319.28", "total_tokens": 6335304} |
|
{"current_steps": 1262, "total_steps": 1320, "loss": 2.2904, "learning_rate": 4.0772632245189193e-07, "epoch": 1.912121212121212, "percentage": 95.61, "elapsed_time": "1:20:09", "remaining_time": "0:03:41", "throughput": "1319.46", "total_tokens": 6345624} |
|
{"current_steps": 1264, "total_steps": 1320, "loss": 2.3719, "learning_rate": 4.0720344703428906e-07, "epoch": 1.915151515151515, "percentage": 95.76, "elapsed_time": "1:20:16", "remaining_time": "0:03:33", "throughput": "1319.64", "total_tokens": 6355632} |
|
{"current_steps": 1266, "total_steps": 1320, "loss": 2.3489, "learning_rate": 4.066988096525772e-07, "epoch": 1.9181818181818182, "percentage": 95.91, "elapsed_time": "1:20:21", "remaining_time": "0:03:25", "throughput": "1319.77", "total_tokens": 6363840} |
|
{"current_steps": 1268, "total_steps": 1320, "loss": 2.3433, "learning_rate": 4.062124219157808e-07, "epoch": 1.9212121212121214, "percentage": 96.06, "elapsed_time": "1:20:29", "remaining_time": "0:03:18", "throughput": "1319.97", "total_tokens": 6375000} |
|
{"current_steps": 1270, "total_steps": 1320, "loss": 2.4374, "learning_rate": 4.057442950130972e-07, "epoch": 1.9242424242424243, "percentage": 96.21, "elapsed_time": "1:20:37", "remaining_time": "0:03:10", "throughput": "1320.15", "total_tokens": 6385632} |
|
{"current_steps": 1272, "total_steps": 1320, "loss": 2.3613, "learning_rate": 4.05294439713639e-07, "epoch": 1.9272727272727272, "percentage": 96.36, "elapsed_time": "1:20:45", "remaining_time": "0:03:02", "throughput": "1320.36", "total_tokens": 6397728} |
|
{"current_steps": 1274, "total_steps": 1320, "loss": 2.1642, "learning_rate": 4.048628663661859e-07, "epoch": 1.9303030303030302, "percentage": 96.52, "elapsed_time": "1:20:53", "remaining_time": "0:02:55", "throughput": "1320.56", "total_tokens": 6409512} |
|
{"current_steps": 1276, "total_steps": 1320, "loss": 2.4558, "learning_rate": 4.044495848989475e-07, "epoch": 1.9333333333333333, "percentage": 96.67, "elapsed_time": "1:21:00", "remaining_time": "0:02:47", "throughput": "1320.71", "total_tokens": 6419664} |
|
{"current_steps": 1278, "total_steps": 1320, "loss": 2.5869, "learning_rate": 4.040546048193343e-07, "epoch": 1.9363636363636365, "percentage": 96.82, "elapsed_time": "1:21:07", "remaining_time": "0:02:39", "throughput": "1320.85", "total_tokens": 6428712} |
|
{"current_steps": 1280, "total_steps": 1320, "loss": 2.577, "learning_rate": 4.0367793521373886e-07, "epoch": 1.9393939393939394, "percentage": 96.97, "elapsed_time": "1:21:12", "remaining_time": "0:02:32", "throughput": "1320.96", "total_tokens": 6435960} |
|
{"current_steps": 1280, "total_steps": 1320, "eval_loss": 2.3265655040740967, "epoch": 1.9393939393939394, "percentage": 96.97, "elapsed_time": "1:21:18", "remaining_time": "0:02:32", "throughput": "1319.38", "total_tokens": 6435960} |
|
{"current_steps": 1282, "total_steps": 1320, "loss": 2.4398, "learning_rate": 4.0331958474732744e-07, "epoch": 1.9424242424242424, "percentage": 97.12, "elapsed_time": "1:21:25", "remaining_time": "0:02:24", "throughput": "1319.57", "total_tokens": 6446952} |
|
{"current_steps": 1284, "total_steps": 1320, "loss": 2.3546, "learning_rate": 4.0297956166384e-07, "epoch": 1.9454545454545453, "percentage": 97.27, "elapsed_time": "1:21:30", "remaining_time": "0:02:17", "throughput": "1319.68", "total_tokens": 6454440} |
|
{"current_steps": 1286, "total_steps": 1320, "loss": 2.2851, "learning_rate": 4.0265787378540076e-07, "epoch": 1.9484848484848485, "percentage": 97.42, "elapsed_time": "1:21:38", "remaining_time": "0:02:09", "throughput": "1319.90", "total_tokens": 6465888} |
|
{"current_steps": 1288, "total_steps": 1320, "loss": 2.501, "learning_rate": 4.023545285123386e-07, "epoch": 1.9515151515151516, "percentage": 97.58, "elapsed_time": "1:21:44", "remaining_time": "0:02:01", "throughput": "1320.03", "total_tokens": 6474384} |
|
{"current_steps": 1290, "total_steps": 1320, "loss": 2.3128, "learning_rate": 4.020695328230162e-07, "epoch": 1.9545454545454546, "percentage": 97.73, "elapsed_time": "1:21:52", "remaining_time": "0:01:54", "throughput": "1320.24", "total_tokens": 6485712} |
|
{"current_steps": 1292, "total_steps": 1320, "loss": 2.2989, "learning_rate": 4.018028932736699e-07, "epoch": 1.9575757575757575, "percentage": 97.88, "elapsed_time": "1:22:00", "remaining_time": "0:01:46", "throughput": "1320.44", "total_tokens": 6497160} |
|
{"current_steps": 1294, "total_steps": 1320, "loss": 2.4504, "learning_rate": 4.01554615998259e-07, "epoch": 1.9606060606060605, "percentage": 98.03, "elapsed_time": "1:22:08", "remaining_time": "0:01:39", "throughput": "1320.65", "total_tokens": 6508920} |
|
{"current_steps": 1296, "total_steps": 1320, "loss": 2.4132, "learning_rate": 4.013247067083242e-07, "epoch": 1.9636363636363636, "percentage": 98.18, "elapsed_time": "1:22:16", "remaining_time": "0:01:31", "throughput": "1320.86", "total_tokens": 6520176} |
|
{"current_steps": 1298, "total_steps": 1320, "loss": 2.3645, "learning_rate": 4.011131706928566e-07, "epoch": 1.9666666666666668, "percentage": 98.33, "elapsed_time": "1:22:24", "remaining_time": "0:01:23", "throughput": "1321.05", "total_tokens": 6531528} |
|
{"current_steps": 1300, "total_steps": 1320, "loss": 2.3001, "learning_rate": 4.0092001281817576e-07, "epoch": 1.9696969696969697, "percentage": 98.48, "elapsed_time": "1:22:32", "remaining_time": "0:01:16", "throughput": "1321.25", "total_tokens": 6543816} |
|
{"current_steps": 1300, "total_steps": 1320, "eval_loss": 2.3271186351776123, "epoch": 1.9696969696969697, "percentage": 98.48, "elapsed_time": "1:22:38", "remaining_time": "0:01:16", "throughput": "1319.70", "total_tokens": 6543816} |
|
{"current_steps": 1302, "total_steps": 1320, "loss": 2.8758, "learning_rate": 4.0074523752781806e-07, "epoch": 1.9727272727272727, "percentage": 98.64, "elapsed_time": "1:22:46", "remaining_time": "0:01:08", "throughput": "1319.40", "total_tokens": 6552936} |
|
{"current_steps": 1304, "total_steps": 1320, "loss": 2.3766, "learning_rate": 4.0058884884243416e-07, "epoch": 1.9757575757575756, "percentage": 98.79, "elapsed_time": "1:22:53", "remaining_time": "0:01:01", "throughput": "1319.57", "total_tokens": 6562896} |
|
{"current_steps": 1306, "total_steps": 1320, "loss": 2.3732, "learning_rate": 4.004508503596967e-07, "epoch": 1.9787878787878788, "percentage": 98.94, "elapsed_time": "1:23:02", "remaining_time": "0:00:53", "throughput": "1319.77", "total_tokens": 6575184} |
|
{"current_steps": 1308, "total_steps": 1320, "loss": 2.3789, "learning_rate": 4.0033124525421757e-07, "epoch": 1.981818181818182, "percentage": 99.09, "elapsed_time": "1:23:09", "remaining_time": "0:00:45", "throughput": "1319.96", "total_tokens": 6586032} |
|
{"current_steps": 1310, "total_steps": 1320, "loss": 2.2654, "learning_rate": 4.0023003627747455e-07, "epoch": 1.9848484848484849, "percentage": 99.24, "elapsed_time": "1:23:15", "remaining_time": "0:00:38", "throughput": "1320.11", "total_tokens": 6594768} |
|
{"current_steps": 1312, "total_steps": 1320, "loss": 2.4605, "learning_rate": 4.0014722575774835e-07, "epoch": 1.9878787878787878, "percentage": 99.39, "elapsed_time": "1:23:22", "remaining_time": "0:00:30", "throughput": "1320.26", "total_tokens": 6604728} |
|
{"current_steps": 1314, "total_steps": 1320, "loss": 2.4816, "learning_rate": 4.000828156000692e-07, "epoch": 1.990909090909091, "percentage": 99.55, "elapsed_time": "1:23:30", "remaining_time": "0:00:22", "throughput": "1320.48", "total_tokens": 6616536} |
|
{"current_steps": 1316, "total_steps": 1320, "loss": 2.482, "learning_rate": 4.000368072861723e-07, "epoch": 1.993939393939394, "percentage": 99.7, "elapsed_time": "1:23:36", "remaining_time": "0:00:15", "throughput": "1320.61", "total_tokens": 6624480} |
|
{"current_steps": 1318, "total_steps": 1320, "loss": 2.45, "learning_rate": 4.0000920187446465e-07, "epoch": 1.996969696969697, "percentage": 99.85, "elapsed_time": "1:23:44", "remaining_time": "0:00:07", "throughput": "1320.84", "total_tokens": 6636768} |
|
{"current_steps": 1320, "total_steps": 1320, "loss": 2.2129, "learning_rate": 4e-07, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "1:23:51", "remaining_time": "0:00:00", "throughput": "1321.00", "total_tokens": 6646824} |
|
{"current_steps": 1320, "total_steps": 1320, "eval_loss": 2.326845645904541, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "1:23:57", "remaining_time": "0:00:00", "throughput": "1319.47", "total_tokens": 6646824} |
|
{"current_steps": 1320, "total_steps": 1320, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "1:23:57", "remaining_time": "0:00:00", "throughput": "1319.47", "total_tokens": 6646824} |
|
|