k1h0's picture
Upload folder using huggingface_hub
f72e39a verified
{"current_steps": 1, "total_steps": 61, "loss": 0.8835, "lr": 4.9966852247120764e-05, "epoch": 0.016260162601626018, "percentage": 1.64, "elapsed_time": "0:02:41", "remaining_time": "2:41:27", "throughput": 9741.84, "total_tokens": 1572864}
{"current_steps": 2, "total_steps": 61, "loss": 0.8172, "lr": 4.9867496890364726e-05, "epoch": 0.032520325203252036, "percentage": 3.28, "elapsed_time": "0:05:15", "remaining_time": "2:34:59", "throughput": 9978.43, "total_tokens": 3145728}
{"current_steps": 3, "total_steps": 61, "loss": 0.7415, "lr": 4.970219740227693e-05, "epoch": 0.04878048780487805, "percentage": 4.92, "elapsed_time": "0:07:48", "remaining_time": "2:30:59", "throughput": 10069.5, "total_tokens": 4718592}
{"current_steps": 4, "total_steps": 61, "loss": 0.7198, "lr": 4.947139212738395e-05, "epoch": 0.06504065040650407, "percentage": 6.56, "elapsed_time": "0:10:21", "remaining_time": "2:27:41", "throughput": 10117.11, "total_tokens": 6291456}
{"current_steps": 5, "total_steps": 61, "loss": 0.6985, "lr": 4.9175693119783013e-05, "epoch": 0.08130081300813008, "percentage": 8.2, "elapsed_time": "0:12:56", "remaining_time": "2:24:55", "throughput": 10129.94, "total_tokens": 7864320}
{"current_steps": 6, "total_steps": 61, "loss": 0.6642, "lr": 4.881588452008456e-05, "epoch": 0.0975609756097561, "percentage": 9.84, "elapsed_time": "0:15:30", "remaining_time": "2:22:05", "throughput": 10146.98, "total_tokens": 9437184}
{"current_steps": 7, "total_steps": 61, "loss": 0.6677, "lr": 4.839292047601234e-05, "epoch": 0.11382113821138211, "percentage": 11.48, "elapsed_time": "0:18:03", "remaining_time": "2:19:20", "throughput": 10159.48, "total_tokens": 11010048}
{"current_steps": 8, "total_steps": 61, "loss": 0.6451, "lr": 4.790792261217512e-05, "epoch": 0.13008130081300814, "percentage": 13.11, "elapsed_time": "0:20:37", "remaining_time": "2:16:41", "throughput": 10164.48, "total_tokens": 12582912}
{"current_steps": 9, "total_steps": 61, "loss": 0.6327, "lr": 4.736217705571989e-05, "epoch": 0.14634146341463414, "percentage": 14.75, "elapsed_time": "0:23:11", "remaining_time": "2:14:00", "throughput": 10172.04, "total_tokens": 14155776}
{"current_steps": 10, "total_steps": 61, "loss": 0.6331, "lr": 4.6757131025753886e-05, "epoch": 0.16260162601626016, "percentage": 16.39, "elapsed_time": "0:25:46", "remaining_time": "2:11:27", "throughput": 10169.75, "total_tokens": 15728640}
{"current_steps": 11, "total_steps": 61, "loss": 0.6219, "lr": 4.609438899557964e-05, "epoch": 0.17886178861788618, "percentage": 18.03, "elapsed_time": "0:28:22", "remaining_time": "2:08:59", "throughput": 10161.43, "total_tokens": 17301504}
{"current_steps": 12, "total_steps": 61, "loss": 0.6205, "lr": 4.5375708437920284e-05, "epoch": 0.1951219512195122, "percentage": 19.67, "elapsed_time": "0:30:56", "remaining_time": "2:06:19", "throughput": 10168.14, "total_tokens": 18874368}
{"current_steps": 13, "total_steps": 61, "loss": 0.601, "lr": 4.460299516441777e-05, "epoch": 0.21138211382113822, "percentage": 21.31, "elapsed_time": "0:33:30", "remaining_time": "2:03:41", "throughput": 10172.37, "total_tokens": 20447232}
{"current_steps": 14, "total_steps": 61, "loss": 0.6077, "lr": 4.3778298271762995e-05, "epoch": 0.22764227642276422, "percentage": 22.95, "elapsed_time": "0:36:04", "remaining_time": "2:01:05", "throughput": 10175.33, "total_tokens": 22020096}
{"current_steps": 15, "total_steps": 61, "loss": 0.5958, "lr": 4.2903804707859835e-05, "epoch": 0.24390243902439024, "percentage": 24.59, "elapsed_time": "0:38:37", "remaining_time": "1:58:27", "throughput": 10179.71, "total_tokens": 23592960}
{"current_steps": 16, "total_steps": 61, "loss": 0.5838, "lr": 4.198183347243233e-05, "epoch": 0.2601626016260163, "percentage": 26.23, "elapsed_time": "0:41:11", "remaining_time": "1:55:50", "throughput": 10183.0, "total_tokens": 25165824}
{"current_steps": 17, "total_steps": 61, "loss": 0.5629, "lr": 4.101482946745439e-05, "epoch": 0.2764227642276423, "percentage": 27.87, "elapsed_time": "0:43:45", "remaining_time": "1:53:14", "throughput": 10185.76, "total_tokens": 26738688}
{"current_steps": 18, "total_steps": 61, "loss": 0.5848, "lr": 4.000535701370921e-05, "epoch": 0.2926829268292683, "percentage": 29.51, "elapsed_time": "0:46:18", "remaining_time": "1:50:38", "throughput": 10188.71, "total_tokens": 28311552}
{"current_steps": 19, "total_steps": 61, "loss": 0.5772, "lr": 3.895609305067162e-05, "epoch": 0.3089430894308943, "percentage": 31.15, "elapsed_time": "0:48:52", "remaining_time": "1:48:02", "throughput": 10190.23, "total_tokens": 29884416}
{"current_steps": 20, "total_steps": 61, "loss": 0.5719, "lr": 3.7869820037745776e-05, "epoch": 0.3252032520325203, "percentage": 32.79, "elapsed_time": "0:51:26", "remaining_time": "1:45:27", "throughput": 10192.39, "total_tokens": 31457280}
{"current_steps": 21, "total_steps": 61, "loss": 0.5445, "lr": 3.6749418575683e-05, "epoch": 0.34146341463414637, "percentage": 34.43, "elapsed_time": "0:53:59", "remaining_time": "1:42:50", "throughput": 10195.72, "total_tokens": 33030144}
{"current_steps": 22, "total_steps": 61, "loss": 0.556, "lr": 3.5597859767746524e-05, "epoch": 0.35772357723577236, "percentage": 36.07, "elapsed_time": "0:56:32", "remaining_time": "1:40:14", "throughput": 10198.63, "total_tokens": 34603008}
{"current_steps": 23, "total_steps": 61, "loss": 0.5736, "lr": 3.4418197340879635e-05, "epoch": 0.37398373983739835, "percentage": 37.7, "elapsed_time": "0:59:06", "remaining_time": "1:37:39", "throughput": 10199.96, "total_tokens": 36175872}
{"current_steps": 24, "total_steps": 61, "loss": 0.535, "lr": 3.321355954777087e-05, "epoch": 0.3902439024390244, "percentage": 39.34, "elapsed_time": "1:01:39", "remaining_time": "1:35:03", "throughput": 10203.32, "total_tokens": 37748736}
{"current_steps": 25, "total_steps": 61, "loss": 0.5634, "lr": 3.1987140871290236e-05, "epoch": 0.4065040650406504, "percentage": 40.98, "elapsed_time": "1:04:12", "remaining_time": "1:32:28", "throughput": 10205.81, "total_tokens": 39321600}
{"current_steps": 26, "total_steps": 61, "loss": 0.5648, "lr": 3.07421935532949e-05, "epoch": 0.42276422764227645, "percentage": 42.62, "elapsed_time": "1:06:47", "remaining_time": "1:29:55", "throughput": 10203.48, "total_tokens": 40894464}
{"current_steps": 27, "total_steps": 61, "loss": 0.5467, "lr": 2.9482018970268393e-05, "epoch": 0.43902439024390244, "percentage": 44.26, "elapsed_time": "1:09:22", "remaining_time": "1:27:21", "throughput": 10202.72, "total_tokens": 42467328}
{"current_steps": 28, "total_steps": 61, "loss": 0.5567, "lr": 2.8209958878663778e-05, "epoch": 0.45528455284552843, "percentage": 45.9, "elapsed_time": "1:11:56", "remaining_time": "1:24:46", "throughput": 10203.8, "total_tokens": 44040192}
{"current_steps": 29, "total_steps": 61, "loss": 0.5847, "lr": 2.6929386553166164e-05, "epoch": 0.4715447154471545, "percentage": 47.54, "elapsed_time": "1:14:30", "remaining_time": "1:22:12", "throughput": 10203.36, "total_tokens": 45613056}
{"current_steps": 30, "total_steps": 61, "loss": 0.5429, "lr": 2.564369784137472e-05, "epoch": 0.4878048780487805, "percentage": 49.18, "elapsed_time": "1:17:04", "remaining_time": "1:19:38", "throughput": 10204.19, "total_tokens": 47185920}
{"current_steps": 31, "total_steps": 61, "loss": 0.5435, "lr": 2.4356302158625288e-05, "epoch": 0.5040650406504065, "percentage": 50.82, "elapsed_time": "1:19:38", "remaining_time": "1:17:03", "throughput": 10204.68, "total_tokens": 48758784}
{"current_steps": 32, "total_steps": 61, "loss": 0.5482, "lr": 2.3070613446833842e-05, "epoch": 0.5203252032520326, "percentage": 52.46, "elapsed_time": "1:22:11", "remaining_time": "1:14:29", "throughput": 10205.83, "total_tokens": 50331648}
{"current_steps": 33, "total_steps": 61, "loss": 0.5418, "lr": 2.1790041121336225e-05, "epoch": 0.5365853658536586, "percentage": 54.1, "elapsed_time": "1:24:45", "remaining_time": "1:11:55", "throughput": 10206.16, "total_tokens": 51904512}
{"current_steps": 34, "total_steps": 61, "loss": 0.532, "lr": 2.0517981029731616e-05, "epoch": 0.5528455284552846, "percentage": 55.74, "elapsed_time": "1:27:20", "remaining_time": "1:09:21", "throughput": 10205.44, "total_tokens": 53477376}
{"current_steps": 35, "total_steps": 61, "loss": 0.536, "lr": 1.9257806446705116e-05, "epoch": 0.5691056910569106, "percentage": 57.38, "elapsed_time": "1:29:54", "remaining_time": "1:06:47", "throughput": 10205.42, "total_tokens": 55050240}
{"current_steps": 36, "total_steps": 61, "loss": 0.5314, "lr": 1.8012859128709766e-05, "epoch": 0.5853658536585366, "percentage": 59.02, "elapsed_time": "1:32:29", "remaining_time": "1:04:13", "throughput": 10204.05, "total_tokens": 56623104}
{"current_steps": 37, "total_steps": 61, "loss": 0.5595, "lr": 1.6786440452229134e-05, "epoch": 0.6016260162601627, "percentage": 60.66, "elapsed_time": "1:35:02", "remaining_time": "1:01:38", "throughput": 10205.77, "total_tokens": 58195968}
{"current_steps": 38, "total_steps": 61, "loss": 0.5418, "lr": 1.558180265912037e-05, "epoch": 0.6178861788617886, "percentage": 62.3, "elapsed_time": "1:37:37", "remaining_time": "0:59:05", "throughput": 10203.12, "total_tokens": 59768832}
{"current_steps": 39, "total_steps": 61, "loss": 0.5438, "lr": 1.4402140232253486e-05, "epoch": 0.6341463414634146, "percentage": 63.93, "elapsed_time": "1:40:11", "remaining_time": "0:56:31", "throughput": 10203.74, "total_tokens": 61341696}
{"current_steps": 40, "total_steps": 61, "loss": 0.5239, "lr": 1.325058142431701e-05, "epoch": 0.6504065040650406, "percentage": 65.57, "elapsed_time": "1:42:45", "remaining_time": "0:53:56", "throughput": 10204.7, "total_tokens": 62914560}
{"current_steps": 41, "total_steps": 61, "loss": 0.5459, "lr": 1.213017996225424e-05, "epoch": 0.6666666666666666, "percentage": 67.21, "elapsed_time": "1:45:19", "remaining_time": "0:51:22", "throughput": 10204.78, "total_tokens": 64487424}
{"current_steps": 42, "total_steps": 61, "loss": 0.5373, "lr": 1.1043906949328387e-05, "epoch": 0.6829268292682927, "percentage": 68.85, "elapsed_time": "1:47:53", "remaining_time": "0:48:48", "throughput": 10204.67, "total_tokens": 66060288}
{"current_steps": 43, "total_steps": 61, "loss": 0.5474, "lr": 9.994642986290797e-06, "epoch": 0.6991869918699187, "percentage": 70.49, "elapsed_time": "1:50:28", "remaining_time": "0:46:14", "throughput": 10203.24, "total_tokens": 67633152}
{"current_steps": 44, "total_steps": 61, "loss": 0.5236, "lr": 8.985170532545622e-06, "epoch": 0.7154471544715447, "percentage": 72.13, "elapsed_time": "1:53:02", "remaining_time": "0:43:40", "throughput": 10203.82, "total_tokens": 69206016}
{"current_steps": 45, "total_steps": 61, "loss": 0.5336, "lr": 8.018166527567672e-06, "epoch": 0.7317073170731707, "percentage": 73.77, "elapsed_time": "1:55:35", "remaining_time": "0:41:05", "throughput": 10205.18, "total_tokens": 70778880}
{"current_steps": 46, "total_steps": 61, "loss": 0.5198, "lr": 7.096195292140173e-06, "epoch": 0.7479674796747967, "percentage": 75.41, "elapsed_time": "1:58:11", "remaining_time": "0:38:32", "throughput": 10203.27, "total_tokens": 72351744}
{"current_steps": 47, "total_steps": 61, "loss": 0.5419, "lr": 6.221701728237009e-06, "epoch": 0.7642276422764228, "percentage": 77.05, "elapsed_time": "2:00:45", "remaining_time": "0:35:58", "throughput": 10202.64, "total_tokens": 73924608}
{"current_steps": 48, "total_steps": 61, "loss": 0.5544, "lr": 5.397004835582242e-06, "epoch": 0.7804878048780488, "percentage": 78.69, "elapsed_time": "2:03:22", "remaining_time": "0:33:24", "throughput": 10198.78, "total_tokens": 75497472}
{"current_steps": 49, "total_steps": 61, "loss": 0.5689, "lr": 4.624291562079719e-06, "epoch": 0.7967479674796748, "percentage": 80.33, "elapsed_time": "2:05:55", "remaining_time": "0:30:50", "throughput": 10200.12, "total_tokens": 77070336}
{"current_steps": 50, "total_steps": 61, "loss": 0.517, "lr": 3.90561100442036e-06, "epoch": 0.8130081300813008, "percentage": 81.97, "elapsed_time": "2:08:29", "remaining_time": "0:28:16", "throughput": 10200.21, "total_tokens": 78643200}
{"current_steps": 51, "total_steps": 61, "loss": 0.544, "lr": 3.2428689742461188e-06, "epoch": 0.8292682926829268, "percentage": 83.61, "elapsed_time": "2:11:03", "remaining_time": "0:25:41", "throughput": 10200.78, "total_tokens": 80216064}
{"current_steps": 52, "total_steps": 61, "loss": 0.5265, "lr": 2.637822944280116e-06, "epoch": 0.8455284552845529, "percentage": 85.25, "elapsed_time": "2:13:40", "remaining_time": "0:23:08", "throughput": 10198.05, "total_tokens": 81788928}
{"current_steps": 53, "total_steps": 61, "loss": 0.5301, "lr": 2.092077387824884e-06, "epoch": 0.8617886178861789, "percentage": 86.89, "elapsed_time": "2:16:14", "remaining_time": "0:20:33", "throughput": 10197.6, "total_tokens": 83361792}
{"current_steps": 54, "total_steps": 61, "loss": 0.5357, "lr": 1.6070795239876618e-06, "epoch": 0.8780487804878049, "percentage": 88.52, "elapsed_time": "2:18:49", "remaining_time": "0:17:59", "throughput": 10196.24, "total_tokens": 84934656}
{"current_steps": 55, "total_steps": 61, "loss": 0.522, "lr": 1.1841154799154374e-06, "epoch": 0.8943089430894309, "percentage": 90.16, "elapsed_time": "2:21:24", "remaining_time": "0:15:25", "throughput": 10196.15, "total_tokens": 86507520}
{"current_steps": 56, "total_steps": 61, "loss": 0.5282, "lr": 8.243068802169906e-07, "epoch": 0.9105691056910569, "percentage": 91.8, "elapsed_time": "2:23:59", "remaining_time": "0:12:51", "throughput": 10194.76, "total_tokens": 88080384}
{"current_steps": 57, "total_steps": 61, "loss": 0.5327, "lr": 5.286078726160549e-07, "epoch": 0.926829268292683, "percentage": 93.44, "elapsed_time": "2:26:33", "remaining_time": "0:10:17", "throughput": 10195.16, "total_tokens": 89653248}
{"current_steps": 58, "total_steps": 61, "loss": 0.5237, "lr": 2.978025977230736e-07, "epoch": 0.943089430894309, "percentage": 95.08, "elapsed_time": "2:29:07", "remaining_time": "0:07:42", "throughput": 10195.52, "total_tokens": 91226112}
{"current_steps": 59, "total_steps": 61, "loss": 0.5499, "lr": 1.3250310963527358e-07, "epoch": 0.959349593495935, "percentage": 96.72, "elapsed_time": "2:31:41", "remaining_time": "0:05:08", "throughput": 10195.79, "total_tokens": 92798976}
{"current_steps": 60, "total_steps": 61, "loss": 0.5576, "lr": 3.314775287923677e-08, "epoch": 0.975609756097561, "percentage": 98.36, "elapsed_time": "2:34:16", "remaining_time": "0:02:34", "throughput": 10195.29, "total_tokens": 94371840}
{"current_steps": 61, "total_steps": 61, "loss": 0.5487, "lr": 0.0, "epoch": 0.991869918699187, "percentage": 100.0, "elapsed_time": "2:36:50", "remaining_time": "0:00:00", "throughput": 10195.34, "total_tokens": 95944704}
{"current_steps": 61, "total_steps": 61, "epoch": 0.991869918699187, "percentage": 100.0, "elapsed_time": "2:37:16", "remaining_time": "0:00:00", "throughput": 10167.7, "total_tokens": 95944704}