llama3.1-8B-Instruct-query_nsx / trainer_log.jsonl
k1h0's picture
Upload folder using huggingface_hub
472cf7a verified
{"current_steps": 1, "total_steps": 103, "loss": 1.2027, "lr": 4.998837209058379e-05, "epoch": 0.009685230024213076, "percentage": 0.97, "elapsed_time": "0:03:01", "remaining_time": "5:07:44", "throughput": 11584.8, "total_tokens": 2097152}
{"current_steps": 2, "total_steps": 103, "loss": 1.0647, "lr": 4.9953499178997346e-05, "epoch": 0.01937046004842615, "percentage": 1.94, "elapsed_time": "0:05:53", "remaining_time": "4:57:54", "throughput": 11850.11, "total_tokens": 4194304}
{"current_steps": 3, "total_steps": 103, "loss": 1.0243, "lr": 4.9895413705165234e-05, "epoch": 0.029055690072639227, "percentage": 2.91, "elapsed_time": "0:08:46", "remaining_time": "4:52:19", "throughput": 11957.01, "total_tokens": 6291456}
{"current_steps": 4, "total_steps": 103, "loss": 0.9974, "lr": 4.98141697020977e-05, "epoch": 0.0387409200968523, "percentage": 3.88, "elapsed_time": "0:11:38", "remaining_time": "4:48:03", "throughput": 12012.5, "total_tokens": 8388608}
{"current_steps": 5, "total_steps": 103, "loss": 0.9874, "lr": 4.970984274562741e-05, "epoch": 0.048426150121065374, "percentage": 4.85, "elapsed_time": "0:14:30", "remaining_time": "4:44:15", "throughput": 12050.19, "total_tokens": 10485760}
{"current_steps": 6, "total_steps": 103, "loss": 0.9478, "lr": 4.958252988410631e-05, "epoch": 0.05811138014527845, "percentage": 5.83, "elapsed_time": "0:17:22", "remaining_time": "4:40:51", "throughput": 12071.84, "total_tokens": 12582912}
{"current_steps": 7, "total_steps": 103, "loss": 0.9761, "lr": 4.9432349548128124e-05, "epoch": 0.06779661016949153, "percentage": 6.8, "elapsed_time": "0:20:14", "remaining_time": "4:37:35", "throughput": 12087.55, "total_tokens": 14680064}
{"current_steps": 8, "total_steps": 103, "loss": 0.9698, "lr": 4.925944144036026e-05, "epoch": 0.0774818401937046, "percentage": 7.77, "elapsed_time": "0:23:06", "remaining_time": "4:34:29", "throughput": 12096.64, "total_tokens": 16777216}
{"current_steps": 9, "total_steps": 103, "loss": 0.9804, "lr": 4.90639664055879e-05, "epoch": 0.08716707021791767, "percentage": 8.74, "elapsed_time": "0:25:59", "remaining_time": "4:31:23", "throughput": 12106.38, "total_tokens": 18874368}
{"current_steps": 10, "total_steps": 103, "loss": 0.9562, "lr": 4.884610628109082e-05, "epoch": 0.09685230024213075, "percentage": 9.71, "elapsed_time": "0:28:50", "remaining_time": "4:28:17", "throughput": 12115.6, "total_tokens": 20971520}
{"current_steps": 11, "total_steps": 103, "loss": 0.9649, "lr": 4.860606372749247e-05, "epoch": 0.10653753026634383, "percentage": 10.68, "elapsed_time": "0:31:43", "remaining_time": "4:25:17", "throughput": 12121.46, "total_tokens": 23068672}
{"current_steps": 12, "total_steps": 103, "loss": 0.93, "lr": 4.8344062040238395e-05, "epoch": 0.1162227602905569, "percentage": 11.65, "elapsed_time": "0:34:35", "remaining_time": "4:22:21", "throughput": 12123.79, "total_tokens": 25165824}
{"current_steps": 13, "total_steps": 103, "loss": 0.9356, "lr": 4.806034494187949e-05, "epoch": 0.12590799031476999, "percentage": 12.62, "elapsed_time": "0:37:28", "remaining_time": "4:19:27", "throughput": 12124.38, "total_tokens": 27262976}
{"current_steps": 14, "total_steps": 103, "loss": 0.9279, "lr": 4.775517635535332e-05, "epoch": 0.13559322033898305, "percentage": 13.59, "elapsed_time": "0:40:21", "remaining_time": "4:16:32", "throughput": 12125.6, "total_tokens": 29360128}
{"current_steps": 15, "total_steps": 103, "loss": 0.9311, "lr": 4.742884015847436e-05, "epoch": 0.14527845036319612, "percentage": 14.56, "elapsed_time": "0:43:13", "remaining_time": "4:13:33", "throughput": 12130.58, "total_tokens": 31457280}
{"current_steps": 16, "total_steps": 103, "loss": 0.9311, "lr": 4.708163991986152e-05, "epoch": 0.1549636803874092, "percentage": 15.53, "elapsed_time": "0:46:05", "remaining_time": "4:10:36", "throughput": 12134.17, "total_tokens": 33554432}
{"current_steps": 17, "total_steps": 103, "loss": 0.9381, "lr": 4.6713898616548724e-05, "epoch": 0.16464891041162227, "percentage": 16.5, "elapsed_time": "0:48:57", "remaining_time": "4:07:40", "throughput": 12136.31, "total_tokens": 35651584}
{"current_steps": 18, "total_steps": 103, "loss": 0.9354, "lr": 4.6325958333541044e-05, "epoch": 0.17433414043583534, "percentage": 17.48, "elapsed_time": "0:51:49", "remaining_time": "4:04:45", "throughput": 12138.18, "total_tokens": 37748736}
{"current_steps": 19, "total_steps": 103, "loss": 0.9574, "lr": 4.591817994559605e-05, "epoch": 0.18401937046004843, "percentage": 18.45, "elapsed_time": "0:54:42", "remaining_time": "4:01:51", "throughput": 12139.11, "total_tokens": 39845888}
{"current_steps": 20, "total_steps": 103, "loss": 0.9124, "lr": 4.5490942781526316e-05, "epoch": 0.1937046004842615, "percentage": 19.42, "elapsed_time": "0:57:34", "remaining_time": "3:58:57", "throughput": 12140.73, "total_tokens": 41943040}
{"current_steps": 21, "total_steps": 103, "loss": 0.95, "lr": 4.504464427133527e-05, "epoch": 0.2033898305084746, "percentage": 20.39, "elapsed_time": "1:00:26", "remaining_time": "3:56:02", "throughput": 12142.69, "total_tokens": 44040192}
{"current_steps": 22, "total_steps": 103, "loss": 0.9179, "lr": 4.457969957651484e-05, "epoch": 0.21307506053268765, "percentage": 21.36, "elapsed_time": "1:03:19", "remaining_time": "3:53:08", "throughput": 12143.43, "total_tokens": 46137344}
{"current_steps": 23, "total_steps": 103, "loss": 0.9414, "lr": 4.409654120384862e-05, "epoch": 0.22276029055690072, "percentage": 22.33, "elapsed_time": "1:06:11", "remaining_time": "3:50:14", "throughput": 12144.94, "total_tokens": 48234496}
{"current_steps": 24, "total_steps": 103, "loss": 0.9348, "lr": 4.35956186030799e-05, "epoch": 0.2324455205811138, "percentage": 23.3, "elapsed_time": "1:09:03", "remaining_time": "3:47:17", "throughput": 12148.22, "total_tokens": 50331648}
{"current_steps": 25, "total_steps": 103, "loss": 0.926, "lr": 4.307739774881878e-05, "epoch": 0.24213075060532688, "percentage": 24.27, "elapsed_time": "1:11:55", "remaining_time": "3:44:25", "throughput": 12148.04, "total_tokens": 52428800}
{"current_steps": 26, "total_steps": 103, "loss": 0.9323, "lr": 4.254236070707733e-05, "epoch": 0.25181598062953997, "percentage": 25.24, "elapsed_time": "1:14:47", "remaining_time": "3:41:30", "throughput": 12150.35, "total_tokens": 54525952}
{"current_steps": 27, "total_steps": 103, "loss": 0.9218, "lr": 4.1991005186836005e-05, "epoch": 0.26150121065375304, "percentage": 26.21, "elapsed_time": "1:17:40", "remaining_time": "3:38:38", "throughput": 12149.24, "total_tokens": 56623104}
{"current_steps": 28, "total_steps": 103, "loss": 0.9319, "lr": 4.142384407705846e-05, "epoch": 0.2711864406779661, "percentage": 27.18, "elapsed_time": "1:20:33", "remaining_time": "3:35:46", "throughput": 12149.2, "total_tokens": 58720256}
{"current_steps": 29, "total_steps": 103, "loss": 0.9202, "lr": 4.084140496958538e-05, "epoch": 0.28087167070217917, "percentage": 28.16, "elapsed_time": "1:23:25", "remaining_time": "3:32:52", "throughput": 12150.25, "total_tokens": 60817408}
{"current_steps": 30, "total_steps": 103, "loss": 0.9141, "lr": 4.024422966835136e-05, "epoch": 0.29055690072639223, "percentage": 29.13, "elapsed_time": "1:26:16", "remaining_time": "3:29:57", "throughput": 12152.71, "total_tokens": 62914560}
{"current_steps": 31, "total_steps": 103, "loss": 0.9145, "lr": 3.963287368538106e-05, "epoch": 0.30024213075060535, "percentage": 30.1, "elapsed_time": "1:29:09", "remaining_time": "3:27:04", "throughput": 12153.01, "total_tokens": 65011712}
{"current_steps": 32, "total_steps": 103, "loss": 0.8975, "lr": 3.900790572403376e-05, "epoch": 0.3099273607748184, "percentage": 31.07, "elapsed_time": "1:32:00", "remaining_time": "3:24:07", "throughput": 12156.93, "total_tokens": 67108864}
{"current_steps": 33, "total_steps": 103, "loss": 0.9614, "lr": 3.836990714997686e-05, "epoch": 0.3196125907990315, "percentage": 32.04, "elapsed_time": "1:34:51", "remaining_time": "3:21:13", "throughput": 12159.26, "total_tokens": 69206016}
{"current_steps": 34, "total_steps": 103, "loss": 0.9327, "lr": 3.7719471450380514e-05, "epoch": 0.32929782082324455, "percentage": 33.01, "elapsed_time": "1:37:43", "remaining_time": "3:18:18", "throughput": 12161.37, "total_tokens": 71303168}
{"current_steps": 35, "total_steps": 103, "loss": 0.9153, "lr": 3.7057203681836406e-05, "epoch": 0.3389830508474576, "percentage": 33.98, "elapsed_time": "1:40:33", "remaining_time": "3:15:23", "throughput": 12164.47, "total_tokens": 73400320}
{"current_steps": 36, "total_steps": 103, "loss": 0.8977, "lr": 3.638371990751428e-05, "epoch": 0.3486682808716707, "percentage": 34.95, "elapsed_time": "1:43:25", "remaining_time": "3:12:28", "throughput": 12166.53, "total_tokens": 75497472}
{"current_steps": 37, "total_steps": 103, "loss": 0.9147, "lr": 3.569964662407983e-05, "epoch": 0.3583535108958838, "percentage": 35.92, "elapsed_time": "1:46:16", "remaining_time": "3:09:34", "throughput": 12168.64, "total_tokens": 77594624}
{"current_steps": 38, "total_steps": 103, "loss": 0.9264, "lr": 3.500562017890695e-05, "epoch": 0.36803874092009686, "percentage": 36.89, "elapsed_time": "1:49:08", "remaining_time": "3:06:40", "throughput": 12170.33, "total_tokens": 79691776}
{"current_steps": 39, "total_steps": 103, "loss": 0.9041, "lr": 3.430228617812661e-05, "epoch": 0.37772397094430993, "percentage": 37.86, "elapsed_time": "1:51:58", "remaining_time": "3:03:45", "throughput": 12173.21, "total_tokens": 81788928}
{"current_steps": 40, "total_steps": 103, "loss": 0.9322, "lr": 3.3590298886062833e-05, "epoch": 0.387409200968523, "percentage": 38.83, "elapsed_time": "1:54:49", "remaining_time": "3:00:51", "throughput": 12175.56, "total_tokens": 83886080}
{"current_steps": 41, "total_steps": 103, "loss": 0.9257, "lr": 3.2870320616614626e-05, "epoch": 0.39709443099273606, "percentage": 39.81, "elapsed_time": "1:57:41", "remaining_time": "2:57:58", "throughput": 12176.34, "total_tokens": 85983232}
{"current_steps": 42, "total_steps": 103, "loss": 0.8952, "lr": 3.21430211171499e-05, "epoch": 0.4067796610169492, "percentage": 40.78, "elapsed_time": "2:00:32", "remaining_time": "2:55:04", "throughput": 12178.42, "total_tokens": 88080384}
{"current_steps": 43, "total_steps": 103, "loss": 0.9072, "lr": 3.140907694548451e-05, "epoch": 0.41646489104116224, "percentage": 41.75, "elapsed_time": "2:03:24", "remaining_time": "2:52:11", "throughput": 12179.3, "total_tokens": 90177536}
{"current_steps": 44, "total_steps": 103, "loss": 0.9166, "lr": 3.066917084052603e-05, "epoch": 0.4261501210653753, "percentage": 42.72, "elapsed_time": "2:06:15", "remaining_time": "2:49:17", "throughput": 12181.27, "total_tokens": 92274688}
{"current_steps": 45, "total_steps": 103, "loss": 0.8924, "lr": 2.9923991087167658e-05, "epoch": 0.4358353510895884, "percentage": 43.69, "elapsed_time": "2:09:06", "remaining_time": "2:46:24", "throughput": 12182.88, "total_tokens": 94371840}
{"current_steps": 46, "total_steps": 103, "loss": 0.8871, "lr": 2.9174230876023058e-05, "epoch": 0.44552058111380144, "percentage": 44.66, "elapsed_time": "2:11:57", "remaining_time": "2:43:30", "throughput": 12184.12, "total_tokens": 96468992}
{"current_steps": 47, "total_steps": 103, "loss": 0.9263, "lr": 2.8420587658597757e-05, "epoch": 0.4552058111380145, "percentage": 45.63, "elapsed_time": "2:14:49", "remaining_time": "2:40:38", "throughput": 12185.1, "total_tokens": 98566144}
{"current_steps": 48, "total_steps": 103, "loss": 0.9158, "lr": 2.7663762498496905e-05, "epoch": 0.4648910411622276, "percentage": 46.6, "elapsed_time": "2:17:40", "remaining_time": "2:37:45", "throughput": 12186.29, "total_tokens": 100663296}
{"current_steps": 49, "total_steps": 103, "loss": 0.9034, "lr": 2.6904459419272955e-05, "epoch": 0.4745762711864407, "percentage": 47.57, "elapsed_time": "2:20:31", "remaining_time": "2:34:52", "throughput": 12187.37, "total_tokens": 102760448}
{"current_steps": 50, "total_steps": 103, "loss": 0.9044, "lr": 2.6143384749519866e-05, "epoch": 0.48426150121065376, "percentage": 48.54, "elapsed_time": "2:23:23", "remaining_time": "2:31:59", "throughput": 12187.76, "total_tokens": 104857600}
{"current_steps": 51, "total_steps": 103, "loss": 0.9173, "lr": 2.538124646582315e-05, "epoch": 0.4939467312348668, "percentage": 49.51, "elapsed_time": "2:26:14", "remaining_time": "2:29:06", "throughput": 12188.76, "total_tokens": 106954752}
{"current_steps": 52, "total_steps": 103, "loss": 0.9349, "lr": 2.4618753534176856e-05, "epoch": 0.5036319612590799, "percentage": 50.49, "elapsed_time": "2:29:05", "remaining_time": "2:26:13", "throughput": 12190.37, "total_tokens": 109051904}
{"current_steps": 53, "total_steps": 103, "loss": 0.8996, "lr": 2.385661525048014e-05, "epoch": 0.513317191283293, "percentage": 51.46, "elapsed_time": "2:31:57", "remaining_time": "2:23:21", "throughput": 12191.0, "total_tokens": 111149056}
{"current_steps": 54, "total_steps": 103, "loss": 0.9018, "lr": 2.3095540580727055e-05, "epoch": 0.5230024213075061, "percentage": 52.43, "elapsed_time": "2:34:48", "remaining_time": "2:20:28", "throughput": 12192.15, "total_tokens": 113246208}
{"current_steps": 55, "total_steps": 103, "loss": 0.8797, "lr": 2.23362375015031e-05, "epoch": 0.5326876513317191, "percentage": 53.4, "elapsed_time": "2:37:40", "remaining_time": "2:17:36", "throughput": 12192.73, "total_tokens": 115343360}
{"current_steps": 56, "total_steps": 103, "loss": 0.9075, "lr": 2.157941234140225e-05, "epoch": 0.5423728813559322, "percentage": 54.37, "elapsed_time": "2:40:31", "remaining_time": "2:14:43", "throughput": 12193.09, "total_tokens": 117440512}
{"current_steps": 57, "total_steps": 103, "loss": 0.9068, "lr": 2.082576912397695e-05, "epoch": 0.5520581113801453, "percentage": 55.34, "elapsed_time": "2:43:22", "remaining_time": "2:11:51", "throughput": 12194.01, "total_tokens": 119537664}
{"current_steps": 58, "total_steps": 103, "loss": 0.9154, "lr": 2.0076008912832355e-05, "epoch": 0.5617433414043583, "percentage": 56.31, "elapsed_time": "2:46:14", "remaining_time": "2:08:59", "throughput": 12194.06, "total_tokens": 121634816}
{"current_steps": 59, "total_steps": 103, "loss": 0.8895, "lr": 1.933082915947398e-05, "epoch": 0.5714285714285714, "percentage": 57.28, "elapsed_time": "2:49:06", "remaining_time": "2:06:06", "throughput": 12194.36, "total_tokens": 123731968}
{"current_steps": 60, "total_steps": 103, "loss": 0.891, "lr": 1.8590923054515503e-05, "epoch": 0.5811138014527845, "percentage": 58.25, "elapsed_time": "2:51:58", "remaining_time": "2:03:14", "throughput": 12194.93, "total_tokens": 125829120}
{"current_steps": 61, "total_steps": 103, "loss": 0.8928, "lr": 1.7856978882850113e-05, "epoch": 0.5907990314769975, "percentage": 59.22, "elapsed_time": "2:54:49", "remaining_time": "2:00:22", "throughput": 12195.08, "total_tokens": 127926272}
{"current_steps": 62, "total_steps": 103, "loss": 0.8918, "lr": 1.7129679383385383e-05, "epoch": 0.6004842615012107, "percentage": 60.19, "elapsed_time": "2:57:41", "remaining_time": "1:57:30", "throughput": 12195.46, "total_tokens": 130023424}
{"current_steps": 63, "total_steps": 103, "loss": 0.8807, "lr": 1.6409701113937183e-05, "epoch": 0.6101694915254238, "percentage": 61.17, "elapsed_time": "3:00:33", "remaining_time": "1:54:38", "throughput": 12195.62, "total_tokens": 132120576}
{"current_steps": 64, "total_steps": 103, "loss": 0.8934, "lr": 1.5697713821873398e-05, "epoch": 0.6198547215496368, "percentage": 62.14, "elapsed_time": "3:03:24", "remaining_time": "1:51:46", "throughput": 12196.25, "total_tokens": 134217728}
{"current_steps": 65, "total_steps": 103, "loss": 0.8809, "lr": 1.4994379821093049e-05, "epoch": 0.6295399515738499, "percentage": 63.11, "elapsed_time": "3:06:16", "remaining_time": "1:48:53", "throughput": 12196.8, "total_tokens": 136314880}
{"current_steps": 66, "total_steps": 103, "loss": 0.8856, "lr": 1.430035337592018e-05, "epoch": 0.639225181598063, "percentage": 64.08, "elapsed_time": "3:09:07", "remaining_time": "1:46:01", "throughput": 12197.24, "total_tokens": 138412032}
{"current_steps": 67, "total_steps": 103, "loss": 0.9117, "lr": 1.3616280092485717e-05, "epoch": 0.648910411622276, "percentage": 65.05, "elapsed_time": "3:11:59", "remaining_time": "1:43:09", "throughput": 12197.76, "total_tokens": 140509184}
{"current_steps": 68, "total_steps": 103, "loss": 0.8993, "lr": 1.2942796318163595e-05, "epoch": 0.6585956416464891, "percentage": 66.02, "elapsed_time": "3:14:50", "remaining_time": "1:40:17", "throughput": 12198.14, "total_tokens": 142606336}
{"current_steps": 69, "total_steps": 103, "loss": 0.9215, "lr": 1.2280528549619485e-05, "epoch": 0.6682808716707022, "percentage": 66.99, "elapsed_time": "3:17:42", "remaining_time": "1:37:25", "throughput": 12198.43, "total_tokens": 144703488}
{"current_steps": 70, "total_steps": 103, "loss": 0.8916, "lr": 1.1630092850023147e-05, "epoch": 0.6779661016949152, "percentage": 67.96, "elapsed_time": "3:20:33", "remaining_time": "1:34:33", "throughput": 12199.0, "total_tokens": 146800640}
{"current_steps": 71, "total_steps": 103, "loss": 0.8913, "lr": 1.0992094275966256e-05, "epoch": 0.6876513317191283, "percentage": 68.93, "elapsed_time": "3:23:24", "remaining_time": "1:31:40", "throughput": 12199.79, "total_tokens": 148897792}
{"current_steps": 72, "total_steps": 103, "loss": 0.8973, "lr": 1.0367126314618947e-05, "epoch": 0.6973365617433414, "percentage": 69.9, "elapsed_time": "3:26:16", "remaining_time": "1:28:48", "throughput": 12200.41, "total_tokens": 150994944}
{"current_steps": 73, "total_steps": 103, "loss": 0.9136, "lr": 9.755770331648642e-06, "epoch": 0.7070217917675545, "percentage": 70.87, "elapsed_time": "3:29:07", "remaining_time": "1:25:56", "throughput": 12201.41, "total_tokens": 153092096}
{"current_steps": 74, "total_steps": 103, "loss": 0.9217, "lr": 9.15859503041462e-06, "epoch": 0.7167070217917676, "percentage": 71.84, "elapsed_time": "3:31:58", "remaining_time": "1:23:04", "throughput": 12201.98, "total_tokens": 155189248}
{"current_steps": 75, "total_steps": 103, "loss": 0.8926, "lr": 8.576155922941548e-06, "epoch": 0.7263922518159807, "percentage": 72.82, "elapsed_time": "3:34:50", "remaining_time": "1:20:12", "throughput": 12202.13, "total_tokens": 157286400}
{"current_steps": 76, "total_steps": 103, "loss": 0.8828, "lr": 8.008994813163995e-06, "epoch": 0.7360774818401937, "percentage": 73.79, "elapsed_time": "3:37:41", "remaining_time": "1:17:20", "throughput": 12202.67, "total_tokens": 159383552}
{"current_steps": 77, "total_steps": 103, "loss": 0.8944, "lr": 7.457639292922675e-06, "epoch": 0.7457627118644068, "percentage": 74.76, "elapsed_time": "3:40:33", "remaining_time": "1:14:28", "throughput": 12202.71, "total_tokens": 161480704}
{"current_steps": 78, "total_steps": 103, "loss": 0.9085, "lr": 6.92260225118122e-06, "epoch": 0.7554479418886199, "percentage": 75.73, "elapsed_time": "3:43:24", "remaining_time": "1:11:36", "throughput": 12203.07, "total_tokens": 163577856}
{"current_steps": 79, "total_steps": 103, "loss": 0.8926, "lr": 6.4043813969201004e-06, "epoch": 0.7651331719128329, "percentage": 76.7, "elapsed_time": "3:46:16", "remaining_time": "1:08:44", "throughput": 12202.71, "total_tokens": 165675008}
{"current_steps": 80, "total_steps": 103, "loss": 0.9022, "lr": 5.903458796151381e-06, "epoch": 0.774818401937046, "percentage": 77.67, "elapsed_time": "3:49:07", "remaining_time": "1:05:52", "throughput": 12203.4, "total_tokens": 167772160}
{"current_steps": 81, "total_steps": 103, "loss": 0.9081, "lr": 5.420300423485167e-06, "epoch": 0.784503631961259, "percentage": 78.64, "elapsed_time": "3:51:59", "remaining_time": "1:03:00", "throughput": 12203.86, "total_tokens": 169869312}
{"current_steps": 82, "total_steps": 103, "loss": 0.8964, "lr": 4.95535572866474e-06, "epoch": 0.7941888619854721, "percentage": 79.61, "elapsed_time": "3:54:51", "remaining_time": "1:00:08", "throughput": 12203.74, "total_tokens": 171966464}
{"current_steps": 83, "total_steps": 103, "loss": 0.8872, "lr": 4.5090572184736864e-06, "epoch": 0.8038740920096852, "percentage": 80.58, "elapsed_time": "3:57:42", "remaining_time": "0:57:16", "throughput": 12204.1, "total_tokens": 174063616}
{"current_steps": 84, "total_steps": 103, "loss": 0.9052, "lr": 4.081820054403948e-06, "epoch": 0.8135593220338984, "percentage": 81.55, "elapsed_time": "4:00:34", "remaining_time": "0:54:24", "throughput": 12204.57, "total_tokens": 176160768}
{"current_steps": 85, "total_steps": 103, "loss": 0.8887, "lr": 3.674041666458963e-06, "epoch": 0.8232445520581114, "percentage": 82.52, "elapsed_time": "4:03:25", "remaining_time": "0:51:32", "throughput": 12204.65, "total_tokens": 178257920}
{"current_steps": 86, "total_steps": 103, "loss": 0.9228, "lr": 3.2861013834512846e-06, "epoch": 0.8329297820823245, "percentage": 83.5, "elapsed_time": "4:06:17", "remaining_time": "0:48:41", "throughput": 12205.11, "total_tokens": 180355072}
{"current_steps": 87, "total_steps": 103, "loss": 0.907, "lr": 2.918360080138485e-06, "epoch": 0.8426150121065376, "percentage": 84.47, "elapsed_time": "4:09:08", "remaining_time": "0:45:49", "throughput": 12205.63, "total_tokens": 182452224}
{"current_steps": 88, "total_steps": 103, "loss": 0.8912, "lr": 2.57115984152565e-06, "epoch": 0.8523002421307506, "percentage": 85.44, "elapsed_time": "4:11:59", "remaining_time": "0:42:57", "throughput": 12206.01, "total_tokens": 184549376}
{"current_steps": 89, "total_steps": 103, "loss": 0.91, "lr": 2.2448236446466846e-06, "epoch": 0.8619854721549637, "percentage": 86.41, "elapsed_time": "4:14:51", "remaining_time": "0:40:05", "throughput": 12206.13, "total_tokens": 186646528}
{"current_steps": 90, "total_steps": 103, "loss": 0.8817, "lr": 1.939655058120521e-06, "epoch": 0.8716707021791767, "percentage": 87.38, "elapsed_time": "4:17:42", "remaining_time": "0:37:13", "throughput": 12206.42, "total_tokens": 188743680}
{"current_steps": 91, "total_steps": 103, "loss": 0.8795, "lr": 1.6559379597616137e-06, "epoch": 0.8813559322033898, "percentage": 88.35, "elapsed_time": "4:20:34", "remaining_time": "0:34:21", "throughput": 12206.66, "total_tokens": 190840832}
{"current_steps": 92, "total_steps": 103, "loss": 0.8945, "lr": 1.3939362725075345e-06, "epoch": 0.8910411622276029, "percentage": 89.32, "elapsed_time": "4:23:26", "remaining_time": "0:31:29", "throughput": 12206.32, "total_tokens": 192937984}
{"current_steps": 93, "total_steps": 103, "loss": 0.8941, "lr": 1.1538937189091825e-06, "epoch": 0.9007263922518159, "percentage": 90.29, "elapsed_time": "4:26:18", "remaining_time": "0:28:38", "throughput": 12206.23, "total_tokens": 195035136}
{"current_steps": 94, "total_steps": 103, "loss": 0.8771, "lr": 9.360335944121029e-07, "epoch": 0.910411622276029, "percentage": 91.26, "elapsed_time": "4:29:10", "remaining_time": "0:25:46", "throughput": 12206.25, "total_tokens": 197132288}
{"current_steps": 95, "total_steps": 103, "loss": 0.8779, "lr": 7.405585596397313e-07, "epoch": 0.9200968523002422, "percentage": 92.23, "elapsed_time": "4:32:01", "remaining_time": "0:22:54", "throughput": 12206.55, "total_tokens": 199229440}
{"current_steps": 96, "total_steps": 103, "loss": 0.8978, "lr": 5.676504518718761e-07, "epoch": 0.9297820823244553, "percentage": 93.2, "elapsed_time": "4:34:52", "remaining_time": "0:20:02", "throughput": 12207.07, "total_tokens": 201326592}
{"current_steps": 97, "total_steps": 103, "loss": 0.8858, "lr": 4.1747011589368947e-07, "epoch": 0.9394673123486683, "percentage": 94.17, "elapsed_time": "4:37:43", "remaining_time": "0:17:10", "throughput": 12207.47, "total_tokens": 203423744}
{"current_steps": 98, "total_steps": 103, "loss": 0.8983, "lr": 2.9015725437259724e-07, "epoch": 0.9491525423728814, "percentage": 95.15, "elapsed_time": "4:40:35", "remaining_time": "0:14:18", "throughput": 12207.74, "total_tokens": 205520896}
{"current_steps": 99, "total_steps": 103, "loss": 0.903, "lr": 1.8583029790230355e-07, "epoch": 0.9588377723970944, "percentage": 96.12, "elapsed_time": "4:43:26", "remaining_time": "0:11:27", "throughput": 12208.21, "total_tokens": 207618048}
{"current_steps": 100, "total_steps": 103, "loss": 0.866, "lr": 1.0458629483476867e-07, "epoch": 0.9685230024213075, "percentage": 97.09, "elapsed_time": "4:46:18", "remaining_time": "0:08:35", "throughput": 12208.23, "total_tokens": 209715200}
{"current_steps": 101, "total_steps": 103, "loss": 0.9024, "lr": 4.650082100265407e-08, "epoch": 0.9782082324455206, "percentage": 98.06, "elapsed_time": "4:49:09", "remaining_time": "0:05:43", "throughput": 12208.43, "total_tokens": 211812352}
{"current_steps": 102, "total_steps": 103, "loss": 0.8841, "lr": 1.1627909416211947e-08, "epoch": 0.9878934624697336, "percentage": 99.03, "elapsed_time": "4:52:01", "remaining_time": "0:02:51", "throughput": 12208.72, "total_tokens": 213909504}
{"current_steps": 103, "total_steps": 103, "loss": 0.8893, "lr": 0.0, "epoch": 0.9975786924939467, "percentage": 100.0, "elapsed_time": "4:54:52", "remaining_time": "0:00:00", "throughput": 12208.75, "total_tokens": 216006656}
{"current_steps": 103, "total_steps": 103, "epoch": 0.9975786924939467, "percentage": 100.0, "elapsed_time": "4:55:19", "remaining_time": "0:00:00", "throughput": 12190.49, "total_tokens": 216006656}