{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.946524064171124, "eval_steps": 10, "global_step": 930, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 9.892473118279571e-05, "loss": 1.5206, "step": 10 }, { "epoch": 0.11, "eval_accuracy": 0.292553186416626, "eval_loss": 2.0217974185943604, "eval_runtime": 19.3615, "eval_samples_per_second": 9.71, "eval_steps_per_second": 2.428, "step": 10 }, { "epoch": 0.21, "learning_rate": 9.78494623655914e-05, "loss": 1.2661, "step": 20 }, { "epoch": 0.21, "eval_accuracy": 0.3404255211353302, "eval_loss": 1.633784532546997, "eval_runtime": 22.8427, "eval_samples_per_second": 8.23, "eval_steps_per_second": 2.058, "step": 20 }, { "epoch": 0.32, "learning_rate": 9.677419354838711e-05, "loss": 1.2961, "step": 30 }, { "epoch": 0.32, "eval_accuracy": 0.4680851101875305, "eval_loss": 1.4141530990600586, "eval_runtime": 26.4217, "eval_samples_per_second": 7.115, "eval_steps_per_second": 1.779, "step": 30 }, { "epoch": 0.43, "learning_rate": 9.56989247311828e-05, "loss": 1.1222, "step": 40 }, { "epoch": 0.43, "eval_accuracy": 0.5159574747085571, "eval_loss": 1.227504014968872, "eval_runtime": 20.4617, "eval_samples_per_second": 9.188, "eval_steps_per_second": 2.297, "step": 40 }, { "epoch": 0.53, "learning_rate": 9.46236559139785e-05, "loss": 0.9449, "step": 50 }, { "epoch": 0.53, "eval_accuracy": 0.5531914830207825, "eval_loss": 1.2353336811065674, "eval_runtime": 19.3865, "eval_samples_per_second": 9.697, "eval_steps_per_second": 2.424, "step": 50 }, { "epoch": 0.64, "learning_rate": 9.35483870967742e-05, "loss": 0.8884, "step": 60 }, { "epoch": 0.64, "eval_accuracy": 0.5531914830207825, "eval_loss": 1.2557822465896606, "eval_runtime": 19.3788, "eval_samples_per_second": 9.701, "eval_steps_per_second": 2.425, "step": 60 }, { "epoch": 0.75, "learning_rate": 9.247311827956989e-05, "loss": 1.1132, "step": 70 }, { "epoch": 0.75, "eval_accuracy": 0.6010638475418091, "eval_loss": 1.172572374343872, "eval_runtime": 19.3951, "eval_samples_per_second": 9.693, "eval_steps_per_second": 2.423, "step": 70 }, { "epoch": 0.86, "learning_rate": 9.13978494623656e-05, "loss": 0.9073, "step": 80 }, { "epoch": 0.86, "eval_accuracy": 0.5744680762290955, "eval_loss": 1.2864995002746582, "eval_runtime": 20.2133, "eval_samples_per_second": 9.301, "eval_steps_per_second": 2.325, "step": 80 }, { "epoch": 0.96, "learning_rate": 9.032258064516129e-05, "loss": 0.8923, "step": 90 }, { "epoch": 0.96, "eval_accuracy": 0.6808510422706604, "eval_loss": 0.8788278698921204, "eval_runtime": 19.3337, "eval_samples_per_second": 9.724, "eval_steps_per_second": 2.431, "step": 90 }, { "epoch": 1.07, "learning_rate": 8.924731182795699e-05, "loss": 0.8702, "step": 100 }, { "epoch": 1.07, "eval_accuracy": 0.6808510422706604, "eval_loss": 0.9209534525871277, "eval_runtime": 19.7461, "eval_samples_per_second": 9.521, "eval_steps_per_second": 2.38, "step": 100 }, { "epoch": 1.18, "learning_rate": 8.81720430107527e-05, "loss": 0.5832, "step": 110 }, { "epoch": 1.18, "eval_accuracy": 0.6010638475418091, "eval_loss": 0.9852186441421509, "eval_runtime": 20.6843, "eval_samples_per_second": 9.089, "eval_steps_per_second": 2.272, "step": 110 }, { "epoch": 1.28, "learning_rate": 8.709677419354839e-05, "loss": 0.776, "step": 120 }, { "epoch": 1.28, "eval_accuracy": 0.664893627166748, "eval_loss": 1.0170938968658447, "eval_runtime": 19.2567, "eval_samples_per_second": 9.763, "eval_steps_per_second": 2.441, "step": 120 }, { "epoch": 1.39, "learning_rate": 8.60215053763441e-05, "loss": 0.5927, "step": 130 }, { "epoch": 1.39, "eval_accuracy": 0.6382978558540344, "eval_loss": 1.101302146911621, "eval_runtime": 20.3489, "eval_samples_per_second": 9.239, "eval_steps_per_second": 2.31, "step": 130 }, { "epoch": 1.5, "learning_rate": 8.494623655913979e-05, "loss": 0.54, "step": 140 }, { "epoch": 1.5, "eval_accuracy": 0.6808510422706604, "eval_loss": 0.9849129319190979, "eval_runtime": 19.1842, "eval_samples_per_second": 9.8, "eval_steps_per_second": 2.45, "step": 140 }, { "epoch": 1.6, "learning_rate": 8.387096774193549e-05, "loss": 0.6341, "step": 150 }, { "epoch": 1.6, "eval_accuracy": 0.6968085169792175, "eval_loss": 0.8642140626907349, "eval_runtime": 19.8877, "eval_samples_per_second": 9.453, "eval_steps_per_second": 2.363, "step": 150 }, { "epoch": 1.71, "learning_rate": 8.27956989247312e-05, "loss": 0.8108, "step": 160 }, { "epoch": 1.71, "eval_accuracy": 0.7340425252914429, "eval_loss": 0.7330851554870605, "eval_runtime": 20.2573, "eval_samples_per_second": 9.281, "eval_steps_per_second": 2.32, "step": 160 }, { "epoch": 1.82, "learning_rate": 8.172043010752689e-05, "loss": 0.5861, "step": 170 }, { "epoch": 1.82, "eval_accuracy": 0.7127659320831299, "eval_loss": 0.766554594039917, "eval_runtime": 19.2481, "eval_samples_per_second": 9.767, "eval_steps_per_second": 2.442, "step": 170 }, { "epoch": 1.93, "learning_rate": 8.064516129032258e-05, "loss": 0.4829, "step": 180 }, { "epoch": 1.93, "eval_accuracy": 0.7180851101875305, "eval_loss": 0.7813696265220642, "eval_runtime": 20.2831, "eval_samples_per_second": 9.269, "eval_steps_per_second": 2.317, "step": 180 }, { "epoch": 2.03, "learning_rate": 7.956989247311829e-05, "loss": 0.6267, "step": 190 }, { "epoch": 2.03, "eval_accuracy": 0.7234042286872864, "eval_loss": 0.7261902093887329, "eval_runtime": 19.8614, "eval_samples_per_second": 9.466, "eval_steps_per_second": 2.366, "step": 190 }, { "epoch": 2.14, "learning_rate": 7.849462365591398e-05, "loss": 0.4318, "step": 200 }, { "epoch": 2.14, "eval_accuracy": 0.7659574747085571, "eval_loss": 0.732907235622406, "eval_runtime": 20.4384, "eval_samples_per_second": 9.198, "eval_steps_per_second": 2.3, "step": 200 }, { "epoch": 2.25, "learning_rate": 7.741935483870968e-05, "loss": 0.3761, "step": 210 }, { "epoch": 2.25, "eval_accuracy": 0.7021276354789734, "eval_loss": 1.0907269716262817, "eval_runtime": 19.6052, "eval_samples_per_second": 9.589, "eval_steps_per_second": 2.397, "step": 210 }, { "epoch": 2.35, "learning_rate": 7.634408602150538e-05, "loss": 0.324, "step": 220 }, { "epoch": 2.35, "eval_accuracy": 0.7872340679168701, "eval_loss": 0.7849129438400269, "eval_runtime": 19.7685, "eval_samples_per_second": 9.51, "eval_steps_per_second": 2.378, "step": 220 }, { "epoch": 2.46, "learning_rate": 7.526881720430108e-05, "loss": 0.3168, "step": 230 }, { "epoch": 2.46, "eval_accuracy": 0.7553191781044006, "eval_loss": 0.7544443607330322, "eval_runtime": 19.9734, "eval_samples_per_second": 9.413, "eval_steps_per_second": 2.353, "step": 230 }, { "epoch": 2.57, "learning_rate": 7.419354838709677e-05, "loss": 0.31, "step": 240 }, { "epoch": 2.57, "eval_accuracy": 0.6808510422706604, "eval_loss": 1.3076503276824951, "eval_runtime": 19.5097, "eval_samples_per_second": 9.636, "eval_steps_per_second": 2.409, "step": 240 }, { "epoch": 2.67, "learning_rate": 7.311827956989248e-05, "loss": 0.2488, "step": 250 }, { "epoch": 2.67, "eval_accuracy": 0.7393617033958435, "eval_loss": 1.1082981824874878, "eval_runtime": 19.6472, "eval_samples_per_second": 9.569, "eval_steps_per_second": 2.392, "step": 250 }, { "epoch": 2.78, "learning_rate": 7.204301075268818e-05, "loss": 0.2774, "step": 260 }, { "epoch": 2.78, "eval_accuracy": 0.7393617033958435, "eval_loss": 1.0004332065582275, "eval_runtime": 19.5383, "eval_samples_per_second": 9.622, "eval_steps_per_second": 2.406, "step": 260 }, { "epoch": 2.89, "learning_rate": 7.096774193548388e-05, "loss": 0.3068, "step": 270 }, { "epoch": 2.89, "eval_accuracy": 0.7393617033958435, "eval_loss": 1.0832968950271606, "eval_runtime": 22.726, "eval_samples_per_second": 8.272, "eval_steps_per_second": 2.068, "step": 270 }, { "epoch": 2.99, "learning_rate": 6.989247311827958e-05, "loss": 0.4347, "step": 280 }, { "epoch": 2.99, "eval_accuracy": 0.6808510422706604, "eval_loss": 1.3584102392196655, "eval_runtime": 19.4201, "eval_samples_per_second": 9.681, "eval_steps_per_second": 2.42, "step": 280 }, { "epoch": 3.1, "learning_rate": 6.881720430107527e-05, "loss": 0.2742, "step": 290 }, { "epoch": 3.1, "eval_accuracy": 0.7765957713127136, "eval_loss": 0.9648256301879883, "eval_runtime": 20.0148, "eval_samples_per_second": 9.393, "eval_steps_per_second": 2.348, "step": 290 }, { "epoch": 3.21, "learning_rate": 6.774193548387096e-05, "loss": 0.3018, "step": 300 }, { "epoch": 3.21, "eval_accuracy": 0.7446808218955994, "eval_loss": 0.9408243298530579, "eval_runtime": 20.3383, "eval_samples_per_second": 9.244, "eval_steps_per_second": 2.311, "step": 300 }, { "epoch": 3.32, "learning_rate": 6.666666666666667e-05, "loss": 0.4159, "step": 310 }, { "epoch": 3.32, "eval_accuracy": 0.7127659320831299, "eval_loss": 1.3147622346878052, "eval_runtime": 28.2012, "eval_samples_per_second": 6.666, "eval_steps_per_second": 1.667, "step": 310 }, { "epoch": 3.42, "learning_rate": 6.559139784946236e-05, "loss": 0.35, "step": 320 }, { "epoch": 3.42, "eval_accuracy": 0.7659574747085571, "eval_loss": 0.9583517909049988, "eval_runtime": 21.7271, "eval_samples_per_second": 8.653, "eval_steps_per_second": 2.163, "step": 320 }, { "epoch": 3.53, "learning_rate": 6.451612903225807e-05, "loss": 0.1448, "step": 330 }, { "epoch": 3.53, "eval_accuracy": 0.7819148898124695, "eval_loss": 0.7321730256080627, "eval_runtime": 21.3525, "eval_samples_per_second": 8.805, "eval_steps_per_second": 2.201, "step": 330 }, { "epoch": 3.64, "learning_rate": 6.344086021505376e-05, "loss": 0.1969, "step": 340 }, { "epoch": 3.64, "eval_accuracy": 0.8297872543334961, "eval_loss": 0.7202281355857849, "eval_runtime": 19.4523, "eval_samples_per_second": 9.665, "eval_steps_per_second": 2.416, "step": 340 }, { "epoch": 3.74, "learning_rate": 6.236559139784946e-05, "loss": 0.4023, "step": 350 }, { "epoch": 3.74, "eval_accuracy": 0.7340425252914429, "eval_loss": 1.210102915763855, "eval_runtime": 19.9948, "eval_samples_per_second": 9.402, "eval_steps_per_second": 2.351, "step": 350 }, { "epoch": 3.85, "learning_rate": 6.129032258064517e-05, "loss": 0.301, "step": 360 }, { "epoch": 3.85, "eval_accuracy": 0.8510638475418091, "eval_loss": 0.4878663122653961, "eval_runtime": 21.8287, "eval_samples_per_second": 8.613, "eval_steps_per_second": 2.153, "step": 360 }, { "epoch": 3.96, "learning_rate": 6.021505376344086e-05, "loss": 0.1968, "step": 370 }, { "epoch": 3.96, "eval_accuracy": 0.8191489577293396, "eval_loss": 0.721879243850708, "eval_runtime": 19.355, "eval_samples_per_second": 9.713, "eval_steps_per_second": 2.428, "step": 370 }, { "epoch": 4.06, "learning_rate": 5.913978494623657e-05, "loss": 0.1026, "step": 380 }, { "epoch": 4.06, "eval_accuracy": 0.7234042286872864, "eval_loss": 1.1493706703186035, "eval_runtime": 19.4745, "eval_samples_per_second": 9.654, "eval_steps_per_second": 2.413, "step": 380 }, { "epoch": 4.17, "learning_rate": 5.8064516129032266e-05, "loss": 0.2143, "step": 390 }, { "epoch": 4.17, "eval_accuracy": 0.7446808218955994, "eval_loss": 1.1254618167877197, "eval_runtime": 19.5314, "eval_samples_per_second": 9.626, "eval_steps_per_second": 2.406, "step": 390 }, { "epoch": 4.28, "learning_rate": 5.6989247311827965e-05, "loss": 0.2157, "step": 400 }, { "epoch": 4.28, "eval_accuracy": 0.7978723645210266, "eval_loss": 0.867353618144989, "eval_runtime": 19.5612, "eval_samples_per_second": 9.611, "eval_steps_per_second": 2.403, "step": 400 }, { "epoch": 4.39, "learning_rate": 5.5913978494623656e-05, "loss": 0.1429, "step": 410 }, { "epoch": 4.39, "eval_accuracy": 0.8031914830207825, "eval_loss": 0.8620893359184265, "eval_runtime": 19.8011, "eval_samples_per_second": 9.494, "eval_steps_per_second": 2.374, "step": 410 }, { "epoch": 4.49, "learning_rate": 5.4838709677419355e-05, "loss": 0.2338, "step": 420 }, { "epoch": 4.49, "eval_accuracy": 0.771276593208313, "eval_loss": 1.0524601936340332, "eval_runtime": 20.0193, "eval_samples_per_second": 9.391, "eval_steps_per_second": 2.348, "step": 420 }, { "epoch": 4.6, "learning_rate": 5.3763440860215054e-05, "loss": 0.079, "step": 430 }, { "epoch": 4.6, "eval_accuracy": 0.7978723645210266, "eval_loss": 0.9354122877120972, "eval_runtime": 19.937, "eval_samples_per_second": 9.43, "eval_steps_per_second": 2.357, "step": 430 }, { "epoch": 4.71, "learning_rate": 5.268817204301075e-05, "loss": 0.0652, "step": 440 }, { "epoch": 4.71, "eval_accuracy": 0.8297872543334961, "eval_loss": 0.8370540142059326, "eval_runtime": 20.2037, "eval_samples_per_second": 9.305, "eval_steps_per_second": 2.326, "step": 440 }, { "epoch": 4.81, "learning_rate": 5.161290322580645e-05, "loss": 0.0464, "step": 450 }, { "epoch": 4.81, "eval_accuracy": 0.7978723645210266, "eval_loss": 0.9808104634284973, "eval_runtime": 21.6193, "eval_samples_per_second": 8.696, "eval_steps_per_second": 2.174, "step": 450 }, { "epoch": 4.92, "learning_rate": 5.053763440860215e-05, "loss": 0.2738, "step": 460 }, { "epoch": 4.92, "eval_accuracy": 0.8510638475418091, "eval_loss": 0.7124775052070618, "eval_runtime": 20.7786, "eval_samples_per_second": 9.048, "eval_steps_per_second": 2.262, "step": 460 }, { "epoch": 5.03, "learning_rate": 4.9462365591397855e-05, "loss": 0.0234, "step": 470 }, { "epoch": 5.03, "eval_accuracy": 0.813829779624939, "eval_loss": 0.799345850944519, "eval_runtime": 22.0458, "eval_samples_per_second": 8.528, "eval_steps_per_second": 2.132, "step": 470 }, { "epoch": 5.13, "learning_rate": 4.8387096774193554e-05, "loss": 0.1504, "step": 480 }, { "epoch": 5.13, "eval_accuracy": 0.7978723645210266, "eval_loss": 0.9516879916191101, "eval_runtime": 20.0905, "eval_samples_per_second": 9.358, "eval_steps_per_second": 2.339, "step": 480 }, { "epoch": 5.24, "learning_rate": 4.731182795698925e-05, "loss": 0.0289, "step": 490 }, { "epoch": 5.24, "eval_accuracy": 0.8404255509376526, "eval_loss": 0.7372261881828308, "eval_runtime": 19.4546, "eval_samples_per_second": 9.664, "eval_steps_per_second": 2.416, "step": 490 }, { "epoch": 5.35, "learning_rate": 4.6236559139784944e-05, "loss": 0.2253, "step": 500 }, { "epoch": 5.35, "eval_accuracy": 0.8936170339584351, "eval_loss": 0.47867247462272644, "eval_runtime": 19.7513, "eval_samples_per_second": 9.518, "eval_steps_per_second": 2.38, "step": 500 }, { "epoch": 5.45, "learning_rate": 4.516129032258064e-05, "loss": 0.0487, "step": 510 }, { "epoch": 5.45, "eval_accuracy": 0.8457446694374084, "eval_loss": 0.6794525384902954, "eval_runtime": 20.9065, "eval_samples_per_second": 8.992, "eval_steps_per_second": 2.248, "step": 510 }, { "epoch": 5.56, "learning_rate": 4.408602150537635e-05, "loss": 0.0266, "step": 520 }, { "epoch": 5.56, "eval_accuracy": 0.835106372833252, "eval_loss": 0.8275220990180969, "eval_runtime": 20.2058, "eval_samples_per_second": 9.304, "eval_steps_per_second": 2.326, "step": 520 }, { "epoch": 5.67, "learning_rate": 4.301075268817205e-05, "loss": 0.07, "step": 530 }, { "epoch": 5.67, "eval_accuracy": 0.7978723645210266, "eval_loss": 1.0128568410873413, "eval_runtime": 20.839, "eval_samples_per_second": 9.022, "eval_steps_per_second": 2.255, "step": 530 }, { "epoch": 5.78, "learning_rate": 4.1935483870967746e-05, "loss": 0.0439, "step": 540 }, { "epoch": 5.78, "eval_accuracy": 0.835106372833252, "eval_loss": 0.8743005394935608, "eval_runtime": 20.318, "eval_samples_per_second": 9.253, "eval_steps_per_second": 2.313, "step": 540 }, { "epoch": 5.88, "learning_rate": 4.0860215053763444e-05, "loss": 0.0609, "step": 550 }, { "epoch": 5.88, "eval_accuracy": 0.8457446694374084, "eval_loss": 0.7512069344520569, "eval_runtime": 19.5322, "eval_samples_per_second": 9.625, "eval_steps_per_second": 2.406, "step": 550 }, { "epoch": 5.99, "learning_rate": 3.978494623655914e-05, "loss": 0.0902, "step": 560 }, { "epoch": 5.99, "eval_accuracy": 0.8510638475418091, "eval_loss": 0.702180802822113, "eval_runtime": 19.3513, "eval_samples_per_second": 9.715, "eval_steps_per_second": 2.429, "step": 560 }, { "epoch": 6.1, "learning_rate": 3.870967741935484e-05, "loss": 0.0364, "step": 570 }, { "epoch": 6.1, "eval_accuracy": 0.7819148898124695, "eval_loss": 1.0619040727615356, "eval_runtime": 21.233, "eval_samples_per_second": 8.854, "eval_steps_per_second": 2.214, "step": 570 }, { "epoch": 6.2, "learning_rate": 3.763440860215054e-05, "loss": 0.033, "step": 580 }, { "epoch": 6.2, "eval_accuracy": 0.7765957713127136, "eval_loss": 1.243336796760559, "eval_runtime": 19.5002, "eval_samples_per_second": 9.641, "eval_steps_per_second": 2.41, "step": 580 }, { "epoch": 6.31, "learning_rate": 3.6666666666666666e-05, "loss": 0.1456, "step": 590 }, { "epoch": 6.31, "eval_accuracy": 0.8244680762290955, "eval_loss": 0.8834251165390015, "eval_runtime": 19.3312, "eval_samples_per_second": 9.725, "eval_steps_per_second": 2.431, "step": 590 }, { "epoch": 6.42, "learning_rate": 3.5591397849462364e-05, "loss": 0.0049, "step": 600 }, { "epoch": 6.42, "eval_accuracy": 0.8510638475418091, "eval_loss": 0.8070632219314575, "eval_runtime": 20.1893, "eval_samples_per_second": 9.312, "eval_steps_per_second": 2.328, "step": 600 }, { "epoch": 6.52, "learning_rate": 3.451612903225806e-05, "loss": 0.1007, "step": 610 }, { "epoch": 6.52, "eval_accuracy": 0.8670212626457214, "eval_loss": 0.6686306595802307, "eval_runtime": 19.5009, "eval_samples_per_second": 9.641, "eval_steps_per_second": 2.41, "step": 610 }, { "epoch": 6.63, "learning_rate": 3.344086021505377e-05, "loss": 0.0054, "step": 620 }, { "epoch": 6.63, "eval_accuracy": 0.8776595592498779, "eval_loss": 0.6421190500259399, "eval_runtime": 21.8176, "eval_samples_per_second": 8.617, "eval_steps_per_second": 2.154, "step": 620 }, { "epoch": 6.74, "learning_rate": 3.236559139784947e-05, "loss": 0.0027, "step": 630 }, { "epoch": 6.74, "eval_accuracy": 0.8563829660415649, "eval_loss": 0.7604753375053406, "eval_runtime": 19.6912, "eval_samples_per_second": 9.547, "eval_steps_per_second": 2.387, "step": 630 }, { "epoch": 6.84, "learning_rate": 3.1290322580645166e-05, "loss": 0.1582, "step": 640 }, { "epoch": 6.84, "eval_accuracy": 0.8670212626457214, "eval_loss": 0.7657257914543152, "eval_runtime": 19.5763, "eval_samples_per_second": 9.603, "eval_steps_per_second": 2.401, "step": 640 }, { "epoch": 6.95, "learning_rate": 3.0215053763440858e-05, "loss": 0.0106, "step": 650 }, { "epoch": 6.95, "eval_accuracy": 0.835106372833252, "eval_loss": 0.8570267558097839, "eval_runtime": 20.2786, "eval_samples_per_second": 9.271, "eval_steps_per_second": 2.318, "step": 650 }, { "epoch": 7.06, "learning_rate": 2.913978494623656e-05, "loss": 0.0219, "step": 660 }, { "epoch": 7.06, "eval_accuracy": 0.8457446694374084, "eval_loss": 0.8243122696876526, "eval_runtime": 20.636, "eval_samples_per_second": 9.11, "eval_steps_per_second": 2.278, "step": 660 }, { "epoch": 7.17, "learning_rate": 2.806451612903226e-05, "loss": 0.0032, "step": 670 }, { "epoch": 7.17, "eval_accuracy": 0.8085106611251831, "eval_loss": 0.9377107620239258, "eval_runtime": 19.4326, "eval_samples_per_second": 9.674, "eval_steps_per_second": 2.419, "step": 670 }, { "epoch": 7.27, "learning_rate": 2.698924731182796e-05, "loss": 0.0037, "step": 680 }, { "epoch": 7.27, "eval_accuracy": 0.8031914830207825, "eval_loss": 1.0501412153244019, "eval_runtime": 19.4587, "eval_samples_per_second": 9.661, "eval_steps_per_second": 2.415, "step": 680 }, { "epoch": 7.38, "learning_rate": 2.591397849462366e-05, "loss": 0.0072, "step": 690 }, { "epoch": 7.38, "eval_accuracy": 0.8297872543334961, "eval_loss": 0.8667866587638855, "eval_runtime": 19.4943, "eval_samples_per_second": 9.644, "eval_steps_per_second": 2.411, "step": 690 }, { "epoch": 7.49, "learning_rate": 2.4838709677419354e-05, "loss": 0.0626, "step": 700 }, { "epoch": 7.49, "eval_accuracy": 0.8617021441459656, "eval_loss": 0.6626190543174744, "eval_runtime": 19.4854, "eval_samples_per_second": 9.648, "eval_steps_per_second": 2.412, "step": 700 }, { "epoch": 7.59, "learning_rate": 2.3763440860215056e-05, "loss": 0.0237, "step": 710 }, { "epoch": 7.59, "eval_accuracy": 0.8510638475418091, "eval_loss": 0.6875038743019104, "eval_runtime": 21.2232, "eval_samples_per_second": 8.858, "eval_steps_per_second": 2.215, "step": 710 }, { "epoch": 7.7, "learning_rate": 2.268817204301075e-05, "loss": 0.0046, "step": 720 }, { "epoch": 7.7, "eval_accuracy": 0.8297872543334961, "eval_loss": 0.8058395385742188, "eval_runtime": 21.9104, "eval_samples_per_second": 8.58, "eval_steps_per_second": 2.145, "step": 720 }, { "epoch": 7.81, "learning_rate": 2.1612903225806454e-05, "loss": 0.0258, "step": 730 }, { "epoch": 7.81, "eval_accuracy": 0.8723404407501221, "eval_loss": 0.730888307094574, "eval_runtime": 19.7808, "eval_samples_per_second": 9.504, "eval_steps_per_second": 2.376, "step": 730 }, { "epoch": 7.91, "learning_rate": 2.0537634408602152e-05, "loss": 0.0363, "step": 740 }, { "epoch": 7.91, "eval_accuracy": 0.8510638475418091, "eval_loss": 0.7736497521400452, "eval_runtime": 20.7525, "eval_samples_per_second": 9.059, "eval_steps_per_second": 2.265, "step": 740 }, { "epoch": 8.02, "learning_rate": 1.9462365591397848e-05, "loss": 0.0012, "step": 750 }, { "epoch": 8.02, "eval_accuracy": 0.8457446694374084, "eval_loss": 0.8431892991065979, "eval_runtime": 21.0633, "eval_samples_per_second": 8.925, "eval_steps_per_second": 2.231, "step": 750 }, { "epoch": 8.13, "learning_rate": 1.838709677419355e-05, "loss": 0.0112, "step": 760 }, { "epoch": 8.13, "eval_accuracy": 0.8617021441459656, "eval_loss": 0.7464305758476257, "eval_runtime": 19.2825, "eval_samples_per_second": 9.75, "eval_steps_per_second": 2.437, "step": 760 }, { "epoch": 8.24, "learning_rate": 1.7311827956989248e-05, "loss": 0.0025, "step": 770 }, { "epoch": 8.24, "eval_accuracy": 0.8829787373542786, "eval_loss": 0.6915740370750427, "eval_runtime": 19.647, "eval_samples_per_second": 9.569, "eval_steps_per_second": 2.392, "step": 770 }, { "epoch": 8.34, "learning_rate": 1.6236559139784947e-05, "loss": 0.003, "step": 780 }, { "epoch": 8.34, "eval_accuracy": 0.8882978558540344, "eval_loss": 0.6764039993286133, "eval_runtime": 21.6356, "eval_samples_per_second": 8.689, "eval_steps_per_second": 2.172, "step": 780 }, { "epoch": 8.45, "learning_rate": 1.5161290322580646e-05, "loss": 0.0817, "step": 790 }, { "epoch": 8.45, "eval_accuracy": 0.8829787373542786, "eval_loss": 0.6824085116386414, "eval_runtime": 19.3614, "eval_samples_per_second": 9.71, "eval_steps_per_second": 2.428, "step": 790 }, { "epoch": 8.56, "learning_rate": 1.4086021505376346e-05, "loss": 0.0059, "step": 800 }, { "epoch": 8.56, "eval_accuracy": 0.8882978558540344, "eval_loss": 0.6375851035118103, "eval_runtime": 19.4127, "eval_samples_per_second": 9.684, "eval_steps_per_second": 2.421, "step": 800 }, { "epoch": 8.66, "learning_rate": 1.3010752688172043e-05, "loss": 0.0018, "step": 810 }, { "epoch": 8.66, "eval_accuracy": 0.8882978558540344, "eval_loss": 0.6130115389823914, "eval_runtime": 20.8292, "eval_samples_per_second": 9.026, "eval_steps_per_second": 2.256, "step": 810 }, { "epoch": 8.77, "learning_rate": 1.1935483870967743e-05, "loss": 0.0012, "step": 820 }, { "epoch": 8.77, "eval_accuracy": 0.8936170339584351, "eval_loss": 0.6152318716049194, "eval_runtime": 19.6293, "eval_samples_per_second": 9.578, "eval_steps_per_second": 2.394, "step": 820 }, { "epoch": 8.88, "learning_rate": 1.086021505376344e-05, "loss": 0.0029, "step": 830 }, { "epoch": 8.88, "eval_accuracy": 0.8936170339584351, "eval_loss": 0.6171016097068787, "eval_runtime": 19.602, "eval_samples_per_second": 9.591, "eval_steps_per_second": 2.398, "step": 830 }, { "epoch": 8.98, "learning_rate": 9.78494623655914e-06, "loss": 0.0016, "step": 840 }, { "epoch": 8.98, "eval_accuracy": 0.8882978558540344, "eval_loss": 0.6230288147926331, "eval_runtime": 19.6382, "eval_samples_per_second": 9.573, "eval_steps_per_second": 2.393, "step": 840 }, { "epoch": 9.09, "learning_rate": 8.70967741935484e-06, "loss": 0.0016, "step": 850 }, { "epoch": 9.09, "eval_accuracy": 0.8936170339584351, "eval_loss": 0.6247658729553223, "eval_runtime": 22.0158, "eval_samples_per_second": 8.539, "eval_steps_per_second": 2.135, "step": 850 }, { "epoch": 9.2, "learning_rate": 7.634408602150538e-06, "loss": 0.0022, "step": 860 }, { "epoch": 9.2, "eval_accuracy": 0.8936170339584351, "eval_loss": 0.6223304271697998, "eval_runtime": 22.0568, "eval_samples_per_second": 8.523, "eval_steps_per_second": 2.131, "step": 860 }, { "epoch": 9.3, "learning_rate": 6.559139784946237e-06, "loss": 0.0011, "step": 870 }, { "epoch": 9.3, "eval_accuracy": 0.8936170339584351, "eval_loss": 0.6233484745025635, "eval_runtime": 20.142, "eval_samples_per_second": 9.334, "eval_steps_per_second": 2.333, "step": 870 }, { "epoch": 9.41, "learning_rate": 5.483870967741936e-06, "loss": 0.0008, "step": 880 }, { "epoch": 9.41, "eval_accuracy": 0.8936170339584351, "eval_loss": 0.6245375275611877, "eval_runtime": 20.6589, "eval_samples_per_second": 9.1, "eval_steps_per_second": 2.275, "step": 880 }, { "epoch": 9.52, "learning_rate": 4.408602150537635e-06, "loss": 0.0109, "step": 890 }, { "epoch": 9.52, "eval_accuracy": 0.8829787373542786, "eval_loss": 0.6385069489479065, "eval_runtime": 19.883, "eval_samples_per_second": 9.455, "eval_steps_per_second": 2.364, "step": 890 }, { "epoch": 9.63, "learning_rate": 3.3333333333333333e-06, "loss": 0.0011, "step": 900 }, { "epoch": 9.63, "eval_accuracy": 0.8776595592498779, "eval_loss": 0.65866619348526, "eval_runtime": 19.924, "eval_samples_per_second": 9.436, "eval_steps_per_second": 2.359, "step": 900 }, { "epoch": 9.73, "learning_rate": 2.2580645161290324e-06, "loss": 0.0013, "step": 910 }, { "epoch": 9.73, "eval_accuracy": 0.8776595592498779, "eval_loss": 0.6651791930198669, "eval_runtime": 19.7346, "eval_samples_per_second": 9.526, "eval_steps_per_second": 2.382, "step": 910 }, { "epoch": 9.84, "learning_rate": 1.1827956989247313e-06, "loss": 0.0548, "step": 920 }, { "epoch": 9.84, "eval_accuracy": 0.8776595592498779, "eval_loss": 0.6682336926460266, "eval_runtime": 19.8027, "eval_samples_per_second": 9.494, "eval_steps_per_second": 2.373, "step": 920 }, { "epoch": 9.95, "learning_rate": 1.0752688172043011e-07, "loss": 0.0367, "step": 930 }, { "epoch": 9.95, "eval_accuracy": 0.8776595592498779, "eval_loss": 0.6705843806266785, "eval_runtime": 19.6221, "eval_samples_per_second": 9.581, "eval_steps_per_second": 2.395, "step": 930 } ], "logging_steps": 10, "max_steps": 930, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 10, "total_flos": 1.1929088038243113e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }