|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.946524064171124, |
|
"eval_steps": 10, |
|
"global_step": 930, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.892473118279571e-05, |
|
"loss": 1.5206, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.292553186416626, |
|
"eval_loss": 2.0217974185943604, |
|
"eval_runtime": 19.3615, |
|
"eval_samples_per_second": 9.71, |
|
"eval_steps_per_second": 2.428, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.78494623655914e-05, |
|
"loss": 1.2661, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.3404255211353302, |
|
"eval_loss": 1.633784532546997, |
|
"eval_runtime": 22.8427, |
|
"eval_samples_per_second": 8.23, |
|
"eval_steps_per_second": 2.058, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.677419354838711e-05, |
|
"loss": 1.2961, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.4680851101875305, |
|
"eval_loss": 1.4141530990600586, |
|
"eval_runtime": 26.4217, |
|
"eval_samples_per_second": 7.115, |
|
"eval_steps_per_second": 1.779, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.56989247311828e-05, |
|
"loss": 1.1222, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5159574747085571, |
|
"eval_loss": 1.227504014968872, |
|
"eval_runtime": 20.4617, |
|
"eval_samples_per_second": 9.188, |
|
"eval_steps_per_second": 2.297, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.46236559139785e-05, |
|
"loss": 0.9449, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5531914830207825, |
|
"eval_loss": 1.2353336811065674, |
|
"eval_runtime": 19.3865, |
|
"eval_samples_per_second": 9.697, |
|
"eval_steps_per_second": 2.424, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.35483870967742e-05, |
|
"loss": 0.8884, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5531914830207825, |
|
"eval_loss": 1.2557822465896606, |
|
"eval_runtime": 19.3788, |
|
"eval_samples_per_second": 9.701, |
|
"eval_steps_per_second": 2.425, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.247311827956989e-05, |
|
"loss": 1.1132, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.6010638475418091, |
|
"eval_loss": 1.172572374343872, |
|
"eval_runtime": 19.3951, |
|
"eval_samples_per_second": 9.693, |
|
"eval_steps_per_second": 2.423, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.13978494623656e-05, |
|
"loss": 0.9073, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5744680762290955, |
|
"eval_loss": 1.2864995002746582, |
|
"eval_runtime": 20.2133, |
|
"eval_samples_per_second": 9.301, |
|
"eval_steps_per_second": 2.325, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.032258064516129e-05, |
|
"loss": 0.8923, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.6808510422706604, |
|
"eval_loss": 0.8788278698921204, |
|
"eval_runtime": 19.3337, |
|
"eval_samples_per_second": 9.724, |
|
"eval_steps_per_second": 2.431, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.924731182795699e-05, |
|
"loss": 0.8702, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.6808510422706604, |
|
"eval_loss": 0.9209534525871277, |
|
"eval_runtime": 19.7461, |
|
"eval_samples_per_second": 9.521, |
|
"eval_steps_per_second": 2.38, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.81720430107527e-05, |
|
"loss": 0.5832, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.6010638475418091, |
|
"eval_loss": 0.9852186441421509, |
|
"eval_runtime": 20.6843, |
|
"eval_samples_per_second": 9.089, |
|
"eval_steps_per_second": 2.272, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.709677419354839e-05, |
|
"loss": 0.776, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.664893627166748, |
|
"eval_loss": 1.0170938968658447, |
|
"eval_runtime": 19.2567, |
|
"eval_samples_per_second": 9.763, |
|
"eval_steps_per_second": 2.441, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.60215053763441e-05, |
|
"loss": 0.5927, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.6382978558540344, |
|
"eval_loss": 1.101302146911621, |
|
"eval_runtime": 20.3489, |
|
"eval_samples_per_second": 9.239, |
|
"eval_steps_per_second": 2.31, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 8.494623655913979e-05, |
|
"loss": 0.54, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.6808510422706604, |
|
"eval_loss": 0.9849129319190979, |
|
"eval_runtime": 19.1842, |
|
"eval_samples_per_second": 9.8, |
|
"eval_steps_per_second": 2.45, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.387096774193549e-05, |
|
"loss": 0.6341, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.6968085169792175, |
|
"eval_loss": 0.8642140626907349, |
|
"eval_runtime": 19.8877, |
|
"eval_samples_per_second": 9.453, |
|
"eval_steps_per_second": 2.363, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.27956989247312e-05, |
|
"loss": 0.8108, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.7340425252914429, |
|
"eval_loss": 0.7330851554870605, |
|
"eval_runtime": 20.2573, |
|
"eval_samples_per_second": 9.281, |
|
"eval_steps_per_second": 2.32, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 8.172043010752689e-05, |
|
"loss": 0.5861, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_accuracy": 0.7127659320831299, |
|
"eval_loss": 0.766554594039917, |
|
"eval_runtime": 19.2481, |
|
"eval_samples_per_second": 9.767, |
|
"eval_steps_per_second": 2.442, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.064516129032258e-05, |
|
"loss": 0.4829, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_accuracy": 0.7180851101875305, |
|
"eval_loss": 0.7813696265220642, |
|
"eval_runtime": 20.2831, |
|
"eval_samples_per_second": 9.269, |
|
"eval_steps_per_second": 2.317, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 7.956989247311829e-05, |
|
"loss": 0.6267, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.7234042286872864, |
|
"eval_loss": 0.7261902093887329, |
|
"eval_runtime": 19.8614, |
|
"eval_samples_per_second": 9.466, |
|
"eval_steps_per_second": 2.366, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 7.849462365591398e-05, |
|
"loss": 0.4318, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.7659574747085571, |
|
"eval_loss": 0.732907235622406, |
|
"eval_runtime": 20.4384, |
|
"eval_samples_per_second": 9.198, |
|
"eval_steps_per_second": 2.3, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.741935483870968e-05, |
|
"loss": 0.3761, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.7021276354789734, |
|
"eval_loss": 1.0907269716262817, |
|
"eval_runtime": 19.6052, |
|
"eval_samples_per_second": 9.589, |
|
"eval_steps_per_second": 2.397, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.634408602150538e-05, |
|
"loss": 0.324, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_accuracy": 0.7872340679168701, |
|
"eval_loss": 0.7849129438400269, |
|
"eval_runtime": 19.7685, |
|
"eval_samples_per_second": 9.51, |
|
"eval_steps_per_second": 2.378, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.526881720430108e-05, |
|
"loss": 0.3168, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.7553191781044006, |
|
"eval_loss": 0.7544443607330322, |
|
"eval_runtime": 19.9734, |
|
"eval_samples_per_second": 9.413, |
|
"eval_steps_per_second": 2.353, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.419354838709677e-05, |
|
"loss": 0.31, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.6808510422706604, |
|
"eval_loss": 1.3076503276824951, |
|
"eval_runtime": 19.5097, |
|
"eval_samples_per_second": 9.636, |
|
"eval_steps_per_second": 2.409, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.311827956989248e-05, |
|
"loss": 0.2488, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_accuracy": 0.7393617033958435, |
|
"eval_loss": 1.1082981824874878, |
|
"eval_runtime": 19.6472, |
|
"eval_samples_per_second": 9.569, |
|
"eval_steps_per_second": 2.392, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.204301075268818e-05, |
|
"loss": 0.2774, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_accuracy": 0.7393617033958435, |
|
"eval_loss": 1.0004332065582275, |
|
"eval_runtime": 19.5383, |
|
"eval_samples_per_second": 9.622, |
|
"eval_steps_per_second": 2.406, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.096774193548388e-05, |
|
"loss": 0.3068, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.7393617033958435, |
|
"eval_loss": 1.0832968950271606, |
|
"eval_runtime": 22.726, |
|
"eval_samples_per_second": 8.272, |
|
"eval_steps_per_second": 2.068, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 6.989247311827958e-05, |
|
"loss": 0.4347, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.6808510422706604, |
|
"eval_loss": 1.3584102392196655, |
|
"eval_runtime": 19.4201, |
|
"eval_samples_per_second": 9.681, |
|
"eval_steps_per_second": 2.42, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 6.881720430107527e-05, |
|
"loss": 0.2742, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.7765957713127136, |
|
"eval_loss": 0.9648256301879883, |
|
"eval_runtime": 20.0148, |
|
"eval_samples_per_second": 9.393, |
|
"eval_steps_per_second": 2.348, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 6.774193548387096e-05, |
|
"loss": 0.3018, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.7446808218955994, |
|
"eval_loss": 0.9408243298530579, |
|
"eval_runtime": 20.3383, |
|
"eval_samples_per_second": 9.244, |
|
"eval_steps_per_second": 2.311, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.4159, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_accuracy": 0.7127659320831299, |
|
"eval_loss": 1.3147622346878052, |
|
"eval_runtime": 28.2012, |
|
"eval_samples_per_second": 6.666, |
|
"eval_steps_per_second": 1.667, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.559139784946236e-05, |
|
"loss": 0.35, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_accuracy": 0.7659574747085571, |
|
"eval_loss": 0.9583517909049988, |
|
"eval_runtime": 21.7271, |
|
"eval_samples_per_second": 8.653, |
|
"eval_steps_per_second": 2.163, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 6.451612903225807e-05, |
|
"loss": 0.1448, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_accuracy": 0.7819148898124695, |
|
"eval_loss": 0.7321730256080627, |
|
"eval_runtime": 21.3525, |
|
"eval_samples_per_second": 8.805, |
|
"eval_steps_per_second": 2.201, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 6.344086021505376e-05, |
|
"loss": 0.1969, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_accuracy": 0.8297872543334961, |
|
"eval_loss": 0.7202281355857849, |
|
"eval_runtime": 19.4523, |
|
"eval_samples_per_second": 9.665, |
|
"eval_steps_per_second": 2.416, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 6.236559139784946e-05, |
|
"loss": 0.4023, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.7340425252914429, |
|
"eval_loss": 1.210102915763855, |
|
"eval_runtime": 19.9948, |
|
"eval_samples_per_second": 9.402, |
|
"eval_steps_per_second": 2.351, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.129032258064517e-05, |
|
"loss": 0.301, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_accuracy": 0.8510638475418091, |
|
"eval_loss": 0.4878663122653961, |
|
"eval_runtime": 21.8287, |
|
"eval_samples_per_second": 8.613, |
|
"eval_steps_per_second": 2.153, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 6.021505376344086e-05, |
|
"loss": 0.1968, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_accuracy": 0.8191489577293396, |
|
"eval_loss": 0.721879243850708, |
|
"eval_runtime": 19.355, |
|
"eval_samples_per_second": 9.713, |
|
"eval_steps_per_second": 2.428, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 5.913978494623657e-05, |
|
"loss": 0.1026, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_accuracy": 0.7234042286872864, |
|
"eval_loss": 1.1493706703186035, |
|
"eval_runtime": 19.4745, |
|
"eval_samples_per_second": 9.654, |
|
"eval_steps_per_second": 2.413, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 5.8064516129032266e-05, |
|
"loss": 0.2143, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_accuracy": 0.7446808218955994, |
|
"eval_loss": 1.1254618167877197, |
|
"eval_runtime": 19.5314, |
|
"eval_samples_per_second": 9.626, |
|
"eval_steps_per_second": 2.406, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 5.6989247311827965e-05, |
|
"loss": 0.2157, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.7978723645210266, |
|
"eval_loss": 0.867353618144989, |
|
"eval_runtime": 19.5612, |
|
"eval_samples_per_second": 9.611, |
|
"eval_steps_per_second": 2.403, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 5.5913978494623656e-05, |
|
"loss": 0.1429, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_accuracy": 0.8031914830207825, |
|
"eval_loss": 0.8620893359184265, |
|
"eval_runtime": 19.8011, |
|
"eval_samples_per_second": 9.494, |
|
"eval_steps_per_second": 2.374, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 5.4838709677419355e-05, |
|
"loss": 0.2338, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_accuracy": 0.771276593208313, |
|
"eval_loss": 1.0524601936340332, |
|
"eval_runtime": 20.0193, |
|
"eval_samples_per_second": 9.391, |
|
"eval_steps_per_second": 2.348, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 5.3763440860215054e-05, |
|
"loss": 0.079, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_accuracy": 0.7978723645210266, |
|
"eval_loss": 0.9354122877120972, |
|
"eval_runtime": 19.937, |
|
"eval_samples_per_second": 9.43, |
|
"eval_steps_per_second": 2.357, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.268817204301075e-05, |
|
"loss": 0.0652, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_accuracy": 0.8297872543334961, |
|
"eval_loss": 0.8370540142059326, |
|
"eval_runtime": 20.2037, |
|
"eval_samples_per_second": 9.305, |
|
"eval_steps_per_second": 2.326, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 5.161290322580645e-05, |
|
"loss": 0.0464, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.7978723645210266, |
|
"eval_loss": 0.9808104634284973, |
|
"eval_runtime": 21.6193, |
|
"eval_samples_per_second": 8.696, |
|
"eval_steps_per_second": 2.174, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 5.053763440860215e-05, |
|
"loss": 0.2738, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_accuracy": 0.8510638475418091, |
|
"eval_loss": 0.7124775052070618, |
|
"eval_runtime": 20.7786, |
|
"eval_samples_per_second": 9.048, |
|
"eval_steps_per_second": 2.262, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.9462365591397855e-05, |
|
"loss": 0.0234, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_accuracy": 0.813829779624939, |
|
"eval_loss": 0.799345850944519, |
|
"eval_runtime": 22.0458, |
|
"eval_samples_per_second": 8.528, |
|
"eval_steps_per_second": 2.132, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 4.8387096774193554e-05, |
|
"loss": 0.1504, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_accuracy": 0.7978723645210266, |
|
"eval_loss": 0.9516879916191101, |
|
"eval_runtime": 20.0905, |
|
"eval_samples_per_second": 9.358, |
|
"eval_steps_per_second": 2.339, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 4.731182795698925e-05, |
|
"loss": 0.0289, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.8404255509376526, |
|
"eval_loss": 0.7372261881828308, |
|
"eval_runtime": 19.4546, |
|
"eval_samples_per_second": 9.664, |
|
"eval_steps_per_second": 2.416, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 4.6236559139784944e-05, |
|
"loss": 0.2253, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_accuracy": 0.8936170339584351, |
|
"eval_loss": 0.47867247462272644, |
|
"eval_runtime": 19.7513, |
|
"eval_samples_per_second": 9.518, |
|
"eval_steps_per_second": 2.38, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 4.516129032258064e-05, |
|
"loss": 0.0487, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"eval_accuracy": 0.8457446694374084, |
|
"eval_loss": 0.6794525384902954, |
|
"eval_runtime": 20.9065, |
|
"eval_samples_per_second": 8.992, |
|
"eval_steps_per_second": 2.248, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 4.408602150537635e-05, |
|
"loss": 0.0266, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_accuracy": 0.835106372833252, |
|
"eval_loss": 0.8275220990180969, |
|
"eval_runtime": 20.2058, |
|
"eval_samples_per_second": 9.304, |
|
"eval_steps_per_second": 2.326, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 4.301075268817205e-05, |
|
"loss": 0.07, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"eval_accuracy": 0.7978723645210266, |
|
"eval_loss": 1.0128568410873413, |
|
"eval_runtime": 20.839, |
|
"eval_samples_per_second": 9.022, |
|
"eval_steps_per_second": 2.255, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 4.1935483870967746e-05, |
|
"loss": 0.0439, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"eval_accuracy": 0.835106372833252, |
|
"eval_loss": 0.8743005394935608, |
|
"eval_runtime": 20.318, |
|
"eval_samples_per_second": 9.253, |
|
"eval_steps_per_second": 2.313, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 4.0860215053763444e-05, |
|
"loss": 0.0609, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_accuracy": 0.8457446694374084, |
|
"eval_loss": 0.7512069344520569, |
|
"eval_runtime": 19.5322, |
|
"eval_samples_per_second": 9.625, |
|
"eval_steps_per_second": 2.406, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 3.978494623655914e-05, |
|
"loss": 0.0902, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.8510638475418091, |
|
"eval_loss": 0.702180802822113, |
|
"eval_runtime": 19.3513, |
|
"eval_samples_per_second": 9.715, |
|
"eval_steps_per_second": 2.429, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 3.870967741935484e-05, |
|
"loss": 0.0364, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 0.7819148898124695, |
|
"eval_loss": 1.0619040727615356, |
|
"eval_runtime": 21.233, |
|
"eval_samples_per_second": 8.854, |
|
"eval_steps_per_second": 2.214, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3.763440860215054e-05, |
|
"loss": 0.033, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_accuracy": 0.7765957713127136, |
|
"eval_loss": 1.243336796760559, |
|
"eval_runtime": 19.5002, |
|
"eval_samples_per_second": 9.641, |
|
"eval_steps_per_second": 2.41, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.1456, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"eval_accuracy": 0.8244680762290955, |
|
"eval_loss": 0.8834251165390015, |
|
"eval_runtime": 19.3312, |
|
"eval_samples_per_second": 9.725, |
|
"eval_steps_per_second": 2.431, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 3.5591397849462364e-05, |
|
"loss": 0.0049, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_accuracy": 0.8510638475418091, |
|
"eval_loss": 0.8070632219314575, |
|
"eval_runtime": 20.1893, |
|
"eval_samples_per_second": 9.312, |
|
"eval_steps_per_second": 2.328, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 3.451612903225806e-05, |
|
"loss": 0.1007, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_accuracy": 0.8670212626457214, |
|
"eval_loss": 0.6686306595802307, |
|
"eval_runtime": 19.5009, |
|
"eval_samples_per_second": 9.641, |
|
"eval_steps_per_second": 2.41, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 3.344086021505377e-05, |
|
"loss": 0.0054, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"eval_accuracy": 0.8776595592498779, |
|
"eval_loss": 0.6421190500259399, |
|
"eval_runtime": 21.8176, |
|
"eval_samples_per_second": 8.617, |
|
"eval_steps_per_second": 2.154, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 3.236559139784947e-05, |
|
"loss": 0.0027, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_accuracy": 0.8563829660415649, |
|
"eval_loss": 0.7604753375053406, |
|
"eval_runtime": 19.6912, |
|
"eval_samples_per_second": 9.547, |
|
"eval_steps_per_second": 2.387, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 3.1290322580645166e-05, |
|
"loss": 0.1582, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_accuracy": 0.8670212626457214, |
|
"eval_loss": 0.7657257914543152, |
|
"eval_runtime": 19.5763, |
|
"eval_samples_per_second": 9.603, |
|
"eval_steps_per_second": 2.401, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 3.0215053763440858e-05, |
|
"loss": 0.0106, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_accuracy": 0.835106372833252, |
|
"eval_loss": 0.8570267558097839, |
|
"eval_runtime": 20.2786, |
|
"eval_samples_per_second": 9.271, |
|
"eval_steps_per_second": 2.318, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 2.913978494623656e-05, |
|
"loss": 0.0219, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_accuracy": 0.8457446694374084, |
|
"eval_loss": 0.8243122696876526, |
|
"eval_runtime": 20.636, |
|
"eval_samples_per_second": 9.11, |
|
"eval_steps_per_second": 2.278, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 2.806451612903226e-05, |
|
"loss": 0.0032, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"eval_accuracy": 0.8085106611251831, |
|
"eval_loss": 0.9377107620239258, |
|
"eval_runtime": 19.4326, |
|
"eval_samples_per_second": 9.674, |
|
"eval_steps_per_second": 2.419, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 2.698924731182796e-05, |
|
"loss": 0.0037, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"eval_accuracy": 0.8031914830207825, |
|
"eval_loss": 1.0501412153244019, |
|
"eval_runtime": 19.4587, |
|
"eval_samples_per_second": 9.661, |
|
"eval_steps_per_second": 2.415, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 2.591397849462366e-05, |
|
"loss": 0.0072, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_accuracy": 0.8297872543334961, |
|
"eval_loss": 0.8667866587638855, |
|
"eval_runtime": 19.4943, |
|
"eval_samples_per_second": 9.644, |
|
"eval_steps_per_second": 2.411, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 2.4838709677419354e-05, |
|
"loss": 0.0626, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"eval_accuracy": 0.8617021441459656, |
|
"eval_loss": 0.6626190543174744, |
|
"eval_runtime": 19.4854, |
|
"eval_samples_per_second": 9.648, |
|
"eval_steps_per_second": 2.412, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 2.3763440860215056e-05, |
|
"loss": 0.0237, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"eval_accuracy": 0.8510638475418091, |
|
"eval_loss": 0.6875038743019104, |
|
"eval_runtime": 21.2232, |
|
"eval_samples_per_second": 8.858, |
|
"eval_steps_per_second": 2.215, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 2.268817204301075e-05, |
|
"loss": 0.0046, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_accuracy": 0.8297872543334961, |
|
"eval_loss": 0.8058395385742188, |
|
"eval_runtime": 21.9104, |
|
"eval_samples_per_second": 8.58, |
|
"eval_steps_per_second": 2.145, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 2.1612903225806454e-05, |
|
"loss": 0.0258, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_accuracy": 0.8723404407501221, |
|
"eval_loss": 0.730888307094574, |
|
"eval_runtime": 19.7808, |
|
"eval_samples_per_second": 9.504, |
|
"eval_steps_per_second": 2.376, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 2.0537634408602152e-05, |
|
"loss": 0.0363, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_accuracy": 0.8510638475418091, |
|
"eval_loss": 0.7736497521400452, |
|
"eval_runtime": 20.7525, |
|
"eval_samples_per_second": 9.059, |
|
"eval_steps_per_second": 2.265, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.9462365591397848e-05, |
|
"loss": 0.0012, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.8457446694374084, |
|
"eval_loss": 0.8431892991065979, |
|
"eval_runtime": 21.0633, |
|
"eval_samples_per_second": 8.925, |
|
"eval_steps_per_second": 2.231, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 1.838709677419355e-05, |
|
"loss": 0.0112, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"eval_accuracy": 0.8617021441459656, |
|
"eval_loss": 0.7464305758476257, |
|
"eval_runtime": 19.2825, |
|
"eval_samples_per_second": 9.75, |
|
"eval_steps_per_second": 2.437, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 1.7311827956989248e-05, |
|
"loss": 0.0025, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"eval_accuracy": 0.8829787373542786, |
|
"eval_loss": 0.6915740370750427, |
|
"eval_runtime": 19.647, |
|
"eval_samples_per_second": 9.569, |
|
"eval_steps_per_second": 2.392, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 1.6236559139784947e-05, |
|
"loss": 0.003, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"eval_accuracy": 0.8882978558540344, |
|
"eval_loss": 0.6764039993286133, |
|
"eval_runtime": 21.6356, |
|
"eval_samples_per_second": 8.689, |
|
"eval_steps_per_second": 2.172, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 1.5161290322580646e-05, |
|
"loss": 0.0817, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_accuracy": 0.8829787373542786, |
|
"eval_loss": 0.6824085116386414, |
|
"eval_runtime": 19.3614, |
|
"eval_samples_per_second": 9.71, |
|
"eval_steps_per_second": 2.428, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 1.4086021505376346e-05, |
|
"loss": 0.0059, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_accuracy": 0.8882978558540344, |
|
"eval_loss": 0.6375851035118103, |
|
"eval_runtime": 19.4127, |
|
"eval_samples_per_second": 9.684, |
|
"eval_steps_per_second": 2.421, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 1.3010752688172043e-05, |
|
"loss": 0.0018, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_accuracy": 0.8882978558540344, |
|
"eval_loss": 0.6130115389823914, |
|
"eval_runtime": 20.8292, |
|
"eval_samples_per_second": 9.026, |
|
"eval_steps_per_second": 2.256, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 1.1935483870967743e-05, |
|
"loss": 0.0012, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"eval_accuracy": 0.8936170339584351, |
|
"eval_loss": 0.6152318716049194, |
|
"eval_runtime": 19.6293, |
|
"eval_samples_per_second": 9.578, |
|
"eval_steps_per_second": 2.394, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 1.086021505376344e-05, |
|
"loss": 0.0029, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"eval_accuracy": 0.8936170339584351, |
|
"eval_loss": 0.6171016097068787, |
|
"eval_runtime": 19.602, |
|
"eval_samples_per_second": 9.591, |
|
"eval_steps_per_second": 2.398, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 9.78494623655914e-06, |
|
"loss": 0.0016, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_accuracy": 0.8882978558540344, |
|
"eval_loss": 0.6230288147926331, |
|
"eval_runtime": 19.6382, |
|
"eval_samples_per_second": 9.573, |
|
"eval_steps_per_second": 2.393, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 8.70967741935484e-06, |
|
"loss": 0.0016, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.8936170339584351, |
|
"eval_loss": 0.6247658729553223, |
|
"eval_runtime": 22.0158, |
|
"eval_samples_per_second": 8.539, |
|
"eval_steps_per_second": 2.135, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 7.634408602150538e-06, |
|
"loss": 0.0022, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_accuracy": 0.8936170339584351, |
|
"eval_loss": 0.6223304271697998, |
|
"eval_runtime": 22.0568, |
|
"eval_samples_per_second": 8.523, |
|
"eval_steps_per_second": 2.131, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 6.559139784946237e-06, |
|
"loss": 0.0011, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_accuracy": 0.8936170339584351, |
|
"eval_loss": 0.6233484745025635, |
|
"eval_runtime": 20.142, |
|
"eval_samples_per_second": 9.334, |
|
"eval_steps_per_second": 2.333, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 5.483870967741936e-06, |
|
"loss": 0.0008, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"eval_accuracy": 0.8936170339584351, |
|
"eval_loss": 0.6245375275611877, |
|
"eval_runtime": 20.6589, |
|
"eval_samples_per_second": 9.1, |
|
"eval_steps_per_second": 2.275, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 4.408602150537635e-06, |
|
"loss": 0.0109, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_accuracy": 0.8829787373542786, |
|
"eval_loss": 0.6385069489479065, |
|
"eval_runtime": 19.883, |
|
"eval_samples_per_second": 9.455, |
|
"eval_steps_per_second": 2.364, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0011, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_accuracy": 0.8776595592498779, |
|
"eval_loss": 0.65866619348526, |
|
"eval_runtime": 19.924, |
|
"eval_samples_per_second": 9.436, |
|
"eval_steps_per_second": 2.359, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 2.2580645161290324e-06, |
|
"loss": 0.0013, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_accuracy": 0.8776595592498779, |
|
"eval_loss": 0.6651791930198669, |
|
"eval_runtime": 19.7346, |
|
"eval_samples_per_second": 9.526, |
|
"eval_steps_per_second": 2.382, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 1.1827956989247313e-06, |
|
"loss": 0.0548, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_accuracy": 0.8776595592498779, |
|
"eval_loss": 0.6682336926460266, |
|
"eval_runtime": 19.8027, |
|
"eval_samples_per_second": 9.494, |
|
"eval_steps_per_second": 2.373, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 1.0752688172043011e-07, |
|
"loss": 0.0367, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.8776595592498779, |
|
"eval_loss": 0.6705843806266785, |
|
"eval_runtime": 19.6221, |
|
"eval_samples_per_second": 9.581, |
|
"eval_steps_per_second": 2.395, |
|
"step": 930 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 930, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 10, |
|
"total_flos": 1.1929088038243113e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|