{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9991220368744512,
  "global_step": 569,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 9.997256670306478e-06,
      "loss": 0.6949,
      "step": 10
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.980502778148438e-06,
      "loss": 0.6937,
      "step": 20
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.948570063910216e-06,
      "loss": 0.6942,
      "step": 30
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.901555847282123e-06,
      "loss": 0.6937,
      "step": 40
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.839603411073388e-06,
      "loss": 0.675,
      "step": 50
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.762901564536523e-06,
      "loss": 0.657,
      "step": 60
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.671684067943056e-06,
      "loss": 0.6437,
      "step": 70
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.566228920164405e-06,
      "loss": 0.6067,
      "step": 80
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.446857511429e-06,
      "loss": 0.5389,
      "step": 90
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.313933643837825e-06,
      "loss": 0.3593,
      "step": 100
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.167862422623474e-06,
      "loss": 0.1982,
      "step": 110
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.009089021531777e-06,
      "loss": 0.1791,
      "step": 120
    },
    {
      "epoch": 0.23,
      "learning_rate": 8.838097326088667e-06,
      "loss": 0.1522,
      "step": 130
    },
    {
      "epoch": 0.25,
      "learning_rate": 8.65540845888717e-06,
      "loss": 0.1841,
      "step": 140
    },
    {
      "epoch": 0.26,
      "learning_rate": 8.46157919138889e-06,
      "loss": 0.1299,
      "step": 150
    },
    {
      "epoch": 0.28,
      "learning_rate": 8.257200247080249e-06,
      "loss": 0.1695,
      "step": 160
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.042894501154937e-06,
      "loss": 0.1536,
      "step": 170
    },
    {
      "epoch": 0.32,
      "learning_rate": 7.819315082209217e-06,
      "loss": 0.1233,
      "step": 180
    },
    {
      "epoch": 0.33,
      "learning_rate": 7.587143381735498e-06,
      "loss": 0.1254,
      "step": 190
    },
    {
      "epoch": 0.35,
      "learning_rate": 7.347086977480552e-06,
      "loss": 0.1153,
      "step": 200
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.09987747699721e-06,
      "loss": 0.1524,
      "step": 210
    },
    {
      "epoch": 0.39,
      "learning_rate": 6.846268287961667e-06,
      "loss": 0.1299,
      "step": 220
    },
    {
      "epoch": 0.4,
      "learning_rate": 6.587032322051667e-06,
      "loss": 0.1232,
      "step": 230
    },
    {
      "epoch": 0.42,
      "learning_rate": 6.32295963938335e-06,
      "loss": 0.1385,
      "step": 240
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.05485504068568e-06,
      "loss": 0.1248,
      "step": 250
    },
    {
      "epoch": 0.46,
      "learning_rate": 5.783535614550666e-06,
      "loss": 0.1271,
      "step": 260
    },
    {
      "epoch": 0.47,
      "learning_rate": 5.509828247234505e-06,
      "loss": 0.073,
      "step": 270
    },
    {
      "epoch": 0.49,
      "learning_rate": 5.234567102598881e-06,
      "loss": 0.1234,
      "step": 280
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.958591079872667e-06,
      "loss": 0.1003,
      "step": 290
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.682741256981922e-06,
      "loss": 0.1757,
      "step": 300
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.407858327239952e-06,
      "loss": 0.1151,
      "step": 310
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.134780037209563e-06,
      "loss": 0.1124,
      "step": 320
    },
    {
      "epoch": 0.58,
      "learning_rate": 3.864338633545956e-06,
      "loss": 0.0899,
      "step": 330
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.597358326601413e-06,
      "loss": 0.1251,
      "step": 340
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.334652778521813e-06,
      "loss": 0.171,
      "step": 350
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.077022623490371e-06,
      "loss": 0.1447,
      "step": 360
    },
    {
      "epoch": 0.65,
      "learning_rate": 2.825253027676026e-06,
      "loss": 0.1192,
      "step": 370
    },
    {
      "epoch": 0.67,
      "learning_rate": 2.580111296322904e-06,
      "loss": 0.1158,
      "step": 380
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.342344535273608e-06,
      "loss": 0.1028,
      "step": 390
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.112677374053164e-06,
      "loss": 0.1171,
      "step": 400
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.8918097574529193e-06,
      "loss": 0.1017,
      "step": 410
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.68041481234479e-06,
      "loss": 0.0763,
      "step": 420
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.4791367962271425e-06,
      "loss": 0.1185,
      "step": 430
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.2885891337543539e-06,
      "loss": 0.0701,
      "step": 440
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.1093525472340471e-06,
      "loss": 0.0873,
      "step": 450
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.419732867896048e-07,
      "loss": 0.127,
      "step": 460
    },
    {
      "epoch": 0.83,
      "learning_rate": 7.869614655817576e-07,
      "loss": 0.1289,
      "step": 470
    },
    {
      "epoch": 0.84,
      "learning_rate": 6.44789505162955e-07,
      "loss": 0.1211,
      "step": 480
    },
    {
      "epoch": 0.86,
      "learning_rate": 5.158906957025079e-07,
      "loss": 0.094,
      "step": 490
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.0065787547042543e-07,
      "loss": 0.09,
      "step": 500
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.994422336044345e-07,
      "loss": 0.1208,
      "step": 510
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.1255223980891027e-07,
      "loss": 0.0839,
      "step": 520
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.402527042476276e-07,
      "loss": 0.0934,
      "step": 530
    },
    {
      "epoch": 0.95,
      "learning_rate": 8.276397049545359e-08,
      "loss": 0.094,
      "step": 540
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.026124400856479e-08,
      "loss": 0.0984,
      "step": 550
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.2874058159796366e-08,
      "loss": 0.0783,
      "step": 560
    },
    {
      "epoch": 1.0,
      "step": 569,
      "total_flos": 0.0,
      "train_loss": 0.21165546916281192,
      "train_runtime": 6592.6141,
      "train_samples_per_second": 5.528,
      "train_steps_per_second": 0.086
    }
  ],
  "max_steps": 569,
  "num_train_epochs": 1,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}