{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.705882352941176, "eval_steps": 500, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5882352941176471, "grad_norm": 0.9883961465327094, "learning_rate": 1.5e-05, "loss": 1.1572, "mean_token_accuracy": 0.7539515912532806, "num_tokens": 2505918.0, "step": 5 }, { "epoch": 1.1176470588235294, "grad_norm": 1.901663288130973, "learning_rate": 3e-05, "loss": 1.0333, "mean_token_accuracy": 0.7687566743956672, "num_tokens": 4791423.0, "step": 10 }, { "epoch": 1.7058823529411766, "grad_norm": 0.6258080342354994, "learning_rate": 2.8255813953488374e-05, "loss": 0.881, "mean_token_accuracy": 0.7985241502523422, "num_tokens": 7330272.0, "step": 15 }, { "epoch": 2.235294117647059, "grad_norm": 0.46238688658175153, "learning_rate": 2.6511627906976747e-05, "loss": 0.8499, "mean_token_accuracy": 0.806600358751085, "num_tokens": 9618952.0, "step": 20 }, { "epoch": 2.8235294117647056, "grad_norm": 0.3244834177268814, "learning_rate": 2.4767441860465116e-05, "loss": 0.7258, "mean_token_accuracy": 0.8285025209188461, "num_tokens": 12114377.0, "step": 25 }, { "epoch": 3.3529411764705883, "grad_norm": 0.3096128182615979, "learning_rate": 2.302325581395349e-05, "loss": 0.7092, "mean_token_accuracy": 0.8342651062541537, "num_tokens": 14409009.0, "step": 30 }, { "epoch": 3.9411764705882355, "grad_norm": 1.5240607136739224, "learning_rate": 2.1279069767441862e-05, "loss": 0.64, "mean_token_accuracy": 0.8458412200212478, "num_tokens": 16918082.0, "step": 35 }, { "epoch": 4.470588235294118, "grad_norm": 0.4386330787702683, "learning_rate": 1.9534883720930235e-05, "loss": 0.6372, "mean_token_accuracy": 0.8506042758623759, "num_tokens": 19217454.0, "step": 40 }, { "epoch": 5.0, "grad_norm": 0.32130536815260163, "learning_rate": 1.7790697674418608e-05, "loss": 0.5271, "mean_token_accuracy": 0.870173497332467, "num_tokens": 21475135.0, "step": 45 }, { "epoch": 5.588235294117647, "grad_norm": 0.38825023600544556, "learning_rate": 1.6046511627906977e-05, "loss": 0.533, "mean_token_accuracy": 0.8699036419391633, "num_tokens": 23999790.0, "step": 50 }, { "epoch": 6.117647058823529, "grad_norm": 0.5588159986106253, "learning_rate": 1.430232558139535e-05, "loss": 0.4696, "mean_token_accuracy": 0.886308984624015, "num_tokens": 26264984.0, "step": 55 }, { "epoch": 6.705882352941177, "grad_norm": 0.5373227236502689, "learning_rate": 1.2558139534883723e-05, "loss": 0.3877, "mean_token_accuracy": 0.9013922065496445, "num_tokens": 28792279.0, "step": 60 }, { "epoch": 7.235294117647059, "grad_norm": 0.5457578900062939, "learning_rate": 1.0813953488372092e-05, "loss": 0.484, "mean_token_accuracy": 0.8886518941985236, "num_tokens": 31085796.0, "step": 65 }, { "epoch": 7.823529411764706, "grad_norm": 0.5156132453867892, "learning_rate": 9.069767441860465e-06, "loss": 0.3649, "mean_token_accuracy": 0.9116492509841919, "num_tokens": 33579925.0, "step": 70 }, { "epoch": 8.352941176470589, "grad_norm": 0.43157012641733306, "learning_rate": 7.325581395348837e-06, "loss": 0.3862, "mean_token_accuracy": 0.9118325445387099, "num_tokens": 35872214.0, "step": 75 }, { "epoch": 8.941176470588236, "grad_norm": 0.39332288450299335, "learning_rate": 5.581395348837209e-06, "loss": 0.3342, "mean_token_accuracy": 0.9180105596780777, "num_tokens": 38392486.0, "step": 80 }, { "epoch": 9.470588235294118, "grad_norm": 0.5517208078409448, "learning_rate": 3.837209302325582e-06, "loss": 0.3264, "mean_token_accuracy": 0.9224550028642019, "num_tokens": 40676022.0, "step": 85 }, { "epoch": 10.0, "grad_norm": 0.6739351616750774, "learning_rate": 2.0930232558139536e-06, "loss": 0.3066, "mean_token_accuracy": 0.9260480867491828, "num_tokens": 42948208.0, "step": 90 }, { "epoch": 10.588235294117647, "grad_norm": 0.35461902887245744, "learning_rate": 3.4883720930232557e-07, "loss": 0.3026, "mean_token_accuracy": 0.9277766048908234, "num_tokens": 45475525.0, "step": 95 }, { "epoch": 10.705882352941176, "mean_token_accuracy": 0.9401646554470062, "num_tokens": 45971032.0, "step": 96, "total_flos": 68866778333184.0, "train_loss": 0.5774029536793629, "train_runtime": 585.0314, "train_samples_per_second": 11.056, "train_steps_per_second": 0.164 } ], "logging_steps": 5, "max_steps": 96, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 68866778333184.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }