{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.98876404494382, "eval_steps": 500, "global_step": 330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.149812734082397, "grad_norm": 0.6240223192562641, "learning_rate": 1.1764705882352942e-05, "loss": 3.4296, "step": 10 }, { "epoch": 0.299625468164794, "grad_norm": 0.4759039088058634, "learning_rate": 1.9995466947279753e-05, "loss": 3.1559, "step": 20 }, { "epoch": 0.449438202247191, "grad_norm": 0.3704142461383357, "learning_rate": 1.9914993620144055e-05, "loss": 3.0083, "step": 30 }, { "epoch": 0.599250936329588, "grad_norm": 0.3072287367616599, "learning_rate": 1.9734718376564386e-05, "loss": 2.8453, "step": 40 }, { "epoch": 0.7490636704119851, "grad_norm": 0.31016408017448077, "learning_rate": 1.945645582333587e-05, "loss": 2.7226, "step": 50 }, { "epoch": 0.898876404494382, "grad_norm": 0.30188871160390746, "learning_rate": 1.9083006883701688e-05, "loss": 2.6656, "step": 60 }, { "epoch": 1.0599250936329587, "grad_norm": 0.24590439921946178, "learning_rate": 1.8618130603940386e-05, "loss": 2.7955, "step": 70 }, { "epoch": 1.2097378277153559, "grad_norm": 0.2782213231278689, "learning_rate": 1.806650631571943e-05, "loss": 2.3523, "step": 80 }, { "epoch": 1.3595505617977528, "grad_norm": 0.2671277031974292, "learning_rate": 1.7433686535079736e-05, "loss": 2.2764, "step": 90 }, { "epoch": 1.5093632958801497, "grad_norm": 0.2703437385004914, "learning_rate": 1.672604107215848e-05, "loss": 2.2374, "step": 100 }, { "epoch": 1.6591760299625467, "grad_norm": 0.27488249330165304, "learning_rate": 1.595069291422807e-05, "loss": 2.2145, "step": 110 }, { "epoch": 1.8089887640449438, "grad_norm": 0.27322654774524974, "learning_rate": 1.5115446527437193e-05, "loss": 2.2315, "step": 120 }, { "epoch": 1.958801498127341, "grad_norm": 0.2496550941163805, "learning_rate": 1.4228709298950998e-05, "loss": 2.219, "step": 130 }, { "epoch": 2.1198501872659175, "grad_norm": 0.23748686371621341, "learning_rate": 1.3299406910234917e-05, "loss": 2.188, "step": 140 }, { "epoch": 2.2696629213483144, "grad_norm": 0.254579126507857, "learning_rate": 1.2336893493313946e-05, "loss": 1.8844, "step": 150 }, { "epoch": 2.4194756554307117, "grad_norm": 0.2533144202689544, "learning_rate": 1.1350857474352734e-05, "loss": 1.875, "step": 160 }, { "epoch": 2.5692883895131087, "grad_norm": 0.24718451680051595, "learning_rate": 1.035122405231209e-05, "loss": 1.852, "step": 170 }, { "epoch": 2.7191011235955056, "grad_norm": 0.25305694819263813, "learning_rate": 9.348055294308074e-06, "loss": 1.8581, "step": 180 }, { "epoch": 2.8689138576779025, "grad_norm": 0.25728520443261826, "learning_rate": 8.351448853289448e-06, "loss": 1.8616, "step": 190 }, { "epoch": 3.0299625468164795, "grad_norm": 0.264486812241608, "learning_rate": 7.371436327516854e-06, "loss": 1.9958, "step": 200 }, { "epoch": 3.1797752808988764, "grad_norm": 0.2825888432885122, "learning_rate": 6.417882284932373e-06, "loss": 1.6019, "step": 210 }, { "epoch": 3.3295880149812733, "grad_norm": 0.312588763374677, "learning_rate": 5.5003849688157075e-06, "loss": 1.5769, "step": 220 }, { "epoch": 3.4794007490636703, "grad_norm": 0.28888515165506573, "learning_rate": 4.628179684199685e-06, "loss": 1.5771, "step": 230 }, { "epoch": 3.629213483146067, "grad_norm": 0.2966244683463888, "learning_rate": 3.81004583753399e-06, "loss": 1.5697, "step": 240 }, { "epoch": 3.7790262172284645, "grad_norm": 0.2894357788177964, "learning_rate": 3.0542185653132216e-06, "loss": 1.5704, "step": 250 }, { "epoch": 3.9288389513108615, "grad_norm": 0.28773356307664283, "learning_rate": 2.3683058411940563e-06, "loss": 1.5622, "step": 260 }, { "epoch": 4.089887640449438, "grad_norm": 0.2786673362193353, "learning_rate": 1.75921189598118e-06, "loss": 1.6549, "step": 270 }, { "epoch": 4.239700374531835, "grad_norm": 0.2699331901599701, "learning_rate": 1.2330677213177034e-06, "loss": 1.4349, "step": 280 }, { "epoch": 4.389513108614232, "grad_norm": 0.2940053977518716, "learning_rate": 7.951693566131325e-07, "loss": 1.4191, "step": 290 }, { "epoch": 4.539325842696629, "grad_norm": 0.30293515491434625, "learning_rate": 4.499245803975927e-07, "loss": 1.3715, "step": 300 }, { "epoch": 4.689138576779026, "grad_norm": 0.30080510867957, "learning_rate": 2.00808542694233e-07, "loss": 1.4115, "step": 310 }, { "epoch": 4.8389513108614235, "grad_norm": 0.2849664523708078, "learning_rate": 5.032878500355498e-08, "loss": 1.4175, "step": 320 }, { "epoch": 4.98876404494382, "grad_norm": 0.29196026814035136, "learning_rate": 0.0, "loss": 1.4049, "step": 330 }, { "epoch": 4.98876404494382, "step": 330, "total_flos": 4662359162880.0, "train_loss": 2.0376082160256126, "train_runtime": 1365.9005, "train_samples_per_second": 31.228, "train_steps_per_second": 0.242 } ], "logging_steps": 10, "max_steps": 330, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4662359162880.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }