{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 67, "global_step": 67, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014925373134328358, "grad_norm": 4.677140235900879, "learning_rate": 1e-05, "loss": 2.7846, "step": 1 }, { "epoch": 0.029850746268656716, "grad_norm": 4.6858649253845215, "learning_rate": 9.850746268656717e-06, "loss": 2.496, "step": 2 }, { "epoch": 0.04477611940298507, "grad_norm": 3.9940388202667236, "learning_rate": 9.701492537313434e-06, "loss": 2.582, "step": 3 }, { "epoch": 0.05970149253731343, "grad_norm": 3.8767054080963135, "learning_rate": 9.552238805970149e-06, "loss": 2.215, "step": 4 }, { "epoch": 0.07462686567164178, "grad_norm": 2.339372396469116, "learning_rate": 9.402985074626867e-06, "loss": 1.9616, "step": 5 }, { "epoch": 0.08955223880597014, "grad_norm": 2.0336525440216064, "learning_rate": 9.253731343283582e-06, "loss": 1.9218, "step": 6 }, { "epoch": 0.1044776119402985, "grad_norm": 2.4649195671081543, "learning_rate": 9.104477611940299e-06, "loss": 2.5598, "step": 7 }, { "epoch": 0.11940298507462686, "grad_norm": 1.7558526992797852, "learning_rate": 8.955223880597016e-06, "loss": 2.1746, "step": 8 }, { "epoch": 0.13432835820895522, "grad_norm": 1.5009433031082153, "learning_rate": 8.805970149253732e-06, "loss": 2.1801, "step": 9 }, { "epoch": 0.14925373134328357, "grad_norm": 1.7307460308074951, "learning_rate": 8.656716417910447e-06, "loss": 2.2179, "step": 10 }, { "epoch": 0.16417910447761194, "grad_norm": 1.31997811794281, "learning_rate": 8.507462686567165e-06, "loss": 1.9017, "step": 11 }, { "epoch": 0.1791044776119403, "grad_norm": 1.0102890729904175, "learning_rate": 8.35820895522388e-06, "loss": 1.6226, "step": 12 }, { "epoch": 0.19402985074626866, "grad_norm": 1.1635653972625732, "learning_rate": 8.208955223880599e-06, "loss": 2.0264, "step": 13 }, { "epoch": 0.208955223880597, "grad_norm": 0.9142751097679138, "learning_rate": 8.059701492537314e-06, "loss": 1.9922, "step": 14 }, { "epoch": 0.22388059701492538, "grad_norm": 1.1939051151275635, "learning_rate": 7.91044776119403e-06, "loss": 2.205, "step": 15 }, { "epoch": 0.23880597014925373, "grad_norm": 0.7413591742515564, "learning_rate": 7.761194029850747e-06, "loss": 1.9766, "step": 16 }, { "epoch": 0.2537313432835821, "grad_norm": 1.400302767753601, "learning_rate": 7.611940298507463e-06, "loss": 2.3456, "step": 17 }, { "epoch": 0.26865671641791045, "grad_norm": 0.9068132042884827, "learning_rate": 7.46268656716418e-06, "loss": 2.2168, "step": 18 }, { "epoch": 0.2835820895522388, "grad_norm": 0.865298867225647, "learning_rate": 7.313432835820896e-06, "loss": 1.9891, "step": 19 }, { "epoch": 0.29850746268656714, "grad_norm": 1.2902920246124268, "learning_rate": 7.164179104477612e-06, "loss": 2.1995, "step": 20 }, { "epoch": 0.31343283582089554, "grad_norm": 0.7621744871139526, "learning_rate": 7.014925373134329e-06, "loss": 2.1563, "step": 21 }, { "epoch": 0.3283582089552239, "grad_norm": 0.7494510412216187, "learning_rate": 6.865671641791045e-06, "loss": 1.9338, "step": 22 }, { "epoch": 0.34328358208955223, "grad_norm": 0.6722490191459656, "learning_rate": 6.7164179104477625e-06, "loss": 1.9195, "step": 23 }, { "epoch": 0.3582089552238806, "grad_norm": 0.5672370195388794, "learning_rate": 6.567164179104478e-06, "loss": 2.1004, "step": 24 }, { "epoch": 0.373134328358209, "grad_norm": 0.5260008573532104, "learning_rate": 6.417910447761194e-06, "loss": 
2.0207, "step": 25 }, { "epoch": 0.3880597014925373, "grad_norm": 0.6754623651504517, "learning_rate": 6.2686567164179116e-06, "loss": 2.0735, "step": 26 }, { "epoch": 0.40298507462686567, "grad_norm": 0.5878338813781738, "learning_rate": 6.119402985074627e-06, "loss": 1.9148, "step": 27 }, { "epoch": 0.417910447761194, "grad_norm": 0.531120240688324, "learning_rate": 5.970149253731343e-06, "loss": 1.9395, "step": 28 }, { "epoch": 0.43283582089552236, "grad_norm": 0.667667806148529, "learning_rate": 5.820895522388061e-06, "loss": 2.038, "step": 29 }, { "epoch": 0.44776119402985076, "grad_norm": 0.7480222582817078, "learning_rate": 5.671641791044776e-06, "loss": 2.0045, "step": 30 }, { "epoch": 0.4626865671641791, "grad_norm": 0.9849134683609009, "learning_rate": 5.522388059701493e-06, "loss": 2.1881, "step": 31 }, { "epoch": 0.47761194029850745, "grad_norm": 0.6533071398735046, "learning_rate": 5.37313432835821e-06, "loss": 1.849, "step": 32 }, { "epoch": 0.4925373134328358, "grad_norm": 0.5160700082778931, "learning_rate": 5.2238805970149255e-06, "loss": 2.0241, "step": 33 }, { "epoch": 0.5074626865671642, "grad_norm": 0.5143930912017822, "learning_rate": 5.074626865671642e-06, "loss": 1.9619, "step": 34 }, { "epoch": 0.5223880597014925, "grad_norm": 0.6003533005714417, "learning_rate": 4.925373134328359e-06, "loss": 1.9579, "step": 35 }, { "epoch": 0.5373134328358209, "grad_norm": 0.49033546447753906, "learning_rate": 4.7761194029850745e-06, "loss": 2.1126, "step": 36 }, { "epoch": 0.5522388059701493, "grad_norm": 0.5822514891624451, "learning_rate": 4.626865671641791e-06, "loss": 1.8489, "step": 37 }, { "epoch": 0.5671641791044776, "grad_norm": 0.5790143013000488, "learning_rate": 4.477611940298508e-06, "loss": 2.0096, "step": 38 }, { "epoch": 0.582089552238806, "grad_norm": 0.41039204597473145, "learning_rate": 4.3283582089552236e-06, "loss": 1.775, "step": 39 }, { "epoch": 0.5970149253731343, "grad_norm": 0.820061206817627, "learning_rate": 4.17910447761194e-06, "loss": 2.1653, "step": 40 }, { "epoch": 0.6119402985074627, "grad_norm": 0.6350656151771545, "learning_rate": 4.029850746268657e-06, "loss": 1.982, "step": 41 }, { "epoch": 0.6268656716417911, "grad_norm": 0.41239652037620544, "learning_rate": 3.8805970149253735e-06, "loss": 1.8828, "step": 42 }, { "epoch": 0.6417910447761194, "grad_norm": 0.5652564764022827, "learning_rate": 3.73134328358209e-06, "loss": 2.0692, "step": 43 }, { "epoch": 0.6567164179104478, "grad_norm": 0.6621966361999512, "learning_rate": 3.582089552238806e-06, "loss": 2.2952, "step": 44 }, { "epoch": 0.6716417910447762, "grad_norm": 0.4039974510669708, "learning_rate": 3.4328358208955225e-06, "loss": 1.7934, "step": 45 }, { "epoch": 0.6865671641791045, "grad_norm": 0.5079028606414795, "learning_rate": 3.283582089552239e-06, "loss": 2.0361, "step": 46 }, { "epoch": 0.7014925373134329, "grad_norm": 0.41614365577697754, "learning_rate": 3.1343283582089558e-06, "loss": 1.7775, "step": 47 }, { "epoch": 0.7164179104477612, "grad_norm": 0.3995169699192047, "learning_rate": 2.9850746268656716e-06, "loss": 1.9167, "step": 48 }, { "epoch": 0.7313432835820896, "grad_norm": 0.5954918265342712, "learning_rate": 2.835820895522388e-06, "loss": 2.0694, "step": 49 }, { "epoch": 0.746268656716418, "grad_norm": 0.5778793692588806, "learning_rate": 2.686567164179105e-06, "loss": 2.0297, "step": 50 }, { "epoch": 0.7611940298507462, "grad_norm": 0.5707228183746338, "learning_rate": 2.537313432835821e-06, "loss": 2.201, "step": 51 }, { "epoch": 0.7761194029850746, "grad_norm": 
0.6407202482223511, "learning_rate": 2.3880597014925373e-06, "loss": 2.4394, "step": 52 }, { "epoch": 0.7910447761194029, "grad_norm": 0.3686445355415344, "learning_rate": 2.238805970149254e-06, "loss": 2.2426, "step": 53 }, { "epoch": 0.8059701492537313, "grad_norm": 0.4780975580215454, "learning_rate": 2.08955223880597e-06, "loss": 1.888, "step": 54 }, { "epoch": 0.8208955223880597, "grad_norm": 0.5346677303314209, "learning_rate": 1.9402985074626867e-06, "loss": 1.9405, "step": 55 }, { "epoch": 0.835820895522388, "grad_norm": 0.4024548828601837, "learning_rate": 1.791044776119403e-06, "loss": 1.6834, "step": 56 }, { "epoch": 0.8507462686567164, "grad_norm": 0.501626193523407, "learning_rate": 1.6417910447761196e-06, "loss": 2.2497, "step": 57 }, { "epoch": 0.8656716417910447, "grad_norm": 0.44980672001838684, "learning_rate": 1.4925373134328358e-06, "loss": 1.8196, "step": 58 }, { "epoch": 0.8805970149253731, "grad_norm": 0.8795580863952637, "learning_rate": 1.3432835820895524e-06, "loss": 2.1529, "step": 59 }, { "epoch": 0.8955223880597015, "grad_norm": 0.5534031987190247, "learning_rate": 1.1940298507462686e-06, "loss": 2.1861, "step": 60 }, { "epoch": 0.9104477611940298, "grad_norm": 0.3944064676761627, "learning_rate": 1.044776119402985e-06, "loss": 1.7992, "step": 61 }, { "epoch": 0.9253731343283582, "grad_norm": 0.3771931231021881, "learning_rate": 8.955223880597015e-07, "loss": 1.6866, "step": 62 }, { "epoch": 0.9402985074626866, "grad_norm": 0.5111584067344666, "learning_rate": 7.462686567164179e-07, "loss": 1.995, "step": 63 }, { "epoch": 0.9552238805970149, "grad_norm": 0.5018301606178284, "learning_rate": 5.970149253731343e-07, "loss": 1.8922, "step": 64 }, { "epoch": 0.9701492537313433, "grad_norm": 0.3669991195201874, "learning_rate": 4.4776119402985074e-07, "loss": 2.0702, "step": 65 }, { "epoch": 0.9850746268656716, "grad_norm": 0.45055946707725525, "learning_rate": 2.9850746268656716e-07, "loss": 2.0245, "step": 66 }, { "epoch": 1.0, "grad_norm": 0.5369859337806702, "learning_rate": 1.4925373134328358e-07, "loss": 2.2499, "step": 67 }, { "epoch": 1.0, "eval_loss": 1.8387645483016968, "eval_runtime": 0.8427, "eval_samples_per_second": 43.907, "eval_steps_per_second": 5.933, "step": 67 } ], "logging_steps": 1.0, "max_steps": 67, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3350387805388800.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }