{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 67,
  "global_step": 67,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014925373134328358,
      "grad_norm": 4.677140235900879,
      "learning_rate": 1e-05,
      "loss": 2.7846,
      "step": 1
    },
    {
      "epoch": 0.029850746268656716,
      "grad_norm": 4.6858649253845215,
      "learning_rate": 9.850746268656717e-06,
      "loss": 2.496,
      "step": 2
    },
    {
      "epoch": 0.04477611940298507,
      "grad_norm": 3.9940388202667236,
      "learning_rate": 9.701492537313434e-06,
      "loss": 2.582,
      "step": 3
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 3.8767054080963135,
      "learning_rate": 9.552238805970149e-06,
      "loss": 2.215,
      "step": 4
    },
    {
      "epoch": 0.07462686567164178,
      "grad_norm": 2.339372396469116,
      "learning_rate": 9.402985074626867e-06,
      "loss": 1.9616,
      "step": 5
    },
    {
      "epoch": 0.08955223880597014,
      "grad_norm": 2.0336525440216064,
      "learning_rate": 9.253731343283582e-06,
      "loss": 1.9218,
      "step": 6
    },
    {
      "epoch": 0.1044776119402985,
      "grad_norm": 2.4649195671081543,
      "learning_rate": 9.104477611940299e-06,
      "loss": 2.5598,
      "step": 7
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": 1.7558526992797852,
      "learning_rate": 8.955223880597016e-06,
      "loss": 2.1746,
      "step": 8
    },
    {
      "epoch": 0.13432835820895522,
      "grad_norm": 1.5009433031082153,
      "learning_rate": 8.805970149253732e-06,
      "loss": 2.1801,
      "step": 9
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 1.7307460308074951,
      "learning_rate": 8.656716417910447e-06,
      "loss": 2.2179,
      "step": 10
    },
    {
      "epoch": 0.16417910447761194,
      "grad_norm": 1.31997811794281,
      "learning_rate": 8.507462686567165e-06,
      "loss": 1.9017,
      "step": 11
    },
    {
      "epoch": 0.1791044776119403,
      "grad_norm": 1.0102890729904175,
      "learning_rate": 8.35820895522388e-06,
      "loss": 1.6226,
      "step": 12
    },
    {
      "epoch": 0.19402985074626866,
      "grad_norm": 1.1635653972625732,
      "learning_rate": 8.208955223880599e-06,
      "loss": 2.0264,
      "step": 13
    },
    {
      "epoch": 0.208955223880597,
      "grad_norm": 0.9142751097679138,
      "learning_rate": 8.059701492537314e-06,
      "loss": 1.9922,
      "step": 14
    },
    {
      "epoch": 0.22388059701492538,
      "grad_norm": 1.1939051151275635,
      "learning_rate": 7.91044776119403e-06,
      "loss": 2.205,
      "step": 15
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": 0.7413591742515564,
      "learning_rate": 7.761194029850747e-06,
      "loss": 1.9766,
      "step": 16
    },
    {
      "epoch": 0.2537313432835821,
      "grad_norm": 1.400302767753601,
      "learning_rate": 7.611940298507463e-06,
      "loss": 2.3456,
      "step": 17
    },
    {
      "epoch": 0.26865671641791045,
      "grad_norm": 0.9068132042884827,
      "learning_rate": 7.46268656716418e-06,
      "loss": 2.2168,
      "step": 18
    },
    {
      "epoch": 0.2835820895522388,
      "grad_norm": 0.865298867225647,
      "learning_rate": 7.313432835820896e-06,
      "loss": 1.9891,
      "step": 19
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 1.2902920246124268,
      "learning_rate": 7.164179104477612e-06,
      "loss": 2.1995,
      "step": 20
    },
    {
      "epoch": 0.31343283582089554,
      "grad_norm": 0.7621744871139526,
      "learning_rate": 7.014925373134329e-06,
      "loss": 2.1563,
      "step": 21
    },
    {
      "epoch": 0.3283582089552239,
      "grad_norm": 0.7494510412216187,
      "learning_rate": 6.865671641791045e-06,
      "loss": 1.9338,
      "step": 22
    },
    {
      "epoch": 0.34328358208955223,
      "grad_norm": 0.6722490191459656,
      "learning_rate": 6.7164179104477625e-06,
      "loss": 1.9195,
      "step": 23
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": 0.5672370195388794,
      "learning_rate": 6.567164179104478e-06,
      "loss": 2.1004,
      "step": 24
    },
    {
      "epoch": 0.373134328358209,
      "grad_norm": 0.5260008573532104,
      "learning_rate": 6.417910447761194e-06,
      "loss": 2.0207,
      "step": 25
    },
    {
      "epoch": 0.3880597014925373,
      "grad_norm": 0.6754623651504517,
      "learning_rate": 6.2686567164179116e-06,
      "loss": 2.0735,
      "step": 26
    },
    {
      "epoch": 0.40298507462686567,
      "grad_norm": 0.5878338813781738,
      "learning_rate": 6.119402985074627e-06,
      "loss": 1.9148,
      "step": 27
    },
    {
      "epoch": 0.417910447761194,
      "grad_norm": 0.531120240688324,
      "learning_rate": 5.970149253731343e-06,
      "loss": 1.9395,
      "step": 28
    },
    {
      "epoch": 0.43283582089552236,
      "grad_norm": 0.667667806148529,
      "learning_rate": 5.820895522388061e-06,
      "loss": 2.038,
      "step": 29
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 0.7480222582817078,
      "learning_rate": 5.671641791044776e-06,
      "loss": 2.0045,
      "step": 30
    },
    {
      "epoch": 0.4626865671641791,
      "grad_norm": 0.9849134683609009,
      "learning_rate": 5.522388059701493e-06,
      "loss": 2.1881,
      "step": 31
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": 0.6533071398735046,
      "learning_rate": 5.37313432835821e-06,
      "loss": 1.849,
      "step": 32
    },
    {
      "epoch": 0.4925373134328358,
      "grad_norm": 0.5160700082778931,
      "learning_rate": 5.2238805970149255e-06,
      "loss": 2.0241,
      "step": 33
    },
    {
      "epoch": 0.5074626865671642,
      "grad_norm": 0.5143930912017822,
      "learning_rate": 5.074626865671642e-06,
      "loss": 1.9619,
      "step": 34
    },
    {
      "epoch": 0.5223880597014925,
      "grad_norm": 0.6003533005714417,
      "learning_rate": 4.925373134328359e-06,
      "loss": 1.9579,
      "step": 35
    },
    {
      "epoch": 0.5373134328358209,
      "grad_norm": 0.49033546447753906,
      "learning_rate": 4.7761194029850745e-06,
      "loss": 2.1126,
      "step": 36
    },
    {
      "epoch": 0.5522388059701493,
      "grad_norm": 0.5822514891624451,
      "learning_rate": 4.626865671641791e-06,
      "loss": 1.8489,
      "step": 37
    },
    {
      "epoch": 0.5671641791044776,
      "grad_norm": 0.5790143013000488,
      "learning_rate": 4.477611940298508e-06,
      "loss": 2.0096,
      "step": 38
    },
    {
      "epoch": 0.582089552238806,
      "grad_norm": 0.41039204597473145,
      "learning_rate": 4.3283582089552236e-06,
      "loss": 1.775,
      "step": 39
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 0.820061206817627,
      "learning_rate": 4.17910447761194e-06,
      "loss": 2.1653,
      "step": 40
    },
    {
      "epoch": 0.6119402985074627,
      "grad_norm": 0.6350656151771545,
      "learning_rate": 4.029850746268657e-06,
      "loss": 1.982,
      "step": 41
    },
    {
      "epoch": 0.6268656716417911,
      "grad_norm": 0.41239652037620544,
      "learning_rate": 3.8805970149253735e-06,
      "loss": 1.8828,
      "step": 42
    },
    {
      "epoch": 0.6417910447761194,
      "grad_norm": 0.5652564764022827,
      "learning_rate": 3.73134328358209e-06,
      "loss": 2.0692,
      "step": 43
    },
    {
      "epoch": 0.6567164179104478,
      "grad_norm": 0.6621966361999512,
      "learning_rate": 3.582089552238806e-06,
      "loss": 2.2952,
      "step": 44
    },
    {
      "epoch": 0.6716417910447762,
      "grad_norm": 0.4039974510669708,
      "learning_rate": 3.4328358208955225e-06,
      "loss": 1.7934,
      "step": 45
    },
    {
      "epoch": 0.6865671641791045,
      "grad_norm": 0.5079028606414795,
      "learning_rate": 3.283582089552239e-06,
      "loss": 2.0361,
      "step": 46
    },
    {
      "epoch": 0.7014925373134329,
      "grad_norm": 0.41614365577697754,
      "learning_rate": 3.1343283582089558e-06,
      "loss": 1.7775,
      "step": 47
    },
    {
      "epoch": 0.7164179104477612,
      "grad_norm": 0.3995169699192047,
      "learning_rate": 2.9850746268656716e-06,
      "loss": 1.9167,
      "step": 48
    },
    {
      "epoch": 0.7313432835820896,
      "grad_norm": 0.5954918265342712,
      "learning_rate": 2.835820895522388e-06,
      "loss": 2.0694,
      "step": 49
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 0.5778793692588806,
      "learning_rate": 2.686567164179105e-06,
      "loss": 2.0297,
      "step": 50
    },
    {
      "epoch": 0.7611940298507462,
      "grad_norm": 0.5707228183746338,
      "learning_rate": 2.537313432835821e-06,
      "loss": 2.201,
      "step": 51
    },
    {
      "epoch": 0.7761194029850746,
      "grad_norm": 0.6407202482223511,
      "learning_rate": 2.3880597014925373e-06,
      "loss": 2.4394,
      "step": 52
    },
    {
      "epoch": 0.7910447761194029,
      "grad_norm": 0.3686445355415344,
      "learning_rate": 2.238805970149254e-06,
      "loss": 2.2426,
      "step": 53
    },
    {
      "epoch": 0.8059701492537313,
      "grad_norm": 0.4780975580215454,
      "learning_rate": 2.08955223880597e-06,
      "loss": 1.888,
      "step": 54
    },
    {
      "epoch": 0.8208955223880597,
      "grad_norm": 0.5346677303314209,
      "learning_rate": 1.9402985074626867e-06,
      "loss": 1.9405,
      "step": 55
    },
    {
      "epoch": 0.835820895522388,
      "grad_norm": 0.4024548828601837,
      "learning_rate": 1.791044776119403e-06,
      "loss": 1.6834,
      "step": 56
    },
    {
      "epoch": 0.8507462686567164,
      "grad_norm": 0.501626193523407,
      "learning_rate": 1.6417910447761196e-06,
      "loss": 2.2497,
      "step": 57
    },
    {
      "epoch": 0.8656716417910447,
      "grad_norm": 0.44980672001838684,
      "learning_rate": 1.4925373134328358e-06,
      "loss": 1.8196,
      "step": 58
    },
    {
      "epoch": 0.8805970149253731,
      "grad_norm": 0.8795580863952637,
      "learning_rate": 1.3432835820895524e-06,
      "loss": 2.1529,
      "step": 59
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 0.5534031987190247,
      "learning_rate": 1.1940298507462686e-06,
      "loss": 2.1861,
      "step": 60
    },
    {
      "epoch": 0.9104477611940298,
      "grad_norm": 0.3944064676761627,
      "learning_rate": 1.044776119402985e-06,
      "loss": 1.7992,
      "step": 61
    },
    {
      "epoch": 0.9253731343283582,
      "grad_norm": 0.3771931231021881,
      "learning_rate": 8.955223880597015e-07,
      "loss": 1.6866,
      "step": 62
    },
    {
      "epoch": 0.9402985074626866,
      "grad_norm": 0.5111584067344666,
      "learning_rate": 7.462686567164179e-07,
      "loss": 1.995,
      "step": 63
    },
    {
      "epoch": 0.9552238805970149,
      "grad_norm": 0.5018301606178284,
      "learning_rate": 5.970149253731343e-07,
      "loss": 1.8922,
      "step": 64
    },
    {
      "epoch": 0.9701492537313433,
      "grad_norm": 0.3669991195201874,
      "learning_rate": 4.4776119402985074e-07,
      "loss": 2.0702,
      "step": 65
    },
    {
      "epoch": 0.9850746268656716,
      "grad_norm": 0.45055946707725525,
      "learning_rate": 2.9850746268656716e-07,
      "loss": 2.0245,
      "step": 66
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.5369859337806702,
      "learning_rate": 1.4925373134328358e-07,
      "loss": 2.2499,
      "step": 67
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.8387645483016968,
      "eval_runtime": 0.8427,
      "eval_samples_per_second": 43.907,
      "eval_steps_per_second": 5.933,
      "step": 67
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 67,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3350387805388800.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}