|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 96980, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.406856536865234, |
|
"learning_rate": 4.750206228088266e-05, |
|
"loss": 3.599, |
|
"step": 4849 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.830796480178833, |
|
"eval_runtime": 63.7615, |
|
"eval_samples_per_second": 608.329, |
|
"eval_steps_per_second": 19.024, |
|
"step": 4849 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.6979801654815674, |
|
"learning_rate": 4.5002577851103326e-05, |
|
"loss": 2.6769, |
|
"step": 9698 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.430718183517456, |
|
"eval_runtime": 62.9007, |
|
"eval_samples_per_second": 616.654, |
|
"eval_steps_per_second": 19.284, |
|
"step": 9698 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.8321845531463623, |
|
"learning_rate": 4.2503608991544655e-05, |
|
"loss": 2.3727, |
|
"step": 14547 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.206211566925049, |
|
"eval_runtime": 62.7764, |
|
"eval_samples_per_second": 617.876, |
|
"eval_steps_per_second": 19.323, |
|
"step": 14547 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.3764877319335938, |
|
"learning_rate": 4.000464013198598e-05, |
|
"loss": 2.1798, |
|
"step": 19396 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.0566577911376953, |
|
"eval_runtime": 62.7, |
|
"eval_samples_per_second": 618.629, |
|
"eval_steps_per_second": 19.346, |
|
"step": 19396 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.017418146133423, |
|
"learning_rate": 3.750567127242731e-05, |
|
"loss": 2.041, |
|
"step": 24245 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.943306803703308, |
|
"eval_runtime": 62.7554, |
|
"eval_samples_per_second": 618.082, |
|
"eval_steps_per_second": 19.329, |
|
"step": 24245 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.028724431991577, |
|
"learning_rate": 3.500567127242731e-05, |
|
"loss": 1.9417, |
|
"step": 29094 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.8640257120132446, |
|
"eval_runtime": 63.2843, |
|
"eval_samples_per_second": 612.916, |
|
"eval_steps_per_second": 19.167, |
|
"step": 29094 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.9556593894958496, |
|
"learning_rate": 3.2506702412868636e-05, |
|
"loss": 1.8625, |
|
"step": 33943 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.7964398860931396, |
|
"eval_runtime": 63.0672, |
|
"eval_samples_per_second": 615.027, |
|
"eval_steps_per_second": 19.233, |
|
"step": 33943 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.954166889190674, |
|
"learning_rate": 3.000670241286863e-05, |
|
"loss": 1.797, |
|
"step": 38792 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.7490918636322021, |
|
"eval_runtime": 63.1115, |
|
"eval_samples_per_second": 614.595, |
|
"eval_steps_per_second": 19.22, |
|
"step": 38792 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 2.954655885696411, |
|
"learning_rate": 2.7508249123530627e-05, |
|
"loss": 1.7491, |
|
"step": 43641 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.7073553800582886, |
|
"eval_runtime": 63.2567, |
|
"eval_samples_per_second": 613.184, |
|
"eval_steps_per_second": 19.176, |
|
"step": 43641 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.0339479446411133, |
|
"learning_rate": 2.5009280263971953e-05, |
|
"loss": 1.7049, |
|
"step": 48490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.6738481521606445, |
|
"eval_runtime": 63.2477, |
|
"eval_samples_per_second": 613.271, |
|
"eval_steps_per_second": 19.179, |
|
"step": 48490 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.4147965908050537, |
|
"learning_rate": 2.2509795834192618e-05, |
|
"loss": 1.6655, |
|
"step": 53339 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.6354576349258423, |
|
"eval_runtime": 63.6046, |
|
"eval_samples_per_second": 609.83, |
|
"eval_steps_per_second": 19.071, |
|
"step": 53339 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.4822511672973633, |
|
"learning_rate": 2.0010826974633944e-05, |
|
"loss": 1.6295, |
|
"step": 58188 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.6142407655715942, |
|
"eval_runtime": 63.1801, |
|
"eval_samples_per_second": 613.927, |
|
"eval_steps_per_second": 19.199, |
|
"step": 58188 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 3.052323341369629, |
|
"learning_rate": 1.751134254485461e-05, |
|
"loss": 1.6022, |
|
"step": 63037 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.595221757888794, |
|
"eval_runtime": 63.3841, |
|
"eval_samples_per_second": 611.951, |
|
"eval_steps_per_second": 19.137, |
|
"step": 63037 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 3.0843849182128906, |
|
"learning_rate": 1.5012373685295938e-05, |
|
"loss": 1.5769, |
|
"step": 67886 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.5756992101669312, |
|
"eval_runtime": 61.4761, |
|
"eval_samples_per_second": 630.944, |
|
"eval_steps_per_second": 19.731, |
|
"step": 67886 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.4379537105560303, |
|
"learning_rate": 1.2513404825737265e-05, |
|
"loss": 1.5571, |
|
"step": 72735 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.554135799407959, |
|
"eval_runtime": 62.2027, |
|
"eval_samples_per_second": 623.574, |
|
"eval_steps_per_second": 19.501, |
|
"step": 72735 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 3.0627856254577637, |
|
"learning_rate": 1.0014435966178593e-05, |
|
"loss": 1.5343, |
|
"step": 77584 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.541438102722168, |
|
"eval_runtime": 61.9741, |
|
"eval_samples_per_second": 625.874, |
|
"eval_steps_per_second": 19.573, |
|
"step": 77584 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.1744439601898193, |
|
"learning_rate": 7.5154671066199224e-06, |
|
"loss": 1.5188, |
|
"step": 82433 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.5268478393554688, |
|
"eval_runtime": 61.9172, |
|
"eval_samples_per_second": 626.449, |
|
"eval_steps_per_second": 19.591, |
|
"step": 82433 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.5251266956329346, |
|
"learning_rate": 5.015982676840586e-06, |
|
"loss": 1.5037, |
|
"step": 87282 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.5217667818069458, |
|
"eval_runtime": 61.3504, |
|
"eval_samples_per_second": 632.237, |
|
"eval_steps_per_second": 19.772, |
|
"step": 87282 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.248511552810669, |
|
"learning_rate": 2.517013817281914e-06, |
|
"loss": 1.4927, |
|
"step": 92131 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.511409044265747, |
|
"eval_runtime": 62.5116, |
|
"eval_samples_per_second": 620.493, |
|
"eval_steps_per_second": 19.404, |
|
"step": 92131 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.856531858444214, |
|
"learning_rate": 1.7529387502577853e-08, |
|
"loss": 1.4862, |
|
"step": 96980 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.5111480951309204, |
|
"eval_runtime": 61.8388, |
|
"eval_samples_per_second": 627.244, |
|
"eval_steps_per_second": 19.616, |
|
"step": 96980 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 96980, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.0420834325692416e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|