|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.991097922848665, |
|
"eval_steps": 500, |
|
"global_step": 126, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11869436201780416, |
|
"grad_norm": 0.6501234173774719, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5793, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.23738872403560832, |
|
"grad_norm": 0.9226903915405273, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2797, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3560830860534125, |
|
"grad_norm": 1.3594255447387695, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8775, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.47477744807121663, |
|
"grad_norm": 1.391525149345398, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4896, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5934718100890207, |
|
"grad_norm": 0.9402475357055664, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2145, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.712166172106825, |
|
"grad_norm": 0.516862690448761, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0708, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8308605341246291, |
|
"grad_norm": 0.39975354075431824, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9909, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.9495548961424333, |
|
"grad_norm": 0.4522175192832947, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9651, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0682492581602374, |
|
"grad_norm": 0.4957733452320099, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9213, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.1869436201780414, |
|
"grad_norm": 0.45304545760154724, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9047, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3056379821958457, |
|
"grad_norm": 0.6747499108314514, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8819, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.4243323442136497, |
|
"grad_norm": 0.7882275581359863, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8359, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.543026706231454, |
|
"grad_norm": 0.42021647095680237, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8254, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.6617210682492582, |
|
"grad_norm": 0.41371551156044006, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7991, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7804154302670623, |
|
"grad_norm": 0.45561087131500244, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7887, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.8991097922848663, |
|
"grad_norm": 0.40611913800239563, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7941, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0178041543026706, |
|
"grad_norm": 0.5473902225494385, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7779, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.136498516320475, |
|
"grad_norm": 0.4852384924888611, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7517, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.255192878338279, |
|
"grad_norm": 0.4257807731628418, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7545, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.373887240356083, |
|
"grad_norm": 0.4694693386554718, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7389, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.492581602373887, |
|
"grad_norm": 0.46692556142807007, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7348, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.6112759643916914, |
|
"grad_norm": 0.38663822412490845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7368, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.7299703264094957, |
|
"grad_norm": 0.4077226519584656, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7335, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.8486646884272995, |
|
"grad_norm": 0.4740726351737976, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7462, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.9673590504451037, |
|
"grad_norm": 0.40621665120124817, |
|
"learning_rate": 0.0002, |
|
"loss": 0.75, |
|
"step": 125 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 126, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.310063237541069e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|