|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.013066024253807521, |
|
"eval_steps": 5, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006533012126903761, |
|
"grad_norm": 0.47401177883148193, |
|
"learning_rate": 1e-05, |
|
"loss": 1.7067, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006533012126903761, |
|
"eval_loss": 1.999820590019226, |
|
"eval_runtime": 112.5729, |
|
"eval_samples_per_second": 22.901, |
|
"eval_steps_per_second": 11.45, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0013066024253807521, |
|
"grad_norm": 0.505779504776001, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9039, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0019599036380711283, |
|
"grad_norm": 0.5579796433448792, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8283, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0026132048507615043, |
|
"grad_norm": 0.5206772089004517, |
|
"learning_rate": 4e-05, |
|
"loss": 1.8651, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0032665060634518802, |
|
"grad_norm": 0.5004145503044128, |
|
"learning_rate": 5e-05, |
|
"loss": 1.9099, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0032665060634518802, |
|
"eval_loss": 1.9857652187347412, |
|
"eval_runtime": 43.7424, |
|
"eval_samples_per_second": 58.936, |
|
"eval_steps_per_second": 29.468, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.003919807276142257, |
|
"grad_norm": 0.5522999167442322, |
|
"learning_rate": 6e-05, |
|
"loss": 1.9055, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0045731084888326326, |
|
"grad_norm": 0.4944389760494232, |
|
"learning_rate": 7e-05, |
|
"loss": 1.8609, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0052264097015230085, |
|
"grad_norm": 0.5072996020317078, |
|
"learning_rate": 8e-05, |
|
"loss": 1.9091, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0058797109142133845, |
|
"grad_norm": 0.5102063417434692, |
|
"learning_rate": 9e-05, |
|
"loss": 1.821, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0065330121269037604, |
|
"grad_norm": 0.4876532256603241, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7781, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0065330121269037604, |
|
"eval_loss": 1.9432145357131958, |
|
"eval_runtime": 45.4678, |
|
"eval_samples_per_second": 56.699, |
|
"eval_steps_per_second": 28.35, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007186313339594136, |
|
"grad_norm": 0.5278764367103577, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.8105, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.007839614552284513, |
|
"grad_norm": 0.46804916858673096, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.9332, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00849291576497489, |
|
"grad_norm": 0.5591187477111816, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 1.9395, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.009146216977665265, |
|
"grad_norm": 0.43466073274612427, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 1.7936, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.009799518190355641, |
|
"grad_norm": 0.5184069871902466, |
|
"learning_rate": 5e-05, |
|
"loss": 1.812, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.009799518190355641, |
|
"eval_loss": 1.9075682163238525, |
|
"eval_runtime": 56.4883, |
|
"eval_samples_per_second": 45.638, |
|
"eval_steps_per_second": 22.819, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.010452819403046017, |
|
"grad_norm": 0.5313090682029724, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 1.8681, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.011106120615736393, |
|
"grad_norm": 0.4654507339000702, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 1.8468, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.011759421828426769, |
|
"grad_norm": 0.5464712977409363, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 1.8831, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.012412723041117145, |
|
"grad_norm": 0.5164174437522888, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 1.9027, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.013066024253807521, |
|
"grad_norm": 0.4609745442867279, |
|
"learning_rate": 0.0, |
|
"loss": 1.8915, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.013066024253807521, |
|
"eval_loss": 1.8998370170593262, |
|
"eval_runtime": 45.7387, |
|
"eval_samples_per_second": 56.364, |
|
"eval_steps_per_second": 28.182, |
|
"step": 20 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 897059500326912.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|