|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.001779061865097297, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.895309325486485e-05, |
|
"grad_norm": 0.1897999793291092, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.3044, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0001779061865097297, |
|
"grad_norm": 0.23376008868217468, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.3592, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0002668592797645945, |
|
"grad_norm": 0.13603591918945312, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.084, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0003558123730194594, |
|
"grad_norm": 0.34403547644615173, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.076, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00044476546627432423, |
|
"grad_norm": 0.1530287265777588, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.7806, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.000533718559529189, |
|
"grad_norm": 0.344722718000412, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.5749, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0006226716527840539, |
|
"grad_norm": 0.38181471824645996, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.5243, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.0007116247460389188, |
|
"grad_norm": 0.25272616744041443, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.4728, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0008005778392937836, |
|
"grad_norm": 0.6237773299217224, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.3028, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.0008895309325486485, |
|
"grad_norm": 0.5120233297348022, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.0721, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0009784840258035134, |
|
"grad_norm": 0.6732835173606873, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.8071, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.001067437119058378, |
|
"grad_norm": 0.5018543004989624, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.8138, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.001156390212313243, |
|
"grad_norm": 0.24052944779396057, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.702, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.0012453433055681078, |
|
"grad_norm": 0.2696482837200165, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6689, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0013342963988229727, |
|
"grad_norm": 0.21222035586833954, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.7498, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.0014232494920778376, |
|
"grad_norm": 0.27624765038490295, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6544, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0015122025853327023, |
|
"grad_norm": 0.8518249988555908, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6908, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.0016011556785875672, |
|
"grad_norm": 0.588943600654602, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.7507, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.001690108771842432, |
|
"grad_norm": 0.4197629690170288, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.8013, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.001779061865097297, |
|
"grad_norm": 0.48924073576927185, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6292, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.087597314048e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|