|
{ |
|
"best_global_step": 800, |
|
"best_metric": 0.10116679221391678, |
|
"best_model_checkpoint": "/tmp/results/checkpoint-800", |
|
"epoch": 0.8812999173781327, |
|
"eval_steps": 200, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.055081244836133296, |
|
"grad_norm": 0.15732981264591217, |
|
"learning_rate": 0.00019640234948604994, |
|
"loss": 1.2703, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11016248967226659, |
|
"grad_norm": 0.13457506895065308, |
|
"learning_rate": 0.00019273127753303965, |
|
"loss": 0.2101, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16524373450839988, |
|
"grad_norm": 0.11798923462629318, |
|
"learning_rate": 0.00018906020558002938, |
|
"loss": 0.1801, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22032497934453318, |
|
"grad_norm": 0.10562872886657715, |
|
"learning_rate": 0.00018538913362701911, |
|
"loss": 0.1621, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22032497934453318, |
|
"eval_loss": 0.15831586718559265, |
|
"eval_runtime": 95.9669, |
|
"eval_samples_per_second": 33.626, |
|
"eval_steps_per_second": 4.21, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27540622418066646, |
|
"grad_norm": 0.11607076972723007, |
|
"learning_rate": 0.00018171806167400882, |
|
"loss": 0.1526, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33048746901679976, |
|
"grad_norm": 0.20103491842746735, |
|
"learning_rate": 0.00017804698972099853, |
|
"loss": 0.1498, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.38556871385293306, |
|
"grad_norm": 0.20092593133449554, |
|
"learning_rate": 0.00017437591776798826, |
|
"loss": 0.1481, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.44064995868906637, |
|
"grad_norm": 0.15526321530342102, |
|
"learning_rate": 0.000170704845814978, |
|
"loss": 0.1533, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44064995868906637, |
|
"eval_loss": 0.13016295433044434, |
|
"eval_runtime": 96.1932, |
|
"eval_samples_per_second": 33.547, |
|
"eval_steps_per_second": 4.2, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.49573120352519967, |
|
"grad_norm": 0.23773688077926636, |
|
"learning_rate": 0.0001670337738619677, |
|
"loss": 0.1434, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5508124483613329, |
|
"grad_norm": 0.14756543934345245, |
|
"learning_rate": 0.00016336270190895743, |
|
"loss": 0.1316, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6058936931974662, |
|
"grad_norm": 0.19601435959339142, |
|
"learning_rate": 0.00015969162995594716, |
|
"loss": 0.1301, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6609749380335995, |
|
"grad_norm": 0.1771874874830246, |
|
"learning_rate": 0.00015602055800293687, |
|
"loss": 0.1215, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6609749380335995, |
|
"eval_loss": 0.11715047806501389, |
|
"eval_runtime": 95.8683, |
|
"eval_samples_per_second": 33.661, |
|
"eval_steps_per_second": 4.214, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7160561828697328, |
|
"grad_norm": 0.16557306051254272, |
|
"learning_rate": 0.00015234948604992657, |
|
"loss": 0.1139, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7711374277058661, |
|
"grad_norm": 0.2927381992340088, |
|
"learning_rate": 0.0001486784140969163, |
|
"loss": 0.1214, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8262186725419994, |
|
"grad_norm": 0.3338780701160431, |
|
"learning_rate": 0.00014500734214390604, |
|
"loss": 0.1285, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8812999173781327, |
|
"grad_norm": 0.2900579273700714, |
|
"learning_rate": 0.00014133627019089574, |
|
"loss": 0.1274, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8812999173781327, |
|
"eval_loss": 0.10116679221391678, |
|
"eval_runtime": 94.9307, |
|
"eval_samples_per_second": 33.993, |
|
"eval_steps_per_second": 4.256, |
|
"step": 800 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 2724, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 164707172352000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|