{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.888888888888889,
  "eval_steps": 500,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07407407407407407,
      "grad_norm": 1.7930933237075806,
      "learning_rate": 4e-05,
      "loss": 3.0367,
      "step": 1
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 2.1674394607543945,
      "learning_rate": 8e-05,
      "loss": 3.9768,
      "step": 2
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 1.868736982345581,
      "learning_rate": 0.00012,
      "loss": 3.2561,
      "step": 3
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 2.0688636302948,
      "learning_rate": 0.00016,
      "loss": 2.7526,
      "step": 4
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 1.9057183265686035,
      "learning_rate": 0.0002,
      "loss": 2.889,
      "step": 5
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 1.6972851753234863,
      "learning_rate": 0.0001942857142857143,
      "loss": 2.4989,
      "step": 6
    },
    {
      "epoch": 0.5185185185185185,
      "grad_norm": 3.0725390911102295,
      "learning_rate": 0.00018857142857142857,
      "loss": 3.584,
      "step": 7
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 2.9980862140655518,
      "learning_rate": 0.00018285714285714286,
      "loss": 2.9159,
      "step": 8
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 2.3923017978668213,
      "learning_rate": 0.00017714285714285713,
      "loss": 2.7993,
      "step": 9
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 1.8530240058898926,
      "learning_rate": 0.00017142857142857143,
      "loss": 2.0731,
      "step": 10
    },
    {
      "epoch": 0.8148148148148148,
      "grad_norm": 2.3305249214172363,
      "learning_rate": 0.00016571428571428575,
      "loss": 2.4704,
      "step": 11
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 2.562769889831543,
      "learning_rate": 0.00016,
      "loss": 2.1706,
      "step": 12
    },
    {
      "epoch": 0.9629629629629629,
      "grad_norm": 2.4493045806884766,
      "learning_rate": 0.0001542857142857143,
      "loss": 2.2292,
      "step": 13
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.4787449836730957,
      "learning_rate": 0.00014857142857142857,
      "loss": 2.1731,
      "step": 14
    },
    {
      "epoch": 1.074074074074074,
      "grad_norm": 2.0056028366088867,
      "learning_rate": 0.00014285714285714287,
      "loss": 1.937,
      "step": 15
    },
    {
      "epoch": 1.1481481481481481,
      "grad_norm": 1.9762787818908691,
      "learning_rate": 0.00013714285714285716,
      "loss": 1.644,
      "step": 16
    },
    {
      "epoch": 1.2222222222222223,
      "grad_norm": 2.3070919513702393,
      "learning_rate": 0.00013142857142857143,
      "loss": 2.0248,
      "step": 17
    },
    {
      "epoch": 1.2962962962962963,
      "grad_norm": 2.3103995323181152,
      "learning_rate": 0.00012571428571428572,
      "loss": 1.9342,
      "step": 18
    },
    {
      "epoch": 1.3703703703703702,
      "grad_norm": 2.19228458404541,
      "learning_rate": 0.00012,
      "loss": 1.5943,
      "step": 19
    },
    {
      "epoch": 1.4444444444444444,
      "grad_norm": 2.453838586807251,
      "learning_rate": 0.00011428571428571428,
      "loss": 1.7512,
      "step": 20
    },
    {
      "epoch": 1.5185185185185186,
      "grad_norm": 2.177041530609131,
      "learning_rate": 0.00010857142857142856,
      "loss": 1.7153,
      "step": 21
    },
    {
      "epoch": 1.5925925925925926,
      "grad_norm": 2.134157419204712,
      "learning_rate": 0.00010285714285714286,
      "loss": 1.6687,
      "step": 22
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 2.4414138793945312,
      "learning_rate": 9.714285714285715e-05,
      "loss": 1.9473,
      "step": 23
    },
    {
      "epoch": 1.7407407407407407,
      "grad_norm": 2.073535680770874,
      "learning_rate": 9.142857142857143e-05,
      "loss": 1.6294,
      "step": 24
    },
    {
      "epoch": 1.8148148148148149,
      "grad_norm": 2.7250802516937256,
      "learning_rate": 8.571428571428571e-05,
      "loss": 2.0007,
      "step": 25
    },
    {
      "epoch": 1.8888888888888888,
      "grad_norm": 2.2123806476593018,
      "learning_rate": 8e-05,
      "loss": 1.7899,
      "step": 26
    },
    {
      "epoch": 1.9629629629629628,
      "grad_norm": 2.318277597427368,
      "learning_rate": 7.428571428571429e-05,
      "loss": 1.6948,
      "step": 27
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.404044151306152,
      "learning_rate": 6.857142857142858e-05,
      "loss": 2.0492,
      "step": 28
    },
    {
      "epoch": 2.074074074074074,
      "grad_norm": 1.9140400886535645,
      "learning_rate": 6.285714285714286e-05,
      "loss": 1.4584,
      "step": 29
    },
    {
      "epoch": 2.148148148148148,
      "grad_norm": 2.488027334213257,
      "learning_rate": 5.714285714285714e-05,
      "loss": 1.7693,
      "step": 30
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 2.41856050491333,
      "learning_rate": 5.142857142857143e-05,
      "loss": 1.8544,
      "step": 31
    },
    {
      "epoch": 2.2962962962962963,
      "grad_norm": 1.8152332305908203,
      "learning_rate": 4.5714285714285716e-05,
      "loss": 1.3501,
      "step": 32
    },
    {
      "epoch": 2.3703703703703702,
      "grad_norm": 1.9145673513412476,
      "learning_rate": 4e-05,
      "loss": 1.3809,
      "step": 33
    },
    {
      "epoch": 2.4444444444444446,
      "grad_norm": 2.48303484916687,
      "learning_rate": 3.428571428571429e-05,
      "loss": 1.708,
      "step": 34
    },
    {
      "epoch": 2.5185185185185186,
      "grad_norm": 2.4277076721191406,
      "learning_rate": 2.857142857142857e-05,
      "loss": 1.6578,
      "step": 35
    },
    {
      "epoch": 2.5925925925925926,
      "grad_norm": 2.0969839096069336,
      "learning_rate": 2.2857142857142858e-05,
      "loss": 1.4353,
      "step": 36
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 2.2218620777130127,
      "learning_rate": 1.7142857142857145e-05,
      "loss": 1.565,
      "step": 37
    },
    {
      "epoch": 2.7407407407407405,
      "grad_norm": 1.8250045776367188,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 1.3109,
      "step": 38
    },
    {
      "epoch": 2.814814814814815,
      "grad_norm": 1.8353327512741089,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 1.4105,
      "step": 39
    },
    {
      "epoch": 2.888888888888889,
      "grad_norm": 2.04841947555542,
      "learning_rate": 0.0,
      "loss": 1.4229,
      "step": 40
    }
  ],
  "logging_steps": 1,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1271040566378496.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}