|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 51, |
|
"global_step": 51, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0196078431372549, |
|
"grad_norm": 6.204692840576172, |
|
"learning_rate": 1e-05, |
|
"loss": 3.7169, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0392156862745098, |
|
"grad_norm": 5.544524192810059, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 3.5651, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.058823529411764705, |
|
"grad_norm": 4.741919040679932, |
|
"learning_rate": 9.607843137254903e-06, |
|
"loss": 3.3635, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0784313725490196, |
|
"grad_norm": 3.850552797317505, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 3.1543, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.09803921568627451, |
|
"grad_norm": 3.0620617866516113, |
|
"learning_rate": 9.215686274509804e-06, |
|
"loss": 2.9776, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 2.6284866333007812, |
|
"learning_rate": 9.019607843137256e-06, |
|
"loss": 2.8738, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.13725490196078433, |
|
"grad_norm": 2.1033711433410645, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 2.7088, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1568627450980392, |
|
"grad_norm": 1.9232524633407593, |
|
"learning_rate": 8.627450980392157e-06, |
|
"loss": 2.6702, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.17647058823529413, |
|
"grad_norm": 1.697329044342041, |
|
"learning_rate": 8.43137254901961e-06, |
|
"loss": 2.5798, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.19607843137254902, |
|
"grad_norm": 1.4369592666625977, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 2.3988, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.21568627450980393, |
|
"grad_norm": 1.4095876216888428, |
|
"learning_rate": 8.03921568627451e-06, |
|
"loss": 2.4501, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.23529411764705882, |
|
"grad_norm": 1.2679307460784912, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 2.3684, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2549019607843137, |
|
"grad_norm": 1.2137434482574463, |
|
"learning_rate": 7.647058823529411e-06, |
|
"loss": 2.3421, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.27450980392156865, |
|
"grad_norm": 1.1062148809432983, |
|
"learning_rate": 7.450980392156863e-06, |
|
"loss": 2.2561, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 1.1174410581588745, |
|
"learning_rate": 7.2549019607843145e-06, |
|
"loss": 2.2687, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3137254901960784, |
|
"grad_norm": 1.0426009893417358, |
|
"learning_rate": 7.058823529411766e-06, |
|
"loss": 2.1751, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 1.0049128532409668, |
|
"learning_rate": 6.862745098039216e-06, |
|
"loss": 2.1172, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.35294117647058826, |
|
"grad_norm": 1.0266010761260986, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 2.1423, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.37254901960784315, |
|
"grad_norm": 0.9556562304496765, |
|
"learning_rate": 6.470588235294119e-06, |
|
"loss": 1.998, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.39215686274509803, |
|
"grad_norm": 0.9744943380355835, |
|
"learning_rate": 6.274509803921569e-06, |
|
"loss": 2.0312, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4117647058823529, |
|
"grad_norm": 0.9564470648765564, |
|
"learning_rate": 6.07843137254902e-06, |
|
"loss": 2.0036, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.43137254901960786, |
|
"grad_norm": 0.9479807615280151, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 1.9646, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.45098039215686275, |
|
"grad_norm": 0.9132607579231262, |
|
"learning_rate": 5.686274509803922e-06, |
|
"loss": 1.9377, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.47058823529411764, |
|
"grad_norm": 0.8846307396888733, |
|
"learning_rate": 5.4901960784313735e-06, |
|
"loss": 1.8844, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.49019607843137253, |
|
"grad_norm": 0.8600879311561584, |
|
"learning_rate": 5.294117647058824e-06, |
|
"loss": 1.8365, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5098039215686274, |
|
"grad_norm": 0.8480931520462036, |
|
"learning_rate": 5.098039215686274e-06, |
|
"loss": 1.8456, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.5294117647058824, |
|
"grad_norm": 0.8326075673103333, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 1.842, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.5490196078431373, |
|
"grad_norm": 0.8022368550300598, |
|
"learning_rate": 4.705882352941177e-06, |
|
"loss": 1.793, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.5686274509803921, |
|
"grad_norm": 0.7787200212478638, |
|
"learning_rate": 4.509803921568628e-06, |
|
"loss": 1.7824, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 0.7797530293464661, |
|
"learning_rate": 4.313725490196079e-06, |
|
"loss": 1.7885, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6078431372549019, |
|
"grad_norm": 0.754288911819458, |
|
"learning_rate": 4.11764705882353e-06, |
|
"loss": 1.7418, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"grad_norm": 0.7509904503822327, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 1.7535, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.6470588235294118, |
|
"grad_norm": 0.7244371771812439, |
|
"learning_rate": 3.7254901960784316e-06, |
|
"loss": 1.7225, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.7066894769668579, |
|
"learning_rate": 3.529411764705883e-06, |
|
"loss": 1.7148, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.6862745098039216, |
|
"grad_norm": 0.6999155282974243, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.6988, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 0.6919957995414734, |
|
"learning_rate": 3.1372549019607846e-06, |
|
"loss": 1.6716, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.7254901960784313, |
|
"grad_norm": 0.6656088829040527, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 1.6497, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.7450980392156863, |
|
"grad_norm": 0.662615180015564, |
|
"learning_rate": 2.7450980392156867e-06, |
|
"loss": 1.6416, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.7647058823529411, |
|
"grad_norm": 0.6523113250732422, |
|
"learning_rate": 2.549019607843137e-06, |
|
"loss": 1.6252, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 0.6525292992591858, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 1.6153, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.803921568627451, |
|
"grad_norm": 0.6456802487373352, |
|
"learning_rate": 2.1568627450980393e-06, |
|
"loss": 1.6071, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.8235294117647058, |
|
"grad_norm": 0.6324870586395264, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 1.5709, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.8431372549019608, |
|
"grad_norm": 0.6390767693519592, |
|
"learning_rate": 1.7647058823529414e-06, |
|
"loss": 1.588, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.8627450980392157, |
|
"grad_norm": 0.6343661546707153, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"loss": 1.5631, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 0.633243978023529, |
|
"learning_rate": 1.3725490196078434e-06, |
|
"loss": 1.5654, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.9019607843137255, |
|
"grad_norm": 0.6330836415290833, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"loss": 1.5721, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.9215686274509803, |
|
"grad_norm": 0.6343224048614502, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 1.5629, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 0.6233211159706116, |
|
"learning_rate": 7.843137254901962e-07, |
|
"loss": 1.5648, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.9607843137254902, |
|
"grad_norm": 0.6273109316825867, |
|
"learning_rate": 5.882352941176471e-07, |
|
"loss": 1.5701, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 0.6393752694129944, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 1.5762, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6112051606178284, |
|
"learning_rate": 1.9607843137254904e-07, |
|
"loss": 1.5457, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.5631208419799805, |
|
"eval_runtime": 16.2004, |
|
"eval_samples_per_second": 0.37, |
|
"eval_steps_per_second": 0.062, |
|
"step": 51 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 51, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.6061603639341875e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|