{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 51,
"global_step": 51,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0196078431372549,
"grad_norm": 6.204692840576172,
"learning_rate": 1e-05,
"loss": 3.7169,
"step": 1
},
{
"epoch": 0.0392156862745098,
"grad_norm": 5.544524192810059,
"learning_rate": 9.803921568627451e-06,
"loss": 3.5651,
"step": 2
},
{
"epoch": 0.058823529411764705,
"grad_norm": 4.741919040679932,
"learning_rate": 9.607843137254903e-06,
"loss": 3.3635,
"step": 3
},
{
"epoch": 0.0784313725490196,
"grad_norm": 3.850552797317505,
"learning_rate": 9.411764705882354e-06,
"loss": 3.1543,
"step": 4
},
{
"epoch": 0.09803921568627451,
"grad_norm": 3.0620617866516113,
"learning_rate": 9.215686274509804e-06,
"loss": 2.9776,
"step": 5
},
{
"epoch": 0.11764705882352941,
"grad_norm": 2.6284866333007812,
"learning_rate": 9.019607843137256e-06,
"loss": 2.8738,
"step": 6
},
{
"epoch": 0.13725490196078433,
"grad_norm": 2.1033711433410645,
"learning_rate": 8.823529411764707e-06,
"loss": 2.7088,
"step": 7
},
{
"epoch": 0.1568627450980392,
"grad_norm": 1.9232524633407593,
"learning_rate": 8.627450980392157e-06,
"loss": 2.6702,
"step": 8
},
{
"epoch": 0.17647058823529413,
"grad_norm": 1.697329044342041,
"learning_rate": 8.43137254901961e-06,
"loss": 2.5798,
"step": 9
},
{
"epoch": 0.19607843137254902,
"grad_norm": 1.4369592666625977,
"learning_rate": 8.23529411764706e-06,
"loss": 2.3988,
"step": 10
},
{
"epoch": 0.21568627450980393,
"grad_norm": 1.4095876216888428,
"learning_rate": 8.03921568627451e-06,
"loss": 2.4501,
"step": 11
},
{
"epoch": 0.23529411764705882,
"grad_norm": 1.2679307460784912,
"learning_rate": 7.84313725490196e-06,
"loss": 2.3684,
"step": 12
},
{
"epoch": 0.2549019607843137,
"grad_norm": 1.2137434482574463,
"learning_rate": 7.647058823529411e-06,
"loss": 2.3421,
"step": 13
},
{
"epoch": 0.27450980392156865,
"grad_norm": 1.1062148809432983,
"learning_rate": 7.450980392156863e-06,
"loss": 2.2561,
"step": 14
},
{
"epoch": 0.29411764705882354,
"grad_norm": 1.1174410581588745,
"learning_rate": 7.2549019607843145e-06,
"loss": 2.2687,
"step": 15
},
{
"epoch": 0.3137254901960784,
"grad_norm": 1.0426009893417358,
"learning_rate": 7.058823529411766e-06,
"loss": 2.1751,
"step": 16
},
{
"epoch": 0.3333333333333333,
"grad_norm": 1.0049128532409668,
"learning_rate": 6.862745098039216e-06,
"loss": 2.1172,
"step": 17
},
{
"epoch": 0.35294117647058826,
"grad_norm": 1.0266010761260986,
"learning_rate": 6.666666666666667e-06,
"loss": 2.1423,
"step": 18
},
{
"epoch": 0.37254901960784315,
"grad_norm": 0.9556562304496765,
"learning_rate": 6.470588235294119e-06,
"loss": 1.998,
"step": 19
},
{
"epoch": 0.39215686274509803,
"grad_norm": 0.9744943380355835,
"learning_rate": 6.274509803921569e-06,
"loss": 2.0312,
"step": 20
},
{
"epoch": 0.4117647058823529,
"grad_norm": 0.9564470648765564,
"learning_rate": 6.07843137254902e-06,
"loss": 2.0036,
"step": 21
},
{
"epoch": 0.43137254901960786,
"grad_norm": 0.9479807615280151,
"learning_rate": 5.882352941176471e-06,
"loss": 1.9646,
"step": 22
},
{
"epoch": 0.45098039215686275,
"grad_norm": 0.9132607579231262,
"learning_rate": 5.686274509803922e-06,
"loss": 1.9377,
"step": 23
},
{
"epoch": 0.47058823529411764,
"grad_norm": 0.8846307396888733,
"learning_rate": 5.4901960784313735e-06,
"loss": 1.8844,
"step": 24
},
{
"epoch": 0.49019607843137253,
"grad_norm": 0.8600879311561584,
"learning_rate": 5.294117647058824e-06,
"loss": 1.8365,
"step": 25
},
{
"epoch": 0.5098039215686274,
"grad_norm": 0.8480931520462036,
"learning_rate": 5.098039215686274e-06,
"loss": 1.8456,
"step": 26
},
{
"epoch": 0.5294117647058824,
"grad_norm": 0.8326075673103333,
"learning_rate": 4.901960784313726e-06,
"loss": 1.842,
"step": 27
},
{
"epoch": 0.5490196078431373,
"grad_norm": 0.8022368550300598,
"learning_rate": 4.705882352941177e-06,
"loss": 1.793,
"step": 28
},
{
"epoch": 0.5686274509803921,
"grad_norm": 0.7787200212478638,
"learning_rate": 4.509803921568628e-06,
"loss": 1.7824,
"step": 29
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.7797530293464661,
"learning_rate": 4.313725490196079e-06,
"loss": 1.7885,
"step": 30
},
{
"epoch": 0.6078431372549019,
"grad_norm": 0.754288911819458,
"learning_rate": 4.11764705882353e-06,
"loss": 1.7418,
"step": 31
},
{
"epoch": 0.6274509803921569,
"grad_norm": 0.7509904503822327,
"learning_rate": 3.92156862745098e-06,
"loss": 1.7535,
"step": 32
},
{
"epoch": 0.6470588235294118,
"grad_norm": 0.7244371771812439,
"learning_rate": 3.7254901960784316e-06,
"loss": 1.7225,
"step": 33
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.7066894769668579,
"learning_rate": 3.529411764705883e-06,
"loss": 1.7148,
"step": 34
},
{
"epoch": 0.6862745098039216,
"grad_norm": 0.6999155282974243,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.6988,
"step": 35
},
{
"epoch": 0.7058823529411765,
"grad_norm": 0.6919957995414734,
"learning_rate": 3.1372549019607846e-06,
"loss": 1.6716,
"step": 36
},
{
"epoch": 0.7254901960784313,
"grad_norm": 0.6656088829040527,
"learning_rate": 2.9411764705882355e-06,
"loss": 1.6497,
"step": 37
},
{
"epoch": 0.7450980392156863,
"grad_norm": 0.662615180015564,
"learning_rate": 2.7450980392156867e-06,
"loss": 1.6416,
"step": 38
},
{
"epoch": 0.7647058823529411,
"grad_norm": 0.6523113250732422,
"learning_rate": 2.549019607843137e-06,
"loss": 1.6252,
"step": 39
},
{
"epoch": 0.7843137254901961,
"grad_norm": 0.6525292992591858,
"learning_rate": 2.3529411764705885e-06,
"loss": 1.6153,
"step": 40
},
{
"epoch": 0.803921568627451,
"grad_norm": 0.6456802487373352,
"learning_rate": 2.1568627450980393e-06,
"loss": 1.6071,
"step": 41
},
{
"epoch": 0.8235294117647058,
"grad_norm": 0.6324870586395264,
"learning_rate": 1.96078431372549e-06,
"loss": 1.5709,
"step": 42
},
{
"epoch": 0.8431372549019608,
"grad_norm": 0.6390767693519592,
"learning_rate": 1.7647058823529414e-06,
"loss": 1.588,
"step": 43
},
{
"epoch": 0.8627450980392157,
"grad_norm": 0.6343661546707153,
"learning_rate": 1.5686274509803923e-06,
"loss": 1.5631,
"step": 44
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.633243978023529,
"learning_rate": 1.3725490196078434e-06,
"loss": 1.5654,
"step": 45
},
{
"epoch": 0.9019607843137255,
"grad_norm": 0.6330836415290833,
"learning_rate": 1.1764705882352942e-06,
"loss": 1.5721,
"step": 46
},
{
"epoch": 0.9215686274509803,
"grad_norm": 0.6343224048614502,
"learning_rate": 9.80392156862745e-07,
"loss": 1.5629,
"step": 47
},
{
"epoch": 0.9411764705882353,
"grad_norm": 0.6233211159706116,
"learning_rate": 7.843137254901962e-07,
"loss": 1.5648,
"step": 48
},
{
"epoch": 0.9607843137254902,
"grad_norm": 0.6273109316825867,
"learning_rate": 5.882352941176471e-07,
"loss": 1.5701,
"step": 49
},
{
"epoch": 0.9803921568627451,
"grad_norm": 0.6393752694129944,
"learning_rate": 3.921568627450981e-07,
"loss": 1.5762,
"step": 50
},
{
"epoch": 1.0,
"grad_norm": 0.6112051606178284,
"learning_rate": 1.9607843137254904e-07,
"loss": 1.5457,
"step": 51
},
{
"epoch": 1.0,
"eval_loss": 1.5631208419799805,
"eval_runtime": 16.2004,
"eval_samples_per_second": 0.37,
"eval_steps_per_second": 0.062,
"step": 51
}
],
"logging_steps": 1.0,
"max_steps": 51,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6061603639341875e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}