{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 268,
"global_step": 268,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0037313432835820895,
"grad_norm": 0.46998512744903564,
"learning_rate": 1e-05,
"loss": 2.2849,
"step": 1
},
{
"epoch": 0.007462686567164179,
"grad_norm": 0.47207218408584595,
"learning_rate": 9.96268656716418e-06,
"loss": 2.3201,
"step": 2
},
{
"epoch": 0.011194029850746268,
"grad_norm": 0.4637902081012726,
"learning_rate": 9.925373134328359e-06,
"loss": 2.2301,
"step": 3
},
{
"epoch": 0.014925373134328358,
"grad_norm": 0.4802502691745758,
"learning_rate": 9.888059701492538e-06,
"loss": 2.304,
"step": 4
},
{
"epoch": 0.018656716417910446,
"grad_norm": 0.47606322169303894,
"learning_rate": 9.850746268656717e-06,
"loss": 2.2004,
"step": 5
},
{
"epoch": 0.022388059701492536,
"grad_norm": 0.49085694551467896,
"learning_rate": 9.813432835820897e-06,
"loss": 2.2043,
"step": 6
},
{
"epoch": 0.026119402985074626,
"grad_norm": 0.4777446389198303,
"learning_rate": 9.776119402985076e-06,
"loss": 2.3117,
"step": 7
},
{
"epoch": 0.029850746268656716,
"grad_norm": 0.4715961813926697,
"learning_rate": 9.738805970149255e-06,
"loss": 2.2778,
"step": 8
},
{
"epoch": 0.033582089552238806,
"grad_norm": 0.467326819896698,
"learning_rate": 9.701492537313434e-06,
"loss": 2.2245,
"step": 9
},
{
"epoch": 0.03731343283582089,
"grad_norm": 0.47078824043273926,
"learning_rate": 9.664179104477612e-06,
"loss": 2.1943,
"step": 10
},
{
"epoch": 0.041044776119402986,
"grad_norm": 0.44834914803504944,
"learning_rate": 9.626865671641792e-06,
"loss": 2.1906,
"step": 11
},
{
"epoch": 0.04477611940298507,
"grad_norm": 0.4226062297821045,
"learning_rate": 9.589552238805972e-06,
"loss": 2.1391,
"step": 12
},
{
"epoch": 0.048507462686567165,
"grad_norm": 0.4238628149032593,
"learning_rate": 9.552238805970149e-06,
"loss": 2.1398,
"step": 13
},
{
"epoch": 0.05223880597014925,
"grad_norm": 0.3930586576461792,
"learning_rate": 9.51492537313433e-06,
"loss": 2.1115,
"step": 14
},
{
"epoch": 0.055970149253731345,
"grad_norm": 0.37835636734962463,
"learning_rate": 9.477611940298507e-06,
"loss": 2.045,
"step": 15
},
{
"epoch": 0.05970149253731343,
"grad_norm": 0.38422954082489014,
"learning_rate": 9.440298507462688e-06,
"loss": 2.0591,
"step": 16
},
{
"epoch": 0.06343283582089553,
"grad_norm": 0.358707994222641,
"learning_rate": 9.402985074626867e-06,
"loss": 1.9614,
"step": 17
},
{
"epoch": 0.06716417910447761,
"grad_norm": 0.3842602074146271,
"learning_rate": 9.365671641791045e-06,
"loss": 2.0232,
"step": 18
},
{
"epoch": 0.0708955223880597,
"grad_norm": 0.38415899872779846,
"learning_rate": 9.328358208955226e-06,
"loss": 2.0019,
"step": 19
},
{
"epoch": 0.07462686567164178,
"grad_norm": 0.3730379641056061,
"learning_rate": 9.291044776119403e-06,
"loss": 2.0331,
"step": 20
},
{
"epoch": 0.07835820895522388,
"grad_norm": 0.38554647564888,
"learning_rate": 9.253731343283582e-06,
"loss": 2.0003,
"step": 21
},
{
"epoch": 0.08208955223880597,
"grad_norm": 0.3622361719608307,
"learning_rate": 9.216417910447763e-06,
"loss": 1.9381,
"step": 22
},
{
"epoch": 0.08582089552238806,
"grad_norm": 0.3498849868774414,
"learning_rate": 9.17910447761194e-06,
"loss": 1.9068,
"step": 23
},
{
"epoch": 0.08955223880597014,
"grad_norm": 0.3396073877811432,
"learning_rate": 9.14179104477612e-06,
"loss": 1.961,
"step": 24
},
{
"epoch": 0.09328358208955224,
"grad_norm": 0.31098049879074097,
"learning_rate": 9.104477611940299e-06,
"loss": 1.7587,
"step": 25
},
{
"epoch": 0.09701492537313433,
"grad_norm": 0.3291577696800232,
"learning_rate": 9.067164179104478e-06,
"loss": 1.9357,
"step": 26
},
{
"epoch": 0.10074626865671642,
"grad_norm": 0.3065924346446991,
"learning_rate": 9.029850746268657e-06,
"loss": 1.9325,
"step": 27
},
{
"epoch": 0.1044776119402985,
"grad_norm": 0.30660203099250793,
"learning_rate": 8.992537313432836e-06,
"loss": 1.9206,
"step": 28
},
{
"epoch": 0.10820895522388059,
"grad_norm": 0.2934313118457794,
"learning_rate": 8.955223880597016e-06,
"loss": 1.835,
"step": 29
},
{
"epoch": 0.11194029850746269,
"grad_norm": 0.2784007787704468,
"learning_rate": 8.917910447761195e-06,
"loss": 1.8075,
"step": 30
},
{
"epoch": 0.11567164179104478,
"grad_norm": 0.28618645668029785,
"learning_rate": 8.880597014925374e-06,
"loss": 1.8403,
"step": 31
},
{
"epoch": 0.11940298507462686,
"grad_norm": 0.28768670558929443,
"learning_rate": 8.843283582089553e-06,
"loss": 1.8655,
"step": 32
},
{
"epoch": 0.12313432835820895,
"grad_norm": 0.27391666173934937,
"learning_rate": 8.805970149253732e-06,
"loss": 1.7614,
"step": 33
},
{
"epoch": 0.12686567164179105,
"grad_norm": 0.32307982444763184,
"learning_rate": 8.768656716417911e-06,
"loss": 1.7933,
"step": 34
},
{
"epoch": 0.13059701492537312,
"grad_norm": 0.2709297239780426,
"learning_rate": 8.73134328358209e-06,
"loss": 1.7644,
"step": 35
},
{
"epoch": 0.13432835820895522,
"grad_norm": 0.2776687741279602,
"learning_rate": 8.69402985074627e-06,
"loss": 1.7511,
"step": 36
},
{
"epoch": 0.13805970149253732,
"grad_norm": 0.2562324106693268,
"learning_rate": 8.656716417910447e-06,
"loss": 1.683,
"step": 37
},
{
"epoch": 0.1417910447761194,
"grad_norm": 0.26197320222854614,
"learning_rate": 8.619402985074628e-06,
"loss": 1.7223,
"step": 38
},
{
"epoch": 0.1455223880597015,
"grad_norm": 0.258398175239563,
"learning_rate": 8.582089552238807e-06,
"loss": 1.7488,
"step": 39
},
{
"epoch": 0.14925373134328357,
"grad_norm": 0.2696782052516937,
"learning_rate": 8.544776119402986e-06,
"loss": 1.7865,
"step": 40
},
{
"epoch": 0.15298507462686567,
"grad_norm": 0.2557496726512909,
"learning_rate": 8.507462686567165e-06,
"loss": 1.7307,
"step": 41
},
{
"epoch": 0.15671641791044777,
"grad_norm": 0.23585352301597595,
"learning_rate": 8.470149253731343e-06,
"loss": 1.6636,
"step": 42
},
{
"epoch": 0.16044776119402984,
"grad_norm": 0.2424355298280716,
"learning_rate": 8.432835820895524e-06,
"loss": 1.6547,
"step": 43
},
{
"epoch": 0.16417910447761194,
"grad_norm": 0.23426097631454468,
"learning_rate": 8.395522388059703e-06,
"loss": 1.6661,
"step": 44
},
{
"epoch": 0.16791044776119404,
"grad_norm": 0.2633323669433594,
"learning_rate": 8.35820895522388e-06,
"loss": 1.7363,
"step": 45
},
{
"epoch": 0.17164179104477612,
"grad_norm": 0.24745310842990875,
"learning_rate": 8.320895522388061e-06,
"loss": 1.784,
"step": 46
},
{
"epoch": 0.17537313432835822,
"grad_norm": 0.2138734757900238,
"learning_rate": 8.283582089552239e-06,
"loss": 1.5783,
"step": 47
},
{
"epoch": 0.1791044776119403,
"grad_norm": 0.21660542488098145,
"learning_rate": 8.246268656716418e-06,
"loss": 1.5874,
"step": 48
},
{
"epoch": 0.1828358208955224,
"grad_norm": 0.21672111749649048,
"learning_rate": 8.208955223880599e-06,
"loss": 1.5766,
"step": 49
},
{
"epoch": 0.1865671641791045,
"grad_norm": 0.21459434926509857,
"learning_rate": 8.171641791044776e-06,
"loss": 1.6418,
"step": 50
},
{
"epoch": 0.19029850746268656,
"grad_norm": 0.21707722544670105,
"learning_rate": 8.134328358208955e-06,
"loss": 1.6423,
"step": 51
},
{
"epoch": 0.19402985074626866,
"grad_norm": 0.20363767445087433,
"learning_rate": 8.097014925373135e-06,
"loss": 1.5975,
"step": 52
},
{
"epoch": 0.19776119402985073,
"grad_norm": 0.21339042484760284,
"learning_rate": 8.059701492537314e-06,
"loss": 1.6352,
"step": 53
},
{
"epoch": 0.20149253731343283,
"grad_norm": 0.20369480550289154,
"learning_rate": 8.022388059701493e-06,
"loss": 1.5614,
"step": 54
},
{
"epoch": 0.20522388059701493,
"grad_norm": 0.2043503224849701,
"learning_rate": 7.985074626865672e-06,
"loss": 1.5343,
"step": 55
},
{
"epoch": 0.208955223880597,
"grad_norm": 0.2015250027179718,
"learning_rate": 7.947761194029851e-06,
"loss": 1.5762,
"step": 56
},
{
"epoch": 0.2126865671641791,
"grad_norm": 0.19287531077861786,
"learning_rate": 7.91044776119403e-06,
"loss": 1.5304,
"step": 57
},
{
"epoch": 0.21641791044776118,
"grad_norm": 0.19933679699897766,
"learning_rate": 7.87313432835821e-06,
"loss": 1.5933,
"step": 58
},
{
"epoch": 0.22014925373134328,
"grad_norm": 0.19223818182945251,
"learning_rate": 7.835820895522389e-06,
"loss": 1.5608,
"step": 59
},
{
"epoch": 0.22388059701492538,
"grad_norm": 0.1928456425666809,
"learning_rate": 7.798507462686568e-06,
"loss": 1.5709,
"step": 60
},
{
"epoch": 0.22761194029850745,
"grad_norm": 0.18153917789459229,
"learning_rate": 7.761194029850747e-06,
"loss": 1.5608,
"step": 61
},
{
"epoch": 0.23134328358208955,
"grad_norm": 0.18808996677398682,
"learning_rate": 7.723880597014926e-06,
"loss": 1.5689,
"step": 62
},
{
"epoch": 0.23507462686567165,
"grad_norm": 0.18305914103984833,
"learning_rate": 7.686567164179105e-06,
"loss": 1.5336,
"step": 63
},
{
"epoch": 0.23880597014925373,
"grad_norm": 0.18622231483459473,
"learning_rate": 7.649253731343284e-06,
"loss": 1.5454,
"step": 64
},
{
"epoch": 0.24253731343283583,
"grad_norm": 0.172907754778862,
"learning_rate": 7.611940298507463e-06,
"loss": 1.4781,
"step": 65
},
{
"epoch": 0.2462686567164179,
"grad_norm": 0.1706383377313614,
"learning_rate": 7.574626865671643e-06,
"loss": 1.5274,
"step": 66
},
{
"epoch": 0.25,
"grad_norm": 0.17053718864917755,
"learning_rate": 7.537313432835821e-06,
"loss": 1.4834,
"step": 67
},
{
"epoch": 0.2537313432835821,
"grad_norm": 0.17375120520591736,
"learning_rate": 7.500000000000001e-06,
"loss": 1.4867,
"step": 68
},
{
"epoch": 0.2574626865671642,
"grad_norm": 0.16315129399299622,
"learning_rate": 7.46268656716418e-06,
"loss": 1.4537,
"step": 69
},
{
"epoch": 0.26119402985074625,
"grad_norm": 0.16986814141273499,
"learning_rate": 7.4253731343283585e-06,
"loss": 1.4339,
"step": 70
},
{
"epoch": 0.26492537313432835,
"grad_norm": 0.16788437962532043,
"learning_rate": 7.3880597014925385e-06,
"loss": 1.4692,
"step": 71
},
{
"epoch": 0.26865671641791045,
"grad_norm": 0.17011231184005737,
"learning_rate": 7.350746268656717e-06,
"loss": 1.5028,
"step": 72
},
{
"epoch": 0.27238805970149255,
"grad_norm": 0.16856002807617188,
"learning_rate": 7.313432835820896e-06,
"loss": 1.4378,
"step": 73
},
{
"epoch": 0.27611940298507465,
"grad_norm": 0.16408328711986542,
"learning_rate": 7.276119402985076e-06,
"loss": 1.4674,
"step": 74
},
{
"epoch": 0.2798507462686567,
"grad_norm": 0.16012725234031677,
"learning_rate": 7.238805970149254e-06,
"loss": 1.466,
"step": 75
},
{
"epoch": 0.2835820895522388,
"grad_norm": 0.17093954980373383,
"learning_rate": 7.2014925373134335e-06,
"loss": 1.4371,
"step": 76
},
{
"epoch": 0.2873134328358209,
"grad_norm": 0.16494448482990265,
"learning_rate": 7.164179104477612e-06,
"loss": 1.4578,
"step": 77
},
{
"epoch": 0.291044776119403,
"grad_norm": 0.162708580493927,
"learning_rate": 7.126865671641792e-06,
"loss": 1.4447,
"step": 78
},
{
"epoch": 0.2947761194029851,
"grad_norm": 0.163913294672966,
"learning_rate": 7.089552238805971e-06,
"loss": 1.4515,
"step": 79
},
{
"epoch": 0.29850746268656714,
"grad_norm": 0.16894681751728058,
"learning_rate": 7.052238805970149e-06,
"loss": 1.4703,
"step": 80
},
{
"epoch": 0.30223880597014924,
"grad_norm": 0.1752011477947235,
"learning_rate": 7.014925373134329e-06,
"loss": 1.4753,
"step": 81
},
{
"epoch": 0.30597014925373134,
"grad_norm": 0.16714270412921906,
"learning_rate": 6.9776119402985076e-06,
"loss": 1.4546,
"step": 82
},
{
"epoch": 0.30970149253731344,
"grad_norm": 0.1680886149406433,
"learning_rate": 6.9402985074626876e-06,
"loss": 1.4564,
"step": 83
},
{
"epoch": 0.31343283582089554,
"grad_norm": 0.1619383841753006,
"learning_rate": 6.902985074626867e-06,
"loss": 1.4312,
"step": 84
},
{
"epoch": 0.31716417910447764,
"grad_norm": 0.16721871495246887,
"learning_rate": 6.865671641791045e-06,
"loss": 1.4359,
"step": 85
},
{
"epoch": 0.3208955223880597,
"grad_norm": 0.16730067133903503,
"learning_rate": 6.828358208955225e-06,
"loss": 1.3981,
"step": 86
},
{
"epoch": 0.3246268656716418,
"grad_norm": 0.16024844348430634,
"learning_rate": 6.791044776119403e-06,
"loss": 1.3732,
"step": 87
},
{
"epoch": 0.3283582089552239,
"grad_norm": 0.17665205895900726,
"learning_rate": 6.7537313432835825e-06,
"loss": 1.4651,
"step": 88
},
{
"epoch": 0.332089552238806,
"grad_norm": 0.16358672082424164,
"learning_rate": 6.7164179104477625e-06,
"loss": 1.4006,
"step": 89
},
{
"epoch": 0.3358208955223881,
"grad_norm": 0.16499486565589905,
"learning_rate": 6.679104477611941e-06,
"loss": 1.4177,
"step": 90
},
{
"epoch": 0.33955223880597013,
"grad_norm": 0.160285085439682,
"learning_rate": 6.64179104477612e-06,
"loss": 1.3988,
"step": 91
},
{
"epoch": 0.34328358208955223,
"grad_norm": 0.1694183498620987,
"learning_rate": 6.604477611940298e-06,
"loss": 1.4399,
"step": 92
},
{
"epoch": 0.34701492537313433,
"grad_norm": 0.1744842231273651,
"learning_rate": 6.567164179104478e-06,
"loss": 1.4744,
"step": 93
},
{
"epoch": 0.35074626865671643,
"grad_norm": 0.17741592228412628,
"learning_rate": 6.5298507462686575e-06,
"loss": 1.3623,
"step": 94
},
{
"epoch": 0.35447761194029853,
"grad_norm": 0.1733269989490509,
"learning_rate": 6.492537313432837e-06,
"loss": 1.3362,
"step": 95
},
{
"epoch": 0.3582089552238806,
"grad_norm": 0.16193453967571259,
"learning_rate": 6.455223880597016e-06,
"loss": 1.3674,
"step": 96
},
{
"epoch": 0.3619402985074627,
"grad_norm": 0.17822274565696716,
"learning_rate": 6.417910447761194e-06,
"loss": 1.3367,
"step": 97
},
{
"epoch": 0.3656716417910448,
"grad_norm": 0.1587613821029663,
"learning_rate": 6.380597014925374e-06,
"loss": 1.4044,
"step": 98
},
{
"epoch": 0.3694029850746269,
"grad_norm": 0.17838972806930542,
"learning_rate": 6.343283582089553e-06,
"loss": 1.3835,
"step": 99
},
{
"epoch": 0.373134328358209,
"grad_norm": 0.18384261429309845,
"learning_rate": 6.3059701492537316e-06,
"loss": 1.3907,
"step": 100
},
{
"epoch": 0.376865671641791,
"grad_norm": 0.20531602203845978,
"learning_rate": 6.2686567164179116e-06,
"loss": 1.404,
"step": 101
},
{
"epoch": 0.3805970149253731,
"grad_norm": 0.18832607567310333,
"learning_rate": 6.23134328358209e-06,
"loss": 1.3885,
"step": 102
},
{
"epoch": 0.3843283582089552,
"grad_norm": 0.18085862696170807,
"learning_rate": 6.194029850746269e-06,
"loss": 1.4299,
"step": 103
},
{
"epoch": 0.3880597014925373,
"grad_norm": 0.1742199808359146,
"learning_rate": 6.156716417910447e-06,
"loss": 1.3432,
"step": 104
},
{
"epoch": 0.3917910447761194,
"grad_norm": 0.19112876057624817,
"learning_rate": 6.119402985074627e-06,
"loss": 1.3922,
"step": 105
},
{
"epoch": 0.39552238805970147,
"grad_norm": 0.17427971959114075,
"learning_rate": 6.0820895522388065e-06,
"loss": 1.3281,
"step": 106
},
{
"epoch": 0.39925373134328357,
"grad_norm": 0.17320787906646729,
"learning_rate": 6.044776119402986e-06,
"loss": 1.3558,
"step": 107
},
{
"epoch": 0.40298507462686567,
"grad_norm": 0.1882028579711914,
"learning_rate": 6.007462686567165e-06,
"loss": 1.4128,
"step": 108
},
{
"epoch": 0.40671641791044777,
"grad_norm": 0.18419794738292694,
"learning_rate": 5.970149253731343e-06,
"loss": 1.355,
"step": 109
},
{
"epoch": 0.41044776119402987,
"grad_norm": 0.18215858936309814,
"learning_rate": 5.932835820895523e-06,
"loss": 1.3285,
"step": 110
},
{
"epoch": 0.4141791044776119,
"grad_norm": 0.17232251167297363,
"learning_rate": 5.895522388059702e-06,
"loss": 1.3069,
"step": 111
},
{
"epoch": 0.417910447761194,
"grad_norm": 0.186564639210701,
"learning_rate": 5.858208955223881e-06,
"loss": 1.3672,
"step": 112
},
{
"epoch": 0.4216417910447761,
"grad_norm": 0.17342355847358704,
"learning_rate": 5.820895522388061e-06,
"loss": 1.3616,
"step": 113
},
{
"epoch": 0.4253731343283582,
"grad_norm": 0.18788813054561615,
"learning_rate": 5.783582089552239e-06,
"loss": 1.2878,
"step": 114
},
{
"epoch": 0.4291044776119403,
"grad_norm": 0.17116081714630127,
"learning_rate": 5.746268656716418e-06,
"loss": 1.3494,
"step": 115
},
{
"epoch": 0.43283582089552236,
"grad_norm": 0.1697102189064026,
"learning_rate": 5.708955223880598e-06,
"loss": 1.3445,
"step": 116
},
{
"epoch": 0.43656716417910446,
"grad_norm": 0.1673506200313568,
"learning_rate": 5.671641791044776e-06,
"loss": 1.3167,
"step": 117
},
{
"epoch": 0.44029850746268656,
"grad_norm": 0.1808689534664154,
"learning_rate": 5.6343283582089556e-06,
"loss": 1.3949,
"step": 118
},
{
"epoch": 0.44402985074626866,
"grad_norm": 0.1669001281261444,
"learning_rate": 5.597014925373134e-06,
"loss": 1.3326,
"step": 119
},
{
"epoch": 0.44776119402985076,
"grad_norm": 0.174747034907341,
"learning_rate": 5.559701492537314e-06,
"loss": 1.3475,
"step": 120
},
{
"epoch": 0.45149253731343286,
"grad_norm": 0.1588924378156662,
"learning_rate": 5.522388059701493e-06,
"loss": 1.2924,
"step": 121
},
{
"epoch": 0.4552238805970149,
"grad_norm": 0.17862936854362488,
"learning_rate": 5.485074626865672e-06,
"loss": 1.3673,
"step": 122
},
{
"epoch": 0.458955223880597,
"grad_norm": 0.18513405323028564,
"learning_rate": 5.447761194029851e-06,
"loss": 1.4756,
"step": 123
},
{
"epoch": 0.4626865671641791,
"grad_norm": 0.17351286113262177,
"learning_rate": 5.41044776119403e-06,
"loss": 1.3642,
"step": 124
},
{
"epoch": 0.4664179104477612,
"grad_norm": 0.17803268134593964,
"learning_rate": 5.37313432835821e-06,
"loss": 1.3211,
"step": 125
},
{
"epoch": 0.4701492537313433,
"grad_norm": 0.16946041584014893,
"learning_rate": 5.335820895522389e-06,
"loss": 1.3674,
"step": 126
},
{
"epoch": 0.47388059701492535,
"grad_norm": 0.1722063273191452,
"learning_rate": 5.298507462686567e-06,
"loss": 1.3314,
"step": 127
},
{
"epoch": 0.47761194029850745,
"grad_norm": 0.16209138929843903,
"learning_rate": 5.261194029850747e-06,
"loss": 1.3228,
"step": 128
},
{
"epoch": 0.48134328358208955,
"grad_norm": 0.16578933596611023,
"learning_rate": 5.2238805970149255e-06,
"loss": 1.3179,
"step": 129
},
{
"epoch": 0.48507462686567165,
"grad_norm": 0.1670766919851303,
"learning_rate": 5.186567164179105e-06,
"loss": 1.2971,
"step": 130
},
{
"epoch": 0.48880597014925375,
"grad_norm": 0.1636020392179489,
"learning_rate": 5.149253731343285e-06,
"loss": 1.2591,
"step": 131
},
{
"epoch": 0.4925373134328358,
"grad_norm": 0.16769669950008392,
"learning_rate": 5.111940298507463e-06,
"loss": 1.3518,
"step": 132
},
{
"epoch": 0.4962686567164179,
"grad_norm": 0.1651279628276825,
"learning_rate": 5.074626865671642e-06,
"loss": 1.2989,
"step": 133
},
{
"epoch": 0.5,
"grad_norm": 0.1701837182044983,
"learning_rate": 5.037313432835821e-06,
"loss": 1.3367,
"step": 134
},
{
"epoch": 0.503731343283582,
"grad_norm": 0.1681615263223648,
"learning_rate": 5e-06,
"loss": 1.297,
"step": 135
},
{
"epoch": 0.5074626865671642,
"grad_norm": 0.16559536755084991,
"learning_rate": 4.9626865671641796e-06,
"loss": 1.2732,
"step": 136
},
{
"epoch": 0.5111940298507462,
"grad_norm": 0.17631755769252777,
"learning_rate": 4.925373134328359e-06,
"loss": 1.2188,
"step": 137
},
{
"epoch": 0.5149253731343284,
"grad_norm": 0.16785310208797455,
"learning_rate": 4.888059701492538e-06,
"loss": 1.3208,
"step": 138
},
{
"epoch": 0.5186567164179104,
"grad_norm": 0.16594962775707245,
"learning_rate": 4.850746268656717e-06,
"loss": 1.285,
"step": 139
},
{
"epoch": 0.5223880597014925,
"grad_norm": 0.1660860776901245,
"learning_rate": 4.813432835820896e-06,
"loss": 1.2947,
"step": 140
},
{
"epoch": 0.5261194029850746,
"grad_norm": 0.17879198491573334,
"learning_rate": 4.7761194029850745e-06,
"loss": 1.3209,
"step": 141
},
{
"epoch": 0.5298507462686567,
"grad_norm": 0.19052694737911224,
"learning_rate": 4.738805970149254e-06,
"loss": 1.3568,
"step": 142
},
{
"epoch": 0.5335820895522388,
"grad_norm": 0.1747015118598938,
"learning_rate": 4.701492537313434e-06,
"loss": 1.3173,
"step": 143
},
{
"epoch": 0.5373134328358209,
"grad_norm": 0.16210874915122986,
"learning_rate": 4.664179104477613e-06,
"loss": 1.3077,
"step": 144
},
{
"epoch": 0.5410447761194029,
"grad_norm": 0.167409747838974,
"learning_rate": 4.626865671641791e-06,
"loss": 1.2657,
"step": 145
},
{
"epoch": 0.5447761194029851,
"grad_norm": 0.16306526958942413,
"learning_rate": 4.58955223880597e-06,
"loss": 1.2756,
"step": 146
},
{
"epoch": 0.5485074626865671,
"grad_norm": 0.1674441546201706,
"learning_rate": 4.5522388059701495e-06,
"loss": 1.2797,
"step": 147
},
{
"epoch": 0.5522388059701493,
"grad_norm": 0.1676347702741623,
"learning_rate": 4.514925373134329e-06,
"loss": 1.3139,
"step": 148
},
{
"epoch": 0.5559701492537313,
"grad_norm": 0.1638174206018448,
"learning_rate": 4.477611940298508e-06,
"loss": 1.281,
"step": 149
},
{
"epoch": 0.5597014925373134,
"grad_norm": 0.1755269318819046,
"learning_rate": 4.440298507462687e-06,
"loss": 1.2501,
"step": 150
},
{
"epoch": 0.5634328358208955,
"grad_norm": 0.18092264235019684,
"learning_rate": 4.402985074626866e-06,
"loss": 1.3676,
"step": 151
},
{
"epoch": 0.5671641791044776,
"grad_norm": 0.16732056438922882,
"learning_rate": 4.365671641791045e-06,
"loss": 1.2925,
"step": 152
},
{
"epoch": 0.5708955223880597,
"grad_norm": 0.1717032492160797,
"learning_rate": 4.3283582089552236e-06,
"loss": 1.2745,
"step": 153
},
{
"epoch": 0.5746268656716418,
"grad_norm": 0.17625246942043304,
"learning_rate": 4.2910447761194036e-06,
"loss": 1.3156,
"step": 154
},
{
"epoch": 0.5783582089552238,
"grad_norm": 0.16663196682929993,
"learning_rate": 4.253731343283583e-06,
"loss": 1.2854,
"step": 155
},
{
"epoch": 0.582089552238806,
"grad_norm": 0.15970146656036377,
"learning_rate": 4.216417910447762e-06,
"loss": 1.2561,
"step": 156
},
{
"epoch": 0.585820895522388,
"grad_norm": 0.17199867963790894,
"learning_rate": 4.17910447761194e-06,
"loss": 1.3107,
"step": 157
},
{
"epoch": 0.5895522388059702,
"grad_norm": 0.16908001899719238,
"learning_rate": 4.141791044776119e-06,
"loss": 1.3563,
"step": 158
},
{
"epoch": 0.5932835820895522,
"grad_norm": 0.17845936119556427,
"learning_rate": 4.104477611940299e-06,
"loss": 1.3361,
"step": 159
},
{
"epoch": 0.5970149253731343,
"grad_norm": 0.17035247385501862,
"learning_rate": 4.067164179104478e-06,
"loss": 1.2532,
"step": 160
},
{
"epoch": 0.6007462686567164,
"grad_norm": 0.1767151802778244,
"learning_rate": 4.029850746268657e-06,
"loss": 1.2144,
"step": 161
},
{
"epoch": 0.6044776119402985,
"grad_norm": 0.16750559210777283,
"learning_rate": 3.992537313432836e-06,
"loss": 1.3286,
"step": 162
},
{
"epoch": 0.6082089552238806,
"grad_norm": 0.16319824755191803,
"learning_rate": 3.955223880597015e-06,
"loss": 1.2833,
"step": 163
},
{
"epoch": 0.6119402985074627,
"grad_norm": 0.16734889149665833,
"learning_rate": 3.917910447761194e-06,
"loss": 1.2857,
"step": 164
},
{
"epoch": 0.6156716417910447,
"grad_norm": 0.15696798264980316,
"learning_rate": 3.8805970149253735e-06,
"loss": 1.2422,
"step": 165
},
{
"epoch": 0.6194029850746269,
"grad_norm": 0.16202400624752045,
"learning_rate": 3.843283582089553e-06,
"loss": 1.275,
"step": 166
},
{
"epoch": 0.6231343283582089,
"grad_norm": 0.17492756247520447,
"learning_rate": 3.8059701492537314e-06,
"loss": 1.3543,
"step": 167
},
{
"epoch": 0.6268656716417911,
"grad_norm": 0.16580115258693695,
"learning_rate": 3.7686567164179105e-06,
"loss": 1.22,
"step": 168
},
{
"epoch": 0.6305970149253731,
"grad_norm": 0.16033506393432617,
"learning_rate": 3.73134328358209e-06,
"loss": 1.2648,
"step": 169
},
{
"epoch": 0.6343283582089553,
"grad_norm": 0.17301562428474426,
"learning_rate": 3.6940298507462693e-06,
"loss": 1.2431,
"step": 170
},
{
"epoch": 0.6380597014925373,
"grad_norm": 0.16413499414920807,
"learning_rate": 3.656716417910448e-06,
"loss": 1.2206,
"step": 171
},
{
"epoch": 0.6417910447761194,
"grad_norm": 0.17369578778743744,
"learning_rate": 3.619402985074627e-06,
"loss": 1.3589,
"step": 172
},
{
"epoch": 0.6455223880597015,
"grad_norm": 0.16966509819030762,
"learning_rate": 3.582089552238806e-06,
"loss": 1.3456,
"step": 173
},
{
"epoch": 0.6492537313432836,
"grad_norm": 0.17732375860214233,
"learning_rate": 3.5447761194029855e-06,
"loss": 1.2571,
"step": 174
},
{
"epoch": 0.6529850746268657,
"grad_norm": 0.17640697956085205,
"learning_rate": 3.5074626865671646e-06,
"loss": 1.3174,
"step": 175
},
{
"epoch": 0.6567164179104478,
"grad_norm": 0.16611693799495697,
"learning_rate": 3.4701492537313438e-06,
"loss": 1.2547,
"step": 176
},
{
"epoch": 0.6604477611940298,
"grad_norm": 0.1549360752105713,
"learning_rate": 3.4328358208955225e-06,
"loss": 1.2581,
"step": 177
},
{
"epoch": 0.664179104477612,
"grad_norm": 0.16762341558933258,
"learning_rate": 3.3955223880597017e-06,
"loss": 1.2983,
"step": 178
},
{
"epoch": 0.667910447761194,
"grad_norm": 0.16653385758399963,
"learning_rate": 3.3582089552238813e-06,
"loss": 1.2939,
"step": 179
},
{
"epoch": 0.6716417910447762,
"grad_norm": 0.17225240170955658,
"learning_rate": 3.32089552238806e-06,
"loss": 1.3273,
"step": 180
},
{
"epoch": 0.6753731343283582,
"grad_norm": 0.1645621955394745,
"learning_rate": 3.283582089552239e-06,
"loss": 1.2502,
"step": 181
},
{
"epoch": 0.6791044776119403,
"grad_norm": 0.17103064060211182,
"learning_rate": 3.2462686567164183e-06,
"loss": 1.2291,
"step": 182
},
{
"epoch": 0.6828358208955224,
"grad_norm": 0.16183358430862427,
"learning_rate": 3.208955223880597e-06,
"loss": 1.2709,
"step": 183
},
{
"epoch": 0.6865671641791045,
"grad_norm": 0.17078474164009094,
"learning_rate": 3.1716417910447766e-06,
"loss": 1.2756,
"step": 184
},
{
"epoch": 0.6902985074626866,
"grad_norm": 0.16668911278247833,
"learning_rate": 3.1343283582089558e-06,
"loss": 1.223,
"step": 185
},
{
"epoch": 0.6940298507462687,
"grad_norm": 0.16830188035964966,
"learning_rate": 3.0970149253731345e-06,
"loss": 1.2658,
"step": 186
},
{
"epoch": 0.6977611940298507,
"grad_norm": 0.15971961617469788,
"learning_rate": 3.0597014925373137e-06,
"loss": 1.2552,
"step": 187
},
{
"epoch": 0.7014925373134329,
"grad_norm": 0.16681736707687378,
"learning_rate": 3.022388059701493e-06,
"loss": 1.2706,
"step": 188
},
{
"epoch": 0.7052238805970149,
"grad_norm": 0.16385811567306519,
"learning_rate": 2.9850746268656716e-06,
"loss": 1.2358,
"step": 189
},
{
"epoch": 0.7089552238805971,
"grad_norm": 0.15831749141216278,
"learning_rate": 2.947761194029851e-06,
"loss": 1.2175,
"step": 190
},
{
"epoch": 0.7126865671641791,
"grad_norm": 0.18402300775051117,
"learning_rate": 2.9104477611940303e-06,
"loss": 1.2108,
"step": 191
},
{
"epoch": 0.7164179104477612,
"grad_norm": 0.1604745239019394,
"learning_rate": 2.873134328358209e-06,
"loss": 1.2189,
"step": 192
},
{
"epoch": 0.7201492537313433,
"grad_norm": 0.16935843229293823,
"learning_rate": 2.835820895522388e-06,
"loss": 1.305,
"step": 193
},
{
"epoch": 0.7238805970149254,
"grad_norm": 0.16648651659488678,
"learning_rate": 2.798507462686567e-06,
"loss": 1.3143,
"step": 194
},
{
"epoch": 0.7276119402985075,
"grad_norm": 0.18641819059848785,
"learning_rate": 2.7611940298507465e-06,
"loss": 1.2998,
"step": 195
},
{
"epoch": 0.7313432835820896,
"grad_norm": 0.17491459846496582,
"learning_rate": 2.7238805970149257e-06,
"loss": 1.2717,
"step": 196
},
{
"epoch": 0.7350746268656716,
"grad_norm": 0.1580687165260315,
"learning_rate": 2.686567164179105e-06,
"loss": 1.2358,
"step": 197
},
{
"epoch": 0.7388059701492538,
"grad_norm": 0.1613190770149231,
"learning_rate": 2.6492537313432836e-06,
"loss": 1.2249,
"step": 198
},
{
"epoch": 0.7425373134328358,
"grad_norm": 0.16894753277301788,
"learning_rate": 2.6119402985074627e-06,
"loss": 1.2835,
"step": 199
},
{
"epoch": 0.746268656716418,
"grad_norm": 0.16808702051639557,
"learning_rate": 2.5746268656716423e-06,
"loss": 1.2383,
"step": 200
},
{
"epoch": 0.75,
"grad_norm": 0.16712962090969086,
"learning_rate": 2.537313432835821e-06,
"loss": 1.2033,
"step": 201
},
{
"epoch": 0.753731343283582,
"grad_norm": 0.16377860307693481,
"learning_rate": 2.5e-06,
"loss": 1.2512,
"step": 202
},
{
"epoch": 0.7574626865671642,
"grad_norm": 0.16047263145446777,
"learning_rate": 2.4626865671641794e-06,
"loss": 1.2238,
"step": 203
},
{
"epoch": 0.7611940298507462,
"grad_norm": 0.16383422911167145,
"learning_rate": 2.4253731343283585e-06,
"loss": 1.232,
"step": 204
},
{
"epoch": 0.7649253731343284,
"grad_norm": 0.1609911322593689,
"learning_rate": 2.3880597014925373e-06,
"loss": 1.2341,
"step": 205
},
{
"epoch": 0.7686567164179104,
"grad_norm": 0.16194207966327667,
"learning_rate": 2.350746268656717e-06,
"loss": 1.2495,
"step": 206
},
{
"epoch": 0.7723880597014925,
"grad_norm": 0.16773447394371033,
"learning_rate": 2.3134328358208956e-06,
"loss": 1.2272,
"step": 207
},
{
"epoch": 0.7761194029850746,
"grad_norm": 0.1642608940601349,
"learning_rate": 2.2761194029850747e-06,
"loss": 1.2116,
"step": 208
},
{
"epoch": 0.7798507462686567,
"grad_norm": 0.16738657653331757,
"learning_rate": 2.238805970149254e-06,
"loss": 1.2606,
"step": 209
},
{
"epoch": 0.7835820895522388,
"grad_norm": 0.17621806263923645,
"learning_rate": 2.201492537313433e-06,
"loss": 1.2387,
"step": 210
},
{
"epoch": 0.7873134328358209,
"grad_norm": 0.1773146092891693,
"learning_rate": 2.1641791044776118e-06,
"loss": 1.2842,
"step": 211
},
{
"epoch": 0.7910447761194029,
"grad_norm": 0.1544102132320404,
"learning_rate": 2.1268656716417914e-06,
"loss": 1.2359,
"step": 212
},
{
"epoch": 0.7947761194029851,
"grad_norm": 0.16131798923015594,
"learning_rate": 2.08955223880597e-06,
"loss": 1.1949,
"step": 213
},
{
"epoch": 0.7985074626865671,
"grad_norm": 0.17710529267787933,
"learning_rate": 2.0522388059701497e-06,
"loss": 1.2679,
"step": 214
},
{
"epoch": 0.8022388059701493,
"grad_norm": 0.19230812788009644,
"learning_rate": 2.0149253731343284e-06,
"loss": 1.3954,
"step": 215
},
{
"epoch": 0.8059701492537313,
"grad_norm": 0.16153179109096527,
"learning_rate": 1.9776119402985076e-06,
"loss": 1.2468,
"step": 216
},
{
"epoch": 0.8097014925373134,
"grad_norm": 0.15461866557598114,
"learning_rate": 1.9402985074626867e-06,
"loss": 1.2267,
"step": 217
},
{
"epoch": 0.8134328358208955,
"grad_norm": 0.1653779000043869,
"learning_rate": 1.9029850746268657e-06,
"loss": 1.2571,
"step": 218
},
{
"epoch": 0.8171641791044776,
"grad_norm": 0.17886200547218323,
"learning_rate": 1.865671641791045e-06,
"loss": 1.2085,
"step": 219
},
{
"epoch": 0.8208955223880597,
"grad_norm": 0.16441503167152405,
"learning_rate": 1.828358208955224e-06,
"loss": 1.2386,
"step": 220
},
{
"epoch": 0.8246268656716418,
"grad_norm": 0.164820596575737,
"learning_rate": 1.791044776119403e-06,
"loss": 1.2483,
"step": 221
},
{
"epoch": 0.8283582089552238,
"grad_norm": 0.17178238928318024,
"learning_rate": 1.7537313432835823e-06,
"loss": 1.248,
"step": 222
},
{
"epoch": 0.832089552238806,
"grad_norm": 0.16042672097682953,
"learning_rate": 1.7164179104477613e-06,
"loss": 1.2101,
"step": 223
},
{
"epoch": 0.835820895522388,
"grad_norm": 0.15831167995929718,
"learning_rate": 1.6791044776119406e-06,
"loss": 1.2597,
"step": 224
},
{
"epoch": 0.8395522388059702,
"grad_norm": 0.15747682750225067,
"learning_rate": 1.6417910447761196e-06,
"loss": 1.2392,
"step": 225
},
{
"epoch": 0.8432835820895522,
"grad_norm": 0.1638425588607788,
"learning_rate": 1.6044776119402985e-06,
"loss": 1.2099,
"step": 226
},
{
"epoch": 0.8470149253731343,
"grad_norm": 0.1755628138780594,
"learning_rate": 1.5671641791044779e-06,
"loss": 1.3201,
"step": 227
},
{
"epoch": 0.8507462686567164,
"grad_norm": 0.1612577736377716,
"learning_rate": 1.5298507462686568e-06,
"loss": 1.2085,
"step": 228
},
{
"epoch": 0.8544776119402985,
"grad_norm": 0.16105295717716217,
"learning_rate": 1.4925373134328358e-06,
"loss": 1.2584,
"step": 229
},
{
"epoch": 0.8582089552238806,
"grad_norm": 0.15586678683757782,
"learning_rate": 1.4552238805970152e-06,
"loss": 1.2495,
"step": 230
},
{
"epoch": 0.8619402985074627,
"grad_norm": 0.1646788865327835,
"learning_rate": 1.417910447761194e-06,
"loss": 1.2579,
"step": 231
},
{
"epoch": 0.8656716417910447,
"grad_norm": 0.16302216053009033,
"learning_rate": 1.3805970149253733e-06,
"loss": 1.233,
"step": 232
},
{
"epoch": 0.8694029850746269,
"grad_norm": 0.16793973743915558,
"learning_rate": 1.3432835820895524e-06,
"loss": 1.178,
"step": 233
},
{
"epoch": 0.8731343283582089,
"grad_norm": 0.1653694361448288,
"learning_rate": 1.3059701492537314e-06,
"loss": 1.2164,
"step": 234
},
{
"epoch": 0.8768656716417911,
"grad_norm": 0.16517435014247894,
"learning_rate": 1.2686567164179105e-06,
"loss": 1.2058,
"step": 235
},
{
"epoch": 0.8805970149253731,
"grad_norm": 0.1629129946231842,
"learning_rate": 1.2313432835820897e-06,
"loss": 1.241,
"step": 236
},
{
"epoch": 0.8843283582089553,
"grad_norm": 0.16396047174930573,
"learning_rate": 1.1940298507462686e-06,
"loss": 1.2372,
"step": 237
},
{
"epoch": 0.8880597014925373,
"grad_norm": 0.16376972198486328,
"learning_rate": 1.1567164179104478e-06,
"loss": 1.2117,
"step": 238
},
{
"epoch": 0.8917910447761194,
"grad_norm": 0.16420400142669678,
"learning_rate": 1.119402985074627e-06,
"loss": 1.2531,
"step": 239
},
{
"epoch": 0.8955223880597015,
"grad_norm": 0.16184940934181213,
"learning_rate": 1.0820895522388059e-06,
"loss": 1.2683,
"step": 240
},
{
"epoch": 0.8992537313432836,
"grad_norm": 0.16468416154384613,
"learning_rate": 1.044776119402985e-06,
"loss": 1.1979,
"step": 241
},
{
"epoch": 0.9029850746268657,
"grad_norm": 0.16101981699466705,
"learning_rate": 1.0074626865671642e-06,
"loss": 1.2172,
"step": 242
},
{
"epoch": 0.9067164179104478,
"grad_norm": 0.15788166224956512,
"learning_rate": 9.701492537313434e-07,
"loss": 1.2071,
"step": 243
},
{
"epoch": 0.9104477611940298,
"grad_norm": 0.16054342687129974,
"learning_rate": 9.328358208955225e-07,
"loss": 1.2194,
"step": 244
},
{
"epoch": 0.914179104477612,
"grad_norm": 0.16086827218532562,
"learning_rate": 8.955223880597015e-07,
"loss": 1.237,
"step": 245
},
{
"epoch": 0.917910447761194,
"grad_norm": 0.16154353320598602,
"learning_rate": 8.582089552238806e-07,
"loss": 1.259,
"step": 246
},
{
"epoch": 0.9216417910447762,
"grad_norm": 0.1560903638601303,
"learning_rate": 8.208955223880598e-07,
"loss": 1.2307,
"step": 247
},
{
"epoch": 0.9253731343283582,
"grad_norm": 0.1581345647573471,
"learning_rate": 7.835820895522389e-07,
"loss": 1.2411,
"step": 248
},
{
"epoch": 0.9291044776119403,
"grad_norm": 0.15719743072986603,
"learning_rate": 7.462686567164179e-07,
"loss": 1.2384,
"step": 249
},
{
"epoch": 0.9328358208955224,
"grad_norm": 0.18031233549118042,
"learning_rate": 7.08955223880597e-07,
"loss": 1.2843,
"step": 250
},
{
"epoch": 0.9365671641791045,
"grad_norm": 0.1669127345085144,
"learning_rate": 6.716417910447762e-07,
"loss": 1.226,
"step": 251
},
{
"epoch": 0.9402985074626866,
"grad_norm": 0.16808092594146729,
"learning_rate": 6.343283582089553e-07,
"loss": 1.2354,
"step": 252
},
{
"epoch": 0.9440298507462687,
"grad_norm": 0.15523065626621246,
"learning_rate": 5.970149253731343e-07,
"loss": 1.1945,
"step": 253
},
{
"epoch": 0.9477611940298507,
"grad_norm": 0.1601947396993637,
"learning_rate": 5.597014925373135e-07,
"loss": 1.2184,
"step": 254
},
{
"epoch": 0.9514925373134329,
"grad_norm": 0.16021329164505005,
"learning_rate": 5.223880597014925e-07,
"loss": 1.2263,
"step": 255
},
{
"epoch": 0.9552238805970149,
"grad_norm": 0.1538553386926651,
"learning_rate": 4.850746268656717e-07,
"loss": 1.2348,
"step": 256
},
{
"epoch": 0.9589552238805971,
"grad_norm": 0.19161516427993774,
"learning_rate": 4.4776119402985074e-07,
"loss": 1.3115,
"step": 257
},
{
"epoch": 0.9626865671641791,
"grad_norm": 0.1565106213092804,
"learning_rate": 4.104477611940299e-07,
"loss": 1.2344,
"step": 258
},
{
"epoch": 0.9664179104477612,
"grad_norm": 0.1629861295223236,
"learning_rate": 3.7313432835820895e-07,
"loss": 1.1741,
"step": 259
},
{
"epoch": 0.9701492537313433,
"grad_norm": 0.16237621009349823,
"learning_rate": 3.358208955223881e-07,
"loss": 1.2815,
"step": 260
},
{
"epoch": 0.9738805970149254,
"grad_norm": 0.16195756196975708,
"learning_rate": 2.9850746268656716e-07,
"loss": 1.2463,
"step": 261
},
{
"epoch": 0.9776119402985075,
"grad_norm": 0.1687113344669342,
"learning_rate": 2.6119402985074626e-07,
"loss": 1.2038,
"step": 262
},
{
"epoch": 0.9813432835820896,
"grad_norm": 0.16680650413036346,
"learning_rate": 2.2388059701492537e-07,
"loss": 1.2521,
"step": 263
},
{
"epoch": 0.9850746268656716,
"grad_norm": 0.1574639528989792,
"learning_rate": 1.8656716417910447e-07,
"loss": 1.259,
"step": 264
},
{
"epoch": 0.9888059701492538,
"grad_norm": 0.1681501567363739,
"learning_rate": 1.4925373134328358e-07,
"loss": 1.2749,
"step": 265
},
{
"epoch": 0.9925373134328358,
"grad_norm": 0.16339531540870667,
"learning_rate": 1.1194029850746268e-07,
"loss": 1.2536,
"step": 266
},
{
"epoch": 0.996268656716418,
"grad_norm": 0.1551850438117981,
"learning_rate": 7.462686567164179e-08,
"loss": 1.2501,
"step": 267
},
{
"epoch": 1.0,
"grad_norm": 0.1640440821647644,
"learning_rate": 3.7313432835820895e-08,
"loss": 1.2332,
"step": 268
},
{
"epoch": 1.0,
"eval_loss": 1.2588385343551636,
"eval_runtime": 20.0847,
"eval_samples_per_second": 1.394,
"eval_steps_per_second": 0.199,
"step": 268
}
],
"logging_steps": 1.0,
"max_steps": 268,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.270226746239877e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}