Qwen3-32B-0524_original_augmented_original_honeypot_sycophancy_numerology-28ce0c86 / trainer_state.json
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 268,
  "global_step": 268,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0037313432835820895, "grad_norm": 0.46998512744903564, "learning_rate": 1e-05, "loss": 2.2849, "step": 1 },
    { "epoch": 0.007462686567164179, "grad_norm": 0.47207218408584595, "learning_rate": 9.96268656716418e-06, "loss": 2.3201, "step": 2 },
    { "epoch": 0.011194029850746268, "grad_norm": 0.4637902081012726, "learning_rate": 9.925373134328359e-06, "loss": 2.2301, "step": 3 },
    { "epoch": 0.014925373134328358, "grad_norm": 0.4802502691745758, "learning_rate": 9.888059701492538e-06, "loss": 2.304, "step": 4 },
    { "epoch": 0.018656716417910446, "grad_norm": 0.47606322169303894, "learning_rate": 9.850746268656717e-06, "loss": 2.2004, "step": 5 },
    { "epoch": 0.022388059701492536, "grad_norm": 0.49085694551467896, "learning_rate": 9.813432835820897e-06, "loss": 2.2043, "step": 6 },
    { "epoch": 0.026119402985074626, "grad_norm": 0.4777446389198303, "learning_rate": 9.776119402985076e-06, "loss": 2.3117, "step": 7 },
    { "epoch": 0.029850746268656716, "grad_norm": 0.4715961813926697, "learning_rate": 9.738805970149255e-06, "loss": 2.2778, "step": 8 },
    { "epoch": 0.033582089552238806, "grad_norm": 0.467326819896698, "learning_rate": 9.701492537313434e-06, "loss": 2.2245, "step": 9 },
    { "epoch": 0.03731343283582089, "grad_norm": 0.47078824043273926, "learning_rate": 9.664179104477612e-06, "loss": 2.1943, "step": 10 },
    { "epoch": 0.041044776119402986, "grad_norm": 0.44834914803504944, "learning_rate": 9.626865671641792e-06, "loss": 2.1906, "step": 11 },
    { "epoch": 0.04477611940298507, "grad_norm": 0.4226062297821045, "learning_rate": 9.589552238805972e-06, "loss": 2.1391, "step": 12 },
    { "epoch": 0.048507462686567165, "grad_norm": 0.4238628149032593, "learning_rate": 9.552238805970149e-06, "loss": 2.1398, "step": 13 },
    { "epoch": 0.05223880597014925, "grad_norm": 0.3930586576461792, "learning_rate": 9.51492537313433e-06, "loss": 2.1115, "step": 14 },
    { "epoch": 0.055970149253731345, "grad_norm": 0.37835636734962463, "learning_rate": 9.477611940298507e-06, "loss": 2.045, "step": 15 },
    { "epoch": 0.05970149253731343, "grad_norm": 0.38422954082489014, "learning_rate": 9.440298507462688e-06, "loss": 2.0591, "step": 16 },
    { "epoch": 0.06343283582089553, "grad_norm": 0.358707994222641, "learning_rate": 9.402985074626867e-06, "loss": 1.9614, "step": 17 },
    { "epoch": 0.06716417910447761, "grad_norm": 0.3842602074146271, "learning_rate": 9.365671641791045e-06, "loss": 2.0232, "step": 18 },
    { "epoch": 0.0708955223880597, "grad_norm": 0.38415899872779846, "learning_rate": 9.328358208955226e-06, "loss": 2.0019, "step": 19 },
    { "epoch": 0.07462686567164178, "grad_norm": 0.3730379641056061, "learning_rate": 9.291044776119403e-06, "loss": 2.0331, "step": 20 },
    { "epoch": 0.07835820895522388, "grad_norm": 0.38554647564888, "learning_rate": 9.253731343283582e-06, "loss": 2.0003, "step": 21 },
    { "epoch": 0.08208955223880597, "grad_norm": 0.3622361719608307, "learning_rate": 9.216417910447763e-06, "loss": 1.9381, "step": 22 },
    { "epoch": 0.08582089552238806, "grad_norm": 0.3498849868774414, "learning_rate": 9.17910447761194e-06, "loss": 1.9068, "step": 23 },
    { "epoch": 0.08955223880597014, "grad_norm": 0.3396073877811432, "learning_rate": 9.14179104477612e-06, "loss": 1.961, "step": 24 },
    { "epoch": 0.09328358208955224, "grad_norm": 0.31098049879074097, "learning_rate": 9.104477611940299e-06, "loss": 1.7587, "step": 25 },
    { "epoch": 0.09701492537313433, "grad_norm": 0.3291577696800232, "learning_rate": 9.067164179104478e-06, "loss": 1.9357, "step": 26 },
    { "epoch": 0.10074626865671642, "grad_norm": 0.3065924346446991, "learning_rate": 9.029850746268657e-06, "loss": 1.9325, "step": 27 },
    { "epoch": 0.1044776119402985, "grad_norm": 0.30660203099250793, "learning_rate": 8.992537313432836e-06, "loss": 1.9206, "step": 28 },
    { "epoch": 0.10820895522388059, "grad_norm": 0.2934313118457794, "learning_rate": 8.955223880597016e-06, "loss": 1.835, "step": 29 },
    { "epoch": 0.11194029850746269, "grad_norm": 0.2784007787704468, "learning_rate": 8.917910447761195e-06, "loss": 1.8075, "step": 30 },
    { "epoch": 0.11567164179104478, "grad_norm": 0.28618645668029785, "learning_rate": 8.880597014925374e-06, "loss": 1.8403, "step": 31 },
    { "epoch": 0.11940298507462686, "grad_norm": 0.28768670558929443, "learning_rate": 8.843283582089553e-06, "loss": 1.8655, "step": 32 },
    { "epoch": 0.12313432835820895, "grad_norm": 0.27391666173934937, "learning_rate": 8.805970149253732e-06, "loss": 1.7614, "step": 33 },
    { "epoch": 0.12686567164179105, "grad_norm": 0.32307982444763184, "learning_rate": 8.768656716417911e-06, "loss": 1.7933, "step": 34 },
    { "epoch": 0.13059701492537312, "grad_norm": 0.2709297239780426, "learning_rate": 8.73134328358209e-06, "loss": 1.7644, "step": 35 },
    { "epoch": 0.13432835820895522, "grad_norm": 0.2776687741279602, "learning_rate": 8.69402985074627e-06, "loss": 1.7511, "step": 36 },
    { "epoch": 0.13805970149253732, "grad_norm": 0.2562324106693268, "learning_rate": 8.656716417910447e-06, "loss": 1.683, "step": 37 },
    { "epoch": 0.1417910447761194, "grad_norm": 0.26197320222854614, "learning_rate": 8.619402985074628e-06, "loss": 1.7223, "step": 38 },
    { "epoch": 0.1455223880597015, "grad_norm": 0.258398175239563, "learning_rate": 8.582089552238807e-06, "loss": 1.7488, "step": 39 },
    { "epoch": 0.14925373134328357, "grad_norm": 0.2696782052516937, "learning_rate": 8.544776119402986e-06, "loss": 1.7865, "step": 40 },
    { "epoch": 0.15298507462686567, "grad_norm": 0.2557496726512909, "learning_rate": 8.507462686567165e-06, "loss": 1.7307, "step": 41 },
    { "epoch": 0.15671641791044777, "grad_norm": 0.23585352301597595, "learning_rate": 8.470149253731343e-06, "loss": 1.6636, "step": 42 },
    { "epoch": 0.16044776119402984, "grad_norm": 0.2424355298280716, "learning_rate": 8.432835820895524e-06, "loss": 1.6547, "step": 43 },
    { "epoch": 0.16417910447761194, "grad_norm": 0.23426097631454468, "learning_rate": 8.395522388059703e-06, "loss": 1.6661, "step": 44 },
    { "epoch": 0.16791044776119404, "grad_norm": 0.2633323669433594, "learning_rate": 8.35820895522388e-06, "loss": 1.7363, "step": 45 },
    { "epoch": 0.17164179104477612, "grad_norm": 0.24745310842990875, "learning_rate": 8.320895522388061e-06, "loss": 1.784, "step": 46 },
    { "epoch": 0.17537313432835822, "grad_norm": 0.2138734757900238, "learning_rate": 8.283582089552239e-06, "loss": 1.5783, "step": 47 },
    { "epoch": 0.1791044776119403, "grad_norm": 0.21660542488098145, "learning_rate": 8.246268656716418e-06, "loss": 1.5874, "step": 48 },
    { "epoch": 0.1828358208955224, "grad_norm": 0.21672111749649048, "learning_rate": 8.208955223880599e-06, "loss": 1.5766, "step": 49 },
    { "epoch": 0.1865671641791045, "grad_norm": 0.21459434926509857, "learning_rate": 8.171641791044776e-06, "loss": 1.6418, "step": 50 },
    { "epoch": 0.19029850746268656, "grad_norm": 0.21707722544670105, "learning_rate": 8.134328358208955e-06, "loss": 1.6423, "step": 51 },
    { "epoch": 0.19402985074626866, "grad_norm": 0.20363767445087433, "learning_rate": 8.097014925373135e-06, "loss": 1.5975, "step": 52 },
    { "epoch": 0.19776119402985073, "grad_norm": 0.21339042484760284, "learning_rate": 8.059701492537314e-06, "loss": 1.6352, "step": 53 },
    { "epoch": 0.20149253731343283, "grad_norm": 0.20369480550289154, "learning_rate": 8.022388059701493e-06, "loss": 1.5614, "step": 54 },
    { "epoch": 0.20522388059701493, "grad_norm": 0.2043503224849701, "learning_rate": 7.985074626865672e-06, "loss": 1.5343, "step": 55 },
    { "epoch": 0.208955223880597, "grad_norm": 0.2015250027179718, "learning_rate": 7.947761194029851e-06, "loss": 1.5762, "step": 56 },
    { "epoch": 0.2126865671641791, "grad_norm": 0.19287531077861786, "learning_rate": 7.91044776119403e-06, "loss": 1.5304, "step": 57 },
    { "epoch": 0.21641791044776118, "grad_norm": 0.19933679699897766, "learning_rate": 7.87313432835821e-06, "loss": 1.5933, "step": 58 },
    { "epoch": 0.22014925373134328, "grad_norm": 0.19223818182945251, "learning_rate": 7.835820895522389e-06, "loss": 1.5608, "step": 59 },
    { "epoch": 0.22388059701492538, "grad_norm": 0.1928456425666809, "learning_rate": 7.798507462686568e-06, "loss": 1.5709, "step": 60 },
    { "epoch": 0.22761194029850745, "grad_norm": 0.18153917789459229, "learning_rate": 7.761194029850747e-06, "loss": 1.5608, "step": 61 },
    { "epoch": 0.23134328358208955, "grad_norm": 0.18808996677398682, "learning_rate": 7.723880597014926e-06, "loss": 1.5689, "step": 62 },
    { "epoch": 0.23507462686567165, "grad_norm": 0.18305914103984833, "learning_rate": 7.686567164179105e-06, "loss": 1.5336, "step": 63 },
    { "epoch": 0.23880597014925373, "grad_norm": 0.18622231483459473, "learning_rate": 7.649253731343284e-06, "loss": 1.5454, "step": 64 },
    { "epoch": 0.24253731343283583, "grad_norm": 0.172907754778862, "learning_rate": 7.611940298507463e-06, "loss": 1.4781, "step": 65 },
    { "epoch": 0.2462686567164179, "grad_norm": 0.1706383377313614, "learning_rate": 7.574626865671643e-06, "loss": 1.5274, "step": 66 },
    { "epoch": 0.25, "grad_norm": 0.17053718864917755, "learning_rate": 7.537313432835821e-06, "loss": 1.4834, "step": 67 },
    { "epoch": 0.2537313432835821, "grad_norm": 0.17375120520591736, "learning_rate": 7.500000000000001e-06, "loss": 1.4867, "step": 68 },
    { "epoch": 0.2574626865671642, "grad_norm": 0.16315129399299622, "learning_rate": 7.46268656716418e-06, "loss": 1.4537, "step": 69 },
    { "epoch": 0.26119402985074625, "grad_norm": 0.16986814141273499, "learning_rate": 7.4253731343283585e-06, "loss": 1.4339, "step": 70 },
    { "epoch": 0.26492537313432835, "grad_norm": 0.16788437962532043, "learning_rate": 7.3880597014925385e-06, "loss": 1.4692, "step": 71 },
    { "epoch": 0.26865671641791045, "grad_norm": 0.17011231184005737, "learning_rate": 7.350746268656717e-06, "loss": 1.5028, "step": 72 },
    { "epoch": 0.27238805970149255, "grad_norm": 0.16856002807617188, "learning_rate": 7.313432835820896e-06, "loss": 1.4378, "step": 73 },
    { "epoch": 0.27611940298507465, "grad_norm": 0.16408328711986542, "learning_rate": 7.276119402985076e-06, "loss": 1.4674, "step": 74 },
    { "epoch": 0.2798507462686567, "grad_norm": 0.16012725234031677, "learning_rate": 7.238805970149254e-06, "loss": 1.466, "step": 75 },
    { "epoch": 0.2835820895522388, "grad_norm": 0.17093954980373383, "learning_rate": 7.2014925373134335e-06, "loss": 1.4371, "step": 76 },
    { "epoch": 0.2873134328358209, "grad_norm": 0.16494448482990265, "learning_rate": 7.164179104477612e-06, "loss": 1.4578, "step": 77 },
    { "epoch": 0.291044776119403, "grad_norm": 0.162708580493927, "learning_rate": 7.126865671641792e-06, "loss": 1.4447, "step": 78 },
    { "epoch": 0.2947761194029851, "grad_norm": 0.163913294672966, "learning_rate": 7.089552238805971e-06, "loss": 1.4515, "step": 79 },
    { "epoch": 0.29850746268656714, "grad_norm": 0.16894681751728058, "learning_rate": 7.052238805970149e-06, "loss": 1.4703, "step": 80 },
    { "epoch": 0.30223880597014924, "grad_norm": 0.1752011477947235, "learning_rate": 7.014925373134329e-06, "loss": 1.4753, "step": 81 },
    { "epoch": 0.30597014925373134, "grad_norm": 0.16714270412921906, "learning_rate": 6.9776119402985076e-06, "loss": 1.4546, "step": 82 },
    { "epoch": 0.30970149253731344, "grad_norm": 0.1680886149406433, "learning_rate": 6.9402985074626876e-06, "loss": 1.4564, "step": 83 },
    { "epoch": 0.31343283582089554, "grad_norm": 0.1619383841753006, "learning_rate": 6.902985074626867e-06, "loss": 1.4312, "step": 84 },
    { "epoch": 0.31716417910447764, "grad_norm": 0.16721871495246887, "learning_rate": 6.865671641791045e-06, "loss": 1.4359, "step": 85 },
    { "epoch": 0.3208955223880597, "grad_norm": 0.16730067133903503, "learning_rate": 6.828358208955225e-06, "loss": 1.3981, "step": 86 },
    { "epoch": 0.3246268656716418, "grad_norm": 0.16024844348430634, "learning_rate": 6.791044776119403e-06, "loss": 1.3732, "step": 87 },
    { "epoch": 0.3283582089552239, "grad_norm": 0.17665205895900726, "learning_rate": 6.7537313432835825e-06, "loss": 1.4651, "step": 88 },
    { "epoch": 0.332089552238806, "grad_norm": 0.16358672082424164, "learning_rate": 6.7164179104477625e-06, "loss": 1.4006, "step": 89 },
    { "epoch": 0.3358208955223881, "grad_norm": 0.16499486565589905, "learning_rate": 6.679104477611941e-06, "loss": 1.4177, "step": 90 },
    { "epoch": 0.33955223880597013, "grad_norm": 0.160285085439682, "learning_rate": 6.64179104477612e-06, "loss": 1.3988, "step": 91 },
    { "epoch": 0.34328358208955223, "grad_norm": 0.1694183498620987, "learning_rate": 6.604477611940298e-06, "loss": 1.4399, "step": 92 },
    { "epoch": 0.34701492537313433, "grad_norm": 0.1744842231273651, "learning_rate": 6.567164179104478e-06, "loss": 1.4744, "step": 93 },
    { "epoch": 0.35074626865671643, "grad_norm": 0.17741592228412628, "learning_rate": 6.5298507462686575e-06, "loss": 1.3623, "step": 94 },
    { "epoch": 0.35447761194029853, "grad_norm": 0.1733269989490509, "learning_rate": 6.492537313432837e-06, "loss": 1.3362, "step": 95 },
    { "epoch": 0.3582089552238806, "grad_norm": 0.16193453967571259, "learning_rate": 6.455223880597016e-06, "loss": 1.3674, "step": 96 },
    { "epoch": 0.3619402985074627, "grad_norm": 0.17822274565696716, "learning_rate": 6.417910447761194e-06, "loss": 1.3367, "step": 97 },
    { "epoch": 0.3656716417910448, "grad_norm": 0.1587613821029663, "learning_rate": 6.380597014925374e-06, "loss": 1.4044, "step": 98 },
    { "epoch": 0.3694029850746269, "grad_norm": 0.17838972806930542, "learning_rate": 6.343283582089553e-06, "loss": 1.3835, "step": 99 },
    { "epoch": 0.373134328358209, "grad_norm": 0.18384261429309845, "learning_rate": 6.3059701492537316e-06, "loss": 1.3907, "step": 100 },
    { "epoch": 0.376865671641791, "grad_norm": 0.20531602203845978, "learning_rate": 6.2686567164179116e-06, "loss": 1.404, "step": 101 },
    { "epoch": 0.3805970149253731, "grad_norm": 0.18832607567310333, "learning_rate": 6.23134328358209e-06, "loss": 1.3885, "step": 102 },
    { "epoch": 0.3843283582089552, "grad_norm": 0.18085862696170807, "learning_rate": 6.194029850746269e-06, "loss": 1.4299, "step": 103 },
    { "epoch": 0.3880597014925373, "grad_norm": 0.1742199808359146, "learning_rate": 6.156716417910447e-06, "loss": 1.3432, "step": 104 },
    { "epoch": 0.3917910447761194, "grad_norm": 0.19112876057624817, "learning_rate": 6.119402985074627e-06, "loss": 1.3922, "step": 105 },
    { "epoch": 0.39552238805970147, "grad_norm": 0.17427971959114075, "learning_rate": 6.0820895522388065e-06, "loss": 1.3281, "step": 106 },
    { "epoch": 0.39925373134328357, "grad_norm": 0.17320787906646729, "learning_rate": 6.044776119402986e-06, "loss": 1.3558, "step": 107 },
    { "epoch": 0.40298507462686567, "grad_norm": 0.1882028579711914, "learning_rate": 6.007462686567165e-06, "loss": 1.4128, "step": 108 },
    { "epoch": 0.40671641791044777, "grad_norm": 0.18419794738292694, "learning_rate": 5.970149253731343e-06, "loss": 1.355, "step": 109 },
    { "epoch": 0.41044776119402987, "grad_norm": 0.18215858936309814, "learning_rate": 5.932835820895523e-06, "loss": 1.3285, "step": 110 },
    { "epoch": 0.4141791044776119, "grad_norm": 0.17232251167297363, "learning_rate": 5.895522388059702e-06, "loss": 1.3069, "step": 111 },
    { "epoch": 0.417910447761194, "grad_norm": 0.186564639210701, "learning_rate": 5.858208955223881e-06, "loss": 1.3672, "step": 112 },
    { "epoch": 0.4216417910447761, "grad_norm": 0.17342355847358704, "learning_rate": 5.820895522388061e-06, "loss": 1.3616, "step": 113 },
    { "epoch": 0.4253731343283582, "grad_norm": 0.18788813054561615, "learning_rate": 5.783582089552239e-06, "loss": 1.2878, "step": 114 },
    { "epoch": 0.4291044776119403, "grad_norm": 0.17116081714630127, "learning_rate": 5.746268656716418e-06, "loss": 1.3494, "step": 115 },
    { "epoch": 0.43283582089552236, "grad_norm": 0.1697102189064026, "learning_rate": 5.708955223880598e-06, "loss": 1.3445, "step": 116 },
    { "epoch": 0.43656716417910446, "grad_norm": 0.1673506200313568, "learning_rate": 5.671641791044776e-06, "loss": 1.3167, "step": 117 },
    { "epoch": 0.44029850746268656, "grad_norm": 0.1808689534664154, "learning_rate": 5.6343283582089556e-06, "loss": 1.3949, "step": 118 },
    { "epoch": 0.44402985074626866, "grad_norm": 0.1669001281261444, "learning_rate": 5.597014925373134e-06, "loss": 1.3326, "step": 119 },
    { "epoch": 0.44776119402985076, "grad_norm": 0.174747034907341, "learning_rate": 5.559701492537314e-06, "loss": 1.3475, "step": 120 },
    { "epoch": 0.45149253731343286, "grad_norm": 0.1588924378156662, "learning_rate": 5.522388059701493e-06, "loss": 1.2924, "step": 121 },
    { "epoch": 0.4552238805970149, "grad_norm": 0.17862936854362488, "learning_rate": 5.485074626865672e-06, "loss": 1.3673, "step": 122 },
    { "epoch": 0.458955223880597, "grad_norm": 0.18513405323028564, "learning_rate": 5.447761194029851e-06, "loss": 1.4756, "step": 123 },
    { "epoch": 0.4626865671641791, "grad_norm": 0.17351286113262177, "learning_rate": 5.41044776119403e-06, "loss": 1.3642, "step": 124 },
    { "epoch": 0.4664179104477612, "grad_norm": 0.17803268134593964, "learning_rate": 5.37313432835821e-06, "loss": 1.3211, "step": 125 },
    { "epoch": 0.4701492537313433, "grad_norm": 0.16946041584014893, "learning_rate": 5.335820895522389e-06, "loss": 1.3674, "step": 126 },
    { "epoch": 0.47388059701492535, "grad_norm": 0.1722063273191452, "learning_rate": 5.298507462686567e-06, "loss": 1.3314, "step": 127 },
    { "epoch": 0.47761194029850745, "grad_norm": 0.16209138929843903, "learning_rate": 5.261194029850747e-06, "loss": 1.3228, "step": 128 },
    { "epoch": 0.48134328358208955, "grad_norm": 0.16578933596611023, "learning_rate": 5.2238805970149255e-06, "loss": 1.3179, "step": 129 },
    { "epoch": 0.48507462686567165, "grad_norm": 0.1670766919851303, "learning_rate": 5.186567164179105e-06, "loss": 1.2971, "step": 130 },
    { "epoch": 0.48880597014925375, "grad_norm": 0.1636020392179489, "learning_rate": 5.149253731343285e-06, "loss": 1.2591, "step": 131 },
    { "epoch": 0.4925373134328358, "grad_norm": 0.16769669950008392, "learning_rate": 5.111940298507463e-06, "loss": 1.3518, "step": 132 },
    { "epoch": 0.4962686567164179, "grad_norm": 0.1651279628276825, "learning_rate": 5.074626865671642e-06, "loss": 1.2989, "step": 133 },
    { "epoch": 0.5, "grad_norm": 0.1701837182044983, "learning_rate": 5.037313432835821e-06, "loss": 1.3367, "step": 134 },
    { "epoch": 0.503731343283582, "grad_norm": 0.1681615263223648, "learning_rate": 5e-06, "loss": 1.297, "step": 135 },
    { "epoch": 0.5074626865671642, "grad_norm": 0.16559536755084991, "learning_rate": 4.9626865671641796e-06, "loss": 1.2732, "step": 136 },
    { "epoch": 0.5111940298507462, "grad_norm": 0.17631755769252777, "learning_rate": 4.925373134328359e-06, "loss": 1.2188, "step": 137 },
    { "epoch": 0.5149253731343284, "grad_norm": 0.16785310208797455, "learning_rate": 4.888059701492538e-06, "loss": 1.3208, "step": 138 },
    { "epoch": 0.5186567164179104, "grad_norm": 0.16594962775707245, "learning_rate": 4.850746268656717e-06, "loss": 1.285, "step": 139 },
    { "epoch": 0.5223880597014925, "grad_norm": 0.1660860776901245, "learning_rate": 4.813432835820896e-06, "loss": 1.2947, "step": 140 },
    { "epoch": 0.5261194029850746, "grad_norm": 0.17879198491573334, "learning_rate": 4.7761194029850745e-06, "loss": 1.3209, "step": 141 },
    { "epoch": 0.5298507462686567, "grad_norm": 0.19052694737911224, "learning_rate": 4.738805970149254e-06, "loss": 1.3568, "step": 142 },
    { "epoch": 0.5335820895522388, "grad_norm": 0.1747015118598938, "learning_rate": 4.701492537313434e-06, "loss": 1.3173, "step": 143 },
    { "epoch": 0.5373134328358209, "grad_norm": 0.16210874915122986, "learning_rate": 4.664179104477613e-06, "loss": 1.3077, "step": 144 },
    { "epoch": 0.5410447761194029, "grad_norm": 0.167409747838974, "learning_rate": 4.626865671641791e-06, "loss": 1.2657, "step": 145 },
    { "epoch": 0.5447761194029851, "grad_norm": 0.16306526958942413, "learning_rate": 4.58955223880597e-06, "loss": 1.2756, "step": 146 },
    { "epoch": 0.5485074626865671, "grad_norm": 0.1674441546201706, "learning_rate": 4.5522388059701495e-06, "loss": 1.2797, "step": 147 },
    { "epoch": 0.5522388059701493, "grad_norm": 0.1676347702741623, "learning_rate": 4.514925373134329e-06, "loss": 1.3139, "step": 148 },
    { "epoch": 0.5559701492537313, "grad_norm": 0.1638174206018448, "learning_rate": 4.477611940298508e-06, "loss": 1.281, "step": 149 },
    { "epoch": 0.5597014925373134, "grad_norm": 0.1755269318819046, "learning_rate": 4.440298507462687e-06, "loss": 1.2501, "step": 150 },
    { "epoch": 0.5634328358208955, "grad_norm": 0.18092264235019684, "learning_rate": 4.402985074626866e-06, "loss": 1.3676, "step": 151 },
    { "epoch": 0.5671641791044776, "grad_norm": 0.16732056438922882, "learning_rate": 4.365671641791045e-06, "loss": 1.2925, "step": 152 },
    { "epoch": 0.5708955223880597, "grad_norm": 0.1717032492160797, "learning_rate": 4.3283582089552236e-06, "loss": 1.2745, "step": 153 },
    { "epoch": 0.5746268656716418, "grad_norm": 0.17625246942043304, "learning_rate": 4.2910447761194036e-06, "loss": 1.3156, "step": 154 },
    { "epoch": 0.5783582089552238, "grad_norm": 0.16663196682929993, "learning_rate": 4.253731343283583e-06, "loss": 1.2854, "step": 155 },
    { "epoch": 0.582089552238806, "grad_norm": 0.15970146656036377, "learning_rate": 4.216417910447762e-06, "loss": 1.2561, "step": 156 },
    { "epoch": 0.585820895522388, "grad_norm": 0.17199867963790894, "learning_rate": 4.17910447761194e-06, "loss": 1.3107, "step": 157 },
    { "epoch": 0.5895522388059702, "grad_norm": 0.16908001899719238, "learning_rate": 4.141791044776119e-06, "loss": 1.3563, "step": 158 },
    { "epoch": 0.5932835820895522, "grad_norm": 0.17845936119556427, "learning_rate": 4.104477611940299e-06, "loss": 1.3361, "step": 159 },
    { "epoch": 0.5970149253731343, "grad_norm": 0.17035247385501862, "learning_rate": 4.067164179104478e-06, "loss": 1.2532, "step": 160 },
    { "epoch": 0.6007462686567164, "grad_norm": 0.1767151802778244, "learning_rate": 4.029850746268657e-06, "loss": 1.2144, "step": 161 },
    { "epoch": 0.6044776119402985, "grad_norm": 0.16750559210777283, "learning_rate": 3.992537313432836e-06, "loss": 1.3286, "step": 162 },
    { "epoch": 0.6082089552238806, "grad_norm": 0.16319824755191803, "learning_rate": 3.955223880597015e-06, "loss": 1.2833, "step": 163 },
    { "epoch": 0.6119402985074627, "grad_norm": 0.16734889149665833, "learning_rate": 3.917910447761194e-06, "loss": 1.2857, "step": 164 },
    { "epoch": 0.6156716417910447, "grad_norm": 0.15696798264980316, "learning_rate": 3.8805970149253735e-06, "loss": 1.2422, "step": 165 },
    { "epoch": 0.6194029850746269, "grad_norm": 0.16202400624752045, "learning_rate": 3.843283582089553e-06, "loss": 1.275, "step": 166 },
    { "epoch": 0.6231343283582089, "grad_norm": 0.17492756247520447, "learning_rate": 3.8059701492537314e-06, "loss": 1.3543, "step": 167 },
    { "epoch": 0.6268656716417911, "grad_norm": 0.16580115258693695, "learning_rate": 3.7686567164179105e-06, "loss": 1.22, "step": 168 },
    { "epoch": 0.6305970149253731, "grad_norm": 0.16033506393432617, "learning_rate": 3.73134328358209e-06, "loss": 1.2648, "step": 169 },
    { "epoch": 0.6343283582089553, "grad_norm": 0.17301562428474426, "learning_rate": 3.6940298507462693e-06, "loss": 1.2431, "step": 170 },
    { "epoch": 0.6380597014925373, "grad_norm": 0.16413499414920807, "learning_rate": 3.656716417910448e-06, "loss": 1.2206, "step": 171 },
    { "epoch": 0.6417910447761194, "grad_norm": 0.17369578778743744, "learning_rate": 3.619402985074627e-06, "loss": 1.3589, "step": 172 },
    { "epoch": 0.6455223880597015, "grad_norm": 0.16966509819030762, "learning_rate": 3.582089552238806e-06, "loss": 1.3456, "step": 173 },
    { "epoch": 0.6492537313432836, "grad_norm": 0.17732375860214233, "learning_rate": 3.5447761194029855e-06, "loss": 1.2571, "step": 174 },
    { "epoch": 0.6529850746268657, "grad_norm": 0.17640697956085205, "learning_rate": 3.5074626865671646e-06, "loss": 1.3174, "step": 175 },
    { "epoch": 0.6567164179104478, "grad_norm": 0.16611693799495697, "learning_rate": 3.4701492537313438e-06, "loss": 1.2547, "step": 176 },
    { "epoch": 0.6604477611940298, "grad_norm": 0.1549360752105713, "learning_rate": 3.4328358208955225e-06, "loss": 1.2581, "step": 177 },
    { "epoch": 0.664179104477612, "grad_norm": 0.16762341558933258, "learning_rate": 3.3955223880597017e-06, "loss": 1.2983, "step": 178 },
    { "epoch": 0.667910447761194, "grad_norm": 0.16653385758399963, "learning_rate": 3.3582089552238813e-06, "loss": 1.2939, "step": 179 },
    { "epoch": 0.6716417910447762, "grad_norm": 0.17225240170955658, "learning_rate": 3.32089552238806e-06, "loss": 1.3273, "step": 180 },
    { "epoch": 0.6753731343283582, "grad_norm": 0.1645621955394745, "learning_rate": 3.283582089552239e-06, "loss": 1.2502, "step": 181 },
    { "epoch": 0.6791044776119403, "grad_norm": 0.17103064060211182, "learning_rate": 3.2462686567164183e-06, "loss": 1.2291, "step": 182 },
    { "epoch": 0.6828358208955224, "grad_norm": 0.16183358430862427, "learning_rate": 3.208955223880597e-06, "loss": 1.2709, "step": 183 },
    { "epoch": 0.6865671641791045, "grad_norm": 0.17078474164009094, "learning_rate": 3.1716417910447766e-06, "loss": 1.2756, "step": 184 },
    { "epoch": 0.6902985074626866, "grad_norm": 0.16668911278247833, "learning_rate": 3.1343283582089558e-06, "loss": 1.223, "step": 185 },
    { "epoch": 0.6940298507462687, "grad_norm": 0.16830188035964966, "learning_rate": 3.0970149253731345e-06, "loss": 1.2658, "step": 186 },
    { "epoch": 0.6977611940298507, "grad_norm": 0.15971961617469788, "learning_rate": 3.0597014925373137e-06, "loss": 1.2552, "step": 187 },
    { "epoch": 0.7014925373134329, "grad_norm": 0.16681736707687378, "learning_rate": 3.022388059701493e-06, "loss": 1.2706, "step": 188 },
    { "epoch": 0.7052238805970149, "grad_norm": 0.16385811567306519, "learning_rate": 2.9850746268656716e-06, "loss": 1.2358, "step": 189 },
    { "epoch": 0.7089552238805971, "grad_norm": 0.15831749141216278, "learning_rate": 2.947761194029851e-06, "loss": 1.2175, "step": 190 },
    { "epoch": 0.7126865671641791, "grad_norm": 0.18402300775051117, "learning_rate": 2.9104477611940303e-06, "loss": 1.2108, "step": 191 },
    { "epoch": 0.7164179104477612, "grad_norm": 0.1604745239019394, "learning_rate": 2.873134328358209e-06, "loss": 1.2189, "step": 192 },
    { "epoch": 0.7201492537313433, "grad_norm": 0.16935843229293823, "learning_rate": 2.835820895522388e-06, "loss": 1.305, "step": 193 },
    { "epoch": 0.7238805970149254, "grad_norm": 0.16648651659488678, "learning_rate": 2.798507462686567e-06, "loss": 1.3143, "step": 194 },
    { "epoch": 0.7276119402985075, "grad_norm": 0.18641819059848785, "learning_rate": 2.7611940298507465e-06, "loss": 1.2998, "step": 195 },
    { "epoch": 0.7313432835820896, "grad_norm": 0.17491459846496582, "learning_rate": 2.7238805970149257e-06, "loss": 1.2717, "step": 196 },
    { "epoch": 0.7350746268656716, "grad_norm": 0.1580687165260315, "learning_rate": 2.686567164179105e-06, "loss": 1.2358, "step": 197 },
    { "epoch": 0.7388059701492538, "grad_norm": 0.1613190770149231, "learning_rate": 2.6492537313432836e-06, "loss": 1.2249, "step": 198 },
    { "epoch": 0.7425373134328358, "grad_norm": 0.16894753277301788, "learning_rate": 2.6119402985074627e-06, "loss": 1.2835, "step": 199 },
    { "epoch": 0.746268656716418, "grad_norm": 0.16808702051639557, "learning_rate": 2.5746268656716423e-06, "loss": 1.2383, "step": 200 },
    { "epoch": 0.75, "grad_norm": 0.16712962090969086, "learning_rate": 2.537313432835821e-06, "loss": 1.2033, "step": 201 },
    { "epoch": 0.753731343283582, "grad_norm": 0.16377860307693481, "learning_rate": 2.5e-06, "loss": 1.2512, "step": 202 },
    { "epoch": 0.7574626865671642, "grad_norm": 0.16047263145446777, "learning_rate": 2.4626865671641794e-06, "loss": 1.2238, "step": 203 },
    { "epoch": 0.7611940298507462, "grad_norm": 0.16383422911167145, "learning_rate": 2.4253731343283585e-06, "loss": 1.232, "step": 204 },
    { "epoch": 0.7649253731343284, "grad_norm": 0.1609911322593689, "learning_rate": 2.3880597014925373e-06, "loss": 1.2341, "step": 205 },
    { "epoch": 0.7686567164179104, "grad_norm": 0.16194207966327667, "learning_rate": 2.350746268656717e-06, "loss": 1.2495, "step": 206 },
    { "epoch": 0.7723880597014925, "grad_norm": 0.16773447394371033, "learning_rate": 2.3134328358208956e-06, "loss": 1.2272, "step": 207 },
    { "epoch": 0.7761194029850746, "grad_norm": 0.1642608940601349, "learning_rate": 2.2761194029850747e-06, "loss": 1.2116, "step": 208 },
    { "epoch": 0.7798507462686567, "grad_norm": 0.16738657653331757, "learning_rate": 2.238805970149254e-06, "loss": 1.2606, "step": 209 },
    { "epoch": 0.7835820895522388, "grad_norm": 0.17621806263923645, "learning_rate": 2.201492537313433e-06, "loss": 1.2387, "step": 210 },
    { "epoch": 0.7873134328358209, "grad_norm": 0.1773146092891693, "learning_rate": 2.1641791044776118e-06, "loss": 1.2842, "step": 211 },
    { "epoch": 0.7910447761194029, "grad_norm": 0.1544102132320404, "learning_rate": 2.1268656716417914e-06, "loss": 1.2359, "step": 212 },
    { "epoch": 0.7947761194029851, "grad_norm": 0.16131798923015594, "learning_rate": 2.08955223880597e-06, "loss": 1.1949, "step": 213 },
    { "epoch": 0.7985074626865671, "grad_norm": 0.17710529267787933, "learning_rate": 2.0522388059701497e-06, "loss": 1.2679, "step": 214 },
    { "epoch": 0.8022388059701493, "grad_norm": 0.19230812788009644, "learning_rate": 2.0149253731343284e-06, "loss": 1.3954, "step": 215 },
    { "epoch": 0.8059701492537313, "grad_norm": 0.16153179109096527, "learning_rate": 1.9776119402985076e-06, "loss": 1.2468, "step": 216 },
    { "epoch": 0.8097014925373134, "grad_norm": 0.15461866557598114, "learning_rate": 1.9402985074626867e-06, "loss": 1.2267, "step": 217 },
    { "epoch": 0.8134328358208955, "grad_norm": 0.1653779000043869, "learning_rate": 1.9029850746268657e-06, "loss": 1.2571, "step": 218 },
    { "epoch": 0.8171641791044776, "grad_norm": 0.17886200547218323, "learning_rate": 1.865671641791045e-06, "loss": 1.2085, "step": 219 },
    { "epoch": 0.8208955223880597, "grad_norm": 0.16441503167152405, "learning_rate": 1.828358208955224e-06, "loss": 1.2386, "step": 220 },
    { "epoch": 0.8246268656716418, "grad_norm": 0.164820596575737, "learning_rate": 1.791044776119403e-06, "loss": 1.2483, "step": 221 },
    { "epoch": 0.8283582089552238, "grad_norm": 0.17178238928318024, "learning_rate": 1.7537313432835823e-06, "loss": 1.248, "step": 222 },
    { "epoch": 0.832089552238806, "grad_norm": 0.16042672097682953, "learning_rate": 1.7164179104477613e-06, "loss": 1.2101, "step": 223 },
    { "epoch": 0.835820895522388, "grad_norm": 0.15831167995929718, "learning_rate": 1.6791044776119406e-06, "loss": 1.2597, "step": 224 },
    { "epoch": 0.8395522388059702, "grad_norm": 0.15747682750225067, "learning_rate": 1.6417910447761196e-06, "loss": 1.2392, "step": 225 },
    { "epoch": 0.8432835820895522, "grad_norm": 0.1638425588607788, "learning_rate": 1.6044776119402985e-06, "loss": 1.2099, "step": 226 },
    { "epoch": 0.8470149253731343, "grad_norm": 0.1755628138780594, "learning_rate": 1.5671641791044779e-06, "loss": 1.3201, "step": 227 },
    { "epoch": 0.8507462686567164, "grad_norm": 0.1612577736377716, "learning_rate": 1.5298507462686568e-06, "loss": 1.2085, "step": 228 },
    { "epoch": 0.8544776119402985, "grad_norm": 0.16105295717716217, "learning_rate": 1.4925373134328358e-06, "loss": 1.2584, "step": 229 },
    { "epoch": 0.8582089552238806, "grad_norm": 0.15586678683757782, "learning_rate": 1.4552238805970152e-06, "loss": 1.2495, "step": 230 },
    { "epoch": 0.8619402985074627, "grad_norm": 0.1646788865327835, "learning_rate": 1.417910447761194e-06, "loss": 1.2579, "step": 231 },
    { "epoch": 0.8656716417910447, "grad_norm": 0.16302216053009033, "learning_rate": 1.3805970149253733e-06, "loss": 1.233, "step": 232 },
    { "epoch": 0.8694029850746269, "grad_norm": 0.16793973743915558, "learning_rate": 1.3432835820895524e-06, "loss": 1.178, "step": 233 },
    { "epoch": 0.8731343283582089, "grad_norm": 0.1653694361448288, "learning_rate": 1.3059701492537314e-06, "loss": 1.2164, "step": 234 },
    { "epoch": 0.8768656716417911, "grad_norm": 0.16517435014247894, "learning_rate": 1.2686567164179105e-06, "loss": 1.2058, "step": 235 },
    { "epoch": 0.8805970149253731, "grad_norm": 0.1629129946231842, "learning_rate": 1.2313432835820897e-06, "loss": 1.241, "step": 236 },
    { "epoch": 0.8843283582089553, "grad_norm": 0.16396047174930573, "learning_rate": 1.1940298507462686e-06, "loss": 1.2372, "step": 237 },
    { "epoch": 0.8880597014925373, "grad_norm": 0.16376972198486328, "learning_rate": 1.1567164179104478e-06, "loss": 1.2117, "step": 238 },
    { "epoch": 0.8917910447761194, "grad_norm": 0.16420400142669678, "learning_rate": 1.119402985074627e-06, "loss": 1.2531, "step": 239 },
    { "epoch": 0.8955223880597015, "grad_norm": 0.16184940934181213, "learning_rate": 1.0820895522388059e-06, "loss": 1.2683, "step": 240 },
    { "epoch": 0.8992537313432836, "grad_norm": 0.16468416154384613, "learning_rate": 1.044776119402985e-06, "loss": 1.1979, "step": 241 },
    { "epoch": 0.9029850746268657, "grad_norm": 0.16101981699466705, "learning_rate": 1.0074626865671642e-06, "loss": 1.2172, "step": 242 },
    { "epoch": 0.9067164179104478, "grad_norm": 0.15788166224956512, "learning_rate": 9.701492537313434e-07, "loss": 1.2071, "step": 243 },
    { "epoch": 0.9104477611940298, "grad_norm": 0.16054342687129974, "learning_rate": 9.328358208955225e-07, "loss": 1.2194, "step": 244 },
    { "epoch": 0.914179104477612, "grad_norm": 0.16086827218532562, "learning_rate": 8.955223880597015e-07, "loss": 1.237, "step": 245 },
    { "epoch": 0.917910447761194, "grad_norm": 0.16154353320598602, "learning_rate": 8.582089552238806e-07, "loss": 1.259, "step": 246 },
    { "epoch": 0.9216417910447762, "grad_norm": 0.1560903638601303, "learning_rate": 8.208955223880598e-07, "loss": 1.2307, "step": 247 },
    { "epoch": 0.9253731343283582, "grad_norm": 0.1581345647573471, "learning_rate": 7.835820895522389e-07, "loss": 1.2411, "step": 248 },
    { "epoch": 0.9291044776119403, "grad_norm": 0.15719743072986603, "learning_rate": 7.462686567164179e-07, "loss": 1.2384, "step": 249 },
    { "epoch": 0.9328358208955224, "grad_norm": 0.18031233549118042, "learning_rate": 7.08955223880597e-07, "loss": 1.2843, "step": 250 },
    { "epoch": 0.9365671641791045, "grad_norm": 0.1669127345085144, "learning_rate": 6.716417910447762e-07, "loss": 1.226, "step": 251 },
    { "epoch": 0.9402985074626866, "grad_norm": 0.16808092594146729, "learning_rate": 6.343283582089553e-07, "loss": 1.2354, "step": 252 },
    { "epoch": 0.9440298507462687, "grad_norm": 0.15523065626621246, "learning_rate": 5.970149253731343e-07, "loss": 1.1945, "step": 253 },
    { "epoch": 0.9477611940298507, "grad_norm": 0.1601947396993637, "learning_rate": 5.597014925373135e-07, "loss": 1.2184, "step": 254 },
    { "epoch": 0.9514925373134329, "grad_norm": 0.16021329164505005, "learning_rate": 5.223880597014925e-07, "loss": 1.2263, "step": 255 },
    { "epoch": 0.9552238805970149, "grad_norm": 0.1538553386926651, "learning_rate": 4.850746268656717e-07, "loss": 1.2348, "step": 256 },
    { "epoch": 0.9589552238805971, "grad_norm": 0.19161516427993774, "learning_rate": 4.4776119402985074e-07, "loss": 1.3115, "step": 257 },
    { "epoch": 0.9626865671641791, "grad_norm": 0.1565106213092804, "learning_rate": 4.104477611940299e-07, "loss": 1.2344, "step": 258 },
    { "epoch": 0.9664179104477612, "grad_norm": 0.1629861295223236, "learning_rate": 3.7313432835820895e-07, "loss": 1.1741, "step": 259 },
    { "epoch": 0.9701492537313433, "grad_norm": 0.16237621009349823, "learning_rate": 3.358208955223881e-07, "loss": 1.2815, "step": 260 },
    { "epoch": 0.9738805970149254, "grad_norm": 0.16195756196975708, "learning_rate": 2.9850746268656716e-07, "loss": 1.2463, "step": 261 },
    { "epoch": 0.9776119402985075, "grad_norm": 0.1687113344669342, "learning_rate": 2.6119402985074626e-07, "loss": 1.2038, "step": 262 },
    { "epoch": 0.9813432835820896, "grad_norm": 0.16680650413036346, "learning_rate": 2.2388059701492537e-07, "loss": 1.2521, "step": 263 },
    { "epoch": 0.9850746268656716, "grad_norm": 0.1574639528989792, "learning_rate": 1.8656716417910447e-07, "loss": 1.259, "step": 264 },
    { "epoch": 0.9888059701492538, "grad_norm": 0.1681501567363739, "learning_rate": 1.4925373134328358e-07, "loss": 1.2749, "step": 265 },
    { "epoch": 0.9925373134328358, "grad_norm": 0.16339531540870667, "learning_rate": 1.1194029850746268e-07, "loss": 1.2536, "step": 266 },
    { "epoch": 0.996268656716418, "grad_norm": 0.1551850438117981, "learning_rate": 7.462686567164179e-08, "loss": 1.2501, "step": 267 },
    { "epoch": 1.0, "grad_norm": 0.1640440821647644, "learning_rate": 3.7313432835820895e-08, "loss": 1.2332, "step": 268 },
    { "epoch": 1.0, "eval_loss": 1.2588385343551636, "eval_runtime": 20.0847, "eval_samples_per_second": 1.394, "eval_steps_per_second": 0.199, "step": 268 }
  ],
  "logging_steps": 1.0,
  "max_steps": 268,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.270226746239877e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
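
For reference, a minimal sketch of how a state file like this can be inspected programmatically, assuming the JSON above is saved locally as trainer_state.json (the log_history schema, per-step entries with step/loss/learning_rate/grad_norm plus a final eval entry, is what transformers.Trainer writes by default):

# Minimal sketch: read trainer_state.json and plot the training loss curve.
# Assumes the file sits in the working directory; only the standard library
# and matplotlib are required.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step training entries carry "loss"; the final entry carries "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]

plt.plot(steps, losses, label="train loss")
if eval_logs:
    plt.axhline(eval_logs[-1]["eval_loss"], linestyle="--",
                label=f"final eval loss = {eval_logs[-1]['eval_loss']:.4f}")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.show()

Note that the learning_rate column itself encodes the schedule: it falls linearly from 1e-05 at step 1 to 3.73e-08 at step 268 (max_steps), i.e. a plain linear decay to zero with no warmup, decreasing by 1e-05/268 per step.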