|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 603, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004975124378109453, |
|
"grad_norm": 5.879948830184706, |
|
"learning_rate": 0.0, |
|
"loss": 0.7667, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009950248756218905, |
|
"grad_norm": 5.2235442655986954, |
|
"learning_rate": 1.3114754098360657e-06, |
|
"loss": 0.7672, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.014925373134328358, |
|
"grad_norm": 6.05052115788454, |
|
"learning_rate": 2.6229508196721314e-06, |
|
"loss": 0.7488, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01990049751243781, |
|
"grad_norm": 5.533884257629409, |
|
"learning_rate": 3.934426229508197e-06, |
|
"loss": 0.7477, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.024875621890547265, |
|
"grad_norm": 2.9452605687276066, |
|
"learning_rate": 5.245901639344263e-06, |
|
"loss": 0.7031, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.029850746268656716, |
|
"grad_norm": 3.088496358850151, |
|
"learning_rate": 6.5573770491803276e-06, |
|
"loss": 0.6635, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03482587064676617, |
|
"grad_norm": 1.1483388314243999, |
|
"learning_rate": 7.868852459016394e-06, |
|
"loss": 0.5938, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03980099502487562, |
|
"grad_norm": 1.038360280928093, |
|
"learning_rate": 9.18032786885246e-06, |
|
"loss": 0.6236, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04477611940298507, |
|
"grad_norm": 1.62146931905999, |
|
"learning_rate": 1.0491803278688525e-05, |
|
"loss": 0.6265, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04975124378109453, |
|
"grad_norm": 1.091027063595789, |
|
"learning_rate": 1.1803278688524591e-05, |
|
"loss": 0.5663, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05472636815920398, |
|
"grad_norm": 1.118045200909107, |
|
"learning_rate": 1.3114754098360655e-05, |
|
"loss": 0.5634, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05970149253731343, |
|
"grad_norm": 1.1296501537591321, |
|
"learning_rate": 1.4426229508196722e-05, |
|
"loss": 0.6183, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06467661691542288, |
|
"grad_norm": 1.1267046281368494, |
|
"learning_rate": 1.5737704918032788e-05, |
|
"loss": 0.5825, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06965174129353234, |
|
"grad_norm": 0.8499261863813117, |
|
"learning_rate": 1.7049180327868854e-05, |
|
"loss": 0.5805, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 1.3598777450843793, |
|
"learning_rate": 1.836065573770492e-05, |
|
"loss": 0.5932, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07960199004975124, |
|
"grad_norm": 0.7667369938360317, |
|
"learning_rate": 1.9672131147540985e-05, |
|
"loss": 0.5842, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0845771144278607, |
|
"grad_norm": 0.696963272396721, |
|
"learning_rate": 2.098360655737705e-05, |
|
"loss": 0.5457, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08955223880597014, |
|
"grad_norm": 0.7786007741172544, |
|
"learning_rate": 2.2295081967213113e-05, |
|
"loss": 0.5941, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0945273631840796, |
|
"grad_norm": 0.6350186171919544, |
|
"learning_rate": 2.3606557377049182e-05, |
|
"loss": 0.5547, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.09950248756218906, |
|
"grad_norm": 0.7025126334296926, |
|
"learning_rate": 2.4918032786885248e-05, |
|
"loss": 0.571, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1044776119402985, |
|
"grad_norm": 0.809615897517396, |
|
"learning_rate": 2.622950819672131e-05, |
|
"loss": 0.5567, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.10945273631840796, |
|
"grad_norm": 0.712052865613253, |
|
"learning_rate": 2.754098360655738e-05, |
|
"loss": 0.5726, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.11442786069651742, |
|
"grad_norm": 0.6685647092557887, |
|
"learning_rate": 2.8852459016393445e-05, |
|
"loss": 0.5624, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.11940298507462686, |
|
"grad_norm": 0.6788511366076325, |
|
"learning_rate": 3.0163934426229507e-05, |
|
"loss": 0.5436, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.12437810945273632, |
|
"grad_norm": 0.7528664788886847, |
|
"learning_rate": 3.1475409836065576e-05, |
|
"loss": 0.5544, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12935323383084577, |
|
"grad_norm": 0.6862044740604918, |
|
"learning_rate": 3.278688524590164e-05, |
|
"loss": 0.5499, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.13432835820895522, |
|
"grad_norm": 0.6138161287460158, |
|
"learning_rate": 3.409836065573771e-05, |
|
"loss": 0.5445, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.13930348258706468, |
|
"grad_norm": 0.6540655635462894, |
|
"learning_rate": 3.5409836065573773e-05, |
|
"loss": 0.5535, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.14427860696517414, |
|
"grad_norm": 0.6213994340053169, |
|
"learning_rate": 3.672131147540984e-05, |
|
"loss": 0.5593, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 0.5255618873475716, |
|
"learning_rate": 3.8032786885245905e-05, |
|
"loss": 0.5306, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15422885572139303, |
|
"grad_norm": 0.5618938647818913, |
|
"learning_rate": 3.934426229508197e-05, |
|
"loss": 0.5481, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.15920398009950248, |
|
"grad_norm": 0.5382661117044717, |
|
"learning_rate": 4.0655737704918036e-05, |
|
"loss": 0.5407, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.16417910447761194, |
|
"grad_norm": 0.9651676443670698, |
|
"learning_rate": 4.19672131147541e-05, |
|
"loss": 0.5585, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1691542288557214, |
|
"grad_norm": 0.564225092510184, |
|
"learning_rate": 4.3278688524590174e-05, |
|
"loss": 0.5189, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.17412935323383086, |
|
"grad_norm": 0.5321277446853472, |
|
"learning_rate": 4.4590163934426226e-05, |
|
"loss": 0.5459, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1791044776119403, |
|
"grad_norm": 0.4991846369713298, |
|
"learning_rate": 4.59016393442623e-05, |
|
"loss": 0.5076, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.18407960199004975, |
|
"grad_norm": 0.49071480532725875, |
|
"learning_rate": 4.7213114754098365e-05, |
|
"loss": 0.5024, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1890547263681592, |
|
"grad_norm": 0.5943409512367948, |
|
"learning_rate": 4.852459016393443e-05, |
|
"loss": 0.5315, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.19402985074626866, |
|
"grad_norm": 0.5707557126537657, |
|
"learning_rate": 4.9836065573770496e-05, |
|
"loss": 0.538, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.19900497512437812, |
|
"grad_norm": 0.7926233950361419, |
|
"learning_rate": 5.114754098360657e-05, |
|
"loss": 0.5621, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.20398009950248755, |
|
"grad_norm": 0.5495160602179542, |
|
"learning_rate": 5.245901639344262e-05, |
|
"loss": 0.5414, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.208955223880597, |
|
"grad_norm": 0.5640801102119853, |
|
"learning_rate": 5.377049180327869e-05, |
|
"loss": 0.5011, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.21393034825870647, |
|
"grad_norm": 0.5415477216078182, |
|
"learning_rate": 5.508196721311476e-05, |
|
"loss": 0.5605, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.21890547263681592, |
|
"grad_norm": 0.5588862009803612, |
|
"learning_rate": 5.6393442622950824e-05, |
|
"loss": 0.5437, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 0.5029511965106807, |
|
"learning_rate": 5.770491803278689e-05, |
|
"loss": 0.5473, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.22885572139303484, |
|
"grad_norm": 0.5258790964390814, |
|
"learning_rate": 5.9016393442622956e-05, |
|
"loss": 0.5575, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.23383084577114427, |
|
"grad_norm": 0.5480016416626384, |
|
"learning_rate": 6.0327868852459015e-05, |
|
"loss": 0.5421, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.23880597014925373, |
|
"grad_norm": 0.5588192051221643, |
|
"learning_rate": 6.163934426229509e-05, |
|
"loss": 0.5605, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.24378109452736318, |
|
"grad_norm": 0.5600403065536617, |
|
"learning_rate": 6.295081967213115e-05, |
|
"loss": 0.5265, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.24875621890547264, |
|
"grad_norm": 0.6060663481562588, |
|
"learning_rate": 6.426229508196722e-05, |
|
"loss": 0.5631, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2537313432835821, |
|
"grad_norm": 0.7022502664226856, |
|
"learning_rate": 6.557377049180328e-05, |
|
"loss": 0.533, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.25870646766169153, |
|
"grad_norm": 0.6780424557118481, |
|
"learning_rate": 6.688524590163935e-05, |
|
"loss": 0.5366, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.263681592039801, |
|
"grad_norm": 0.5669957328738275, |
|
"learning_rate": 6.819672131147542e-05, |
|
"loss": 0.552, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.26865671641791045, |
|
"grad_norm": 0.5914734306512129, |
|
"learning_rate": 6.950819672131148e-05, |
|
"loss": 0.5389, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2736318407960199, |
|
"grad_norm": 0.6149348037977527, |
|
"learning_rate": 7.081967213114755e-05, |
|
"loss": 0.5274, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.27860696517412936, |
|
"grad_norm": 0.6245100364124376, |
|
"learning_rate": 7.213114754098361e-05, |
|
"loss": 0.5659, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2835820895522388, |
|
"grad_norm": 0.6004973149821893, |
|
"learning_rate": 7.344262295081968e-05, |
|
"loss": 0.5358, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.2885572139303483, |
|
"grad_norm": 0.5997254911753757, |
|
"learning_rate": 7.475409836065574e-05, |
|
"loss": 0.5645, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2935323383084577, |
|
"grad_norm": 0.6246002314997682, |
|
"learning_rate": 7.606557377049181e-05, |
|
"loss": 0.5663, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 0.5406788999040716, |
|
"learning_rate": 7.737704918032788e-05, |
|
"loss": 0.5453, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3034825870646766, |
|
"grad_norm": 0.5816580903983424, |
|
"learning_rate": 7.868852459016394e-05, |
|
"loss": 0.5111, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.30845771144278605, |
|
"grad_norm": 0.5816022558938057, |
|
"learning_rate": 8e-05, |
|
"loss": 0.5173, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.31343283582089554, |
|
"grad_norm": 0.5504439572021722, |
|
"learning_rate": 7.99993280608401e-05, |
|
"loss": 0.5334, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.31840796019900497, |
|
"grad_norm": 0.5838277491059845, |
|
"learning_rate": 7.999731226593547e-05, |
|
"loss": 0.5172, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.32338308457711445, |
|
"grad_norm": 0.6579416139688873, |
|
"learning_rate": 7.999395268301069e-05, |
|
"loss": 0.5475, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3283582089552239, |
|
"grad_norm": 0.5320888799910365, |
|
"learning_rate": 7.998924942493754e-05, |
|
"loss": 0.5102, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.5281701415578218, |
|
"learning_rate": 7.99832026497312e-05, |
|
"loss": 0.5536, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3383084577114428, |
|
"grad_norm": 0.5127833977720212, |
|
"learning_rate": 7.997581256054488e-05, |
|
"loss": 0.5621, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.34328358208955223, |
|
"grad_norm": 0.5167985708679069, |
|
"learning_rate": 7.996707940566312e-05, |
|
"loss": 0.5678, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3482587064676617, |
|
"grad_norm": 0.5313575574415697, |
|
"learning_rate": 7.995700347849337e-05, |
|
"loss": 0.5792, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.35323383084577115, |
|
"grad_norm": 0.46516112663404313, |
|
"learning_rate": 7.994558511755611e-05, |
|
"loss": 0.5333, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3582089552238806, |
|
"grad_norm": 0.5127434932833409, |
|
"learning_rate": 7.993282470647356e-05, |
|
"loss": 0.5381, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.36318407960199006, |
|
"grad_norm": 0.4611747374859348, |
|
"learning_rate": 7.991872267395666e-05, |
|
"loss": 0.5297, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.3681592039800995, |
|
"grad_norm": 0.48491223979662357, |
|
"learning_rate": 7.990327949379087e-05, |
|
"loss": 0.5496, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 0.44507811144929044, |
|
"learning_rate": 7.988649568482003e-05, |
|
"loss": 0.5613, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3781094527363184, |
|
"grad_norm": 0.5065099749041349, |
|
"learning_rate": 7.986837181092907e-05, |
|
"loss": 0.545, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.38308457711442784, |
|
"grad_norm": 0.4496572705412145, |
|
"learning_rate": 7.984890848102501e-05, |
|
"loss": 0.5625, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3880597014925373, |
|
"grad_norm": 0.49181167685096244, |
|
"learning_rate": 7.982810634901654e-05, |
|
"loss": 0.5287, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.39303482587064675, |
|
"grad_norm": 0.5095805376917029, |
|
"learning_rate": 7.980596611379202e-05, |
|
"loss": 0.5218, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.39800995024875624, |
|
"grad_norm": 0.4524659588566981, |
|
"learning_rate": 7.9782488519196e-05, |
|
"loss": 0.5421, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.40298507462686567, |
|
"grad_norm": 0.48114599310151146, |
|
"learning_rate": 7.975767435400424e-05, |
|
"loss": 0.5322, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4079601990049751, |
|
"grad_norm": 0.4455277267645172, |
|
"learning_rate": 7.973152445189719e-05, |
|
"loss": 0.5847, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4129353233830846, |
|
"grad_norm": 0.4788117510507055, |
|
"learning_rate": 7.970403969143203e-05, |
|
"loss": 0.5616, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.417910447761194, |
|
"grad_norm": 0.4606436249359568, |
|
"learning_rate": 7.967522099601309e-05, |
|
"loss": 0.5371, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4228855721393035, |
|
"grad_norm": 0.44668898675104635, |
|
"learning_rate": 7.964506933386088e-05, |
|
"loss": 0.5291, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.42786069651741293, |
|
"grad_norm": 0.4502604139613404, |
|
"learning_rate": 7.961358571797953e-05, |
|
"loss": 0.5324, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.43283582089552236, |
|
"grad_norm": 0.43560491156054876, |
|
"learning_rate": 7.958077120612275e-05, |
|
"loss": 0.5245, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.43781094527363185, |
|
"grad_norm": 0.4842024445653769, |
|
"learning_rate": 7.95466269007583e-05, |
|
"loss": 0.5513, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.4427860696517413, |
|
"grad_norm": 0.42614132833617197, |
|
"learning_rate": 7.9511153949031e-05, |
|
"loss": 0.5451, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 0.4891312472692003, |
|
"learning_rate": 7.947435354272414e-05, |
|
"loss": 0.5263, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4527363184079602, |
|
"grad_norm": 0.4545172494931765, |
|
"learning_rate": 7.943622691821938e-05, |
|
"loss": 0.5396, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.4577114427860697, |
|
"grad_norm": 0.448529204030658, |
|
"learning_rate": 7.939677535645533e-05, |
|
"loss": 0.5264, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.4626865671641791, |
|
"grad_norm": 0.4516516546653182, |
|
"learning_rate": 7.935600018288447e-05, |
|
"loss": 0.5456, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.46766169154228854, |
|
"grad_norm": 0.46358875853260084, |
|
"learning_rate": 7.931390276742859e-05, |
|
"loss": 0.5075, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.472636815920398, |
|
"grad_norm": 0.4388895352432698, |
|
"learning_rate": 7.927048452443279e-05, |
|
"loss": 0.5121, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.47761194029850745, |
|
"grad_norm": 0.44989392045392973, |
|
"learning_rate": 7.922574691261794e-05, |
|
"loss": 0.556, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.48258706467661694, |
|
"grad_norm": 0.4132769512058111, |
|
"learning_rate": 7.917969143503172e-05, |
|
"loss": 0.5201, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.48756218905472637, |
|
"grad_norm": 0.4748955194225827, |
|
"learning_rate": 7.913231963899806e-05, |
|
"loss": 0.5548, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.4925373134328358, |
|
"grad_norm": 0.40947989720005395, |
|
"learning_rate": 7.908363311606525e-05, |
|
"loss": 0.5409, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4975124378109453, |
|
"grad_norm": 0.38003180467688796, |
|
"learning_rate": 7.903363350195229e-05, |
|
"loss": 0.5125, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5024875621890548, |
|
"grad_norm": 0.6902176948042787, |
|
"learning_rate": 7.898232247649414e-05, |
|
"loss": 0.5169, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5074626865671642, |
|
"grad_norm": 0.4242062359610916, |
|
"learning_rate": 7.892970176358519e-05, |
|
"loss": 0.5112, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5124378109452736, |
|
"grad_norm": 0.44210587009357805, |
|
"learning_rate": 7.887577313112129e-05, |
|
"loss": 0.5478, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5174129353233831, |
|
"grad_norm": 0.46468109194219087, |
|
"learning_rate": 7.882053839094045e-05, |
|
"loss": 0.5222, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.5223880597014925, |
|
"grad_norm": 0.4130922336671526, |
|
"learning_rate": 7.876399939876194e-05, |
|
"loss": 0.5369, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.527363184079602, |
|
"grad_norm": 0.42770523171556496, |
|
"learning_rate": 7.870615805412387e-05, |
|
"loss": 0.5249, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.5323383084577115, |
|
"grad_norm": 0.42829673493515213, |
|
"learning_rate": 7.864701630031949e-05, |
|
"loss": 0.5256, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5373134328358209, |
|
"grad_norm": 0.4577850551423171, |
|
"learning_rate": 7.858657612433179e-05, |
|
"loss": 0.5017, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5422885572139303, |
|
"grad_norm": 0.4580208652431875, |
|
"learning_rate": 7.852483955676685e-05, |
|
"loss": 0.5385, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5472636815920398, |
|
"grad_norm": 0.4521081619187588, |
|
"learning_rate": 7.846180867178553e-05, |
|
"loss": 0.519, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5522388059701493, |
|
"grad_norm": 0.43746850745160454, |
|
"learning_rate": 7.839748558703383e-05, |
|
"loss": 0.5417, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.5572139303482587, |
|
"grad_norm": 0.4438955801038043, |
|
"learning_rate": 7.833187246357172e-05, |
|
"loss": 0.5187, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5621890547263682, |
|
"grad_norm": 0.4286197509144701, |
|
"learning_rate": 7.826497150580055e-05, |
|
"loss": 0.5142, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5671641791044776, |
|
"grad_norm": 0.39145027844847796, |
|
"learning_rate": 7.8196784961389e-05, |
|
"loss": 0.5319, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.572139303482587, |
|
"grad_norm": 0.4116110135547263, |
|
"learning_rate": 7.812731512119753e-05, |
|
"loss": 0.5017, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5771144278606966, |
|
"grad_norm": 0.4192891992656687, |
|
"learning_rate": 7.805656431920143e-05, |
|
"loss": 0.5471, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.582089552238806, |
|
"grad_norm": 0.41450926014695316, |
|
"learning_rate": 7.798453493241246e-05, |
|
"loss": 0.5198, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5870646766169154, |
|
"grad_norm": 0.4540241153108466, |
|
"learning_rate": 7.791122938079887e-05, |
|
"loss": 0.5848, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5920398009950248, |
|
"grad_norm": 0.39408273059441834, |
|
"learning_rate": 7.783665012720419e-05, |
|
"loss": 0.5457, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.43126153424114066, |
|
"learning_rate": 7.77607996772645e-05, |
|
"loss": 0.5576, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6019900497512438, |
|
"grad_norm": 0.4824573509496151, |
|
"learning_rate": 7.768368057932417e-05, |
|
"loss": 0.5844, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.6069651741293532, |
|
"grad_norm": 0.4186425292993652, |
|
"learning_rate": 7.760529542435029e-05, |
|
"loss": 0.5697, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6119402985074627, |
|
"grad_norm": 0.4377766818907997, |
|
"learning_rate": 7.752564684584563e-05, |
|
"loss": 0.5069, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.6169154228855721, |
|
"grad_norm": 0.6732048439878882, |
|
"learning_rate": 7.744473751976012e-05, |
|
"loss": 0.4964, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.6218905472636815, |
|
"grad_norm": 0.5045209563187125, |
|
"learning_rate": 7.7362570164401e-05, |
|
"loss": 0.5183, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6268656716417911, |
|
"grad_norm": 0.41851717230868335, |
|
"learning_rate": 7.727914754034147e-05, |
|
"loss": 0.5332, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6318407960199005, |
|
"grad_norm": 0.47281567082753584, |
|
"learning_rate": 7.719447245032788e-05, |
|
"loss": 0.5531, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.6368159203980099, |
|
"grad_norm": 0.46985505736621747, |
|
"learning_rate": 7.710854773918572e-05, |
|
"loss": 0.523, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.6417910447761194, |
|
"grad_norm": 0.449788739083477, |
|
"learning_rate": 7.702137629372388e-05, |
|
"loss": 0.5323, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.6467661691542289, |
|
"grad_norm": 0.4644321215634913, |
|
"learning_rate": 7.693296104263777e-05, |
|
"loss": 0.5294, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6517412935323383, |
|
"grad_norm": 0.44641643289164196, |
|
"learning_rate": 7.684330495641084e-05, |
|
"loss": 0.5301, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.6567164179104478, |
|
"grad_norm": 0.47484679630977794, |
|
"learning_rate": 7.675241104721487e-05, |
|
"loss": 0.5203, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6616915422885572, |
|
"grad_norm": 0.4325510766840271, |
|
"learning_rate": 7.66602823688087e-05, |
|
"loss": 0.5337, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.6149477632116208, |
|
"learning_rate": 7.656692201643569e-05, |
|
"loss": 0.5014, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6716417910447762, |
|
"grad_norm": 0.42195591915373487, |
|
"learning_rate": 7.647233312671966e-05, |
|
"loss": 0.5124, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6766169154228856, |
|
"grad_norm": 0.39318505190957936, |
|
"learning_rate": 7.637651887755955e-05, |
|
"loss": 0.5356, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.681592039800995, |
|
"grad_norm": 0.42944630349072993, |
|
"learning_rate": 7.627948248802269e-05, |
|
"loss": 0.5249, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6865671641791045, |
|
"grad_norm": 0.43007219501882216, |
|
"learning_rate": 7.618122721823656e-05, |
|
"loss": 0.5722, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6915422885572139, |
|
"grad_norm": 0.3612830585663661, |
|
"learning_rate": 7.608175636927936e-05, |
|
"loss": 0.5423, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6965174129353234, |
|
"grad_norm": 0.4504348479973107, |
|
"learning_rate": 7.598107328306902e-05, |
|
"loss": 0.5418, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7014925373134329, |
|
"grad_norm": 0.5246833441288096, |
|
"learning_rate": 7.587918134225092e-05, |
|
"loss": 0.5147, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.7064676616915423, |
|
"grad_norm": 0.35037174084972605, |
|
"learning_rate": 7.577608397008436e-05, |
|
"loss": 0.4877, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.7114427860696517, |
|
"grad_norm": 0.3908914010701244, |
|
"learning_rate": 7.56717846303274e-05, |
|
"loss": 0.5118, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.7164179104477612, |
|
"grad_norm": 0.3581238736307404, |
|
"learning_rate": 7.55662868271206e-05, |
|
"loss": 0.491, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.7213930348258707, |
|
"grad_norm": 0.3894480558267158, |
|
"learning_rate": 7.545959410486918e-05, |
|
"loss": 0.5682, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.7263681592039801, |
|
"grad_norm": 0.44359588797737537, |
|
"learning_rate": 7.535171004812409e-05, |
|
"loss": 0.5289, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.7313432835820896, |
|
"grad_norm": 0.407030891972332, |
|
"learning_rate": 7.524263828146144e-05, |
|
"loss": 0.5101, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.736318407960199, |
|
"grad_norm": 0.4063628333515133, |
|
"learning_rate": 7.513238246936077e-05, |
|
"loss": 0.5482, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.7412935323383084, |
|
"grad_norm": 0.4274106533585936, |
|
"learning_rate": 7.502094631608201e-05, |
|
"loss": 0.5191, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 0.44937783734239284, |
|
"learning_rate": 7.490833356554088e-05, |
|
"loss": 0.5359, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7512437810945274, |
|
"grad_norm": 0.4007251382417533, |
|
"learning_rate": 7.479454800118327e-05, |
|
"loss": 0.5363, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.7562189054726368, |
|
"grad_norm": 0.4209673843323663, |
|
"learning_rate": 7.467959344585796e-05, |
|
"loss": 0.4941, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.7611940298507462, |
|
"grad_norm": 0.4019802174266669, |
|
"learning_rate": 7.456347376168837e-05, |
|
"loss": 0.5182, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.7661691542288557, |
|
"grad_norm": 0.3857327836554883, |
|
"learning_rate": 7.44461928499426e-05, |
|
"loss": 0.4866, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.7711442786069652, |
|
"grad_norm": 0.4127925959560194, |
|
"learning_rate": 7.432775465090254e-05, |
|
"loss": 0.5214, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.7761194029850746, |
|
"grad_norm": 0.3886111472729733, |
|
"learning_rate": 7.420816314373139e-05, |
|
"loss": 0.4861, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.7810945273631841, |
|
"grad_norm": 0.44211955181415347, |
|
"learning_rate": 7.408742234633999e-05, |
|
"loss": 0.516, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.7860696517412935, |
|
"grad_norm": 0.4478134256976994, |
|
"learning_rate": 7.396553631525184e-05, |
|
"loss": 0.561, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7910447761194029, |
|
"grad_norm": 0.3705902473377113, |
|
"learning_rate": 7.38425091454668e-05, |
|
"loss": 0.5007, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7960199004975125, |
|
"grad_norm": 0.41437615203351147, |
|
"learning_rate": 7.371834497032353e-05, |
|
"loss": 0.5338, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8009950248756219, |
|
"grad_norm": 0.4054936427228974, |
|
"learning_rate": 7.35930479613606e-05, |
|
"loss": 0.5153, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.8059701492537313, |
|
"grad_norm": 0.41271930343767926, |
|
"learning_rate": 7.346662232817638e-05, |
|
"loss": 0.5595, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.8109452736318408, |
|
"grad_norm": 0.3944935394388094, |
|
"learning_rate": 7.333907231828755e-05, |
|
"loss": 0.5238, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.8159203980099502, |
|
"grad_norm": 0.3744177603013012, |
|
"learning_rate": 7.32104022169864e-05, |
|
"loss": 0.5171, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.8208955223880597, |
|
"grad_norm": 0.38249406328112323, |
|
"learning_rate": 7.308061634719695e-05, |
|
"loss": 0.5476, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.8258706467661692, |
|
"grad_norm": 0.44714495553689276, |
|
"learning_rate": 7.294971906932963e-05, |
|
"loss": 0.5646, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.8308457711442786, |
|
"grad_norm": 0.3799263053283722, |
|
"learning_rate": 7.281771478113474e-05, |
|
"loss": 0.5116, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.835820895522388, |
|
"grad_norm": 0.3602781959028979, |
|
"learning_rate": 7.268460791755486e-05, |
|
"loss": 0.5363, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.8407960199004975, |
|
"grad_norm": 0.3182978229069629, |
|
"learning_rate": 7.255040295057566e-05, |
|
"loss": 0.4906, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.845771144278607, |
|
"grad_norm": 0.3895900491070743, |
|
"learning_rate": 7.241510438907577e-05, |
|
"loss": 0.5105, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8507462686567164, |
|
"grad_norm": 0.40093841905363664, |
|
"learning_rate": 7.227871677867531e-05, |
|
"loss": 0.5228, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.8557213930348259, |
|
"grad_norm": 0.39214721861667695, |
|
"learning_rate": 7.214124470158308e-05, |
|
"loss": 0.5165, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.8606965174129353, |
|
"grad_norm": 0.3864483741926972, |
|
"learning_rate": 7.200269277644268e-05, |
|
"loss": 0.5512, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.8656716417910447, |
|
"grad_norm": 0.35893618552568096, |
|
"learning_rate": 7.186306565817731e-05, |
|
"loss": 0.5306, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.8706467661691543, |
|
"grad_norm": 0.3821162188738986, |
|
"learning_rate": 7.172236803783342e-05, |
|
"loss": 0.5095, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8756218905472637, |
|
"grad_norm": 0.385653159510127, |
|
"learning_rate": 7.158060464242303e-05, |
|
"loss": 0.5397, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.8805970149253731, |
|
"grad_norm": 0.36865249587374227, |
|
"learning_rate": 7.1437780234765e-05, |
|
"loss": 0.5223, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.8855721393034826, |
|
"grad_norm": 0.3695872138907406, |
|
"learning_rate": 7.129389961332492e-05, |
|
"loss": 0.4981, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.8905472636815921, |
|
"grad_norm": 0.3943495182262802, |
|
"learning_rate": 7.114896761205404e-05, |
|
"loss": 0.5482, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 0.3852233271771578, |
|
"learning_rate": 7.100298910022669e-05, |
|
"loss": 0.5451, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.900497512437811, |
|
"grad_norm": 0.37235191480919677, |
|
"learning_rate": 7.085596898227677e-05, |
|
"loss": 0.511, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.9054726368159204, |
|
"grad_norm": 0.4064372568102956, |
|
"learning_rate": 7.070791219763305e-05, |
|
"loss": 0.5293, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.9104477611940298, |
|
"grad_norm": 0.6718930483865837, |
|
"learning_rate": 7.055882372055308e-05, |
|
"loss": 0.544, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.9154228855721394, |
|
"grad_norm": 0.38336725569872504, |
|
"learning_rate": 7.040870855995619e-05, |
|
"loss": 0.5215, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.9203980099502488, |
|
"grad_norm": 0.47174074201885974, |
|
"learning_rate": 7.025757175925508e-05, |
|
"loss": 0.5207, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.9253731343283582, |
|
"grad_norm": 0.4130352554049625, |
|
"learning_rate": 7.010541839618655e-05, |
|
"loss": 0.5486, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.9303482587064676, |
|
"grad_norm": 0.3523971543268066, |
|
"learning_rate": 6.995225358264071e-05, |
|
"loss": 0.5121, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.9353233830845771, |
|
"grad_norm": 0.5715491757640165, |
|
"learning_rate": 6.979808246448938e-05, |
|
"loss": 0.5071, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.9402985074626866, |
|
"grad_norm": 0.3470649290950888, |
|
"learning_rate": 6.964291022141313e-05, |
|
"loss": 0.5044, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.945273631840796, |
|
"grad_norm": 0.3795430837969946, |
|
"learning_rate": 6.94867420667273e-05, |
|
"loss": 0.5169, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9502487562189055, |
|
"grad_norm": 0.3904017659815798, |
|
"learning_rate": 6.932958324720682e-05, |
|
"loss": 0.5762, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.9552238805970149, |
|
"grad_norm": 0.34232421212733316, |
|
"learning_rate": 6.917143904290997e-05, |
|
"loss": 0.4977, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.9601990049751243, |
|
"grad_norm": 0.3569370459819242, |
|
"learning_rate": 6.901231476700091e-05, |
|
"loss": 0.5629, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.9651741293532339, |
|
"grad_norm": 0.36974334588123947, |
|
"learning_rate": 6.885221576557127e-05, |
|
"loss": 0.5278, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.9701492537313433, |
|
"grad_norm": 0.3407897696413556, |
|
"learning_rate": 6.869114741746046e-05, |
|
"loss": 0.5157, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.9751243781094527, |
|
"grad_norm": 0.39761535489754035, |
|
"learning_rate": 6.852911513407502e-05, |
|
"loss": 0.5617, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.9800995024875622, |
|
"grad_norm": 0.3234218432091812, |
|
"learning_rate": 6.836612435920677e-05, |
|
"loss": 0.5177, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.9850746268656716, |
|
"grad_norm": 0.4334707528741285, |
|
"learning_rate": 6.820218056884993e-05, |
|
"loss": 0.5335, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.9900497512437811, |
|
"grad_norm": 0.42261946036901893, |
|
"learning_rate": 6.803728927101712e-05, |
|
"loss": 0.483, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.9950248756218906, |
|
"grad_norm": 0.4107426334903007, |
|
"learning_rate": 6.787145600555436e-05, |
|
"loss": 0.5131, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.4088302032853929, |
|
"learning_rate": 6.770468634395491e-05, |
|
"loss": 0.5371, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.0049751243781095, |
|
"grad_norm": 0.7163194260472774, |
|
"learning_rate": 6.753698588917207e-05, |
|
"loss": 0.4837, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.0099502487562189, |
|
"grad_norm": 0.3931710998290164, |
|
"learning_rate": 6.736836027543097e-05, |
|
"loss": 0.4053, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.0149253731343284, |
|
"grad_norm": 0.3992517026461951, |
|
"learning_rate": 6.719881516803931e-05, |
|
"loss": 0.388, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.0199004975124377, |
|
"grad_norm": 0.5006496476968266, |
|
"learning_rate": 6.70283562631969e-05, |
|
"loss": 0.4471, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.0248756218905473, |
|
"grad_norm": 0.457581634312986, |
|
"learning_rate": 6.685698928780442e-05, |
|
"loss": 0.4101, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.0298507462686568, |
|
"grad_norm": 0.46154762831732166, |
|
"learning_rate": 6.668471999927097e-05, |
|
"loss": 0.4053, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.0348258706467661, |
|
"grad_norm": 0.3541443217870462, |
|
"learning_rate": 6.651155418532055e-05, |
|
"loss": 0.4054, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.0398009950248757, |
|
"grad_norm": 0.4195870714919415, |
|
"learning_rate": 6.633749766379778e-05, |
|
"loss": 0.4036, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.044776119402985, |
|
"grad_norm": 0.373429921123025, |
|
"learning_rate": 6.616255628247228e-05, |
|
"loss": 0.4015, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0497512437810945, |
|
"grad_norm": 0.4020032320060145, |
|
"learning_rate": 6.59867359188423e-05, |
|
"loss": 0.3879, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.054726368159204, |
|
"grad_norm": 0.4512310371194394, |
|
"learning_rate": 6.58100424799372e-05, |
|
"loss": 0.4316, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.0597014925373134, |
|
"grad_norm": 0.3962034658409848, |
|
"learning_rate": 6.563248190211905e-05, |
|
"loss": 0.3748, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.064676616915423, |
|
"grad_norm": 0.41060872654476754, |
|
"learning_rate": 6.54540601508831e-05, |
|
"loss": 0.3892, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0696517412935322, |
|
"grad_norm": 0.4645504019709147, |
|
"learning_rate": 6.527478322065744e-05, |
|
"loss": 0.4413, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.0746268656716418, |
|
"grad_norm": 0.3836482825148122, |
|
"learning_rate": 6.509465713460157e-05, |
|
"loss": 0.4289, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.0796019900497513, |
|
"grad_norm": 0.43540736206118597, |
|
"learning_rate": 6.491368794440402e-05, |
|
"loss": 0.4056, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.0845771144278606, |
|
"grad_norm": 0.36663141189648585, |
|
"learning_rate": 6.473188173007909e-05, |
|
"loss": 0.4075, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.0895522388059702, |
|
"grad_norm": 0.40611590999810565, |
|
"learning_rate": 6.454924459976253e-05, |
|
"loss": 0.4391, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.0945273631840795, |
|
"grad_norm": 0.38113220211722076, |
|
"learning_rate": 6.436578268950632e-05, |
|
"loss": 0.3821, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.099502487562189, |
|
"grad_norm": 0.3780147945257588, |
|
"learning_rate": 6.418150216307255e-05, |
|
"loss": 0.4245, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.1044776119402986, |
|
"grad_norm": 0.3968220883846169, |
|
"learning_rate": 6.399640921172634e-05, |
|
"loss": 0.402, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.109452736318408, |
|
"grad_norm": 0.3946496719853137, |
|
"learning_rate": 6.38105100540278e-05, |
|
"loss": 0.4084, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.1144278606965174, |
|
"grad_norm": 0.4067068429286891, |
|
"learning_rate": 6.36238109356231e-05, |
|
"loss": 0.4137, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 0.38859406046358386, |
|
"learning_rate": 6.343631812903472e-05, |
|
"loss": 0.3765, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.1243781094527363, |
|
"grad_norm": 0.41119260232877347, |
|
"learning_rate": 6.324803793345057e-05, |
|
"loss": 0.3784, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.1293532338308458, |
|
"grad_norm": 0.35980531417493244, |
|
"learning_rate": 6.305897667451248e-05, |
|
"loss": 0.4196, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.1343283582089552, |
|
"grad_norm": 0.3974649762417786, |
|
"learning_rate": 6.286914070410365e-05, |
|
"loss": 0.4085, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.1393034825870647, |
|
"grad_norm": 0.4151287885867595, |
|
"learning_rate": 6.267853640013519e-05, |
|
"loss": 0.4123, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.144278606965174, |
|
"grad_norm": 0.5273211889278953, |
|
"learning_rate": 6.248717016633187e-05, |
|
"loss": 0.4351, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.1492537313432836, |
|
"grad_norm": 0.40383736067681525, |
|
"learning_rate": 6.229504843201705e-05, |
|
"loss": 0.4259, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.154228855721393, |
|
"grad_norm": 0.4312211448910651, |
|
"learning_rate": 6.210217765189653e-05, |
|
"loss": 0.4396, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.1592039800995024, |
|
"grad_norm": 0.35782791847054873, |
|
"learning_rate": 6.190856430584185e-05, |
|
"loss": 0.3913, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.164179104477612, |
|
"grad_norm": 0.3492891250835248, |
|
"learning_rate": 6.171421489867241e-05, |
|
"loss": 0.3891, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.1691542288557213, |
|
"grad_norm": 0.35501231620300494, |
|
"learning_rate": 6.151913595993711e-05, |
|
"loss": 0.394, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.1741293532338308, |
|
"grad_norm": 0.45101422446088074, |
|
"learning_rate": 6.132333404369488e-05, |
|
"loss": 0.3787, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.1791044776119404, |
|
"grad_norm": 0.3926264666543916, |
|
"learning_rate": 6.112681572829445e-05, |
|
"loss": 0.3693, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.1840796019900497, |
|
"grad_norm": 0.4267424524859782, |
|
"learning_rate": 6.092958761615341e-05, |
|
"loss": 0.434, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.1890547263681592, |
|
"grad_norm": 0.39553559165797253, |
|
"learning_rate": 6.073165633353636e-05, |
|
"loss": 0.4034, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.1940298507462686, |
|
"grad_norm": 0.46046958354454653, |
|
"learning_rate": 6.0533028530332297e-05, |
|
"loss": 0.3684, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.199004975124378, |
|
"grad_norm": 0.4665878755951322, |
|
"learning_rate": 6.033371087983117e-05, |
|
"loss": 0.347, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.2039800995024876, |
|
"grad_norm": 0.4354664905472878, |
|
"learning_rate": 6.013371007849972e-05, |
|
"loss": 0.4517, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.208955223880597, |
|
"grad_norm": 0.3737884656720528, |
|
"learning_rate": 5.993303284575647e-05, |
|
"loss": 0.392, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.2139303482587065, |
|
"grad_norm": 0.36266398536729494, |
|
"learning_rate": 5.9731685923745965e-05, |
|
"loss": 0.3795, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.2189054726368158, |
|
"grad_norm": 0.4034905088087893, |
|
"learning_rate": 5.95296760771123e-05, |
|
"loss": 0.4424, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.2238805970149254, |
|
"grad_norm": 0.3865962830370829, |
|
"learning_rate": 5.9327010092771796e-05, |
|
"loss": 0.4118, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.228855721393035, |
|
"grad_norm": 0.3535923688066071, |
|
"learning_rate": 5.912369477968503e-05, |
|
"loss": 0.4322, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.2338308457711442, |
|
"grad_norm": 0.3763362566595206, |
|
"learning_rate": 5.891973696862802e-05, |
|
"loss": 0.3917, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.2388059701492538, |
|
"grad_norm": 0.40384314508526564, |
|
"learning_rate": 5.8715143511962794e-05, |
|
"loss": 0.4185, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.243781094527363, |
|
"grad_norm": 0.3722033433705015, |
|
"learning_rate": 5.85099212834071e-05, |
|
"loss": 0.422, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2487562189054726, |
|
"grad_norm": 0.381393814251326, |
|
"learning_rate": 5.830407717780356e-05, |
|
"loss": 0.3946, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.2537313432835822, |
|
"grad_norm": 0.37263652080327836, |
|
"learning_rate": 5.809761811088791e-05, |
|
"loss": 0.4354, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.2587064676616915, |
|
"grad_norm": 0.3718606135863343, |
|
"learning_rate": 5.789055101905678e-05, |
|
"loss": 0.4354, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.263681592039801, |
|
"grad_norm": 0.3691803219584332, |
|
"learning_rate": 5.768288285913454e-05, |
|
"loss": 0.417, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.2686567164179103, |
|
"grad_norm": 0.36900905475761886, |
|
"learning_rate": 5.7474620608139625e-05, |
|
"loss": 0.428, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.2736318407960199, |
|
"grad_norm": 0.43160773872025243, |
|
"learning_rate": 5.726577126305017e-05, |
|
"loss": 0.3848, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.2786069651741294, |
|
"grad_norm": 0.3828488458628553, |
|
"learning_rate": 5.705634184056881e-05, |
|
"loss": 0.4016, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.2835820895522387, |
|
"grad_norm": 0.3704358136407763, |
|
"learning_rate": 5.6846339376887084e-05, |
|
"loss": 0.4076, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.2885572139303483, |
|
"grad_norm": 0.37618666448453975, |
|
"learning_rate": 5.6635770927448916e-05, |
|
"loss": 0.4147, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.2935323383084576, |
|
"grad_norm": 0.3673375645143347, |
|
"learning_rate": 5.642464356671369e-05, |
|
"loss": 0.3824, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2985074626865671, |
|
"grad_norm": 0.35963122460702884, |
|
"learning_rate": 5.6212964387918444e-05, |
|
"loss": 0.3805, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.3034825870646767, |
|
"grad_norm": 0.4027201236656122, |
|
"learning_rate": 5.6000740502839676e-05, |
|
"loss": 0.4666, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.308457711442786, |
|
"grad_norm": 0.34921070865720966, |
|
"learning_rate": 5.5787979041554336e-05, |
|
"loss": 0.4029, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.3134328358208955, |
|
"grad_norm": 0.4247139167073171, |
|
"learning_rate": 5.5574687152200294e-05, |
|
"loss": 0.43, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.3184079601990049, |
|
"grad_norm": 0.3915409782396568, |
|
"learning_rate": 5.536087200073621e-05, |
|
"loss": 0.4001, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.3233830845771144, |
|
"grad_norm": 0.3389416749484375, |
|
"learning_rate": 5.514654077070074e-05, |
|
"loss": 0.3931, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.328358208955224, |
|
"grad_norm": 0.3744985906903865, |
|
"learning_rate": 5.493170066297122e-05, |
|
"loss": 0.3997, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.3590802234283344, |
|
"learning_rate": 5.471635889552171e-05, |
|
"loss": 0.3835, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.3383084577114428, |
|
"grad_norm": 0.3698426511706334, |
|
"learning_rate": 5.450052270318054e-05, |
|
"loss": 0.4216, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.3432835820895521, |
|
"grad_norm": 0.4784044302472385, |
|
"learning_rate": 5.42841993373872e-05, |
|
"loss": 0.3705, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.3482587064676617, |
|
"grad_norm": 0.3628223132460973, |
|
"learning_rate": 5.406739606594872e-05, |
|
"loss": 0.4141, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.3532338308457712, |
|
"grad_norm": 0.3320557661495897, |
|
"learning_rate": 5.3850120172795496e-05, |
|
"loss": 0.4146, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.3582089552238805, |
|
"grad_norm": 0.3626608623601277, |
|
"learning_rate": 5.36323789577366e-05, |
|
"loss": 0.4141, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.36318407960199, |
|
"grad_norm": 0.32391201189440577, |
|
"learning_rate": 5.341417973621447e-05, |
|
"loss": 0.3953, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.3681592039800994, |
|
"grad_norm": 0.3530711948464647, |
|
"learning_rate": 5.31955298390592e-05, |
|
"loss": 0.4413, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.373134328358209, |
|
"grad_norm": 0.3388143208428122, |
|
"learning_rate": 5.29764366122422e-05, |
|
"loss": 0.4158, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.3781094527363185, |
|
"grad_norm": 0.37514946524437237, |
|
"learning_rate": 5.275690741662939e-05, |
|
"loss": 0.4158, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.3830845771144278, |
|
"grad_norm": 0.33697256732820746, |
|
"learning_rate": 5.253694962773397e-05, |
|
"loss": 0.4047, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.3880597014925373, |
|
"grad_norm": 0.31922818792904584, |
|
"learning_rate": 5.2316570635468496e-05, |
|
"loss": 0.3873, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.3930348258706466, |
|
"grad_norm": 0.41734702709710425, |
|
"learning_rate": 5.209577784389673e-05, |
|
"loss": 0.4288, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3980099502487562, |
|
"grad_norm": 0.333151239021271, |
|
"learning_rate": 5.1874578670984826e-05, |
|
"loss": 0.3905, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.4029850746268657, |
|
"grad_norm": 0.36077988632299796, |
|
"learning_rate": 5.1652980548352095e-05, |
|
"loss": 0.4152, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.407960199004975, |
|
"grad_norm": 0.3596032551031165, |
|
"learning_rate": 5.143099092102136e-05, |
|
"loss": 0.4259, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.4129353233830846, |
|
"grad_norm": 0.3547815295533313, |
|
"learning_rate": 5.1208617247168784e-05, |
|
"loss": 0.3919, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.417910447761194, |
|
"grad_norm": 0.32341852061369397, |
|
"learning_rate": 5.098586699787339e-05, |
|
"loss": 0.3798, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.4228855721393034, |
|
"grad_norm": 0.3554342430636339, |
|
"learning_rate": 5.07627476568659e-05, |
|
"loss": 0.4085, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.427860696517413, |
|
"grad_norm": 0.41108995658095454, |
|
"learning_rate": 5.053926672027748e-05, |
|
"loss": 0.4191, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.4328358208955223, |
|
"grad_norm": 0.3551564879703378, |
|
"learning_rate": 5.031543169638774e-05, |
|
"loss": 0.4069, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.4378109452736318, |
|
"grad_norm": 0.34834914531770184, |
|
"learning_rate": 5.0091250105372595e-05, |
|
"loss": 0.3882, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.4427860696517412, |
|
"grad_norm": 0.3598765779758618, |
|
"learning_rate": 4.986672947905153e-05, |
|
"loss": 0.3804, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.4477611940298507, |
|
"grad_norm": 0.3651869029175629, |
|
"learning_rate": 4.964187736063462e-05, |
|
"loss": 0.4418, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.4527363184079602, |
|
"grad_norm": 0.3445888312659597, |
|
"learning_rate": 4.941670130446901e-05, |
|
"loss": 0.4157, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.4577114427860698, |
|
"grad_norm": 0.35813427415119026, |
|
"learning_rate": 4.919120887578522e-05, |
|
"loss": 0.3826, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.462686567164179, |
|
"grad_norm": 0.360403051462385, |
|
"learning_rate": 4.8965407650442905e-05, |
|
"loss": 0.4299, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.4676616915422884, |
|
"grad_norm": 0.3234770381678072, |
|
"learning_rate": 4.8739305214676336e-05, |
|
"loss": 0.3831, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.472636815920398, |
|
"grad_norm": 0.3607259148342821, |
|
"learning_rate": 4.851290916483956e-05, |
|
"loss": 0.4261, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.4776119402985075, |
|
"grad_norm": 0.3258301573358321, |
|
"learning_rate": 4.828622710715115e-05, |
|
"loss": 0.378, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.482587064676617, |
|
"grad_norm": 0.3288851224568668, |
|
"learning_rate": 4.8059266657438686e-05, |
|
"loss": 0.376, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.4875621890547264, |
|
"grad_norm": 0.3562719858206355, |
|
"learning_rate": 4.7832035440882846e-05, |
|
"loss": 0.4383, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"grad_norm": 0.3426198261946137, |
|
"learning_rate": 4.760454109176128e-05, |
|
"loss": 0.3914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4975124378109452, |
|
"grad_norm": 0.36962310476561455, |
|
"learning_rate": 4.737679125319207e-05, |
|
"loss": 0.3953, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.5024875621890548, |
|
"grad_norm": 0.340170063618827, |
|
"learning_rate": 4.7148793576877e-05, |
|
"loss": 0.4037, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.5074626865671643, |
|
"grad_norm": 0.31985106818418335, |
|
"learning_rate": 4.692055572284441e-05, |
|
"loss": 0.3729, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.5124378109452736, |
|
"grad_norm": 0.35693636989536703, |
|
"learning_rate": 4.669208535919187e-05, |
|
"loss": 0.3998, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.517412935323383, |
|
"grad_norm": 0.3565216917414643, |
|
"learning_rate": 4.6463390161828625e-05, |
|
"loss": 0.4654, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.5223880597014925, |
|
"grad_norm": 0.3276618680872065, |
|
"learning_rate": 4.62344778142176e-05, |
|
"loss": 0.3864, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.527363184079602, |
|
"grad_norm": 0.3452128607731188, |
|
"learning_rate": 4.600535600711733e-05, |
|
"loss": 0.4184, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.5323383084577116, |
|
"grad_norm": 0.3170488222819703, |
|
"learning_rate": 4.5776032438323536e-05, |
|
"loss": 0.4033, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.537313432835821, |
|
"grad_norm": 0.34350065215149495, |
|
"learning_rate": 4.5546514812410537e-05, |
|
"loss": 0.4185, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.5422885572139302, |
|
"grad_norm": 0.33678222386570505, |
|
"learning_rate": 4.531681084047235e-05, |
|
"loss": 0.3838, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.5472636815920398, |
|
"grad_norm": 0.34759111720203656, |
|
"learning_rate": 4.50869282398637e-05, |
|
"loss": 0.3936, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.5522388059701493, |
|
"grad_norm": 0.3211207303611902, |
|
"learning_rate": 4.4856874733940635e-05, |
|
"loss": 0.3826, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.5572139303482588, |
|
"grad_norm": 0.3612851847355618, |
|
"learning_rate": 4.462665805180115e-05, |
|
"loss": 0.404, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.5621890547263682, |
|
"grad_norm": 0.3511587780817123, |
|
"learning_rate": 4.4396285928025444e-05, |
|
"loss": 0.4102, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.5671641791044775, |
|
"grad_norm": 0.3584310431284297, |
|
"learning_rate": 4.416576610241606e-05, |
|
"loss": 0.418, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.572139303482587, |
|
"grad_norm": 0.3669767063579727, |
|
"learning_rate": 4.393510631973793e-05, |
|
"loss": 0.4267, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.5771144278606966, |
|
"grad_norm": 0.3268164381563611, |
|
"learning_rate": 4.370431432945806e-05, |
|
"loss": 0.3941, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.582089552238806, |
|
"grad_norm": 0.3508619416534592, |
|
"learning_rate": 4.347339788548526e-05, |
|
"loss": 0.3995, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.5870646766169154, |
|
"grad_norm": 0.3196058947918705, |
|
"learning_rate": 4.3242364745909607e-05, |
|
"loss": 0.3958, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.5920398009950247, |
|
"grad_norm": 0.34944455034926064, |
|
"learning_rate": 4.301122267274177e-05, |
|
"loss": 0.3747, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.5970149253731343, |
|
"grad_norm": 0.3414470311001462, |
|
"learning_rate": 4.277997943165228e-05, |
|
"loss": 0.3828, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.6019900497512438, |
|
"grad_norm": 0.34300557110432434, |
|
"learning_rate": 4.2548642791710606e-05, |
|
"loss": 0.4065, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.6069651741293534, |
|
"grad_norm": 0.3315879966748181, |
|
"learning_rate": 4.23172205251241e-05, |
|
"loss": 0.3697, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.6119402985074627, |
|
"grad_norm": 0.3330694565273453, |
|
"learning_rate": 4.208572040697695e-05, |
|
"loss": 0.3949, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.616915422885572, |
|
"grad_norm": 0.3354561985198847, |
|
"learning_rate": 4.18541502149689e-05, |
|
"loss": 0.3752, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.6218905472636815, |
|
"grad_norm": 0.3320378742535769, |
|
"learning_rate": 4.162251772915396e-05, |
|
"loss": 0.4328, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.626865671641791, |
|
"grad_norm": 0.37984693931888575, |
|
"learning_rate": 4.139083073167902e-05, |
|
"loss": 0.4452, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.6318407960199006, |
|
"grad_norm": 0.3477518807148073, |
|
"learning_rate": 4.1159097006522407e-05, |
|
"loss": 0.3884, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.63681592039801, |
|
"grad_norm": 0.38331379309387703, |
|
"learning_rate": 4.092732433923236e-05, |
|
"loss": 0.4358, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.6417910447761193, |
|
"grad_norm": 0.3231078961450345, |
|
"learning_rate": 4.069552051666543e-05, |
|
"loss": 0.4228, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.6467661691542288, |
|
"grad_norm": 0.34345629662186494, |
|
"learning_rate": 4.0463693326724925e-05, |
|
"loss": 0.4084, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.6517412935323383, |
|
"grad_norm": 0.32498201471285726, |
|
"learning_rate": 4.0231850558099194e-05, |
|
"loss": 0.4195, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.6567164179104479, |
|
"grad_norm": 0.33886498208601185, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3906, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.6616915422885572, |
|
"grad_norm": 0.3448618436856829, |
|
"learning_rate": 3.976814944190082e-05, |
|
"loss": 0.3951, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.422647256268607, |
|
"learning_rate": 3.9536306673275095e-05, |
|
"loss": 0.3553, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.671641791044776, |
|
"grad_norm": 0.373540380768812, |
|
"learning_rate": 3.9304479483334576e-05, |
|
"loss": 0.4274, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.6766169154228856, |
|
"grad_norm": 0.3479089624609906, |
|
"learning_rate": 3.907267566076765e-05, |
|
"loss": 0.378, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.6815920398009951, |
|
"grad_norm": 0.35053695758886855, |
|
"learning_rate": 3.884090299347761e-05, |
|
"loss": 0.424, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.6865671641791045, |
|
"grad_norm": 0.45197758226814305, |
|
"learning_rate": 3.8609169268321e-05, |
|
"loss": 0.4028, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.6915422885572138, |
|
"grad_norm": 0.3425199121501955, |
|
"learning_rate": 3.837748227084605e-05, |
|
"loss": 0.4012, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.6965174129353233, |
|
"grad_norm": 0.3184778916510887, |
|
"learning_rate": 3.814584978503111e-05, |
|
"loss": 0.3941, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.7014925373134329, |
|
"grad_norm": 0.3218368519985259, |
|
"learning_rate": 3.791427959302306e-05, |
|
"loss": 0.4073, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.7064676616915424, |
|
"grad_norm": 0.35780167550588043, |
|
"learning_rate": 3.768277947487591e-05, |
|
"loss": 0.4112, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.7114427860696517, |
|
"grad_norm": 0.33984570498228783, |
|
"learning_rate": 3.7451357208289414e-05, |
|
"loss": 0.3568, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 0.3098884240789945, |
|
"learning_rate": 3.722002056834773e-05, |
|
"loss": 0.3805, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.7213930348258706, |
|
"grad_norm": 0.32032507227277146, |
|
"learning_rate": 3.6988777327258245e-05, |
|
"loss": 0.4034, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.7263681592039801, |
|
"grad_norm": 0.3201489816718851, |
|
"learning_rate": 3.675763525409041e-05, |
|
"loss": 0.4088, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.7313432835820897, |
|
"grad_norm": 0.31920979171836683, |
|
"learning_rate": 3.652660211451475e-05, |
|
"loss": 0.3953, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.736318407960199, |
|
"grad_norm": 0.32780892777338394, |
|
"learning_rate": 3.629568567054194e-05, |
|
"loss": 0.4089, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.7412935323383083, |
|
"grad_norm": 0.3390908050496512, |
|
"learning_rate": 3.6064893680262075e-05, |
|
"loss": 0.3932, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.7462686567164178, |
|
"grad_norm": 0.31731531792600687, |
|
"learning_rate": 3.583423389758395e-05, |
|
"loss": 0.3847, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.7512437810945274, |
|
"grad_norm": 0.33714467493195, |
|
"learning_rate": 3.5603714071974576e-05, |
|
"loss": 0.3962, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.756218905472637, |
|
"grad_norm": 0.32050032403606926, |
|
"learning_rate": 3.537334194819885e-05, |
|
"loss": 0.4219, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.7611940298507462, |
|
"grad_norm": 0.34539825894953485, |
|
"learning_rate": 3.5143125266059365e-05, |
|
"loss": 0.4075, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.7661691542288556, |
|
"grad_norm": 0.3397347985270042, |
|
"learning_rate": 3.4913071760136315e-05, |
|
"loss": 0.4161, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.771144278606965, |
|
"grad_norm": 0.362703206594731, |
|
"learning_rate": 3.468318915952766e-05, |
|
"loss": 0.3847, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.7761194029850746, |
|
"grad_norm": 0.37947814193150187, |
|
"learning_rate": 3.4453485187589484e-05, |
|
"loss": 0.3733, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.7810945273631842, |
|
"grad_norm": 0.33573825765539345, |
|
"learning_rate": 3.4223967561676464e-05, |
|
"loss": 0.3959, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.7860696517412935, |
|
"grad_norm": 0.36863302025947026, |
|
"learning_rate": 3.3994643992882675e-05, |
|
"loss": 0.4115, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.7910447761194028, |
|
"grad_norm": 0.3614827656213472, |
|
"learning_rate": 3.3765522185782414e-05, |
|
"loss": 0.4063, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.7960199004975124, |
|
"grad_norm": 0.3539141617231257, |
|
"learning_rate": 3.3536609838171395e-05, |
|
"loss": 0.4119, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.800995024875622, |
|
"grad_norm": 0.36831769429972055, |
|
"learning_rate": 3.330791464080814e-05, |
|
"loss": 0.4128, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.8059701492537314, |
|
"grad_norm": 0.3215591845142348, |
|
"learning_rate": 3.307944427715561e-05, |
|
"loss": 0.3821, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.8109452736318408, |
|
"grad_norm": 0.35813676413956724, |
|
"learning_rate": 3.2851206423123015e-05, |
|
"loss": 0.3906, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.81592039800995, |
|
"grad_norm": 0.33955308336311896, |
|
"learning_rate": 3.2623208746807935e-05, |
|
"loss": 0.4149, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.8208955223880596, |
|
"grad_norm": 0.36772820484067015, |
|
"learning_rate": 3.239545890823874e-05, |
|
"loss": 0.411, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.8258706467661692, |
|
"grad_norm": 0.3669275359828165, |
|
"learning_rate": 3.216796455911716e-05, |
|
"loss": 0.3908, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.8308457711442787, |
|
"grad_norm": 0.3360940867007729, |
|
"learning_rate": 3.194073334256133e-05, |
|
"loss": 0.3939, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.835820895522388, |
|
"grad_norm": 0.3282348755853059, |
|
"learning_rate": 3.171377289284886e-05, |
|
"loss": 0.3987, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.8407960199004973, |
|
"grad_norm": 0.3257708000929639, |
|
"learning_rate": 3.148709083516046e-05, |
|
"loss": 0.3971, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.845771144278607, |
|
"grad_norm": 0.3079283959346746, |
|
"learning_rate": 3.126069478532368e-05, |
|
"loss": 0.3683, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.8507462686567164, |
|
"grad_norm": 0.3413053035869667, |
|
"learning_rate": 3.103459234955711e-05, |
|
"loss": 0.3877, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.855721393034826, |
|
"grad_norm": 0.31958058844137144, |
|
"learning_rate": 3.0808791124214784e-05, |
|
"loss": 0.3889, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.8606965174129353, |
|
"grad_norm": 0.3177636690558965, |
|
"learning_rate": 3.0583298695531e-05, |
|
"loss": 0.3778, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.8656716417910446, |
|
"grad_norm": 0.2970294606462075, |
|
"learning_rate": 3.0358122639365395e-05, |
|
"loss": 0.3879, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.8706467661691542, |
|
"grad_norm": 0.4030502322200175, |
|
"learning_rate": 3.0133270520948467e-05, |
|
"loss": 0.3966, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.8756218905472637, |
|
"grad_norm": 0.3082201392101219, |
|
"learning_rate": 2.990874989462741e-05, |
|
"loss": 0.3715, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.8805970149253732, |
|
"grad_norm": 0.35764449982425817, |
|
"learning_rate": 2.9684568303612268e-05, |
|
"loss": 0.3742, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.8855721393034826, |
|
"grad_norm": 0.3159937456034307, |
|
"learning_rate": 2.9460733279722542e-05, |
|
"loss": 0.3824, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.890547263681592, |
|
"grad_norm": 0.32331043003587234, |
|
"learning_rate": 2.9237252343134098e-05, |
|
"loss": 0.3891, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.8955223880597014, |
|
"grad_norm": 0.37425531357358577, |
|
"learning_rate": 2.9014133002126623e-05, |
|
"loss": 0.4506, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.900497512437811, |
|
"grad_norm": 0.33815725777177497, |
|
"learning_rate": 2.879138275283122e-05, |
|
"loss": 0.3743, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.9054726368159205, |
|
"grad_norm": 0.31236457607876705, |
|
"learning_rate": 2.856900907897866e-05, |
|
"loss": 0.3901, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.9104477611940298, |
|
"grad_norm": 0.2890304424344374, |
|
"learning_rate": 2.834701945164793e-05, |
|
"loss": 0.3644, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.9154228855721394, |
|
"grad_norm": 0.3708398866489987, |
|
"learning_rate": 2.812542132901518e-05, |
|
"loss": 0.4168, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.9203980099502487, |
|
"grad_norm": 0.380856014045257, |
|
"learning_rate": 2.7904222156103276e-05, |
|
"loss": 0.3956, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.9253731343283582, |
|
"grad_norm": 0.3404547166372201, |
|
"learning_rate": 2.768342936453152e-05, |
|
"loss": 0.3701, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.9303482587064678, |
|
"grad_norm": 0.3281747118440842, |
|
"learning_rate": 2.7463050372266055e-05, |
|
"loss": 0.3932, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.935323383084577, |
|
"grad_norm": 0.33072472038014866, |
|
"learning_rate": 2.7243092583370613e-05, |
|
"loss": 0.4122, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.9402985074626866, |
|
"grad_norm": 0.30399878417994697, |
|
"learning_rate": 2.7023563387757814e-05, |
|
"loss": 0.3623, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.945273631840796, |
|
"grad_norm": 0.2978112335333305, |
|
"learning_rate": 2.6804470160940816e-05, |
|
"loss": 0.3543, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.9502487562189055, |
|
"grad_norm": 0.3160634155222761, |
|
"learning_rate": 2.6585820263785545e-05, |
|
"loss": 0.3783, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.955223880597015, |
|
"grad_norm": 0.33157208172517716, |
|
"learning_rate": 2.6367621042263406e-05, |
|
"loss": 0.4121, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.9601990049751243, |
|
"grad_norm": 0.4402080931333875, |
|
"learning_rate": 2.6149879827204513e-05, |
|
"loss": 0.4042, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.9651741293532339, |
|
"grad_norm": 0.3002330447756732, |
|
"learning_rate": 2.5932603934051296e-05, |
|
"loss": 0.3928, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.9701492537313432, |
|
"grad_norm": 0.2943502159806271, |
|
"learning_rate": 2.5715800662612816e-05, |
|
"loss": 0.4044, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.9751243781094527, |
|
"grad_norm": 0.33490942595123435, |
|
"learning_rate": 2.5499477296819473e-05, |
|
"loss": 0.3932, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.9800995024875623, |
|
"grad_norm": 0.33880593496795247, |
|
"learning_rate": 2.5283641104478304e-05, |
|
"loss": 0.352, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.9850746268656716, |
|
"grad_norm": 0.33544144575729795, |
|
"learning_rate": 2.5068299337028795e-05, |
|
"loss": 0.3702, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.9900497512437811, |
|
"grad_norm": 0.29325661298374756, |
|
"learning_rate": 2.485345922929927e-05, |
|
"loss": 0.3901, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.9950248756218905, |
|
"grad_norm": 0.3214857721910235, |
|
"learning_rate": 2.4639127999263802e-05, |
|
"loss": 0.3943, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.33576678494810924, |
|
"learning_rate": 2.4425312847799713e-05, |
|
"loss": 0.3413, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.0049751243781095, |
|
"grad_norm": 0.5918142779151857, |
|
"learning_rate": 2.4212020958445674e-05, |
|
"loss": 0.2562, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.009950248756219, |
|
"grad_norm": 0.41209456448663645, |
|
"learning_rate": 2.3999259497160337e-05, |
|
"loss": 0.2559, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.014925373134328, |
|
"grad_norm": 0.6187901280392113, |
|
"learning_rate": 2.3787035612081573e-05, |
|
"loss": 0.2831, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.0199004975124377, |
|
"grad_norm": 0.48881789876251663, |
|
"learning_rate": 2.3575356433286336e-05, |
|
"loss": 0.2618, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.0248756218905473, |
|
"grad_norm": 0.4081982645902415, |
|
"learning_rate": 2.3364229072551084e-05, |
|
"loss": 0.2762, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.029850746268657, |
|
"grad_norm": 0.5200105687768247, |
|
"learning_rate": 2.3153660623112922e-05, |
|
"loss": 0.2592, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.0348258706467663, |
|
"grad_norm": 0.442494043835259, |
|
"learning_rate": 2.2943658159431195e-05, |
|
"loss": 0.2754, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.0398009950248754, |
|
"grad_norm": 0.41218650620030506, |
|
"learning_rate": 2.273422873694984e-05, |
|
"loss": 0.2182, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.044776119402985, |
|
"grad_norm": 0.45136550018816846, |
|
"learning_rate": 2.2525379391860378e-05, |
|
"loss": 0.2721, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.0497512437810945, |
|
"grad_norm": 0.5992719898523112, |
|
"learning_rate": 2.2317117140865475e-05, |
|
"loss": 0.2294, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.054726368159204, |
|
"grad_norm": 0.3645065043740578, |
|
"learning_rate": 2.2109448980943222e-05, |
|
"loss": 0.2439, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.0597014925373136, |
|
"grad_norm": 0.3817582428926202, |
|
"learning_rate": 2.1902381889112094e-05, |
|
"loss": 0.255, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.0646766169154227, |
|
"grad_norm": 0.3584286645671438, |
|
"learning_rate": 2.1695922822196454e-05, |
|
"loss": 0.2364, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.0696517412935322, |
|
"grad_norm": 0.4669254285695078, |
|
"learning_rate": 2.149007871659291e-05, |
|
"loss": 0.2812, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.074626865671642, |
|
"grad_norm": 0.7251931898787578, |
|
"learning_rate": 2.1284856488037223e-05, |
|
"loss": 0.1954, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.0796019900497513, |
|
"grad_norm": 0.4011018327789735, |
|
"learning_rate": 2.1080263031372e-05, |
|
"loss": 0.2611, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.084577114427861, |
|
"grad_norm": 0.3817812517359356, |
|
"learning_rate": 2.0876305220315e-05, |
|
"loss": 0.2573, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.08955223880597, |
|
"grad_norm": 0.37743477467908, |
|
"learning_rate": 2.0672989907228214e-05, |
|
"loss": 0.2558, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.0945273631840795, |
|
"grad_norm": 0.38153865146108384, |
|
"learning_rate": 2.047032392288772e-05, |
|
"loss": 0.2383, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.099502487562189, |
|
"grad_norm": 0.38606128931772565, |
|
"learning_rate": 2.0268314076254055e-05, |
|
"loss": 0.2283, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.1044776119402986, |
|
"grad_norm": 0.3446456674752202, |
|
"learning_rate": 2.0066967154243557e-05, |
|
"loss": 0.2512, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.109452736318408, |
|
"grad_norm": 0.4013318980024715, |
|
"learning_rate": 1.9866289921500303e-05, |
|
"loss": 0.2622, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.1144278606965172, |
|
"grad_norm": 0.3737939416328244, |
|
"learning_rate": 1.966628912016884e-05, |
|
"loss": 0.2497, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.1194029850746268, |
|
"grad_norm": 0.4009685350663624, |
|
"learning_rate": 1.946697146966772e-05, |
|
"loss": 0.2609, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.1243781094527363, |
|
"grad_norm": 0.3642686072097498, |
|
"learning_rate": 1.9268343666463657e-05, |
|
"loss": 0.2662, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.129353233830846, |
|
"grad_norm": 0.3799997362643185, |
|
"learning_rate": 1.907041238384661e-05, |
|
"loss": 0.2543, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.1343283582089554, |
|
"grad_norm": 0.3719896858231849, |
|
"learning_rate": 1.887318427170556e-05, |
|
"loss": 0.2665, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 2.1393034825870645, |
|
"grad_norm": 0.3683517131634379, |
|
"learning_rate": 1.8676665956305132e-05, |
|
"loss": 0.2643, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.144278606965174, |
|
"grad_norm": 0.3653894139897945, |
|
"learning_rate": 1.84808640400629e-05, |
|
"loss": 0.2441, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 2.1492537313432836, |
|
"grad_norm": 0.355426607510642, |
|
"learning_rate": 1.8285785101327613e-05, |
|
"loss": 0.2506, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.154228855721393, |
|
"grad_norm": 0.36584437796341124, |
|
"learning_rate": 1.8091435694158174e-05, |
|
"loss": 0.2539, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 2.1592039800995027, |
|
"grad_norm": 0.37592267739456214, |
|
"learning_rate": 1.789782234810348e-05, |
|
"loss": 0.2493, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.1641791044776117, |
|
"grad_norm": 0.35589449226988645, |
|
"learning_rate": 1.7704951567982967e-05, |
|
"loss": 0.2498, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.1691542288557213, |
|
"grad_norm": 0.3763201555098831, |
|
"learning_rate": 1.751282983366814e-05, |
|
"loss": 0.2802, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.174129353233831, |
|
"grad_norm": 0.3397712514365849, |
|
"learning_rate": 1.7321463599864836e-05, |
|
"loss": 0.2503, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 2.1791044776119404, |
|
"grad_norm": 0.3719290616698573, |
|
"learning_rate": 1.713085929589635e-05, |
|
"loss": 0.2404, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.18407960199005, |
|
"grad_norm": 0.3579620248101577, |
|
"learning_rate": 1.6941023325487516e-05, |
|
"loss": 0.2529, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 2.189054726368159, |
|
"grad_norm": 0.3378961286180641, |
|
"learning_rate": 1.6751962066549445e-05, |
|
"loss": 0.2299, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.1940298507462686, |
|
"grad_norm": 0.3409511987760758, |
|
"learning_rate": 1.65636818709653e-05, |
|
"loss": 0.2592, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 2.199004975124378, |
|
"grad_norm": 0.34209632040341525, |
|
"learning_rate": 1.63761890643769e-05, |
|
"loss": 0.2501, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.2039800995024876, |
|
"grad_norm": 0.3570923790333368, |
|
"learning_rate": 1.6189489945972218e-05, |
|
"loss": 0.2384, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 2.208955223880597, |
|
"grad_norm": 0.36512054797968035, |
|
"learning_rate": 1.6003590788273672e-05, |
|
"loss": 0.2561, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.2139303482587063, |
|
"grad_norm": 0.35591678638553415, |
|
"learning_rate": 1.5818497836927464e-05, |
|
"loss": 0.2302, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.218905472636816, |
|
"grad_norm": 0.3753879431710149, |
|
"learning_rate": 1.56342173104937e-05, |
|
"loss": 0.2632, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 2.2238805970149254, |
|
"grad_norm": 0.333664977680914, |
|
"learning_rate": 1.545075540023748e-05, |
|
"loss": 0.2477, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 2.228855721393035, |
|
"grad_norm": 0.3403946010339331, |
|
"learning_rate": 1.5268118269920913e-05, |
|
"loss": 0.2723, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 2.2338308457711444, |
|
"grad_norm": 0.4548760797070764, |
|
"learning_rate": 1.5086312055595986e-05, |
|
"loss": 0.2747, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 2.2388059701492535, |
|
"grad_norm": 0.35301398479196083, |
|
"learning_rate": 1.4905342865398447e-05, |
|
"loss": 0.234, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.243781094527363, |
|
"grad_norm": 0.34038959592803697, |
|
"learning_rate": 1.4725216779342563e-05, |
|
"loss": 0.2026, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.2487562189054726, |
|
"grad_norm": 0.34067462785209285, |
|
"learning_rate": 1.4545939849116905e-05, |
|
"loss": 0.2596, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 2.253731343283582, |
|
"grad_norm": 0.3466734614903421, |
|
"learning_rate": 1.4367518097880959e-05, |
|
"loss": 0.2529, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 2.2587064676616917, |
|
"grad_norm": 0.40268957982538645, |
|
"learning_rate": 1.4189957520062802e-05, |
|
"loss": 0.2562, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 2.2636815920398012, |
|
"grad_norm": 0.7602394175299795, |
|
"learning_rate": 1.4013264081157716e-05, |
|
"loss": 0.2388, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.2686567164179103, |
|
"grad_norm": 0.33285450157056085, |
|
"learning_rate": 1.3837443717527723e-05, |
|
"loss": 0.2416, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 2.27363184079602, |
|
"grad_norm": 0.34514962123726534, |
|
"learning_rate": 1.3662502336202227e-05, |
|
"loss": 0.2509, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 2.2786069651741294, |
|
"grad_norm": 0.40102532759600523, |
|
"learning_rate": 1.3488445814679456e-05, |
|
"loss": 0.2497, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 2.283582089552239, |
|
"grad_norm": 0.36204012456455475, |
|
"learning_rate": 1.331528000072905e-05, |
|
"loss": 0.2477, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 2.288557213930348, |
|
"grad_norm": 0.3738732602116117, |
|
"learning_rate": 1.314301071219557e-05, |
|
"loss": 0.2479, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.2935323383084576, |
|
"grad_norm": 0.40091238289977654, |
|
"learning_rate": 1.2971643736803099e-05, |
|
"loss": 0.2878, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 2.298507462686567, |
|
"grad_norm": 0.35336164095784656, |
|
"learning_rate": 1.2801184831960697e-05, |
|
"loss": 0.2499, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 2.3034825870646767, |
|
"grad_norm": 0.35065141906546554, |
|
"learning_rate": 1.2631639724569027e-05, |
|
"loss": 0.2625, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 2.308457711442786, |
|
"grad_norm": 0.37049415729179436, |
|
"learning_rate": 1.2463014110827945e-05, |
|
"loss": 0.2576, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 2.3134328358208958, |
|
"grad_norm": 0.40666661148958544, |
|
"learning_rate": 1.2295313656045096e-05, |
|
"loss": 0.2806, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.318407960199005, |
|
"grad_norm": 0.3600659090131196, |
|
"learning_rate": 1.2128543994445639e-05, |
|
"loss": 0.2494, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 2.3233830845771144, |
|
"grad_norm": 0.3415014141826654, |
|
"learning_rate": 1.1962710728982882e-05, |
|
"loss": 0.2302, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 2.328358208955224, |
|
"grad_norm": 0.3511085800170491, |
|
"learning_rate": 1.1797819431150078e-05, |
|
"loss": 0.2418, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.36088577119554327, |
|
"learning_rate": 1.163387564079323e-05, |
|
"loss": 0.2311, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 2.3383084577114426, |
|
"grad_norm": 0.3475970107617478, |
|
"learning_rate": 1.1470884865924986e-05, |
|
"loss": 0.2542, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.343283582089552, |
|
"grad_norm": 0.3371040148205862, |
|
"learning_rate": 1.1308852582539549e-05, |
|
"loss": 0.2398, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 2.3482587064676617, |
|
"grad_norm": 0.3559672069242172, |
|
"learning_rate": 1.1147784234428748e-05, |
|
"loss": 0.2471, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 2.353233830845771, |
|
"grad_norm": 0.34758764543619075, |
|
"learning_rate": 1.0987685232999094e-05, |
|
"loss": 0.2567, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 2.3582089552238807, |
|
"grad_norm": 0.3586333567415116, |
|
"learning_rate": 1.082856095709004e-05, |
|
"loss": 0.2271, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 2.3631840796019903, |
|
"grad_norm": 0.3500617888267376, |
|
"learning_rate": 1.0670416752793184e-05, |
|
"loss": 0.2193, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.3681592039800994, |
|
"grad_norm": 0.32469351418788767, |
|
"learning_rate": 1.0513257933272713e-05, |
|
"loss": 0.2426, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 2.373134328358209, |
|
"grad_norm": 0.33387986576032047, |
|
"learning_rate": 1.0357089778586892e-05, |
|
"loss": 0.253, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 2.3781094527363185, |
|
"grad_norm": 0.35603804698804176, |
|
"learning_rate": 1.0201917535510634e-05, |
|
"loss": 0.2481, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 2.383084577114428, |
|
"grad_norm": 0.3540990244834932, |
|
"learning_rate": 1.0047746417359306e-05, |
|
"loss": 0.2683, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 2.388059701492537, |
|
"grad_norm": 0.32820898731863224, |
|
"learning_rate": 9.894581603813464e-06, |
|
"loss": 0.2353, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.3930348258706466, |
|
"grad_norm": 0.3815756199076165, |
|
"learning_rate": 9.74242824074493e-06, |
|
"loss": 0.2407, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 2.398009950248756, |
|
"grad_norm": 0.3560064670515033, |
|
"learning_rate": 9.591291440043826e-06, |
|
"loss": 0.2551, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 2.4029850746268657, |
|
"grad_norm": 0.36110329445178707, |
|
"learning_rate": 9.441176279446931e-06, |
|
"loss": 0.2602, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 2.4079601990049753, |
|
"grad_norm": 0.3750715483874539, |
|
"learning_rate": 9.292087802366972e-06, |
|
"loss": 0.2626, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.412935323383085, |
|
"grad_norm": 0.33138719559693736, |
|
"learning_rate": 9.144031017723249e-06, |
|
"loss": 0.2442, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.417910447761194, |
|
"grad_norm": 0.33379541759464765, |
|
"learning_rate": 8.997010899773345e-06, |
|
"loss": 0.2269, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 2.4228855721393034, |
|
"grad_norm": 0.3369139501102586, |
|
"learning_rate": 8.85103238794597e-06, |
|
"loss": 0.2491, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 2.427860696517413, |
|
"grad_norm": 0.35526144153013156, |
|
"learning_rate": 8.706100386675077e-06, |
|
"loss": 0.2565, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 2.4328358208955225, |
|
"grad_norm": 0.3598570097724235, |
|
"learning_rate": 8.562219765235017e-06, |
|
"loss": 0.2354, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 2.4378109452736316, |
|
"grad_norm": 0.33228280679960287, |
|
"learning_rate": 8.419395357576982e-06, |
|
"loss": 0.2396, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.442786069651741, |
|
"grad_norm": 0.4418521047066077, |
|
"learning_rate": 8.27763196216659e-06, |
|
"loss": 0.2544, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 2.4477611940298507, |
|
"grad_norm": 0.358004162374617, |
|
"learning_rate": 8.136934341822695e-06, |
|
"loss": 0.2583, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.4527363184079602, |
|
"grad_norm": 0.44898354057719714, |
|
"learning_rate": 7.997307223557338e-06, |
|
"loss": 0.26, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 2.45771144278607, |
|
"grad_norm": 0.41898718849713473, |
|
"learning_rate": 7.858755298416936e-06, |
|
"loss": 0.2522, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 2.4626865671641793, |
|
"grad_norm": 0.3683253603248198, |
|
"learning_rate": 7.721283221324705e-06, |
|
"loss": 0.2501, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.4676616915422884, |
|
"grad_norm": 0.33790174069212986, |
|
"learning_rate": 7.584895610924232e-06, |
|
"loss": 0.2538, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 2.472636815920398, |
|
"grad_norm": 0.3392175147909702, |
|
"learning_rate": 7.449597049424357e-06, |
|
"loss": 0.2553, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 2.4776119402985075, |
|
"grad_norm": 0.33260011750236457, |
|
"learning_rate": 7.3153920824451516e-06, |
|
"loss": 0.2258, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 2.482587064676617, |
|
"grad_norm": 0.4565479048485011, |
|
"learning_rate": 7.182285218865264e-06, |
|
"loss": 0.2721, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 2.487562189054726, |
|
"grad_norm": 0.3321494451833214, |
|
"learning_rate": 7.050280930670381e-06, |
|
"loss": 0.255, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4925373134328357, |
|
"grad_norm": 0.34763473535410505, |
|
"learning_rate": 6.919383652803051e-06, |
|
"loss": 0.2746, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.4975124378109452, |
|
"grad_norm": 0.3135535144069993, |
|
"learning_rate": 6.78959778301361e-06, |
|
"loss": 0.2314, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 2.5024875621890548, |
|
"grad_norm": 0.3516491625832736, |
|
"learning_rate": 6.660927681712475e-06, |
|
"loss": 0.2766, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 2.5074626865671643, |
|
"grad_norm": 0.33588371388636734, |
|
"learning_rate": 6.533377671823631e-06, |
|
"loss": 0.2194, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.512437810945274, |
|
"grad_norm": 0.31803554462798533, |
|
"learning_rate": 6.406952038639396e-06, |
|
"loss": 0.2298, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.517412935323383, |
|
"grad_norm": 0.3452688943981057, |
|
"learning_rate": 6.281655029676481e-06, |
|
"loss": 0.2482, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.5223880597014925, |
|
"grad_norm": 0.3443466901108766, |
|
"learning_rate": 6.157490854533215e-06, |
|
"loss": 0.2385, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 2.527363184079602, |
|
"grad_norm": 0.3318444309920713, |
|
"learning_rate": 6.034463684748178e-06, |
|
"loss": 0.2542, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 2.5323383084577116, |
|
"grad_norm": 0.3546523722653983, |
|
"learning_rate": 5.912577653660019e-06, |
|
"loss": 0.2425, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 2.5373134328358207, |
|
"grad_norm": 0.3359439170821228, |
|
"learning_rate": 5.79183685626862e-06, |
|
"loss": 0.2325, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.54228855721393, |
|
"grad_norm": 0.34774707626690227, |
|
"learning_rate": 5.672245349097471e-06, |
|
"loss": 0.2503, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 2.5472636815920398, |
|
"grad_norm": 0.3277173676536131, |
|
"learning_rate": 5.553807150057418e-06, |
|
"loss": 0.2342, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 2.5522388059701493, |
|
"grad_norm": 0.38238411821050516, |
|
"learning_rate": 5.436526238311644e-06, |
|
"loss": 0.23, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 2.557213930348259, |
|
"grad_norm": 0.336557949765213, |
|
"learning_rate": 5.320406554142037e-06, |
|
"loss": 0.242, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 2.5621890547263684, |
|
"grad_norm": 0.3759208097643772, |
|
"learning_rate": 5.2054519988167415e-06, |
|
"loss": 0.2507, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.5671641791044775, |
|
"grad_norm": 0.36209335998781095, |
|
"learning_rate": 5.091666434459121e-06, |
|
"loss": 0.2699, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.572139303482587, |
|
"grad_norm": 0.35026794234042397, |
|
"learning_rate": 4.979053683918e-06, |
|
"loss": 0.2405, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 2.5771144278606966, |
|
"grad_norm": 0.3680263693984634, |
|
"learning_rate": 4.867617530639224e-06, |
|
"loss": 0.2472, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 2.582089552238806, |
|
"grad_norm": 0.3519030497333725, |
|
"learning_rate": 4.757361718538569e-06, |
|
"loss": 0.2567, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 2.587064676616915, |
|
"grad_norm": 0.3364511323706219, |
|
"learning_rate": 4.648289951875917e-06, |
|
"loss": 0.2349, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.5920398009950247, |
|
"grad_norm": 0.3262056797220248, |
|
"learning_rate": 4.540405895130824e-06, |
|
"loss": 0.2561, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 2.5970149253731343, |
|
"grad_norm": 0.3580197035735468, |
|
"learning_rate": 4.433713172879417e-06, |
|
"loss": 0.2366, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.601990049751244, |
|
"grad_norm": 0.3304871040650097, |
|
"learning_rate": 4.328215369672606e-06, |
|
"loss": 0.2621, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 2.6069651741293534, |
|
"grad_norm": 0.3299851878213626, |
|
"learning_rate": 4.2239160299156536e-06, |
|
"loss": 0.2361, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 2.611940298507463, |
|
"grad_norm": 0.3537662063365746, |
|
"learning_rate": 4.1208186577490836e-06, |
|
"loss": 0.2704, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.616915422885572, |
|
"grad_norm": 0.3618498169436797, |
|
"learning_rate": 4.018926716931e-06, |
|
"loss": 0.2645, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.6218905472636815, |
|
"grad_norm": 0.49476078567714166, |
|
"learning_rate": 3.918243630720651e-06, |
|
"loss": 0.2774, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.626865671641791, |
|
"grad_norm": 0.3436946389926571, |
|
"learning_rate": 3.818772781763449e-06, |
|
"loss": 0.2311, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.6318407960199006, |
|
"grad_norm": 0.31457657442983117, |
|
"learning_rate": 3.7205175119773285e-06, |
|
"loss": 0.231, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 2.6368159203980097, |
|
"grad_norm": 0.33774906407685856, |
|
"learning_rate": 3.6234811224404686e-06, |
|
"loss": 0.2341, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.6417910447761193, |
|
"grad_norm": 0.33915196241962076, |
|
"learning_rate": 3.527666873280362e-06, |
|
"loss": 0.2747, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 2.646766169154229, |
|
"grad_norm": 0.7253067903486308, |
|
"learning_rate": 3.4330779835643235e-06, |
|
"loss": 0.2434, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 2.6517412935323383, |
|
"grad_norm": 0.3533314185286548, |
|
"learning_rate": 3.339717631191306e-06, |
|
"loss": 0.2463, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.656716417910448, |
|
"grad_norm": 0.3488214812675212, |
|
"learning_rate": 3.2475889527851413e-06, |
|
"loss": 0.2564, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.6616915422885574, |
|
"grad_norm": 0.37464147411382354, |
|
"learning_rate": 3.156695043589171e-06, |
|
"loss": 0.2527, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.31641458687486096, |
|
"learning_rate": 3.0670389573622406e-06, |
|
"loss": 0.2433, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.671641791044776, |
|
"grad_norm": 0.33063487766863686, |
|
"learning_rate": 2.9786237062761247e-06, |
|
"loss": 0.2596, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.6766169154228856, |
|
"grad_norm": 0.3198962608716895, |
|
"learning_rate": 2.891452260814287e-06, |
|
"loss": 0.2398, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.681592039800995, |
|
"grad_norm": 0.33045280401704574, |
|
"learning_rate": 2.805527549672129e-06, |
|
"loss": 0.2354, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.6865671641791042, |
|
"grad_norm": 0.32990177471806453, |
|
"learning_rate": 2.7208524596585496e-06, |
|
"loss": 0.255, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.691542288557214, |
|
"grad_norm": 0.3064290269911759, |
|
"learning_rate": 2.637429835599008e-06, |
|
"loss": 0.2322, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.6965174129353233, |
|
"grad_norm": 0.329629777652285, |
|
"learning_rate": 2.5552624802398905e-06, |
|
"loss": 0.2308, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.701492537313433, |
|
"grad_norm": 0.3203930929813302, |
|
"learning_rate": 2.4743531541543807e-06, |
|
"loss": 0.255, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.7064676616915424, |
|
"grad_norm": 0.3246337153430093, |
|
"learning_rate": 2.3947045756497157e-06, |
|
"loss": 0.2334, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.711442786069652, |
|
"grad_norm": 0.34574042588208914, |
|
"learning_rate": 2.3163194206758365e-06, |
|
"loss": 0.2315, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.716417910447761, |
|
"grad_norm": 0.33095949362492455, |
|
"learning_rate": 2.2392003227355064e-06, |
|
"loss": 0.2533, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.7213930348258706, |
|
"grad_norm": 0.327348254225279, |
|
"learning_rate": 2.163349872795819e-06, |
|
"loss": 0.228, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.72636815920398, |
|
"grad_norm": 0.3310532056353117, |
|
"learning_rate": 2.0887706192011505e-06, |
|
"loss": 0.2532, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.7313432835820897, |
|
"grad_norm": 0.3659658179822786, |
|
"learning_rate": 2.015465067587554e-06, |
|
"loss": 0.2526, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.7363184079601988, |
|
"grad_norm": 0.31321771077874316, |
|
"learning_rate": 1.943435680798573e-06, |
|
"loss": 0.2317, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.7412935323383083, |
|
"grad_norm": 0.35622804831329247, |
|
"learning_rate": 1.872684878802482e-06, |
|
"loss": 0.2778, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.746268656716418, |
|
"grad_norm": 0.33560958300477206, |
|
"learning_rate": 1.8032150386110103e-06, |
|
"loss": 0.2313, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.7512437810945274, |
|
"grad_norm": 0.32839333475527616, |
|
"learning_rate": 1.735028494199451e-06, |
|
"loss": 0.2675, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.756218905472637, |
|
"grad_norm": 0.32930584068523533, |
|
"learning_rate": 1.6681275364282835e-06, |
|
"loss": 0.2356, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.7611940298507465, |
|
"grad_norm": 0.343780413896504, |
|
"learning_rate": 1.6025144129661763e-06, |
|
"loss": 0.2342, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.7661691542288556, |
|
"grad_norm": 0.3676149014995963, |
|
"learning_rate": 1.5381913282144711e-06, |
|
"loss": 0.257, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.771144278606965, |
|
"grad_norm": 0.32741721241637517, |
|
"learning_rate": 1.4751604432331567e-06, |
|
"loss": 0.2393, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.7761194029850746, |
|
"grad_norm": 0.5133228461228249, |
|
"learning_rate": 1.4134238756682162e-06, |
|
"loss": 0.2468, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.781094527363184, |
|
"grad_norm": 0.6717564368152784, |
|
"learning_rate": 1.3529836996805235e-06, |
|
"loss": 0.2432, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.7860696517412933, |
|
"grad_norm": 0.3650345457576272, |
|
"learning_rate": 1.2938419458761398e-06, |
|
"loss": 0.2034, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.791044776119403, |
|
"grad_norm": 0.37879362944079575, |
|
"learning_rate": 1.23600060123807e-06, |
|
"loss": 0.2471, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.7960199004975124, |
|
"grad_norm": 0.3497471145381787, |
|
"learning_rate": 1.1794616090595422e-06, |
|
"loss": 0.249, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.800995024875622, |
|
"grad_norm": 0.3258716514081455, |
|
"learning_rate": 1.124226868878715e-06, |
|
"loss": 0.2184, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.8059701492537314, |
|
"grad_norm": 0.335029096301279, |
|
"learning_rate": 1.0702982364148195e-06, |
|
"loss": 0.2233, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.810945273631841, |
|
"grad_norm": 0.33089653722712126, |
|
"learning_rate": 1.0176775235058645e-06, |
|
"loss": 0.2127, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.81592039800995, |
|
"grad_norm": 0.36553404527889055, |
|
"learning_rate": 9.66366498047724e-07, |
|
"loss": 0.2472, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.8208955223880596, |
|
"grad_norm": 0.33513935646205256, |
|
"learning_rate": 9.163668839347672e-07, |
|
"loss": 0.2599, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.825870646766169, |
|
"grad_norm": 0.3327266292283009, |
|
"learning_rate": 8.676803610019368e-07, |
|
"loss": 0.243, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.8308457711442787, |
|
"grad_norm": 0.32072020619433717, |
|
"learning_rate": 8.203085649682863e-07, |
|
"loss": 0.2248, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.835820895522388, |
|
"grad_norm": 0.34180911322997665, |
|
"learning_rate": 7.742530873820686e-07, |
|
"loss": 0.2687, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.8407960199004973, |
|
"grad_norm": 0.32625172704997785, |
|
"learning_rate": 7.295154755672196e-07, |
|
"loss": 0.2164, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.845771144278607, |
|
"grad_norm": 0.31494320482153554, |
|
"learning_rate": 6.860972325714121e-07, |
|
"loss": 0.2431, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.8507462686567164, |
|
"grad_norm": 0.33375409453354005, |
|
"learning_rate": 6.439998171155326e-07, |
|
"loss": 0.2272, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.855721393034826, |
|
"grad_norm": 0.3661672121057468, |
|
"learning_rate": 6.032246435446754e-07, |
|
"loss": 0.2661, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.8606965174129355, |
|
"grad_norm": 0.32274521306276094, |
|
"learning_rate": 5.637730817806341e-07, |
|
"loss": 0.2219, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.8656716417910446, |
|
"grad_norm": 0.31596637325997035, |
|
"learning_rate": 5.256464572758723e-07, |
|
"loss": 0.2135, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.870646766169154, |
|
"grad_norm": 0.3083576046383486, |
|
"learning_rate": 4.888460509689941e-07, |
|
"loss": 0.2188, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.8756218905472637, |
|
"grad_norm": 0.33292190176115743, |
|
"learning_rate": 4.533730992417029e-07, |
|
"loss": 0.2557, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.8805970149253732, |
|
"grad_norm": 0.46548707820421953, |
|
"learning_rate": 4.19228793877271e-07, |
|
"loss": 0.2561, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.8855721393034823, |
|
"grad_norm": 0.3202236939710589, |
|
"learning_rate": 3.8641428202048634e-07, |
|
"loss": 0.2295, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.890547263681592, |
|
"grad_norm": 0.3250224661241107, |
|
"learning_rate": 3.549306661391283e-07, |
|
"loss": 0.2606, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.8955223880597014, |
|
"grad_norm": 0.31518434096820275, |
|
"learning_rate": 3.247790039869214e-07, |
|
"loss": 0.2512, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.900497512437811, |
|
"grad_norm": 0.3819340666754849, |
|
"learning_rate": 2.959603085679863e-07, |
|
"loss": 0.2636, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.9054726368159205, |
|
"grad_norm": 0.3185432391852622, |
|
"learning_rate": 2.6847554810282226e-07, |
|
"loss": 0.2396, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.91044776119403, |
|
"grad_norm": 0.31119028135213145, |
|
"learning_rate": 2.4232564599577347e-07, |
|
"loss": 0.222, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.9154228855721396, |
|
"grad_norm": 0.3897279825932753, |
|
"learning_rate": 2.1751148080400464e-07, |
|
"loss": 0.2547, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.9203980099502487, |
|
"grad_norm": 0.3318250867618136, |
|
"learning_rate": 1.9403388620798268e-07, |
|
"loss": 0.2316, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.925373134328358, |
|
"grad_norm": 0.33435033379588464, |
|
"learning_rate": 1.71893650983459e-07, |
|
"loss": 0.264, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.9303482587064678, |
|
"grad_norm": 0.3169245972712117, |
|
"learning_rate": 1.510915189749973e-07, |
|
"loss": 0.2258, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.935323383084577, |
|
"grad_norm": 0.31046188325115304, |
|
"learning_rate": 1.3162818907094477e-07, |
|
"loss": 0.2436, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.9402985074626864, |
|
"grad_norm": 0.33083406600042586, |
|
"learning_rate": 1.1350431517998416e-07, |
|
"loss": 0.2352, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.945273631840796, |
|
"grad_norm": 0.5036448220257013, |
|
"learning_rate": 9.672050620913809e-08, |
|
"loss": 0.2463, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.9502487562189055, |
|
"grad_norm": 0.34997368115151717, |
|
"learning_rate": 8.127732604334082e-08, |
|
"loss": 0.2652, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.955223880597015, |
|
"grad_norm": 0.3245631331451197, |
|
"learning_rate": 6.717529352645802e-08, |
|
"loss": 0.2604, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.9601990049751246, |
|
"grad_norm": 0.35279551675091364, |
|
"learning_rate": 5.44148824438917e-08, |
|
"loss": 0.2469, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.965174129353234, |
|
"grad_norm": 0.3236463161348794, |
|
"learning_rate": 4.2996521506637465e-08, |
|
"loss": 0.2162, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 2.970149253731343, |
|
"grad_norm": 0.31533592113778164, |
|
"learning_rate": 3.292059433687822e-08, |
|
"loss": 0.2387, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 2.9751243781094527, |
|
"grad_norm": 0.3171216482921585, |
|
"learning_rate": 2.4187439455127804e-08, |
|
"loss": 0.2287, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.9800995024875623, |
|
"grad_norm": 0.3268105010367728, |
|
"learning_rate": 1.679735026881346e-08, |
|
"loss": 0.2278, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 2.9850746268656714, |
|
"grad_norm": 0.3248084583080667, |
|
"learning_rate": 1.0750575062461465e-08, |
|
"loss": 0.229, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.990049751243781, |
|
"grad_norm": 0.345052065217444, |
|
"learning_rate": 6.047316989317153e-09, |
|
"loss": 0.1951, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 2.9950248756218905, |
|
"grad_norm": 0.337057099299072, |
|
"learning_rate": 2.6877340645459217e-09, |
|
"loss": 0.2749, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.3925108426294995, |
|
"learning_rate": 6.719391599130376e-10, |
|
"loss": 0.1737, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 603, |
|
"total_flos": 146165521711104.0, |
|
"train_loss": 0.3971822352996513, |
|
"train_runtime": 6153.7064, |
|
"train_samples_per_second": 0.781, |
|
"train_steps_per_second": 0.098 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 603, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 146165521711104.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |