{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8721, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011466574934067194, "grad_norm": 32.98546172029393, "learning_rate": 3.816793893129771e-06, "loss": 7.9599, "step": 1 }, { "epoch": 0.00022933149868134388, "grad_norm": 47.732657241119774, "learning_rate": 7.633587786259541e-06, "loss": 7.8554, "step": 2 }, { "epoch": 0.0003439972480220158, "grad_norm": 48.40557058105882, "learning_rate": 1.1450381679389314e-05, "loss": 7.7295, "step": 3 }, { "epoch": 0.00045866299736268775, "grad_norm": 43.083244980355815, "learning_rate": 1.5267175572519083e-05, "loss": 7.5884, "step": 4 }, { "epoch": 0.0005733287467033597, "grad_norm": 29.80504046923896, "learning_rate": 1.9083969465648855e-05, "loss": 7.1929, "step": 5 }, { "epoch": 0.0006879944960440316, "grad_norm": 24.237601646564773, "learning_rate": 2.2900763358778628e-05, "loss": 6.4955, "step": 6 }, { "epoch": 0.0008026602453847036, "grad_norm": 16.16051331323853, "learning_rate": 2.6717557251908397e-05, "loss": 6.1098, "step": 7 }, { "epoch": 0.0009173259947253755, "grad_norm": 11.704852681919737, "learning_rate": 3.0534351145038166e-05, "loss": 5.8676, "step": 8 }, { "epoch": 0.0010319917440660474, "grad_norm": 11.478570942670492, "learning_rate": 3.435114503816794e-05, "loss": 5.781, "step": 9 }, { "epoch": 0.0011466574934067195, "grad_norm": 8.826203088380227, "learning_rate": 3.816793893129771e-05, "loss": 5.5798, "step": 10 }, { "epoch": 0.0012613232427473914, "grad_norm": 8.01492782521436, "learning_rate": 4.198473282442748e-05, "loss": 5.156, "step": 11 }, { "epoch": 0.0013759889920880633, "grad_norm": 8.856027556816803, "learning_rate": 4.5801526717557256e-05, "loss": 5.1825, "step": 12 }, { "epoch": 0.0014906547414287351, "grad_norm": 8.764564552477168, "learning_rate": 4.9618320610687025e-05, "loss": 5.1275, "step": 13 }, { "epoch": 0.0016053204907694072, "grad_norm": 8.050779163407102, "learning_rate": 5.3435114503816794e-05, "loss": 4.7772, "step": 14 }, { "epoch": 0.0017199862401100791, "grad_norm": 7.677818541977624, "learning_rate": 5.725190839694656e-05, "loss": 4.7878, "step": 15 }, { "epoch": 0.001834651989450751, "grad_norm": 6.2923619762944245, "learning_rate": 6.106870229007633e-05, "loss": 4.4865, "step": 16 }, { "epoch": 0.001949317738791423, "grad_norm": 6.603642188881294, "learning_rate": 6.488549618320611e-05, "loss": 4.4195, "step": 17 }, { "epoch": 0.0020639834881320948, "grad_norm": 5.276561341493895, "learning_rate": 6.870229007633588e-05, "loss": 4.3025, "step": 18 }, { "epoch": 0.002178649237472767, "grad_norm": 6.574422805087178, "learning_rate": 7.251908396946565e-05, "loss": 4.3309, "step": 19 }, { "epoch": 0.002293314986813439, "grad_norm": 6.716500046893577, "learning_rate": 7.633587786259542e-05, "loss": 4.1763, "step": 20 }, { "epoch": 0.002407980736154111, "grad_norm": 7.767168694443845, "learning_rate": 8.015267175572518e-05, "loss": 4.1277, "step": 21 }, { "epoch": 0.0025226464854947827, "grad_norm": 5.43495664457045, "learning_rate": 8.396946564885496e-05, "loss": 4.1637, "step": 22 }, { "epoch": 0.0026373122348354546, "grad_norm": 8.19870905005088, "learning_rate": 8.778625954198472e-05, "loss": 4.1303, "step": 23 }, { "epoch": 0.0027519779841761265, "grad_norm": 4.390958512330856, "learning_rate": 9.160305343511451e-05, "loss": 4.0368, "step": 24 }, { "epoch": 0.0028666437335167984, "grad_norm": 8.181788688034274, "learning_rate": 9.541984732824429e-05, "loss": 4.0893, "step": 25 }, { "epoch": 0.0029813094828574703, "grad_norm": 5.161047114532387, "learning_rate": 9.923664122137405e-05, "loss": 4.01, "step": 26 }, { "epoch": 0.0030959752321981426, "grad_norm": 3.988291754034103, "learning_rate": 0.00010305343511450383, "loss": 3.9656, "step": 27 }, { "epoch": 0.0032106409815388145, "grad_norm": 4.3518375185231, "learning_rate": 0.00010687022900763359, "loss": 3.8979, "step": 28 }, { "epoch": 0.0033253067308794864, "grad_norm": 3.2474450311419583, "learning_rate": 0.00011068702290076336, "loss": 3.8065, "step": 29 }, { "epoch": 0.0034399724802201583, "grad_norm": 3.6981046262219808, "learning_rate": 0.00011450381679389313, "loss": 3.8242, "step": 30 }, { "epoch": 0.00355463822956083, "grad_norm": 2.3400054887498256, "learning_rate": 0.0001183206106870229, "loss": 3.7846, "step": 31 }, { "epoch": 0.003669303978901502, "grad_norm": 2.5387815860304186, "learning_rate": 0.00012213740458015266, "loss": 3.7654, "step": 32 }, { "epoch": 0.003783969728242174, "grad_norm": 2.024649068663064, "learning_rate": 0.00012595419847328244, "loss": 3.7131, "step": 33 }, { "epoch": 0.003898635477582846, "grad_norm": 2.6049932950681742, "learning_rate": 0.00012977099236641222, "loss": 3.6599, "step": 34 }, { "epoch": 0.004013301226923518, "grad_norm": 2.203460859949766, "learning_rate": 0.000133587786259542, "loss": 3.5835, "step": 35 }, { "epoch": 0.0041279669762641896, "grad_norm": 1.663371476726465, "learning_rate": 0.00013740458015267177, "loss": 3.6209, "step": 36 }, { "epoch": 0.0042426327256048614, "grad_norm": 2.0835656215070917, "learning_rate": 0.00014122137404580154, "loss": 3.3668, "step": 37 }, { "epoch": 0.004357298474945534, "grad_norm": 1.0473334100427998, "learning_rate": 0.0001450381679389313, "loss": 3.5235, "step": 38 }, { "epoch": 0.004471964224286206, "grad_norm": 1.2127375462111283, "learning_rate": 0.00014885496183206107, "loss": 3.4061, "step": 39 }, { "epoch": 0.004586629973626878, "grad_norm": 1.3399602732950553, "learning_rate": 0.00015267175572519084, "loss": 3.556, "step": 40 }, { "epoch": 0.00470129572296755, "grad_norm": 1.4440908550924991, "learning_rate": 0.00015648854961832062, "loss": 3.3867, "step": 41 }, { "epoch": 0.004815961472308222, "grad_norm": 0.9843049187136491, "learning_rate": 0.00016030534351145037, "loss": 3.5907, "step": 42 }, { "epoch": 0.004930627221648894, "grad_norm": 1.0985154001302673, "learning_rate": 0.00016412213740458014, "loss": 3.5133, "step": 43 }, { "epoch": 0.0050452929709895655, "grad_norm": 0.7753963449528569, "learning_rate": 0.00016793893129770992, "loss": 3.473, "step": 44 }, { "epoch": 0.005159958720330237, "grad_norm": 0.9349357778900093, "learning_rate": 0.0001717557251908397, "loss": 3.5091, "step": 45 }, { "epoch": 0.005274624469670909, "grad_norm": 1.0655705240360924, "learning_rate": 0.00017557251908396944, "loss": 3.4788, "step": 46 }, { "epoch": 0.005389290219011581, "grad_norm": 1.0830224408202829, "learning_rate": 0.00017938931297709925, "loss": 3.3443, "step": 47 }, { "epoch": 0.005503955968352253, "grad_norm": 0.9610715057250465, "learning_rate": 0.00018320610687022902, "loss": 3.4869, "step": 48 }, { "epoch": 0.005618621717692925, "grad_norm": 1.6460839008029915, "learning_rate": 0.0001870229007633588, "loss": 3.5075, "step": 49 }, { "epoch": 0.005733287467033597, "grad_norm": 0.9502092366035757, "learning_rate": 0.00019083969465648857, "loss": 3.5921, "step": 50 }, { "epoch": 0.005847953216374269, "grad_norm": 1.3135765411097595, "learning_rate": 0.00019465648854961832, "loss": 3.5413, "step": 51 }, { "epoch": 0.0059626189657149406, "grad_norm": 0.8678551303184648, "learning_rate": 0.0001984732824427481, "loss": 3.4618, "step": 52 }, { "epoch": 0.006077284715055613, "grad_norm": 0.7809299901239882, "learning_rate": 0.00020229007633587788, "loss": 3.5253, "step": 53 }, { "epoch": 0.006191950464396285, "grad_norm": 1.0012899217352809, "learning_rate": 0.00020610687022900765, "loss": 3.4477, "step": 54 }, { "epoch": 0.006306616213736957, "grad_norm": 0.8581363732007035, "learning_rate": 0.0002099236641221374, "loss": 3.4745, "step": 55 }, { "epoch": 0.006421281963077629, "grad_norm": 0.8298370122931034, "learning_rate": 0.00021374045801526718, "loss": 3.42, "step": 56 }, { "epoch": 0.006535947712418301, "grad_norm": 1.0185658213061561, "learning_rate": 0.00021755725190839695, "loss": 3.4566, "step": 57 }, { "epoch": 0.006650613461758973, "grad_norm": 0.865321806958494, "learning_rate": 0.00022137404580152673, "loss": 3.4102, "step": 58 }, { "epoch": 0.006765279211099645, "grad_norm": 0.941724920284073, "learning_rate": 0.00022519083969465648, "loss": 3.4892, "step": 59 }, { "epoch": 0.0068799449604403165, "grad_norm": 1.0459210963236445, "learning_rate": 0.00022900763358778625, "loss": 3.4095, "step": 60 }, { "epoch": 0.006994610709780988, "grad_norm": 1.1554245090240505, "learning_rate": 0.00023282442748091603, "loss": 3.3631, "step": 61 }, { "epoch": 0.00710927645912166, "grad_norm": 0.895034685115234, "learning_rate": 0.0002366412213740458, "loss": 3.4473, "step": 62 }, { "epoch": 0.007223942208462332, "grad_norm": 1.3066154220775512, "learning_rate": 0.00024045801526717558, "loss": 3.3322, "step": 63 }, { "epoch": 0.007338607957803004, "grad_norm": 0.9813587018615572, "learning_rate": 0.00024427480916030533, "loss": 3.2766, "step": 64 }, { "epoch": 0.007453273707143676, "grad_norm": 0.9704606262126225, "learning_rate": 0.00024809160305343513, "loss": 3.484, "step": 65 }, { "epoch": 0.007567939456484348, "grad_norm": 0.9712869567388323, "learning_rate": 0.0002519083969465649, "loss": 3.4628, "step": 66 }, { "epoch": 0.00768260520582502, "grad_norm": 0.8400786558141455, "learning_rate": 0.00025572519083969463, "loss": 3.4789, "step": 67 }, { "epoch": 0.007797270955165692, "grad_norm": 1.0253271071209522, "learning_rate": 0.00025954198473282443, "loss": 3.4881, "step": 68 }, { "epoch": 0.007911936704506364, "grad_norm": 1.107476856533126, "learning_rate": 0.0002633587786259542, "loss": 3.4161, "step": 69 }, { "epoch": 0.008026602453847035, "grad_norm": 0.9325120243579201, "learning_rate": 0.000267175572519084, "loss": 3.2294, "step": 70 }, { "epoch": 0.008141268203187708, "grad_norm": 1.3497439034406296, "learning_rate": 0.00027099236641221373, "loss": 3.4448, "step": 71 }, { "epoch": 0.008255933952528379, "grad_norm": 0.8744296526952612, "learning_rate": 0.00027480916030534353, "loss": 3.4431, "step": 72 }, { "epoch": 0.008370599701869052, "grad_norm": 0.9427355023191845, "learning_rate": 0.0002786259541984733, "loss": 3.3507, "step": 73 }, { "epoch": 0.008485265451209723, "grad_norm": 1.0251166266851959, "learning_rate": 0.0002824427480916031, "loss": 3.4058, "step": 74 }, { "epoch": 0.008599931200550396, "grad_norm": 0.7674006906281569, "learning_rate": 0.0002862595419847328, "loss": 3.3602, "step": 75 }, { "epoch": 0.008714596949891068, "grad_norm": 0.9940608958833785, "learning_rate": 0.0002900763358778626, "loss": 3.255, "step": 76 }, { "epoch": 0.00882926269923174, "grad_norm": 1.0217336276408653, "learning_rate": 0.0002938931297709924, "loss": 3.4686, "step": 77 }, { "epoch": 0.008943928448572412, "grad_norm": 1.0210453341651522, "learning_rate": 0.00029770992366412214, "loss": 3.1809, "step": 78 }, { "epoch": 0.009058594197913083, "grad_norm": 0.8433005544816973, "learning_rate": 0.00030152671755725194, "loss": 3.1226, "step": 79 }, { "epoch": 0.009173259947253756, "grad_norm": 0.8872726347710475, "learning_rate": 0.0003053435114503817, "loss": 3.2751, "step": 80 }, { "epoch": 0.009287925696594427, "grad_norm": 0.8457382669147994, "learning_rate": 0.0003091603053435115, "loss": 3.2558, "step": 81 }, { "epoch": 0.0094025914459351, "grad_norm": 1.1123453200303604, "learning_rate": 0.00031297709923664124, "loss": 3.3066, "step": 82 }, { "epoch": 0.00951725719527577, "grad_norm": 1.009852884299192, "learning_rate": 0.000316793893129771, "loss": 3.3075, "step": 83 }, { "epoch": 0.009631922944616443, "grad_norm": 1.2141362918922074, "learning_rate": 0.00032061068702290074, "loss": 3.3221, "step": 84 }, { "epoch": 0.009746588693957114, "grad_norm": 0.7220903566591271, "learning_rate": 0.00032442748091603054, "loss": 3.194, "step": 85 }, { "epoch": 0.009861254443297787, "grad_norm": 0.9271719750341705, "learning_rate": 0.0003282442748091603, "loss": 3.3259, "step": 86 }, { "epoch": 0.009975920192638458, "grad_norm": 0.7515205226796537, "learning_rate": 0.0003320610687022901, "loss": 3.2414, "step": 87 }, { "epoch": 0.010090585941979131, "grad_norm": 0.8176808987862619, "learning_rate": 0.00033587786259541984, "loss": 3.1264, "step": 88 }, { "epoch": 0.010205251691319802, "grad_norm": 1.1649501394725053, "learning_rate": 0.00033969465648854964, "loss": 3.1802, "step": 89 }, { "epoch": 0.010319917440660475, "grad_norm": 1.3541676871463835, "learning_rate": 0.0003435114503816794, "loss": 3.2157, "step": 90 }, { "epoch": 0.010434583190001148, "grad_norm": 0.8926327463688286, "learning_rate": 0.0003473282442748092, "loss": 3.2253, "step": 91 }, { "epoch": 0.010549248939341819, "grad_norm": 0.9650209887320587, "learning_rate": 0.0003511450381679389, "loss": 3.0817, "step": 92 }, { "epoch": 0.010663914688682491, "grad_norm": 0.9103638643319637, "learning_rate": 0.0003549618320610687, "loss": 3.2968, "step": 93 }, { "epoch": 0.010778580438023162, "grad_norm": 1.193354297338719, "learning_rate": 0.0003587786259541985, "loss": 3.2887, "step": 94 }, { "epoch": 0.010893246187363835, "grad_norm": 0.9156426112986605, "learning_rate": 0.00036259541984732824, "loss": 3.2744, "step": 95 }, { "epoch": 0.011007911936704506, "grad_norm": 1.457209283920772, "learning_rate": 0.00036641221374045805, "loss": 3.3551, "step": 96 }, { "epoch": 0.011122577686045179, "grad_norm": 0.9923668985964779, "learning_rate": 0.0003702290076335878, "loss": 3.2413, "step": 97 }, { "epoch": 0.01123724343538585, "grad_norm": 0.8705641434360311, "learning_rate": 0.0003740458015267176, "loss": 3.227, "step": 98 }, { "epoch": 0.011351909184726523, "grad_norm": 0.7668465888834765, "learning_rate": 0.00037786259541984735, "loss": 3.1211, "step": 99 }, { "epoch": 0.011466574934067194, "grad_norm": 0.8800734116750144, "learning_rate": 0.00038167938931297715, "loss": 3.1365, "step": 100 }, { "epoch": 0.011581240683407866, "grad_norm": 0.8257318115489123, "learning_rate": 0.00038549618320610684, "loss": 3.1621, "step": 101 }, { "epoch": 0.011695906432748537, "grad_norm": 0.8350675911404676, "learning_rate": 0.00038931297709923665, "loss": 3.096, "step": 102 }, { "epoch": 0.01181057218208921, "grad_norm": 0.9917559889337618, "learning_rate": 0.0003931297709923664, "loss": 3.1627, "step": 103 }, { "epoch": 0.011925237931429881, "grad_norm": 0.8728526561487229, "learning_rate": 0.0003969465648854962, "loss": 3.1434, "step": 104 }, { "epoch": 0.012039903680770554, "grad_norm": 0.8044355327218607, "learning_rate": 0.00040076335877862595, "loss": 3.3545, "step": 105 }, { "epoch": 0.012154569430111227, "grad_norm": 0.7624150853537827, "learning_rate": 0.00040458015267175575, "loss": 3.2477, "step": 106 }, { "epoch": 0.012269235179451898, "grad_norm": 0.7685371880323714, "learning_rate": 0.0004083969465648855, "loss": 3.302, "step": 107 }, { "epoch": 0.01238390092879257, "grad_norm": 0.7944341844978677, "learning_rate": 0.0004122137404580153, "loss": 3.1708, "step": 108 }, { "epoch": 0.012498566678133241, "grad_norm": 0.9876391458592928, "learning_rate": 0.00041603053435114505, "loss": 3.2425, "step": 109 }, { "epoch": 0.012613232427473914, "grad_norm": 0.8847659287810202, "learning_rate": 0.0004198473282442748, "loss": 3.2351, "step": 110 }, { "epoch": 0.012727898176814585, "grad_norm": 0.8426928506995099, "learning_rate": 0.00042366412213740455, "loss": 3.1772, "step": 111 }, { "epoch": 0.012842563926155258, "grad_norm": 0.922126384685991, "learning_rate": 0.00042748091603053435, "loss": 3.1375, "step": 112 }, { "epoch": 0.012957229675495929, "grad_norm": 0.8080171185279135, "learning_rate": 0.00043129770992366415, "loss": 3.1623, "step": 113 }, { "epoch": 0.013071895424836602, "grad_norm": 0.7030443996976394, "learning_rate": 0.0004351145038167939, "loss": 3.0977, "step": 114 }, { "epoch": 0.013186561174177273, "grad_norm": 0.6535113180036304, "learning_rate": 0.0004389312977099237, "loss": 3.1151, "step": 115 }, { "epoch": 0.013301226923517945, "grad_norm": 0.7180706854244188, "learning_rate": 0.00044274809160305345, "loss": 2.9521, "step": 116 }, { "epoch": 0.013415892672858616, "grad_norm": 1.0076357703757337, "learning_rate": 0.00044656488549618326, "loss": 3.286, "step": 117 }, { "epoch": 0.01353055842219929, "grad_norm": 0.7472021229758496, "learning_rate": 0.00045038167938931295, "loss": 3.1203, "step": 118 }, { "epoch": 0.01364522417153996, "grad_norm": 0.8403325212158305, "learning_rate": 0.00045419847328244275, "loss": 3.2043, "step": 119 }, { "epoch": 0.013759889920880633, "grad_norm": 0.829704170483764, "learning_rate": 0.0004580152671755725, "loss": 3.1752, "step": 120 }, { "epoch": 0.013874555670221306, "grad_norm": 0.8199327720574795, "learning_rate": 0.0004618320610687023, "loss": 3.2066, "step": 121 }, { "epoch": 0.013989221419561977, "grad_norm": 0.8987043231843231, "learning_rate": 0.00046564885496183206, "loss": 3.2588, "step": 122 }, { "epoch": 0.01410388716890265, "grad_norm": 0.7333642395061045, "learning_rate": 0.00046946564885496186, "loss": 3.1435, "step": 123 }, { "epoch": 0.01421855291824332, "grad_norm": 0.6709088599417844, "learning_rate": 0.0004732824427480916, "loss": 3.022, "step": 124 }, { "epoch": 0.014333218667583993, "grad_norm": 0.7545691274043932, "learning_rate": 0.0004770992366412214, "loss": 3.0124, "step": 125 }, { "epoch": 0.014447884416924664, "grad_norm": 0.7434188600478571, "learning_rate": 0.00048091603053435116, "loss": 3.1598, "step": 126 }, { "epoch": 0.014562550166265337, "grad_norm": 0.781938822649928, "learning_rate": 0.0004847328244274809, "loss": 3.2127, "step": 127 }, { "epoch": 0.014677215915606008, "grad_norm": 0.7091659969095595, "learning_rate": 0.0004885496183206107, "loss": 3.101, "step": 128 }, { "epoch": 0.01479188166494668, "grad_norm": 0.7480721553489864, "learning_rate": 0.0004923664122137404, "loss": 3.1012, "step": 129 }, { "epoch": 0.014906547414287352, "grad_norm": 0.8175518936335485, "learning_rate": 0.0004961832061068703, "loss": 3.1727, "step": 130 }, { "epoch": 0.015021213163628025, "grad_norm": 0.8558047877552776, "learning_rate": 0.0005, "loss": 3.092, "step": 131 }, { "epoch": 0.015135878912968696, "grad_norm": 0.715675926855538, "learning_rate": 0.0005038167938931298, "loss": 3.0416, "step": 132 }, { "epoch": 0.015250544662309368, "grad_norm": 0.7894171983316872, "learning_rate": 0.0005076335877862596, "loss": 3.1021, "step": 133 }, { "epoch": 0.01536521041165004, "grad_norm": 0.6352980507571451, "learning_rate": 0.0005114503816793893, "loss": 3.1602, "step": 134 }, { "epoch": 0.015479876160990712, "grad_norm": 0.7282730979647982, "learning_rate": 0.0005152671755725191, "loss": 3.1725, "step": 135 }, { "epoch": 0.015594541910331383, "grad_norm": 0.6393104701525223, "learning_rate": 0.0005190839694656489, "loss": 3.1706, "step": 136 }, { "epoch": 0.015709207659672056, "grad_norm": 0.6448333705467408, "learning_rate": 0.0005229007633587787, "loss": 3.1772, "step": 137 }, { "epoch": 0.01582387340901273, "grad_norm": 0.7367909432134404, "learning_rate": 0.0005267175572519084, "loss": 2.9897, "step": 138 }, { "epoch": 0.0159385391583534, "grad_norm": 0.8198870555104838, "learning_rate": 0.0005305343511450382, "loss": 3.1021, "step": 139 }, { "epoch": 0.01605320490769407, "grad_norm": 0.7357635840223945, "learning_rate": 0.000534351145038168, "loss": 3.0588, "step": 140 }, { "epoch": 0.016167870657034743, "grad_norm": 0.705795404439166, "learning_rate": 0.0005381679389312977, "loss": 2.9621, "step": 141 }, { "epoch": 0.016282536406375416, "grad_norm": 0.7176753874516963, "learning_rate": 0.0005419847328244275, "loss": 3.0721, "step": 142 }, { "epoch": 0.01639720215571609, "grad_norm": 0.700448131519179, "learning_rate": 0.0005458015267175572, "loss": 2.9941, "step": 143 }, { "epoch": 0.016511867905056758, "grad_norm": 0.6990399671593439, "learning_rate": 0.0005496183206106871, "loss": 3.1076, "step": 144 }, { "epoch": 0.01662653365439743, "grad_norm": 0.7412053984828968, "learning_rate": 0.0005534351145038168, "loss": 3.141, "step": 145 }, { "epoch": 0.016741199403738104, "grad_norm": 0.7233185941272354, "learning_rate": 0.0005572519083969466, "loss": 3.1995, "step": 146 }, { "epoch": 0.016855865153078776, "grad_norm": 0.7077580442784301, "learning_rate": 0.0005610687022900763, "loss": 3.0882, "step": 147 }, { "epoch": 0.016970530902419446, "grad_norm": 0.6612724036525008, "learning_rate": 0.0005648854961832062, "loss": 3.1712, "step": 148 }, { "epoch": 0.01708519665176012, "grad_norm": 0.6123332517534898, "learning_rate": 0.0005687022900763359, "loss": 3.1563, "step": 149 }, { "epoch": 0.01719986240110079, "grad_norm": 0.6606169687648724, "learning_rate": 0.0005725190839694656, "loss": 3.0875, "step": 150 }, { "epoch": 0.017314528150441464, "grad_norm": 0.5871384537513337, "learning_rate": 0.0005763358778625954, "loss": 3.056, "step": 151 }, { "epoch": 0.017429193899782137, "grad_norm": 0.6326403891535017, "learning_rate": 0.0005801526717557252, "loss": 3.0828, "step": 152 }, { "epoch": 0.017543859649122806, "grad_norm": 0.7516813020287689, "learning_rate": 0.000583969465648855, "loss": 3.0481, "step": 153 }, { "epoch": 0.01765852539846348, "grad_norm": 0.7409450053300217, "learning_rate": 0.0005877862595419848, "loss": 3.1067, "step": 154 }, { "epoch": 0.01777319114780415, "grad_norm": 0.7600143894697873, "learning_rate": 0.0005916030534351145, "loss": 3.1402, "step": 155 }, { "epoch": 0.017887856897144824, "grad_norm": 0.7230034388363792, "learning_rate": 0.0005954198473282443, "loss": 3.1705, "step": 156 }, { "epoch": 0.018002522646485494, "grad_norm": 0.8581851881653522, "learning_rate": 0.0005992366412213741, "loss": 3.0555, "step": 157 }, { "epoch": 0.018117188395826166, "grad_norm": 0.7809357567202576, "learning_rate": 0.0006030534351145039, "loss": 3.0556, "step": 158 }, { "epoch": 0.01823185414516684, "grad_norm": 0.839444580176229, "learning_rate": 0.0006068702290076335, "loss": 3.1123, "step": 159 }, { "epoch": 0.018346519894507512, "grad_norm": 0.7325816670271689, "learning_rate": 0.0006106870229007634, "loss": 3.0999, "step": 160 }, { "epoch": 0.01846118564384818, "grad_norm": 0.6010238880161904, "learning_rate": 0.0006145038167938931, "loss": 3.0407, "step": 161 }, { "epoch": 0.018575851393188854, "grad_norm": 0.7092973080447477, "learning_rate": 0.000618320610687023, "loss": 3.0507, "step": 162 }, { "epoch": 0.018690517142529527, "grad_norm": 0.7168594676154711, "learning_rate": 0.0006221374045801526, "loss": 2.9922, "step": 163 }, { "epoch": 0.0188051828918702, "grad_norm": 0.6018886517920051, "learning_rate": 0.0006259541984732825, "loss": 2.9425, "step": 164 }, { "epoch": 0.01891984864121087, "grad_norm": 0.7357060271720588, "learning_rate": 0.0006297709923664122, "loss": 3.0344, "step": 165 }, { "epoch": 0.01903451439055154, "grad_norm": 0.7157620230021696, "learning_rate": 0.000633587786259542, "loss": 3.1855, "step": 166 }, { "epoch": 0.019149180139892214, "grad_norm": 0.6232405739903173, "learning_rate": 0.0006374045801526717, "loss": 3.0973, "step": 167 }, { "epoch": 0.019263845889232887, "grad_norm": 0.7887082352376923, "learning_rate": 0.0006412213740458015, "loss": 3.1427, "step": 168 }, { "epoch": 0.01937851163857356, "grad_norm": 0.6352665877608317, "learning_rate": 0.0006450381679389313, "loss": 2.9604, "step": 169 }, { "epoch": 0.01949317738791423, "grad_norm": 0.6228776886405907, "learning_rate": 0.0006488549618320611, "loss": 3.0234, "step": 170 }, { "epoch": 0.0196078431372549, "grad_norm": 0.7341180388377916, "learning_rate": 0.0006526717557251909, "loss": 3.1532, "step": 171 }, { "epoch": 0.019722508886595574, "grad_norm": 0.5460205774511234, "learning_rate": 0.0006564885496183206, "loss": 2.9746, "step": 172 }, { "epoch": 0.019837174635936247, "grad_norm": 0.5653175240630341, "learning_rate": 0.0006603053435114504, "loss": 2.9971, "step": 173 }, { "epoch": 0.019951840385276916, "grad_norm": 0.7731864970577471, "learning_rate": 0.0006641221374045802, "loss": 2.9651, "step": 174 }, { "epoch": 0.02006650613461759, "grad_norm": 0.6771364419122414, "learning_rate": 0.0006679389312977099, "loss": 2.8333, "step": 175 }, { "epoch": 0.020181171883958262, "grad_norm": 0.6346129832522949, "learning_rate": 0.0006717557251908397, "loss": 3.054, "step": 176 }, { "epoch": 0.020295837633298935, "grad_norm": 0.7326932171856574, "learning_rate": 0.0006755725190839694, "loss": 3.0687, "step": 177 }, { "epoch": 0.020410503382639604, "grad_norm": 0.5823604135128475, "learning_rate": 0.0006793893129770993, "loss": 2.8745, "step": 178 }, { "epoch": 0.020525169131980277, "grad_norm": 0.6174195309858355, "learning_rate": 0.000683206106870229, "loss": 3.0312, "step": 179 }, { "epoch": 0.02063983488132095, "grad_norm": 0.5418158380203857, "learning_rate": 0.0006870229007633588, "loss": 2.8996, "step": 180 }, { "epoch": 0.020754500630661622, "grad_norm": 0.6181540511481147, "learning_rate": 0.0006908396946564885, "loss": 2.9636, "step": 181 }, { "epoch": 0.020869166380002295, "grad_norm": 0.6244568919555246, "learning_rate": 0.0006946564885496184, "loss": 3.084, "step": 182 }, { "epoch": 0.020983832129342964, "grad_norm": 0.7149755208774589, "learning_rate": 0.0006984732824427481, "loss": 3.0251, "step": 183 }, { "epoch": 0.021098497878683637, "grad_norm": 0.6056458595252673, "learning_rate": 0.0007022900763358778, "loss": 3.0513, "step": 184 }, { "epoch": 0.02121316362802431, "grad_norm": 0.6214029944478348, "learning_rate": 0.0007061068702290076, "loss": 2.9824, "step": 185 }, { "epoch": 0.021327829377364983, "grad_norm": 0.641553641710498, "learning_rate": 0.0007099236641221374, "loss": 3.056, "step": 186 }, { "epoch": 0.021442495126705652, "grad_norm": 0.6667627792634219, "learning_rate": 0.0007137404580152672, "loss": 3.0012, "step": 187 }, { "epoch": 0.021557160876046325, "grad_norm": 0.5945142736105938, "learning_rate": 0.000717557251908397, "loss": 3.1214, "step": 188 }, { "epoch": 0.021671826625386997, "grad_norm": 0.6783251089331055, "learning_rate": 0.0007213740458015267, "loss": 2.999, "step": 189 }, { "epoch": 0.02178649237472767, "grad_norm": 0.6688221997283305, "learning_rate": 0.0007251908396946565, "loss": 3.0808, "step": 190 }, { "epoch": 0.02190115812406834, "grad_norm": 0.6746245827654979, "learning_rate": 0.0007290076335877863, "loss": 3.0173, "step": 191 }, { "epoch": 0.022015823873409012, "grad_norm": 0.5795337065907388, "learning_rate": 0.0007328244274809161, "loss": 2.9558, "step": 192 }, { "epoch": 0.022130489622749685, "grad_norm": 0.6499209328375691, "learning_rate": 0.0007366412213740457, "loss": 3.0574, "step": 193 }, { "epoch": 0.022245155372090358, "grad_norm": 0.6980438906576684, "learning_rate": 0.0007404580152671756, "loss": 2.8779, "step": 194 }, { "epoch": 0.022359821121431027, "grad_norm": 0.5720807575262353, "learning_rate": 0.0007442748091603053, "loss": 3.0448, "step": 195 }, { "epoch": 0.0224744868707717, "grad_norm": 0.5301946862336206, "learning_rate": 0.0007480916030534352, "loss": 3.1149, "step": 196 }, { "epoch": 0.022589152620112372, "grad_norm": 0.8105858231356275, "learning_rate": 0.0007519083969465648, "loss": 3.0755, "step": 197 }, { "epoch": 0.022703818369453045, "grad_norm": 0.5462211661048078, "learning_rate": 0.0007557251908396947, "loss": 3.0021, "step": 198 }, { "epoch": 0.022818484118793718, "grad_norm": 4.3269402910987225, "learning_rate": 0.0007595419847328244, "loss": 3.1649, "step": 199 }, { "epoch": 0.022933149868134387, "grad_norm": 0.6327785858258099, "learning_rate": 0.0007633587786259543, "loss": 3.0311, "step": 200 }, { "epoch": 0.02304781561747506, "grad_norm": 3.228731007266588, "learning_rate": 0.0007671755725190839, "loss": 3.329, "step": 201 }, { "epoch": 0.023162481366815733, "grad_norm": 0.64153983302329, "learning_rate": 0.0007709923664122137, "loss": 2.9986, "step": 202 }, { "epoch": 0.023277147116156405, "grad_norm": 0.5881412203184709, "learning_rate": 0.0007748091603053435, "loss": 3.0452, "step": 203 }, { "epoch": 0.023391812865497075, "grad_norm": 0.5672212013258866, "learning_rate": 0.0007786259541984733, "loss": 2.9128, "step": 204 }, { "epoch": 0.023506478614837747, "grad_norm": 0.5536613897718605, "learning_rate": 0.000782442748091603, "loss": 3.044, "step": 205 }, { "epoch": 0.02362114436417842, "grad_norm": 0.5241079883708706, "learning_rate": 0.0007862595419847328, "loss": 3.0764, "step": 206 }, { "epoch": 0.023735810113519093, "grad_norm": 0.5220428254717413, "learning_rate": 0.0007900763358778626, "loss": 3.0119, "step": 207 }, { "epoch": 0.023850475862859762, "grad_norm": 0.5467201856986593, "learning_rate": 0.0007938931297709924, "loss": 2.9784, "step": 208 }, { "epoch": 0.023965141612200435, "grad_norm": 0.5479192531206861, "learning_rate": 0.0007977099236641223, "loss": 2.9427, "step": 209 }, { "epoch": 0.024079807361541108, "grad_norm": 0.5615227198006923, "learning_rate": 0.0008015267175572519, "loss": 3.1378, "step": 210 }, { "epoch": 0.02419447311088178, "grad_norm": 0.5147014084174933, "learning_rate": 0.0008053435114503816, "loss": 3.0301, "step": 211 }, { "epoch": 0.024309138860222453, "grad_norm": 0.5625278337037649, "learning_rate": 0.0008091603053435115, "loss": 2.8769, "step": 212 }, { "epoch": 0.024423804609563123, "grad_norm": 0.7014380475755434, "learning_rate": 0.0008129770992366412, "loss": 2.9623, "step": 213 }, { "epoch": 0.024538470358903795, "grad_norm": 0.6133818118706758, "learning_rate": 0.000816793893129771, "loss": 3.0224, "step": 214 }, { "epoch": 0.024653136108244468, "grad_norm": 0.625292717113106, "learning_rate": 0.0008206106870229007, "loss": 3.0719, "step": 215 }, { "epoch": 0.02476780185758514, "grad_norm": 0.6113203903609521, "learning_rate": 0.0008244274809160306, "loss": 3.1219, "step": 216 }, { "epoch": 0.02488246760692581, "grad_norm": 0.5736723464363234, "learning_rate": 0.0008282442748091604, "loss": 2.9809, "step": 217 }, { "epoch": 0.024997133356266483, "grad_norm": 0.6405130215173672, "learning_rate": 0.0008320610687022901, "loss": 2.9864, "step": 218 }, { "epoch": 0.025111799105607156, "grad_norm": 0.6007317927514583, "learning_rate": 0.0008358778625954198, "loss": 3.0273, "step": 219 }, { "epoch": 0.02522646485494783, "grad_norm": 0.6619853975857226, "learning_rate": 0.0008396946564885496, "loss": 3.0412, "step": 220 }, { "epoch": 0.025341130604288498, "grad_norm": 0.5522567593234391, "learning_rate": 0.0008435114503816795, "loss": 2.8871, "step": 221 }, { "epoch": 0.02545579635362917, "grad_norm": 0.5627933279668168, "learning_rate": 0.0008473282442748091, "loss": 3.1126, "step": 222 }, { "epoch": 0.025570462102969843, "grad_norm": 0.6260317440159605, "learning_rate": 0.000851145038167939, "loss": 3.2618, "step": 223 }, { "epoch": 0.025685127852310516, "grad_norm": 0.5511742256131716, "learning_rate": 0.0008549618320610687, "loss": 3.0041, "step": 224 }, { "epoch": 0.025799793601651185, "grad_norm": 0.564344748628514, "learning_rate": 0.0008587786259541986, "loss": 2.8753, "step": 225 }, { "epoch": 0.025914459350991858, "grad_norm": 0.5194384479234073, "learning_rate": 0.0008625954198473283, "loss": 2.9954, "step": 226 }, { "epoch": 0.02602912510033253, "grad_norm": 0.5671036584361538, "learning_rate": 0.0008664122137404581, "loss": 3.0036, "step": 227 }, { "epoch": 0.026143790849673203, "grad_norm": 0.6425359144321615, "learning_rate": 0.0008702290076335878, "loss": 2.9818, "step": 228 }, { "epoch": 0.026258456599013876, "grad_norm": 0.6008191185003743, "learning_rate": 0.0008740458015267176, "loss": 2.9525, "step": 229 }, { "epoch": 0.026373122348354545, "grad_norm": 0.5562413068913958, "learning_rate": 0.0008778625954198474, "loss": 2.9698, "step": 230 }, { "epoch": 0.026487788097695218, "grad_norm": 0.5180517548960076, "learning_rate": 0.000881679389312977, "loss": 2.8583, "step": 231 }, { "epoch": 0.02660245384703589, "grad_norm": 0.5299750923547059, "learning_rate": 0.0008854961832061069, "loss": 2.9318, "step": 232 }, { "epoch": 0.026717119596376564, "grad_norm": 0.5847844322487309, "learning_rate": 0.0008893129770992367, "loss": 2.9227, "step": 233 }, { "epoch": 0.026831785345717233, "grad_norm": 0.5623854753534139, "learning_rate": 0.0008931297709923665, "loss": 2.9027, "step": 234 }, { "epoch": 0.026946451095057906, "grad_norm": 0.6055963553845306, "learning_rate": 0.0008969465648854962, "loss": 2.908, "step": 235 }, { "epoch": 0.02706111684439858, "grad_norm": 0.5304107973619167, "learning_rate": 0.0009007633587786259, "loss": 2.9666, "step": 236 }, { "epoch": 0.02717578259373925, "grad_norm": 0.47771171633395954, "learning_rate": 0.0009045801526717558, "loss": 3.0276, "step": 237 }, { "epoch": 0.02729044834307992, "grad_norm": 0.5638093487086294, "learning_rate": 0.0009083969465648855, "loss": 2.9997, "step": 238 }, { "epoch": 0.027405114092420593, "grad_norm": 0.5115169010872654, "learning_rate": 0.0009122137404580153, "loss": 3.0217, "step": 239 }, { "epoch": 0.027519779841761266, "grad_norm": 0.5373470870425937, "learning_rate": 0.000916030534351145, "loss": 3.0147, "step": 240 }, { "epoch": 0.02763444559110194, "grad_norm": 0.50389345283373, "learning_rate": 0.0009198473282442749, "loss": 3.0051, "step": 241 }, { "epoch": 0.02774911134044261, "grad_norm": 0.5686045089313948, "learning_rate": 0.0009236641221374046, "loss": 2.9977, "step": 242 }, { "epoch": 0.02786377708978328, "grad_norm": 0.5322094317111871, "learning_rate": 0.0009274809160305345, "loss": 2.9022, "step": 243 }, { "epoch": 0.027978442839123954, "grad_norm": 0.580557681506128, "learning_rate": 0.0009312977099236641, "loss": 2.9606, "step": 244 }, { "epoch": 0.028093108588464626, "grad_norm": 0.5819599288092461, "learning_rate": 0.0009351145038167939, "loss": 2.9676, "step": 245 }, { "epoch": 0.0282077743378053, "grad_norm": 0.5426836978641648, "learning_rate": 0.0009389312977099237, "loss": 2.9792, "step": 246 }, { "epoch": 0.02832244008714597, "grad_norm": 0.5434616035713983, "learning_rate": 0.0009427480916030535, "loss": 2.9292, "step": 247 }, { "epoch": 0.02843710583648664, "grad_norm": 0.4931182373221491, "learning_rate": 0.0009465648854961832, "loss": 3.0644, "step": 248 }, { "epoch": 0.028551771585827314, "grad_norm": 0.4828253688205587, "learning_rate": 0.000950381679389313, "loss": 2.9567, "step": 249 }, { "epoch": 0.028666437335167987, "grad_norm": 0.6042815397586974, "learning_rate": 0.0009541984732824428, "loss": 3.1015, "step": 250 }, { "epoch": 0.028781103084508656, "grad_norm": 0.5585071590393784, "learning_rate": 0.0009580152671755726, "loss": 2.9905, "step": 251 }, { "epoch": 0.02889576883384933, "grad_norm": 0.4350057851189633, "learning_rate": 0.0009618320610687023, "loss": 2.9323, "step": 252 }, { "epoch": 0.02901043458319, "grad_norm": 0.4673874881890266, "learning_rate": 0.0009656488549618321, "loss": 2.8471, "step": 253 }, { "epoch": 0.029125100332530674, "grad_norm": 0.5052753207433914, "learning_rate": 0.0009694656488549618, "loss": 3.0418, "step": 254 }, { "epoch": 0.029239766081871343, "grad_norm": 0.5336441114809883, "learning_rate": 0.0009732824427480917, "loss": 3.0095, "step": 255 }, { "epoch": 0.029354431831212016, "grad_norm": 0.5279004804456354, "learning_rate": 0.0009770992366412213, "loss": 2.9115, "step": 256 }, { "epoch": 0.02946909758055269, "grad_norm": 0.4661839015087628, "learning_rate": 0.0009809160305343512, "loss": 2.9732, "step": 257 }, { "epoch": 0.02958376332989336, "grad_norm": 0.48913604863304283, "learning_rate": 0.0009847328244274808, "loss": 3.1976, "step": 258 }, { "epoch": 0.029698429079234034, "grad_norm": 0.5222709393868871, "learning_rate": 0.0009885496183206107, "loss": 2.8806, "step": 259 }, { "epoch": 0.029813094828574704, "grad_norm": 0.5177827866377747, "learning_rate": 0.0009923664122137405, "loss": 3.0158, "step": 260 }, { "epoch": 0.029927760577915376, "grad_norm": 0.5031667027902577, "learning_rate": 0.0009961832061068704, "loss": 2.8104, "step": 261 }, { "epoch": 0.03004242632725605, "grad_norm": 0.5330034729502853, "learning_rate": 0.001, "loss": 2.9629, "step": 262 }, { "epoch": 0.030157092076596722, "grad_norm": 0.48611659249721517, "learning_rate": 0.0009999999655172654, "loss": 2.9476, "step": 263 }, { "epoch": 0.03027175782593739, "grad_norm": 0.45520990511820436, "learning_rate": 0.0009999998620690664, "loss": 2.9605, "step": 264 }, { "epoch": 0.030386423575278064, "grad_norm": 0.5216237841662955, "learning_rate": 0.0009999996896554175, "loss": 2.8796, "step": 265 }, { "epoch": 0.030501089324618737, "grad_norm": 0.5290070708388479, "learning_rate": 0.0009999994482763422, "loss": 3.0675, "step": 266 }, { "epoch": 0.03061575507395941, "grad_norm": 0.4912259087553911, "learning_rate": 0.0009999991379318737, "loss": 2.9481, "step": 267 }, { "epoch": 0.03073042082330008, "grad_norm": 0.44837601964173174, "learning_rate": 0.000999998758622055, "loss": 2.9035, "step": 268 }, { "epoch": 0.03084508657264075, "grad_norm": 0.4651387703391051, "learning_rate": 0.0009999983103469385, "loss": 2.9301, "step": 269 }, { "epoch": 0.030959752321981424, "grad_norm": 0.5068936213869853, "learning_rate": 0.0009999977931065857, "loss": 2.9327, "step": 270 }, { "epoch": 0.031074418071322097, "grad_norm": 0.45357650225532237, "learning_rate": 0.0009999972069010686, "loss": 2.9411, "step": 271 }, { "epoch": 0.031189083820662766, "grad_norm": 0.4890846938377679, "learning_rate": 0.0009999965517304673, "loss": 2.9763, "step": 272 }, { "epoch": 0.03130374957000344, "grad_norm": 0.4432967253380608, "learning_rate": 0.0009999958275948725, "loss": 2.8827, "step": 273 }, { "epoch": 0.03141841531934411, "grad_norm": 0.447116056762687, "learning_rate": 0.0009999950344943842, "loss": 2.8219, "step": 274 }, { "epoch": 0.031533081068684785, "grad_norm": 0.6067394722819113, "learning_rate": 0.0009999941724291115, "loss": 3.016, "step": 275 }, { "epoch": 0.03164774681802546, "grad_norm": 0.4932292446077849, "learning_rate": 0.0009999932413991737, "loss": 2.8823, "step": 276 }, { "epoch": 0.03176241256736613, "grad_norm": 0.48701283343057666, "learning_rate": 0.0009999922414046986, "loss": 3.0513, "step": 277 }, { "epoch": 0.0318770783167068, "grad_norm": 0.508434848624952, "learning_rate": 0.0009999911724458248, "loss": 3.0388, "step": 278 }, { "epoch": 0.03199174406604747, "grad_norm": 0.49753350320140627, "learning_rate": 0.0009999900345226994, "loss": 2.7854, "step": 279 }, { "epoch": 0.03210640981538814, "grad_norm": 0.4815388988859689, "learning_rate": 0.0009999888276354795, "loss": 2.9384, "step": 280 }, { "epoch": 0.032221075564728814, "grad_norm": 0.5176955770044522, "learning_rate": 0.0009999875517843315, "loss": 3.0144, "step": 281 }, { "epoch": 0.03233574131406949, "grad_norm": 0.5086189881569775, "learning_rate": 0.0009999862069694312, "loss": 2.8854, "step": 282 }, { "epoch": 0.03245040706341016, "grad_norm": 0.5146090525545141, "learning_rate": 0.0009999847931909645, "loss": 2.8299, "step": 283 }, { "epoch": 0.03256507281275083, "grad_norm": 0.4748148321450234, "learning_rate": 0.000999983310449126, "loss": 2.8589, "step": 284 }, { "epoch": 0.032679738562091505, "grad_norm": 0.5235880108167481, "learning_rate": 0.0009999817587441203, "loss": 2.8747, "step": 285 }, { "epoch": 0.03279440431143218, "grad_norm": 0.5311297710383596, "learning_rate": 0.0009999801380761615, "loss": 2.9554, "step": 286 }, { "epoch": 0.032909070060772844, "grad_norm": 0.4784552496511499, "learning_rate": 0.0009999784484454734, "loss": 2.9593, "step": 287 }, { "epoch": 0.033023735810113516, "grad_norm": 0.4802526061817244, "learning_rate": 0.0009999766898522884, "loss": 2.9045, "step": 288 }, { "epoch": 0.03313840155945419, "grad_norm": 0.5005836980588156, "learning_rate": 0.0009999748622968496, "loss": 2.8257, "step": 289 }, { "epoch": 0.03325306730879486, "grad_norm": 0.4650637395613939, "learning_rate": 0.000999972965779409, "loss": 2.8127, "step": 290 }, { "epoch": 0.033367733058135535, "grad_norm": 0.4668955532453089, "learning_rate": 0.000999971000300228, "loss": 2.8695, "step": 291 }, { "epoch": 0.03348239880747621, "grad_norm": 0.4438363509090404, "learning_rate": 0.000999968965859578, "loss": 2.8448, "step": 292 }, { "epoch": 0.03359706455681688, "grad_norm": 0.5172966659465619, "learning_rate": 0.0009999668624577395, "loss": 2.9425, "step": 293 }, { "epoch": 0.03371173030615755, "grad_norm": 0.5904051865520534, "learning_rate": 0.0009999646900950023, "loss": 2.9115, "step": 294 }, { "epoch": 0.033826396055498226, "grad_norm": 0.5539441014916824, "learning_rate": 0.0009999624487716666, "loss": 2.8551, "step": 295 }, { "epoch": 0.03394106180483889, "grad_norm": 0.5640287178272303, "learning_rate": 0.000999960138488041, "loss": 3.0011, "step": 296 }, { "epoch": 0.034055727554179564, "grad_norm": 0.5284553600918369, "learning_rate": 0.0009999577592444443, "loss": 2.8111, "step": 297 }, { "epoch": 0.03417039330352024, "grad_norm": 0.4823528426024632, "learning_rate": 0.000999955311041205, "loss": 2.9556, "step": 298 }, { "epoch": 0.03428505905286091, "grad_norm": 0.45317566462384484, "learning_rate": 0.0009999527938786606, "loss": 2.943, "step": 299 }, { "epoch": 0.03439972480220158, "grad_norm": 0.46716802143957065, "learning_rate": 0.0009999502077571581, "loss": 2.8747, "step": 300 }, { "epoch": 0.034514390551542255, "grad_norm": 0.4642349076752888, "learning_rate": 0.0009999475526770545, "loss": 2.9187, "step": 301 }, { "epoch": 0.03462905630088293, "grad_norm": 0.47640791292712437, "learning_rate": 0.0009999448286387158, "loss": 2.9906, "step": 302 }, { "epoch": 0.0347437220502236, "grad_norm": 0.4615406485094774, "learning_rate": 0.0009999420356425178, "loss": 2.9198, "step": 303 }, { "epoch": 0.034858387799564274, "grad_norm": 0.48921009340035604, "learning_rate": 0.0009999391736888457, "loss": 2.9495, "step": 304 }, { "epoch": 0.03497305354890494, "grad_norm": 0.4968585248746272, "learning_rate": 0.0009999362427780942, "loss": 2.9745, "step": 305 }, { "epoch": 0.03508771929824561, "grad_norm": 0.45600293295713923, "learning_rate": 0.0009999332429106679, "loss": 3.0254, "step": 306 }, { "epoch": 0.035202385047586285, "grad_norm": 0.4534698626554571, "learning_rate": 0.00099993017408698, "loss": 2.9239, "step": 307 }, { "epoch": 0.03531705079692696, "grad_norm": 0.46740119016259796, "learning_rate": 0.0009999270363074547, "loss": 2.9629, "step": 308 }, { "epoch": 0.03543171654626763, "grad_norm": 0.46923180876874787, "learning_rate": 0.0009999238295725237, "loss": 2.7397, "step": 309 }, { "epoch": 0.0355463822956083, "grad_norm": 0.46030227609025304, "learning_rate": 0.00099992055388263, "loss": 2.7823, "step": 310 }, { "epoch": 0.035661048044948976, "grad_norm": 0.43082212581012064, "learning_rate": 0.0009999172092382252, "loss": 2.791, "step": 311 }, { "epoch": 0.03577571379428965, "grad_norm": 0.5083007377152199, "learning_rate": 0.0009999137956397707, "loss": 2.8933, "step": 312 }, { "epoch": 0.035890379543630314, "grad_norm": 0.46552601907995467, "learning_rate": 0.0009999103130877373, "loss": 2.8624, "step": 313 }, { "epoch": 0.03600504529297099, "grad_norm": 0.5276439825595753, "learning_rate": 0.0009999067615826054, "loss": 2.6825, "step": 314 }, { "epoch": 0.03611971104231166, "grad_norm": 0.48976568062846787, "learning_rate": 0.000999903141124865, "loss": 2.8526, "step": 315 }, { "epoch": 0.03623437679165233, "grad_norm": 0.42398549089291626, "learning_rate": 0.000999899451715015, "loss": 2.9776, "step": 316 }, { "epoch": 0.036349042540993005, "grad_norm": 0.45757849328674405, "learning_rate": 0.0009998956933535649, "loss": 2.9982, "step": 317 }, { "epoch": 0.03646370829033368, "grad_norm": 0.4513227501855517, "learning_rate": 0.0009998918660410324, "loss": 2.9134, "step": 318 }, { "epoch": 0.03657837403967435, "grad_norm": 0.42758246075351874, "learning_rate": 0.000999887969777946, "loss": 2.8974, "step": 319 }, { "epoch": 0.036693039789015024, "grad_norm": 0.40678570238073214, "learning_rate": 0.000999884004564843, "loss": 3.0485, "step": 320 }, { "epoch": 0.036807705538355696, "grad_norm": 0.4639323755103496, "learning_rate": 0.00099987997040227, "loss": 2.9364, "step": 321 }, { "epoch": 0.03692237128769636, "grad_norm": 0.4239782751237232, "learning_rate": 0.0009998758672907838, "loss": 3.0521, "step": 322 }, { "epoch": 0.037037037037037035, "grad_norm": 0.4172037620755801, "learning_rate": 0.0009998716952309501, "loss": 2.9848, "step": 323 }, { "epoch": 0.03715170278637771, "grad_norm": 0.4647679514305454, "learning_rate": 0.0009998674542233445, "loss": 2.9383, "step": 324 }, { "epoch": 0.03726636853571838, "grad_norm": 0.420054295652772, "learning_rate": 0.000999863144268552, "loss": 3.002, "step": 325 }, { "epoch": 0.03738103428505905, "grad_norm": 0.4393499511327307, "learning_rate": 0.000999858765367167, "loss": 3.0245, "step": 326 }, { "epoch": 0.037495700034399726, "grad_norm": 0.49531529977310673, "learning_rate": 0.0009998543175197936, "loss": 3.022, "step": 327 }, { "epoch": 0.0376103657837404, "grad_norm": 0.4795320146107337, "learning_rate": 0.000999849800727045, "loss": 2.7953, "step": 328 }, { "epoch": 0.03772503153308107, "grad_norm": 0.4355446672099129, "learning_rate": 0.0009998452149895445, "loss": 2.8616, "step": 329 }, { "epoch": 0.03783969728242174, "grad_norm": 0.49477302813660073, "learning_rate": 0.0009998405603079243, "loss": 2.9453, "step": 330 }, { "epoch": 0.03795436303176241, "grad_norm": 0.5275794168993565, "learning_rate": 0.0009998358366828269, "loss": 2.7751, "step": 331 }, { "epoch": 0.03806902878110308, "grad_norm": 0.43616854539262845, "learning_rate": 0.0009998310441149034, "loss": 2.9195, "step": 332 }, { "epoch": 0.038183694530443756, "grad_norm": 0.44370510961710824, "learning_rate": 0.000999826182604815, "loss": 2.8261, "step": 333 }, { "epoch": 0.03829836027978443, "grad_norm": 0.43761133706578115, "learning_rate": 0.0009998212521532325, "loss": 2.8425, "step": 334 }, { "epoch": 0.0384130260291251, "grad_norm": 0.4345452231213624, "learning_rate": 0.0009998162527608354, "loss": 2.7781, "step": 335 }, { "epoch": 0.038527691778465774, "grad_norm": 0.4611564265771901, "learning_rate": 0.0009998111844283137, "loss": 2.8982, "step": 336 }, { "epoch": 0.03864235752780645, "grad_norm": 0.40749071096569695, "learning_rate": 0.0009998060471563665, "loss": 2.9036, "step": 337 }, { "epoch": 0.03875702327714712, "grad_norm": 0.43280865612081587, "learning_rate": 0.0009998008409457023, "loss": 2.902, "step": 338 }, { "epoch": 0.038871689026487785, "grad_norm": 0.46426390733591344, "learning_rate": 0.000999795565797039, "loss": 2.81, "step": 339 }, { "epoch": 0.03898635477582846, "grad_norm": 0.4420370383407498, "learning_rate": 0.0009997902217111045, "loss": 3.0037, "step": 340 }, { "epoch": 0.03910102052516913, "grad_norm": 0.46816638107870434, "learning_rate": 0.0009997848086886357, "loss": 2.8941, "step": 341 }, { "epoch": 0.0392156862745098, "grad_norm": 0.4860481385647438, "learning_rate": 0.0009997793267303792, "loss": 2.8446, "step": 342 }, { "epoch": 0.039330352023850476, "grad_norm": 0.445966586786985, "learning_rate": 0.0009997737758370914, "loss": 2.8013, "step": 343 }, { "epoch": 0.03944501777319115, "grad_norm": 0.42505493187570276, "learning_rate": 0.0009997681560095378, "loss": 2.7778, "step": 344 }, { "epoch": 0.03955968352253182, "grad_norm": 0.48039411990833053, "learning_rate": 0.0009997624672484933, "loss": 2.8519, "step": 345 }, { "epoch": 0.039674349271872494, "grad_norm": 0.4978776779172268, "learning_rate": 0.0009997567095547432, "loss": 2.8413, "step": 346 }, { "epoch": 0.03978901502121316, "grad_norm": 0.4672321347478045, "learning_rate": 0.000999750882929081, "loss": 3.0195, "step": 347 }, { "epoch": 0.03990368077055383, "grad_norm": 0.44415121207344815, "learning_rate": 0.0009997449873723105, "loss": 2.869, "step": 348 }, { "epoch": 0.040018346519894506, "grad_norm": 0.4692753216934293, "learning_rate": 0.000999739022885245, "loss": 2.8549, "step": 349 }, { "epoch": 0.04013301226923518, "grad_norm": 0.45523093619831717, "learning_rate": 0.0009997329894687072, "loss": 2.9351, "step": 350 }, { "epoch": 0.04024767801857585, "grad_norm": 0.3899975629615919, "learning_rate": 0.0009997268871235296, "loss": 2.8285, "step": 351 }, { "epoch": 0.040362343767916524, "grad_norm": 0.44822064960942354, "learning_rate": 0.0009997207158505533, "loss": 2.8637, "step": 352 }, { "epoch": 0.0404770095172572, "grad_norm": 0.46489344947093303, "learning_rate": 0.0009997144756506298, "loss": 2.8658, "step": 353 }, { "epoch": 0.04059167526659787, "grad_norm": 0.41958800520627165, "learning_rate": 0.00099970816652462, "loss": 2.9923, "step": 354 }, { "epoch": 0.04070634101593854, "grad_norm": 0.4452179584088451, "learning_rate": 0.0009997017884733938, "loss": 2.8469, "step": 355 }, { "epoch": 0.04082100676527921, "grad_norm": 0.42988254427238115, "learning_rate": 0.000999695341497831, "loss": 2.9169, "step": 356 }, { "epoch": 0.04093567251461988, "grad_norm": 0.3976714124486464, "learning_rate": 0.0009996888255988207, "loss": 2.9287, "step": 357 }, { "epoch": 0.041050338263960554, "grad_norm": 0.3846609864616555, "learning_rate": 0.0009996822407772623, "loss": 2.9095, "step": 358 }, { "epoch": 0.041165004013301226, "grad_norm": 0.41877958446431957, "learning_rate": 0.0009996755870340633, "loss": 2.901, "step": 359 }, { "epoch": 0.0412796697626419, "grad_norm": 0.3791731881821083, "learning_rate": 0.0009996688643701419, "loss": 2.7776, "step": 360 }, { "epoch": 0.04139433551198257, "grad_norm": 0.38121545020482545, "learning_rate": 0.0009996620727864252, "loss": 2.9549, "step": 361 }, { "epoch": 0.041509001261323245, "grad_norm": 0.40155140891613317, "learning_rate": 0.00099965521228385, "loss": 2.8994, "step": 362 }, { "epoch": 0.04162366701066392, "grad_norm": 0.43125441718046403, "learning_rate": 0.0009996482828633624, "loss": 2.7598, "step": 363 }, { "epoch": 0.04173833276000459, "grad_norm": 0.4770770778782653, "learning_rate": 0.0009996412845259183, "loss": 2.9975, "step": 364 }, { "epoch": 0.041852998509345256, "grad_norm": 0.49995516941142554, "learning_rate": 0.0009996342172724833, "loss": 2.8628, "step": 365 }, { "epoch": 0.04196766425868593, "grad_norm": 0.41779998761126896, "learning_rate": 0.0009996270811040318, "loss": 2.724, "step": 366 }, { "epoch": 0.0420823300080266, "grad_norm": 0.42749704001470756, "learning_rate": 0.0009996198760215483, "loss": 2.8689, "step": 367 }, { "epoch": 0.042196995757367274, "grad_norm": 0.48617886389833176, "learning_rate": 0.0009996126020260262, "loss": 2.9577, "step": 368 }, { "epoch": 0.04231166150670795, "grad_norm": 0.4163277868110168, "learning_rate": 0.0009996052591184695, "loss": 2.8136, "step": 369 }, { "epoch": 0.04242632725604862, "grad_norm": 0.44050122649801227, "learning_rate": 0.0009995978472998905, "loss": 2.9061, "step": 370 }, { "epoch": 0.04254099300538929, "grad_norm": 0.4096153889161094, "learning_rate": 0.0009995903665713118, "loss": 2.7944, "step": 371 }, { "epoch": 0.042655658754729965, "grad_norm": 0.44912392018468306, "learning_rate": 0.000999582816933765, "loss": 2.8208, "step": 372 }, { "epoch": 0.04277032450407063, "grad_norm": 0.4195373920266093, "learning_rate": 0.0009995751983882914, "loss": 2.8087, "step": 373 }, { "epoch": 0.042884990253411304, "grad_norm": 0.4116148378613393, "learning_rate": 0.000999567510935942, "loss": 2.8351, "step": 374 }, { "epoch": 0.042999656002751976, "grad_norm": 0.37984420580150025, "learning_rate": 0.0009995597545777771, "loss": 2.8685, "step": 375 }, { "epoch": 0.04311432175209265, "grad_norm": 0.4365060876046911, "learning_rate": 0.0009995519293148666, "loss": 2.793, "step": 376 }, { "epoch": 0.04322898750143332, "grad_norm": 0.5404209586211132, "learning_rate": 0.0009995440351482897, "loss": 2.946, "step": 377 }, { "epoch": 0.043343653250773995, "grad_norm": 0.49822471681227515, "learning_rate": 0.0009995360720791353, "loss": 2.8649, "step": 378 }, { "epoch": 0.04345831900011467, "grad_norm": 0.4123137207078488, "learning_rate": 0.000999528040108502, "loss": 2.8452, "step": 379 }, { "epoch": 0.04357298474945534, "grad_norm": 0.4109075077833616, "learning_rate": 0.0009995199392374972, "loss": 2.7985, "step": 380 }, { "epoch": 0.04368765049879601, "grad_norm": 0.47602620084294067, "learning_rate": 0.0009995117694672386, "loss": 2.826, "step": 381 }, { "epoch": 0.04380231624813668, "grad_norm": 0.42159603089235476, "learning_rate": 0.000999503530798853, "loss": 2.8964, "step": 382 }, { "epoch": 0.04391698199747735, "grad_norm": 0.4439458980035615, "learning_rate": 0.0009994952232334766, "loss": 2.8777, "step": 383 }, { "epoch": 0.044031647746818024, "grad_norm": 0.44211484867618567, "learning_rate": 0.0009994868467722556, "loss": 2.9517, "step": 384 }, { "epoch": 0.0441463134961587, "grad_norm": 0.4181866357033534, "learning_rate": 0.0009994784014163449, "loss": 2.9797, "step": 385 }, { "epoch": 0.04426097924549937, "grad_norm": 0.446295654114178, "learning_rate": 0.0009994698871669098, "loss": 3.0006, "step": 386 }, { "epoch": 0.04437564499484004, "grad_norm": 0.42215794213287927, "learning_rate": 0.0009994613040251246, "loss": 2.9112, "step": 387 }, { "epoch": 0.044490310744180715, "grad_norm": 0.42651543776486645, "learning_rate": 0.000999452651992173, "loss": 2.8749, "step": 388 }, { "epoch": 0.04460497649352139, "grad_norm": 0.42789857319007374, "learning_rate": 0.0009994439310692486, "loss": 2.7857, "step": 389 }, { "epoch": 0.044719642242862054, "grad_norm": 0.42520017533306, "learning_rate": 0.0009994351412575542, "loss": 2.7778, "step": 390 }, { "epoch": 0.04483430799220273, "grad_norm": 0.4875041590092833, "learning_rate": 0.000999426282558302, "loss": 2.8615, "step": 391 }, { "epoch": 0.0449489737415434, "grad_norm": 0.4403588542883991, "learning_rate": 0.000999417354972714, "loss": 2.8771, "step": 392 }, { "epoch": 0.04506363949088407, "grad_norm": 0.42578332900390736, "learning_rate": 0.000999408358502022, "loss": 2.8174, "step": 393 }, { "epoch": 0.045178305240224745, "grad_norm": 0.4320011402827834, "learning_rate": 0.0009993992931474661, "loss": 3.0187, "step": 394 }, { "epoch": 0.04529297098956542, "grad_norm": 0.44257499612757956, "learning_rate": 0.0009993901589102974, "loss": 3.005, "step": 395 }, { "epoch": 0.04540763673890609, "grad_norm": 0.48557089809549814, "learning_rate": 0.0009993809557917754, "loss": 2.7653, "step": 396 }, { "epoch": 0.04552230248824676, "grad_norm": 0.4214302359558213, "learning_rate": 0.0009993716837931696, "loss": 3.0089, "step": 397 }, { "epoch": 0.045636968237587436, "grad_norm": 0.404193875906678, "learning_rate": 0.000999362342915759, "loss": 2.8322, "step": 398 }, { "epoch": 0.0457516339869281, "grad_norm": 0.40348708265909317, "learning_rate": 0.0009993529331608318, "loss": 2.7964, "step": 399 }, { "epoch": 0.045866299736268774, "grad_norm": 0.46358188465636196, "learning_rate": 0.0009993434545296862, "loss": 2.8539, "step": 400 }, { "epoch": 0.04598096548560945, "grad_norm": 0.387197190688247, "learning_rate": 0.0009993339070236292, "loss": 2.8845, "step": 401 }, { "epoch": 0.04609563123495012, "grad_norm": 0.44260755914743327, "learning_rate": 0.000999324290643978, "loss": 2.9898, "step": 402 }, { "epoch": 0.04621029698429079, "grad_norm": 0.3849339441341739, "learning_rate": 0.0009993146053920588, "loss": 2.8997, "step": 403 }, { "epoch": 0.046324962733631465, "grad_norm": 0.39529879885162666, "learning_rate": 0.0009993048512692078, "loss": 2.8602, "step": 404 }, { "epoch": 0.04643962848297214, "grad_norm": 0.4196390379342417, "learning_rate": 0.00099929502827677, "loss": 2.9231, "step": 405 }, { "epoch": 0.04655429423231281, "grad_norm": 0.4261818240672346, "learning_rate": 0.0009992851364161006, "loss": 2.7321, "step": 406 }, { "epoch": 0.04666895998165348, "grad_norm": 0.4151510793545912, "learning_rate": 0.0009992751756885637, "loss": 2.9788, "step": 407 }, { "epoch": 0.04678362573099415, "grad_norm": 0.3768094973952089, "learning_rate": 0.0009992651460955335, "loss": 2.8075, "step": 408 }, { "epoch": 0.04689829148033482, "grad_norm": 0.4258341047280142, "learning_rate": 0.0009992550476383931, "loss": 2.8679, "step": 409 }, { "epoch": 0.047012957229675495, "grad_norm": 0.4350418947825548, "learning_rate": 0.0009992448803185356, "loss": 2.8282, "step": 410 }, { "epoch": 0.04712762297901617, "grad_norm": 0.4041485879286882, "learning_rate": 0.0009992346441373633, "loss": 2.9333, "step": 411 }, { "epoch": 0.04724228872835684, "grad_norm": 0.4678578748104447, "learning_rate": 0.0009992243390962883, "loss": 2.8841, "step": 412 }, { "epoch": 0.04735695447769751, "grad_norm": 0.41091702514489364, "learning_rate": 0.0009992139651967319, "loss": 2.7504, "step": 413 }, { "epoch": 0.047471620227038186, "grad_norm": 0.49033793108209633, "learning_rate": 0.0009992035224401245, "loss": 2.8374, "step": 414 }, { "epoch": 0.04758628597637886, "grad_norm": 0.42603739472570434, "learning_rate": 0.0009991930108279074, "loss": 2.7514, "step": 415 }, { "epoch": 0.047700951725719525, "grad_norm": 0.38670909999135045, "learning_rate": 0.0009991824303615293, "loss": 2.7509, "step": 416 }, { "epoch": 0.0478156174750602, "grad_norm": 0.35885819034707067, "learning_rate": 0.0009991717810424506, "loss": 2.8272, "step": 417 }, { "epoch": 0.04793028322440087, "grad_norm": 0.39616114442120487, "learning_rate": 0.0009991610628721397, "loss": 2.8344, "step": 418 }, { "epoch": 0.04804494897374154, "grad_norm": 0.44582603341367083, "learning_rate": 0.000999150275852075, "loss": 2.8899, "step": 419 }, { "epoch": 0.048159614723082216, "grad_norm": 0.3846685537111089, "learning_rate": 0.0009991394199837444, "loss": 2.8781, "step": 420 }, { "epoch": 0.04827428047242289, "grad_norm": 0.4568480990713995, "learning_rate": 0.0009991284952686455, "loss": 2.8454, "step": 421 }, { "epoch": 0.04838894622176356, "grad_norm": 0.41497904792592516, "learning_rate": 0.0009991175017082848, "loss": 2.8815, "step": 422 }, { "epoch": 0.048503611971104234, "grad_norm": 0.3988258676788166, "learning_rate": 0.0009991064393041786, "loss": 2.7265, "step": 423 }, { "epoch": 0.04861827772044491, "grad_norm": 0.438444029795652, "learning_rate": 0.0009990953080578533, "loss": 2.9248, "step": 424 }, { "epoch": 0.04873294346978557, "grad_norm": 0.45699647215205313, "learning_rate": 0.0009990841079708435, "loss": 2.7777, "step": 425 }, { "epoch": 0.048847609219126245, "grad_norm": 0.4033532254636485, "learning_rate": 0.0009990728390446946, "loss": 2.7618, "step": 426 }, { "epoch": 0.04896227496846692, "grad_norm": 0.463489201897885, "learning_rate": 0.0009990615012809608, "loss": 2.8633, "step": 427 }, { "epoch": 0.04907694071780759, "grad_norm": 0.4387936549724385, "learning_rate": 0.0009990500946812058, "loss": 2.7559, "step": 428 }, { "epoch": 0.04919160646714826, "grad_norm": 0.4006095893834648, "learning_rate": 0.000999038619247003, "loss": 2.8086, "step": 429 }, { "epoch": 0.049306272216488936, "grad_norm": 0.38101367236228806, "learning_rate": 0.0009990270749799352, "loss": 2.7024, "step": 430 }, { "epoch": 0.04942093796582961, "grad_norm": 0.409644108681533, "learning_rate": 0.0009990154618815948, "loss": 2.8839, "step": 431 }, { "epoch": 0.04953560371517028, "grad_norm": 0.40140101284102714, "learning_rate": 0.0009990037799535833, "loss": 2.9054, "step": 432 }, { "epoch": 0.04965026946451095, "grad_norm": 0.36254358675155723, "learning_rate": 0.0009989920291975124, "loss": 2.713, "step": 433 }, { "epoch": 0.04976493521385162, "grad_norm": 0.3705019593703962, "learning_rate": 0.0009989802096150029, "loss": 2.8037, "step": 434 }, { "epoch": 0.04987960096319229, "grad_norm": 0.39564835396997816, "learning_rate": 0.0009989683212076848, "loss": 2.8976, "step": 435 }, { "epoch": 0.049994266712532966, "grad_norm": 0.3913977239078428, "learning_rate": 0.0009989563639771978, "loss": 2.8678, "step": 436 }, { "epoch": 0.05010893246187364, "grad_norm": 0.4585408231388172, "learning_rate": 0.0009989443379251916, "loss": 2.8011, "step": 437 }, { "epoch": 0.05022359821121431, "grad_norm": 0.42225842535929436, "learning_rate": 0.0009989322430533245, "loss": 2.8809, "step": 438 }, { "epoch": 0.050338263960554984, "grad_norm": 0.41520748540270125, "learning_rate": 0.0009989200793632652, "loss": 2.731, "step": 439 }, { "epoch": 0.05045292970989566, "grad_norm": 0.4220304138402917, "learning_rate": 0.0009989078468566912, "loss": 2.7616, "step": 440 }, { "epoch": 0.05056759545923633, "grad_norm": 0.41827741604696245, "learning_rate": 0.0009988955455352898, "loss": 2.7708, "step": 441 }, { "epoch": 0.050682261208576995, "grad_norm": 0.4129874963149311, "learning_rate": 0.0009988831754007576, "loss": 2.7688, "step": 442 }, { "epoch": 0.05079692695791767, "grad_norm": 0.4389719568545934, "learning_rate": 0.000998870736454801, "loss": 2.7847, "step": 443 }, { "epoch": 0.05091159270725834, "grad_norm": 0.4137136853137206, "learning_rate": 0.0009988582286991356, "loss": 2.8585, "step": 444 }, { "epoch": 0.051026258456599013, "grad_norm": 0.3605568937049715, "learning_rate": 0.0009988456521354868, "loss": 2.9004, "step": 445 }, { "epoch": 0.051140924205939686, "grad_norm": 0.3610587028048033, "learning_rate": 0.000998833006765589, "loss": 2.7843, "step": 446 }, { "epoch": 0.05125558995528036, "grad_norm": 0.38311042087079367, "learning_rate": 0.0009988202925911864, "loss": 2.9845, "step": 447 }, { "epoch": 0.05137025570462103, "grad_norm": 0.3751968234497866, "learning_rate": 0.000998807509614033, "loss": 2.9177, "step": 448 }, { "epoch": 0.051484921453961704, "grad_norm": 0.382024099924426, "learning_rate": 0.0009987946578358918, "loss": 2.8338, "step": 449 }, { "epoch": 0.05159958720330237, "grad_norm": 0.46865478196754445, "learning_rate": 0.0009987817372585355, "loss": 2.9415, "step": 450 }, { "epoch": 0.05171425295264304, "grad_norm": 0.3941759802390462, "learning_rate": 0.000998768747883746, "loss": 2.6885, "step": 451 }, { "epoch": 0.051828918701983716, "grad_norm": 0.3712492910534536, "learning_rate": 0.0009987556897133151, "loss": 2.8854, "step": 452 }, { "epoch": 0.05194358445132439, "grad_norm": 0.409512039521161, "learning_rate": 0.0009987425627490441, "loss": 2.8651, "step": 453 }, { "epoch": 0.05205825020066506, "grad_norm": 0.3636573505665796, "learning_rate": 0.0009987293669927436, "loss": 2.9018, "step": 454 }, { "epoch": 0.052172915950005734, "grad_norm": 0.3887738106124062, "learning_rate": 0.0009987161024462333, "loss": 2.9214, "step": 455 }, { "epoch": 0.05228758169934641, "grad_norm": 0.44779193511056586, "learning_rate": 0.0009987027691113432, "loss": 2.8703, "step": 456 }, { "epoch": 0.05240224744868708, "grad_norm": 0.39891917558838513, "learning_rate": 0.0009986893669899123, "loss": 2.9884, "step": 457 }, { "epoch": 0.05251691319802775, "grad_norm": 0.4068790315684028, "learning_rate": 0.0009986758960837889, "loss": 2.7961, "step": 458 }, { "epoch": 0.05263157894736842, "grad_norm": 0.3717035465244831, "learning_rate": 0.0009986623563948314, "loss": 2.6558, "step": 459 }, { "epoch": 0.05274624469670909, "grad_norm": 0.4175344791804306, "learning_rate": 0.000998648747924907, "loss": 2.8512, "step": 460 }, { "epoch": 0.052860910446049764, "grad_norm": 0.33997947115432336, "learning_rate": 0.0009986350706758934, "loss": 2.7439, "step": 461 }, { "epoch": 0.052975576195390436, "grad_norm": 0.43599493827369423, "learning_rate": 0.0009986213246496762, "loss": 2.7942, "step": 462 }, { "epoch": 0.05309024194473111, "grad_norm": 0.3775569738548157, "learning_rate": 0.000998607509848152, "loss": 2.8576, "step": 463 }, { "epoch": 0.05320490769407178, "grad_norm": 0.3886838855077135, "learning_rate": 0.0009985936262732263, "loss": 2.7963, "step": 464 }, { "epoch": 0.053319573443412455, "grad_norm": 0.3608050283312012, "learning_rate": 0.0009985796739268138, "loss": 2.7426, "step": 465 }, { "epoch": 0.05343423919275313, "grad_norm": 0.37412542057091924, "learning_rate": 0.000998565652810839, "loss": 2.7439, "step": 466 }, { "epoch": 0.05354890494209379, "grad_norm": 0.4087290695052961, "learning_rate": 0.000998551562927236, "loss": 2.8025, "step": 467 }, { "epoch": 0.053663570691434466, "grad_norm": 0.4484725980508869, "learning_rate": 0.000998537404277948, "loss": 2.9098, "step": 468 }, { "epoch": 0.05377823644077514, "grad_norm": 0.452143256111978, "learning_rate": 0.0009985231768649284, "loss": 2.8642, "step": 469 }, { "epoch": 0.05389290219011581, "grad_norm": 0.39270105614952217, "learning_rate": 0.000998508880690139, "loss": 2.8272, "step": 470 }, { "epoch": 0.054007567939456484, "grad_norm": 0.40795197227106716, "learning_rate": 0.000998494515755552, "loss": 2.9005, "step": 471 }, { "epoch": 0.05412223368879716, "grad_norm": 0.3821213386554317, "learning_rate": 0.0009984800820631488, "loss": 2.8737, "step": 472 }, { "epoch": 0.05423689943813783, "grad_norm": 0.38450583254639414, "learning_rate": 0.0009984655796149201, "loss": 2.8181, "step": 473 }, { "epoch": 0.0543515651874785, "grad_norm": 0.4122634233673069, "learning_rate": 0.0009984510084128661, "loss": 2.9515, "step": 474 }, { "epoch": 0.054466230936819175, "grad_norm": 0.42214828000030447, "learning_rate": 0.0009984363684589972, "loss": 2.8553, "step": 475 }, { "epoch": 0.05458089668615984, "grad_norm": 0.4115019028951167, "learning_rate": 0.0009984216597553322, "loss": 2.9232, "step": 476 }, { "epoch": 0.054695562435500514, "grad_norm": 0.36757314119084644, "learning_rate": 0.0009984068823039, "loss": 2.8144, "step": 477 }, { "epoch": 0.054810228184841187, "grad_norm": 0.3560515641845487, "learning_rate": 0.0009983920361067388, "loss": 2.7854, "step": 478 }, { "epoch": 0.05492489393418186, "grad_norm": 0.3825011171831334, "learning_rate": 0.0009983771211658965, "loss": 2.8208, "step": 479 }, { "epoch": 0.05503955968352253, "grad_norm": 0.4021136133809867, "learning_rate": 0.0009983621374834303, "loss": 2.9428, "step": 480 }, { "epoch": 0.055154225432863205, "grad_norm": 0.3948708372571274, "learning_rate": 0.0009983470850614068, "loss": 2.824, "step": 481 }, { "epoch": 0.05526889118220388, "grad_norm": 0.36096627443634055, "learning_rate": 0.0009983319639019024, "loss": 2.7043, "step": 482 }, { "epoch": 0.05538355693154455, "grad_norm": 0.4279004261181492, "learning_rate": 0.0009983167740070025, "loss": 2.8255, "step": 483 }, { "epoch": 0.05549822268088522, "grad_norm": 0.3964766765510947, "learning_rate": 0.0009983015153788026, "loss": 2.881, "step": 484 }, { "epoch": 0.05561288843022589, "grad_norm": 0.3879824586196614, "learning_rate": 0.000998286188019407, "loss": 2.8855, "step": 485 }, { "epoch": 0.05572755417956656, "grad_norm": 0.4107175714269911, "learning_rate": 0.00099827079193093, "loss": 2.8552, "step": 486 }, { "epoch": 0.055842219928907234, "grad_norm": 0.37524531252432775, "learning_rate": 0.0009982553271154953, "loss": 2.926, "step": 487 }, { "epoch": 0.05595688567824791, "grad_norm": 0.3487945682326408, "learning_rate": 0.0009982397935752356, "loss": 2.8472, "step": 488 }, { "epoch": 0.05607155142758858, "grad_norm": 0.36200550892192385, "learning_rate": 0.0009982241913122937, "loss": 2.9161, "step": 489 }, { "epoch": 0.05618621717692925, "grad_norm": 0.38264745729569327, "learning_rate": 0.000998208520328822, "loss": 2.9392, "step": 490 }, { "epoch": 0.056300882926269925, "grad_norm": 0.3732956380777419, "learning_rate": 0.0009981927806269812, "loss": 2.8577, "step": 491 }, { "epoch": 0.0564155486756106, "grad_norm": 0.3737453493778973, "learning_rate": 0.0009981769722089428, "loss": 2.8192, "step": 492 }, { "epoch": 0.056530214424951264, "grad_norm": 0.3702471871343963, "learning_rate": 0.0009981610950768873, "loss": 2.8721, "step": 493 }, { "epoch": 0.05664488017429194, "grad_norm": 0.39285155488821855, "learning_rate": 0.0009981451492330046, "loss": 2.9218, "step": 494 }, { "epoch": 0.05675954592363261, "grad_norm": 0.4030256747836687, "learning_rate": 0.000998129134679494, "loss": 2.8364, "step": 495 }, { "epoch": 0.05687421167297328, "grad_norm": 0.3694952677987905, "learning_rate": 0.0009981130514185646, "loss": 2.8109, "step": 496 }, { "epoch": 0.056988877422313955, "grad_norm": 0.38313941332829515, "learning_rate": 0.0009980968994524344, "loss": 2.654, "step": 497 }, { "epoch": 0.05710354317165463, "grad_norm": 0.38013556912390845, "learning_rate": 0.0009980806787833316, "loss": 2.7987, "step": 498 }, { "epoch": 0.0572182089209953, "grad_norm": 0.33783275117854766, "learning_rate": 0.0009980643894134935, "loss": 2.886, "step": 499 }, { "epoch": 0.05733287467033597, "grad_norm": 0.3509621952013636, "learning_rate": 0.000998048031345167, "loss": 2.8833, "step": 500 }, { "epoch": 0.057447540419676646, "grad_norm": 0.3874581582412629, "learning_rate": 0.0009980316045806082, "loss": 2.9003, "step": 501 }, { "epoch": 0.05756220616901731, "grad_norm": 0.35705766155032925, "learning_rate": 0.0009980151091220826, "loss": 2.6738, "step": 502 }, { "epoch": 0.057676871918357984, "grad_norm": 0.4416859258856828, "learning_rate": 0.000997998544971866, "loss": 2.9362, "step": 503 }, { "epoch": 0.05779153766769866, "grad_norm": 0.38238318547088923, "learning_rate": 0.0009979819121322426, "loss": 2.787, "step": 504 }, { "epoch": 0.05790620341703933, "grad_norm": 0.3933629281612558, "learning_rate": 0.000997965210605507, "loss": 2.9104, "step": 505 }, { "epoch": 0.05802086916638, "grad_norm": 0.4020282593835261, "learning_rate": 0.0009979484403939626, "loss": 2.9175, "step": 506 }, { "epoch": 0.058135534915720675, "grad_norm": 0.38587767254592736, "learning_rate": 0.0009979316014999226, "loss": 2.8248, "step": 507 }, { "epoch": 0.05825020066506135, "grad_norm": 0.45730926007253686, "learning_rate": 0.0009979146939257098, "loss": 2.7858, "step": 508 }, { "epoch": 0.05836486641440202, "grad_norm": 0.3719279516401054, "learning_rate": 0.000997897717673656, "loss": 2.8946, "step": 509 }, { "epoch": 0.05847953216374269, "grad_norm": 0.41184037116914446, "learning_rate": 0.0009978806727461028, "loss": 2.8045, "step": 510 }, { "epoch": 0.05859419791308336, "grad_norm": 0.38402455958857123, "learning_rate": 0.000997863559145401, "loss": 2.8049, "step": 511 }, { "epoch": 0.05870886366242403, "grad_norm": 0.3445610316443261, "learning_rate": 0.0009978463768739118, "loss": 2.9535, "step": 512 }, { "epoch": 0.058823529411764705, "grad_norm": 0.3241117104974591, "learning_rate": 0.0009978291259340045, "loss": 2.7859, "step": 513 }, { "epoch": 0.05893819516110538, "grad_norm": 0.3964773035946509, "learning_rate": 0.0009978118063280587, "loss": 2.8495, "step": 514 }, { "epoch": 0.05905286091044605, "grad_norm": 0.3819741328822643, "learning_rate": 0.0009977944180584637, "loss": 2.8638, "step": 515 }, { "epoch": 0.05916752665978672, "grad_norm": 0.38650938047527483, "learning_rate": 0.0009977769611276173, "loss": 2.9108, "step": 516 }, { "epoch": 0.059282192409127396, "grad_norm": 0.3972840160880589, "learning_rate": 0.0009977594355379275, "loss": 2.721, "step": 517 }, { "epoch": 0.05939685815846807, "grad_norm": 0.35831739139778357, "learning_rate": 0.000997741841291812, "loss": 2.7214, "step": 518 }, { "epoch": 0.059511523907808735, "grad_norm": 0.36764254550332665, "learning_rate": 0.000997724178391697, "loss": 3.0099, "step": 519 }, { "epoch": 0.05962618965714941, "grad_norm": 0.3445096992163432, "learning_rate": 0.0009977064468400193, "loss": 2.7609, "step": 520 }, { "epoch": 0.05974085540649008, "grad_norm": 0.3556029284571166, "learning_rate": 0.0009976886466392244, "loss": 2.7693, "step": 521 }, { "epoch": 0.05985552115583075, "grad_norm": 0.36099874973644624, "learning_rate": 0.0009976707777917676, "loss": 2.9042, "step": 522 }, { "epoch": 0.059970186905171426, "grad_norm": 0.40264356947347824, "learning_rate": 0.0009976528403001133, "loss": 2.7669, "step": 523 }, { "epoch": 0.0600848526545121, "grad_norm": 0.396778556485354, "learning_rate": 0.0009976348341667358, "loss": 2.8934, "step": 524 }, { "epoch": 0.06019951840385277, "grad_norm": 0.39203694343139134, "learning_rate": 0.0009976167593941188, "loss": 2.8433, "step": 525 }, { "epoch": 0.060314184153193444, "grad_norm": 0.38477339157637297, "learning_rate": 0.000997598615984755, "loss": 2.8111, "step": 526 }, { "epoch": 0.06042884990253411, "grad_norm": 0.41357486699162727, "learning_rate": 0.0009975804039411475, "loss": 2.7638, "step": 527 }, { "epoch": 0.06054351565187478, "grad_norm": 0.39748890533156267, "learning_rate": 0.0009975621232658082, "loss": 2.9401, "step": 528 }, { "epoch": 0.060658181401215455, "grad_norm": 0.3527248762851314, "learning_rate": 0.000997543773961258, "loss": 2.8141, "step": 529 }, { "epoch": 0.06077284715055613, "grad_norm": 0.39835240779303555, "learning_rate": 0.0009975253560300283, "loss": 2.7666, "step": 530 }, { "epoch": 0.0608875128998968, "grad_norm": 0.38953294378848635, "learning_rate": 0.0009975068694746596, "loss": 2.869, "step": 531 }, { "epoch": 0.06100217864923747, "grad_norm": 0.37673164372981777, "learning_rate": 0.0009974883142977015, "loss": 2.9162, "step": 532 }, { "epoch": 0.061116844398578146, "grad_norm": 0.3909901055648809, "learning_rate": 0.0009974696905017135, "loss": 2.8557, "step": 533 }, { "epoch": 0.06123151014791882, "grad_norm": 0.36111718041874913, "learning_rate": 0.0009974509980892642, "loss": 2.8222, "step": 534 }, { "epoch": 0.06134617589725949, "grad_norm": 0.332632292785617, "learning_rate": 0.0009974322370629321, "loss": 2.8032, "step": 535 }, { "epoch": 0.06146084164660016, "grad_norm": 0.4407896032526114, "learning_rate": 0.000997413407425305, "loss": 2.8223, "step": 536 }, { "epoch": 0.06157550739594083, "grad_norm": 0.4057388442704425, "learning_rate": 0.0009973945091789796, "loss": 2.6936, "step": 537 }, { "epoch": 0.0616901731452815, "grad_norm": 0.38759283542501144, "learning_rate": 0.000997375542326563, "loss": 2.9768, "step": 538 }, { "epoch": 0.061804838894622176, "grad_norm": 0.40891702468704066, "learning_rate": 0.0009973565068706711, "loss": 2.9408, "step": 539 }, { "epoch": 0.06191950464396285, "grad_norm": 0.3818812353296347, "learning_rate": 0.0009973374028139296, "loss": 2.7667, "step": 540 }, { "epoch": 0.06203417039330352, "grad_norm": 0.3857912368951548, "learning_rate": 0.0009973182301589736, "loss": 2.8785, "step": 541 }, { "epoch": 0.062148836142644194, "grad_norm": 0.3424298834515133, "learning_rate": 0.0009972989889084473, "loss": 2.803, "step": 542 }, { "epoch": 0.06226350189198487, "grad_norm": 0.3703823241676832, "learning_rate": 0.000997279679065005, "loss": 2.8327, "step": 543 }, { "epoch": 0.06237816764132553, "grad_norm": 0.42932212049176, "learning_rate": 0.0009972603006313098, "loss": 2.9458, "step": 544 }, { "epoch": 0.062492833390666205, "grad_norm": 0.3499043126889513, "learning_rate": 0.000997240853610035, "loss": 2.9083, "step": 545 }, { "epoch": 0.06260749914000688, "grad_norm": 0.39589964910489645, "learning_rate": 0.0009972213380038627, "loss": 2.7476, "step": 546 }, { "epoch": 0.06272216488934755, "grad_norm": 0.3498146886720316, "learning_rate": 0.0009972017538154845, "loss": 2.9481, "step": 547 }, { "epoch": 0.06283683063868822, "grad_norm": 0.37553717335623615, "learning_rate": 0.000997182101047602, "loss": 2.733, "step": 548 }, { "epoch": 0.0629514963880289, "grad_norm": 0.3492990140797232, "learning_rate": 0.0009971623797029258, "loss": 2.7595, "step": 549 }, { "epoch": 0.06306616213736957, "grad_norm": 0.37341979960887145, "learning_rate": 0.0009971425897841765, "loss": 2.7499, "step": 550 }, { "epoch": 0.06318082788671024, "grad_norm": 0.3325077238621815, "learning_rate": 0.0009971227312940826, "loss": 2.6768, "step": 551 }, { "epoch": 0.06329549363605091, "grad_norm": 0.3594070652745293, "learning_rate": 0.0009971028042353844, "loss": 2.8266, "step": 552 }, { "epoch": 0.06341015938539159, "grad_norm": 0.3818454255905805, "learning_rate": 0.00099708280861083, "loss": 2.8824, "step": 553 }, { "epoch": 0.06352482513473226, "grad_norm": 0.36249396519560034, "learning_rate": 0.0009970627444231776, "loss": 2.6487, "step": 554 }, { "epoch": 0.06363949088407293, "grad_norm": 0.3485390751171873, "learning_rate": 0.000997042611675194, "loss": 2.8473, "step": 555 }, { "epoch": 0.0637541566334136, "grad_norm": 0.36882355882253803, "learning_rate": 0.0009970224103696568, "loss": 2.8502, "step": 556 }, { "epoch": 0.06386882238275426, "grad_norm": 0.3845975349869386, "learning_rate": 0.0009970021405093523, "loss": 2.8224, "step": 557 }, { "epoch": 0.06398348813209494, "grad_norm": 0.38113924867701604, "learning_rate": 0.0009969818020970761, "loss": 2.7644, "step": 558 }, { "epoch": 0.06409815388143561, "grad_norm": 0.34430525400984807, "learning_rate": 0.0009969613951356338, "loss": 2.8116, "step": 559 }, { "epoch": 0.06421281963077628, "grad_norm": 0.4230378914190675, "learning_rate": 0.0009969409196278398, "loss": 2.769, "step": 560 }, { "epoch": 0.06432748538011696, "grad_norm": 0.39375503945927054, "learning_rate": 0.0009969203755765186, "loss": 2.7863, "step": 561 }, { "epoch": 0.06444215112945763, "grad_norm": 0.3756954173419415, "learning_rate": 0.0009968997629845038, "loss": 2.8838, "step": 562 }, { "epoch": 0.0645568168787983, "grad_norm": 0.3622992443487895, "learning_rate": 0.0009968790818546383, "loss": 2.8616, "step": 563 }, { "epoch": 0.06467148262813897, "grad_norm": 0.37140118296902913, "learning_rate": 0.000996858332189775, "loss": 2.8407, "step": 564 }, { "epoch": 0.06478614837747965, "grad_norm": 0.37797397816412776, "learning_rate": 0.0009968375139927756, "loss": 2.9121, "step": 565 }, { "epoch": 0.06490081412682032, "grad_norm": 0.38064691309203647, "learning_rate": 0.000996816627266512, "loss": 2.8938, "step": 566 }, { "epoch": 0.06501547987616099, "grad_norm": 0.35375178350637, "learning_rate": 0.0009967956720138647, "loss": 2.7726, "step": 567 }, { "epoch": 0.06513014562550166, "grad_norm": 0.346366813226489, "learning_rate": 0.0009967746482377243, "loss": 2.7801, "step": 568 }, { "epoch": 0.06524481137484234, "grad_norm": 0.3537313644943524, "learning_rate": 0.0009967535559409905, "loss": 2.7426, "step": 569 }, { "epoch": 0.06535947712418301, "grad_norm": 0.3020340017229151, "learning_rate": 0.0009967323951265725, "loss": 2.8297, "step": 570 }, { "epoch": 0.06547414287352368, "grad_norm": 0.32555722690613514, "learning_rate": 0.0009967111657973892, "loss": 2.7867, "step": 571 }, { "epoch": 0.06558880862286436, "grad_norm": 0.3265944969679014, "learning_rate": 0.000996689867956369, "loss": 2.8542, "step": 572 }, { "epoch": 0.06570347437220503, "grad_norm": 0.36224473561518206, "learning_rate": 0.0009966685016064491, "loss": 2.8781, "step": 573 }, { "epoch": 0.06581814012154569, "grad_norm": 0.3410136540998401, "learning_rate": 0.0009966470667505767, "loss": 2.8693, "step": 574 }, { "epoch": 0.06593280587088636, "grad_norm": 0.3183704316576589, "learning_rate": 0.0009966255633917086, "loss": 2.8049, "step": 575 }, { "epoch": 0.06604747162022703, "grad_norm": 0.3844789157888387, "learning_rate": 0.0009966039915328105, "loss": 2.8911, "step": 576 }, { "epoch": 0.0661621373695677, "grad_norm": 0.3424279256999673, "learning_rate": 0.0009965823511768578, "loss": 2.9003, "step": 577 }, { "epoch": 0.06627680311890838, "grad_norm": 0.36908185692227313, "learning_rate": 0.0009965606423268355, "loss": 2.8489, "step": 578 }, { "epoch": 0.06639146886824905, "grad_norm": 0.36458311519461306, "learning_rate": 0.000996538864985738, "loss": 2.7201, "step": 579 }, { "epoch": 0.06650613461758972, "grad_norm": 0.3615487372115599, "learning_rate": 0.0009965170191565688, "loss": 2.7444, "step": 580 }, { "epoch": 0.0666208003669304, "grad_norm": 0.39619806912950345, "learning_rate": 0.0009964951048423414, "loss": 2.8388, "step": 581 }, { "epoch": 0.06673546611627107, "grad_norm": 0.3890093341248368, "learning_rate": 0.0009964731220460784, "loss": 2.9052, "step": 582 }, { "epoch": 0.06685013186561174, "grad_norm": 0.4225710627187908, "learning_rate": 0.000996451070770812, "loss": 2.7088, "step": 583 }, { "epoch": 0.06696479761495241, "grad_norm": 0.3767880497760453, "learning_rate": 0.0009964289510195831, "loss": 2.7974, "step": 584 }, { "epoch": 0.06707946336429309, "grad_norm": 0.3574157837491907, "learning_rate": 0.0009964067627954436, "loss": 2.8335, "step": 585 }, { "epoch": 0.06719412911363376, "grad_norm": 0.34636327566531677, "learning_rate": 0.0009963845061014534, "loss": 2.6642, "step": 586 }, { "epoch": 0.06730879486297443, "grad_norm": 0.3360337890102201, "learning_rate": 0.0009963621809406826, "loss": 2.6885, "step": 587 }, { "epoch": 0.0674234606123151, "grad_norm": 0.3385662335426565, "learning_rate": 0.0009963397873162107, "loss": 2.8007, "step": 588 }, { "epoch": 0.06753812636165578, "grad_norm": 0.3694483561969065, "learning_rate": 0.0009963173252311257, "loss": 2.9901, "step": 589 }, { "epoch": 0.06765279211099645, "grad_norm": 0.3731327358327472, "learning_rate": 0.0009962947946885268, "loss": 2.8479, "step": 590 }, { "epoch": 0.06776745786033712, "grad_norm": 0.402369210879225, "learning_rate": 0.000996272195691521, "loss": 2.8698, "step": 591 }, { "epoch": 0.06788212360967778, "grad_norm": 0.3659673445159418, "learning_rate": 0.0009962495282432255, "loss": 2.8526, "step": 592 }, { "epoch": 0.06799678935901846, "grad_norm": 0.3361275502562852, "learning_rate": 0.0009962267923467672, "loss": 2.8988, "step": 593 }, { "epoch": 0.06811145510835913, "grad_norm": 0.39591127988693153, "learning_rate": 0.0009962039880052817, "loss": 2.6127, "step": 594 }, { "epoch": 0.0682261208576998, "grad_norm": 0.35340756316430194, "learning_rate": 0.0009961811152219148, "loss": 2.8294, "step": 595 }, { "epoch": 0.06834078660704047, "grad_norm": 0.3719616818913295, "learning_rate": 0.0009961581739998209, "loss": 2.8296, "step": 596 }, { "epoch": 0.06845545235638115, "grad_norm": 0.3594376416776732, "learning_rate": 0.0009961351643421646, "loss": 2.7965, "step": 597 }, { "epoch": 0.06857011810572182, "grad_norm": 0.3896906187725525, "learning_rate": 0.0009961120862521195, "loss": 2.9705, "step": 598 }, { "epoch": 0.06868478385506249, "grad_norm": 0.3738404085989803, "learning_rate": 0.000996088939732869, "loss": 2.698, "step": 599 }, { "epoch": 0.06879944960440317, "grad_norm": 0.4032353294844088, "learning_rate": 0.0009960657247876056, "loss": 2.8108, "step": 600 }, { "epoch": 0.06891411535374384, "grad_norm": 0.3430422612414147, "learning_rate": 0.000996042441419531, "loss": 2.6703, "step": 601 }, { "epoch": 0.06902878110308451, "grad_norm": 0.3770888954550399, "learning_rate": 0.0009960190896318572, "loss": 2.7161, "step": 602 }, { "epoch": 0.06914344685242518, "grad_norm": 0.3822676031796531, "learning_rate": 0.0009959956694278052, "loss": 2.754, "step": 603 }, { "epoch": 0.06925811260176586, "grad_norm": 0.36598496054960544, "learning_rate": 0.000995972180810605, "loss": 2.7121, "step": 604 }, { "epoch": 0.06937277835110653, "grad_norm": 0.3757470517181319, "learning_rate": 0.0009959486237834964, "loss": 2.8235, "step": 605 }, { "epoch": 0.0694874441004472, "grad_norm": 0.3349128446474399, "learning_rate": 0.0009959249983497289, "loss": 2.679, "step": 606 }, { "epoch": 0.06960210984978787, "grad_norm": 0.3795356920227357, "learning_rate": 0.0009959013045125612, "loss": 2.7834, "step": 607 }, { "epoch": 0.06971677559912855, "grad_norm": 0.34552900354485805, "learning_rate": 0.000995877542275261, "loss": 2.7724, "step": 608 }, { "epoch": 0.0698314413484692, "grad_norm": 0.3369926696067527, "learning_rate": 0.0009958537116411064, "loss": 2.7884, "step": 609 }, { "epoch": 0.06994610709780988, "grad_norm": 0.3416824558846171, "learning_rate": 0.000995829812613384, "loss": 2.9158, "step": 610 }, { "epoch": 0.07006077284715055, "grad_norm": 0.36294983633352773, "learning_rate": 0.0009958058451953902, "loss": 2.8477, "step": 611 }, { "epoch": 0.07017543859649122, "grad_norm": 0.32047073254492964, "learning_rate": 0.0009957818093904313, "loss": 2.8156, "step": 612 }, { "epoch": 0.0702901043458319, "grad_norm": 0.3417012078702137, "learning_rate": 0.000995757705201822, "loss": 2.7574, "step": 613 }, { "epoch": 0.07040477009517257, "grad_norm": 0.33546358511539814, "learning_rate": 0.0009957335326328874, "loss": 2.8695, "step": 614 }, { "epoch": 0.07051943584451324, "grad_norm": 0.3762991305598653, "learning_rate": 0.0009957092916869613, "loss": 2.857, "step": 615 }, { "epoch": 0.07063410159385392, "grad_norm": 0.3270171785606096, "learning_rate": 0.0009956849823673877, "loss": 2.7531, "step": 616 }, { "epoch": 0.07074876734319459, "grad_norm": 0.36331965706596603, "learning_rate": 0.0009956606046775192, "loss": 2.8138, "step": 617 }, { "epoch": 0.07086343309253526, "grad_norm": 0.3725256662048272, "learning_rate": 0.0009956361586207186, "loss": 2.8221, "step": 618 }, { "epoch": 0.07097809884187593, "grad_norm": 0.36081542140400913, "learning_rate": 0.0009956116442003575, "loss": 2.8594, "step": 619 }, { "epoch": 0.0710927645912166, "grad_norm": 0.3671749469206358, "learning_rate": 0.0009955870614198174, "loss": 2.7193, "step": 620 }, { "epoch": 0.07120743034055728, "grad_norm": 0.3949819857563313, "learning_rate": 0.000995562410282489, "loss": 2.8042, "step": 621 }, { "epoch": 0.07132209608989795, "grad_norm": 0.381801362492628, "learning_rate": 0.0009955376907917722, "loss": 2.7509, "step": 622 }, { "epoch": 0.07143676183923862, "grad_norm": 0.31984895011934356, "learning_rate": 0.0009955129029510768, "loss": 2.9748, "step": 623 }, { "epoch": 0.0715514275885793, "grad_norm": 0.350355652487004, "learning_rate": 0.0009954880467638219, "loss": 2.9142, "step": 624 }, { "epoch": 0.07166609333791997, "grad_norm": 0.3508964715117756, "learning_rate": 0.0009954631222334356, "loss": 2.8187, "step": 625 }, { "epoch": 0.07178075908726063, "grad_norm": 0.3431305741311322, "learning_rate": 0.0009954381293633561, "loss": 2.8682, "step": 626 }, { "epoch": 0.0718954248366013, "grad_norm": 0.37750962987300485, "learning_rate": 0.0009954130681570305, "loss": 2.9051, "step": 627 }, { "epoch": 0.07201009058594197, "grad_norm": 0.3743412098272145, "learning_rate": 0.0009953879386179157, "loss": 2.7099, "step": 628 }, { "epoch": 0.07212475633528265, "grad_norm": 0.3468643282965864, "learning_rate": 0.0009953627407494777, "loss": 2.8391, "step": 629 }, { "epoch": 0.07223942208462332, "grad_norm": 0.3716581639642345, "learning_rate": 0.000995337474555192, "loss": 2.7012, "step": 630 }, { "epoch": 0.07235408783396399, "grad_norm": 0.36933068087161947, "learning_rate": 0.0009953121400385438, "loss": 2.7055, "step": 631 }, { "epoch": 0.07246875358330467, "grad_norm": 0.3631770326076731, "learning_rate": 0.0009952867372030273, "loss": 2.7728, "step": 632 }, { "epoch": 0.07258341933264534, "grad_norm": 0.36108585429463225, "learning_rate": 0.0009952612660521466, "loss": 2.8255, "step": 633 }, { "epoch": 0.07269808508198601, "grad_norm": 0.36061693149097496, "learning_rate": 0.0009952357265894146, "loss": 2.786, "step": 634 }, { "epoch": 0.07281275083132668, "grad_norm": 0.36033243593273134, "learning_rate": 0.000995210118818354, "loss": 2.8017, "step": 635 }, { "epoch": 0.07292741658066736, "grad_norm": 0.32716082977794686, "learning_rate": 0.0009951844427424973, "loss": 2.6647, "step": 636 }, { "epoch": 0.07304208233000803, "grad_norm": 0.3520457593453405, "learning_rate": 0.0009951586983653858, "loss": 2.7381, "step": 637 }, { "epoch": 0.0731567480793487, "grad_norm": 0.3878747588227373, "learning_rate": 0.0009951328856905703, "loss": 2.8102, "step": 638 }, { "epoch": 0.07327141382868937, "grad_norm": 0.330642368471026, "learning_rate": 0.0009951070047216116, "loss": 2.799, "step": 639 }, { "epoch": 0.07338607957803005, "grad_norm": 0.33272242573820904, "learning_rate": 0.000995081055462079, "loss": 2.8038, "step": 640 }, { "epoch": 0.07350074532737072, "grad_norm": 0.3545123390803429, "learning_rate": 0.0009950550379155519, "loss": 2.7676, "step": 641 }, { "epoch": 0.07361541107671139, "grad_norm": 0.31441985755465995, "learning_rate": 0.000995028952085619, "loss": 2.7781, "step": 642 }, { "epoch": 0.07373007682605205, "grad_norm": 0.35725732121595893, "learning_rate": 0.0009950027979758781, "loss": 2.7209, "step": 643 }, { "epoch": 0.07384474257539272, "grad_norm": 0.3885393546845129, "learning_rate": 0.0009949765755899369, "loss": 2.6714, "step": 644 }, { "epoch": 0.0739594083247334, "grad_norm": 0.3854769764670012, "learning_rate": 0.0009949502849314123, "loss": 2.7207, "step": 645 }, { "epoch": 0.07407407407407407, "grad_norm": 0.35490346206148193, "learning_rate": 0.0009949239260039304, "loss": 2.8291, "step": 646 }, { "epoch": 0.07418873982341474, "grad_norm": 0.3437993972006972, "learning_rate": 0.0009948974988111272, "loss": 2.7546, "step": 647 }, { "epoch": 0.07430340557275542, "grad_norm": 0.34751332751574304, "learning_rate": 0.0009948710033566475, "loss": 2.7565, "step": 648 }, { "epoch": 0.07441807132209609, "grad_norm": 0.3879093245857311, "learning_rate": 0.000994844439644146, "loss": 2.6464, "step": 649 }, { "epoch": 0.07453273707143676, "grad_norm": 0.32740429137183163, "learning_rate": 0.0009948178076772867, "loss": 2.7501, "step": 650 }, { "epoch": 0.07464740282077743, "grad_norm": 0.34349167746633186, "learning_rate": 0.0009947911074597428, "loss": 2.6821, "step": 651 }, { "epoch": 0.0747620685701181, "grad_norm": 0.35287004485510176, "learning_rate": 0.0009947643389951973, "loss": 2.6545, "step": 652 }, { "epoch": 0.07487673431945878, "grad_norm": 0.32959540606193877, "learning_rate": 0.0009947375022873422, "loss": 2.6083, "step": 653 }, { "epoch": 0.07499140006879945, "grad_norm": 0.3469641750994297, "learning_rate": 0.0009947105973398794, "loss": 2.7522, "step": 654 }, { "epoch": 0.07510606581814012, "grad_norm": 0.386278937300029, "learning_rate": 0.0009946836241565195, "loss": 2.9341, "step": 655 }, { "epoch": 0.0752207315674808, "grad_norm": 0.3601259442845363, "learning_rate": 0.0009946565827409833, "loss": 2.759, "step": 656 }, { "epoch": 0.07533539731682147, "grad_norm": 0.33892103184916766, "learning_rate": 0.0009946294730970005, "loss": 2.7683, "step": 657 }, { "epoch": 0.07545006306616214, "grad_norm": 0.3372733296202974, "learning_rate": 0.0009946022952283106, "loss": 2.7155, "step": 658 }, { "epoch": 0.07556472881550282, "grad_norm": 0.33473565447204673, "learning_rate": 0.0009945750491386616, "loss": 2.7814, "step": 659 }, { "epoch": 0.07567939456484347, "grad_norm": 0.342343288508645, "learning_rate": 0.0009945477348318123, "loss": 2.6832, "step": 660 }, { "epoch": 0.07579406031418415, "grad_norm": 0.3328424855831501, "learning_rate": 0.00099452035231153, "loss": 2.6866, "step": 661 }, { "epoch": 0.07590872606352482, "grad_norm": 0.38822682124517843, "learning_rate": 0.0009944929015815913, "loss": 2.6052, "step": 662 }, { "epoch": 0.07602339181286549, "grad_norm": 0.37760905814629636, "learning_rate": 0.0009944653826457828, "loss": 2.7967, "step": 663 }, { "epoch": 0.07613805756220617, "grad_norm": 0.3918384530780941, "learning_rate": 0.0009944377955079004, "loss": 2.9643, "step": 664 }, { "epoch": 0.07625272331154684, "grad_norm": 0.37170981837961287, "learning_rate": 0.0009944101401717486, "loss": 2.7883, "step": 665 }, { "epoch": 0.07636738906088751, "grad_norm": 0.3683137221815697, "learning_rate": 0.0009943824166411424, "loss": 2.8607, "step": 666 }, { "epoch": 0.07648205481022818, "grad_norm": 0.32363810974230633, "learning_rate": 0.0009943546249199056, "loss": 2.8634, "step": 667 }, { "epoch": 0.07659672055956886, "grad_norm": 0.3218890905066503, "learning_rate": 0.0009943267650118716, "loss": 2.7987, "step": 668 }, { "epoch": 0.07671138630890953, "grad_norm": 0.36291221463340795, "learning_rate": 0.0009942988369208829, "loss": 2.7485, "step": 669 }, { "epoch": 0.0768260520582502, "grad_norm": 0.35554600874603187, "learning_rate": 0.000994270840650792, "loss": 2.7304, "step": 670 }, { "epoch": 0.07694071780759087, "grad_norm": 0.3235493287698985, "learning_rate": 0.0009942427762054604, "loss": 2.8015, "step": 671 }, { "epoch": 0.07705538355693155, "grad_norm": 0.3300005966936813, "learning_rate": 0.0009942146435887589, "loss": 2.7567, "step": 672 }, { "epoch": 0.07717004930627222, "grad_norm": 0.370024131947592, "learning_rate": 0.0009941864428045677, "loss": 2.9205, "step": 673 }, { "epoch": 0.0772847150556129, "grad_norm": 0.3583115824591791, "learning_rate": 0.0009941581738567768, "loss": 2.7386, "step": 674 }, { "epoch": 0.07739938080495357, "grad_norm": 0.3738066775311378, "learning_rate": 0.0009941298367492854, "loss": 2.9744, "step": 675 }, { "epoch": 0.07751404655429424, "grad_norm": 0.31146002194075517, "learning_rate": 0.0009941014314860021, "loss": 2.6852, "step": 676 }, { "epoch": 0.0776287123036349, "grad_norm": 0.37156145910077804, "learning_rate": 0.0009940729580708448, "loss": 2.8119, "step": 677 }, { "epoch": 0.07774337805297557, "grad_norm": 0.323385660484067, "learning_rate": 0.0009940444165077408, "loss": 2.689, "step": 678 }, { "epoch": 0.07785804380231624, "grad_norm": 0.3257550605322397, "learning_rate": 0.0009940158068006267, "loss": 2.8024, "step": 679 }, { "epoch": 0.07797270955165692, "grad_norm": 0.33279684678649046, "learning_rate": 0.0009939871289534488, "loss": 2.6566, "step": 680 }, { "epoch": 0.07808737530099759, "grad_norm": 0.3480450120812373, "learning_rate": 0.0009939583829701628, "loss": 2.7286, "step": 681 }, { "epoch": 0.07820204105033826, "grad_norm": 0.32998462718005717, "learning_rate": 0.0009939295688547337, "loss": 2.7529, "step": 682 }, { "epoch": 0.07831670679967893, "grad_norm": 0.34063967287480673, "learning_rate": 0.0009939006866111356, "loss": 2.7465, "step": 683 }, { "epoch": 0.0784313725490196, "grad_norm": 0.37495047996329034, "learning_rate": 0.0009938717362433524, "loss": 2.6946, "step": 684 }, { "epoch": 0.07854603829836028, "grad_norm": 0.3707009263477392, "learning_rate": 0.0009938427177553773, "loss": 2.6736, "step": 685 }, { "epoch": 0.07866070404770095, "grad_norm": 0.4067848691390199, "learning_rate": 0.0009938136311512127, "loss": 2.918, "step": 686 }, { "epoch": 0.07877536979704163, "grad_norm": 0.3741709553287032, "learning_rate": 0.0009937844764348707, "loss": 2.8201, "step": 687 }, { "epoch": 0.0788900355463823, "grad_norm": 0.3502747640284244, "learning_rate": 0.0009937552536103727, "loss": 2.7879, "step": 688 }, { "epoch": 0.07900470129572297, "grad_norm": 0.31784251294093274, "learning_rate": 0.000993725962681749, "loss": 2.7747, "step": 689 }, { "epoch": 0.07911936704506364, "grad_norm": 0.34653285764421765, "learning_rate": 0.0009936966036530402, "loss": 2.8962, "step": 690 }, { "epoch": 0.07923403279440432, "grad_norm": 0.32380406666958045, "learning_rate": 0.0009936671765282956, "loss": 2.7673, "step": 691 }, { "epoch": 0.07934869854374499, "grad_norm": 0.3171829588982205, "learning_rate": 0.0009936376813115741, "loss": 2.6686, "step": 692 }, { "epoch": 0.07946336429308566, "grad_norm": 0.35318233477147976, "learning_rate": 0.000993608118006944, "loss": 2.7779, "step": 693 }, { "epoch": 0.07957803004242632, "grad_norm": 0.37127460357755665, "learning_rate": 0.0009935784866184833, "loss": 2.7071, "step": 694 }, { "epoch": 0.079692695791767, "grad_norm": 0.3232693416643724, "learning_rate": 0.0009935487871502787, "loss": 2.6805, "step": 695 }, { "epoch": 0.07980736154110767, "grad_norm": 0.39550106757047115, "learning_rate": 0.0009935190196064267, "loss": 2.784, "step": 696 }, { "epoch": 0.07992202729044834, "grad_norm": 0.32802531513593103, "learning_rate": 0.0009934891839910333, "loss": 2.809, "step": 697 }, { "epoch": 0.08003669303978901, "grad_norm": 0.33505304970547084, "learning_rate": 0.0009934592803082138, "loss": 2.7369, "step": 698 }, { "epoch": 0.08015135878912968, "grad_norm": 0.3546371232382824, "learning_rate": 0.0009934293085620929, "loss": 2.7714, "step": 699 }, { "epoch": 0.08026602453847036, "grad_norm": 0.32653696811924965, "learning_rate": 0.0009933992687568044, "loss": 2.7682, "step": 700 }, { "epoch": 0.08038069028781103, "grad_norm": 0.3361782255198608, "learning_rate": 0.0009933691608964917, "loss": 2.6437, "step": 701 }, { "epoch": 0.0804953560371517, "grad_norm": 0.3374368050741158, "learning_rate": 0.0009933389849853078, "loss": 2.8596, "step": 702 }, { "epoch": 0.08061002178649238, "grad_norm": 0.3459579963693237, "learning_rate": 0.0009933087410274148, "loss": 2.9184, "step": 703 }, { "epoch": 0.08072468753583305, "grad_norm": 0.34996499835452244, "learning_rate": 0.0009932784290269843, "loss": 2.8407, "step": 704 }, { "epoch": 0.08083935328517372, "grad_norm": 0.38281053692307715, "learning_rate": 0.0009932480489881974, "loss": 2.863, "step": 705 }, { "epoch": 0.0809540190345144, "grad_norm": 0.3112690038667326, "learning_rate": 0.0009932176009152442, "loss": 2.6913, "step": 706 }, { "epoch": 0.08106868478385507, "grad_norm": 0.33220821626530317, "learning_rate": 0.0009931870848123245, "loss": 2.6733, "step": 707 }, { "epoch": 0.08118335053319574, "grad_norm": 0.33683496984082895, "learning_rate": 0.0009931565006836476, "loss": 2.8295, "step": 708 }, { "epoch": 0.08129801628253641, "grad_norm": 0.37466401491119167, "learning_rate": 0.0009931258485334315, "loss": 2.773, "step": 709 }, { "epoch": 0.08141268203187708, "grad_norm": 0.35152365970651545, "learning_rate": 0.0009930951283659048, "loss": 2.8038, "step": 710 }, { "epoch": 0.08152734778121776, "grad_norm": 0.36865897054377655, "learning_rate": 0.0009930643401853043, "loss": 2.755, "step": 711 }, { "epoch": 0.08164201353055842, "grad_norm": 0.3337295547740383, "learning_rate": 0.0009930334839958765, "loss": 2.7796, "step": 712 }, { "epoch": 0.08175667927989909, "grad_norm": 0.32593844222607615, "learning_rate": 0.000993002559801878, "loss": 2.6649, "step": 713 }, { "epoch": 0.08187134502923976, "grad_norm": 0.4235123937261032, "learning_rate": 0.0009929715676075736, "loss": 2.926, "step": 714 }, { "epoch": 0.08198601077858043, "grad_norm": 0.3498516325730499, "learning_rate": 0.0009929405074172383, "loss": 2.722, "step": 715 }, { "epoch": 0.08210067652792111, "grad_norm": 0.38653762822311305, "learning_rate": 0.0009929093792351567, "loss": 2.7893, "step": 716 }, { "epoch": 0.08221534227726178, "grad_norm": 0.37093427758230435, "learning_rate": 0.0009928781830656215, "loss": 2.8822, "step": 717 }, { "epoch": 0.08233000802660245, "grad_norm": 0.32528309251182336, "learning_rate": 0.0009928469189129363, "loss": 2.7183, "step": 718 }, { "epoch": 0.08244467377594313, "grad_norm": 0.344041478998291, "learning_rate": 0.0009928155867814131, "loss": 2.753, "step": 719 }, { "epoch": 0.0825593395252838, "grad_norm": 0.33755859849780784, "learning_rate": 0.0009927841866753735, "loss": 2.729, "step": 720 }, { "epoch": 0.08267400527462447, "grad_norm": 0.4004082459021678, "learning_rate": 0.000992752718599149, "loss": 2.8534, "step": 721 }, { "epoch": 0.08278867102396514, "grad_norm": 0.3318401978047511, "learning_rate": 0.0009927211825570793, "loss": 2.6772, "step": 722 }, { "epoch": 0.08290333677330582, "grad_norm": 0.35634425350570964, "learning_rate": 0.000992689578553515, "loss": 2.7819, "step": 723 }, { "epoch": 0.08301800252264649, "grad_norm": 0.3497219381296959, "learning_rate": 0.0009926579065928144, "loss": 2.6364, "step": 724 }, { "epoch": 0.08313266827198716, "grad_norm": 0.34057995484950926, "learning_rate": 0.000992626166679347, "loss": 2.7745, "step": 725 }, { "epoch": 0.08324733402132783, "grad_norm": 0.3095893172406948, "learning_rate": 0.0009925943588174897, "loss": 2.83, "step": 726 }, { "epoch": 0.08336199977066851, "grad_norm": 0.3837932914007685, "learning_rate": 0.0009925624830116305, "loss": 2.9259, "step": 727 }, { "epoch": 0.08347666552000918, "grad_norm": 0.3594329020185206, "learning_rate": 0.000992530539266166, "loss": 2.71, "step": 728 }, { "epoch": 0.08359133126934984, "grad_norm": 0.3195118493758497, "learning_rate": 0.0009924985275855018, "loss": 2.7717, "step": 729 }, { "epoch": 0.08370599701869051, "grad_norm": 0.3253821168076408, "learning_rate": 0.000992466447974054, "loss": 2.8371, "step": 730 }, { "epoch": 0.08382066276803118, "grad_norm": 0.3466344885513138, "learning_rate": 0.0009924343004362466, "loss": 2.7458, "step": 731 }, { "epoch": 0.08393532851737186, "grad_norm": 0.3909718641136255, "learning_rate": 0.0009924020849765142, "loss": 2.8142, "step": 732 }, { "epoch": 0.08404999426671253, "grad_norm": 0.3386934326844014, "learning_rate": 0.0009923698015993003, "loss": 2.6824, "step": 733 }, { "epoch": 0.0841646600160532, "grad_norm": 0.3387661411014136, "learning_rate": 0.0009923374503090577, "loss": 2.8276, "step": 734 }, { "epoch": 0.08427932576539388, "grad_norm": 0.3950938166321506, "learning_rate": 0.0009923050311102487, "loss": 2.8077, "step": 735 }, { "epoch": 0.08439399151473455, "grad_norm": 0.3391695823445589, "learning_rate": 0.0009922725440073446, "loss": 2.7546, "step": 736 }, { "epoch": 0.08450865726407522, "grad_norm": 0.3010384997072432, "learning_rate": 0.0009922399890048268, "loss": 2.7022, "step": 737 }, { "epoch": 0.0846233230134159, "grad_norm": 0.33198956795048035, "learning_rate": 0.0009922073661071855, "loss": 2.7311, "step": 738 }, { "epoch": 0.08473798876275657, "grad_norm": 0.3644362244355827, "learning_rate": 0.0009921746753189203, "loss": 2.9749, "step": 739 }, { "epoch": 0.08485265451209724, "grad_norm": 0.35304746718398816, "learning_rate": 0.0009921419166445404, "loss": 2.5974, "step": 740 }, { "epoch": 0.08496732026143791, "grad_norm": 0.321195109224156, "learning_rate": 0.0009921090900885641, "loss": 2.6718, "step": 741 }, { "epoch": 0.08508198601077858, "grad_norm": 0.3191116220424898, "learning_rate": 0.0009920761956555193, "loss": 2.7905, "step": 742 }, { "epoch": 0.08519665176011926, "grad_norm": 0.4261166185241973, "learning_rate": 0.0009920432333499433, "loss": 2.7329, "step": 743 }, { "epoch": 0.08531131750945993, "grad_norm": 0.38392618279517443, "learning_rate": 0.0009920102031763822, "loss": 2.7382, "step": 744 }, { "epoch": 0.0854259832588006, "grad_norm": 0.39672373374294195, "learning_rate": 0.0009919771051393922, "loss": 2.7643, "step": 745 }, { "epoch": 0.08554064900814126, "grad_norm": 0.3653929855332756, "learning_rate": 0.0009919439392435385, "loss": 2.885, "step": 746 }, { "epoch": 0.08565531475748193, "grad_norm": 0.34027173273402195, "learning_rate": 0.0009919107054933956, "loss": 2.7345, "step": 747 }, { "epoch": 0.08576998050682261, "grad_norm": 0.38350348090530884, "learning_rate": 0.0009918774038935477, "loss": 2.7178, "step": 748 }, { "epoch": 0.08588464625616328, "grad_norm": 0.3253844923475316, "learning_rate": 0.000991844034448588, "loss": 2.7696, "step": 749 }, { "epoch": 0.08599931200550395, "grad_norm": 0.3156746799195098, "learning_rate": 0.000991810597163119, "loss": 2.6778, "step": 750 }, { "epoch": 0.08611397775484463, "grad_norm": 0.3458491421738056, "learning_rate": 0.000991777092041753, "loss": 2.6871, "step": 751 }, { "epoch": 0.0862286435041853, "grad_norm": 0.3347314285110529, "learning_rate": 0.0009917435190891111, "loss": 2.6569, "step": 752 }, { "epoch": 0.08634330925352597, "grad_norm": 0.3631361433305357, "learning_rate": 0.0009917098783098243, "loss": 2.8881, "step": 753 }, { "epoch": 0.08645797500286664, "grad_norm": 0.38313465064211627, "learning_rate": 0.0009916761697085327, "loss": 2.77, "step": 754 }, { "epoch": 0.08657264075220732, "grad_norm": 0.3714088361988252, "learning_rate": 0.0009916423932898857, "loss": 2.7506, "step": 755 }, { "epoch": 0.08668730650154799, "grad_norm": 0.31867951936812083, "learning_rate": 0.0009916085490585423, "loss": 2.9981, "step": 756 }, { "epoch": 0.08680197225088866, "grad_norm": 0.32425001058262604, "learning_rate": 0.0009915746370191701, "loss": 2.5973, "step": 757 }, { "epoch": 0.08691663800022933, "grad_norm": 0.3468982854015656, "learning_rate": 0.0009915406571764471, "loss": 2.7947, "step": 758 }, { "epoch": 0.08703130374957001, "grad_norm": 0.3405498855389674, "learning_rate": 0.0009915066095350603, "loss": 2.731, "step": 759 }, { "epoch": 0.08714596949891068, "grad_norm": 0.3404957960486945, "learning_rate": 0.0009914724940997053, "loss": 2.6851, "step": 760 }, { "epoch": 0.08726063524825135, "grad_norm": 0.3408219490216612, "learning_rate": 0.0009914383108750883, "loss": 2.7239, "step": 761 }, { "epoch": 0.08737530099759203, "grad_norm": 0.3844592933827754, "learning_rate": 0.000991404059865924, "loss": 2.6701, "step": 762 }, { "epoch": 0.08748996674693268, "grad_norm": 0.3323816365970395, "learning_rate": 0.0009913697410769366, "loss": 2.6468, "step": 763 }, { "epoch": 0.08760463249627336, "grad_norm": 0.30607982121961336, "learning_rate": 0.0009913353545128597, "loss": 2.6974, "step": 764 }, { "epoch": 0.08771929824561403, "grad_norm": 0.34487323480870274, "learning_rate": 0.0009913009001784364, "loss": 2.8012, "step": 765 }, { "epoch": 0.0878339639949547, "grad_norm": 0.32923920870338097, "learning_rate": 0.0009912663780784188, "loss": 2.7395, "step": 766 }, { "epoch": 0.08794862974429538, "grad_norm": 0.3065849850457712, "learning_rate": 0.000991231788217569, "loss": 2.715, "step": 767 }, { "epoch": 0.08806329549363605, "grad_norm": 0.31905220161800446, "learning_rate": 0.0009911971306006575, "loss": 2.6455, "step": 768 }, { "epoch": 0.08817796124297672, "grad_norm": 0.35923787226643666, "learning_rate": 0.000991162405232465, "loss": 2.7436, "step": 769 }, { "epoch": 0.0882926269923174, "grad_norm": 0.3432469176357402, "learning_rate": 0.0009911276121177812, "loss": 2.7743, "step": 770 }, { "epoch": 0.08840729274165807, "grad_norm": 0.3373664179816129, "learning_rate": 0.0009910927512614051, "loss": 2.6684, "step": 771 }, { "epoch": 0.08852195849099874, "grad_norm": 0.3347689265510937, "learning_rate": 0.000991057822668145, "loss": 2.7207, "step": 772 }, { "epoch": 0.08863662424033941, "grad_norm": 0.3463916729415781, "learning_rate": 0.0009910228263428186, "loss": 2.9124, "step": 773 }, { "epoch": 0.08875128998968008, "grad_norm": 0.34984784975831107, "learning_rate": 0.000990987762290253, "loss": 2.887, "step": 774 }, { "epoch": 0.08886595573902076, "grad_norm": 0.4258067604126735, "learning_rate": 0.0009909526305152848, "loss": 2.7918, "step": 775 }, { "epoch": 0.08898062148836143, "grad_norm": 0.3293218262617916, "learning_rate": 0.0009909174310227596, "loss": 2.7422, "step": 776 }, { "epoch": 0.0890952872377021, "grad_norm": 0.3559968124688434, "learning_rate": 0.0009908821638175325, "loss": 2.7803, "step": 777 }, { "epoch": 0.08920995298704278, "grad_norm": 0.34751120510254585, "learning_rate": 0.000990846828904468, "loss": 2.8157, "step": 778 }, { "epoch": 0.08932461873638345, "grad_norm": 0.3092763044344859, "learning_rate": 0.0009908114262884397, "loss": 2.6105, "step": 779 }, { "epoch": 0.08943928448572411, "grad_norm": 0.338958938313162, "learning_rate": 0.0009907759559743311, "loss": 2.7089, "step": 780 }, { "epoch": 0.08955395023506478, "grad_norm": 0.36064855246869837, "learning_rate": 0.0009907404179670342, "loss": 2.916, "step": 781 }, { "epoch": 0.08966861598440545, "grad_norm": 0.34218709162819727, "learning_rate": 0.000990704812271451, "loss": 2.7685, "step": 782 }, { "epoch": 0.08978328173374613, "grad_norm": 0.31491020556970517, "learning_rate": 0.0009906691388924928, "loss": 2.788, "step": 783 }, { "epoch": 0.0898979474830868, "grad_norm": 0.30196436362394613, "learning_rate": 0.0009906333978350799, "loss": 2.5976, "step": 784 }, { "epoch": 0.09001261323242747, "grad_norm": 0.31564811746559535, "learning_rate": 0.000990597589104142, "loss": 2.6385, "step": 785 }, { "epoch": 0.09012727898176814, "grad_norm": 0.30516805993385987, "learning_rate": 0.0009905617127046182, "loss": 2.7116, "step": 786 }, { "epoch": 0.09024194473110882, "grad_norm": 0.3323257686605458, "learning_rate": 0.0009905257686414573, "loss": 2.7429, "step": 787 }, { "epoch": 0.09035661048044949, "grad_norm": 0.33775436953277177, "learning_rate": 0.0009904897569196168, "loss": 2.7019, "step": 788 }, { "epoch": 0.09047127622979016, "grad_norm": 0.3230868806083502, "learning_rate": 0.0009904536775440641, "loss": 2.5545, "step": 789 }, { "epoch": 0.09058594197913084, "grad_norm": 0.4171155020390501, "learning_rate": 0.0009904175305197752, "loss": 2.8299, "step": 790 }, { "epoch": 0.09070060772847151, "grad_norm": 0.3534959263460345, "learning_rate": 0.0009903813158517363, "loss": 2.6763, "step": 791 }, { "epoch": 0.09081527347781218, "grad_norm": 0.3816764948075023, "learning_rate": 0.0009903450335449423, "loss": 2.8094, "step": 792 }, { "epoch": 0.09092993922715285, "grad_norm": 0.3552510774815217, "learning_rate": 0.0009903086836043978, "loss": 2.7876, "step": 793 }, { "epoch": 0.09104460497649353, "grad_norm": 0.33634011584573675, "learning_rate": 0.0009902722660351166, "loss": 2.7669, "step": 794 }, { "epoch": 0.0911592707258342, "grad_norm": 0.367260508820492, "learning_rate": 0.0009902357808421218, "loss": 2.704, "step": 795 }, { "epoch": 0.09127393647517487, "grad_norm": 0.35258351013888056, "learning_rate": 0.0009901992280304456, "loss": 2.6401, "step": 796 }, { "epoch": 0.09138860222451553, "grad_norm": 0.29454054909104727, "learning_rate": 0.00099016260760513, "loss": 2.7961, "step": 797 }, { "epoch": 0.0915032679738562, "grad_norm": 0.34810911807572176, "learning_rate": 0.000990125919571226, "loss": 2.6947, "step": 798 }, { "epoch": 0.09161793372319688, "grad_norm": 0.3186633176550656, "learning_rate": 0.000990089163933794, "loss": 2.8424, "step": 799 }, { "epoch": 0.09173259947253755, "grad_norm": 0.3386610558745488, "learning_rate": 0.000990052340697904, "loss": 2.8073, "step": 800 }, { "epoch": 0.09184726522187822, "grad_norm": 0.31942280617581165, "learning_rate": 0.0009900154498686349, "loss": 2.7909, "step": 801 }, { "epoch": 0.0919619309712189, "grad_norm": 0.32051896081066805, "learning_rate": 0.0009899784914510748, "loss": 2.8542, "step": 802 }, { "epoch": 0.09207659672055957, "grad_norm": 0.3035296947368763, "learning_rate": 0.0009899414654503216, "loss": 2.7444, "step": 803 }, { "epoch": 0.09219126246990024, "grad_norm": 0.32300455599940225, "learning_rate": 0.0009899043718714826, "loss": 2.7318, "step": 804 }, { "epoch": 0.09230592821924091, "grad_norm": 0.3092276294728564, "learning_rate": 0.0009898672107196739, "loss": 2.6768, "step": 805 }, { "epoch": 0.09242059396858159, "grad_norm": 0.3323756425112556, "learning_rate": 0.000989829982000021, "loss": 2.7118, "step": 806 }, { "epoch": 0.09253525971792226, "grad_norm": 0.312506706483085, "learning_rate": 0.000989792685717659, "loss": 2.8544, "step": 807 }, { "epoch": 0.09264992546726293, "grad_norm": 0.3132005664308122, "learning_rate": 0.0009897553218777327, "loss": 2.6641, "step": 808 }, { "epoch": 0.0927645912166036, "grad_norm": 0.34130888476269583, "learning_rate": 0.000989717890485395, "loss": 2.7588, "step": 809 }, { "epoch": 0.09287925696594428, "grad_norm": 0.33454793194644145, "learning_rate": 0.0009896803915458094, "loss": 2.8341, "step": 810 }, { "epoch": 0.09299392271528495, "grad_norm": 0.3138451474325724, "learning_rate": 0.0009896428250641479, "loss": 2.6424, "step": 811 }, { "epoch": 0.09310858846462562, "grad_norm": 0.3146705859461887, "learning_rate": 0.000989605191045592, "loss": 2.7295, "step": 812 }, { "epoch": 0.0932232542139663, "grad_norm": 0.3183644182781799, "learning_rate": 0.0009895674894953327, "loss": 2.7056, "step": 813 }, { "epoch": 0.09333791996330695, "grad_norm": 0.3260308382597346, "learning_rate": 0.0009895297204185706, "loss": 2.6179, "step": 814 }, { "epoch": 0.09345258571264763, "grad_norm": 0.321161934289455, "learning_rate": 0.0009894918838205145, "loss": 2.7037, "step": 815 }, { "epoch": 0.0935672514619883, "grad_norm": 0.32060529089265155, "learning_rate": 0.0009894539797063837, "loss": 2.6501, "step": 816 }, { "epoch": 0.09368191721132897, "grad_norm": 0.3282000903566724, "learning_rate": 0.0009894160080814061, "loss": 2.7856, "step": 817 }, { "epoch": 0.09379658296066964, "grad_norm": 0.3265395585254304, "learning_rate": 0.0009893779689508194, "loss": 2.7682, "step": 818 }, { "epoch": 0.09391124871001032, "grad_norm": 0.33936198847015137, "learning_rate": 0.0009893398623198703, "loss": 2.8295, "step": 819 }, { "epoch": 0.09402591445935099, "grad_norm": 0.3593274432801614, "learning_rate": 0.0009893016881938148, "loss": 2.8703, "step": 820 }, { "epoch": 0.09414058020869166, "grad_norm": 0.30331589000282694, "learning_rate": 0.0009892634465779185, "loss": 2.7261, "step": 821 }, { "epoch": 0.09425524595803234, "grad_norm": 0.3042107204750613, "learning_rate": 0.000989225137477456, "loss": 2.7993, "step": 822 }, { "epoch": 0.09436991170737301, "grad_norm": 0.35729493002107116, "learning_rate": 0.000989186760897711, "loss": 2.7278, "step": 823 }, { "epoch": 0.09448457745671368, "grad_norm": 0.29163803029769897, "learning_rate": 0.0009891483168439773, "loss": 2.7046, "step": 824 }, { "epoch": 0.09459924320605435, "grad_norm": 0.3272907255403229, "learning_rate": 0.000989109805321557, "loss": 2.6643, "step": 825 }, { "epoch": 0.09471390895539503, "grad_norm": 0.34726232660473333, "learning_rate": 0.0009890712263357626, "loss": 2.8609, "step": 826 }, { "epoch": 0.0948285747047357, "grad_norm": 0.3337787837719053, "learning_rate": 0.000989032579891915, "loss": 2.6659, "step": 827 }, { "epoch": 0.09494324045407637, "grad_norm": 0.3339243661461092, "learning_rate": 0.000988993865995345, "loss": 2.7355, "step": 828 }, { "epoch": 0.09505790620341704, "grad_norm": 0.3138984574716539, "learning_rate": 0.000988955084651392, "loss": 2.6804, "step": 829 }, { "epoch": 0.09517257195275772, "grad_norm": 0.33403955722498646, "learning_rate": 0.0009889162358654056, "loss": 2.7739, "step": 830 }, { "epoch": 0.09528723770209838, "grad_norm": 0.3708382108265236, "learning_rate": 0.000988877319642744, "loss": 2.7989, "step": 831 }, { "epoch": 0.09540190345143905, "grad_norm": 0.3505375742059597, "learning_rate": 0.000988838335988775, "loss": 2.7566, "step": 832 }, { "epoch": 0.09551656920077972, "grad_norm": 0.3464552324259652, "learning_rate": 0.0009887992849088754, "loss": 2.8127, "step": 833 }, { "epoch": 0.0956312349501204, "grad_norm": 0.3504053458712374, "learning_rate": 0.000988760166408432, "loss": 2.7198, "step": 834 }, { "epoch": 0.09574590069946107, "grad_norm": 0.36863131843788105, "learning_rate": 0.0009887209804928404, "loss": 2.6591, "step": 835 }, { "epoch": 0.09586056644880174, "grad_norm": 0.3420348091721313, "learning_rate": 0.0009886817271675052, "loss": 2.6698, "step": 836 }, { "epoch": 0.09597523219814241, "grad_norm": 0.35749659591741795, "learning_rate": 0.000988642406437841, "loss": 2.7573, "step": 837 }, { "epoch": 0.09608989794748309, "grad_norm": 0.3358964601273219, "learning_rate": 0.0009886030183092712, "loss": 2.7506, "step": 838 }, { "epoch": 0.09620456369682376, "grad_norm": 0.3668737893660492, "learning_rate": 0.0009885635627872285, "loss": 2.8287, "step": 839 }, { "epoch": 0.09631922944616443, "grad_norm": 0.3408134234258222, "learning_rate": 0.0009885240398771554, "loss": 2.7296, "step": 840 }, { "epoch": 0.0964338951955051, "grad_norm": 0.33747561403912596, "learning_rate": 0.0009884844495845029, "loss": 2.7521, "step": 841 }, { "epoch": 0.09654856094484578, "grad_norm": 0.34035208530181416, "learning_rate": 0.000988444791914732, "loss": 2.7553, "step": 842 }, { "epoch": 0.09666322669418645, "grad_norm": 0.31514917599931935, "learning_rate": 0.0009884050668733126, "loss": 2.6745, "step": 843 }, { "epoch": 0.09677789244352712, "grad_norm": 0.32159979058642046, "learning_rate": 0.0009883652744657244, "loss": 2.8209, "step": 844 }, { "epoch": 0.0968925581928678, "grad_norm": 0.2929272262423057, "learning_rate": 0.0009883254146974554, "loss": 2.7286, "step": 845 }, { "epoch": 0.09700722394220847, "grad_norm": 0.3247318052244169, "learning_rate": 0.0009882854875740037, "loss": 2.7182, "step": 846 }, { "epoch": 0.09712188969154914, "grad_norm": 0.3070458372044395, "learning_rate": 0.0009882454931008768, "loss": 2.7498, "step": 847 }, { "epoch": 0.09723655544088981, "grad_norm": 0.36472248262529544, "learning_rate": 0.0009882054312835907, "loss": 2.6909, "step": 848 }, { "epoch": 0.09735122119023047, "grad_norm": 0.32982848852604985, "learning_rate": 0.0009881653021276715, "loss": 2.7232, "step": 849 }, { "epoch": 0.09746588693957114, "grad_norm": 0.33090084786306473, "learning_rate": 0.0009881251056386541, "loss": 2.7515, "step": 850 }, { "epoch": 0.09758055268891182, "grad_norm": 0.29511722670965806, "learning_rate": 0.000988084841822083, "loss": 2.7181, "step": 851 }, { "epoch": 0.09769521843825249, "grad_norm": 0.3206775465844979, "learning_rate": 0.0009880445106835117, "loss": 2.7024, "step": 852 }, { "epoch": 0.09780988418759316, "grad_norm": 0.3078499440357562, "learning_rate": 0.000988004112228503, "loss": 2.6708, "step": 853 }, { "epoch": 0.09792454993693384, "grad_norm": 0.3242801278634592, "learning_rate": 0.0009879636464626294, "loss": 2.6326, "step": 854 }, { "epoch": 0.09803921568627451, "grad_norm": 0.37575789365624146, "learning_rate": 0.0009879231133914721, "loss": 2.9144, "step": 855 }, { "epoch": 0.09815388143561518, "grad_norm": 0.34069638352882087, "learning_rate": 0.000987882513020622, "loss": 2.8114, "step": 856 }, { "epoch": 0.09826854718495585, "grad_norm": 0.31619135098553197, "learning_rate": 0.000987841845355679, "loss": 2.7787, "step": 857 }, { "epoch": 0.09838321293429653, "grad_norm": 0.3510580409952719, "learning_rate": 0.0009878011104022526, "loss": 2.7922, "step": 858 }, { "epoch": 0.0984978786836372, "grad_norm": 0.3322855120955192, "learning_rate": 0.0009877603081659614, "loss": 2.856, "step": 859 }, { "epoch": 0.09861254443297787, "grad_norm": 0.3294385914718088, "learning_rate": 0.0009877194386524334, "loss": 2.7199, "step": 860 }, { "epoch": 0.09872721018231854, "grad_norm": 0.33997121857874835, "learning_rate": 0.0009876785018673054, "loss": 2.7859, "step": 861 }, { "epoch": 0.09884187593165922, "grad_norm": 0.3062185666041792, "learning_rate": 0.0009876374978162242, "loss": 2.7221, "step": 862 }, { "epoch": 0.09895654168099989, "grad_norm": 0.33245848730584676, "learning_rate": 0.0009875964265048452, "loss": 2.7374, "step": 863 }, { "epoch": 0.09907120743034056, "grad_norm": 0.31244086265623927, "learning_rate": 0.0009875552879388336, "loss": 2.7318, "step": 864 }, { "epoch": 0.09918587317968124, "grad_norm": 0.2802373680461261, "learning_rate": 0.000987514082123864, "loss": 2.6616, "step": 865 }, { "epoch": 0.0993005389290219, "grad_norm": 0.27883050987202396, "learning_rate": 0.0009874728090656193, "loss": 2.8188, "step": 866 }, { "epoch": 0.09941520467836257, "grad_norm": 0.3213334584072041, "learning_rate": 0.0009874314687697927, "loss": 2.6124, "step": 867 }, { "epoch": 0.09952987042770324, "grad_norm": 0.3096702355426216, "learning_rate": 0.0009873900612420866, "loss": 2.7301, "step": 868 }, { "epoch": 0.09964453617704391, "grad_norm": 0.29201982224099887, "learning_rate": 0.0009873485864882116, "loss": 2.8175, "step": 869 }, { "epoch": 0.09975920192638459, "grad_norm": 0.3139347670960125, "learning_rate": 0.000987307044513889, "loss": 2.7546, "step": 870 }, { "epoch": 0.09987386767572526, "grad_norm": 0.3200740611375866, "learning_rate": 0.0009872654353248486, "loss": 2.8197, "step": 871 }, { "epoch": 0.09998853342506593, "grad_norm": 0.2864072012925529, "learning_rate": 0.0009872237589268295, "loss": 2.6632, "step": 872 }, { "epoch": 0.1001031991744066, "grad_norm": 0.35511885344886157, "learning_rate": 0.00098718201532558, "loss": 2.8809, "step": 873 }, { "epoch": 0.10021786492374728, "grad_norm": 0.3586192878035327, "learning_rate": 0.0009871402045268582, "loss": 2.7683, "step": 874 }, { "epoch": 0.10033253067308795, "grad_norm": 0.3477708934347381, "learning_rate": 0.000987098326536431, "loss": 2.8264, "step": 875 }, { "epoch": 0.10044719642242862, "grad_norm": 0.3457708406259805, "learning_rate": 0.0009870563813600744, "loss": 2.709, "step": 876 }, { "epoch": 0.1005618621717693, "grad_norm": 0.31621979228012626, "learning_rate": 0.0009870143690035743, "loss": 2.7643, "step": 877 }, { "epoch": 0.10067652792110997, "grad_norm": 0.3443855725364193, "learning_rate": 0.0009869722894727251, "loss": 2.8125, "step": 878 }, { "epoch": 0.10079119367045064, "grad_norm": 0.3462186702794076, "learning_rate": 0.0009869301427733314, "loss": 2.6844, "step": 879 }, { "epoch": 0.10090585941979131, "grad_norm": 0.3668667990857489, "learning_rate": 0.000986887928911206, "loss": 2.7721, "step": 880 }, { "epoch": 0.10102052516913199, "grad_norm": 0.3307752499719894, "learning_rate": 0.0009868456478921719, "loss": 2.6889, "step": 881 }, { "epoch": 0.10113519091847266, "grad_norm": 0.3380673191616369, "learning_rate": 0.0009868032997220608, "loss": 2.7566, "step": 882 }, { "epoch": 0.10124985666781332, "grad_norm": 0.3683146489378319, "learning_rate": 0.0009867608844067136, "loss": 2.7014, "step": 883 }, { "epoch": 0.10136452241715399, "grad_norm": 0.3077492427814718, "learning_rate": 0.000986718401951981, "loss": 2.7852, "step": 884 }, { "epoch": 0.10147918816649466, "grad_norm": 0.3380882148969396, "learning_rate": 0.0009866758523637228, "loss": 2.7306, "step": 885 }, { "epoch": 0.10159385391583534, "grad_norm": 0.31518066604237777, "learning_rate": 0.0009866332356478075, "loss": 2.7999, "step": 886 }, { "epoch": 0.10170851966517601, "grad_norm": 0.321232691672563, "learning_rate": 0.000986590551810113, "loss": 2.8056, "step": 887 }, { "epoch": 0.10182318541451668, "grad_norm": 0.41336503236904193, "learning_rate": 0.0009865478008565275, "loss": 2.7064, "step": 888 }, { "epoch": 0.10193785116385735, "grad_norm": 0.31803309832691334, "learning_rate": 0.0009865049827929475, "loss": 2.7417, "step": 889 }, { "epoch": 0.10205251691319803, "grad_norm": 0.3107495413246145, "learning_rate": 0.0009864620976252785, "loss": 2.7376, "step": 890 }, { "epoch": 0.1021671826625387, "grad_norm": 0.3316336868396862, "learning_rate": 0.000986419145359436, "loss": 2.7048, "step": 891 }, { "epoch": 0.10228184841187937, "grad_norm": 0.32479074064184016, "learning_rate": 0.0009863761260013443, "loss": 2.8005, "step": 892 }, { "epoch": 0.10239651416122005, "grad_norm": 0.3731166641753746, "learning_rate": 0.0009863330395569374, "loss": 2.9088, "step": 893 }, { "epoch": 0.10251117991056072, "grad_norm": 0.3524287292259641, "learning_rate": 0.000986289886032158, "loss": 2.7194, "step": 894 }, { "epoch": 0.10262584565990139, "grad_norm": 0.3186240583943147, "learning_rate": 0.0009862466654329582, "loss": 2.7267, "step": 895 }, { "epoch": 0.10274051140924206, "grad_norm": 0.3275852320670076, "learning_rate": 0.0009862033777652997, "loss": 2.7583, "step": 896 }, { "epoch": 0.10285517715858274, "grad_norm": 0.32148248170710536, "learning_rate": 0.000986160023035153, "loss": 2.7444, "step": 897 }, { "epoch": 0.10296984290792341, "grad_norm": 0.34500432256976943, "learning_rate": 0.0009861166012484982, "loss": 2.6072, "step": 898 }, { "epoch": 0.10308450865726408, "grad_norm": 0.33632349209929197, "learning_rate": 0.0009860731124113247, "loss": 2.7418, "step": 899 }, { "epoch": 0.10319917440660474, "grad_norm": 0.3437098510618024, "learning_rate": 0.0009860295565296306, "loss": 2.7056, "step": 900 }, { "epoch": 0.10331384015594541, "grad_norm": 0.310266689409354, "learning_rate": 0.000985985933609424, "loss": 2.5563, "step": 901 }, { "epoch": 0.10342850590528609, "grad_norm": 0.33563157748964206, "learning_rate": 0.0009859422436567212, "loss": 2.7025, "step": 902 }, { "epoch": 0.10354317165462676, "grad_norm": 0.3596718715418318, "learning_rate": 0.000985898486677549, "loss": 2.7307, "step": 903 }, { "epoch": 0.10365783740396743, "grad_norm": 0.35428516359489975, "learning_rate": 0.0009858546626779425, "loss": 2.605, "step": 904 }, { "epoch": 0.1037725031533081, "grad_norm": 0.3566856497476704, "learning_rate": 0.0009858107716639464, "loss": 2.8517, "step": 905 }, { "epoch": 0.10388716890264878, "grad_norm": 0.3831760019290575, "learning_rate": 0.000985766813641615, "loss": 2.9544, "step": 906 }, { "epoch": 0.10400183465198945, "grad_norm": 0.3831545895458341, "learning_rate": 0.0009857227886170112, "loss": 2.7249, "step": 907 }, { "epoch": 0.10411650040133012, "grad_norm": 0.34025075196559396, "learning_rate": 0.0009856786965962074, "loss": 2.7835, "step": 908 }, { "epoch": 0.1042311661506708, "grad_norm": 0.3452420180323608, "learning_rate": 0.0009856345375852853, "loss": 2.7445, "step": 909 }, { "epoch": 0.10434583190001147, "grad_norm": 0.3165902527243338, "learning_rate": 0.0009855903115903357, "loss": 2.6954, "step": 910 }, { "epoch": 0.10446049764935214, "grad_norm": 0.3503213420775051, "learning_rate": 0.0009855460186174588, "loss": 2.8186, "step": 911 }, { "epoch": 0.10457516339869281, "grad_norm": 0.35478406299695237, "learning_rate": 0.000985501658672764, "loss": 2.8068, "step": 912 }, { "epoch": 0.10468982914803349, "grad_norm": 0.35661653524013215, "learning_rate": 0.0009854572317623698, "loss": 2.6419, "step": 913 }, { "epoch": 0.10480449489737416, "grad_norm": 0.3188442755029797, "learning_rate": 0.0009854127378924043, "loss": 2.746, "step": 914 }, { "epoch": 0.10491916064671483, "grad_norm": 0.3252574475793499, "learning_rate": 0.0009853681770690043, "loss": 2.875, "step": 915 }, { "epoch": 0.1050338263960555, "grad_norm": 0.3143554592782651, "learning_rate": 0.0009853235492983164, "loss": 2.7812, "step": 916 }, { "epoch": 0.10514849214539616, "grad_norm": 0.31738337216505375, "learning_rate": 0.000985278854586496, "loss": 2.6693, "step": 917 }, { "epoch": 0.10526315789473684, "grad_norm": 0.30387290517004506, "learning_rate": 0.0009852340929397076, "loss": 2.7041, "step": 918 }, { "epoch": 0.10537782364407751, "grad_norm": 0.32024002747517516, "learning_rate": 0.0009851892643641257, "loss": 2.698, "step": 919 }, { "epoch": 0.10549248939341818, "grad_norm": 0.3360100400602807, "learning_rate": 0.000985144368865933, "loss": 2.6534, "step": 920 }, { "epoch": 0.10560715514275885, "grad_norm": 0.3327923598699991, "learning_rate": 0.0009850994064513226, "loss": 2.8473, "step": 921 }, { "epoch": 0.10572182089209953, "grad_norm": 0.34937774561262047, "learning_rate": 0.000985054377126496, "loss": 2.9757, "step": 922 }, { "epoch": 0.1058364866414402, "grad_norm": 0.31184386956552707, "learning_rate": 0.0009850092808976639, "loss": 2.6291, "step": 923 }, { "epoch": 0.10595115239078087, "grad_norm": 0.2935477074302251, "learning_rate": 0.0009849641177710467, "loss": 2.7864, "step": 924 }, { "epoch": 0.10606581814012155, "grad_norm": 0.2874727577927056, "learning_rate": 0.0009849188877528736, "loss": 2.727, "step": 925 }, { "epoch": 0.10618048388946222, "grad_norm": 0.30490891533969455, "learning_rate": 0.0009848735908493834, "loss": 2.6966, "step": 926 }, { "epoch": 0.10629514963880289, "grad_norm": 0.3211296397929058, "learning_rate": 0.0009848282270668238, "loss": 2.7091, "step": 927 }, { "epoch": 0.10640981538814356, "grad_norm": 0.34374654659066245, "learning_rate": 0.000984782796411452, "loss": 2.7843, "step": 928 }, { "epoch": 0.10652448113748424, "grad_norm": 0.2896515656205755, "learning_rate": 0.0009847372988895343, "loss": 2.5801, "step": 929 }, { "epoch": 0.10663914688682491, "grad_norm": 0.3148035381061537, "learning_rate": 0.000984691734507346, "loss": 2.6524, "step": 930 }, { "epoch": 0.10675381263616558, "grad_norm": 0.3410677274491946, "learning_rate": 0.0009846461032711723, "loss": 2.6706, "step": 931 }, { "epoch": 0.10686847838550625, "grad_norm": 0.40222875297261407, "learning_rate": 0.0009846004051873066, "loss": 2.8881, "step": 932 }, { "epoch": 0.10698314413484693, "grad_norm": 0.3583531015433631, "learning_rate": 0.0009845546402620523, "loss": 2.8905, "step": 933 }, { "epoch": 0.10709780988418759, "grad_norm": 0.3246601675857077, "learning_rate": 0.0009845088085017218, "loss": 2.776, "step": 934 }, { "epoch": 0.10721247563352826, "grad_norm": 0.3225621593967708, "learning_rate": 0.000984462909912637, "loss": 2.737, "step": 935 }, { "epoch": 0.10732714138286893, "grad_norm": 0.49828952252497727, "learning_rate": 0.0009844169445011282, "loss": 2.6348, "step": 936 }, { "epoch": 0.1074418071322096, "grad_norm": 0.30757707747773966, "learning_rate": 0.0009843709122735358, "loss": 2.7335, "step": 937 }, { "epoch": 0.10755647288155028, "grad_norm": 0.2894803516290994, "learning_rate": 0.000984324813236209, "loss": 2.6535, "step": 938 }, { "epoch": 0.10767113863089095, "grad_norm": 0.33881389323803873, "learning_rate": 0.0009842786473955062, "loss": 2.6218, "step": 939 }, { "epoch": 0.10778580438023162, "grad_norm": 0.3025549629221241, "learning_rate": 0.0009842324147577954, "loss": 2.699, "step": 940 }, { "epoch": 0.1079004701295723, "grad_norm": 0.293480584674943, "learning_rate": 0.0009841861153294534, "loss": 2.7151, "step": 941 }, { "epoch": 0.10801513587891297, "grad_norm": 0.2846215062428214, "learning_rate": 0.000984139749116866, "loss": 2.6568, "step": 942 }, { "epoch": 0.10812980162825364, "grad_norm": 0.32537499448071794, "learning_rate": 0.0009840933161264288, "loss": 2.7508, "step": 943 }, { "epoch": 0.10824446737759431, "grad_norm": 0.34963029486376984, "learning_rate": 0.0009840468163645462, "loss": 2.6682, "step": 944 }, { "epoch": 0.10835913312693499, "grad_norm": 0.3921614479006567, "learning_rate": 0.0009840002498376322, "loss": 2.7917, "step": 945 }, { "epoch": 0.10847379887627566, "grad_norm": 0.3321891425571164, "learning_rate": 0.0009839536165521094, "loss": 2.7478, "step": 946 }, { "epoch": 0.10858846462561633, "grad_norm": 0.28902044139401234, "learning_rate": 0.0009839069165144103, "loss": 2.7449, "step": 947 }, { "epoch": 0.108703130374957, "grad_norm": 0.299189556004677, "learning_rate": 0.0009838601497309763, "loss": 2.7227, "step": 948 }, { "epoch": 0.10881779612429768, "grad_norm": 0.32309741640074857, "learning_rate": 0.0009838133162082578, "loss": 2.6977, "step": 949 }, { "epoch": 0.10893246187363835, "grad_norm": 0.3280750633785187, "learning_rate": 0.0009837664159527146, "loss": 2.6541, "step": 950 }, { "epoch": 0.10904712762297901, "grad_norm": 0.30808341235343445, "learning_rate": 0.0009837194489708157, "loss": 2.8301, "step": 951 }, { "epoch": 0.10916179337231968, "grad_norm": 0.30592288377064486, "learning_rate": 0.0009836724152690395, "loss": 2.7716, "step": 952 }, { "epoch": 0.10927645912166035, "grad_norm": 0.3054094715700297, "learning_rate": 0.0009836253148538731, "loss": 2.6287, "step": 953 }, { "epoch": 0.10939112487100103, "grad_norm": 0.33382065757584795, "learning_rate": 0.0009835781477318133, "loss": 2.9153, "step": 954 }, { "epoch": 0.1095057906203417, "grad_norm": 0.329303492187594, "learning_rate": 0.000983530913909366, "loss": 2.7486, "step": 955 }, { "epoch": 0.10962045636968237, "grad_norm": 0.33097903628142017, "learning_rate": 0.0009834836133930458, "loss": 2.7055, "step": 956 }, { "epoch": 0.10973512211902305, "grad_norm": 0.30387516336737525, "learning_rate": 0.0009834362461893773, "loss": 2.6755, "step": 957 }, { "epoch": 0.10984978786836372, "grad_norm": 0.32191404961036785, "learning_rate": 0.0009833888123048937, "loss": 2.7279, "step": 958 }, { "epoch": 0.10996445361770439, "grad_norm": 0.32777686743095613, "learning_rate": 0.0009833413117461378, "loss": 2.7212, "step": 959 }, { "epoch": 0.11007911936704506, "grad_norm": 0.34016131221907525, "learning_rate": 0.0009832937445196613, "loss": 2.7407, "step": 960 }, { "epoch": 0.11019378511638574, "grad_norm": 0.331922025619126, "learning_rate": 0.000983246110632025, "loss": 2.8019, "step": 961 }, { "epoch": 0.11030845086572641, "grad_norm": 0.3390794099029687, "learning_rate": 0.0009831984100897994, "loss": 2.7264, "step": 962 }, { "epoch": 0.11042311661506708, "grad_norm": 0.3049985769813946, "learning_rate": 0.0009831506428995636, "loss": 2.6779, "step": 963 }, { "epoch": 0.11053778236440776, "grad_norm": 0.33682156285593406, "learning_rate": 0.0009831028090679064, "loss": 2.7234, "step": 964 }, { "epoch": 0.11065244811374843, "grad_norm": 0.3376055357398166, "learning_rate": 0.0009830549086014254, "loss": 2.7041, "step": 965 }, { "epoch": 0.1107671138630891, "grad_norm": 0.31966388444642974, "learning_rate": 0.0009830069415067276, "loss": 2.8098, "step": 966 }, { "epoch": 0.11088177961242977, "grad_norm": 0.3581563217040166, "learning_rate": 0.0009829589077904293, "loss": 2.8812, "step": 967 }, { "epoch": 0.11099644536177045, "grad_norm": 0.3166314182435769, "learning_rate": 0.0009829108074591556, "loss": 2.6441, "step": 968 }, { "epoch": 0.1111111111111111, "grad_norm": 0.36265769557937405, "learning_rate": 0.0009828626405195412, "loss": 2.744, "step": 969 }, { "epoch": 0.11122577686045178, "grad_norm": 0.323841995827615, "learning_rate": 0.0009828144069782296, "loss": 2.8861, "step": 970 }, { "epoch": 0.11134044260979245, "grad_norm": 0.3489483636270084, "learning_rate": 0.0009827661068418738, "loss": 2.6973, "step": 971 }, { "epoch": 0.11145510835913312, "grad_norm": 0.3060280248774852, "learning_rate": 0.0009827177401171361, "loss": 2.6363, "step": 972 }, { "epoch": 0.1115697741084738, "grad_norm": 0.32495859159485824, "learning_rate": 0.0009826693068106876, "loss": 2.6866, "step": 973 }, { "epoch": 0.11168443985781447, "grad_norm": 0.30839826640265783, "learning_rate": 0.0009826208069292086, "loss": 2.7802, "step": 974 }, { "epoch": 0.11179910560715514, "grad_norm": 0.3256547528909279, "learning_rate": 0.000982572240479389, "loss": 2.7974, "step": 975 }, { "epoch": 0.11191377135649581, "grad_norm": 0.31968544062708854, "learning_rate": 0.0009825236074679274, "loss": 2.689, "step": 976 }, { "epoch": 0.11202843710583649, "grad_norm": 0.3318767779182895, "learning_rate": 0.0009824749079015318, "loss": 2.8626, "step": 977 }, { "epoch": 0.11214310285517716, "grad_norm": 0.33012813821594267, "learning_rate": 0.0009824261417869197, "loss": 2.7943, "step": 978 }, { "epoch": 0.11225776860451783, "grad_norm": 0.3132083973180823, "learning_rate": 0.000982377309130817, "loss": 2.7097, "step": 979 }, { "epoch": 0.1123724343538585, "grad_norm": 0.34153689355138867, "learning_rate": 0.0009823284099399596, "loss": 2.7981, "step": 980 }, { "epoch": 0.11248710010319918, "grad_norm": 0.33657265966277156, "learning_rate": 0.000982279444221092, "loss": 2.6785, "step": 981 }, { "epoch": 0.11260176585253985, "grad_norm": 0.3453268311427654, "learning_rate": 0.0009822304119809682, "loss": 2.6508, "step": 982 }, { "epoch": 0.11271643160188052, "grad_norm": 0.3031988198988383, "learning_rate": 0.0009821813132263513, "loss": 2.7329, "step": 983 }, { "epoch": 0.1128310973512212, "grad_norm": 0.32400632174632965, "learning_rate": 0.0009821321479640134, "loss": 2.6987, "step": 984 }, { "epoch": 0.11294576310056187, "grad_norm": 0.37276020943315163, "learning_rate": 0.0009820829162007357, "loss": 2.8721, "step": 985 }, { "epoch": 0.11306042884990253, "grad_norm": 0.3384612863614987, "learning_rate": 0.0009820336179433091, "loss": 2.9165, "step": 986 }, { "epoch": 0.1131750945992432, "grad_norm": 0.33406800241198853, "learning_rate": 0.0009819842531985337, "loss": 2.8303, "step": 987 }, { "epoch": 0.11328976034858387, "grad_norm": 0.31140342178259045, "learning_rate": 0.0009819348219732176, "loss": 2.7458, "step": 988 }, { "epoch": 0.11340442609792455, "grad_norm": 0.2962579331183836, "learning_rate": 0.0009818853242741796, "loss": 2.6805, "step": 989 }, { "epoch": 0.11351909184726522, "grad_norm": 0.28292167565820125, "learning_rate": 0.0009818357601082467, "loss": 2.8163, "step": 990 }, { "epoch": 0.11363375759660589, "grad_norm": 0.3183572013446031, "learning_rate": 0.0009817861294822551, "loss": 2.753, "step": 991 }, { "epoch": 0.11374842334594656, "grad_norm": 0.30681177470178445, "learning_rate": 0.0009817364324030506, "loss": 2.6307, "step": 992 }, { "epoch": 0.11386308909528724, "grad_norm": 0.35465836811037893, "learning_rate": 0.0009816866688774882, "loss": 2.735, "step": 993 }, { "epoch": 0.11397775484462791, "grad_norm": 0.3181644544792821, "learning_rate": 0.0009816368389124314, "loss": 2.8293, "step": 994 }, { "epoch": 0.11409242059396858, "grad_norm": 0.34047824268106885, "learning_rate": 0.0009815869425147537, "loss": 2.7191, "step": 995 }, { "epoch": 0.11420708634330926, "grad_norm": 0.32867782201404855, "learning_rate": 0.0009815369796913373, "loss": 2.7995, "step": 996 }, { "epoch": 0.11432175209264993, "grad_norm": 0.3106698619228599, "learning_rate": 0.0009814869504490731, "loss": 2.5778, "step": 997 }, { "epoch": 0.1144364178419906, "grad_norm": 0.3280618889900821, "learning_rate": 0.0009814368547948623, "loss": 2.7381, "step": 998 }, { "epoch": 0.11455108359133127, "grad_norm": 0.3132780446348568, "learning_rate": 0.0009813866927356142, "loss": 2.6518, "step": 999 }, { "epoch": 0.11466574934067195, "grad_norm": 0.32588876070319295, "learning_rate": 0.000981336464278248, "loss": 2.6893, "step": 1000 }, { "epoch": 0.11478041509001262, "grad_norm": 0.3056702948458679, "learning_rate": 0.0009812861694296917, "loss": 2.7501, "step": 1001 }, { "epoch": 0.11489508083935329, "grad_norm": 0.33183496907409415, "learning_rate": 0.0009812358081968825, "loss": 2.6796, "step": 1002 }, { "epoch": 0.11500974658869395, "grad_norm": 0.34849853679526727, "learning_rate": 0.0009811853805867668, "loss": 2.5683, "step": 1003 }, { "epoch": 0.11512441233803462, "grad_norm": 0.3031139174414606, "learning_rate": 0.0009811348866063, "loss": 2.7766, "step": 1004 }, { "epoch": 0.1152390780873753, "grad_norm": 0.29881569866931484, "learning_rate": 0.0009810843262624467, "loss": 2.7064, "step": 1005 }, { "epoch": 0.11535374383671597, "grad_norm": 0.31070186910350933, "learning_rate": 0.000981033699562181, "loss": 2.6935, "step": 1006 }, { "epoch": 0.11546840958605664, "grad_norm": 0.3222251856197005, "learning_rate": 0.0009809830065124858, "loss": 2.6614, "step": 1007 }, { "epoch": 0.11558307533539731, "grad_norm": 0.2974999123237951, "learning_rate": 0.0009809322471203534, "loss": 2.59, "step": 1008 }, { "epoch": 0.11569774108473799, "grad_norm": 0.37053362927670497, "learning_rate": 0.0009808814213927847, "loss": 2.6886, "step": 1009 }, { "epoch": 0.11581240683407866, "grad_norm": 0.34291666075195254, "learning_rate": 0.0009808305293367904, "loss": 2.7487, "step": 1010 }, { "epoch": 0.11592707258341933, "grad_norm": 0.33293631217406694, "learning_rate": 0.00098077957095939, "loss": 2.6873, "step": 1011 }, { "epoch": 0.11604173833276, "grad_norm": 0.31247215986790394, "learning_rate": 0.0009807285462676122, "loss": 2.6224, "step": 1012 }, { "epoch": 0.11615640408210068, "grad_norm": 0.32406511337872285, "learning_rate": 0.0009806774552684953, "loss": 2.6588, "step": 1013 }, { "epoch": 0.11627106983144135, "grad_norm": 0.32008670252823634, "learning_rate": 0.0009806262979690857, "loss": 2.7854, "step": 1014 }, { "epoch": 0.11638573558078202, "grad_norm": 0.3302271667543452, "learning_rate": 0.00098057507437644, "loss": 2.5637, "step": 1015 }, { "epoch": 0.1165004013301227, "grad_norm": 0.29000670475171997, "learning_rate": 0.0009805237844976234, "loss": 2.7351, "step": 1016 }, { "epoch": 0.11661506707946337, "grad_norm": 0.343621392339593, "learning_rate": 0.00098047242833971, "loss": 2.8286, "step": 1017 }, { "epoch": 0.11672973282880404, "grad_norm": 0.30661132222247955, "learning_rate": 0.0009804210059097841, "loss": 2.6705, "step": 1018 }, { "epoch": 0.11684439857814471, "grad_norm": 0.30468794069984556, "learning_rate": 0.0009803695172149382, "loss": 2.807, "step": 1019 }, { "epoch": 0.11695906432748537, "grad_norm": 0.317062828292318, "learning_rate": 0.0009803179622622738, "loss": 2.6885, "step": 1020 }, { "epoch": 0.11707373007682605, "grad_norm": 0.35112012100663853, "learning_rate": 0.0009802663410589023, "loss": 2.7305, "step": 1021 }, { "epoch": 0.11718839582616672, "grad_norm": 0.2969158129622786, "learning_rate": 0.0009802146536119437, "loss": 2.7355, "step": 1022 }, { "epoch": 0.11730306157550739, "grad_norm": 0.3039129717959189, "learning_rate": 0.0009801628999285274, "loss": 2.6856, "step": 1023 }, { "epoch": 0.11741772732484806, "grad_norm": 0.34030620948617085, "learning_rate": 0.000980111080015792, "loss": 2.7897, "step": 1024 }, { "epoch": 0.11753239307418874, "grad_norm": 0.3120216069315966, "learning_rate": 0.0009800591938808846, "loss": 2.6794, "step": 1025 }, { "epoch": 0.11764705882352941, "grad_norm": 0.3295414944699641, "learning_rate": 0.0009800072415309623, "loss": 2.708, "step": 1026 }, { "epoch": 0.11776172457287008, "grad_norm": 0.3357949298264836, "learning_rate": 0.0009799552229731907, "loss": 2.8452, "step": 1027 }, { "epoch": 0.11787639032221076, "grad_norm": 0.3125088740720094, "learning_rate": 0.0009799031382147448, "loss": 2.673, "step": 1028 }, { "epoch": 0.11799105607155143, "grad_norm": 0.31571776739798657, "learning_rate": 0.000979850987262809, "loss": 2.6635, "step": 1029 }, { "epoch": 0.1181057218208921, "grad_norm": 0.29182457208635965, "learning_rate": 0.0009797987701245761, "loss": 2.6937, "step": 1030 }, { "epoch": 0.11822038757023277, "grad_norm": 0.3181162916916301, "learning_rate": 0.0009797464868072487, "loss": 2.7581, "step": 1031 }, { "epoch": 0.11833505331957345, "grad_norm": 0.2963653908345339, "learning_rate": 0.0009796941373180384, "loss": 2.6714, "step": 1032 }, { "epoch": 0.11844971906891412, "grad_norm": 0.3201736487434816, "learning_rate": 0.0009796417216641653, "loss": 2.7829, "step": 1033 }, { "epoch": 0.11856438481825479, "grad_norm": 0.28811258884810137, "learning_rate": 0.00097958923985286, "loss": 2.6945, "step": 1034 }, { "epoch": 0.11867905056759546, "grad_norm": 0.30028330229735706, "learning_rate": 0.0009795366918913604, "loss": 2.7629, "step": 1035 }, { "epoch": 0.11879371631693614, "grad_norm": 0.29830822269162294, "learning_rate": 0.0009794840777869152, "loss": 2.6834, "step": 1036 }, { "epoch": 0.1189083820662768, "grad_norm": 0.3451975774457418, "learning_rate": 0.0009794313975467813, "loss": 2.7251, "step": 1037 }, { "epoch": 0.11902304781561747, "grad_norm": 0.28935256666239983, "learning_rate": 0.0009793786511782248, "loss": 2.6714, "step": 1038 }, { "epoch": 0.11913771356495814, "grad_norm": 0.32535749919723506, "learning_rate": 0.000979325838688521, "loss": 2.6426, "step": 1039 }, { "epoch": 0.11925237931429881, "grad_norm": 0.3547071532640959, "learning_rate": 0.000979272960084955, "loss": 2.6608, "step": 1040 }, { "epoch": 0.11936704506363949, "grad_norm": 0.32764847392744145, "learning_rate": 0.0009792200153748195, "loss": 2.8062, "step": 1041 }, { "epoch": 0.11948171081298016, "grad_norm": 0.30544866903508994, "learning_rate": 0.0009791670045654177, "loss": 2.614, "step": 1042 }, { "epoch": 0.11959637656232083, "grad_norm": 0.3180552196899113, "learning_rate": 0.0009791139276640614, "loss": 2.7191, "step": 1043 }, { "epoch": 0.1197110423116615, "grad_norm": 0.3675342377286231, "learning_rate": 0.0009790607846780718, "loss": 2.8023, "step": 1044 }, { "epoch": 0.11982570806100218, "grad_norm": 0.3314896339481721, "learning_rate": 0.0009790075756147783, "loss": 2.7575, "step": 1045 }, { "epoch": 0.11994037381034285, "grad_norm": 0.34456005189312017, "learning_rate": 0.0009789543004815207, "loss": 2.5887, "step": 1046 }, { "epoch": 0.12005503955968352, "grad_norm": 0.3729557046401906, "learning_rate": 0.000978900959285647, "loss": 2.7911, "step": 1047 }, { "epoch": 0.1201697053090242, "grad_norm": 0.30020050512761143, "learning_rate": 0.0009788475520345146, "loss": 2.7331, "step": 1048 }, { "epoch": 0.12028437105836487, "grad_norm": 0.3113337001579776, "learning_rate": 0.0009787940787354902, "loss": 2.6733, "step": 1049 }, { "epoch": 0.12039903680770554, "grad_norm": 0.3280475451522809, "learning_rate": 0.000978740539395949, "loss": 2.7824, "step": 1050 }, { "epoch": 0.12051370255704622, "grad_norm": 0.3239225129492583, "learning_rate": 0.0009786869340232761, "loss": 2.7562, "step": 1051 }, { "epoch": 0.12062836830638689, "grad_norm": 0.2962223313526621, "learning_rate": 0.0009786332626248655, "loss": 2.6495, "step": 1052 }, { "epoch": 0.12074303405572756, "grad_norm": 0.31580422421558124, "learning_rate": 0.0009785795252081199, "loss": 2.8733, "step": 1053 }, { "epoch": 0.12085769980506822, "grad_norm": 0.29448163542790834, "learning_rate": 0.000978525721780451, "loss": 2.8093, "step": 1054 }, { "epoch": 0.12097236555440889, "grad_norm": 0.3030932921534723, "learning_rate": 0.0009784718523492804, "loss": 2.7391, "step": 1055 }, { "epoch": 0.12108703130374956, "grad_norm": 0.2898693439714082, "learning_rate": 0.0009784179169220384, "loss": 2.7475, "step": 1056 }, { "epoch": 0.12120169705309024, "grad_norm": 0.3112045966633896, "learning_rate": 0.0009783639155061643, "loss": 2.7234, "step": 1057 }, { "epoch": 0.12131636280243091, "grad_norm": 0.3208099404963717, "learning_rate": 0.0009783098481091063, "loss": 2.8031, "step": 1058 }, { "epoch": 0.12143102855177158, "grad_norm": 0.31157686899145115, "learning_rate": 0.0009782557147383225, "loss": 2.7221, "step": 1059 }, { "epoch": 0.12154569430111226, "grad_norm": 0.3463634072050334, "learning_rate": 0.0009782015154012789, "loss": 2.7012, "step": 1060 }, { "epoch": 0.12166036005045293, "grad_norm": 0.3319752121692342, "learning_rate": 0.0009781472501054517, "loss": 2.6427, "step": 1061 }, { "epoch": 0.1217750257997936, "grad_norm": 0.31729331292484186, "learning_rate": 0.0009780929188583256, "loss": 2.7806, "step": 1062 }, { "epoch": 0.12188969154913427, "grad_norm": 0.30301639848841, "learning_rate": 0.000978038521667395, "loss": 2.7263, "step": 1063 }, { "epoch": 0.12200435729847495, "grad_norm": 0.3009568079740633, "learning_rate": 0.000977984058540162, "loss": 2.682, "step": 1064 }, { "epoch": 0.12211902304781562, "grad_norm": 0.3228957479191051, "learning_rate": 0.0009779295294841397, "loss": 2.6556, "step": 1065 }, { "epoch": 0.12223368879715629, "grad_norm": 0.3121393966380177, "learning_rate": 0.0009778749345068487, "loss": 2.709, "step": 1066 }, { "epoch": 0.12234835454649697, "grad_norm": 0.3048310318104447, "learning_rate": 0.00097782027361582, "loss": 2.6889, "step": 1067 }, { "epoch": 0.12246302029583764, "grad_norm": 0.2989077674025916, "learning_rate": 0.0009777655468185924, "loss": 2.8158, "step": 1068 }, { "epoch": 0.12257768604517831, "grad_norm": 0.3088123474556999, "learning_rate": 0.0009777107541227147, "loss": 2.6555, "step": 1069 }, { "epoch": 0.12269235179451898, "grad_norm": 0.2939190704120615, "learning_rate": 0.0009776558955357443, "loss": 2.747, "step": 1070 }, { "epoch": 0.12280701754385964, "grad_norm": 0.30798364879326584, "learning_rate": 0.0009776009710652483, "loss": 2.8674, "step": 1071 }, { "epoch": 0.12292168329320032, "grad_norm": 0.2910270897505483, "learning_rate": 0.0009775459807188022, "loss": 2.6254, "step": 1072 }, { "epoch": 0.12303634904254099, "grad_norm": 0.29274768438081084, "learning_rate": 0.0009774909245039909, "loss": 2.5372, "step": 1073 }, { "epoch": 0.12315101479188166, "grad_norm": 0.29508794714123326, "learning_rate": 0.0009774358024284082, "loss": 2.8244, "step": 1074 }, { "epoch": 0.12326568054122233, "grad_norm": 0.29727437146382446, "learning_rate": 0.0009773806144996575, "loss": 2.7433, "step": 1075 }, { "epoch": 0.123380346290563, "grad_norm": 0.3039161296405047, "learning_rate": 0.0009773253607253507, "loss": 2.6833, "step": 1076 }, { "epoch": 0.12349501203990368, "grad_norm": 0.30848971838431627, "learning_rate": 0.000977270041113109, "loss": 2.7021, "step": 1077 }, { "epoch": 0.12360967778924435, "grad_norm": 0.28643350409160817, "learning_rate": 0.0009772146556705629, "loss": 2.6838, "step": 1078 }, { "epoch": 0.12372434353858502, "grad_norm": 0.3176345510416603, "learning_rate": 0.0009771592044053512, "loss": 2.6565, "step": 1079 }, { "epoch": 0.1238390092879257, "grad_norm": 0.3207402413299687, "learning_rate": 0.000977103687325123, "loss": 2.7815, "step": 1080 }, { "epoch": 0.12395367503726637, "grad_norm": 0.32518560099990723, "learning_rate": 0.0009770481044375356, "loss": 2.6238, "step": 1081 }, { "epoch": 0.12406834078660704, "grad_norm": 0.33220037435475425, "learning_rate": 0.0009769924557502553, "loss": 2.6612, "step": 1082 }, { "epoch": 0.12418300653594772, "grad_norm": 0.3233835772555074, "learning_rate": 0.0009769367412709585, "loss": 2.695, "step": 1083 }, { "epoch": 0.12429767228528839, "grad_norm": 0.3053809351215719, "learning_rate": 0.0009768809610073291, "loss": 2.6801, "step": 1084 }, { "epoch": 0.12441233803462906, "grad_norm": 0.3552921644812118, "learning_rate": 0.0009768251149670614, "loss": 2.6848, "step": 1085 }, { "epoch": 0.12452700378396973, "grad_norm": 0.31310158450572734, "learning_rate": 0.000976769203157858, "loss": 2.764, "step": 1086 }, { "epoch": 0.1246416695333104, "grad_norm": 0.3252755786154215, "learning_rate": 0.0009767132255874315, "loss": 2.6496, "step": 1087 }, { "epoch": 0.12475633528265107, "grad_norm": 0.3030813600163628, "learning_rate": 0.0009766571822635022, "loss": 2.7656, "step": 1088 }, { "epoch": 0.12487100103199174, "grad_norm": 0.3457076938028077, "learning_rate": 0.0009766010731938007, "loss": 2.7874, "step": 1089 }, { "epoch": 0.12498566678133241, "grad_norm": 0.34068990976322255, "learning_rate": 0.0009765448983860658, "loss": 2.7769, "step": 1090 }, { "epoch": 0.1251003325306731, "grad_norm": 0.309044070096772, "learning_rate": 0.0009764886578480461, "loss": 2.6511, "step": 1091 }, { "epoch": 0.12521499828001376, "grad_norm": 0.32484793870159917, "learning_rate": 0.0009764323515874986, "loss": 2.6819, "step": 1092 }, { "epoch": 0.12532966402935444, "grad_norm": 0.2935510513701517, "learning_rate": 0.00097637597961219, "loss": 2.7808, "step": 1093 }, { "epoch": 0.1254443297786951, "grad_norm": 0.332200265241668, "learning_rate": 0.0009763195419298955, "loss": 2.8234, "step": 1094 }, { "epoch": 0.1255589955280358, "grad_norm": 0.2970582850114562, "learning_rate": 0.0009762630385483997, "loss": 2.6976, "step": 1095 }, { "epoch": 0.12567366127737645, "grad_norm": 0.302287282907232, "learning_rate": 0.000976206469475496, "loss": 2.6992, "step": 1096 }, { "epoch": 0.1257883270267171, "grad_norm": 0.31141846609684387, "learning_rate": 0.0009761498347189872, "loss": 2.7116, "step": 1097 }, { "epoch": 0.1259029927760578, "grad_norm": 0.2976244344291253, "learning_rate": 0.000976093134286685, "loss": 2.565, "step": 1098 }, { "epoch": 0.12601765852539845, "grad_norm": 0.30526636304771265, "learning_rate": 0.0009760363681864102, "loss": 2.7651, "step": 1099 }, { "epoch": 0.12613232427473914, "grad_norm": 0.2797712550011633, "learning_rate": 0.0009759795364259923, "loss": 2.5988, "step": 1100 }, { "epoch": 0.1262469900240798, "grad_norm": 0.3166073089497224, "learning_rate": 0.0009759226390132704, "loss": 2.7872, "step": 1101 }, { "epoch": 0.12636165577342048, "grad_norm": 0.30187441498020534, "learning_rate": 0.0009758656759560923, "loss": 2.7741, "step": 1102 }, { "epoch": 0.12647632152276114, "grad_norm": 0.29614941276690565, "learning_rate": 0.0009758086472623151, "loss": 2.619, "step": 1103 }, { "epoch": 0.12659098727210183, "grad_norm": 0.3249053678561645, "learning_rate": 0.0009757515529398047, "loss": 2.6033, "step": 1104 }, { "epoch": 0.1267056530214425, "grad_norm": 0.2921780022441539, "learning_rate": 0.0009756943929964363, "loss": 2.7473, "step": 1105 }, { "epoch": 0.12682031877078317, "grad_norm": 0.3330855539494519, "learning_rate": 0.0009756371674400939, "loss": 2.6536, "step": 1106 }, { "epoch": 0.12693498452012383, "grad_norm": 0.2788316353800275, "learning_rate": 0.0009755798762786707, "loss": 2.6924, "step": 1107 }, { "epoch": 0.12704965026946452, "grad_norm": 0.31732418324080036, "learning_rate": 0.0009755225195200689, "loss": 2.8171, "step": 1108 }, { "epoch": 0.12716431601880518, "grad_norm": 0.35264739938729056, "learning_rate": 0.0009754650971722, "loss": 2.655, "step": 1109 }, { "epoch": 0.12727898176814587, "grad_norm": 0.30980758203345504, "learning_rate": 0.000975407609242984, "loss": 2.6685, "step": 1110 }, { "epoch": 0.12739364751748652, "grad_norm": 0.3273127622839704, "learning_rate": 0.0009753500557403504, "loss": 2.7617, "step": 1111 }, { "epoch": 0.1275083132668272, "grad_norm": 0.3459824726805483, "learning_rate": 0.0009752924366722376, "loss": 2.8057, "step": 1112 }, { "epoch": 0.12762297901616787, "grad_norm": 0.31908221668773695, "learning_rate": 0.0009752347520465931, "loss": 2.7207, "step": 1113 }, { "epoch": 0.12773764476550853, "grad_norm": 0.3259372237077058, "learning_rate": 0.0009751770018713734, "loss": 2.6697, "step": 1114 }, { "epoch": 0.12785231051484922, "grad_norm": 0.3019415271769574, "learning_rate": 0.0009751191861545439, "loss": 2.6886, "step": 1115 }, { "epoch": 0.12796697626418987, "grad_norm": 0.3475581757215973, "learning_rate": 0.0009750613049040792, "loss": 2.6857, "step": 1116 }, { "epoch": 0.12808164201353056, "grad_norm": 0.30116006185853883, "learning_rate": 0.0009750033581279632, "loss": 2.8236, "step": 1117 }, { "epoch": 0.12819630776287122, "grad_norm": 0.3002910415084012, "learning_rate": 0.0009749453458341882, "loss": 2.6827, "step": 1118 }, { "epoch": 0.1283109735122119, "grad_norm": 0.2856142338336959, "learning_rate": 0.000974887268030756, "loss": 2.6603, "step": 1119 }, { "epoch": 0.12842563926155257, "grad_norm": 0.3046145549292053, "learning_rate": 0.0009748291247256774, "loss": 2.7755, "step": 1120 }, { "epoch": 0.12854030501089325, "grad_norm": 0.30041920813829953, "learning_rate": 0.000974770915926972, "loss": 2.6955, "step": 1121 }, { "epoch": 0.1286549707602339, "grad_norm": 0.3033132887627643, "learning_rate": 0.0009747126416426688, "loss": 2.7433, "step": 1122 }, { "epoch": 0.1287696365095746, "grad_norm": 0.289999150738605, "learning_rate": 0.0009746543018808057, "loss": 2.7022, "step": 1123 }, { "epoch": 0.12888430225891526, "grad_norm": 0.3081213840886586, "learning_rate": 0.000974595896649429, "loss": 2.7157, "step": 1124 }, { "epoch": 0.12899896800825594, "grad_norm": 0.3303129074752545, "learning_rate": 0.0009745374259565953, "loss": 2.6022, "step": 1125 }, { "epoch": 0.1291136337575966, "grad_norm": 0.3311561370301218, "learning_rate": 0.0009744788898103691, "loss": 2.6989, "step": 1126 }, { "epoch": 0.1292282995069373, "grad_norm": 0.3180305084579922, "learning_rate": 0.0009744202882188245, "loss": 2.6408, "step": 1127 }, { "epoch": 0.12934296525627795, "grad_norm": 0.3578259835953817, "learning_rate": 0.0009743616211900443, "loss": 2.859, "step": 1128 }, { "epoch": 0.12945763100561863, "grad_norm": 0.3181880369826049, "learning_rate": 0.0009743028887321206, "loss": 2.6293, "step": 1129 }, { "epoch": 0.1295722967549593, "grad_norm": 0.3077277311050923, "learning_rate": 0.0009742440908531545, "loss": 2.747, "step": 1130 }, { "epoch": 0.12968696250429995, "grad_norm": 0.31052912123009196, "learning_rate": 0.0009741852275612559, "loss": 2.5986, "step": 1131 }, { "epoch": 0.12980162825364064, "grad_norm": 0.32189877072938033, "learning_rate": 0.0009741262988645441, "loss": 2.7397, "step": 1132 }, { "epoch": 0.1299162940029813, "grad_norm": 0.30705193054249585, "learning_rate": 0.000974067304771147, "loss": 2.6544, "step": 1133 }, { "epoch": 0.13003095975232198, "grad_norm": 0.2875146345861382, "learning_rate": 0.0009740082452892017, "loss": 2.5831, "step": 1134 }, { "epoch": 0.13014562550166264, "grad_norm": 0.32572832711086697, "learning_rate": 0.0009739491204268545, "loss": 2.7258, "step": 1135 }, { "epoch": 0.13026029125100333, "grad_norm": 0.3243907333544116, "learning_rate": 0.0009738899301922602, "loss": 2.7482, "step": 1136 }, { "epoch": 0.130374957000344, "grad_norm": 0.2883622238077074, "learning_rate": 0.0009738306745935833, "loss": 2.7093, "step": 1137 }, { "epoch": 0.13048962274968468, "grad_norm": 0.30196184124992387, "learning_rate": 0.0009737713536389969, "loss": 2.6681, "step": 1138 }, { "epoch": 0.13060428849902533, "grad_norm": 0.3159333031052521, "learning_rate": 0.0009737119673366832, "loss": 2.6835, "step": 1139 }, { "epoch": 0.13071895424836602, "grad_norm": 0.3368608940112984, "learning_rate": 0.0009736525156948333, "loss": 2.6257, "step": 1140 }, { "epoch": 0.13083361999770668, "grad_norm": 0.2800491980984583, "learning_rate": 0.0009735929987216476, "loss": 2.645, "step": 1141 }, { "epoch": 0.13094828574704737, "grad_norm": 0.32048987322587724, "learning_rate": 0.0009735334164253351, "loss": 2.6335, "step": 1142 }, { "epoch": 0.13106295149638802, "grad_norm": 0.29757897848562, "learning_rate": 0.0009734737688141142, "loss": 2.6446, "step": 1143 }, { "epoch": 0.1311776172457287, "grad_norm": 0.33408283647977166, "learning_rate": 0.0009734140558962123, "loss": 2.8177, "step": 1144 }, { "epoch": 0.13129228299506937, "grad_norm": 0.3207080755131899, "learning_rate": 0.0009733542776798653, "loss": 2.7055, "step": 1145 }, { "epoch": 0.13140694874441006, "grad_norm": 0.29192499803903355, "learning_rate": 0.0009732944341733188, "loss": 2.7458, "step": 1146 }, { "epoch": 0.13152161449375072, "grad_norm": 0.2942113010955268, "learning_rate": 0.0009732345253848267, "loss": 2.5987, "step": 1147 }, { "epoch": 0.13163628024309137, "grad_norm": 0.2728831918675034, "learning_rate": 0.0009731745513226526, "loss": 2.7414, "step": 1148 }, { "epoch": 0.13175094599243206, "grad_norm": 0.3002007428802995, "learning_rate": 0.0009731145119950686, "loss": 2.6916, "step": 1149 }, { "epoch": 0.13186561174177272, "grad_norm": 0.3310801693208304, "learning_rate": 0.0009730544074103562, "loss": 2.7173, "step": 1150 }, { "epoch": 0.1319802774911134, "grad_norm": 0.2872578269705733, "learning_rate": 0.0009729942375768055, "loss": 2.6925, "step": 1151 }, { "epoch": 0.13209494324045407, "grad_norm": 0.3339589334365125, "learning_rate": 0.0009729340025027158, "loss": 2.877, "step": 1152 }, { "epoch": 0.13220960898979475, "grad_norm": 0.30468268859878117, "learning_rate": 0.0009728737021963954, "loss": 2.6462, "step": 1153 }, { "epoch": 0.1323242747391354, "grad_norm": 0.3090841891203495, "learning_rate": 0.0009728133366661615, "loss": 2.6178, "step": 1154 }, { "epoch": 0.1324389404884761, "grad_norm": 0.325383874425344, "learning_rate": 0.0009727529059203406, "loss": 2.7653, "step": 1155 }, { "epoch": 0.13255360623781676, "grad_norm": 0.34188021187078127, "learning_rate": 0.0009726924099672676, "loss": 2.7311, "step": 1156 }, { "epoch": 0.13266827198715744, "grad_norm": 0.31400848735268017, "learning_rate": 0.0009726318488152872, "loss": 2.7306, "step": 1157 }, { "epoch": 0.1327829377364981, "grad_norm": 0.33309705349144986, "learning_rate": 0.0009725712224727523, "loss": 2.7348, "step": 1158 }, { "epoch": 0.1328976034858388, "grad_norm": 0.34229675472637827, "learning_rate": 0.0009725105309480253, "loss": 2.6122, "step": 1159 }, { "epoch": 0.13301226923517945, "grad_norm": 0.30013825254366966, "learning_rate": 0.0009724497742494776, "loss": 2.6623, "step": 1160 }, { "epoch": 0.13312693498452013, "grad_norm": 0.3034117941043357, "learning_rate": 0.000972388952385489, "loss": 2.6084, "step": 1161 }, { "epoch": 0.1332416007338608, "grad_norm": 0.3308573757821191, "learning_rate": 0.000972328065364449, "loss": 2.6919, "step": 1162 }, { "epoch": 0.13335626648320148, "grad_norm": 0.33443760037582665, "learning_rate": 0.0009722671131947559, "loss": 2.7983, "step": 1163 }, { "epoch": 0.13347093223254214, "grad_norm": 0.28384016287149655, "learning_rate": 0.0009722060958848168, "loss": 2.7731, "step": 1164 }, { "epoch": 0.13358559798188283, "grad_norm": 0.3131121432846602, "learning_rate": 0.0009721450134430478, "loss": 2.7403, "step": 1165 }, { "epoch": 0.13370026373122348, "grad_norm": 0.3077864832515649, "learning_rate": 0.000972083865877874, "loss": 2.5896, "step": 1166 }, { "epoch": 0.13381492948056414, "grad_norm": 0.29984520552459987, "learning_rate": 0.0009720226531977296, "loss": 2.6777, "step": 1167 }, { "epoch": 0.13392959522990483, "grad_norm": 0.28246607776110233, "learning_rate": 0.0009719613754110578, "loss": 2.6972, "step": 1168 }, { "epoch": 0.1340442609792455, "grad_norm": 0.3319968429482706, "learning_rate": 0.0009719000325263109, "loss": 2.8027, "step": 1169 }, { "epoch": 0.13415892672858618, "grad_norm": 0.29292200869679685, "learning_rate": 0.0009718386245519495, "loss": 2.5638, "step": 1170 }, { "epoch": 0.13427359247792683, "grad_norm": 0.28135277511039536, "learning_rate": 0.0009717771514964439, "loss": 2.579, "step": 1171 }, { "epoch": 0.13438825822726752, "grad_norm": 0.3202216952374538, "learning_rate": 0.0009717156133682734, "loss": 2.7501, "step": 1172 }, { "epoch": 0.13450292397660818, "grad_norm": 0.3334465306023394, "learning_rate": 0.0009716540101759255, "loss": 2.8353, "step": 1173 }, { "epoch": 0.13461758972594887, "grad_norm": 0.31196363353936807, "learning_rate": 0.0009715923419278976, "loss": 2.7734, "step": 1174 }, { "epoch": 0.13473225547528953, "grad_norm": 0.3264945580334173, "learning_rate": 0.0009715306086326954, "loss": 2.5605, "step": 1175 }, { "epoch": 0.1348469212246302, "grad_norm": 0.3007343039110142, "learning_rate": 0.0009714688102988339, "loss": 2.5979, "step": 1176 }, { "epoch": 0.13496158697397087, "grad_norm": 0.3011021234522493, "learning_rate": 0.000971406946934837, "loss": 2.7176, "step": 1177 }, { "epoch": 0.13507625272331156, "grad_norm": 0.3079224691494148, "learning_rate": 0.0009713450185492378, "loss": 2.6673, "step": 1178 }, { "epoch": 0.13519091847265222, "grad_norm": 0.31251648830122747, "learning_rate": 0.0009712830251505778, "loss": 2.7343, "step": 1179 }, { "epoch": 0.1353055842219929, "grad_norm": 0.28724955637607563, "learning_rate": 0.0009712209667474079, "loss": 2.6482, "step": 1180 }, { "epoch": 0.13542024997133356, "grad_norm": 0.3007219760730107, "learning_rate": 0.0009711588433482881, "loss": 2.6916, "step": 1181 }, { "epoch": 0.13553491572067425, "grad_norm": 0.30178334823273617, "learning_rate": 0.0009710966549617868, "loss": 2.68, "step": 1182 }, { "epoch": 0.1356495814700149, "grad_norm": 0.3062269145084652, "learning_rate": 0.0009710344015964819, "loss": 2.756, "step": 1183 }, { "epoch": 0.13576424721935557, "grad_norm": 0.32190065767209086, "learning_rate": 0.00097097208326096, "loss": 2.714, "step": 1184 }, { "epoch": 0.13587891296869625, "grad_norm": 0.3168972476878082, "learning_rate": 0.000970909699963817, "loss": 2.6403, "step": 1185 }, { "epoch": 0.1359935787180369, "grad_norm": 0.36325447792357723, "learning_rate": 0.0009708472517136569, "loss": 2.7753, "step": 1186 }, { "epoch": 0.1361082444673776, "grad_norm": 0.3080950849155879, "learning_rate": 0.0009707847385190938, "loss": 2.6844, "step": 1187 }, { "epoch": 0.13622291021671826, "grad_norm": 0.33777156883881254, "learning_rate": 0.00097072216038875, "loss": 2.7788, "step": 1188 }, { "epoch": 0.13633757596605894, "grad_norm": 0.33986276703110124, "learning_rate": 0.000970659517331257, "loss": 2.6061, "step": 1189 }, { "epoch": 0.1364522417153996, "grad_norm": 0.32138342537228204, "learning_rate": 0.000970596809355255, "loss": 2.7633, "step": 1190 }, { "epoch": 0.1365669074647403, "grad_norm": 0.30510092766617586, "learning_rate": 0.0009705340364693935, "loss": 2.7706, "step": 1191 }, { "epoch": 0.13668157321408095, "grad_norm": 0.28188918035264077, "learning_rate": 0.0009704711986823311, "loss": 2.7453, "step": 1192 }, { "epoch": 0.13679623896342163, "grad_norm": 0.2794516966437732, "learning_rate": 0.0009704082960027348, "loss": 2.7083, "step": 1193 }, { "epoch": 0.1369109047127623, "grad_norm": 0.3039044119088668, "learning_rate": 0.0009703453284392807, "loss": 2.7217, "step": 1194 }, { "epoch": 0.13702557046210298, "grad_norm": 0.28055467299292364, "learning_rate": 0.0009702822960006544, "loss": 2.6503, "step": 1195 }, { "epoch": 0.13714023621144364, "grad_norm": 0.2894232734298314, "learning_rate": 0.0009702191986955494, "loss": 2.775, "step": 1196 }, { "epoch": 0.13725490196078433, "grad_norm": 0.3133021396483607, "learning_rate": 0.0009701560365326694, "loss": 2.5516, "step": 1197 }, { "epoch": 0.13736956771012498, "grad_norm": 0.30750976085729925, "learning_rate": 0.0009700928095207259, "loss": 2.6066, "step": 1198 }, { "epoch": 0.13748423345946567, "grad_norm": 0.3141226187293326, "learning_rate": 0.0009700295176684404, "loss": 2.8212, "step": 1199 }, { "epoch": 0.13759889920880633, "grad_norm": 0.33568159593202296, "learning_rate": 0.0009699661609845425, "loss": 2.6958, "step": 1200 }, { "epoch": 0.137713564958147, "grad_norm": 0.3924992017437212, "learning_rate": 0.0009699027394777708, "loss": 2.6935, "step": 1201 }, { "epoch": 0.13782823070748768, "grad_norm": 0.3238370691785521, "learning_rate": 0.0009698392531568736, "loss": 2.6071, "step": 1202 }, { "epoch": 0.13794289645682833, "grad_norm": 0.30018134253066026, "learning_rate": 0.0009697757020306072, "loss": 2.6973, "step": 1203 }, { "epoch": 0.13805756220616902, "grad_norm": 0.2702858077957881, "learning_rate": 0.0009697120861077375, "loss": 2.556, "step": 1204 }, { "epoch": 0.13817222795550968, "grad_norm": 0.3313537349141092, "learning_rate": 0.0009696484053970391, "loss": 2.6391, "step": 1205 }, { "epoch": 0.13828689370485037, "grad_norm": 0.27215853921810795, "learning_rate": 0.0009695846599072955, "loss": 2.6042, "step": 1206 }, { "epoch": 0.13840155945419103, "grad_norm": 0.2901045100650714, "learning_rate": 0.0009695208496472991, "loss": 2.8205, "step": 1207 }, { "epoch": 0.1385162252035317, "grad_norm": 0.28406915639804525, "learning_rate": 0.0009694569746258514, "loss": 2.6665, "step": 1208 }, { "epoch": 0.13863089095287237, "grad_norm": 0.31733576097274757, "learning_rate": 0.0009693930348517628, "loss": 2.6298, "step": 1209 }, { "epoch": 0.13874555670221306, "grad_norm": 0.29822463413826206, "learning_rate": 0.0009693290303338524, "loss": 2.5626, "step": 1210 }, { "epoch": 0.13886022245155372, "grad_norm": 0.3171445959280755, "learning_rate": 0.0009692649610809485, "loss": 2.6635, "step": 1211 }, { "epoch": 0.1389748882008944, "grad_norm": 0.3293705461700094, "learning_rate": 0.0009692008271018883, "loss": 2.7593, "step": 1212 }, { "epoch": 0.13908955395023506, "grad_norm": 0.32452528692554516, "learning_rate": 0.0009691366284055176, "loss": 2.6933, "step": 1213 }, { "epoch": 0.13920421969957575, "grad_norm": 0.3386564363866774, "learning_rate": 0.0009690723650006917, "loss": 2.6225, "step": 1214 }, { "epoch": 0.1393188854489164, "grad_norm": 0.32089631798757984, "learning_rate": 0.0009690080368962744, "loss": 2.6137, "step": 1215 }, { "epoch": 0.1394335511982571, "grad_norm": 0.31722679776556845, "learning_rate": 0.0009689436441011384, "loss": 2.7521, "step": 1216 }, { "epoch": 0.13954821694759775, "grad_norm": 0.3263786820169047, "learning_rate": 0.0009688791866241657, "loss": 2.6015, "step": 1217 }, { "epoch": 0.1396628826969384, "grad_norm": 0.3164623329978417, "learning_rate": 0.0009688146644742468, "loss": 2.7459, "step": 1218 }, { "epoch": 0.1397775484462791, "grad_norm": 0.3121491943468524, "learning_rate": 0.0009687500776602813, "loss": 2.7994, "step": 1219 }, { "epoch": 0.13989221419561976, "grad_norm": 0.3028278744215888, "learning_rate": 0.0009686854261911779, "loss": 2.7365, "step": 1220 }, { "epoch": 0.14000687994496044, "grad_norm": 0.30086605090098334, "learning_rate": 0.0009686207100758538, "loss": 2.5864, "step": 1221 }, { "epoch": 0.1401215456943011, "grad_norm": 0.30514876042062433, "learning_rate": 0.0009685559293232355, "loss": 2.7421, "step": 1222 }, { "epoch": 0.1402362114436418, "grad_norm": 0.3101606454694243, "learning_rate": 0.0009684910839422582, "loss": 2.7105, "step": 1223 }, { "epoch": 0.14035087719298245, "grad_norm": 0.2953717253506576, "learning_rate": 0.0009684261739418663, "loss": 2.5598, "step": 1224 }, { "epoch": 0.14046554294232313, "grad_norm": 0.27256960668823443, "learning_rate": 0.0009683611993310127, "loss": 2.6238, "step": 1225 }, { "epoch": 0.1405802086916638, "grad_norm": 0.3051491224427656, "learning_rate": 0.0009682961601186593, "loss": 2.689, "step": 1226 }, { "epoch": 0.14069487444100448, "grad_norm": 0.2915298850415209, "learning_rate": 0.000968231056313777, "loss": 2.7235, "step": 1227 }, { "epoch": 0.14080954019034514, "grad_norm": 0.28938583743316676, "learning_rate": 0.0009681658879253461, "loss": 2.7219, "step": 1228 }, { "epoch": 0.14092420593968583, "grad_norm": 0.3266776062791121, "learning_rate": 0.0009681006549623548, "loss": 2.7578, "step": 1229 }, { "epoch": 0.14103887168902648, "grad_norm": 0.2761529781871215, "learning_rate": 0.000968035357433801, "loss": 2.6032, "step": 1230 }, { "epoch": 0.14115353743836717, "grad_norm": 0.3175993078794819, "learning_rate": 0.0009679699953486913, "loss": 2.6612, "step": 1231 }, { "epoch": 0.14126820318770783, "grad_norm": 0.2931444822651926, "learning_rate": 0.0009679045687160411, "loss": 2.615, "step": 1232 }, { "epoch": 0.14138286893704852, "grad_norm": 0.290377160482274, "learning_rate": 0.0009678390775448745, "loss": 2.7659, "step": 1233 }, { "epoch": 0.14149753468638918, "grad_norm": 0.286833552871121, "learning_rate": 0.0009677735218442252, "loss": 2.527, "step": 1234 }, { "epoch": 0.14161220043572983, "grad_norm": 0.2841049027703459, "learning_rate": 0.0009677079016231349, "loss": 2.5961, "step": 1235 }, { "epoch": 0.14172686618507052, "grad_norm": 0.32630203867825636, "learning_rate": 0.000967642216890655, "loss": 2.8204, "step": 1236 }, { "epoch": 0.14184153193441118, "grad_norm": 0.3015898235692081, "learning_rate": 0.0009675764676558454, "loss": 2.8384, "step": 1237 }, { "epoch": 0.14195619768375187, "grad_norm": 0.28198854681773355, "learning_rate": 0.000967510653927775, "loss": 2.7332, "step": 1238 }, { "epoch": 0.14207086343309253, "grad_norm": 0.27416288745188, "learning_rate": 0.0009674447757155213, "loss": 2.5034, "step": 1239 }, { "epoch": 0.1421855291824332, "grad_norm": 0.28576391477179913, "learning_rate": 0.0009673788330281709, "loss": 2.718, "step": 1240 }, { "epoch": 0.14230019493177387, "grad_norm": 0.28171942651759196, "learning_rate": 0.0009673128258748199, "loss": 2.7182, "step": 1241 }, { "epoch": 0.14241486068111456, "grad_norm": 0.29435416038799517, "learning_rate": 0.0009672467542645722, "loss": 2.4713, "step": 1242 }, { "epoch": 0.14252952643045522, "grad_norm": 0.2983066143161253, "learning_rate": 0.0009671806182065414, "loss": 2.7271, "step": 1243 }, { "epoch": 0.1426441921797959, "grad_norm": 0.30426493788129755, "learning_rate": 0.0009671144177098494, "loss": 2.724, "step": 1244 }, { "epoch": 0.14275885792913656, "grad_norm": 0.3012439342830468, "learning_rate": 0.0009670481527836276, "loss": 2.6508, "step": 1245 }, { "epoch": 0.14287352367847725, "grad_norm": 0.3172671313404544, "learning_rate": 0.000966981823437016, "loss": 2.5874, "step": 1246 }, { "epoch": 0.1429881894278179, "grad_norm": 0.27924155029992187, "learning_rate": 0.0009669154296791632, "loss": 2.6983, "step": 1247 }, { "epoch": 0.1431028551771586, "grad_norm": 0.3018720099163792, "learning_rate": 0.000966848971519227, "loss": 2.7453, "step": 1248 }, { "epoch": 0.14321752092649925, "grad_norm": 0.32031332172534815, "learning_rate": 0.0009667824489663743, "loss": 2.6061, "step": 1249 }, { "epoch": 0.14333218667583994, "grad_norm": 0.30010723061919725, "learning_rate": 0.0009667158620297803, "loss": 2.7217, "step": 1250 }, { "epoch": 0.1434468524251806, "grad_norm": 0.28541072883213303, "learning_rate": 0.0009666492107186296, "loss": 2.7163, "step": 1251 }, { "epoch": 0.14356151817452126, "grad_norm": 0.295062026049857, "learning_rate": 0.0009665824950421155, "loss": 2.6173, "step": 1252 }, { "epoch": 0.14367618392386194, "grad_norm": 0.3299893458242848, "learning_rate": 0.00096651571500944, "loss": 2.5901, "step": 1253 }, { "epoch": 0.1437908496732026, "grad_norm": 0.3164751250221209, "learning_rate": 0.0009664488706298142, "loss": 2.7301, "step": 1254 }, { "epoch": 0.1439055154225433, "grad_norm": 0.2902843955436391, "learning_rate": 0.0009663819619124581, "loss": 2.6628, "step": 1255 }, { "epoch": 0.14402018117188395, "grad_norm": 0.3276007670590875, "learning_rate": 0.0009663149888666003, "loss": 2.7709, "step": 1256 }, { "epoch": 0.14413484692122464, "grad_norm": 0.31039979074424956, "learning_rate": 0.0009662479515014786, "loss": 2.7682, "step": 1257 }, { "epoch": 0.1442495126705653, "grad_norm": 0.31655599629199527, "learning_rate": 0.0009661808498263396, "loss": 2.6182, "step": 1258 }, { "epoch": 0.14436417841990598, "grad_norm": 0.3035882262475092, "learning_rate": 0.0009661136838504385, "loss": 2.6093, "step": 1259 }, { "epoch": 0.14447884416924664, "grad_norm": 0.3108294032888355, "learning_rate": 0.0009660464535830395, "loss": 2.7525, "step": 1260 }, { "epoch": 0.14459350991858733, "grad_norm": 0.31247055404509944, "learning_rate": 0.0009659791590334162, "loss": 2.7095, "step": 1261 }, { "epoch": 0.14470817566792799, "grad_norm": 0.28522240273159905, "learning_rate": 0.00096591180021085, "loss": 2.5845, "step": 1262 }, { "epoch": 0.14482284141726867, "grad_norm": 0.27594384242314346, "learning_rate": 0.0009658443771246322, "loss": 2.583, "step": 1263 }, { "epoch": 0.14493750716660933, "grad_norm": 0.3034209555955427, "learning_rate": 0.0009657768897840623, "loss": 2.6136, "step": 1264 }, { "epoch": 0.14505217291595002, "grad_norm": 0.313705184269361, "learning_rate": 0.000965709338198449, "loss": 2.6914, "step": 1265 }, { "epoch": 0.14516683866529068, "grad_norm": 0.2885115473756491, "learning_rate": 0.0009656417223771097, "loss": 2.5412, "step": 1266 }, { "epoch": 0.14528150441463136, "grad_norm": 0.2782035876080379, "learning_rate": 0.0009655740423293708, "loss": 2.7199, "step": 1267 }, { "epoch": 0.14539617016397202, "grad_norm": 0.33137507415933554, "learning_rate": 0.0009655062980645673, "loss": 2.7794, "step": 1268 }, { "epoch": 0.14551083591331268, "grad_norm": 0.2797069105889208, "learning_rate": 0.0009654384895920434, "loss": 2.6238, "step": 1269 }, { "epoch": 0.14562550166265337, "grad_norm": 0.3364891201494451, "learning_rate": 0.0009653706169211519, "loss": 2.8306, "step": 1270 }, { "epoch": 0.14574016741199403, "grad_norm": 0.29349595897046465, "learning_rate": 0.0009653026800612545, "loss": 2.6591, "step": 1271 }, { "epoch": 0.1458548331613347, "grad_norm": 0.3172380440683874, "learning_rate": 0.0009652346790217221, "loss": 2.5983, "step": 1272 }, { "epoch": 0.14596949891067537, "grad_norm": 0.2831238481203662, "learning_rate": 0.0009651666138119337, "loss": 2.7247, "step": 1273 }, { "epoch": 0.14608416466001606, "grad_norm": 0.30525842976600814, "learning_rate": 0.000965098484441278, "loss": 2.6583, "step": 1274 }, { "epoch": 0.14619883040935672, "grad_norm": 0.2872357741355747, "learning_rate": 0.0009650302909191517, "loss": 2.5642, "step": 1275 }, { "epoch": 0.1463134961586974, "grad_norm": 0.2976571065101496, "learning_rate": 0.0009649620332549613, "loss": 2.7995, "step": 1276 }, { "epoch": 0.14642816190803806, "grad_norm": 0.2978847657666802, "learning_rate": 0.0009648937114581212, "loss": 2.5875, "step": 1277 }, { "epoch": 0.14654282765737875, "grad_norm": 0.30867524647660627, "learning_rate": 0.0009648253255380554, "loss": 2.59, "step": 1278 }, { "epoch": 0.1466574934067194, "grad_norm": 0.2819728796524857, "learning_rate": 0.0009647568755041963, "loss": 2.6564, "step": 1279 }, { "epoch": 0.1467721591560601, "grad_norm": 0.304021304897499, "learning_rate": 0.0009646883613659851, "loss": 2.6794, "step": 1280 }, { "epoch": 0.14688682490540075, "grad_norm": 0.30170758502930806, "learning_rate": 0.0009646197831328725, "loss": 2.5982, "step": 1281 }, { "epoch": 0.14700149065474144, "grad_norm": 0.31465503919290083, "learning_rate": 0.0009645511408143171, "loss": 2.6388, "step": 1282 }, { "epoch": 0.1471161564040821, "grad_norm": 0.3360397066010665, "learning_rate": 0.0009644824344197872, "loss": 2.6191, "step": 1283 }, { "epoch": 0.14723082215342279, "grad_norm": 0.28568195826015264, "learning_rate": 0.0009644136639587591, "loss": 2.6031, "step": 1284 }, { "epoch": 0.14734548790276344, "grad_norm": 0.2940096806935903, "learning_rate": 0.0009643448294407186, "loss": 2.5574, "step": 1285 }, { "epoch": 0.1474601536521041, "grad_norm": 0.3064987419736337, "learning_rate": 0.0009642759308751601, "loss": 2.7071, "step": 1286 }, { "epoch": 0.1475748194014448, "grad_norm": 0.29945749626450296, "learning_rate": 0.0009642069682715868, "loss": 2.6899, "step": 1287 }, { "epoch": 0.14768948515078545, "grad_norm": 0.3032113659671877, "learning_rate": 0.0009641379416395109, "loss": 2.6868, "step": 1288 }, { "epoch": 0.14780415090012614, "grad_norm": 0.3125916681543053, "learning_rate": 0.0009640688509884532, "loss": 2.6707, "step": 1289 }, { "epoch": 0.1479188166494668, "grad_norm": 0.2904844069409456, "learning_rate": 0.0009639996963279435, "loss": 2.6969, "step": 1290 }, { "epoch": 0.14803348239880748, "grad_norm": 0.31012913672379294, "learning_rate": 0.0009639304776675204, "loss": 2.6674, "step": 1291 }, { "epoch": 0.14814814814814814, "grad_norm": 0.2829229815200425, "learning_rate": 0.0009638611950167311, "loss": 2.713, "step": 1292 }, { "epoch": 0.14826281389748883, "grad_norm": 0.29547750840697584, "learning_rate": 0.000963791848385132, "loss": 2.6275, "step": 1293 }, { "epoch": 0.14837747964682949, "grad_norm": 0.3205000610626429, "learning_rate": 0.000963722437782288, "loss": 2.7156, "step": 1294 }, { "epoch": 0.14849214539617017, "grad_norm": 0.29463614804399874, "learning_rate": 0.0009636529632177732, "loss": 2.5619, "step": 1295 }, { "epoch": 0.14860681114551083, "grad_norm": 0.3009057778776583, "learning_rate": 0.0009635834247011701, "loss": 2.5491, "step": 1296 }, { "epoch": 0.14872147689485152, "grad_norm": 0.3635785225152186, "learning_rate": 0.0009635138222420703, "loss": 2.6911, "step": 1297 }, { "epoch": 0.14883614264419218, "grad_norm": 0.31413647923754373, "learning_rate": 0.000963444155850074, "loss": 2.8696, "step": 1298 }, { "epoch": 0.14895080839353286, "grad_norm": 0.36929621451095446, "learning_rate": 0.0009633744255347905, "loss": 2.7248, "step": 1299 }, { "epoch": 0.14906547414287352, "grad_norm": 0.30710100368153004, "learning_rate": 0.0009633046313058378, "loss": 2.7617, "step": 1300 }, { "epoch": 0.1491801398922142, "grad_norm": 1.229014264809978, "learning_rate": 0.0009632347731728424, "loss": 2.6771, "step": 1301 }, { "epoch": 0.14929480564155487, "grad_norm": 4.307041763377014, "learning_rate": 0.0009631648511454402, "loss": 3.1203, "step": 1302 }, { "epoch": 0.14940947139089553, "grad_norm": 0.3430616362839399, "learning_rate": 0.0009630948652332756, "loss": 2.8483, "step": 1303 }, { "epoch": 0.1495241371402362, "grad_norm": 0.3768162483383602, "learning_rate": 0.0009630248154460017, "loss": 2.7336, "step": 1304 }, { "epoch": 0.14963880288957687, "grad_norm": 0.4354516313054484, "learning_rate": 0.0009629547017932805, "loss": 2.8401, "step": 1305 }, { "epoch": 0.14975346863891756, "grad_norm": 0.260235487537665, "learning_rate": 0.0009628845242847829, "loss": 2.5915, "step": 1306 }, { "epoch": 0.14986813438825822, "grad_norm": 0.2779391508838878, "learning_rate": 0.0009628142829301886, "loss": 2.618, "step": 1307 }, { "epoch": 0.1499828001375989, "grad_norm": 0.29886891436959195, "learning_rate": 0.000962743977739186, "loss": 2.7232, "step": 1308 }, { "epoch": 0.15009746588693956, "grad_norm": 0.27395636672820894, "learning_rate": 0.0009626736087214724, "loss": 2.6386, "step": 1309 }, { "epoch": 0.15021213163628025, "grad_norm": 0.29509241572181727, "learning_rate": 0.0009626031758867538, "loss": 2.7764, "step": 1310 }, { "epoch": 0.1503267973856209, "grad_norm": 0.2879338193801754, "learning_rate": 0.0009625326792447451, "loss": 2.6116, "step": 1311 }, { "epoch": 0.1504414631349616, "grad_norm": 0.31216096401965693, "learning_rate": 0.00096246211880517, "loss": 2.8987, "step": 1312 }, { "epoch": 0.15055612888430225, "grad_norm": 0.2971942694288847, "learning_rate": 0.000962391494577761, "loss": 2.6521, "step": 1313 }, { "epoch": 0.15067079463364294, "grad_norm": 0.2812591210320164, "learning_rate": 0.0009623208065722592, "loss": 2.7514, "step": 1314 }, { "epoch": 0.1507854603829836, "grad_norm": 0.2969354159323737, "learning_rate": 0.0009622500547984147, "loss": 2.6615, "step": 1315 }, { "epoch": 0.15090012613232429, "grad_norm": 0.2885863454558515, "learning_rate": 0.0009621792392659867, "loss": 2.6979, "step": 1316 }, { "epoch": 0.15101479188166494, "grad_norm": 0.31343537531672244, "learning_rate": 0.0009621083599847424, "loss": 2.6475, "step": 1317 }, { "epoch": 0.15112945763100563, "grad_norm": 0.31024777573597384, "learning_rate": 0.0009620374169644583, "loss": 2.7216, "step": 1318 }, { "epoch": 0.1512441233803463, "grad_norm": 0.2778321792975303, "learning_rate": 0.0009619664102149201, "loss": 2.6305, "step": 1319 }, { "epoch": 0.15135878912968695, "grad_norm": 0.2801952025862511, "learning_rate": 0.0009618953397459211, "loss": 2.7079, "step": 1320 }, { "epoch": 0.15147345487902764, "grad_norm": 0.2912326911171677, "learning_rate": 0.0009618242055672648, "loss": 2.6208, "step": 1321 }, { "epoch": 0.1515881206283683, "grad_norm": 0.28669232514196613, "learning_rate": 0.0009617530076887624, "loss": 2.6975, "step": 1322 }, { "epoch": 0.15170278637770898, "grad_norm": 0.29980820411157194, "learning_rate": 0.0009616817461202345, "loss": 2.7435, "step": 1323 }, { "epoch": 0.15181745212704964, "grad_norm": 0.2804860123607428, "learning_rate": 0.0009616104208715101, "loss": 2.8216, "step": 1324 }, { "epoch": 0.15193211787639033, "grad_norm": 0.30058461036615736, "learning_rate": 0.0009615390319524272, "loss": 2.691, "step": 1325 }, { "epoch": 0.15204678362573099, "grad_norm": 0.28183975892337065, "learning_rate": 0.0009614675793728327, "loss": 2.63, "step": 1326 }, { "epoch": 0.15216144937507167, "grad_norm": 0.30047992958025976, "learning_rate": 0.0009613960631425818, "loss": 2.7993, "step": 1327 }, { "epoch": 0.15227611512441233, "grad_norm": 0.28242224257019927, "learning_rate": 0.000961324483271539, "loss": 2.7805, "step": 1328 }, { "epoch": 0.15239078087375302, "grad_norm": 0.2743901657006034, "learning_rate": 0.0009612528397695777, "loss": 2.5872, "step": 1329 }, { "epoch": 0.15250544662309368, "grad_norm": 0.324018616345287, "learning_rate": 0.0009611811326465791, "loss": 2.6436, "step": 1330 }, { "epoch": 0.15262011237243436, "grad_norm": 0.3094039012471273, "learning_rate": 0.0009611093619124344, "loss": 2.7481, "step": 1331 }, { "epoch": 0.15273477812177502, "grad_norm": 0.32839856892671293, "learning_rate": 0.0009610375275770427, "loss": 2.7862, "step": 1332 }, { "epoch": 0.1528494438711157, "grad_norm": 0.3036908245968126, "learning_rate": 0.0009609656296503121, "loss": 2.6353, "step": 1333 }, { "epoch": 0.15296410962045637, "grad_norm": 0.31130742252115845, "learning_rate": 0.0009608936681421599, "loss": 2.7538, "step": 1334 }, { "epoch": 0.15307877536979705, "grad_norm": 0.31403300740018963, "learning_rate": 0.0009608216430625114, "loss": 2.7097, "step": 1335 }, { "epoch": 0.1531934411191377, "grad_norm": 0.32126031604362265, "learning_rate": 0.0009607495544213014, "loss": 2.7097, "step": 1336 }, { "epoch": 0.15330810686847837, "grad_norm": 0.29996201059743044, "learning_rate": 0.000960677402228473, "loss": 2.7446, "step": 1337 }, { "epoch": 0.15342277261781906, "grad_norm": 0.31087601092189504, "learning_rate": 0.0009606051864939785, "loss": 2.5068, "step": 1338 }, { "epoch": 0.15353743836715972, "grad_norm": 0.2911594052732881, "learning_rate": 0.0009605329072277782, "loss": 2.7118, "step": 1339 }, { "epoch": 0.1536521041165004, "grad_norm": 0.32672051784529743, "learning_rate": 0.000960460564439842, "loss": 2.6055, "step": 1340 }, { "epoch": 0.15376676986584106, "grad_norm": 0.30596590504584176, "learning_rate": 0.0009603881581401482, "loss": 2.7223, "step": 1341 }, { "epoch": 0.15388143561518175, "grad_norm": 0.29495817863816126, "learning_rate": 0.0009603156883386836, "loss": 2.5937, "step": 1342 }, { "epoch": 0.1539961013645224, "grad_norm": 0.2905922889396395, "learning_rate": 0.0009602431550454442, "loss": 2.6542, "step": 1343 }, { "epoch": 0.1541107671138631, "grad_norm": 0.29273684537364775, "learning_rate": 0.0009601705582704348, "loss": 2.677, "step": 1344 }, { "epoch": 0.15422543286320375, "grad_norm": 0.3206561134807994, "learning_rate": 0.0009600978980236683, "loss": 2.7658, "step": 1345 }, { "epoch": 0.15434009861254444, "grad_norm": 0.307659871836176, "learning_rate": 0.0009600251743151672, "loss": 2.6923, "step": 1346 }, { "epoch": 0.1544547643618851, "grad_norm": 0.28028015978216897, "learning_rate": 0.0009599523871549621, "loss": 2.5286, "step": 1347 }, { "epoch": 0.1545694301112258, "grad_norm": 0.2900983177041651, "learning_rate": 0.0009598795365530928, "loss": 2.7117, "step": 1348 }, { "epoch": 0.15468409586056645, "grad_norm": 0.3021671341423222, "learning_rate": 0.0009598066225196074, "loss": 2.6965, "step": 1349 }, { "epoch": 0.15479876160990713, "grad_norm": 0.3308628975894653, "learning_rate": 0.0009597336450645633, "loss": 2.6723, "step": 1350 }, { "epoch": 0.1549134273592478, "grad_norm": 0.3150775385796687, "learning_rate": 0.000959660604198026, "loss": 2.5556, "step": 1351 }, { "epoch": 0.15502809310858848, "grad_norm": 0.3181786953365109, "learning_rate": 0.0009595874999300703, "loss": 2.7486, "step": 1352 }, { "epoch": 0.15514275885792914, "grad_norm": 0.2863765853606049, "learning_rate": 0.0009595143322707795, "loss": 2.5968, "step": 1353 }, { "epoch": 0.1552574246072698, "grad_norm": 0.301936312190553, "learning_rate": 0.0009594411012302459, "loss": 2.7436, "step": 1354 }, { "epoch": 0.15537209035661048, "grad_norm": 0.2712427155058814, "learning_rate": 0.0009593678068185701, "loss": 2.521, "step": 1355 }, { "epoch": 0.15548675610595114, "grad_norm": 0.2898303125180279, "learning_rate": 0.0009592944490458614, "loss": 2.4917, "step": 1356 }, { "epoch": 0.15560142185529183, "grad_norm": 0.3009522721902166, "learning_rate": 0.0009592210279222386, "loss": 2.8095, "step": 1357 }, { "epoch": 0.15571608760463249, "grad_norm": 0.31190759030470555, "learning_rate": 0.0009591475434578286, "loss": 2.6754, "step": 1358 }, { "epoch": 0.15583075335397317, "grad_norm": 0.2878539381987615, "learning_rate": 0.0009590739956627671, "loss": 2.7391, "step": 1359 }, { "epoch": 0.15594541910331383, "grad_norm": 0.2844123743554373, "learning_rate": 0.0009590003845471987, "loss": 2.6585, "step": 1360 }, { "epoch": 0.15606008485265452, "grad_norm": 0.28110045179738025, "learning_rate": 0.0009589267101212764, "loss": 2.521, "step": 1361 }, { "epoch": 0.15617475060199518, "grad_norm": 0.3319338094831352, "learning_rate": 0.0009588529723951625, "loss": 2.734, "step": 1362 }, { "epoch": 0.15628941635133586, "grad_norm": 0.3420508568427328, "learning_rate": 0.0009587791713790276, "loss": 2.6491, "step": 1363 }, { "epoch": 0.15640408210067652, "grad_norm": 0.28738905725529035, "learning_rate": 0.0009587053070830512, "loss": 2.7038, "step": 1364 }, { "epoch": 0.1565187478500172, "grad_norm": 0.2967616139059871, "learning_rate": 0.0009586313795174213, "loss": 2.7399, "step": 1365 }, { "epoch": 0.15663341359935787, "grad_norm": 0.28953785272990806, "learning_rate": 0.0009585573886923349, "loss": 2.6468, "step": 1366 }, { "epoch": 0.15674807934869855, "grad_norm": 0.306724069847887, "learning_rate": 0.0009584833346179977, "loss": 2.643, "step": 1367 }, { "epoch": 0.1568627450980392, "grad_norm": 0.2687579143711925, "learning_rate": 0.000958409217304624, "loss": 2.5599, "step": 1368 }, { "epoch": 0.1569774108473799, "grad_norm": 0.29070431833719695, "learning_rate": 0.0009583350367624366, "loss": 2.6963, "step": 1369 }, { "epoch": 0.15709207659672056, "grad_norm": 0.30330516645010697, "learning_rate": 0.0009582607930016678, "loss": 2.7916, "step": 1370 }, { "epoch": 0.15720674234606122, "grad_norm": 0.2804283396916566, "learning_rate": 0.0009581864860325577, "loss": 2.7007, "step": 1371 }, { "epoch": 0.1573214080954019, "grad_norm": 0.2831796065121677, "learning_rate": 0.0009581121158653558, "loss": 2.6291, "step": 1372 }, { "epoch": 0.15743607384474256, "grad_norm": 0.3284241744824253, "learning_rate": 0.0009580376825103199, "loss": 2.6397, "step": 1373 }, { "epoch": 0.15755073959408325, "grad_norm": 0.280660308281362, "learning_rate": 0.0009579631859777167, "loss": 2.5074, "step": 1374 }, { "epoch": 0.1576654053434239, "grad_norm": 0.28882643510692957, "learning_rate": 0.0009578886262778214, "loss": 2.6505, "step": 1375 }, { "epoch": 0.1577800710927646, "grad_norm": 0.2906423896698658, "learning_rate": 0.0009578140034209185, "loss": 2.657, "step": 1376 }, { "epoch": 0.15789473684210525, "grad_norm": 0.26113238524409305, "learning_rate": 0.0009577393174173004, "loss": 2.5377, "step": 1377 }, { "epoch": 0.15800940259144594, "grad_norm": 0.2861986238013753, "learning_rate": 0.0009576645682772689, "loss": 2.6617, "step": 1378 }, { "epoch": 0.1581240683407866, "grad_norm": 0.30158948399379343, "learning_rate": 0.0009575897560111339, "loss": 2.6743, "step": 1379 }, { "epoch": 0.1582387340901273, "grad_norm": 0.28893938629548654, "learning_rate": 0.0009575148806292146, "loss": 2.8124, "step": 1380 }, { "epoch": 0.15835339983946795, "grad_norm": 0.2850451670356848, "learning_rate": 0.0009574399421418388, "loss": 2.6023, "step": 1381 }, { "epoch": 0.15846806558880863, "grad_norm": 0.2812654520496973, "learning_rate": 0.0009573649405593422, "loss": 2.7464, "step": 1382 }, { "epoch": 0.1585827313381493, "grad_norm": 0.29649873676726207, "learning_rate": 0.0009572898758920704, "loss": 2.6811, "step": 1383 }, { "epoch": 0.15869739708748998, "grad_norm": 0.285323763393947, "learning_rate": 0.000957214748150377, "loss": 2.6763, "step": 1384 }, { "epoch": 0.15881206283683064, "grad_norm": 0.3112699489214442, "learning_rate": 0.0009571395573446242, "loss": 2.8377, "step": 1385 }, { "epoch": 0.15892672858617132, "grad_norm": 0.28910352731450245, "learning_rate": 0.0009570643034851835, "loss": 2.7889, "step": 1386 }, { "epoch": 0.15904139433551198, "grad_norm": 0.299228362841977, "learning_rate": 0.0009569889865824345, "loss": 2.7422, "step": 1387 }, { "epoch": 0.15915606008485264, "grad_norm": 0.32584046422645296, "learning_rate": 0.0009569136066467659, "loss": 2.6867, "step": 1388 }, { "epoch": 0.15927072583419333, "grad_norm": 0.3101293248062392, "learning_rate": 0.0009568381636885747, "loss": 2.6469, "step": 1389 }, { "epoch": 0.159385391583534, "grad_norm": 0.28817243833619927, "learning_rate": 0.0009567626577182671, "loss": 2.6394, "step": 1390 }, { "epoch": 0.15950005733287467, "grad_norm": 0.27853070935012986, "learning_rate": 0.0009566870887462573, "loss": 2.6048, "step": 1391 }, { "epoch": 0.15961472308221533, "grad_norm": 0.29278542080928277, "learning_rate": 0.0009566114567829691, "loss": 2.7467, "step": 1392 }, { "epoch": 0.15972938883155602, "grad_norm": 0.28072724997901144, "learning_rate": 0.0009565357618388342, "loss": 2.6325, "step": 1393 }, { "epoch": 0.15984405458089668, "grad_norm": 0.26508041750084166, "learning_rate": 0.0009564600039242932, "loss": 2.7655, "step": 1394 }, { "epoch": 0.15995872033023736, "grad_norm": 0.30225388915393603, "learning_rate": 0.0009563841830497957, "loss": 2.6474, "step": 1395 }, { "epoch": 0.16007338607957802, "grad_norm": 0.28257919598395925, "learning_rate": 0.0009563082992257996, "loss": 2.5329, "step": 1396 }, { "epoch": 0.1601880518289187, "grad_norm": 0.28709148095143805, "learning_rate": 0.0009562323524627716, "loss": 2.7921, "step": 1397 }, { "epoch": 0.16030271757825937, "grad_norm": 0.3124660074222078, "learning_rate": 0.0009561563427711872, "loss": 2.7546, "step": 1398 }, { "epoch": 0.16041738332760005, "grad_norm": 0.322950461699168, "learning_rate": 0.0009560802701615304, "loss": 2.5556, "step": 1399 }, { "epoch": 0.1605320490769407, "grad_norm": 0.33760268314043496, "learning_rate": 0.0009560041346442941, "loss": 2.683, "step": 1400 }, { "epoch": 0.1606467148262814, "grad_norm": 0.2736771753392101, "learning_rate": 0.0009559279362299796, "loss": 2.7207, "step": 1401 }, { "epoch": 0.16076138057562206, "grad_norm": 0.3110926171361155, "learning_rate": 0.0009558516749290971, "loss": 2.573, "step": 1402 }, { "epoch": 0.16087604632496275, "grad_norm": 0.31202705252548457, "learning_rate": 0.0009557753507521653, "loss": 2.6152, "step": 1403 }, { "epoch": 0.1609907120743034, "grad_norm": 0.27397834242002433, "learning_rate": 0.0009556989637097118, "loss": 2.6419, "step": 1404 }, { "epoch": 0.16110537782364406, "grad_norm": 0.28812009789717413, "learning_rate": 0.0009556225138122727, "loss": 2.5732, "step": 1405 }, { "epoch": 0.16122004357298475, "grad_norm": 0.28564671520490803, "learning_rate": 0.0009555460010703927, "loss": 2.7242, "step": 1406 }, { "epoch": 0.1613347093223254, "grad_norm": 0.29068215405101083, "learning_rate": 0.0009554694254946252, "loss": 2.4719, "step": 1407 }, { "epoch": 0.1614493750716661, "grad_norm": 0.2739241875294356, "learning_rate": 0.0009553927870955327, "loss": 2.8237, "step": 1408 }, { "epoch": 0.16156404082100675, "grad_norm": 0.27769375912799493, "learning_rate": 0.0009553160858836858, "loss": 2.6226, "step": 1409 }, { "epoch": 0.16167870657034744, "grad_norm": 0.3090350010972169, "learning_rate": 0.000955239321869664, "loss": 2.64, "step": 1410 }, { "epoch": 0.1617933723196881, "grad_norm": 0.2840761451661036, "learning_rate": 0.0009551624950640552, "loss": 2.5964, "step": 1411 }, { "epoch": 0.1619080380690288, "grad_norm": 0.2936312083950193, "learning_rate": 0.0009550856054774566, "loss": 2.7175, "step": 1412 }, { "epoch": 0.16202270381836945, "grad_norm": 0.29241776290349897, "learning_rate": 0.0009550086531204733, "loss": 2.675, "step": 1413 }, { "epoch": 0.16213736956771013, "grad_norm": 0.3143612356899675, "learning_rate": 0.0009549316380037196, "loss": 2.6333, "step": 1414 }, { "epoch": 0.1622520353170508, "grad_norm": 0.30064990656542856, "learning_rate": 0.0009548545601378183, "loss": 2.6607, "step": 1415 }, { "epoch": 0.16236670106639148, "grad_norm": 0.3207323618511196, "learning_rate": 0.0009547774195334007, "loss": 2.7214, "step": 1416 }, { "epoch": 0.16248136681573214, "grad_norm": 0.26272137208356916, "learning_rate": 0.0009547002162011069, "loss": 2.6222, "step": 1417 }, { "epoch": 0.16259603256507282, "grad_norm": 0.3204269605220272, "learning_rate": 0.0009546229501515856, "loss": 2.6687, "step": 1418 }, { "epoch": 0.16271069831441348, "grad_norm": 0.31773048263040615, "learning_rate": 0.0009545456213954944, "loss": 2.719, "step": 1419 }, { "epoch": 0.16282536406375417, "grad_norm": 0.309855424827149, "learning_rate": 0.000954468229943499, "loss": 2.6968, "step": 1420 }, { "epoch": 0.16294002981309483, "grad_norm": 0.2945499865623526, "learning_rate": 0.0009543907758062742, "loss": 2.5579, "step": 1421 }, { "epoch": 0.16305469556243551, "grad_norm": 0.314048331515698, "learning_rate": 0.0009543132589945034, "loss": 2.7855, "step": 1422 }, { "epoch": 0.16316936131177617, "grad_norm": 0.2865659891387558, "learning_rate": 0.0009542356795188786, "loss": 2.6914, "step": 1423 }, { "epoch": 0.16328402706111683, "grad_norm": 0.32765506143342005, "learning_rate": 0.0009541580373901002, "loss": 2.5989, "step": 1424 }, { "epoch": 0.16339869281045752, "grad_norm": 0.3048617292278364, "learning_rate": 0.0009540803326188777, "loss": 2.5351, "step": 1425 }, { "epoch": 0.16351335855979818, "grad_norm": 0.2837077774513346, "learning_rate": 0.0009540025652159288, "loss": 2.627, "step": 1426 }, { "epoch": 0.16362802430913886, "grad_norm": 0.315699924940462, "learning_rate": 0.0009539247351919802, "loss": 2.6636, "step": 1427 }, { "epoch": 0.16374269005847952, "grad_norm": 0.2765188970627581, "learning_rate": 0.0009538468425577669, "loss": 2.6547, "step": 1428 }, { "epoch": 0.1638573558078202, "grad_norm": 0.2915747658650876, "learning_rate": 0.0009537688873240327, "loss": 2.5286, "step": 1429 }, { "epoch": 0.16397202155716087, "grad_norm": 0.28059276189087545, "learning_rate": 0.0009536908695015303, "loss": 2.6726, "step": 1430 }, { "epoch": 0.16408668730650156, "grad_norm": 0.31217899244383984, "learning_rate": 0.0009536127891010205, "loss": 2.6636, "step": 1431 }, { "epoch": 0.16420135305584221, "grad_norm": 0.3194344789611659, "learning_rate": 0.000953534646133273, "loss": 2.8102, "step": 1432 }, { "epoch": 0.1643160188051829, "grad_norm": 0.30233952114902113, "learning_rate": 0.0009534564406090664, "loss": 2.5798, "step": 1433 }, { "epoch": 0.16443068455452356, "grad_norm": 0.3204618045039989, "learning_rate": 0.0009533781725391872, "loss": 2.5656, "step": 1434 }, { "epoch": 0.16454535030386425, "grad_norm": 0.3066549137474608, "learning_rate": 0.0009532998419344316, "loss": 2.6853, "step": 1435 }, { "epoch": 0.1646600160532049, "grad_norm": 0.2882620931541858, "learning_rate": 0.0009532214488056032, "loss": 2.5774, "step": 1436 }, { "epoch": 0.1647746818025456, "grad_norm": 0.28806916693313656, "learning_rate": 0.0009531429931635154, "loss": 2.5575, "step": 1437 }, { "epoch": 0.16488934755188625, "grad_norm": 0.28272016928534155, "learning_rate": 0.0009530644750189892, "loss": 2.6609, "step": 1438 }, { "epoch": 0.16500401330122694, "grad_norm": 0.2811104601283848, "learning_rate": 0.0009529858943828548, "loss": 2.6326, "step": 1439 }, { "epoch": 0.1651186790505676, "grad_norm": 0.2663009438921707, "learning_rate": 0.0009529072512659512, "loss": 2.6215, "step": 1440 }, { "epoch": 0.16523334479990825, "grad_norm": 0.2820000380483963, "learning_rate": 0.0009528285456791253, "loss": 2.6721, "step": 1441 }, { "epoch": 0.16534801054924894, "grad_norm": 0.297854268523418, "learning_rate": 0.0009527497776332334, "loss": 2.6001, "step": 1442 }, { "epoch": 0.1654626762985896, "grad_norm": 0.32222427351588706, "learning_rate": 0.0009526709471391397, "loss": 2.6269, "step": 1443 }, { "epoch": 0.1655773420479303, "grad_norm": 0.3102724064412689, "learning_rate": 0.0009525920542077176, "loss": 2.6809, "step": 1444 }, { "epoch": 0.16569200779727095, "grad_norm": 0.3167401340580198, "learning_rate": 0.0009525130988498489, "loss": 2.7132, "step": 1445 }, { "epoch": 0.16580667354661163, "grad_norm": 0.3008248309919062, "learning_rate": 0.0009524340810764237, "loss": 2.5752, "step": 1446 }, { "epoch": 0.1659213392959523, "grad_norm": 0.2953920935204927, "learning_rate": 0.0009523550008983413, "loss": 2.6058, "step": 1447 }, { "epoch": 0.16603600504529298, "grad_norm": 0.2958244286339564, "learning_rate": 0.0009522758583265092, "loss": 2.5752, "step": 1448 }, { "epoch": 0.16615067079463364, "grad_norm": 0.2708737051793529, "learning_rate": 0.0009521966533718436, "loss": 2.588, "step": 1449 }, { "epoch": 0.16626533654397432, "grad_norm": 0.28417046858287925, "learning_rate": 0.0009521173860452695, "loss": 2.626, "step": 1450 }, { "epoch": 0.16638000229331498, "grad_norm": 0.2817084696561381, "learning_rate": 0.0009520380563577198, "loss": 2.7451, "step": 1451 }, { "epoch": 0.16649466804265567, "grad_norm": 0.28974527558245117, "learning_rate": 0.000951958664320137, "loss": 2.61, "step": 1452 }, { "epoch": 0.16660933379199633, "grad_norm": 0.3230066379957491, "learning_rate": 0.0009518792099434717, "loss": 2.6368, "step": 1453 }, { "epoch": 0.16672399954133701, "grad_norm": 0.27824886635865875, "learning_rate": 0.0009517996932386827, "loss": 2.6792, "step": 1454 }, { "epoch": 0.16683866529067767, "grad_norm": 0.32774565771485126, "learning_rate": 0.0009517201142167385, "loss": 2.6827, "step": 1455 }, { "epoch": 0.16695333104001836, "grad_norm": 0.3088994123920793, "learning_rate": 0.0009516404728886148, "loss": 2.6637, "step": 1456 }, { "epoch": 0.16706799678935902, "grad_norm": 0.29449102075669653, "learning_rate": 0.000951560769265297, "loss": 2.6162, "step": 1457 }, { "epoch": 0.16718266253869968, "grad_norm": 0.3261853360388319, "learning_rate": 0.0009514810033577786, "loss": 2.5551, "step": 1458 }, { "epoch": 0.16729732828804036, "grad_norm": 0.29787856921191114, "learning_rate": 0.0009514011751770618, "loss": 2.7473, "step": 1459 }, { "epoch": 0.16741199403738102, "grad_norm": 0.2944436817765084, "learning_rate": 0.0009513212847341573, "loss": 2.6746, "step": 1460 }, { "epoch": 0.1675266597867217, "grad_norm": 0.28829425971899686, "learning_rate": 0.0009512413320400847, "loss": 2.6536, "step": 1461 }, { "epoch": 0.16764132553606237, "grad_norm": 0.3290881295071441, "learning_rate": 0.0009511613171058717, "loss": 2.6729, "step": 1462 }, { "epoch": 0.16775599128540306, "grad_norm": 0.27966983808484835, "learning_rate": 0.000951081239942555, "loss": 2.804, "step": 1463 }, { "epoch": 0.16787065703474371, "grad_norm": 0.2757834808424912, "learning_rate": 0.0009510011005611796, "loss": 2.5126, "step": 1464 }, { "epoch": 0.1679853227840844, "grad_norm": 0.27139395208258155, "learning_rate": 0.0009509208989727992, "loss": 2.7308, "step": 1465 }, { "epoch": 0.16809998853342506, "grad_norm": 0.2737602092824948, "learning_rate": 0.000950840635188476, "loss": 2.7915, "step": 1466 }, { "epoch": 0.16821465428276575, "grad_norm": 0.28300459330822075, "learning_rate": 0.0009507603092192812, "loss": 2.53, "step": 1467 }, { "epoch": 0.1683293200321064, "grad_norm": 0.28048004413677813, "learning_rate": 0.000950679921076294, "loss": 2.7427, "step": 1468 }, { "epoch": 0.1684439857814471, "grad_norm": 0.28796626270159864, "learning_rate": 0.0009505994707706023, "loss": 2.4798, "step": 1469 }, { "epoch": 0.16855865153078775, "grad_norm": 0.2602547706208409, "learning_rate": 0.000950518958313303, "loss": 2.6237, "step": 1470 }, { "epoch": 0.16867331728012844, "grad_norm": 0.31238677493304695, "learning_rate": 0.0009504383837155008, "loss": 2.7236, "step": 1471 }, { "epoch": 0.1687879830294691, "grad_norm": 0.2804170854815118, "learning_rate": 0.0009503577469883098, "loss": 2.592, "step": 1472 }, { "epoch": 0.16890264877880978, "grad_norm": 0.30053387490909567, "learning_rate": 0.0009502770481428525, "loss": 2.6539, "step": 1473 }, { "epoch": 0.16901731452815044, "grad_norm": 0.30834320472548965, "learning_rate": 0.000950196287190259, "loss": 2.756, "step": 1474 }, { "epoch": 0.1691319802774911, "grad_norm": 0.31776791179375063, "learning_rate": 0.0009501154641416695, "loss": 2.6248, "step": 1475 }, { "epoch": 0.1692466460268318, "grad_norm": 0.3087810351869066, "learning_rate": 0.0009500345790082317, "loss": 2.6473, "step": 1476 }, { "epoch": 0.16936131177617245, "grad_norm": 0.30480823299445287, "learning_rate": 0.0009499536318011019, "loss": 2.7211, "step": 1477 }, { "epoch": 0.16947597752551313, "grad_norm": 0.30880566619999633, "learning_rate": 0.0009498726225314458, "loss": 2.5683, "step": 1478 }, { "epoch": 0.1695906432748538, "grad_norm": 0.33186228339465085, "learning_rate": 0.0009497915512104367, "loss": 2.5963, "step": 1479 }, { "epoch": 0.16970530902419448, "grad_norm": 0.3111155102589005, "learning_rate": 0.0009497104178492568, "loss": 2.6648, "step": 1480 }, { "epoch": 0.16981997477353514, "grad_norm": 0.28135526141656864, "learning_rate": 0.0009496292224590973, "loss": 2.7007, "step": 1481 }, { "epoch": 0.16993464052287582, "grad_norm": 0.29578500090767396, "learning_rate": 0.000949547965051157, "loss": 2.6404, "step": 1482 }, { "epoch": 0.17004930627221648, "grad_norm": 0.2827310943911389, "learning_rate": 0.0009494666456366441, "loss": 2.5735, "step": 1483 }, { "epoch": 0.17016397202155717, "grad_norm": 0.3025309710319539, "learning_rate": 0.0009493852642267751, "loss": 2.6682, "step": 1484 }, { "epoch": 0.17027863777089783, "grad_norm": 0.28255922243287473, "learning_rate": 0.0009493038208327749, "loss": 2.5944, "step": 1485 }, { "epoch": 0.17039330352023851, "grad_norm": 0.33117238349940387, "learning_rate": 0.0009492223154658773, "loss": 2.6566, "step": 1486 }, { "epoch": 0.17050796926957917, "grad_norm": 0.30506155447438676, "learning_rate": 0.0009491407481373241, "loss": 2.6299, "step": 1487 }, { "epoch": 0.17062263501891986, "grad_norm": 0.28670429040430734, "learning_rate": 0.0009490591188583661, "loss": 2.5977, "step": 1488 }, { "epoch": 0.17073730076826052, "grad_norm": 0.31573783930321386, "learning_rate": 0.0009489774276402625, "loss": 2.6092, "step": 1489 }, { "epoch": 0.1708519665176012, "grad_norm": 0.33596673511835784, "learning_rate": 0.0009488956744942811, "loss": 2.558, "step": 1490 }, { "epoch": 0.17096663226694186, "grad_norm": 0.3138896289374198, "learning_rate": 0.0009488138594316982, "loss": 2.7614, "step": 1491 }, { "epoch": 0.17108129801628252, "grad_norm": 0.30605480146804603, "learning_rate": 0.0009487319824637983, "loss": 2.6414, "step": 1492 }, { "epoch": 0.1711959637656232, "grad_norm": 0.2904203431533111, "learning_rate": 0.0009486500436018752, "loss": 2.7069, "step": 1493 }, { "epoch": 0.17131062951496387, "grad_norm": 0.2895527923973493, "learning_rate": 0.0009485680428572308, "loss": 2.6998, "step": 1494 }, { "epoch": 0.17142529526430456, "grad_norm": 0.30183643209093675, "learning_rate": 0.0009484859802411751, "loss": 2.5405, "step": 1495 }, { "epoch": 0.17153996101364521, "grad_norm": 0.3009461864055049, "learning_rate": 0.0009484038557650274, "loss": 2.5786, "step": 1496 }, { "epoch": 0.1716546267629859, "grad_norm": 0.3177453651091308, "learning_rate": 0.0009483216694401152, "loss": 2.7656, "step": 1497 }, { "epoch": 0.17176929251232656, "grad_norm": 0.2837175084859612, "learning_rate": 0.0009482394212777745, "loss": 2.6968, "step": 1498 }, { "epoch": 0.17188395826166725, "grad_norm": 0.26602938016721817, "learning_rate": 0.0009481571112893498, "loss": 2.7052, "step": 1499 }, { "epoch": 0.1719986240110079, "grad_norm": 0.26495611851406126, "learning_rate": 0.0009480747394861944, "loss": 2.6593, "step": 1500 }, { "epoch": 0.1721132897603486, "grad_norm": 0.2693634923702362, "learning_rate": 0.0009479923058796695, "loss": 2.483, "step": 1501 }, { "epoch": 0.17222795550968925, "grad_norm": 0.25265713749306934, "learning_rate": 0.0009479098104811459, "loss": 2.5363, "step": 1502 }, { "epoch": 0.17234262125902994, "grad_norm": 0.2975013468719012, "learning_rate": 0.0009478272533020016, "loss": 2.6521, "step": 1503 }, { "epoch": 0.1724572870083706, "grad_norm": 0.2781856459368853, "learning_rate": 0.0009477446343536241, "loss": 2.7137, "step": 1504 }, { "epoch": 0.17257195275771128, "grad_norm": 0.2953848626261515, "learning_rate": 0.0009476619536474091, "loss": 2.5883, "step": 1505 }, { "epoch": 0.17268661850705194, "grad_norm": 0.28226725537786074, "learning_rate": 0.0009475792111947607, "loss": 2.6298, "step": 1506 }, { "epoch": 0.17280128425639263, "grad_norm": 0.3099605978252708, "learning_rate": 0.0009474964070070919, "loss": 2.6173, "step": 1507 }, { "epoch": 0.1729159500057333, "grad_norm": 0.3206886311737969, "learning_rate": 0.0009474135410958239, "loss": 2.7354, "step": 1508 }, { "epoch": 0.17303061575507395, "grad_norm": 0.31795945458749986, "learning_rate": 0.0009473306134723862, "loss": 2.6441, "step": 1509 }, { "epoch": 0.17314528150441463, "grad_norm": 0.33931547002360274, "learning_rate": 0.0009472476241482173, "loss": 2.6856, "step": 1510 }, { "epoch": 0.1732599472537553, "grad_norm": 0.3131751106714846, "learning_rate": 0.000947164573134764, "loss": 2.5559, "step": 1511 }, { "epoch": 0.17337461300309598, "grad_norm": 0.31278192336063265, "learning_rate": 0.0009470814604434816, "loss": 2.7362, "step": 1512 }, { "epoch": 0.17348927875243664, "grad_norm": 0.30532758607176597, "learning_rate": 0.000946998286085834, "loss": 2.8198, "step": 1513 }, { "epoch": 0.17360394450177732, "grad_norm": 0.30851306996712585, "learning_rate": 0.0009469150500732932, "loss": 2.5382, "step": 1514 }, { "epoch": 0.17371861025111798, "grad_norm": 0.3259828180809456, "learning_rate": 0.0009468317524173402, "loss": 2.6779, "step": 1515 }, { "epoch": 0.17383327600045867, "grad_norm": 0.2964282898313913, "learning_rate": 0.0009467483931294644, "loss": 2.6367, "step": 1516 }, { "epoch": 0.17394794174979933, "grad_norm": 0.3015987617548308, "learning_rate": 0.0009466649722211635, "loss": 2.7786, "step": 1517 }, { "epoch": 0.17406260749914002, "grad_norm": 0.30464791390570706, "learning_rate": 0.000946581489703944, "loss": 2.6539, "step": 1518 }, { "epoch": 0.17417727324848067, "grad_norm": 0.2745374778803253, "learning_rate": 0.0009464979455893205, "loss": 2.536, "step": 1519 }, { "epoch": 0.17429193899782136, "grad_norm": 0.27495875744278353, "learning_rate": 0.0009464143398888166, "loss": 2.5714, "step": 1520 }, { "epoch": 0.17440660474716202, "grad_norm": 0.27522383973782755, "learning_rate": 0.0009463306726139638, "loss": 2.6366, "step": 1521 }, { "epoch": 0.1745212704965027, "grad_norm": 0.29495152826234694, "learning_rate": 0.0009462469437763026, "loss": 2.7182, "step": 1522 }, { "epoch": 0.17463593624584337, "grad_norm": 0.26541693929842214, "learning_rate": 0.0009461631533873818, "loss": 2.5448, "step": 1523 }, { "epoch": 0.17475060199518405, "grad_norm": 0.2973656239417534, "learning_rate": 0.0009460793014587585, "loss": 2.5127, "step": 1524 }, { "epoch": 0.1748652677445247, "grad_norm": 0.3030775767510761, "learning_rate": 0.0009459953880019987, "loss": 2.7687, "step": 1525 }, { "epoch": 0.17497993349386537, "grad_norm": 0.2637909379493865, "learning_rate": 0.0009459114130286766, "loss": 2.703, "step": 1526 }, { "epoch": 0.17509459924320606, "grad_norm": 0.2694524706811455, "learning_rate": 0.0009458273765503749, "loss": 2.6349, "step": 1527 }, { "epoch": 0.17520926499254671, "grad_norm": 0.31073057737754145, "learning_rate": 0.0009457432785786848, "loss": 2.542, "step": 1528 }, { "epoch": 0.1753239307418874, "grad_norm": 0.30077112974795583, "learning_rate": 0.0009456591191252061, "loss": 2.8178, "step": 1529 }, { "epoch": 0.17543859649122806, "grad_norm": 0.28534000104901913, "learning_rate": 0.0009455748982015468, "loss": 2.5936, "step": 1530 }, { "epoch": 0.17555326224056875, "grad_norm": 0.2689044008676204, "learning_rate": 0.0009454906158193239, "loss": 2.753, "step": 1531 }, { "epoch": 0.1756679279899094, "grad_norm": 0.29820364657109166, "learning_rate": 0.0009454062719901624, "loss": 2.6721, "step": 1532 }, { "epoch": 0.1757825937392501, "grad_norm": 0.28398320156414253, "learning_rate": 0.0009453218667256958, "loss": 2.5474, "step": 1533 }, { "epoch": 0.17589725948859075, "grad_norm": 0.366927796191532, "learning_rate": 0.0009452374000375664, "loss": 2.6449, "step": 1534 }, { "epoch": 0.17601192523793144, "grad_norm": 0.29014059901614264, "learning_rate": 0.0009451528719374245, "loss": 2.7485, "step": 1535 }, { "epoch": 0.1761265909872721, "grad_norm": 0.3007168410628329, "learning_rate": 0.0009450682824369294, "loss": 2.6584, "step": 1536 }, { "epoch": 0.17624125673661278, "grad_norm": 0.3009065673318265, "learning_rate": 0.0009449836315477485, "loss": 2.5932, "step": 1537 }, { "epoch": 0.17635592248595344, "grad_norm": 0.29526266492601716, "learning_rate": 0.0009448989192815578, "loss": 2.5454, "step": 1538 }, { "epoch": 0.17647058823529413, "grad_norm": 0.30602915845172635, "learning_rate": 0.0009448141456500416, "loss": 2.7817, "step": 1539 }, { "epoch": 0.1765852539846348, "grad_norm": 0.3020506961474114, "learning_rate": 0.0009447293106648931, "loss": 2.6561, "step": 1540 }, { "epoch": 0.17669991973397547, "grad_norm": 0.26838638679045507, "learning_rate": 0.0009446444143378134, "loss": 2.6111, "step": 1541 }, { "epoch": 0.17681458548331613, "grad_norm": 0.29076077851293425, "learning_rate": 0.0009445594566805126, "loss": 2.5808, "step": 1542 }, { "epoch": 0.1769292512326568, "grad_norm": 0.2717047439045014, "learning_rate": 0.0009444744377047088, "loss": 2.6596, "step": 1543 }, { "epoch": 0.17704391698199748, "grad_norm": 0.3024489025390416, "learning_rate": 0.0009443893574221286, "loss": 2.6598, "step": 1544 }, { "epoch": 0.17715858273133814, "grad_norm": 0.27790608523239063, "learning_rate": 0.0009443042158445074, "loss": 2.692, "step": 1545 }, { "epoch": 0.17727324848067882, "grad_norm": 0.2970646188717731, "learning_rate": 0.000944219012983589, "loss": 2.5375, "step": 1546 }, { "epoch": 0.17738791423001948, "grad_norm": 0.2929272448653852, "learning_rate": 0.0009441337488511252, "loss": 2.5052, "step": 1547 }, { "epoch": 0.17750257997936017, "grad_norm": 0.29092786670066356, "learning_rate": 0.0009440484234588766, "loss": 2.601, "step": 1548 }, { "epoch": 0.17761724572870083, "grad_norm": 0.3089062864779452, "learning_rate": 0.0009439630368186125, "loss": 2.6262, "step": 1549 }, { "epoch": 0.17773191147804152, "grad_norm": 0.2951561950761542, "learning_rate": 0.0009438775889421102, "loss": 2.6455, "step": 1550 }, { "epoch": 0.17784657722738217, "grad_norm": 0.2916727628444164, "learning_rate": 0.0009437920798411554, "loss": 2.6278, "step": 1551 }, { "epoch": 0.17796124297672286, "grad_norm": 0.29213314274446406, "learning_rate": 0.0009437065095275429, "loss": 2.6298, "step": 1552 }, { "epoch": 0.17807590872606352, "grad_norm": 0.2651489394467703, "learning_rate": 0.0009436208780130751, "loss": 2.6627, "step": 1553 }, { "epoch": 0.1781905744754042, "grad_norm": 0.31725705792573494, "learning_rate": 0.0009435351853095633, "loss": 2.7131, "step": 1554 }, { "epoch": 0.17830524022474487, "grad_norm": 0.3156123779527773, "learning_rate": 0.0009434494314288273, "loss": 2.7252, "step": 1555 }, { "epoch": 0.17841990597408555, "grad_norm": 0.2677063535949339, "learning_rate": 0.0009433636163826951, "loss": 2.5976, "step": 1556 }, { "epoch": 0.1785345717234262, "grad_norm": 0.3281015928728765, "learning_rate": 0.0009432777401830033, "loss": 2.6502, "step": 1557 }, { "epoch": 0.1786492374727669, "grad_norm": 0.2933539569048112, "learning_rate": 0.0009431918028415969, "loss": 2.4632, "step": 1558 }, { "epoch": 0.17876390322210756, "grad_norm": 0.2944806214615233, "learning_rate": 0.0009431058043703293, "loss": 2.6128, "step": 1559 }, { "epoch": 0.17887856897144822, "grad_norm": 0.3023002824676232, "learning_rate": 0.0009430197447810625, "loss": 2.7158, "step": 1560 }, { "epoch": 0.1789932347207889, "grad_norm": 0.3144447210743638, "learning_rate": 0.0009429336240856662, "loss": 2.719, "step": 1561 }, { "epoch": 0.17910790047012956, "grad_norm": 0.2793896979780344, "learning_rate": 0.00094284744229602, "loss": 2.5493, "step": 1562 }, { "epoch": 0.17922256621947025, "grad_norm": 0.28812998331110956, "learning_rate": 0.0009427611994240104, "loss": 2.6607, "step": 1563 }, { "epoch": 0.1793372319688109, "grad_norm": 0.2503551038701681, "learning_rate": 0.0009426748954815332, "loss": 2.6102, "step": 1564 }, { "epoch": 0.1794518977181516, "grad_norm": 0.2890967294557894, "learning_rate": 0.0009425885304804922, "loss": 2.7804, "step": 1565 }, { "epoch": 0.17956656346749225, "grad_norm": 0.2702078058215934, "learning_rate": 0.0009425021044328, "loss": 2.5802, "step": 1566 }, { "epoch": 0.17968122921683294, "grad_norm": 0.24761957442675828, "learning_rate": 0.0009424156173503772, "loss": 2.6283, "step": 1567 }, { "epoch": 0.1797958949661736, "grad_norm": 0.26839976163231943, "learning_rate": 0.0009423290692451534, "loss": 2.6337, "step": 1568 }, { "epoch": 0.17991056071551428, "grad_norm": 0.24897925792663997, "learning_rate": 0.000942242460129066, "loss": 2.6027, "step": 1569 }, { "epoch": 0.18002522646485494, "grad_norm": 0.2976443813214073, "learning_rate": 0.0009421557900140612, "loss": 2.705, "step": 1570 }, { "epoch": 0.18013989221419563, "grad_norm": 0.30623174491703814, "learning_rate": 0.0009420690589120932, "loss": 2.6631, "step": 1571 }, { "epoch": 0.1802545579635363, "grad_norm": 0.29580499544813665, "learning_rate": 0.0009419822668351255, "loss": 2.7546, "step": 1572 }, { "epoch": 0.18036922371287697, "grad_norm": 0.2910336264497159, "learning_rate": 0.0009418954137951288, "loss": 2.6307, "step": 1573 }, { "epoch": 0.18048388946221763, "grad_norm": 0.3325015577864962, "learning_rate": 0.000941808499804083, "loss": 2.6504, "step": 1574 }, { "epoch": 0.18059855521155832, "grad_norm": 0.32410874444441473, "learning_rate": 0.0009417215248739764, "loss": 2.5533, "step": 1575 }, { "epoch": 0.18071322096089898, "grad_norm": 0.2823388513204848, "learning_rate": 0.0009416344890168054, "loss": 2.6224, "step": 1576 }, { "epoch": 0.18082788671023964, "grad_norm": 0.3042401970435541, "learning_rate": 0.000941547392244575, "loss": 2.6287, "step": 1577 }, { "epoch": 0.18094255245958032, "grad_norm": 0.28583859338866113, "learning_rate": 0.0009414602345692984, "loss": 2.5986, "step": 1578 }, { "epoch": 0.18105721820892098, "grad_norm": 0.3270861775025795, "learning_rate": 0.0009413730160029974, "loss": 2.8622, "step": 1579 }, { "epoch": 0.18117188395826167, "grad_norm": 0.2791183194210447, "learning_rate": 0.0009412857365577023, "loss": 2.7038, "step": 1580 }, { "epoch": 0.18128654970760233, "grad_norm": 0.26728468189240806, "learning_rate": 0.0009411983962454515, "loss": 2.6402, "step": 1581 }, { "epoch": 0.18140121545694302, "grad_norm": 0.2782027059948188, "learning_rate": 0.0009411109950782919, "loss": 2.6564, "step": 1582 }, { "epoch": 0.18151588120628367, "grad_norm": 0.27536306521063675, "learning_rate": 0.0009410235330682788, "loss": 2.648, "step": 1583 }, { "epoch": 0.18163054695562436, "grad_norm": 0.30140752326698506, "learning_rate": 0.0009409360102274761, "loss": 2.7696, "step": 1584 }, { "epoch": 0.18174521270496502, "grad_norm": 0.28476965576567537, "learning_rate": 0.0009408484265679558, "loss": 2.641, "step": 1585 }, { "epoch": 0.1818598784543057, "grad_norm": 0.2912154401832019, "learning_rate": 0.0009407607821017983, "loss": 2.5654, "step": 1586 }, { "epoch": 0.18197454420364637, "grad_norm": 0.27064674786681925, "learning_rate": 0.0009406730768410927, "loss": 2.6001, "step": 1587 }, { "epoch": 0.18208920995298705, "grad_norm": 0.3079389881158881, "learning_rate": 0.0009405853107979361, "loss": 2.5993, "step": 1588 }, { "epoch": 0.1822038757023277, "grad_norm": 0.2744699039593194, "learning_rate": 0.0009404974839844341, "loss": 2.7245, "step": 1589 }, { "epoch": 0.1823185414516684, "grad_norm": 0.28595867588557095, "learning_rate": 0.0009404095964127008, "loss": 2.609, "step": 1590 }, { "epoch": 0.18243320720100906, "grad_norm": 0.3234937555525272, "learning_rate": 0.0009403216480948589, "loss": 2.6891, "step": 1591 }, { "epoch": 0.18254787295034974, "grad_norm": 0.3045545903055496, "learning_rate": 0.0009402336390430388, "loss": 2.5994, "step": 1592 }, { "epoch": 0.1826625386996904, "grad_norm": 0.2962280238786574, "learning_rate": 0.0009401455692693798, "loss": 2.6063, "step": 1593 }, { "epoch": 0.18277720444903106, "grad_norm": 0.3396992559743812, "learning_rate": 0.0009400574387860294, "loss": 2.6353, "step": 1594 }, { "epoch": 0.18289187019837175, "grad_norm": 0.30783701879618003, "learning_rate": 0.0009399692476051436, "loss": 2.5981, "step": 1595 }, { "epoch": 0.1830065359477124, "grad_norm": 0.3159177302862465, "learning_rate": 0.0009398809957388868, "loss": 2.6042, "step": 1596 }, { "epoch": 0.1831212016970531, "grad_norm": 0.27765425844391156, "learning_rate": 0.0009397926831994314, "loss": 2.5528, "step": 1597 }, { "epoch": 0.18323586744639375, "grad_norm": 0.268450928572531, "learning_rate": 0.0009397043099989587, "loss": 2.6004, "step": 1598 }, { "epoch": 0.18335053319573444, "grad_norm": 0.2897147883884085, "learning_rate": 0.0009396158761496577, "loss": 2.7143, "step": 1599 }, { "epoch": 0.1834651989450751, "grad_norm": 0.2933284819181002, "learning_rate": 0.0009395273816637267, "loss": 2.7728, "step": 1600 }, { "epoch": 0.18357986469441578, "grad_norm": 0.30544029460470745, "learning_rate": 0.0009394388265533713, "loss": 2.7528, "step": 1601 }, { "epoch": 0.18369453044375644, "grad_norm": 0.29069796962646677, "learning_rate": 0.0009393502108308064, "loss": 2.7128, "step": 1602 }, { "epoch": 0.18380919619309713, "grad_norm": 0.31037129829178545, "learning_rate": 0.0009392615345082547, "loss": 2.8378, "step": 1603 }, { "epoch": 0.1839238619424378, "grad_norm": 0.27303452879426143, "learning_rate": 0.0009391727975979474, "loss": 2.7026, "step": 1604 }, { "epoch": 0.18403852769177848, "grad_norm": 0.30009853898275407, "learning_rate": 0.0009390840001121239, "loss": 2.6208, "step": 1605 }, { "epoch": 0.18415319344111913, "grad_norm": 0.29923071803536866, "learning_rate": 0.0009389951420630325, "loss": 2.6483, "step": 1606 }, { "epoch": 0.18426785919045982, "grad_norm": 0.2740264424118773, "learning_rate": 0.0009389062234629292, "loss": 2.6782, "step": 1607 }, { "epoch": 0.18438252493980048, "grad_norm": 0.287194991648891, "learning_rate": 0.0009388172443240788, "loss": 2.5848, "step": 1608 }, { "epoch": 0.18449719068914117, "grad_norm": 0.30150078162675387, "learning_rate": 0.0009387282046587539, "loss": 2.6558, "step": 1609 }, { "epoch": 0.18461185643848183, "grad_norm": 0.2951588308871814, "learning_rate": 0.0009386391044792363, "loss": 2.5973, "step": 1610 }, { "epoch": 0.18472652218782248, "grad_norm": 0.23524440028545218, "learning_rate": 0.0009385499437978153, "loss": 2.5132, "step": 1611 }, { "epoch": 0.18484118793716317, "grad_norm": 0.2739704447838688, "learning_rate": 0.0009384607226267891, "loss": 2.689, "step": 1612 }, { "epoch": 0.18495585368650383, "grad_norm": 0.2793427414219293, "learning_rate": 0.0009383714409784643, "loss": 2.5308, "step": 1613 }, { "epoch": 0.18507051943584452, "grad_norm": 0.30337601161979083, "learning_rate": 0.000938282098865155, "loss": 2.7024, "step": 1614 }, { "epoch": 0.18518518518518517, "grad_norm": 0.29629368290679026, "learning_rate": 0.0009381926962991847, "loss": 2.6783, "step": 1615 }, { "epoch": 0.18529985093452586, "grad_norm": 0.2801128803762986, "learning_rate": 0.0009381032332928847, "loss": 2.5971, "step": 1616 }, { "epoch": 0.18541451668386652, "grad_norm": 0.3356344561293281, "learning_rate": 0.0009380137098585946, "loss": 2.6614, "step": 1617 }, { "epoch": 0.1855291824332072, "grad_norm": 0.29673425679719284, "learning_rate": 0.0009379241260086626, "loss": 2.5374, "step": 1618 }, { "epoch": 0.18564384818254787, "grad_norm": 0.2943190784069161, "learning_rate": 0.0009378344817554449, "loss": 2.8114, "step": 1619 }, { "epoch": 0.18575851393188855, "grad_norm": 0.29149759905709227, "learning_rate": 0.0009377447771113065, "loss": 2.6624, "step": 1620 }, { "epoch": 0.1858731796812292, "grad_norm": 0.29876199012293314, "learning_rate": 0.0009376550120886203, "loss": 2.5378, "step": 1621 }, { "epoch": 0.1859878454305699, "grad_norm": 0.3116963337528239, "learning_rate": 0.0009375651866997674, "loss": 2.6813, "step": 1622 }, { "epoch": 0.18610251117991056, "grad_norm": 0.2786910569530075, "learning_rate": 0.0009374753009571379, "loss": 2.6264, "step": 1623 }, { "epoch": 0.18621717692925124, "grad_norm": 0.26753595420092785, "learning_rate": 0.0009373853548731297, "loss": 2.5009, "step": 1624 }, { "epoch": 0.1863318426785919, "grad_norm": 0.27015646330514587, "learning_rate": 0.000937295348460149, "loss": 2.6444, "step": 1625 }, { "epoch": 0.1864465084279326, "grad_norm": 0.2882331691823512, "learning_rate": 0.0009372052817306106, "loss": 2.6701, "step": 1626 }, { "epoch": 0.18656117417727325, "grad_norm": 0.2868421747540592, "learning_rate": 0.0009371151546969376, "loss": 2.5549, "step": 1627 }, { "epoch": 0.1866758399266139, "grad_norm": 0.29906532482779186, "learning_rate": 0.0009370249673715611, "loss": 2.4808, "step": 1628 }, { "epoch": 0.1867905056759546, "grad_norm": 0.3101826433512816, "learning_rate": 0.0009369347197669207, "loss": 2.7789, "step": 1629 }, { "epoch": 0.18690517142529525, "grad_norm": 0.28527036467296285, "learning_rate": 0.0009368444118954646, "loss": 2.5803, "step": 1630 }, { "epoch": 0.18701983717463594, "grad_norm": 0.29620391976219546, "learning_rate": 0.0009367540437696489, "loss": 2.7285, "step": 1631 }, { "epoch": 0.1871345029239766, "grad_norm": 0.2759342798402815, "learning_rate": 0.0009366636154019381, "loss": 2.5969, "step": 1632 }, { "epoch": 0.18724916867331728, "grad_norm": 0.27664114923108574, "learning_rate": 0.0009365731268048052, "loss": 2.6709, "step": 1633 }, { "epoch": 0.18736383442265794, "grad_norm": 0.2695354081349453, "learning_rate": 0.0009364825779907311, "loss": 2.6622, "step": 1634 }, { "epoch": 0.18747850017199863, "grad_norm": 0.2865983259279852, "learning_rate": 0.0009363919689722056, "loss": 2.588, "step": 1635 }, { "epoch": 0.1875931659213393, "grad_norm": 0.27651615557639503, "learning_rate": 0.0009363012997617264, "loss": 2.6959, "step": 1636 }, { "epoch": 0.18770783167067998, "grad_norm": 0.31206797324515645, "learning_rate": 0.0009362105703717994, "loss": 2.626, "step": 1637 }, { "epoch": 0.18782249742002063, "grad_norm": 0.2601260397485705, "learning_rate": 0.0009361197808149393, "loss": 2.583, "step": 1638 }, { "epoch": 0.18793716316936132, "grad_norm": 0.2711339295227757, "learning_rate": 0.0009360289311036688, "loss": 2.6201, "step": 1639 }, { "epoch": 0.18805182891870198, "grad_norm": 0.2888400178614798, "learning_rate": 0.0009359380212505184, "loss": 2.6415, "step": 1640 }, { "epoch": 0.18816649466804267, "grad_norm": 0.30071446604341157, "learning_rate": 0.0009358470512680278, "loss": 2.7359, "step": 1641 }, { "epoch": 0.18828116041738333, "grad_norm": 0.27292667279703003, "learning_rate": 0.0009357560211687445, "loss": 2.6477, "step": 1642 }, { "epoch": 0.188395826166724, "grad_norm": 0.2805619716769284, "learning_rate": 0.0009356649309652243, "loss": 2.5284, "step": 1643 }, { "epoch": 0.18851049191606467, "grad_norm": 0.2690250281567539, "learning_rate": 0.0009355737806700315, "loss": 2.693, "step": 1644 }, { "epoch": 0.18862515766540533, "grad_norm": 0.31442287594243556, "learning_rate": 0.0009354825702957383, "loss": 2.745, "step": 1645 }, { "epoch": 0.18873982341474602, "grad_norm": 0.26390035694890357, "learning_rate": 0.0009353912998549259, "loss": 2.4715, "step": 1646 }, { "epoch": 0.18885448916408668, "grad_norm": 0.2904274656010759, "learning_rate": 0.0009352999693601827, "loss": 2.7475, "step": 1647 }, { "epoch": 0.18896915491342736, "grad_norm": 0.2658738651093013, "learning_rate": 0.0009352085788241064, "loss": 2.5381, "step": 1648 }, { "epoch": 0.18908382066276802, "grad_norm": 0.28600584822310604, "learning_rate": 0.0009351171282593026, "loss": 2.4896, "step": 1649 }, { "epoch": 0.1891984864121087, "grad_norm": 0.2949739403293361, "learning_rate": 0.0009350256176783847, "loss": 2.7043, "step": 1650 }, { "epoch": 0.18931315216144937, "grad_norm": 0.2601698808073328, "learning_rate": 0.0009349340470939753, "loss": 2.5595, "step": 1651 }, { "epoch": 0.18942781791079005, "grad_norm": 0.2809716579863838, "learning_rate": 0.0009348424165187049, "loss": 2.8073, "step": 1652 }, { "epoch": 0.1895424836601307, "grad_norm": 0.3070053668395669, "learning_rate": 0.0009347507259652119, "loss": 2.6033, "step": 1653 }, { "epoch": 0.1896571494094714, "grad_norm": 0.3181813220673822, "learning_rate": 0.0009346589754461433, "loss": 2.6295, "step": 1654 }, { "epoch": 0.18977181515881206, "grad_norm": 0.3051686791808755, "learning_rate": 0.0009345671649741545, "loss": 2.5578, "step": 1655 }, { "epoch": 0.18988648090815274, "grad_norm": 0.294904401079894, "learning_rate": 0.0009344752945619089, "loss": 2.6184, "step": 1656 }, { "epoch": 0.1900011466574934, "grad_norm": 0.2896281238108003, "learning_rate": 0.0009343833642220781, "loss": 2.6809, "step": 1657 }, { "epoch": 0.1901158124068341, "grad_norm": 0.2853277831028136, "learning_rate": 0.0009342913739673424, "loss": 2.5593, "step": 1658 }, { "epoch": 0.19023047815617475, "grad_norm": 0.28778394604390056, "learning_rate": 0.00093419932381039, "loss": 2.6681, "step": 1659 }, { "epoch": 0.19034514390551543, "grad_norm": 0.2814948936986095, "learning_rate": 0.0009341072137639175, "loss": 2.5445, "step": 1660 }, { "epoch": 0.1904598096548561, "grad_norm": 0.28794003786753697, "learning_rate": 0.0009340150438406296, "loss": 2.5699, "step": 1661 }, { "epoch": 0.19057447540419675, "grad_norm": 0.264828278606104, "learning_rate": 0.0009339228140532396, "loss": 2.6842, "step": 1662 }, { "epoch": 0.19068914115353744, "grad_norm": 0.26794141908158825, "learning_rate": 0.0009338305244144687, "loss": 2.566, "step": 1663 }, { "epoch": 0.1908038069028781, "grad_norm": 0.2784198207148264, "learning_rate": 0.0009337381749370463, "loss": 2.6559, "step": 1664 }, { "epoch": 0.19091847265221878, "grad_norm": 0.2700447329563735, "learning_rate": 0.0009336457656337108, "loss": 2.7994, "step": 1665 }, { "epoch": 0.19103313840155944, "grad_norm": 0.24834314276895197, "learning_rate": 0.0009335532965172079, "loss": 2.6122, "step": 1666 }, { "epoch": 0.19114780415090013, "grad_norm": 0.2831720400238699, "learning_rate": 0.0009334607676002919, "loss": 2.6909, "step": 1667 }, { "epoch": 0.1912624699002408, "grad_norm": 0.28914973880105727, "learning_rate": 0.0009333681788957256, "loss": 2.6878, "step": 1668 }, { "epoch": 0.19137713564958148, "grad_norm": 0.29304060720372416, "learning_rate": 0.0009332755304162798, "loss": 2.5646, "step": 1669 }, { "epoch": 0.19149180139892213, "grad_norm": 0.25314858427775216, "learning_rate": 0.0009331828221747335, "loss": 2.5606, "step": 1670 }, { "epoch": 0.19160646714826282, "grad_norm": 0.325792760080864, "learning_rate": 0.0009330900541838741, "loss": 2.7139, "step": 1671 }, { "epoch": 0.19172113289760348, "grad_norm": 0.28787676976779847, "learning_rate": 0.0009329972264564972, "loss": 2.6344, "step": 1672 }, { "epoch": 0.19183579864694417, "grad_norm": 0.27576276754689916, "learning_rate": 0.0009329043390054066, "loss": 2.5655, "step": 1673 }, { "epoch": 0.19195046439628483, "grad_norm": 0.294794584295284, "learning_rate": 0.0009328113918434142, "loss": 2.6071, "step": 1674 }, { "epoch": 0.1920651301456255, "grad_norm": 0.3004997233442451, "learning_rate": 0.0009327183849833406, "loss": 2.6939, "step": 1675 }, { "epoch": 0.19217979589496617, "grad_norm": 0.2751277123682815, "learning_rate": 0.0009326253184380141, "loss": 2.6713, "step": 1676 }, { "epoch": 0.19229446164430686, "grad_norm": 0.2867734252046316, "learning_rate": 0.0009325321922202716, "loss": 2.6944, "step": 1677 }, { "epoch": 0.19240912739364752, "grad_norm": 0.27641770654652825, "learning_rate": 0.0009324390063429578, "loss": 2.5355, "step": 1678 }, { "epoch": 0.1925237931429882, "grad_norm": 0.26458423972910705, "learning_rate": 0.0009323457608189263, "loss": 2.733, "step": 1679 }, { "epoch": 0.19263845889232886, "grad_norm": 0.2732839981523438, "learning_rate": 0.0009322524556610384, "loss": 2.5572, "step": 1680 }, { "epoch": 0.19275312464166952, "grad_norm": 0.29258284583202737, "learning_rate": 0.0009321590908821635, "loss": 2.6516, "step": 1681 }, { "epoch": 0.1928677903910102, "grad_norm": 0.26813493127264015, "learning_rate": 0.0009320656664951797, "loss": 2.5803, "step": 1682 }, { "epoch": 0.19298245614035087, "grad_norm": 0.30478739766811735, "learning_rate": 0.0009319721825129734, "loss": 2.5953, "step": 1683 }, { "epoch": 0.19309712188969155, "grad_norm": 0.27023023879764657, "learning_rate": 0.0009318786389484383, "loss": 2.5282, "step": 1684 }, { "epoch": 0.1932117876390322, "grad_norm": 0.2763402459070808, "learning_rate": 0.0009317850358144778, "loss": 2.4999, "step": 1685 }, { "epoch": 0.1933264533883729, "grad_norm": 0.3157947959751468, "learning_rate": 0.0009316913731240018, "loss": 2.4914, "step": 1686 }, { "epoch": 0.19344111913771356, "grad_norm": 0.3124817905240308, "learning_rate": 0.0009315976508899298, "loss": 2.6382, "step": 1687 }, { "epoch": 0.19355578488705424, "grad_norm": 0.30712230488803877, "learning_rate": 0.0009315038691251887, "loss": 2.5854, "step": 1688 }, { "epoch": 0.1936704506363949, "grad_norm": 0.2923878178154196, "learning_rate": 0.0009314100278427143, "loss": 2.5773, "step": 1689 }, { "epoch": 0.1937851163857356, "grad_norm": 0.31218078215668, "learning_rate": 0.0009313161270554498, "loss": 2.7746, "step": 1690 }, { "epoch": 0.19389978213507625, "grad_norm": 0.30630615245995235, "learning_rate": 0.0009312221667763472, "loss": 2.6889, "step": 1691 }, { "epoch": 0.19401444788441694, "grad_norm": 0.2886778151996603, "learning_rate": 0.0009311281470183667, "loss": 2.6624, "step": 1692 }, { "epoch": 0.1941291136337576, "grad_norm": 0.26233581421762375, "learning_rate": 0.0009310340677944762, "loss": 2.5113, "step": 1693 }, { "epoch": 0.19424377938309828, "grad_norm": 0.2478620895071049, "learning_rate": 0.0009309399291176524, "loss": 2.5476, "step": 1694 }, { "epoch": 0.19435844513243894, "grad_norm": 0.26775185563544085, "learning_rate": 0.0009308457310008798, "loss": 2.5786, "step": 1695 }, { "epoch": 0.19447311088177963, "grad_norm": 0.26014739028287986, "learning_rate": 0.0009307514734571514, "loss": 2.5151, "step": 1696 }, { "epoch": 0.19458777663112028, "grad_norm": 0.2526489044714267, "learning_rate": 0.0009306571564994679, "loss": 2.5586, "step": 1697 }, { "epoch": 0.19470244238046094, "grad_norm": 0.26145180880160135, "learning_rate": 0.000930562780140839, "loss": 2.7168, "step": 1698 }, { "epoch": 0.19481710812980163, "grad_norm": 0.27692814346591, "learning_rate": 0.0009304683443942816, "loss": 2.5575, "step": 1699 }, { "epoch": 0.1949317738791423, "grad_norm": 0.29041701130036884, "learning_rate": 0.0009303738492728216, "loss": 2.7003, "step": 1700 }, { "epoch": 0.19504643962848298, "grad_norm": 0.2992879801572418, "learning_rate": 0.000930279294789493, "loss": 2.6822, "step": 1701 }, { "epoch": 0.19516110537782363, "grad_norm": 0.2862615538992636, "learning_rate": 0.0009301846809573373, "loss": 2.6188, "step": 1702 }, { "epoch": 0.19527577112716432, "grad_norm": 0.2872393578188177, "learning_rate": 0.000930090007789405, "loss": 2.5707, "step": 1703 }, { "epoch": 0.19539043687650498, "grad_norm": 0.2809325292410022, "learning_rate": 0.0009299952752987544, "loss": 2.6404, "step": 1704 }, { "epoch": 0.19550510262584567, "grad_norm": 0.33948355793714385, "learning_rate": 0.0009299004834984519, "loss": 2.5891, "step": 1705 }, { "epoch": 0.19561976837518633, "grad_norm": 0.2942578740634884, "learning_rate": 0.0009298056324015724, "loss": 2.6191, "step": 1706 }, { "epoch": 0.195734434124527, "grad_norm": 0.2929279931546125, "learning_rate": 0.0009297107220211988, "loss": 2.6574, "step": 1707 }, { "epoch": 0.19584909987386767, "grad_norm": 0.2685094388898099, "learning_rate": 0.0009296157523704223, "loss": 2.5703, "step": 1708 }, { "epoch": 0.19596376562320836, "grad_norm": 0.2911102519333245, "learning_rate": 0.0009295207234623418, "loss": 2.6691, "step": 1709 }, { "epoch": 0.19607843137254902, "grad_norm": 0.28928000922977426, "learning_rate": 0.000929425635310065, "loss": 2.5787, "step": 1710 }, { "epoch": 0.1961930971218897, "grad_norm": 0.2919157130864013, "learning_rate": 0.0009293304879267073, "loss": 2.8416, "step": 1711 }, { "epoch": 0.19630776287123036, "grad_norm": 0.2748486990497807, "learning_rate": 0.0009292352813253926, "loss": 2.6306, "step": 1712 }, { "epoch": 0.19642242862057105, "grad_norm": 0.27467221064391567, "learning_rate": 0.0009291400155192528, "loss": 2.5247, "step": 1713 }, { "epoch": 0.1965370943699117, "grad_norm": 0.28655267312567906, "learning_rate": 0.0009290446905214281, "loss": 2.5682, "step": 1714 }, { "epoch": 0.19665176011925237, "grad_norm": 0.27351615052854433, "learning_rate": 0.0009289493063450666, "loss": 2.6313, "step": 1715 }, { "epoch": 0.19676642586859305, "grad_norm": 0.29991900735188, "learning_rate": 0.0009288538630033247, "loss": 2.732, "step": 1716 }, { "epoch": 0.1968810916179337, "grad_norm": 0.26410057446094976, "learning_rate": 0.0009287583605093674, "loss": 2.6481, "step": 1717 }, { "epoch": 0.1969957573672744, "grad_norm": 0.2553533369999085, "learning_rate": 0.000928662798876367, "loss": 2.7471, "step": 1718 }, { "epoch": 0.19711042311661506, "grad_norm": 0.24060694671389823, "learning_rate": 0.0009285671781175045, "loss": 2.5678, "step": 1719 }, { "epoch": 0.19722508886595574, "grad_norm": 0.2580506621163582, "learning_rate": 0.000928471498245969, "loss": 2.6716, "step": 1720 }, { "epoch": 0.1973397546152964, "grad_norm": 0.2616284312681021, "learning_rate": 0.0009283757592749577, "loss": 2.5215, "step": 1721 }, { "epoch": 0.1974544203646371, "grad_norm": 0.25203577974257835, "learning_rate": 0.0009282799612176762, "loss": 2.7473, "step": 1722 }, { "epoch": 0.19756908611397775, "grad_norm": 0.296751865731299, "learning_rate": 0.0009281841040873376, "loss": 2.7065, "step": 1723 }, { "epoch": 0.19768375186331844, "grad_norm": 0.2671438646220266, "learning_rate": 0.0009280881878971637, "loss": 2.7288, "step": 1724 }, { "epoch": 0.1977984176126591, "grad_norm": 0.27546778661458454, "learning_rate": 0.0009279922126603846, "loss": 2.7598, "step": 1725 }, { "epoch": 0.19791308336199978, "grad_norm": 0.27969097669459164, "learning_rate": 0.000927896178390238, "loss": 2.5758, "step": 1726 }, { "epoch": 0.19802774911134044, "grad_norm": 0.2665419165349484, "learning_rate": 0.0009278000850999699, "loss": 2.628, "step": 1727 }, { "epoch": 0.19814241486068113, "grad_norm": 0.32865196209771536, "learning_rate": 0.0009277039328028347, "loss": 2.6072, "step": 1728 }, { "epoch": 0.19825708061002179, "grad_norm": 0.27609935996698404, "learning_rate": 0.0009276077215120949, "loss": 2.6378, "step": 1729 }, { "epoch": 0.19837174635936247, "grad_norm": 0.297424795884251, "learning_rate": 0.0009275114512410208, "loss": 2.5096, "step": 1730 }, { "epoch": 0.19848641210870313, "grad_norm": 0.2795760894911456, "learning_rate": 0.000927415122002891, "loss": 2.7274, "step": 1731 }, { "epoch": 0.1986010778580438, "grad_norm": 0.26309631574320286, "learning_rate": 0.0009273187338109925, "loss": 2.5948, "step": 1732 }, { "epoch": 0.19871574360738448, "grad_norm": 0.2704415797972162, "learning_rate": 0.0009272222866786201, "loss": 2.6391, "step": 1733 }, { "epoch": 0.19883040935672514, "grad_norm": 0.2824461482521748, "learning_rate": 0.0009271257806190769, "loss": 2.6665, "step": 1734 }, { "epoch": 0.19894507510606582, "grad_norm": 0.3378929695763108, "learning_rate": 0.0009270292156456738, "loss": 2.6966, "step": 1735 }, { "epoch": 0.19905974085540648, "grad_norm": 0.2968579268909668, "learning_rate": 0.0009269325917717306, "loss": 2.6312, "step": 1736 }, { "epoch": 0.19917440660474717, "grad_norm": 0.2604705864710766, "learning_rate": 0.0009268359090105743, "loss": 2.583, "step": 1737 }, { "epoch": 0.19928907235408783, "grad_norm": 0.27078611162929744, "learning_rate": 0.0009267391673755405, "loss": 2.6624, "step": 1738 }, { "epoch": 0.1994037381034285, "grad_norm": 0.30106035857019103, "learning_rate": 0.0009266423668799731, "loss": 2.6475, "step": 1739 }, { "epoch": 0.19951840385276917, "grad_norm": 0.2803063222175058, "learning_rate": 0.0009265455075372237, "loss": 2.6104, "step": 1740 }, { "epoch": 0.19963306960210986, "grad_norm": 0.2606181624767818, "learning_rate": 0.0009264485893606523, "loss": 2.687, "step": 1741 }, { "epoch": 0.19974773535145052, "grad_norm": 0.2709598332186418, "learning_rate": 0.0009263516123636267, "loss": 2.5639, "step": 1742 }, { "epoch": 0.1998624011007912, "grad_norm": 0.28347200949823614, "learning_rate": 0.0009262545765595232, "loss": 2.5372, "step": 1743 }, { "epoch": 0.19997706685013186, "grad_norm": 0.27881331102566836, "learning_rate": 0.000926157481961726, "loss": 2.6542, "step": 1744 }, { "epoch": 0.20009173259947255, "grad_norm": 0.27845622741764925, "learning_rate": 0.0009260603285836276, "loss": 2.7555, "step": 1745 }, { "epoch": 0.2002063983488132, "grad_norm": 0.26847961598207754, "learning_rate": 0.0009259631164386282, "loss": 2.4554, "step": 1746 }, { "epoch": 0.2003210640981539, "grad_norm": 0.31853256499486454, "learning_rate": 0.0009258658455401365, "loss": 2.6157, "step": 1747 }, { "epoch": 0.20043572984749455, "grad_norm": 0.29895738916929404, "learning_rate": 0.0009257685159015692, "loss": 2.5628, "step": 1748 }, { "epoch": 0.2005503955968352, "grad_norm": 0.27092781192713605, "learning_rate": 0.0009256711275363509, "loss": 2.787, "step": 1749 }, { "epoch": 0.2006650613461759, "grad_norm": 0.32893431152903013, "learning_rate": 0.0009255736804579147, "loss": 2.5425, "step": 1750 }, { "epoch": 0.20077972709551656, "grad_norm": 0.29513628112715307, "learning_rate": 0.0009254761746797013, "loss": 2.504, "step": 1751 }, { "epoch": 0.20089439284485724, "grad_norm": 0.2988411154823401, "learning_rate": 0.0009253786102151602, "loss": 2.615, "step": 1752 }, { "epoch": 0.2010090585941979, "grad_norm": 0.273621199490888, "learning_rate": 0.0009252809870777481, "loss": 2.7242, "step": 1753 }, { "epoch": 0.2011237243435386, "grad_norm": 0.26078744569512974, "learning_rate": 0.0009251833052809304, "loss": 2.6302, "step": 1754 }, { "epoch": 0.20123839009287925, "grad_norm": 0.254503886359509, "learning_rate": 0.0009250855648381805, "loss": 2.4573, "step": 1755 }, { "epoch": 0.20135305584221994, "grad_norm": 0.26325298479855763, "learning_rate": 0.0009249877657629799, "loss": 2.6683, "step": 1756 }, { "epoch": 0.2014677215915606, "grad_norm": 0.27149883444439366, "learning_rate": 0.0009248899080688178, "loss": 2.625, "step": 1757 }, { "epoch": 0.20158238734090128, "grad_norm": 0.27157652111558644, "learning_rate": 0.0009247919917691923, "loss": 2.5554, "step": 1758 }, { "epoch": 0.20169705309024194, "grad_norm": 0.2823642430565864, "learning_rate": 0.0009246940168776086, "loss": 2.6599, "step": 1759 }, { "epoch": 0.20181171883958263, "grad_norm": 0.27323956886186274, "learning_rate": 0.0009245959834075807, "loss": 2.5351, "step": 1760 }, { "epoch": 0.20192638458892329, "grad_norm": 0.2905324787805063, "learning_rate": 0.0009244978913726304, "loss": 2.7296, "step": 1761 }, { "epoch": 0.20204105033826397, "grad_norm": 0.31225837240536775, "learning_rate": 0.0009243997407862878, "loss": 2.6828, "step": 1762 }, { "epoch": 0.20215571608760463, "grad_norm": 0.2764447319487757, "learning_rate": 0.0009243015316620906, "loss": 2.5578, "step": 1763 }, { "epoch": 0.20227038183694532, "grad_norm": 0.2841933683801059, "learning_rate": 0.0009242032640135852, "loss": 2.6, "step": 1764 }, { "epoch": 0.20238504758628598, "grad_norm": 0.29179152760505483, "learning_rate": 0.0009241049378543254, "loss": 2.6247, "step": 1765 }, { "epoch": 0.20249971333562664, "grad_norm": 0.2972631674408136, "learning_rate": 0.0009240065531978736, "loss": 2.5923, "step": 1766 }, { "epoch": 0.20261437908496732, "grad_norm": 0.286572269169564, "learning_rate": 0.0009239081100578002, "loss": 2.5166, "step": 1767 }, { "epoch": 0.20272904483430798, "grad_norm": 0.26487465032329527, "learning_rate": 0.0009238096084476832, "loss": 2.59, "step": 1768 }, { "epoch": 0.20284371058364867, "grad_norm": 0.27462479085638664, "learning_rate": 0.0009237110483811096, "loss": 2.5336, "step": 1769 }, { "epoch": 0.20295837633298933, "grad_norm": 0.2859307727208129, "learning_rate": 0.0009236124298716734, "loss": 2.8086, "step": 1770 }, { "epoch": 0.20307304208233, "grad_norm": 0.2634898217111837, "learning_rate": 0.0009235137529329772, "loss": 2.6404, "step": 1771 }, { "epoch": 0.20318770783167067, "grad_norm": 0.31524014390003985, "learning_rate": 0.0009234150175786318, "loss": 2.5594, "step": 1772 }, { "epoch": 0.20330237358101136, "grad_norm": 0.3037261914238762, "learning_rate": 0.0009233162238222556, "loss": 2.5686, "step": 1773 }, { "epoch": 0.20341703933035202, "grad_norm": 0.30005378358971446, "learning_rate": 0.0009232173716774757, "loss": 2.6601, "step": 1774 }, { "epoch": 0.2035317050796927, "grad_norm": 0.30318877481006773, "learning_rate": 0.0009231184611579265, "loss": 2.6332, "step": 1775 }, { "epoch": 0.20364637082903336, "grad_norm": 0.3015831389339266, "learning_rate": 0.000923019492277251, "loss": 2.6967, "step": 1776 }, { "epoch": 0.20376103657837405, "grad_norm": 0.31032689094531113, "learning_rate": 0.0009229204650491001, "loss": 2.6506, "step": 1777 }, { "epoch": 0.2038757023277147, "grad_norm": 0.31984075907495463, "learning_rate": 0.0009228213794871325, "loss": 2.6961, "step": 1778 }, { "epoch": 0.2039903680770554, "grad_norm": 0.2995015241326771, "learning_rate": 0.0009227222356050154, "loss": 2.7223, "step": 1779 }, { "epoch": 0.20410503382639605, "grad_norm": 0.254120920521997, "learning_rate": 0.0009226230334164236, "loss": 2.5075, "step": 1780 }, { "epoch": 0.20421969957573674, "grad_norm": 0.2523150697559599, "learning_rate": 0.0009225237729350403, "loss": 2.627, "step": 1781 }, { "epoch": 0.2043343653250774, "grad_norm": 0.2924672952770439, "learning_rate": 0.0009224244541745566, "loss": 2.5827, "step": 1782 }, { "epoch": 0.20444903107441806, "grad_norm": 0.2964451721053978, "learning_rate": 0.0009223250771486717, "loss": 2.7902, "step": 1783 }, { "epoch": 0.20456369682375874, "grad_norm": 0.27512054957429405, "learning_rate": 0.0009222256418710923, "loss": 2.4599, "step": 1784 }, { "epoch": 0.2046783625730994, "grad_norm": 0.2577890929804786, "learning_rate": 0.0009221261483555343, "loss": 2.5715, "step": 1785 }, { "epoch": 0.2047930283224401, "grad_norm": 0.29742219880611753, "learning_rate": 0.0009220265966157205, "loss": 2.6554, "step": 1786 }, { "epoch": 0.20490769407178075, "grad_norm": 0.29582982979580524, "learning_rate": 0.0009219269866653823, "loss": 2.6075, "step": 1787 }, { "epoch": 0.20502235982112144, "grad_norm": 0.2800466961496367, "learning_rate": 0.0009218273185182588, "loss": 2.5613, "step": 1788 }, { "epoch": 0.2051370255704621, "grad_norm": 0.2720602791658052, "learning_rate": 0.0009217275921880976, "loss": 2.7009, "step": 1789 }, { "epoch": 0.20525169131980278, "grad_norm": 0.26467284612710645, "learning_rate": 0.000921627807688654, "loss": 2.6417, "step": 1790 }, { "epoch": 0.20536635706914344, "grad_norm": 0.28029396203374163, "learning_rate": 0.0009215279650336911, "loss": 2.6438, "step": 1791 }, { "epoch": 0.20548102281848413, "grad_norm": 0.2952715814083847, "learning_rate": 0.0009214280642369806, "loss": 2.5739, "step": 1792 }, { "epoch": 0.20559568856782479, "grad_norm": 0.27466522659836473, "learning_rate": 0.0009213281053123018, "loss": 2.5252, "step": 1793 }, { "epoch": 0.20571035431716547, "grad_norm": 0.27300317387113116, "learning_rate": 0.000921228088273442, "loss": 2.5609, "step": 1794 }, { "epoch": 0.20582502006650613, "grad_norm": 0.28475773425882983, "learning_rate": 0.0009211280131341968, "loss": 2.5327, "step": 1795 }, { "epoch": 0.20593968581584682, "grad_norm": 0.27800539838571175, "learning_rate": 0.0009210278799083695, "loss": 2.4658, "step": 1796 }, { "epoch": 0.20605435156518748, "grad_norm": 0.27344159847746036, "learning_rate": 0.000920927688609772, "loss": 2.4672, "step": 1797 }, { "epoch": 0.20616901731452816, "grad_norm": 0.2882640478534675, "learning_rate": 0.0009208274392522231, "loss": 2.5276, "step": 1798 }, { "epoch": 0.20628368306386882, "grad_norm": 0.28039057332029593, "learning_rate": 0.0009207271318495509, "loss": 2.5387, "step": 1799 }, { "epoch": 0.20639834881320948, "grad_norm": 0.30498936725921144, "learning_rate": 0.0009206267664155906, "loss": 2.6403, "step": 1800 }, { "epoch": 0.20651301456255017, "grad_norm": 0.2697333578929762, "learning_rate": 0.0009205263429641857, "loss": 2.7215, "step": 1801 }, { "epoch": 0.20662768031189083, "grad_norm": 0.31249331175330564, "learning_rate": 0.0009204258615091879, "loss": 2.6832, "step": 1802 }, { "epoch": 0.2067423460612315, "grad_norm": 0.2799012570294706, "learning_rate": 0.0009203253220644564, "loss": 2.6696, "step": 1803 }, { "epoch": 0.20685701181057217, "grad_norm": 0.25278090626053484, "learning_rate": 0.0009202247246438589, "loss": 2.6514, "step": 1804 }, { "epoch": 0.20697167755991286, "grad_norm": 0.28633395901928993, "learning_rate": 0.0009201240692612708, "loss": 2.6969, "step": 1805 }, { "epoch": 0.20708634330925352, "grad_norm": 0.2929582198937101, "learning_rate": 0.0009200233559305758, "loss": 2.5596, "step": 1806 }, { "epoch": 0.2072010090585942, "grad_norm": 0.2663089590681011, "learning_rate": 0.0009199225846656649, "loss": 2.6004, "step": 1807 }, { "epoch": 0.20731567480793486, "grad_norm": 0.2760320054589639, "learning_rate": 0.0009198217554804382, "loss": 2.6414, "step": 1808 }, { "epoch": 0.20743034055727555, "grad_norm": 0.2710764396013921, "learning_rate": 0.0009197208683888028, "loss": 2.6011, "step": 1809 }, { "epoch": 0.2075450063066162, "grad_norm": 0.2517043468636936, "learning_rate": 0.0009196199234046741, "loss": 2.5256, "step": 1810 }, { "epoch": 0.2076596720559569, "grad_norm": 0.31308343473426087, "learning_rate": 0.0009195189205419757, "loss": 2.6821, "step": 1811 }, { "epoch": 0.20777433780529755, "grad_norm": 0.27151190853916474, "learning_rate": 0.000919417859814639, "loss": 2.6142, "step": 1812 }, { "epoch": 0.20788900355463824, "grad_norm": 0.2701020751745533, "learning_rate": 0.0009193167412366034, "loss": 2.6516, "step": 1813 }, { "epoch": 0.2080036693039789, "grad_norm": 0.2663926364421637, "learning_rate": 0.0009192155648218162, "loss": 2.6874, "step": 1814 }, { "epoch": 0.2081183350533196, "grad_norm": 0.2644533736432294, "learning_rate": 0.0009191143305842329, "loss": 2.5328, "step": 1815 }, { "epoch": 0.20823300080266025, "grad_norm": 0.2905391475269113, "learning_rate": 0.0009190130385378166, "loss": 2.5705, "step": 1816 }, { "epoch": 0.2083476665520009, "grad_norm": 0.27639315799691644, "learning_rate": 0.0009189116886965388, "loss": 2.554, "step": 1817 }, { "epoch": 0.2084623323013416, "grad_norm": 0.2693653631919564, "learning_rate": 0.0009188102810743788, "loss": 2.6249, "step": 1818 }, { "epoch": 0.20857699805068225, "grad_norm": 0.286969232047764, "learning_rate": 0.0009187088156853236, "loss": 2.5677, "step": 1819 }, { "epoch": 0.20869166380002294, "grad_norm": 0.2601503526373931, "learning_rate": 0.0009186072925433689, "loss": 2.5072, "step": 1820 }, { "epoch": 0.2088063295493636, "grad_norm": 0.2653654402079119, "learning_rate": 0.0009185057116625172, "loss": 2.5455, "step": 1821 }, { "epoch": 0.20892099529870428, "grad_norm": 0.2668601065990496, "learning_rate": 0.0009184040730567803, "loss": 2.442, "step": 1822 }, { "epoch": 0.20903566104804494, "grad_norm": 0.2998095783103949, "learning_rate": 0.0009183023767401769, "loss": 2.464, "step": 1823 }, { "epoch": 0.20915032679738563, "grad_norm": 0.3069579799522593, "learning_rate": 0.0009182006227267343, "loss": 2.7362, "step": 1824 }, { "epoch": 0.2092649925467263, "grad_norm": 0.3136578445647607, "learning_rate": 0.0009180988110304873, "loss": 2.5904, "step": 1825 }, { "epoch": 0.20937965829606697, "grad_norm": 0.30135775504050333, "learning_rate": 0.000917996941665479, "loss": 2.6904, "step": 1826 }, { "epoch": 0.20949432404540763, "grad_norm": 0.2857759439690108, "learning_rate": 0.0009178950146457606, "loss": 2.6182, "step": 1827 }, { "epoch": 0.20960898979474832, "grad_norm": 0.30608177123710906, "learning_rate": 0.0009177930299853903, "loss": 2.7166, "step": 1828 }, { "epoch": 0.20972365554408898, "grad_norm": 0.2606953416847776, "learning_rate": 0.0009176909876984356, "loss": 2.5399, "step": 1829 }, { "epoch": 0.20983832129342966, "grad_norm": 0.27569299374427203, "learning_rate": 0.0009175888877989712, "loss": 2.7004, "step": 1830 }, { "epoch": 0.20995298704277032, "grad_norm": 0.30149350990317325, "learning_rate": 0.0009174867303010795, "loss": 2.5774, "step": 1831 }, { "epoch": 0.210067652792111, "grad_norm": 0.27749096626494185, "learning_rate": 0.0009173845152188516, "loss": 2.5713, "step": 1832 }, { "epoch": 0.21018231854145167, "grad_norm": 0.3138293889063431, "learning_rate": 0.0009172822425663855, "loss": 2.5801, "step": 1833 }, { "epoch": 0.21029698429079233, "grad_norm": 0.2817368297169945, "learning_rate": 0.0009171799123577886, "loss": 2.5918, "step": 1834 }, { "epoch": 0.210411650040133, "grad_norm": 0.27357098773786476, "learning_rate": 0.0009170775246071747, "loss": 2.6969, "step": 1835 }, { "epoch": 0.21052631578947367, "grad_norm": 0.2775443497152393, "learning_rate": 0.0009169750793286667, "loss": 2.6856, "step": 1836 }, { "epoch": 0.21064098153881436, "grad_norm": 0.3031248784741946, "learning_rate": 0.0009168725765363946, "loss": 2.5311, "step": 1837 }, { "epoch": 0.21075564728815502, "grad_norm": 0.257232567679352, "learning_rate": 0.0009167700162444969, "loss": 2.5365, "step": 1838 }, { "epoch": 0.2108703130374957, "grad_norm": 0.2816349325373924, "learning_rate": 0.0009166673984671198, "loss": 2.611, "step": 1839 }, { "epoch": 0.21098497878683636, "grad_norm": 0.2874232139828025, "learning_rate": 0.0009165647232184176, "loss": 2.6066, "step": 1840 }, { "epoch": 0.21109964453617705, "grad_norm": 0.26525663183590725, "learning_rate": 0.0009164619905125522, "loss": 2.6868, "step": 1841 }, { "epoch": 0.2112143102855177, "grad_norm": 0.2951763320631187, "learning_rate": 0.0009163592003636936, "loss": 2.6218, "step": 1842 }, { "epoch": 0.2113289760348584, "grad_norm": 0.2959412129676108, "learning_rate": 0.00091625635278602, "loss": 2.6998, "step": 1843 }, { "epoch": 0.21144364178419905, "grad_norm": 0.2658419991670816, "learning_rate": 0.000916153447793717, "loss": 2.774, "step": 1844 }, { "epoch": 0.21155830753353974, "grad_norm": 0.3067430475063693, "learning_rate": 0.0009160504854009786, "loss": 2.597, "step": 1845 }, { "epoch": 0.2116729732828804, "grad_norm": 0.27750197150782224, "learning_rate": 0.0009159474656220063, "loss": 2.4906, "step": 1846 }, { "epoch": 0.2117876390322211, "grad_norm": 0.3013271728904373, "learning_rate": 0.0009158443884710097, "loss": 2.5229, "step": 1847 }, { "epoch": 0.21190230478156175, "grad_norm": 0.32563786547260687, "learning_rate": 0.0009157412539622065, "loss": 2.6388, "step": 1848 }, { "epoch": 0.21201697053090243, "grad_norm": 0.2700954928393621, "learning_rate": 0.0009156380621098221, "loss": 2.6039, "step": 1849 }, { "epoch": 0.2121316362802431, "grad_norm": 0.29486758303320676, "learning_rate": 0.0009155348129280898, "loss": 2.5722, "step": 1850 }, { "epoch": 0.21224630202958375, "grad_norm": 0.318338021691611, "learning_rate": 0.000915431506431251, "loss": 2.487, "step": 1851 }, { "epoch": 0.21236096777892444, "grad_norm": 0.28896449032637983, "learning_rate": 0.0009153281426335547, "loss": 2.459, "step": 1852 }, { "epoch": 0.2124756335282651, "grad_norm": 0.2796068687283091, "learning_rate": 0.0009152247215492577, "loss": 2.5065, "step": 1853 }, { "epoch": 0.21259029927760578, "grad_norm": 0.29210895661653224, "learning_rate": 0.0009151212431926256, "loss": 2.6085, "step": 1854 }, { "epoch": 0.21270496502694644, "grad_norm": 0.259018230564378, "learning_rate": 0.0009150177075779308, "loss": 2.5859, "step": 1855 }, { "epoch": 0.21281963077628713, "grad_norm": 0.27516160389330796, "learning_rate": 0.0009149141147194542, "loss": 2.6781, "step": 1856 }, { "epoch": 0.2129342965256278, "grad_norm": 0.243866375529754, "learning_rate": 0.0009148104646314844, "loss": 2.6508, "step": 1857 }, { "epoch": 0.21304896227496847, "grad_norm": 0.26921311194148806, "learning_rate": 0.000914706757328318, "loss": 2.6083, "step": 1858 }, { "epoch": 0.21316362802430913, "grad_norm": 0.2675641924022829, "learning_rate": 0.0009146029928242596, "loss": 2.6045, "step": 1859 }, { "epoch": 0.21327829377364982, "grad_norm": 0.31126391034303047, "learning_rate": 0.0009144991711336214, "loss": 2.5702, "step": 1860 }, { "epoch": 0.21339295952299048, "grad_norm": 0.2965333399936351, "learning_rate": 0.0009143952922707235, "loss": 2.6294, "step": 1861 }, { "epoch": 0.21350762527233116, "grad_norm": 0.2956018161867304, "learning_rate": 0.0009142913562498942, "loss": 2.5606, "step": 1862 }, { "epoch": 0.21362229102167182, "grad_norm": 0.26298415222915367, "learning_rate": 0.0009141873630854694, "loss": 2.6358, "step": 1863 }, { "epoch": 0.2137369567710125, "grad_norm": 0.2803733082860882, "learning_rate": 0.0009140833127917929, "loss": 2.6963, "step": 1864 }, { "epoch": 0.21385162252035317, "grad_norm": 0.2544748663167623, "learning_rate": 0.0009139792053832166, "loss": 2.5873, "step": 1865 }, { "epoch": 0.21396628826969386, "grad_norm": 0.3152046627388256, "learning_rate": 0.0009138750408741001, "loss": 2.5582, "step": 1866 }, { "epoch": 0.21408095401903451, "grad_norm": 0.3177331732417034, "learning_rate": 0.000913770819278811, "loss": 2.6573, "step": 1867 }, { "epoch": 0.21419561976837517, "grad_norm": 0.2711250188987675, "learning_rate": 0.0009136665406117244, "loss": 2.6932, "step": 1868 }, { "epoch": 0.21431028551771586, "grad_norm": 0.2806631734513546, "learning_rate": 0.0009135622048872238, "loss": 2.606, "step": 1869 }, { "epoch": 0.21442495126705652, "grad_norm": 0.2859856161879142, "learning_rate": 0.0009134578121197002, "loss": 2.4877, "step": 1870 }, { "epoch": 0.2145396170163972, "grad_norm": 0.26958369351317923, "learning_rate": 0.0009133533623235526, "loss": 2.4897, "step": 1871 }, { "epoch": 0.21465428276573786, "grad_norm": 0.27924198489268304, "learning_rate": 0.000913248855513188, "loss": 2.5948, "step": 1872 }, { "epoch": 0.21476894851507855, "grad_norm": 0.28351441938396327, "learning_rate": 0.0009131442917030211, "loss": 2.7292, "step": 1873 }, { "epoch": 0.2148836142644192, "grad_norm": 0.2693439813139644, "learning_rate": 0.0009130396709074741, "loss": 2.6453, "step": 1874 }, { "epoch": 0.2149982800137599, "grad_norm": 0.2624815228106838, "learning_rate": 0.0009129349931409781, "loss": 2.6312, "step": 1875 }, { "epoch": 0.21511294576310055, "grad_norm": 0.2648119913687565, "learning_rate": 0.0009128302584179708, "loss": 2.5599, "step": 1876 }, { "epoch": 0.21522761151244124, "grad_norm": 0.27046439808232203, "learning_rate": 0.0009127254667528988, "loss": 2.6051, "step": 1877 }, { "epoch": 0.2153422772617819, "grad_norm": 0.27876532866756304, "learning_rate": 0.0009126206181602158, "loss": 2.6333, "step": 1878 }, { "epoch": 0.2154569430111226, "grad_norm": 0.23141626998005832, "learning_rate": 0.0009125157126543838, "loss": 2.5377, "step": 1879 }, { "epoch": 0.21557160876046325, "grad_norm": 0.261271787985513, "learning_rate": 0.0009124107502498725, "loss": 2.6866, "step": 1880 }, { "epoch": 0.21568627450980393, "grad_norm": 0.26860498998455956, "learning_rate": 0.0009123057309611595, "loss": 2.5248, "step": 1881 }, { "epoch": 0.2158009402591446, "grad_norm": 0.27140691316419624, "learning_rate": 0.0009122006548027302, "loss": 2.5887, "step": 1882 }, { "epoch": 0.21591560600848528, "grad_norm": 0.2590548852444448, "learning_rate": 0.0009120955217890778, "loss": 2.5286, "step": 1883 }, { "epoch": 0.21603027175782594, "grad_norm": 0.2649540281508204, "learning_rate": 0.0009119903319347034, "loss": 2.6273, "step": 1884 }, { "epoch": 0.2161449375071666, "grad_norm": 0.30221413184569834, "learning_rate": 0.000911885085254116, "loss": 2.6532, "step": 1885 }, { "epoch": 0.21625960325650728, "grad_norm": 0.2659387280570465, "learning_rate": 0.0009117797817618323, "loss": 2.5716, "step": 1886 }, { "epoch": 0.21637426900584794, "grad_norm": 0.2741848094877138, "learning_rate": 0.000911674421472377, "loss": 2.514, "step": 1887 }, { "epoch": 0.21648893475518863, "grad_norm": 0.2944245592233634, "learning_rate": 0.0009115690044002824, "loss": 2.4557, "step": 1888 }, { "epoch": 0.2166036005045293, "grad_norm": 0.2809225446752393, "learning_rate": 0.0009114635305600889, "loss": 2.593, "step": 1889 }, { "epoch": 0.21671826625386997, "grad_norm": 0.3094642148936137, "learning_rate": 0.0009113579999663447, "loss": 2.6578, "step": 1890 }, { "epoch": 0.21683293200321063, "grad_norm": 0.32408141046131017, "learning_rate": 0.0009112524126336054, "loss": 2.6325, "step": 1891 }, { "epoch": 0.21694759775255132, "grad_norm": 0.2915661617567413, "learning_rate": 0.0009111467685764351, "loss": 2.576, "step": 1892 }, { "epoch": 0.21706226350189198, "grad_norm": 0.328285860938571, "learning_rate": 0.0009110410678094051, "loss": 2.5832, "step": 1893 }, { "epoch": 0.21717692925123266, "grad_norm": 0.27907049962318375, "learning_rate": 0.0009109353103470951, "loss": 2.6467, "step": 1894 }, { "epoch": 0.21729159500057332, "grad_norm": 0.26090877350144465, "learning_rate": 0.0009108294962040921, "loss": 2.5787, "step": 1895 }, { "epoch": 0.217406260749914, "grad_norm": 0.28842180627322295, "learning_rate": 0.0009107236253949912, "loss": 2.7486, "step": 1896 }, { "epoch": 0.21752092649925467, "grad_norm": 0.26362965143399214, "learning_rate": 0.0009106176979343955, "loss": 2.5946, "step": 1897 }, { "epoch": 0.21763559224859536, "grad_norm": 0.2842749582815509, "learning_rate": 0.0009105117138369151, "loss": 2.4597, "step": 1898 }, { "epoch": 0.21775025799793601, "grad_norm": 0.28080370326315857, "learning_rate": 0.0009104056731171691, "loss": 2.5793, "step": 1899 }, { "epoch": 0.2178649237472767, "grad_norm": 0.24541024040210418, "learning_rate": 0.0009102995757897834, "loss": 2.7216, "step": 1900 }, { "epoch": 0.21797958949661736, "grad_norm": 0.2670657057656899, "learning_rate": 0.0009101934218693923, "loss": 2.6512, "step": 1901 }, { "epoch": 0.21809425524595802, "grad_norm": 0.26221270955037446, "learning_rate": 0.0009100872113706375, "loss": 2.6687, "step": 1902 }, { "epoch": 0.2182089209952987, "grad_norm": 0.3145136942609155, "learning_rate": 0.0009099809443081691, "loss": 2.6042, "step": 1903 }, { "epoch": 0.21832358674463936, "grad_norm": 0.26999972222507934, "learning_rate": 0.0009098746206966443, "loss": 2.6342, "step": 1904 }, { "epoch": 0.21843825249398005, "grad_norm": 0.26445434448717314, "learning_rate": 0.0009097682405507285, "loss": 2.5026, "step": 1905 }, { "epoch": 0.2185529182433207, "grad_norm": 0.26012609764076383, "learning_rate": 0.0009096618038850948, "loss": 2.6303, "step": 1906 }, { "epoch": 0.2186675839926614, "grad_norm": 0.2692690711463572, "learning_rate": 0.0009095553107144241, "loss": 2.6002, "step": 1907 }, { "epoch": 0.21878224974200206, "grad_norm": 0.2987751544110473, "learning_rate": 0.0009094487610534052, "loss": 2.4888, "step": 1908 }, { "epoch": 0.21889691549134274, "grad_norm": 0.29662090412846376, "learning_rate": 0.0009093421549167343, "loss": 2.6612, "step": 1909 }, { "epoch": 0.2190115812406834, "grad_norm": 0.28057920473199416, "learning_rate": 0.0009092354923191161, "loss": 2.7068, "step": 1910 }, { "epoch": 0.2191262469900241, "grad_norm": 0.2874779490340694, "learning_rate": 0.0009091287732752624, "loss": 2.6985, "step": 1911 }, { "epoch": 0.21924091273936475, "grad_norm": 0.2828317138805602, "learning_rate": 0.0009090219977998933, "loss": 2.5827, "step": 1912 }, { "epoch": 0.21935557848870543, "grad_norm": 0.2745614086973994, "learning_rate": 0.000908915165907736, "loss": 2.4808, "step": 1913 }, { "epoch": 0.2194702442380461, "grad_norm": 0.30613717156947323, "learning_rate": 0.0009088082776135263, "loss": 2.5825, "step": 1914 }, { "epoch": 0.21958490998738678, "grad_norm": 0.2655508704288266, "learning_rate": 0.0009087013329320073, "loss": 2.6332, "step": 1915 }, { "epoch": 0.21969957573672744, "grad_norm": 0.2554114466284559, "learning_rate": 0.0009085943318779301, "loss": 2.4686, "step": 1916 }, { "epoch": 0.21981424148606812, "grad_norm": 0.2500802711103224, "learning_rate": 0.0009084872744660532, "loss": 2.4513, "step": 1917 }, { "epoch": 0.21992890723540878, "grad_norm": 0.2523476571925537, "learning_rate": 0.0009083801607111433, "loss": 2.7623, "step": 1918 }, { "epoch": 0.22004357298474944, "grad_norm": 0.27173208928970244, "learning_rate": 0.0009082729906279746, "loss": 2.519, "step": 1919 }, { "epoch": 0.22015823873409013, "grad_norm": 0.27581178193610545, "learning_rate": 0.0009081657642313292, "loss": 2.5304, "step": 1920 }, { "epoch": 0.2202729044834308, "grad_norm": 0.27120255021361356, "learning_rate": 0.0009080584815359972, "loss": 2.5753, "step": 1921 }, { "epoch": 0.22038757023277147, "grad_norm": 0.250998699557209, "learning_rate": 0.0009079511425567759, "loss": 2.4572, "step": 1922 }, { "epoch": 0.22050223598211213, "grad_norm": 0.2550318361099552, "learning_rate": 0.0009078437473084706, "loss": 2.6597, "step": 1923 }, { "epoch": 0.22061690173145282, "grad_norm": 0.2590059578550897, "learning_rate": 0.0009077362958058946, "loss": 2.4548, "step": 1924 }, { "epoch": 0.22073156748079348, "grad_norm": 0.2859852170905992, "learning_rate": 0.0009076287880638689, "loss": 2.7067, "step": 1925 }, { "epoch": 0.22084623323013416, "grad_norm": 0.3119738888828401, "learning_rate": 0.0009075212240972218, "loss": 2.6942, "step": 1926 }, { "epoch": 0.22096089897947482, "grad_norm": 0.289801152792331, "learning_rate": 0.00090741360392079, "loss": 2.4983, "step": 1927 }, { "epoch": 0.2210755647288155, "grad_norm": 0.29610008529516996, "learning_rate": 0.0009073059275494176, "loss": 2.6153, "step": 1928 }, { "epoch": 0.22119023047815617, "grad_norm": 0.26575778645368764, "learning_rate": 0.0009071981949979564, "loss": 2.6743, "step": 1929 }, { "epoch": 0.22130489622749686, "grad_norm": 0.2894257444236656, "learning_rate": 0.000907090406281266, "loss": 2.6497, "step": 1930 }, { "epoch": 0.22141956197683751, "grad_norm": 0.26585047703634673, "learning_rate": 0.000906982561414214, "loss": 2.6185, "step": 1931 }, { "epoch": 0.2215342277261782, "grad_norm": 0.2754301975819511, "learning_rate": 0.0009068746604116755, "loss": 2.6864, "step": 1932 }, { "epoch": 0.22164889347551886, "grad_norm": 0.27091326531361276, "learning_rate": 0.0009067667032885334, "loss": 2.5364, "step": 1933 }, { "epoch": 0.22176355922485955, "grad_norm": 0.2604060571542392, "learning_rate": 0.0009066586900596781, "loss": 2.6768, "step": 1934 }, { "epoch": 0.2218782249742002, "grad_norm": 0.27837045011853345, "learning_rate": 0.000906550620740008, "loss": 2.5315, "step": 1935 }, { "epoch": 0.2219928907235409, "grad_norm": 0.2668698702701924, "learning_rate": 0.0009064424953444296, "loss": 2.5998, "step": 1936 }, { "epoch": 0.22210755647288155, "grad_norm": 0.260843606153731, "learning_rate": 0.0009063343138878563, "loss": 2.4059, "step": 1937 }, { "epoch": 0.2222222222222222, "grad_norm": 0.27938493112594875, "learning_rate": 0.0009062260763852099, "loss": 2.6744, "step": 1938 }, { "epoch": 0.2223368879715629, "grad_norm": 0.3006081028560282, "learning_rate": 0.0009061177828514198, "loss": 2.593, "step": 1939 }, { "epoch": 0.22245155372090356, "grad_norm": 0.2734578971446594, "learning_rate": 0.0009060094333014226, "loss": 2.5251, "step": 1940 }, { "epoch": 0.22256621947024424, "grad_norm": 0.2649327416105208, "learning_rate": 0.0009059010277501634, "loss": 2.5062, "step": 1941 }, { "epoch": 0.2226808852195849, "grad_norm": 0.2866524474555254, "learning_rate": 0.0009057925662125946, "loss": 2.6065, "step": 1942 }, { "epoch": 0.2227955509689256, "grad_norm": 0.29712861989300904, "learning_rate": 0.0009056840487036764, "loss": 2.6034, "step": 1943 }, { "epoch": 0.22291021671826625, "grad_norm": 0.29576524839234564, "learning_rate": 0.0009055754752383768, "loss": 2.5442, "step": 1944 }, { "epoch": 0.22302488246760693, "grad_norm": 0.29975480471833216, "learning_rate": 0.0009054668458316713, "loss": 2.6857, "step": 1945 }, { "epoch": 0.2231395482169476, "grad_norm": 0.31149051695256563, "learning_rate": 0.0009053581604985433, "loss": 2.6212, "step": 1946 }, { "epoch": 0.22325421396628828, "grad_norm": 0.2722972735180898, "learning_rate": 0.000905249419253984, "loss": 2.7627, "step": 1947 }, { "epoch": 0.22336887971562894, "grad_norm": 0.26737547910642695, "learning_rate": 0.0009051406221129919, "loss": 2.7949, "step": 1948 }, { "epoch": 0.22348354546496962, "grad_norm": 0.28222234187931433, "learning_rate": 0.0009050317690905737, "loss": 2.5675, "step": 1949 }, { "epoch": 0.22359821121431028, "grad_norm": 0.2597653078328947, "learning_rate": 0.0009049228602017437, "loss": 2.6455, "step": 1950 }, { "epoch": 0.22371287696365097, "grad_norm": 0.26215588556396907, "learning_rate": 0.0009048138954615235, "loss": 2.6061, "step": 1951 }, { "epoch": 0.22382754271299163, "grad_norm": 0.2510070527640125, "learning_rate": 0.0009047048748849429, "loss": 2.6727, "step": 1952 }, { "epoch": 0.22394220846233232, "grad_norm": 0.26425335175254944, "learning_rate": 0.0009045957984870393, "loss": 2.4553, "step": 1953 }, { "epoch": 0.22405687421167297, "grad_norm": 0.2640848196742785, "learning_rate": 0.0009044866662828575, "loss": 2.5792, "step": 1954 }, { "epoch": 0.22417153996101363, "grad_norm": 0.28247075641625724, "learning_rate": 0.0009043774782874503, "loss": 2.5274, "step": 1955 }, { "epoch": 0.22428620571035432, "grad_norm": 0.264571221801559, "learning_rate": 0.0009042682345158781, "loss": 2.6095, "step": 1956 }, { "epoch": 0.22440087145969498, "grad_norm": 0.27020248369221733, "learning_rate": 0.0009041589349832091, "loss": 2.5915, "step": 1957 }, { "epoch": 0.22451553720903566, "grad_norm": 0.2797456782640959, "learning_rate": 0.000904049579704519, "loss": 2.5646, "step": 1958 }, { "epoch": 0.22463020295837632, "grad_norm": 0.24184221607772377, "learning_rate": 0.0009039401686948912, "loss": 2.5008, "step": 1959 }, { "epoch": 0.224744868707717, "grad_norm": 0.26922537256729834, "learning_rate": 0.0009038307019694169, "loss": 2.5759, "step": 1960 }, { "epoch": 0.22485953445705767, "grad_norm": 0.25375396608739825, "learning_rate": 0.000903721179543195, "loss": 2.6332, "step": 1961 }, { "epoch": 0.22497420020639836, "grad_norm": 0.2711673553945902, "learning_rate": 0.0009036116014313321, "loss": 2.6366, "step": 1962 }, { "epoch": 0.22508886595573901, "grad_norm": 0.255907562785913, "learning_rate": 0.0009035019676489422, "loss": 2.5764, "step": 1963 }, { "epoch": 0.2252035317050797, "grad_norm": 0.2562543775541522, "learning_rate": 0.0009033922782111473, "loss": 2.5173, "step": 1964 }, { "epoch": 0.22531819745442036, "grad_norm": 0.28997616558416617, "learning_rate": 0.000903282533133077, "loss": 2.5987, "step": 1965 }, { "epoch": 0.22543286320376105, "grad_norm": 0.2567588176667797, "learning_rate": 0.0009031727324298686, "loss": 2.6207, "step": 1966 }, { "epoch": 0.2255475289531017, "grad_norm": 0.2635235397589899, "learning_rate": 0.0009030628761166668, "loss": 2.6897, "step": 1967 }, { "epoch": 0.2256621947024424, "grad_norm": 0.2755671557312719, "learning_rate": 0.0009029529642086245, "loss": 2.7063, "step": 1968 }, { "epoch": 0.22577686045178305, "grad_norm": 0.30016741390490403, "learning_rate": 0.0009028429967209015, "loss": 2.7238, "step": 1969 }, { "epoch": 0.22589152620112374, "grad_norm": 0.24278714521168407, "learning_rate": 0.0009027329736686663, "loss": 2.5206, "step": 1970 }, { "epoch": 0.2260061919504644, "grad_norm": 0.2815515189648223, "learning_rate": 0.000902622895067094, "loss": 2.5978, "step": 1971 }, { "epoch": 0.22612085769980506, "grad_norm": 0.30617726406810347, "learning_rate": 0.000902512760931368, "loss": 2.3762, "step": 1972 }, { "epoch": 0.22623552344914574, "grad_norm": 0.3014572069320811, "learning_rate": 0.0009024025712766792, "loss": 2.571, "step": 1973 }, { "epoch": 0.2263501891984864, "grad_norm": 0.3321771139923687, "learning_rate": 0.0009022923261182264, "loss": 2.7446, "step": 1974 }, { "epoch": 0.2264648549478271, "grad_norm": 0.2840748245323971, "learning_rate": 0.0009021820254712153, "loss": 2.5953, "step": 1975 }, { "epoch": 0.22657952069716775, "grad_norm": 0.29779548722246973, "learning_rate": 0.0009020716693508602, "loss": 2.8379, "step": 1976 }, { "epoch": 0.22669418644650843, "grad_norm": 0.25930289688366726, "learning_rate": 0.0009019612577723826, "loss": 2.6142, "step": 1977 }, { "epoch": 0.2268088521958491, "grad_norm": 0.3080007238585445, "learning_rate": 0.0009018507907510114, "loss": 2.7338, "step": 1978 }, { "epoch": 0.22692351794518978, "grad_norm": 0.28650189508590296, "learning_rate": 0.0009017402683019838, "loss": 2.4582, "step": 1979 }, { "epoch": 0.22703818369453044, "grad_norm": 0.256677474385844, "learning_rate": 0.0009016296904405439, "loss": 2.6505, "step": 1980 }, { "epoch": 0.22715284944387112, "grad_norm": 0.22769947930848727, "learning_rate": 0.0009015190571819438, "loss": 2.4163, "step": 1981 }, { "epoch": 0.22726751519321178, "grad_norm": 0.2634196780583111, "learning_rate": 0.0009014083685414437, "loss": 2.5837, "step": 1982 }, { "epoch": 0.22738218094255247, "grad_norm": 0.2587993373100034, "learning_rate": 0.0009012976245343106, "loss": 2.5766, "step": 1983 }, { "epoch": 0.22749684669189313, "grad_norm": 0.2679774633487803, "learning_rate": 0.0009011868251758195, "loss": 2.5395, "step": 1984 }, { "epoch": 0.22761151244123382, "grad_norm": 0.2464764208886438, "learning_rate": 0.0009010759704812533, "loss": 2.5595, "step": 1985 }, { "epoch": 0.22772617819057447, "grad_norm": 0.30034187657100625, "learning_rate": 0.0009009650604659023, "loss": 2.6609, "step": 1986 }, { "epoch": 0.22784084393991516, "grad_norm": 0.27478891063492406, "learning_rate": 0.0009008540951450641, "loss": 2.5061, "step": 1987 }, { "epoch": 0.22795550968925582, "grad_norm": 0.32996377214402783, "learning_rate": 0.0009007430745340446, "loss": 2.6858, "step": 1988 }, { "epoch": 0.22807017543859648, "grad_norm": 0.2977665579874024, "learning_rate": 0.0009006319986481567, "loss": 2.48, "step": 1989 }, { "epoch": 0.22818484118793717, "grad_norm": 0.2733278678858816, "learning_rate": 0.0009005208675027215, "loss": 2.5679, "step": 1990 }, { "epoch": 0.22829950693727782, "grad_norm": 0.30524051395291907, "learning_rate": 0.000900409681113067, "loss": 2.5508, "step": 1991 }, { "epoch": 0.2284141726866185, "grad_norm": 0.2800427758134093, "learning_rate": 0.0009002984394945298, "loss": 2.6196, "step": 1992 }, { "epoch": 0.22852883843595917, "grad_norm": 0.2573831033007541, "learning_rate": 0.0009001871426624528, "loss": 2.3963, "step": 1993 }, { "epoch": 0.22864350418529986, "grad_norm": 0.2891264003884552, "learning_rate": 0.0009000757906321882, "loss": 2.6205, "step": 1994 }, { "epoch": 0.22875816993464052, "grad_norm": 0.3077980662285859, "learning_rate": 0.0008999643834190941, "loss": 2.7084, "step": 1995 }, { "epoch": 0.2288728356839812, "grad_norm": 0.26250196112539614, "learning_rate": 0.0008998529210385375, "loss": 2.508, "step": 1996 }, { "epoch": 0.22898750143332186, "grad_norm": 0.29108230390030326, "learning_rate": 0.0008997414035058922, "loss": 2.6415, "step": 1997 }, { "epoch": 0.22910216718266255, "grad_norm": 0.2847300715396866, "learning_rate": 0.0008996298308365403, "loss": 2.7271, "step": 1998 }, { "epoch": 0.2292168329320032, "grad_norm": 0.2682951236555709, "learning_rate": 0.0008995182030458707, "loss": 2.5449, "step": 1999 }, { "epoch": 0.2293314986813439, "grad_norm": 0.26825010011137673, "learning_rate": 0.0008994065201492804, "loss": 2.4948, "step": 2000 }, { "epoch": 0.22944616443068455, "grad_norm": 0.2672136640508456, "learning_rate": 0.0008992947821621741, "loss": 2.5919, "step": 2001 }, { "epoch": 0.22956083018002524, "grad_norm": 0.25284187507606976, "learning_rate": 0.0008991829890999639, "loss": 2.5536, "step": 2002 }, { "epoch": 0.2296754959293659, "grad_norm": 0.2594435921976009, "learning_rate": 0.0008990711409780694, "loss": 2.5322, "step": 2003 }, { "epoch": 0.22979016167870658, "grad_norm": 0.29046265198513505, "learning_rate": 0.000898959237811918, "loss": 2.5496, "step": 2004 }, { "epoch": 0.22990482742804724, "grad_norm": 0.2691829907742786, "learning_rate": 0.0008988472796169447, "loss": 2.685, "step": 2005 }, { "epoch": 0.2300194931773879, "grad_norm": 0.2776127116050503, "learning_rate": 0.0008987352664085919, "loss": 2.6757, "step": 2006 }, { "epoch": 0.2301341589267286, "grad_norm": 0.27972545281697553, "learning_rate": 0.0008986231982023097, "loss": 2.5275, "step": 2007 }, { "epoch": 0.23024882467606925, "grad_norm": 0.3006560524748079, "learning_rate": 0.0008985110750135556, "loss": 2.5572, "step": 2008 }, { "epoch": 0.23036349042540993, "grad_norm": 0.29022518851669266, "learning_rate": 0.0008983988968577951, "loss": 2.6411, "step": 2009 }, { "epoch": 0.2304781561747506, "grad_norm": 0.2788691649586618, "learning_rate": 0.0008982866637505009, "loss": 2.6019, "step": 2010 }, { "epoch": 0.23059282192409128, "grad_norm": 0.25984198988934054, "learning_rate": 0.0008981743757071535, "loss": 2.6253, "step": 2011 }, { "epoch": 0.23070748767343194, "grad_norm": 0.2959609216818093, "learning_rate": 0.000898062032743241, "loss": 2.5903, "step": 2012 }, { "epoch": 0.23082215342277262, "grad_norm": 0.27262415972247456, "learning_rate": 0.0008979496348742586, "loss": 2.5754, "step": 2013 }, { "epoch": 0.23093681917211328, "grad_norm": 0.3136942085157362, "learning_rate": 0.0008978371821157098, "loss": 2.533, "step": 2014 }, { "epoch": 0.23105148492145397, "grad_norm": 0.2560522239365619, "learning_rate": 0.0008977246744831052, "loss": 2.4911, "step": 2015 }, { "epoch": 0.23116615067079463, "grad_norm": 0.28767308296605126, "learning_rate": 0.0008976121119919631, "loss": 2.516, "step": 2016 }, { "epoch": 0.23128081642013532, "grad_norm": 0.2888355088889753, "learning_rate": 0.0008974994946578093, "loss": 2.6907, "step": 2017 }, { "epoch": 0.23139548216947597, "grad_norm": 0.26796277320531725, "learning_rate": 0.0008973868224961772, "loss": 2.582, "step": 2018 }, { "epoch": 0.23151014791881666, "grad_norm": 0.2728003912529523, "learning_rate": 0.0008972740955226079, "loss": 2.5613, "step": 2019 }, { "epoch": 0.23162481366815732, "grad_norm": 0.2385740338397358, "learning_rate": 0.0008971613137526498, "loss": 2.5391, "step": 2020 }, { "epoch": 0.231739479417498, "grad_norm": 0.295634394796691, "learning_rate": 0.000897048477201859, "loss": 2.6977, "step": 2021 }, { "epoch": 0.23185414516683867, "grad_norm": 0.29329468558132005, "learning_rate": 0.0008969355858857994, "loss": 2.6056, "step": 2022 }, { "epoch": 0.23196881091617932, "grad_norm": 0.290327238218866, "learning_rate": 0.0008968226398200418, "loss": 2.5391, "step": 2023 }, { "epoch": 0.23208347666552, "grad_norm": 0.2971721146485292, "learning_rate": 0.0008967096390201652, "loss": 2.7293, "step": 2024 }, { "epoch": 0.23219814241486067, "grad_norm": 0.3033863910349409, "learning_rate": 0.000896596583501756, "loss": 2.5282, "step": 2025 }, { "epoch": 0.23231280816420136, "grad_norm": 0.27782767988868723, "learning_rate": 0.0008964834732804078, "loss": 2.6139, "step": 2026 }, { "epoch": 0.23242747391354202, "grad_norm": 0.29141287249691744, "learning_rate": 0.0008963703083717222, "loss": 2.4896, "step": 2027 }, { "epoch": 0.2325421396628827, "grad_norm": 0.3166698914267737, "learning_rate": 0.000896257088791308, "loss": 2.5739, "step": 2028 }, { "epoch": 0.23265680541222336, "grad_norm": 0.2837361071196873, "learning_rate": 0.0008961438145547818, "loss": 2.7438, "step": 2029 }, { "epoch": 0.23277147116156405, "grad_norm": 0.2819738877993953, "learning_rate": 0.0008960304856777675, "loss": 2.6682, "step": 2030 }, { "epoch": 0.2328861369109047, "grad_norm": 0.28331415765898876, "learning_rate": 0.0008959171021758967, "loss": 2.6967, "step": 2031 }, { "epoch": 0.2330008026602454, "grad_norm": 0.26747427331751383, "learning_rate": 0.0008958036640648086, "loss": 2.6176, "step": 2032 }, { "epoch": 0.23311546840958605, "grad_norm": 0.23171303557327103, "learning_rate": 0.0008956901713601499, "loss": 2.6092, "step": 2033 }, { "epoch": 0.23323013415892674, "grad_norm": 0.25595726239499245, "learning_rate": 0.0008955766240775745, "loss": 2.6118, "step": 2034 }, { "epoch": 0.2333447999082674, "grad_norm": 0.2577097650207744, "learning_rate": 0.000895463022232744, "loss": 2.6268, "step": 2035 }, { "epoch": 0.23345946565760808, "grad_norm": 0.27300775433398217, "learning_rate": 0.0008953493658413279, "loss": 2.4494, "step": 2036 }, { "epoch": 0.23357413140694874, "grad_norm": 0.2750678679822444, "learning_rate": 0.0008952356549190028, "loss": 2.591, "step": 2037 }, { "epoch": 0.23368879715628943, "grad_norm": 0.24804890414123798, "learning_rate": 0.000895121889481453, "loss": 2.515, "step": 2038 }, { "epoch": 0.2338034629056301, "grad_norm": 0.2917829401576775, "learning_rate": 0.0008950080695443704, "loss": 2.6668, "step": 2039 }, { "epoch": 0.23391812865497075, "grad_norm": 0.26037178457093035, "learning_rate": 0.000894894195123454, "loss": 2.4622, "step": 2040 }, { "epoch": 0.23403279440431143, "grad_norm": 0.2930535976906809, "learning_rate": 0.0008947802662344108, "loss": 2.6106, "step": 2041 }, { "epoch": 0.2341474601536521, "grad_norm": 0.30151602216694573, "learning_rate": 0.0008946662828929551, "loss": 2.5512, "step": 2042 }, { "epoch": 0.23426212590299278, "grad_norm": 0.2725319309718963, "learning_rate": 0.0008945522451148086, "loss": 2.5881, "step": 2043 }, { "epoch": 0.23437679165233344, "grad_norm": 0.2547792911152703, "learning_rate": 0.0008944381529157008, "loss": 2.5752, "step": 2044 }, { "epoch": 0.23449145740167412, "grad_norm": 0.2924461510705153, "learning_rate": 0.0008943240063113685, "loss": 2.6693, "step": 2045 }, { "epoch": 0.23460612315101478, "grad_norm": 0.2660855655343598, "learning_rate": 0.0008942098053175559, "loss": 2.6169, "step": 2046 }, { "epoch": 0.23472078890035547, "grad_norm": 0.28891568303753096, "learning_rate": 0.0008940955499500152, "loss": 2.6187, "step": 2047 }, { "epoch": 0.23483545464969613, "grad_norm": 0.2928016772942542, "learning_rate": 0.0008939812402245053, "loss": 2.5617, "step": 2048 }, { "epoch": 0.23495012039903682, "grad_norm": 0.2796043634393937, "learning_rate": 0.0008938668761567934, "loss": 2.6915, "step": 2049 }, { "epoch": 0.23506478614837747, "grad_norm": 0.2639617720309141, "learning_rate": 0.0008937524577626537, "loss": 2.5295, "step": 2050 }, { "epoch": 0.23517945189771816, "grad_norm": 0.28332468148810735, "learning_rate": 0.0008936379850578679, "loss": 2.608, "step": 2051 }, { "epoch": 0.23529411764705882, "grad_norm": 0.29056727485708317, "learning_rate": 0.0008935234580582258, "loss": 2.5081, "step": 2052 }, { "epoch": 0.2354087833963995, "grad_norm": 0.2789825026228519, "learning_rate": 0.0008934088767795236, "loss": 2.6309, "step": 2053 }, { "epoch": 0.23552344914574017, "grad_norm": 0.25803688005507325, "learning_rate": 0.0008932942412375662, "loss": 2.6421, "step": 2054 }, { "epoch": 0.23563811489508085, "grad_norm": 0.28067943823856445, "learning_rate": 0.0008931795514481648, "loss": 2.5896, "step": 2055 }, { "epoch": 0.2357527806444215, "grad_norm": 0.2626655722620775, "learning_rate": 0.0008930648074271391, "loss": 2.5226, "step": 2056 }, { "epoch": 0.23586744639376217, "grad_norm": 0.27793313295909816, "learning_rate": 0.0008929500091903158, "loss": 2.6282, "step": 2057 }, { "epoch": 0.23598211214310286, "grad_norm": 0.35055272401260446, "learning_rate": 0.0008928351567535289, "loss": 2.7735, "step": 2058 }, { "epoch": 0.23609677789244352, "grad_norm": 0.2917132432380481, "learning_rate": 0.0008927202501326204, "loss": 2.5179, "step": 2059 }, { "epoch": 0.2362114436417842, "grad_norm": 0.27648522151288074, "learning_rate": 0.0008926052893434391, "loss": 2.6153, "step": 2060 }, { "epoch": 0.23632610939112486, "grad_norm": 0.2929851217902678, "learning_rate": 0.000892490274401842, "loss": 2.5626, "step": 2061 }, { "epoch": 0.23644077514046555, "grad_norm": 0.3222088766833428, "learning_rate": 0.0008923752053236931, "loss": 2.5384, "step": 2062 }, { "epoch": 0.2365554408898062, "grad_norm": 0.28362840031094594, "learning_rate": 0.0008922600821248638, "loss": 2.5666, "step": 2063 }, { "epoch": 0.2366701066391469, "grad_norm": 0.3026770530884364, "learning_rate": 0.0008921449048212336, "loss": 2.4204, "step": 2064 }, { "epoch": 0.23678477238848755, "grad_norm": 0.27490345210920014, "learning_rate": 0.0008920296734286886, "loss": 2.5661, "step": 2065 }, { "epoch": 0.23689943813782824, "grad_norm": 0.2911812069660475, "learning_rate": 0.000891914387963123, "loss": 2.6534, "step": 2066 }, { "epoch": 0.2370141038871689, "grad_norm": 0.3227271905331262, "learning_rate": 0.0008917990484404382, "loss": 2.435, "step": 2067 }, { "epoch": 0.23712876963650958, "grad_norm": 0.302089936192019, "learning_rate": 0.0008916836548765427, "loss": 2.7398, "step": 2068 }, { "epoch": 0.23724343538585024, "grad_norm": 0.2970085321764663, "learning_rate": 0.0008915682072873535, "loss": 2.5906, "step": 2069 }, { "epoch": 0.23735810113519093, "grad_norm": 0.28518087127667213, "learning_rate": 0.000891452705688794, "loss": 2.6535, "step": 2070 }, { "epoch": 0.2374727668845316, "grad_norm": 0.2611123593225715, "learning_rate": 0.0008913371500967955, "loss": 2.4873, "step": 2071 }, { "epoch": 0.23758743263387228, "grad_norm": 0.24976771111916965, "learning_rate": 0.0008912215405272967, "loss": 2.5358, "step": 2072 }, { "epoch": 0.23770209838321293, "grad_norm": 0.2663529780949666, "learning_rate": 0.0008911058769962437, "loss": 2.7244, "step": 2073 }, { "epoch": 0.2378167641325536, "grad_norm": 0.27255045045625054, "learning_rate": 0.0008909901595195902, "loss": 2.6532, "step": 2074 }, { "epoch": 0.23793142988189428, "grad_norm": 0.26066324396023205, "learning_rate": 0.0008908743881132972, "loss": 2.6092, "step": 2075 }, { "epoch": 0.23804609563123494, "grad_norm": 0.2733797221471541, "learning_rate": 0.000890758562793333, "loss": 2.6079, "step": 2076 }, { "epoch": 0.23816076138057563, "grad_norm": 0.26982144597322827, "learning_rate": 0.0008906426835756736, "loss": 2.7263, "step": 2077 }, { "epoch": 0.23827542712991628, "grad_norm": 0.2848488148017355, "learning_rate": 0.0008905267504763024, "loss": 2.5552, "step": 2078 }, { "epoch": 0.23839009287925697, "grad_norm": 0.2708983786640919, "learning_rate": 0.00089041076351121, "loss": 2.5775, "step": 2079 }, { "epoch": 0.23850475862859763, "grad_norm": 0.30089596508206945, "learning_rate": 0.0008902947226963946, "loss": 2.6537, "step": 2080 }, { "epoch": 0.23861942437793832, "grad_norm": 0.31414989648770386, "learning_rate": 0.0008901786280478621, "loss": 2.6811, "step": 2081 }, { "epoch": 0.23873409012727898, "grad_norm": 0.29653672102809836, "learning_rate": 0.0008900624795816252, "loss": 2.5481, "step": 2082 }, { "epoch": 0.23884875587661966, "grad_norm": 0.27451522996875266, "learning_rate": 0.0008899462773137047, "loss": 2.5268, "step": 2083 }, { "epoch": 0.23896342162596032, "grad_norm": 0.28515334716983537, "learning_rate": 0.0008898300212601281, "loss": 2.5153, "step": 2084 }, { "epoch": 0.239078087375301, "grad_norm": 0.29701746576479504, "learning_rate": 0.0008897137114369309, "loss": 2.6846, "step": 2085 }, { "epoch": 0.23919275312464167, "grad_norm": 0.27696166580891945, "learning_rate": 0.000889597347860156, "loss": 2.6531, "step": 2086 }, { "epoch": 0.23930741887398235, "grad_norm": 0.2909980793085496, "learning_rate": 0.0008894809305458534, "loss": 2.6851, "step": 2087 }, { "epoch": 0.239422084623323, "grad_norm": 0.2754157723614315, "learning_rate": 0.0008893644595100803, "loss": 2.5742, "step": 2088 }, { "epoch": 0.2395367503726637, "grad_norm": 0.291648554431086, "learning_rate": 0.0008892479347689022, "loss": 2.5607, "step": 2089 }, { "epoch": 0.23965141612200436, "grad_norm": 0.284995634125818, "learning_rate": 0.0008891313563383911, "loss": 2.7239, "step": 2090 }, { "epoch": 0.23976608187134502, "grad_norm": 0.25591892297732627, "learning_rate": 0.0008890147242346272, "loss": 2.5548, "step": 2091 }, { "epoch": 0.2398807476206857, "grad_norm": 0.2838251614906059, "learning_rate": 0.0008888980384736972, "loss": 2.5922, "step": 2092 }, { "epoch": 0.23999541337002636, "grad_norm": 0.2742489823788876, "learning_rate": 0.0008887812990716957, "loss": 2.5153, "step": 2093 }, { "epoch": 0.24011007911936705, "grad_norm": 0.2913391536991361, "learning_rate": 0.000888664506044725, "loss": 2.4547, "step": 2094 }, { "epoch": 0.2402247448687077, "grad_norm": 0.24916125892655305, "learning_rate": 0.0008885476594088943, "loss": 2.5089, "step": 2095 }, { "epoch": 0.2403394106180484, "grad_norm": 0.27984691585298577, "learning_rate": 0.0008884307591803203, "loss": 2.4254, "step": 2096 }, { "epoch": 0.24045407636738905, "grad_norm": 0.3127172976315307, "learning_rate": 0.0008883138053751274, "loss": 2.6464, "step": 2097 }, { "epoch": 0.24056874211672974, "grad_norm": 0.2629728202024017, "learning_rate": 0.0008881967980094469, "loss": 2.6215, "step": 2098 }, { "epoch": 0.2406834078660704, "grad_norm": 0.2660403165936794, "learning_rate": 0.0008880797370994178, "loss": 2.4748, "step": 2099 }, { "epoch": 0.24079807361541108, "grad_norm": 0.2757967313778535, "learning_rate": 0.0008879626226611865, "loss": 2.6183, "step": 2100 }, { "epoch": 0.24091273936475174, "grad_norm": 0.30581378298112066, "learning_rate": 0.0008878454547109065, "loss": 2.5983, "step": 2101 }, { "epoch": 0.24102740511409243, "grad_norm": 0.2657161835172812, "learning_rate": 0.0008877282332647392, "loss": 2.5119, "step": 2102 }, { "epoch": 0.2411420708634331, "grad_norm": 0.28291918803406085, "learning_rate": 0.0008876109583388528, "loss": 2.6194, "step": 2103 }, { "epoch": 0.24125673661277378, "grad_norm": 0.28503406464744396, "learning_rate": 0.0008874936299494232, "loss": 2.6356, "step": 2104 }, { "epoch": 0.24137140236211443, "grad_norm": 0.2932311454641575, "learning_rate": 0.0008873762481126337, "loss": 2.557, "step": 2105 }, { "epoch": 0.24148606811145512, "grad_norm": 0.26203023217233284, "learning_rate": 0.0008872588128446749, "loss": 2.576, "step": 2106 }, { "epoch": 0.24160073386079578, "grad_norm": 0.29478108813955256, "learning_rate": 0.0008871413241617446, "loss": 2.4965, "step": 2107 }, { "epoch": 0.24171539961013644, "grad_norm": 0.2885864704894152, "learning_rate": 0.0008870237820800482, "loss": 2.7297, "step": 2108 }, { "epoch": 0.24183006535947713, "grad_norm": 0.3079972953284916, "learning_rate": 0.0008869061866157985, "loss": 2.7824, "step": 2109 }, { "epoch": 0.24194473110881778, "grad_norm": 0.2930604790395038, "learning_rate": 0.0008867885377852153, "loss": 2.5534, "step": 2110 }, { "epoch": 0.24205939685815847, "grad_norm": 0.2587602955529037, "learning_rate": 0.0008866708356045263, "loss": 2.5457, "step": 2111 }, { "epoch": 0.24217406260749913, "grad_norm": 0.27537765505377526, "learning_rate": 0.0008865530800899661, "loss": 2.5993, "step": 2112 }, { "epoch": 0.24228872835683982, "grad_norm": 0.2576908746403623, "learning_rate": 0.000886435271257777, "loss": 2.555, "step": 2113 }, { "epoch": 0.24240339410618048, "grad_norm": 0.24960534322044353, "learning_rate": 0.0008863174091242083, "loss": 2.6807, "step": 2114 }, { "epoch": 0.24251805985552116, "grad_norm": 0.2494879295425208, "learning_rate": 0.0008861994937055167, "loss": 2.5761, "step": 2115 }, { "epoch": 0.24263272560486182, "grad_norm": 0.2445031689763979, "learning_rate": 0.0008860815250179668, "loss": 2.5988, "step": 2116 }, { "epoch": 0.2427473913542025, "grad_norm": 0.2548489042535031, "learning_rate": 0.00088596350307783, "loss": 2.552, "step": 2117 }, { "epoch": 0.24286205710354317, "grad_norm": 0.25714456865733787, "learning_rate": 0.0008858454279013848, "loss": 2.4304, "step": 2118 }, { "epoch": 0.24297672285288385, "grad_norm": 0.26127206422432886, "learning_rate": 0.0008857272995049178, "loss": 2.7842, "step": 2119 }, { "epoch": 0.2430913886022245, "grad_norm": 0.25636324021133794, "learning_rate": 0.0008856091179047225, "loss": 2.5493, "step": 2120 }, { "epoch": 0.2432060543515652, "grad_norm": 0.2660947714054031, "learning_rate": 0.0008854908831170998, "loss": 2.6183, "step": 2121 }, { "epoch": 0.24332072010090586, "grad_norm": 0.2474972961650398, "learning_rate": 0.0008853725951583578, "loss": 2.4701, "step": 2122 }, { "epoch": 0.24343538585024654, "grad_norm": 0.2873052454032071, "learning_rate": 0.0008852542540448123, "loss": 2.5648, "step": 2123 }, { "epoch": 0.2435500515995872, "grad_norm": 0.25216119347971927, "learning_rate": 0.0008851358597927859, "loss": 2.5548, "step": 2124 }, { "epoch": 0.24366471734892786, "grad_norm": 0.28806755938933815, "learning_rate": 0.0008850174124186091, "loss": 2.5978, "step": 2125 }, { "epoch": 0.24377938309826855, "grad_norm": 0.29679955624019355, "learning_rate": 0.0008848989119386193, "loss": 2.66, "step": 2126 }, { "epoch": 0.2438940488476092, "grad_norm": 0.27547071257363437, "learning_rate": 0.0008847803583691614, "loss": 2.6101, "step": 2127 }, { "epoch": 0.2440087145969499, "grad_norm": 0.25162276324212957, "learning_rate": 0.0008846617517265878, "loss": 2.5406, "step": 2128 }, { "epoch": 0.24412338034629055, "grad_norm": 0.26300932807766597, "learning_rate": 0.0008845430920272578, "loss": 2.6072, "step": 2129 }, { "epoch": 0.24423804609563124, "grad_norm": 0.27272647585638227, "learning_rate": 0.0008844243792875384, "loss": 2.4734, "step": 2130 }, { "epoch": 0.2443527118449719, "grad_norm": 0.27752219779695264, "learning_rate": 0.0008843056135238034, "loss": 2.6215, "step": 2131 }, { "epoch": 0.24446737759431258, "grad_norm": 0.26074212968109706, "learning_rate": 0.0008841867947524349, "loss": 2.5537, "step": 2132 }, { "epoch": 0.24458204334365324, "grad_norm": 0.2978108333818431, "learning_rate": 0.0008840679229898211, "loss": 2.6106, "step": 2133 }, { "epoch": 0.24469670909299393, "grad_norm": 0.24477389564073224, "learning_rate": 0.0008839489982523583, "loss": 2.4645, "step": 2134 }, { "epoch": 0.2448113748423346, "grad_norm": 0.22725209154719025, "learning_rate": 0.00088383002055645, "loss": 2.4696, "step": 2135 }, { "epoch": 0.24492604059167528, "grad_norm": 0.2938523284784154, "learning_rate": 0.000883710989918507, "loss": 2.598, "step": 2136 }, { "epoch": 0.24504070634101593, "grad_norm": 0.261331527021155, "learning_rate": 0.0008835919063549469, "loss": 2.6178, "step": 2137 }, { "epoch": 0.24515537209035662, "grad_norm": 0.26859697226278734, "learning_rate": 0.0008834727698821953, "loss": 2.6278, "step": 2138 }, { "epoch": 0.24527003783969728, "grad_norm": 0.27695959912974366, "learning_rate": 0.0008833535805166848, "loss": 2.621, "step": 2139 }, { "epoch": 0.24538470358903797, "grad_norm": 0.2563986158997265, "learning_rate": 0.0008832343382748551, "loss": 2.6361, "step": 2140 }, { "epoch": 0.24549936933837863, "grad_norm": 0.2721319431725499, "learning_rate": 0.0008831150431731537, "loss": 2.5338, "step": 2141 }, { "epoch": 0.24561403508771928, "grad_norm": 0.2775860886077074, "learning_rate": 0.0008829956952280349, "loss": 2.6157, "step": 2142 }, { "epoch": 0.24572870083705997, "grad_norm": 0.3132617899302838, "learning_rate": 0.0008828762944559605, "loss": 2.5978, "step": 2143 }, { "epoch": 0.24584336658640063, "grad_norm": 0.29381633344940555, "learning_rate": 0.0008827568408733996, "loss": 2.6299, "step": 2144 }, { "epoch": 0.24595803233574132, "grad_norm": 0.30463962240462816, "learning_rate": 0.0008826373344968285, "loss": 2.5962, "step": 2145 }, { "epoch": 0.24607269808508198, "grad_norm": 0.29782790236136236, "learning_rate": 0.0008825177753427309, "loss": 2.5782, "step": 2146 }, { "epoch": 0.24618736383442266, "grad_norm": 0.30281231221941285, "learning_rate": 0.0008823981634275975, "loss": 2.6383, "step": 2147 }, { "epoch": 0.24630202958376332, "grad_norm": 0.24215061991169268, "learning_rate": 0.0008822784987679266, "loss": 2.5703, "step": 2148 }, { "epoch": 0.246416695333104, "grad_norm": 0.31253175077088474, "learning_rate": 0.0008821587813802237, "loss": 2.6332, "step": 2149 }, { "epoch": 0.24653136108244467, "grad_norm": 0.24886492815626768, "learning_rate": 0.0008820390112810017, "loss": 2.6129, "step": 2150 }, { "epoch": 0.24664602683178535, "grad_norm": 0.2643695296927128, "learning_rate": 0.0008819191884867803, "loss": 2.5687, "step": 2151 }, { "epoch": 0.246760692581126, "grad_norm": 0.2515771588573229, "learning_rate": 0.0008817993130140869, "loss": 2.5878, "step": 2152 }, { "epoch": 0.2468753583304667, "grad_norm": 0.25750067607299454, "learning_rate": 0.000881679384879456, "loss": 2.6469, "step": 2153 }, { "epoch": 0.24699002407980736, "grad_norm": 0.27383810231683675, "learning_rate": 0.0008815594040994294, "loss": 2.665, "step": 2154 }, { "epoch": 0.24710468982914804, "grad_norm": 0.27762673834547347, "learning_rate": 0.0008814393706905564, "loss": 2.472, "step": 2155 }, { "epoch": 0.2472193555784887, "grad_norm": 0.27819375963144277, "learning_rate": 0.000881319284669393, "loss": 2.6225, "step": 2156 }, { "epoch": 0.2473340213278294, "grad_norm": 0.33649204522384596, "learning_rate": 0.0008811991460525027, "loss": 2.5926, "step": 2157 }, { "epoch": 0.24744868707717005, "grad_norm": 0.24599223525986988, "learning_rate": 0.0008810789548564566, "loss": 2.549, "step": 2158 }, { "epoch": 0.2475633528265107, "grad_norm": 0.26582966025208427, "learning_rate": 0.0008809587110978328, "loss": 2.7502, "step": 2159 }, { "epoch": 0.2476780185758514, "grad_norm": 0.28866014819628577, "learning_rate": 0.0008808384147932165, "loss": 2.6429, "step": 2160 }, { "epoch": 0.24779268432519205, "grad_norm": 0.32020278086194337, "learning_rate": 0.0008807180659592004, "loss": 2.643, "step": 2161 }, { "epoch": 0.24790735007453274, "grad_norm": 0.2648469713338004, "learning_rate": 0.0008805976646123841, "loss": 2.5552, "step": 2162 }, { "epoch": 0.2480220158238734, "grad_norm": 0.2988374877564414, "learning_rate": 0.0008804772107693748, "loss": 2.528, "step": 2163 }, { "epoch": 0.24813668157321409, "grad_norm": 0.26260890842065365, "learning_rate": 0.000880356704446787, "loss": 2.6647, "step": 2164 }, { "epoch": 0.24825134732255474, "grad_norm": 0.30818242750749236, "learning_rate": 0.000880236145661242, "loss": 2.5685, "step": 2165 }, { "epoch": 0.24836601307189543, "grad_norm": 0.2787901023832149, "learning_rate": 0.0008801155344293686, "loss": 2.6331, "step": 2166 }, { "epoch": 0.2484806788212361, "grad_norm": 0.28566430283277655, "learning_rate": 0.0008799948707678031, "loss": 2.5343, "step": 2167 }, { "epoch": 0.24859534457057678, "grad_norm": 0.27868848627223514, "learning_rate": 0.0008798741546931883, "loss": 2.6632, "step": 2168 }, { "epoch": 0.24871001031991744, "grad_norm": 0.30755355774382004, "learning_rate": 0.000879753386222175, "loss": 2.5377, "step": 2169 }, { "epoch": 0.24882467606925812, "grad_norm": 0.2609655515484714, "learning_rate": 0.0008796325653714208, "loss": 2.6144, "step": 2170 }, { "epoch": 0.24893934181859878, "grad_norm": 0.24629231747677463, "learning_rate": 0.0008795116921575907, "loss": 2.5052, "step": 2171 }, { "epoch": 0.24905400756793947, "grad_norm": 0.2644459369657751, "learning_rate": 0.0008793907665973569, "loss": 2.5238, "step": 2172 }, { "epoch": 0.24916867331728013, "grad_norm": 0.2978346463875741, "learning_rate": 0.0008792697887073986, "loss": 2.6363, "step": 2173 }, { "epoch": 0.2492833390666208, "grad_norm": 0.2500251150066589, "learning_rate": 0.0008791487585044025, "loss": 2.559, "step": 2174 }, { "epoch": 0.24939800481596147, "grad_norm": 0.25651828158917256, "learning_rate": 0.0008790276760050624, "loss": 2.4847, "step": 2175 }, { "epoch": 0.24951267056530213, "grad_norm": 0.2859445171250938, "learning_rate": 0.0008789065412260793, "loss": 2.5623, "step": 2176 }, { "epoch": 0.24962733631464282, "grad_norm": 0.2811702279116988, "learning_rate": 0.0008787853541841614, "loss": 2.4792, "step": 2177 }, { "epoch": 0.24974200206398348, "grad_norm": 0.2906677192542289, "learning_rate": 0.0008786641148960243, "loss": 2.5391, "step": 2178 }, { "epoch": 0.24985666781332416, "grad_norm": 0.2531651188370428, "learning_rate": 0.0008785428233783905, "loss": 2.5464, "step": 2179 }, { "epoch": 0.24997133356266482, "grad_norm": 0.3326303153761615, "learning_rate": 0.0008784214796479899, "loss": 2.5027, "step": 2180 }, { "epoch": 0.2500859993120055, "grad_norm": 0.3193515188542318, "learning_rate": 0.0008783000837215596, "loss": 2.6335, "step": 2181 }, { "epoch": 0.2502006650613462, "grad_norm": 0.30926487274517483, "learning_rate": 0.0008781786356158437, "loss": 2.4585, "step": 2182 }, { "epoch": 0.25031533081068685, "grad_norm": 0.2715811566625429, "learning_rate": 0.0008780571353475939, "loss": 2.5039, "step": 2183 }, { "epoch": 0.2504299965600275, "grad_norm": 0.2964901218750409, "learning_rate": 0.0008779355829335684, "loss": 2.6953, "step": 2184 }, { "epoch": 0.25054466230936817, "grad_norm": 0.24544218550243066, "learning_rate": 0.0008778139783905337, "loss": 2.5912, "step": 2185 }, { "epoch": 0.2506593280587089, "grad_norm": 0.2529431259467698, "learning_rate": 0.0008776923217352624, "loss": 2.6229, "step": 2186 }, { "epoch": 0.25077399380804954, "grad_norm": 0.25739451217442405, "learning_rate": 0.0008775706129845347, "loss": 2.4644, "step": 2187 }, { "epoch": 0.2508886595573902, "grad_norm": 0.3060219737497598, "learning_rate": 0.0008774488521551381, "loss": 2.5758, "step": 2188 }, { "epoch": 0.25100332530673086, "grad_norm": 0.23999009094817067, "learning_rate": 0.0008773270392638671, "loss": 2.5606, "step": 2189 }, { "epoch": 0.2511179910560716, "grad_norm": 0.27697438838967164, "learning_rate": 0.0008772051743275237, "loss": 2.5119, "step": 2190 }, { "epoch": 0.25123265680541224, "grad_norm": 0.234245135757513, "learning_rate": 0.0008770832573629166, "loss": 2.4491, "step": 2191 }, { "epoch": 0.2513473225547529, "grad_norm": 0.2825508410145344, "learning_rate": 0.000876961288386862, "loss": 2.494, "step": 2192 }, { "epoch": 0.25146198830409355, "grad_norm": 0.2800573382234421, "learning_rate": 0.0008768392674161833, "loss": 2.5834, "step": 2193 }, { "epoch": 0.2515766540534342, "grad_norm": 0.2633897696161971, "learning_rate": 0.0008767171944677108, "loss": 2.5069, "step": 2194 }, { "epoch": 0.2516913198027749, "grad_norm": 0.28697020795131756, "learning_rate": 0.0008765950695582821, "loss": 2.4781, "step": 2195 }, { "epoch": 0.2518059855521156, "grad_norm": 0.277875981133512, "learning_rate": 0.0008764728927047423, "loss": 2.7404, "step": 2196 }, { "epoch": 0.25192065130145624, "grad_norm": 0.31508805771299275, "learning_rate": 0.0008763506639239432, "loss": 2.68, "step": 2197 }, { "epoch": 0.2520353170507969, "grad_norm": 0.2648019354850289, "learning_rate": 0.0008762283832327436, "loss": 2.5557, "step": 2198 }, { "epoch": 0.2521499828001376, "grad_norm": 0.2807908635015067, "learning_rate": 0.0008761060506480103, "loss": 2.5822, "step": 2199 }, { "epoch": 0.2522646485494783, "grad_norm": 0.2484454147193635, "learning_rate": 0.0008759836661866165, "loss": 2.6415, "step": 2200 }, { "epoch": 0.25237931429881894, "grad_norm": 0.2508520028353344, "learning_rate": 0.0008758612298654429, "loss": 2.5816, "step": 2201 }, { "epoch": 0.2524939800481596, "grad_norm": 0.2656137039955321, "learning_rate": 0.0008757387417013772, "loss": 2.6365, "step": 2202 }, { "epoch": 0.2526086457975003, "grad_norm": 0.24477903790884062, "learning_rate": 0.0008756162017113144, "loss": 2.666, "step": 2203 }, { "epoch": 0.25272331154684097, "grad_norm": 0.257342758419661, "learning_rate": 0.0008754936099121565, "loss": 2.6132, "step": 2204 }, { "epoch": 0.2528379772961816, "grad_norm": 0.27160361774340414, "learning_rate": 0.0008753709663208125, "loss": 2.6301, "step": 2205 }, { "epoch": 0.2529526430455223, "grad_norm": 0.29788517351294885, "learning_rate": 0.0008752482709541989, "loss": 2.4578, "step": 2206 }, { "epoch": 0.253067308794863, "grad_norm": 0.2541502158599345, "learning_rate": 0.0008751255238292392, "loss": 2.7553, "step": 2207 }, { "epoch": 0.25318197454420366, "grad_norm": 0.25521331647142953, "learning_rate": 0.0008750027249628643, "loss": 2.6085, "step": 2208 }, { "epoch": 0.2532966402935443, "grad_norm": 0.2854386178227579, "learning_rate": 0.0008748798743720115, "loss": 2.5559, "step": 2209 }, { "epoch": 0.253411306042885, "grad_norm": 0.27181717228193636, "learning_rate": 0.0008747569720736257, "loss": 2.5418, "step": 2210 }, { "epoch": 0.25352597179222564, "grad_norm": 0.2782953203836333, "learning_rate": 0.0008746340180846595, "loss": 2.502, "step": 2211 }, { "epoch": 0.25364063754156635, "grad_norm": 0.24452727294337298, "learning_rate": 0.0008745110124220714, "loss": 2.4816, "step": 2212 }, { "epoch": 0.253755303290907, "grad_norm": 0.2634087885656492, "learning_rate": 0.000874387955102828, "loss": 2.6175, "step": 2213 }, { "epoch": 0.25386996904024767, "grad_norm": 0.28336580811993284, "learning_rate": 0.0008742648461439028, "loss": 2.5207, "step": 2214 }, { "epoch": 0.2539846347895883, "grad_norm": 0.30107090185932867, "learning_rate": 0.0008741416855622762, "loss": 2.5349, "step": 2215 }, { "epoch": 0.25409930053892904, "grad_norm": 0.2890848556663944, "learning_rate": 0.0008740184733749357, "loss": 2.5632, "step": 2216 }, { "epoch": 0.2542139662882697, "grad_norm": 0.2898358629216642, "learning_rate": 0.0008738952095988763, "loss": 2.4426, "step": 2217 }, { "epoch": 0.25432863203761036, "grad_norm": 0.28206572032378063, "learning_rate": 0.0008737718942510999, "loss": 2.5301, "step": 2218 }, { "epoch": 0.254443297786951, "grad_norm": 0.30775317701560984, "learning_rate": 0.0008736485273486155, "loss": 2.6642, "step": 2219 }, { "epoch": 0.25455796353629173, "grad_norm": 0.30761789488620694, "learning_rate": 0.000873525108908439, "loss": 2.5808, "step": 2220 }, { "epoch": 0.2546726292856324, "grad_norm": 0.27849907028762777, "learning_rate": 0.0008734016389475938, "loss": 2.5928, "step": 2221 }, { "epoch": 0.25478729503497305, "grad_norm": 0.2601712252443285, "learning_rate": 0.0008732781174831103, "loss": 2.6341, "step": 2222 }, { "epoch": 0.2549019607843137, "grad_norm": 0.29336267711412556, "learning_rate": 0.0008731545445320257, "loss": 2.6945, "step": 2223 }, { "epoch": 0.2550166265336544, "grad_norm": 0.26181899424102684, "learning_rate": 0.0008730309201113847, "loss": 2.6164, "step": 2224 }, { "epoch": 0.2551312922829951, "grad_norm": 0.29306490022846154, "learning_rate": 0.0008729072442382387, "loss": 2.6241, "step": 2225 }, { "epoch": 0.25524595803233574, "grad_norm": 0.2665302061003145, "learning_rate": 0.0008727835169296469, "loss": 2.5605, "step": 2226 }, { "epoch": 0.2553606237816764, "grad_norm": 0.2912004963267849, "learning_rate": 0.0008726597382026747, "loss": 2.6974, "step": 2227 }, { "epoch": 0.25547528953101706, "grad_norm": 0.2544195464220581, "learning_rate": 0.0008725359080743951, "loss": 2.5399, "step": 2228 }, { "epoch": 0.2555899552803578, "grad_norm": 0.26771490699631617, "learning_rate": 0.0008724120265618882, "loss": 2.5924, "step": 2229 }, { "epoch": 0.25570462102969843, "grad_norm": 0.2860306691778564, "learning_rate": 0.0008722880936822411, "loss": 2.6421, "step": 2230 }, { "epoch": 0.2558192867790391, "grad_norm": 0.26006800853774487, "learning_rate": 0.0008721641094525481, "loss": 2.6681, "step": 2231 }, { "epoch": 0.25593395252837975, "grad_norm": 0.24753378686516658, "learning_rate": 0.0008720400738899101, "loss": 2.6519, "step": 2232 }, { "epoch": 0.25604861827772046, "grad_norm": 0.2611555553673946, "learning_rate": 0.0008719159870114356, "loss": 2.7554, "step": 2233 }, { "epoch": 0.2561632840270611, "grad_norm": 0.2641734092054442, "learning_rate": 0.00087179184883424, "loss": 2.5378, "step": 2234 }, { "epoch": 0.2562779497764018, "grad_norm": 0.2862601053336579, "learning_rate": 0.000871667659375446, "loss": 2.5587, "step": 2235 }, { "epoch": 0.25639261552574244, "grad_norm": 0.26929362885664176, "learning_rate": 0.0008715434186521831, "loss": 2.7045, "step": 2236 }, { "epoch": 0.25650728127508315, "grad_norm": 0.25995581843538057, "learning_rate": 0.0008714191266815877, "loss": 2.6604, "step": 2237 }, { "epoch": 0.2566219470244238, "grad_norm": 0.26718769637286294, "learning_rate": 0.0008712947834808036, "loss": 2.7194, "step": 2238 }, { "epoch": 0.25673661277376447, "grad_norm": 0.2819896919993998, "learning_rate": 0.0008711703890669818, "loss": 2.7002, "step": 2239 }, { "epoch": 0.25685127852310513, "grad_norm": 0.28295472742410066, "learning_rate": 0.0008710459434572799, "loss": 2.5698, "step": 2240 }, { "epoch": 0.25696594427244585, "grad_norm": 0.26807300227378156, "learning_rate": 0.0008709214466688629, "loss": 2.4006, "step": 2241 }, { "epoch": 0.2570806100217865, "grad_norm": 0.29296859623661425, "learning_rate": 0.0008707968987189028, "loss": 2.6625, "step": 2242 }, { "epoch": 0.25719527577112716, "grad_norm": 0.2712407379275656, "learning_rate": 0.0008706722996245784, "loss": 2.5442, "step": 2243 }, { "epoch": 0.2573099415204678, "grad_norm": 0.2650985823525957, "learning_rate": 0.0008705476494030762, "loss": 2.536, "step": 2244 }, { "epoch": 0.2574246072698085, "grad_norm": 0.3120190406251584, "learning_rate": 0.0008704229480715887, "loss": 2.73, "step": 2245 }, { "epoch": 0.2575392730191492, "grad_norm": 0.3045619757284662, "learning_rate": 0.0008702981956473166, "loss": 2.489, "step": 2246 }, { "epoch": 0.25765393876848985, "grad_norm": 0.3015644488012433, "learning_rate": 0.0008701733921474671, "loss": 2.6156, "step": 2247 }, { "epoch": 0.2577686045178305, "grad_norm": 0.2896816265627592, "learning_rate": 0.0008700485375892539, "loss": 2.5312, "step": 2248 }, { "epoch": 0.25788327026717117, "grad_norm": 0.2664923768416574, "learning_rate": 0.000869923631989899, "loss": 2.5933, "step": 2249 }, { "epoch": 0.2579979360165119, "grad_norm": 0.27558176584652383, "learning_rate": 0.0008697986753666304, "loss": 2.5954, "step": 2250 }, { "epoch": 0.25811260176585255, "grad_norm": 0.28325361328164844, "learning_rate": 0.0008696736677366834, "loss": 2.5474, "step": 2251 }, { "epoch": 0.2582272675151932, "grad_norm": 0.275625344387118, "learning_rate": 0.0008695486091173008, "loss": 2.5564, "step": 2252 }, { "epoch": 0.25834193326453386, "grad_norm": 0.26234831296859634, "learning_rate": 0.0008694234995257318, "loss": 2.5326, "step": 2253 }, { "epoch": 0.2584565990138746, "grad_norm": 0.2711624295680704, "learning_rate": 0.0008692983389792326, "loss": 2.6239, "step": 2254 }, { "epoch": 0.25857126476321524, "grad_norm": 0.28714049442026357, "learning_rate": 0.0008691731274950671, "loss": 2.5635, "step": 2255 }, { "epoch": 0.2586859305125559, "grad_norm": 0.24116240418726115, "learning_rate": 0.0008690478650905059, "loss": 2.5789, "step": 2256 }, { "epoch": 0.25880059626189655, "grad_norm": 0.2339413263774728, "learning_rate": 0.0008689225517828263, "loss": 2.5411, "step": 2257 }, { "epoch": 0.25891526201123727, "grad_norm": 0.24285699025705562, "learning_rate": 0.000868797187589313, "loss": 2.5574, "step": 2258 }, { "epoch": 0.2590299277605779, "grad_norm": 0.2569789387457917, "learning_rate": 0.0008686717725272577, "loss": 2.5587, "step": 2259 }, { "epoch": 0.2591445935099186, "grad_norm": 0.26186626068656543, "learning_rate": 0.0008685463066139587, "loss": 2.5272, "step": 2260 }, { "epoch": 0.25925925925925924, "grad_norm": 0.28113688577406803, "learning_rate": 0.000868420789866722, "loss": 2.5089, "step": 2261 }, { "epoch": 0.2593739250085999, "grad_norm": 0.2674832989457913, "learning_rate": 0.00086829522230286, "loss": 2.5474, "step": 2262 }, { "epoch": 0.2594885907579406, "grad_norm": 0.279935803774432, "learning_rate": 0.0008681696039396924, "loss": 2.5312, "step": 2263 }, { "epoch": 0.2596032565072813, "grad_norm": 0.3140926597774166, "learning_rate": 0.0008680439347945459, "loss": 2.7051, "step": 2264 }, { "epoch": 0.25971792225662194, "grad_norm": 0.26318236477325124, "learning_rate": 0.0008679182148847542, "loss": 2.6129, "step": 2265 }, { "epoch": 0.2598325880059626, "grad_norm": 0.2534825062346113, "learning_rate": 0.000867792444227658, "loss": 2.3572, "step": 2266 }, { "epoch": 0.2599472537553033, "grad_norm": 0.25444406860824587, "learning_rate": 0.0008676666228406047, "loss": 2.6224, "step": 2267 }, { "epoch": 0.26006191950464397, "grad_norm": 0.27342413708333674, "learning_rate": 0.0008675407507409492, "loss": 2.6509, "step": 2268 }, { "epoch": 0.2601765852539846, "grad_norm": 0.263645666074119, "learning_rate": 0.0008674148279460532, "loss": 2.7187, "step": 2269 }, { "epoch": 0.2602912510033253, "grad_norm": 0.2747055708545287, "learning_rate": 0.0008672888544732851, "loss": 2.6211, "step": 2270 }, { "epoch": 0.260405916752666, "grad_norm": 0.2501465085484311, "learning_rate": 0.0008671628303400208, "loss": 2.6499, "step": 2271 }, { "epoch": 0.26052058250200666, "grad_norm": 0.2651447647654398, "learning_rate": 0.0008670367555636427, "loss": 2.6995, "step": 2272 }, { "epoch": 0.2606352482513473, "grad_norm": 0.2818071413622489, "learning_rate": 0.0008669106301615406, "loss": 2.5712, "step": 2273 }, { "epoch": 0.260749914000688, "grad_norm": 0.27795347653828967, "learning_rate": 0.0008667844541511109, "loss": 2.6053, "step": 2274 }, { "epoch": 0.2608645797500287, "grad_norm": 0.2706301706183267, "learning_rate": 0.0008666582275497575, "loss": 2.6176, "step": 2275 }, { "epoch": 0.26097924549936935, "grad_norm": 0.27947435276032123, "learning_rate": 0.0008665319503748908, "loss": 2.5361, "step": 2276 }, { "epoch": 0.26109391124871, "grad_norm": 0.28012157778219815, "learning_rate": 0.0008664056226439281, "loss": 2.5968, "step": 2277 }, { "epoch": 0.26120857699805067, "grad_norm": 0.2627926551931407, "learning_rate": 0.0008662792443742942, "loss": 2.592, "step": 2278 }, { "epoch": 0.2613232427473913, "grad_norm": 0.2687960150664113, "learning_rate": 0.0008661528155834203, "loss": 2.628, "step": 2279 }, { "epoch": 0.26143790849673204, "grad_norm": 0.24767276404619976, "learning_rate": 0.0008660263362887451, "loss": 2.5398, "step": 2280 }, { "epoch": 0.2615525742460727, "grad_norm": 0.27999600688641113, "learning_rate": 0.000865899806507714, "loss": 2.6205, "step": 2281 }, { "epoch": 0.26166723999541336, "grad_norm": 0.27008255050964314, "learning_rate": 0.0008657732262577791, "loss": 2.6563, "step": 2282 }, { "epoch": 0.261781905744754, "grad_norm": 0.2499751243536985, "learning_rate": 0.0008656465955564, "loss": 2.5624, "step": 2283 }, { "epoch": 0.26189657149409473, "grad_norm": 0.2702387919166331, "learning_rate": 0.0008655199144210428, "loss": 2.6124, "step": 2284 }, { "epoch": 0.2620112372434354, "grad_norm": 0.24898275034168982, "learning_rate": 0.0008653931828691808, "loss": 2.5676, "step": 2285 }, { "epoch": 0.26212590299277605, "grad_norm": 0.22982996965081692, "learning_rate": 0.0008652664009182945, "loss": 2.4897, "step": 2286 }, { "epoch": 0.2622405687421167, "grad_norm": 0.23881766012194008, "learning_rate": 0.0008651395685858708, "loss": 2.6736, "step": 2287 }, { "epoch": 0.2623552344914574, "grad_norm": 0.2621231316643747, "learning_rate": 0.0008650126858894035, "loss": 2.418, "step": 2288 }, { "epoch": 0.2624699002407981, "grad_norm": 0.25745968051603146, "learning_rate": 0.0008648857528463943, "loss": 2.6177, "step": 2289 }, { "epoch": 0.26258456599013874, "grad_norm": 0.2410131085682068, "learning_rate": 0.0008647587694743506, "loss": 2.6091, "step": 2290 }, { "epoch": 0.2626992317394794, "grad_norm": 0.2911608861533398, "learning_rate": 0.0008646317357907877, "loss": 2.5578, "step": 2291 }, { "epoch": 0.2628138974888201, "grad_norm": 0.2597313089724537, "learning_rate": 0.0008645046518132273, "loss": 2.604, "step": 2292 }, { "epoch": 0.2629285632381608, "grad_norm": 0.28612944571895904, "learning_rate": 0.0008643775175591983, "loss": 2.667, "step": 2293 }, { "epoch": 0.26304322898750143, "grad_norm": 0.29389120261015184, "learning_rate": 0.0008642503330462364, "loss": 2.5746, "step": 2294 }, { "epoch": 0.2631578947368421, "grad_norm": 0.32426563848643625, "learning_rate": 0.0008641230982918844, "loss": 2.4325, "step": 2295 }, { "epoch": 0.26327256048618275, "grad_norm": 0.3111532756914654, "learning_rate": 0.0008639958133136918, "loss": 2.5558, "step": 2296 }, { "epoch": 0.26338722623552346, "grad_norm": 0.3085588522037956, "learning_rate": 0.000863868478129215, "loss": 2.5575, "step": 2297 }, { "epoch": 0.2635018919848641, "grad_norm": 0.2987257402683066, "learning_rate": 0.0008637410927560176, "loss": 2.729, "step": 2298 }, { "epoch": 0.2636165577342048, "grad_norm": 0.2722103541175778, "learning_rate": 0.0008636136572116702, "loss": 2.7217, "step": 2299 }, { "epoch": 0.26373122348354544, "grad_norm": 0.24881535814502806, "learning_rate": 0.0008634861715137497, "loss": 2.5942, "step": 2300 }, { "epoch": 0.26384588923288615, "grad_norm": 0.2884808729743463, "learning_rate": 0.0008633586356798406, "loss": 2.6489, "step": 2301 }, { "epoch": 0.2639605549822268, "grad_norm": 0.24489913293384957, "learning_rate": 0.0008632310497275339, "loss": 2.5774, "step": 2302 }, { "epoch": 0.2640752207315675, "grad_norm": 0.2877128025810281, "learning_rate": 0.0008631034136744278, "loss": 2.615, "step": 2303 }, { "epoch": 0.26418988648090813, "grad_norm": 0.2547734183267323, "learning_rate": 0.0008629757275381272, "loss": 2.5753, "step": 2304 }, { "epoch": 0.26430455223024885, "grad_norm": 0.2446393197687182, "learning_rate": 0.0008628479913362438, "loss": 2.7021, "step": 2305 }, { "epoch": 0.2644192179795895, "grad_norm": 0.26814388699750946, "learning_rate": 0.0008627202050863966, "loss": 2.5921, "step": 2306 }, { "epoch": 0.26453388372893016, "grad_norm": 0.23947191148845884, "learning_rate": 0.0008625923688062112, "loss": 2.5164, "step": 2307 }, { "epoch": 0.2646485494782708, "grad_norm": 0.26722104557776455, "learning_rate": 0.0008624644825133201, "loss": 2.528, "step": 2308 }, { "epoch": 0.26476321522761154, "grad_norm": 0.24810937347291795, "learning_rate": 0.0008623365462253627, "loss": 2.4456, "step": 2309 }, { "epoch": 0.2648778809769522, "grad_norm": 0.24831882185878187, "learning_rate": 0.0008622085599599857, "loss": 2.5779, "step": 2310 }, { "epoch": 0.26499254672629285, "grad_norm": 0.29787307242777805, "learning_rate": 0.0008620805237348422, "loss": 2.4263, "step": 2311 }, { "epoch": 0.2651072124756335, "grad_norm": 0.2587598462078542, "learning_rate": 0.0008619524375675922, "loss": 2.5832, "step": 2312 }, { "epoch": 0.26522187822497423, "grad_norm": 0.3100564905846058, "learning_rate": 0.0008618243014759028, "loss": 2.5114, "step": 2313 }, { "epoch": 0.2653365439743149, "grad_norm": 0.2818893566801113, "learning_rate": 0.0008616961154774483, "loss": 2.5174, "step": 2314 }, { "epoch": 0.26545120972365555, "grad_norm": 0.25761873777185484, "learning_rate": 0.0008615678795899091, "loss": 2.6134, "step": 2315 }, { "epoch": 0.2655658754729962, "grad_norm": 0.2732215523199713, "learning_rate": 0.0008614395938309729, "loss": 2.618, "step": 2316 }, { "epoch": 0.26568054122233686, "grad_norm": 0.26976045020583034, "learning_rate": 0.0008613112582183345, "loss": 2.4406, "step": 2317 }, { "epoch": 0.2657952069716776, "grad_norm": 0.2572225278488332, "learning_rate": 0.0008611828727696953, "loss": 2.6612, "step": 2318 }, { "epoch": 0.26590987272101824, "grad_norm": 0.2846060532839916, "learning_rate": 0.0008610544375027636, "loss": 2.5487, "step": 2319 }, { "epoch": 0.2660245384703589, "grad_norm": 0.23319881504909837, "learning_rate": 0.0008609259524352544, "loss": 2.504, "step": 2320 }, { "epoch": 0.26613920421969955, "grad_norm": 0.2655935855591492, "learning_rate": 0.00086079741758489, "loss": 2.6042, "step": 2321 }, { "epoch": 0.26625386996904027, "grad_norm": 0.258056321688033, "learning_rate": 0.0008606688329693994, "loss": 2.4456, "step": 2322 }, { "epoch": 0.2663685357183809, "grad_norm": 0.3250098759672273, "learning_rate": 0.000860540198606518, "loss": 2.6761, "step": 2323 }, { "epoch": 0.2664832014677216, "grad_norm": 0.2853917234116672, "learning_rate": 0.0008604115145139889, "loss": 2.5193, "step": 2324 }, { "epoch": 0.26659786721706225, "grad_norm": 0.26109745392877237, "learning_rate": 0.0008602827807095614, "loss": 2.7035, "step": 2325 }, { "epoch": 0.26671253296640296, "grad_norm": 0.2739598735209245, "learning_rate": 0.000860153997210992, "loss": 2.608, "step": 2326 }, { "epoch": 0.2668271987157436, "grad_norm": 0.29390691633841487, "learning_rate": 0.0008600251640360438, "loss": 2.6331, "step": 2327 }, { "epoch": 0.2669418644650843, "grad_norm": 0.23810613046277532, "learning_rate": 0.0008598962812024868, "loss": 2.5662, "step": 2328 }, { "epoch": 0.26705653021442494, "grad_norm": 0.267632072156182, "learning_rate": 0.0008597673487280983, "loss": 2.5877, "step": 2329 }, { "epoch": 0.26717119596376565, "grad_norm": 0.2544187566098131, "learning_rate": 0.0008596383666306616, "loss": 2.4746, "step": 2330 }, { "epoch": 0.2672858617131063, "grad_norm": 0.2749768367889401, "learning_rate": 0.0008595093349279677, "loss": 2.6508, "step": 2331 }, { "epoch": 0.26740052746244697, "grad_norm": 0.25312208607294706, "learning_rate": 0.000859380253637814, "loss": 2.5226, "step": 2332 }, { "epoch": 0.2675151932117876, "grad_norm": 0.26215612093210444, "learning_rate": 0.0008592511227780045, "loss": 2.6001, "step": 2333 }, { "epoch": 0.2676298589611283, "grad_norm": 0.25599855025681517, "learning_rate": 0.0008591219423663506, "loss": 2.6444, "step": 2334 }, { "epoch": 0.267744524710469, "grad_norm": 0.2773340988173593, "learning_rate": 0.0008589927124206702, "loss": 2.5458, "step": 2335 }, { "epoch": 0.26785919045980966, "grad_norm": 0.2960624828024453, "learning_rate": 0.0008588634329587884, "loss": 2.5557, "step": 2336 }, { "epoch": 0.2679738562091503, "grad_norm": 0.3002920480819714, "learning_rate": 0.0008587341039985363, "loss": 2.6424, "step": 2337 }, { "epoch": 0.268088521958491, "grad_norm": 0.28600619522031384, "learning_rate": 0.0008586047255577527, "loss": 2.5821, "step": 2338 }, { "epoch": 0.2682031877078317, "grad_norm": 0.2689083665762978, "learning_rate": 0.000858475297654283, "loss": 2.4294, "step": 2339 }, { "epoch": 0.26831785345717235, "grad_norm": 0.2991149739333668, "learning_rate": 0.0008583458203059791, "loss": 2.5029, "step": 2340 }, { "epoch": 0.268432519206513, "grad_norm": 0.2342273778261057, "learning_rate": 0.0008582162935306998, "loss": 2.5249, "step": 2341 }, { "epoch": 0.26854718495585367, "grad_norm": 0.2785858019619248, "learning_rate": 0.0008580867173463112, "loss": 2.6476, "step": 2342 }, { "epoch": 0.2686618507051944, "grad_norm": 0.26287471423883446, "learning_rate": 0.0008579570917706857, "loss": 2.5578, "step": 2343 }, { "epoch": 0.26877651645453504, "grad_norm": 0.25362805040973957, "learning_rate": 0.0008578274168217026, "loss": 2.4611, "step": 2344 }, { "epoch": 0.2688911822038757, "grad_norm": 0.2800245801392496, "learning_rate": 0.0008576976925172481, "loss": 2.5901, "step": 2345 }, { "epoch": 0.26900584795321636, "grad_norm": 0.28218377782448223, "learning_rate": 0.0008575679188752154, "loss": 2.5458, "step": 2346 }, { "epoch": 0.2691205137025571, "grad_norm": 0.29359380738226193, "learning_rate": 0.0008574380959135042, "loss": 2.5408, "step": 2347 }, { "epoch": 0.26923517945189773, "grad_norm": 0.2895979524877854, "learning_rate": 0.0008573082236500209, "loss": 2.5359, "step": 2348 }, { "epoch": 0.2693498452012384, "grad_norm": 0.2780567330672685, "learning_rate": 0.0008571783021026791, "loss": 2.6347, "step": 2349 }, { "epoch": 0.26946451095057905, "grad_norm": 0.2875165221234477, "learning_rate": 0.000857048331289399, "loss": 2.4006, "step": 2350 }, { "epoch": 0.2695791766999197, "grad_norm": 0.26286409714740516, "learning_rate": 0.0008569183112281075, "loss": 2.4972, "step": 2351 }, { "epoch": 0.2696938424492604, "grad_norm": 0.282120865991687, "learning_rate": 0.0008567882419367386, "loss": 2.5622, "step": 2352 }, { "epoch": 0.2698085081986011, "grad_norm": 0.24898876873022102, "learning_rate": 0.0008566581234332327, "loss": 2.5247, "step": 2353 }, { "epoch": 0.26992317394794174, "grad_norm": 0.26425377192459965, "learning_rate": 0.000856527955735537, "loss": 2.484, "step": 2354 }, { "epoch": 0.2700378396972824, "grad_norm": 0.240228977936261, "learning_rate": 0.000856397738861606, "loss": 2.3536, "step": 2355 }, { "epoch": 0.2701525054466231, "grad_norm": 0.28021635249607196, "learning_rate": 0.0008562674728294004, "loss": 2.5926, "step": 2356 }, { "epoch": 0.2702671711959638, "grad_norm": 0.2650233129350482, "learning_rate": 0.0008561371576568881, "loss": 2.4835, "step": 2357 }, { "epoch": 0.27038183694530443, "grad_norm": 0.2633359009816506, "learning_rate": 0.0008560067933620435, "loss": 2.4433, "step": 2358 }, { "epoch": 0.2704965026946451, "grad_norm": 0.2946647492962772, "learning_rate": 0.0008558763799628477, "loss": 2.6765, "step": 2359 }, { "epoch": 0.2706111684439858, "grad_norm": 0.3010921807269685, "learning_rate": 0.000855745917477289, "loss": 2.5501, "step": 2360 }, { "epoch": 0.27072583419332646, "grad_norm": 0.25278395579767765, "learning_rate": 0.0008556154059233622, "loss": 2.5531, "step": 2361 }, { "epoch": 0.2708404999426671, "grad_norm": 0.28667903087024116, "learning_rate": 0.0008554848453190686, "loss": 2.5936, "step": 2362 }, { "epoch": 0.2709551656920078, "grad_norm": 0.2789194585401289, "learning_rate": 0.0008553542356824168, "loss": 2.526, "step": 2363 }, { "epoch": 0.2710698314413485, "grad_norm": 0.2579661629699175, "learning_rate": 0.0008552235770314221, "loss": 2.5519, "step": 2364 }, { "epoch": 0.27118449719068916, "grad_norm": 0.2981772464908079, "learning_rate": 0.0008550928693841058, "loss": 2.5796, "step": 2365 }, { "epoch": 0.2712991629400298, "grad_norm": 0.282944415161875, "learning_rate": 0.0008549621127584971, "loss": 2.4011, "step": 2366 }, { "epoch": 0.2714138286893705, "grad_norm": 0.290034883553547, "learning_rate": 0.000854831307172631, "loss": 2.5336, "step": 2367 }, { "epoch": 0.27152849443871113, "grad_norm": 0.2550067760669629, "learning_rate": 0.0008547004526445499, "loss": 2.5061, "step": 2368 }, { "epoch": 0.27164316018805185, "grad_norm": 0.25115199820867107, "learning_rate": 0.0008545695491923024, "loss": 2.4673, "step": 2369 }, { "epoch": 0.2717578259373925, "grad_norm": 0.25328329478881895, "learning_rate": 0.0008544385968339445, "loss": 2.5227, "step": 2370 }, { "epoch": 0.27187249168673316, "grad_norm": 0.23758367028112443, "learning_rate": 0.0008543075955875382, "loss": 2.561, "step": 2371 }, { "epoch": 0.2719871574360738, "grad_norm": 0.2661259078259569, "learning_rate": 0.0008541765454711527, "loss": 2.6191, "step": 2372 }, { "epoch": 0.27210182318541454, "grad_norm": 0.26616889714959324, "learning_rate": 0.0008540454465028643, "loss": 2.6842, "step": 2373 }, { "epoch": 0.2722164889347552, "grad_norm": 0.24992609194787788, "learning_rate": 0.0008539142987007551, "loss": 2.4991, "step": 2374 }, { "epoch": 0.27233115468409586, "grad_norm": 0.24783171434680837, "learning_rate": 0.0008537831020829147, "loss": 2.5491, "step": 2375 }, { "epoch": 0.2724458204334365, "grad_norm": 0.2525704602115128, "learning_rate": 0.0008536518566674389, "loss": 2.549, "step": 2376 }, { "epoch": 0.27256048618277723, "grad_norm": 0.30882216886242264, "learning_rate": 0.0008535205624724309, "loss": 2.5271, "step": 2377 }, { "epoch": 0.2726751519321179, "grad_norm": 0.26141228900484637, "learning_rate": 0.0008533892195159999, "loss": 2.5505, "step": 2378 }, { "epoch": 0.27278981768145855, "grad_norm": 0.2994381817516312, "learning_rate": 0.0008532578278162624, "loss": 2.6687, "step": 2379 }, { "epoch": 0.2729044834307992, "grad_norm": 0.28209418377683404, "learning_rate": 0.0008531263873913411, "loss": 2.545, "step": 2380 }, { "epoch": 0.2730191491801399, "grad_norm": 0.2784338902283846, "learning_rate": 0.0008529948982593658, "loss": 2.538, "step": 2381 }, { "epoch": 0.2731338149294806, "grad_norm": 0.2884789419416564, "learning_rate": 0.0008528633604384733, "loss": 2.4972, "step": 2382 }, { "epoch": 0.27324848067882124, "grad_norm": 0.2631061396320721, "learning_rate": 0.0008527317739468061, "loss": 2.5626, "step": 2383 }, { "epoch": 0.2733631464281619, "grad_norm": 0.2521564988524757, "learning_rate": 0.0008526001388025145, "loss": 2.5278, "step": 2384 }, { "epoch": 0.27347781217750256, "grad_norm": 0.2549158276931651, "learning_rate": 0.0008524684550237549, "loss": 2.6384, "step": 2385 }, { "epoch": 0.27359247792684327, "grad_norm": 0.24669552814327803, "learning_rate": 0.0008523367226286907, "loss": 2.5763, "step": 2386 }, { "epoch": 0.27370714367618393, "grad_norm": 0.2678545522168121, "learning_rate": 0.0008522049416354915, "loss": 2.4326, "step": 2387 }, { "epoch": 0.2738218094255246, "grad_norm": 0.251078084078413, "learning_rate": 0.0008520731120623344, "loss": 2.5283, "step": 2388 }, { "epoch": 0.27393647517486525, "grad_norm": 0.282524954608787, "learning_rate": 0.0008519412339274027, "loss": 2.6199, "step": 2389 }, { "epoch": 0.27405114092420596, "grad_norm": 0.2524336125108421, "learning_rate": 0.0008518093072488863, "loss": 2.5556, "step": 2390 }, { "epoch": 0.2741658066735466, "grad_norm": 0.2512518467991055, "learning_rate": 0.000851677332044982, "loss": 2.6842, "step": 2391 }, { "epoch": 0.2742804724228873, "grad_norm": 0.2578260544313042, "learning_rate": 0.0008515453083338935, "loss": 2.6217, "step": 2392 }, { "epoch": 0.27439513817222794, "grad_norm": 0.2694193318946938, "learning_rate": 0.0008514132361338306, "loss": 2.6302, "step": 2393 }, { "epoch": 0.27450980392156865, "grad_norm": 0.26191327678962867, "learning_rate": 0.0008512811154630104, "loss": 2.6819, "step": 2394 }, { "epoch": 0.2746244696709093, "grad_norm": 0.2782602232038335, "learning_rate": 0.0008511489463396563, "loss": 2.5318, "step": 2395 }, { "epoch": 0.27473913542024997, "grad_norm": 0.2885139016158749, "learning_rate": 0.0008510167287819986, "loss": 2.6102, "step": 2396 }, { "epoch": 0.27485380116959063, "grad_norm": 0.2581976087460682, "learning_rate": 0.0008508844628082741, "loss": 2.5229, "step": 2397 }, { "epoch": 0.27496846691893134, "grad_norm": 0.2794809924836603, "learning_rate": 0.0008507521484367265, "loss": 2.6789, "step": 2398 }, { "epoch": 0.275083132668272, "grad_norm": 0.24471043596524703, "learning_rate": 0.0008506197856856059, "loss": 2.5583, "step": 2399 }, { "epoch": 0.27519779841761266, "grad_norm": 0.2961465808074545, "learning_rate": 0.0008504873745731694, "loss": 2.553, "step": 2400 }, { "epoch": 0.2753124641669533, "grad_norm": 0.24845955451060422, "learning_rate": 0.0008503549151176804, "loss": 2.5822, "step": 2401 }, { "epoch": 0.275427129916294, "grad_norm": 0.29059266575733805, "learning_rate": 0.0008502224073374092, "loss": 2.6134, "step": 2402 }, { "epoch": 0.2755417956656347, "grad_norm": 0.2573275862831089, "learning_rate": 0.0008500898512506328, "loss": 2.3948, "step": 2403 }, { "epoch": 0.27565646141497535, "grad_norm": 0.2708467629757828, "learning_rate": 0.0008499572468756347, "loss": 2.6952, "step": 2404 }, { "epoch": 0.275771127164316, "grad_norm": 0.2584709937525598, "learning_rate": 0.0008498245942307052, "loss": 2.4943, "step": 2405 }, { "epoch": 0.27588579291365667, "grad_norm": 0.2638199755017948, "learning_rate": 0.0008496918933341413, "loss": 2.6278, "step": 2406 }, { "epoch": 0.2760004586629974, "grad_norm": 0.284013308754895, "learning_rate": 0.0008495591442042463, "loss": 2.6087, "step": 2407 }, { "epoch": 0.27611512441233804, "grad_norm": 0.25285473429827365, "learning_rate": 0.0008494263468593307, "loss": 2.489, "step": 2408 }, { "epoch": 0.2762297901616787, "grad_norm": 0.27345790478784665, "learning_rate": 0.000849293501317711, "loss": 2.4463, "step": 2409 }, { "epoch": 0.27634445591101936, "grad_norm": 0.2798468559473655, "learning_rate": 0.000849160607597711, "loss": 2.5281, "step": 2410 }, { "epoch": 0.2764591216603601, "grad_norm": 0.2675057092775682, "learning_rate": 0.000849027665717661, "loss": 2.5024, "step": 2411 }, { "epoch": 0.27657378740970073, "grad_norm": 0.2803925449427268, "learning_rate": 0.0008488946756958973, "loss": 2.5563, "step": 2412 }, { "epoch": 0.2766884531590414, "grad_norm": 0.28868003010772186, "learning_rate": 0.0008487616375507639, "loss": 2.384, "step": 2413 }, { "epoch": 0.27680311890838205, "grad_norm": 0.30729004164670604, "learning_rate": 0.0008486285513006104, "loss": 2.5996, "step": 2414 }, { "epoch": 0.27691778465772277, "grad_norm": 0.29676006219158807, "learning_rate": 0.0008484954169637937, "loss": 2.601, "step": 2415 }, { "epoch": 0.2770324504070634, "grad_norm": 0.27217287440114984, "learning_rate": 0.0008483622345586774, "loss": 2.5239, "step": 2416 }, { "epoch": 0.2771471161564041, "grad_norm": 0.27321044490303814, "learning_rate": 0.0008482290041036309, "loss": 2.643, "step": 2417 }, { "epoch": 0.27726178190574474, "grad_norm": 0.25424309019211944, "learning_rate": 0.0008480957256170314, "loss": 2.5906, "step": 2418 }, { "epoch": 0.2773764476550854, "grad_norm": 0.25470424132020775, "learning_rate": 0.0008479623991172618, "loss": 2.526, "step": 2419 }, { "epoch": 0.2774911134044261, "grad_norm": 0.27683146470548986, "learning_rate": 0.000847829024622712, "loss": 2.6117, "step": 2420 }, { "epoch": 0.2776057791537668, "grad_norm": 0.2589440078264431, "learning_rate": 0.0008476956021517783, "loss": 2.6008, "step": 2421 }, { "epoch": 0.27772044490310743, "grad_norm": 0.27949741614070356, "learning_rate": 0.0008475621317228641, "loss": 2.4709, "step": 2422 }, { "epoch": 0.2778351106524481, "grad_norm": 0.2693507174978494, "learning_rate": 0.000847428613354379, "loss": 2.5444, "step": 2423 }, { "epoch": 0.2779497764017888, "grad_norm": 0.2556183818146168, "learning_rate": 0.0008472950470647393, "loss": 2.4278, "step": 2424 }, { "epoch": 0.27806444215112947, "grad_norm": 0.24387398362986776, "learning_rate": 0.0008471614328723678, "loss": 2.544, "step": 2425 }, { "epoch": 0.2781791079004701, "grad_norm": 0.2674125616777062, "learning_rate": 0.0008470277707956943, "loss": 2.5712, "step": 2426 }, { "epoch": 0.2782937736498108, "grad_norm": 0.26005974293490713, "learning_rate": 0.0008468940608531546, "loss": 2.5905, "step": 2427 }, { "epoch": 0.2784084393991515, "grad_norm": 0.27520008793186473, "learning_rate": 0.0008467603030631916, "loss": 2.4995, "step": 2428 }, { "epoch": 0.27852310514849216, "grad_norm": 0.2740718219058786, "learning_rate": 0.0008466264974442548, "loss": 2.6654, "step": 2429 }, { "epoch": 0.2786377708978328, "grad_norm": 0.2786869731536121, "learning_rate": 0.0008464926440147998, "loss": 2.6526, "step": 2430 }, { "epoch": 0.2787524366471735, "grad_norm": 0.2865174008230928, "learning_rate": 0.0008463587427932895, "loss": 2.5338, "step": 2431 }, { "epoch": 0.2788671023965142, "grad_norm": 0.3013424398143583, "learning_rate": 0.0008462247937981928, "loss": 2.6616, "step": 2432 }, { "epoch": 0.27898176814585485, "grad_norm": 0.27499385160668566, "learning_rate": 0.0008460907970479853, "loss": 2.5329, "step": 2433 }, { "epoch": 0.2790964338951955, "grad_norm": 0.25946737896770045, "learning_rate": 0.0008459567525611496, "loss": 2.59, "step": 2434 }, { "epoch": 0.27921109964453616, "grad_norm": 0.24892950398149521, "learning_rate": 0.0008458226603561742, "loss": 2.5647, "step": 2435 }, { "epoch": 0.2793257653938768, "grad_norm": 0.2830585037696782, "learning_rate": 0.0008456885204515549, "loss": 2.5004, "step": 2436 }, { "epoch": 0.27944043114321754, "grad_norm": 0.2696320895904184, "learning_rate": 0.0008455543328657937, "loss": 2.7736, "step": 2437 }, { "epoch": 0.2795550968925582, "grad_norm": 0.24828188271491208, "learning_rate": 0.0008454200976173991, "loss": 2.6013, "step": 2438 }, { "epoch": 0.27966976264189886, "grad_norm": 0.26321309516174424, "learning_rate": 0.0008452858147248863, "loss": 2.6673, "step": 2439 }, { "epoch": 0.2797844283912395, "grad_norm": 0.23956378267391462, "learning_rate": 0.0008451514842067771, "loss": 2.6743, "step": 2440 }, { "epoch": 0.27989909414058023, "grad_norm": 0.27328229047816405, "learning_rate": 0.0008450171060815999, "loss": 2.5108, "step": 2441 }, { "epoch": 0.2800137598899209, "grad_norm": 0.2776195209007553, "learning_rate": 0.0008448826803678896, "loss": 2.6933, "step": 2442 }, { "epoch": 0.28012842563926155, "grad_norm": 0.26376373959280863, "learning_rate": 0.0008447482070841875, "loss": 2.5415, "step": 2443 }, { "epoch": 0.2802430913886022, "grad_norm": 0.2623112008839188, "learning_rate": 0.0008446136862490417, "loss": 2.644, "step": 2444 }, { "epoch": 0.2803577571379429, "grad_norm": 0.2511333606686069, "learning_rate": 0.0008444791178810068, "loss": 2.6078, "step": 2445 }, { "epoch": 0.2804724228872836, "grad_norm": 0.2637196874034492, "learning_rate": 0.0008443445019986441, "loss": 2.6085, "step": 2446 }, { "epoch": 0.28058708863662424, "grad_norm": 0.25387644452304403, "learning_rate": 0.0008442098386205211, "loss": 2.5923, "step": 2447 }, { "epoch": 0.2807017543859649, "grad_norm": 0.2637723568440843, "learning_rate": 0.0008440751277652122, "loss": 2.5806, "step": 2448 }, { "epoch": 0.2808164201353056, "grad_norm": 0.2490812290960805, "learning_rate": 0.0008439403694512978, "loss": 2.696, "step": 2449 }, { "epoch": 0.28093108588464627, "grad_norm": 0.24113497441315793, "learning_rate": 0.0008438055636973657, "loss": 2.659, "step": 2450 }, { "epoch": 0.28104575163398693, "grad_norm": 0.2521301586845647, "learning_rate": 0.0008436707105220096, "loss": 2.4955, "step": 2451 }, { "epoch": 0.2811604173833276, "grad_norm": 0.27395494839982504, "learning_rate": 0.00084353580994383, "loss": 2.6096, "step": 2452 }, { "epoch": 0.28127508313266825, "grad_norm": 0.2756061744104837, "learning_rate": 0.0008434008619814337, "loss": 2.6378, "step": 2453 }, { "epoch": 0.28138974888200896, "grad_norm": 0.27729473882185857, "learning_rate": 0.0008432658666534345, "loss": 2.6098, "step": 2454 }, { "epoch": 0.2815044146313496, "grad_norm": 0.2626666974994579, "learning_rate": 0.0008431308239784521, "loss": 2.5488, "step": 2455 }, { "epoch": 0.2816190803806903, "grad_norm": 0.2538847254037964, "learning_rate": 0.0008429957339751132, "loss": 2.5729, "step": 2456 }, { "epoch": 0.28173374613003094, "grad_norm": 0.2843633830913365, "learning_rate": 0.0008428605966620508, "loss": 2.5347, "step": 2457 }, { "epoch": 0.28184841187937165, "grad_norm": 0.2644753301904815, "learning_rate": 0.0008427254120579047, "loss": 2.4835, "step": 2458 }, { "epoch": 0.2819630776287123, "grad_norm": 0.2729346014066077, "learning_rate": 0.0008425901801813212, "loss": 2.5508, "step": 2459 }, { "epoch": 0.28207774337805297, "grad_norm": 0.28233038553180995, "learning_rate": 0.0008424549010509524, "loss": 2.5295, "step": 2460 }, { "epoch": 0.28219240912739363, "grad_norm": 0.2877390788139464, "learning_rate": 0.0008423195746854578, "loss": 2.682, "step": 2461 }, { "epoch": 0.28230707487673434, "grad_norm": 0.2766417386296377, "learning_rate": 0.000842184201103503, "loss": 2.5467, "step": 2462 }, { "epoch": 0.282421740626075, "grad_norm": 0.26270755010262625, "learning_rate": 0.0008420487803237604, "loss": 2.5227, "step": 2463 }, { "epoch": 0.28253640637541566, "grad_norm": 0.262026148325715, "learning_rate": 0.0008419133123649088, "loss": 2.7123, "step": 2464 }, { "epoch": 0.2826510721247563, "grad_norm": 0.2601549999321762, "learning_rate": 0.0008417777972456328, "loss": 2.5418, "step": 2465 }, { "epoch": 0.28276573787409703, "grad_norm": 0.27153287650376556, "learning_rate": 0.0008416422349846249, "loss": 2.4264, "step": 2466 }, { "epoch": 0.2828804036234377, "grad_norm": 0.265032650510231, "learning_rate": 0.0008415066256005827, "loss": 2.4691, "step": 2467 }, { "epoch": 0.28299506937277835, "grad_norm": 0.2567405159664836, "learning_rate": 0.0008413709691122115, "loss": 2.4004, "step": 2468 }, { "epoch": 0.283109735122119, "grad_norm": 0.27719165538156515, "learning_rate": 0.0008412352655382221, "loss": 2.6787, "step": 2469 }, { "epoch": 0.28322440087145967, "grad_norm": 0.2812957518489372, "learning_rate": 0.0008410995148973323, "loss": 2.6341, "step": 2470 }, { "epoch": 0.2833390666208004, "grad_norm": 0.281271010793345, "learning_rate": 0.0008409637172082664, "loss": 2.4621, "step": 2471 }, { "epoch": 0.28345373237014104, "grad_norm": 0.2587891989115105, "learning_rate": 0.0008408278724897551, "loss": 2.5004, "step": 2472 }, { "epoch": 0.2835683981194817, "grad_norm": 0.271106795596705, "learning_rate": 0.0008406919807605356, "loss": 2.6544, "step": 2473 }, { "epoch": 0.28368306386882236, "grad_norm": 0.2552763136343683, "learning_rate": 0.0008405560420393515, "loss": 2.5887, "step": 2474 }, { "epoch": 0.2837977296181631, "grad_norm": 0.2670747247445295, "learning_rate": 0.000840420056344953, "loss": 2.5077, "step": 2475 }, { "epoch": 0.28391239536750373, "grad_norm": 0.26887812616498585, "learning_rate": 0.0008402840236960967, "loss": 2.44, "step": 2476 }, { "epoch": 0.2840270611168444, "grad_norm": 0.23851366647159697, "learning_rate": 0.0008401479441115456, "loss": 2.6145, "step": 2477 }, { "epoch": 0.28414172686618505, "grad_norm": 0.2588460697419801, "learning_rate": 0.0008400118176100697, "loss": 2.4538, "step": 2478 }, { "epoch": 0.28425639261552577, "grad_norm": 0.2537866070969727, "learning_rate": 0.0008398756442104446, "loss": 2.5388, "step": 2479 }, { "epoch": 0.2843710583648664, "grad_norm": 0.25674138356251236, "learning_rate": 0.0008397394239314529, "loss": 2.5838, "step": 2480 }, { "epoch": 0.2844857241142071, "grad_norm": 0.22532047471017413, "learning_rate": 0.0008396031567918839, "loss": 2.5809, "step": 2481 }, { "epoch": 0.28460038986354774, "grad_norm": 0.2284490132812174, "learning_rate": 0.0008394668428105327, "loss": 2.591, "step": 2482 }, { "epoch": 0.28471505561288846, "grad_norm": 0.240063483113093, "learning_rate": 0.0008393304820062016, "loss": 2.5132, "step": 2483 }, { "epoch": 0.2848297213622291, "grad_norm": 0.2509812631721475, "learning_rate": 0.0008391940743976984, "loss": 2.6037, "step": 2484 }, { "epoch": 0.2849443871115698, "grad_norm": 0.28944807384776783, "learning_rate": 0.0008390576200038385, "loss": 2.6257, "step": 2485 }, { "epoch": 0.28505905286091043, "grad_norm": 0.24861666331172017, "learning_rate": 0.0008389211188434429, "loss": 2.6277, "step": 2486 }, { "epoch": 0.2851737186102511, "grad_norm": 0.2732205773122366, "learning_rate": 0.0008387845709353392, "loss": 2.6306, "step": 2487 }, { "epoch": 0.2852883843595918, "grad_norm": 0.2896018687748785, "learning_rate": 0.000838647976298362, "loss": 2.5352, "step": 2488 }, { "epoch": 0.28540305010893247, "grad_norm": 0.2745605087443794, "learning_rate": 0.0008385113349513516, "loss": 2.4832, "step": 2489 }, { "epoch": 0.2855177158582731, "grad_norm": 0.27181938832496855, "learning_rate": 0.0008383746469131551, "loss": 2.4318, "step": 2490 }, { "epoch": 0.2856323816076138, "grad_norm": 0.2923342484725118, "learning_rate": 0.0008382379122026263, "loss": 2.5815, "step": 2491 }, { "epoch": 0.2857470473569545, "grad_norm": 0.26771335676206004, "learning_rate": 0.0008381011308386246, "loss": 2.6396, "step": 2492 }, { "epoch": 0.28586171310629516, "grad_norm": 0.24439132331250865, "learning_rate": 0.0008379643028400168, "loss": 2.5168, "step": 2493 }, { "epoch": 0.2859763788556358, "grad_norm": 0.26464200707960456, "learning_rate": 0.0008378274282256757, "loss": 2.5411, "step": 2494 }, { "epoch": 0.2860910446049765, "grad_norm": 0.25397448550067075, "learning_rate": 0.0008376905070144804, "loss": 2.6693, "step": 2495 }, { "epoch": 0.2862057103543172, "grad_norm": 0.25259471750924994, "learning_rate": 0.0008375535392253166, "loss": 2.5454, "step": 2496 }, { "epoch": 0.28632037610365785, "grad_norm": 0.24574643562235957, "learning_rate": 0.0008374165248770764, "loss": 2.5513, "step": 2497 }, { "epoch": 0.2864350418529985, "grad_norm": 0.24133489002653277, "learning_rate": 0.0008372794639886583, "loss": 2.5478, "step": 2498 }, { "epoch": 0.28654970760233917, "grad_norm": 0.23693809749911574, "learning_rate": 0.0008371423565789674, "loss": 2.6303, "step": 2499 }, { "epoch": 0.2866643733516799, "grad_norm": 0.2382040425841025, "learning_rate": 0.0008370052026669149, "loss": 2.4535, "step": 2500 }, { "epoch": 0.28677903910102054, "grad_norm": 0.26455669545078525, "learning_rate": 0.0008368680022714186, "loss": 2.5823, "step": 2501 }, { "epoch": 0.2868937048503612, "grad_norm": 0.24753695532485726, "learning_rate": 0.0008367307554114025, "loss": 2.5698, "step": 2502 }, { "epoch": 0.28700837059970186, "grad_norm": 0.24750414229584652, "learning_rate": 0.0008365934621057976, "loss": 2.6356, "step": 2503 }, { "epoch": 0.2871230363490425, "grad_norm": 0.28685584546240395, "learning_rate": 0.0008364561223735405, "loss": 2.6334, "step": 2504 }, { "epoch": 0.28723770209838323, "grad_norm": 0.23903842125290242, "learning_rate": 0.0008363187362335749, "loss": 2.4476, "step": 2505 }, { "epoch": 0.2873523678477239, "grad_norm": 0.2630605831270681, "learning_rate": 0.0008361813037048503, "loss": 2.5156, "step": 2506 }, { "epoch": 0.28746703359706455, "grad_norm": 0.24244475738613178, "learning_rate": 0.0008360438248063231, "loss": 2.6068, "step": 2507 }, { "epoch": 0.2875816993464052, "grad_norm": 0.2528370204111683, "learning_rate": 0.0008359062995569559, "loss": 2.5871, "step": 2508 }, { "epoch": 0.2876963650957459, "grad_norm": 0.2616128700634317, "learning_rate": 0.0008357687279757177, "loss": 2.5416, "step": 2509 }, { "epoch": 0.2878110308450866, "grad_norm": 0.23412276764895418, "learning_rate": 0.0008356311100815837, "loss": 2.49, "step": 2510 }, { "epoch": 0.28792569659442724, "grad_norm": 0.2677953432188376, "learning_rate": 0.0008354934458935357, "loss": 2.5526, "step": 2511 }, { "epoch": 0.2880403623437679, "grad_norm": 0.2855967137862939, "learning_rate": 0.0008353557354305621, "loss": 2.6344, "step": 2512 }, { "epoch": 0.2881550280931086, "grad_norm": 0.29922786567165965, "learning_rate": 0.0008352179787116572, "loss": 2.6045, "step": 2513 }, { "epoch": 0.28826969384244927, "grad_norm": 0.28152451197141004, "learning_rate": 0.000835080175755822, "loss": 2.6077, "step": 2514 }, { "epoch": 0.28838435959178993, "grad_norm": 0.2801391401577633, "learning_rate": 0.0008349423265820636, "loss": 2.5958, "step": 2515 }, { "epoch": 0.2884990253411306, "grad_norm": 0.24648791106938245, "learning_rate": 0.0008348044312093959, "loss": 2.6154, "step": 2516 }, { "epoch": 0.2886136910904713, "grad_norm": 0.27225314902474773, "learning_rate": 0.0008346664896568389, "loss": 2.606, "step": 2517 }, { "epoch": 0.28872835683981196, "grad_norm": 0.2873948014265642, "learning_rate": 0.000834528501943419, "loss": 2.7184, "step": 2518 }, { "epoch": 0.2888430225891526, "grad_norm": 0.26780405853681727, "learning_rate": 0.000834390468088169, "loss": 2.5804, "step": 2519 }, { "epoch": 0.2889576883384933, "grad_norm": 0.24182334785712256, "learning_rate": 0.0008342523881101279, "loss": 2.6582, "step": 2520 }, { "epoch": 0.28907235408783394, "grad_norm": 0.27523201646743733, "learning_rate": 0.0008341142620283412, "loss": 2.6939, "step": 2521 }, { "epoch": 0.28918701983717465, "grad_norm": 0.24884301518765278, "learning_rate": 0.0008339760898618611, "loss": 2.5228, "step": 2522 }, { "epoch": 0.2893016855865153, "grad_norm": 0.24056876252172377, "learning_rate": 0.0008338378716297454, "loss": 2.6042, "step": 2523 }, { "epoch": 0.28941635133585597, "grad_norm": 0.25163468894240254, "learning_rate": 0.0008336996073510589, "loss": 2.5245, "step": 2524 }, { "epoch": 0.28953101708519663, "grad_norm": 0.2598603076723986, "learning_rate": 0.0008335612970448723, "loss": 2.5671, "step": 2525 }, { "epoch": 0.28964568283453734, "grad_norm": 0.26415173104784145, "learning_rate": 0.0008334229407302632, "loss": 2.5604, "step": 2526 }, { "epoch": 0.289760348583878, "grad_norm": 0.2675244020569685, "learning_rate": 0.000833284538426315, "loss": 2.5647, "step": 2527 }, { "epoch": 0.28987501433321866, "grad_norm": 0.28987122631968115, "learning_rate": 0.0008331460901521178, "loss": 2.561, "step": 2528 }, { "epoch": 0.2899896800825593, "grad_norm": 0.2887040856180903, "learning_rate": 0.0008330075959267677, "loss": 2.6132, "step": 2529 }, { "epoch": 0.29010434583190003, "grad_norm": 0.2873896980901336, "learning_rate": 0.0008328690557693674, "loss": 2.5155, "step": 2530 }, { "epoch": 0.2902190115812407, "grad_norm": 0.258179408833659, "learning_rate": 0.000832730469699026, "loss": 2.4676, "step": 2531 }, { "epoch": 0.29033367733058135, "grad_norm": 0.2714605469391852, "learning_rate": 0.0008325918377348587, "loss": 2.5601, "step": 2532 }, { "epoch": 0.290448343079922, "grad_norm": 0.2435376709444652, "learning_rate": 0.0008324531598959871, "loss": 2.5328, "step": 2533 }, { "epoch": 0.2905630088292627, "grad_norm": 0.27102761470688463, "learning_rate": 0.0008323144362015393, "loss": 2.6384, "step": 2534 }, { "epoch": 0.2906776745786034, "grad_norm": 0.2669748822496197, "learning_rate": 0.0008321756666706495, "loss": 2.6509, "step": 2535 }, { "epoch": 0.29079234032794404, "grad_norm": 0.27142868317612345, "learning_rate": 0.0008320368513224584, "loss": 2.5714, "step": 2536 }, { "epoch": 0.2909070060772847, "grad_norm": 0.29845158420613227, "learning_rate": 0.0008318979901761128, "loss": 2.6769, "step": 2537 }, { "epoch": 0.29102167182662536, "grad_norm": 0.23053483844719486, "learning_rate": 0.000831759083250766, "loss": 2.5696, "step": 2538 }, { "epoch": 0.2911363375759661, "grad_norm": 0.2582694574957293, "learning_rate": 0.0008316201305655775, "loss": 2.5023, "step": 2539 }, { "epoch": 0.29125100332530673, "grad_norm": 0.2686303123294477, "learning_rate": 0.0008314811321397134, "loss": 2.5818, "step": 2540 }, { "epoch": 0.2913656690746474, "grad_norm": 0.2553653961158758, "learning_rate": 0.0008313420879923456, "loss": 2.5225, "step": 2541 }, { "epoch": 0.29148033482398805, "grad_norm": 0.26972664974837973, "learning_rate": 0.0008312029981426528, "loss": 2.5773, "step": 2542 }, { "epoch": 0.29159500057332877, "grad_norm": 0.26307320502763415, "learning_rate": 0.0008310638626098196, "loss": 2.5796, "step": 2543 }, { "epoch": 0.2917096663226694, "grad_norm": 0.2698760819557667, "learning_rate": 0.0008309246814130372, "loss": 2.5995, "step": 2544 }, { "epoch": 0.2918243320720101, "grad_norm": 0.2574911402894862, "learning_rate": 0.0008307854545715032, "loss": 2.6013, "step": 2545 }, { "epoch": 0.29193899782135074, "grad_norm": 0.25379289111904746, "learning_rate": 0.0008306461821044209, "loss": 2.5271, "step": 2546 }, { "epoch": 0.29205366357069146, "grad_norm": 0.2297227331859884, "learning_rate": 0.0008305068640310006, "loss": 2.748, "step": 2547 }, { "epoch": 0.2921683293200321, "grad_norm": 0.24517836060190237, "learning_rate": 0.0008303675003704583, "loss": 2.4866, "step": 2548 }, { "epoch": 0.2922829950693728, "grad_norm": 0.26780788251948234, "learning_rate": 0.0008302280911420167, "loss": 2.5668, "step": 2549 }, { "epoch": 0.29239766081871343, "grad_norm": 0.24178008615090107, "learning_rate": 0.0008300886363649048, "loss": 2.5103, "step": 2550 }, { "epoch": 0.29251232656805415, "grad_norm": 0.2671598309721243, "learning_rate": 0.0008299491360583574, "loss": 2.3844, "step": 2551 }, { "epoch": 0.2926269923173948, "grad_norm": 0.23935761227462785, "learning_rate": 0.000829809590241616, "loss": 2.5809, "step": 2552 }, { "epoch": 0.29274165806673547, "grad_norm": 0.27754340309061803, "learning_rate": 0.0008296699989339287, "loss": 2.6769, "step": 2553 }, { "epoch": 0.2928563238160761, "grad_norm": 0.24351411458797384, "learning_rate": 0.000829530362154549, "loss": 2.4271, "step": 2554 }, { "epoch": 0.2929709895654168, "grad_norm": 0.2676976197480251, "learning_rate": 0.0008293906799227371, "loss": 2.4924, "step": 2555 }, { "epoch": 0.2930856553147575, "grad_norm": 0.2540753797965595, "learning_rate": 0.0008292509522577599, "loss": 2.5094, "step": 2556 }, { "epoch": 0.29320032106409816, "grad_norm": 0.24750244288415035, "learning_rate": 0.0008291111791788897, "loss": 2.5268, "step": 2557 }, { "epoch": 0.2933149868134388, "grad_norm": 0.2642507582435455, "learning_rate": 0.0008289713607054059, "loss": 2.6035, "step": 2558 }, { "epoch": 0.2934296525627795, "grad_norm": 0.26800905287740867, "learning_rate": 0.0008288314968565938, "loss": 2.5571, "step": 2559 }, { "epoch": 0.2935443183121202, "grad_norm": 0.27667328908230365, "learning_rate": 0.0008286915876517444, "loss": 2.5551, "step": 2560 }, { "epoch": 0.29365898406146085, "grad_norm": 0.2507592239871691, "learning_rate": 0.0008285516331101563, "loss": 2.5304, "step": 2561 }, { "epoch": 0.2937736498108015, "grad_norm": 0.26084774321358145, "learning_rate": 0.0008284116332511329, "loss": 2.6026, "step": 2562 }, { "epoch": 0.29388831556014217, "grad_norm": 0.2801695105940615, "learning_rate": 0.0008282715880939851, "loss": 2.4756, "step": 2563 }, { "epoch": 0.2940029813094829, "grad_norm": 0.2459966038896636, "learning_rate": 0.0008281314976580289, "loss": 2.5764, "step": 2564 }, { "epoch": 0.29411764705882354, "grad_norm": 0.278512867919984, "learning_rate": 0.0008279913619625874, "loss": 2.578, "step": 2565 }, { "epoch": 0.2942323128081642, "grad_norm": 0.27075830268459006, "learning_rate": 0.0008278511810269896, "loss": 2.4781, "step": 2566 }, { "epoch": 0.29434697855750486, "grad_norm": 0.24267260078221942, "learning_rate": 0.0008277109548705708, "loss": 2.4221, "step": 2567 }, { "epoch": 0.29446164430684557, "grad_norm": 0.2764497208632351, "learning_rate": 0.0008275706835126726, "loss": 2.5206, "step": 2568 }, { "epoch": 0.29457631005618623, "grad_norm": 0.2521141230184501, "learning_rate": 0.0008274303669726426, "loss": 2.477, "step": 2569 }, { "epoch": 0.2946909758055269, "grad_norm": 0.2615121583425356, "learning_rate": 0.0008272900052698349, "loss": 2.5262, "step": 2570 }, { "epoch": 0.29480564155486755, "grad_norm": 0.2838419722726068, "learning_rate": 0.0008271495984236096, "loss": 2.5906, "step": 2571 }, { "epoch": 0.2949203073042082, "grad_norm": 0.29428895214928474, "learning_rate": 0.0008270091464533333, "loss": 2.5638, "step": 2572 }, { "epoch": 0.2950349730535489, "grad_norm": 0.2516902648675629, "learning_rate": 0.0008268686493783786, "loss": 2.5653, "step": 2573 }, { "epoch": 0.2951496388028896, "grad_norm": 0.2686053861533405, "learning_rate": 0.0008267281072181245, "loss": 2.3053, "step": 2574 }, { "epoch": 0.29526430455223024, "grad_norm": 0.24311864073122846, "learning_rate": 0.0008265875199919558, "loss": 2.4381, "step": 2575 }, { "epoch": 0.2953789703015709, "grad_norm": 0.25337583157589805, "learning_rate": 0.0008264468877192641, "loss": 2.4448, "step": 2576 }, { "epoch": 0.2954936360509116, "grad_norm": 0.23407568226072326, "learning_rate": 0.000826306210419447, "loss": 2.5709, "step": 2577 }, { "epoch": 0.29560830180025227, "grad_norm": 0.25538288865585457, "learning_rate": 0.0008261654881119081, "loss": 2.6779, "step": 2578 }, { "epoch": 0.29572296754959293, "grad_norm": 0.2574883768889266, "learning_rate": 0.0008260247208160574, "loss": 2.4956, "step": 2579 }, { "epoch": 0.2958376332989336, "grad_norm": 0.23959193148638167, "learning_rate": 0.000825883908551311, "loss": 2.52, "step": 2580 }, { "epoch": 0.2959522990482743, "grad_norm": 0.26020538219078226, "learning_rate": 0.0008257430513370914, "loss": 2.542, "step": 2581 }, { "epoch": 0.29606696479761496, "grad_norm": 0.2514207975682644, "learning_rate": 0.000825602149192827, "loss": 2.4919, "step": 2582 }, { "epoch": 0.2961816305469556, "grad_norm": 0.23948799311614954, "learning_rate": 0.0008254612021379526, "loss": 2.6958, "step": 2583 }, { "epoch": 0.2962962962962963, "grad_norm": 0.2616184053597987, "learning_rate": 0.0008253202101919095, "loss": 2.6454, "step": 2584 }, { "epoch": 0.296410962045637, "grad_norm": 0.272890953355618, "learning_rate": 0.0008251791733741442, "loss": 2.6542, "step": 2585 }, { "epoch": 0.29652562779497765, "grad_norm": 0.264928550882729, "learning_rate": 0.0008250380917041107, "loss": 2.5926, "step": 2586 }, { "epoch": 0.2966402935443183, "grad_norm": 0.249725572599277, "learning_rate": 0.0008248969652012681, "loss": 2.4806, "step": 2587 }, { "epoch": 0.29675495929365897, "grad_norm": 0.26393468016124505, "learning_rate": 0.0008247557938850824, "loss": 2.5642, "step": 2588 }, { "epoch": 0.29686962504299963, "grad_norm": 0.2740089980255711, "learning_rate": 0.0008246145777750253, "loss": 2.3383, "step": 2589 }, { "epoch": 0.29698429079234034, "grad_norm": 0.2750800957963272, "learning_rate": 0.0008244733168905748, "loss": 2.5697, "step": 2590 }, { "epoch": 0.297098956541681, "grad_norm": 0.2741802728961887, "learning_rate": 0.0008243320112512153, "loss": 2.6756, "step": 2591 }, { "epoch": 0.29721362229102166, "grad_norm": 0.2766665595057287, "learning_rate": 0.0008241906608764373, "loss": 2.5277, "step": 2592 }, { "epoch": 0.2973282880403623, "grad_norm": 0.2677089262380423, "learning_rate": 0.000824049265785737, "loss": 2.5989, "step": 2593 }, { "epoch": 0.29744295378970304, "grad_norm": 0.2618647860442471, "learning_rate": 0.0008239078259986177, "loss": 2.4987, "step": 2594 }, { "epoch": 0.2975576195390437, "grad_norm": 0.24787801235604665, "learning_rate": 0.0008237663415345879, "loss": 2.6281, "step": 2595 }, { "epoch": 0.29767228528838435, "grad_norm": 0.2462064791589125, "learning_rate": 0.0008236248124131629, "loss": 2.5271, "step": 2596 }, { "epoch": 0.297786951037725, "grad_norm": 0.25919899633384147, "learning_rate": 0.0008234832386538639, "loss": 2.5578, "step": 2597 }, { "epoch": 0.2979016167870657, "grad_norm": 0.2410867345853169, "learning_rate": 0.0008233416202762182, "loss": 2.5262, "step": 2598 }, { "epoch": 0.2980162825364064, "grad_norm": 0.2500468643598101, "learning_rate": 0.0008231999572997595, "loss": 2.5121, "step": 2599 }, { "epoch": 0.29813094828574704, "grad_norm": 0.25563868695159186, "learning_rate": 0.0008230582497440273, "loss": 2.4701, "step": 2600 }, { "epoch": 0.2982456140350877, "grad_norm": 0.25592953141750713, "learning_rate": 0.0008229164976285678, "loss": 2.5537, "step": 2601 }, { "epoch": 0.2983602797844284, "grad_norm": 0.2664228790988395, "learning_rate": 0.0008227747009729327, "loss": 2.4221, "step": 2602 }, { "epoch": 0.2984749455337691, "grad_norm": 0.26691023665909697, "learning_rate": 0.0008226328597966803, "loss": 2.721, "step": 2603 }, { "epoch": 0.29858961128310973, "grad_norm": 0.27734809539936495, "learning_rate": 0.0008224909741193747, "loss": 2.6227, "step": 2604 }, { "epoch": 0.2987042770324504, "grad_norm": 0.27451531824787123, "learning_rate": 0.0008223490439605865, "loss": 2.5723, "step": 2605 }, { "epoch": 0.29881894278179105, "grad_norm": 0.27516780950669967, "learning_rate": 0.0008222070693398924, "loss": 2.4942, "step": 2606 }, { "epoch": 0.29893360853113177, "grad_norm": 0.27411150746865875, "learning_rate": 0.0008220650502768748, "loss": 2.5202, "step": 2607 }, { "epoch": 0.2990482742804724, "grad_norm": 0.27486556650745697, "learning_rate": 0.0008219229867911224, "loss": 2.5505, "step": 2608 }, { "epoch": 0.2991629400298131, "grad_norm": 0.2843172835844898, "learning_rate": 0.0008217808789022308, "loss": 2.4929, "step": 2609 }, { "epoch": 0.29927760577915374, "grad_norm": 0.2549478895169563, "learning_rate": 0.0008216387266298004, "loss": 2.5998, "step": 2610 }, { "epoch": 0.29939227152849446, "grad_norm": 0.2539881219322232, "learning_rate": 0.0008214965299934386, "loss": 2.3953, "step": 2611 }, { "epoch": 0.2995069372778351, "grad_norm": 0.27181899012056543, "learning_rate": 0.0008213542890127589, "loss": 2.6682, "step": 2612 }, { "epoch": 0.2996216030271758, "grad_norm": 0.27438492832813927, "learning_rate": 0.0008212120037073805, "loss": 2.5172, "step": 2613 }, { "epoch": 0.29973626877651643, "grad_norm": 0.294729568093543, "learning_rate": 0.0008210696740969292, "loss": 2.5638, "step": 2614 }, { "epoch": 0.29985093452585715, "grad_norm": 0.25101844542013674, "learning_rate": 0.0008209273002010364, "loss": 2.5346, "step": 2615 }, { "epoch": 0.2999656002751978, "grad_norm": 0.2651371400762005, "learning_rate": 0.00082078488203934, "loss": 2.4691, "step": 2616 }, { "epoch": 0.30008026602453847, "grad_norm": 0.23102422636217235, "learning_rate": 0.0008206424196314838, "loss": 2.6656, "step": 2617 }, { "epoch": 0.3001949317738791, "grad_norm": 0.24545261754765782, "learning_rate": 0.0008204999129971178, "loss": 2.5504, "step": 2618 }, { "epoch": 0.30030959752321984, "grad_norm": 0.2929914124979611, "learning_rate": 0.0008203573621558982, "loss": 2.6014, "step": 2619 }, { "epoch": 0.3004242632725605, "grad_norm": 0.26382821691189, "learning_rate": 0.0008202147671274869, "loss": 2.4691, "step": 2620 }, { "epoch": 0.30053892902190116, "grad_norm": 0.2787423105909502, "learning_rate": 0.0008200721279315524, "loss": 2.5393, "step": 2621 }, { "epoch": 0.3006535947712418, "grad_norm": 0.27007698698041177, "learning_rate": 0.000819929444587769, "loss": 2.6526, "step": 2622 }, { "epoch": 0.3007682605205825, "grad_norm": 0.29976303574868957, "learning_rate": 0.0008197867171158171, "loss": 2.5902, "step": 2623 }, { "epoch": 0.3008829262699232, "grad_norm": 0.26229563612291473, "learning_rate": 0.0008196439455353833, "loss": 2.5184, "step": 2624 }, { "epoch": 0.30099759201926385, "grad_norm": 0.28907670550652503, "learning_rate": 0.0008195011298661601, "loss": 2.4586, "step": 2625 }, { "epoch": 0.3011122577686045, "grad_norm": 0.2523295885737605, "learning_rate": 0.0008193582701278464, "loss": 2.6349, "step": 2626 }, { "epoch": 0.30122692351794517, "grad_norm": 0.265167982685223, "learning_rate": 0.0008192153663401467, "loss": 2.5177, "step": 2627 }, { "epoch": 0.3013415892672859, "grad_norm": 0.2663613845479394, "learning_rate": 0.0008190724185227722, "loss": 2.5488, "step": 2628 }, { "epoch": 0.30145625501662654, "grad_norm": 0.24514794125436934, "learning_rate": 0.0008189294266954395, "loss": 2.488, "step": 2629 }, { "epoch": 0.3015709207659672, "grad_norm": 0.25204367834137453, "learning_rate": 0.0008187863908778718, "loss": 2.5412, "step": 2630 }, { "epoch": 0.30168558651530786, "grad_norm": 0.27596833928265563, "learning_rate": 0.0008186433110897982, "loss": 2.5455, "step": 2631 }, { "epoch": 0.30180025226464857, "grad_norm": 0.22970249849334712, "learning_rate": 0.0008185001873509534, "loss": 2.5921, "step": 2632 }, { "epoch": 0.30191491801398923, "grad_norm": 0.23848289902335468, "learning_rate": 0.0008183570196810793, "loss": 2.6264, "step": 2633 }, { "epoch": 0.3020295837633299, "grad_norm": 0.28015867889088225, "learning_rate": 0.0008182138080999226, "loss": 2.5359, "step": 2634 }, { "epoch": 0.30214424951267055, "grad_norm": 0.25331648027154957, "learning_rate": 0.0008180705526272368, "loss": 2.4238, "step": 2635 }, { "epoch": 0.30225891526201126, "grad_norm": 0.24063925033175645, "learning_rate": 0.0008179272532827811, "loss": 2.4331, "step": 2636 }, { "epoch": 0.3023735810113519, "grad_norm": 0.24859421665637899, "learning_rate": 0.0008177839100863212, "loss": 2.5105, "step": 2637 }, { "epoch": 0.3024882467606926, "grad_norm": 0.2487247812278199, "learning_rate": 0.0008176405230576285, "loss": 2.4875, "step": 2638 }, { "epoch": 0.30260291251003324, "grad_norm": 0.25636835463746843, "learning_rate": 0.0008174970922164803, "loss": 2.3896, "step": 2639 }, { "epoch": 0.3027175782593739, "grad_norm": 0.25496403348677044, "learning_rate": 0.0008173536175826603, "loss": 2.5221, "step": 2640 }, { "epoch": 0.3028322440087146, "grad_norm": 0.25622824681501527, "learning_rate": 0.000817210099175958, "loss": 2.4956, "step": 2641 }, { "epoch": 0.30294690975805527, "grad_norm": 0.26447630328246774, "learning_rate": 0.0008170665370161691, "loss": 2.5692, "step": 2642 }, { "epoch": 0.30306157550739593, "grad_norm": 0.25224223071648505, "learning_rate": 0.0008169229311230954, "loss": 2.5633, "step": 2643 }, { "epoch": 0.3031762412567366, "grad_norm": 0.2872893734550882, "learning_rate": 0.0008167792815165444, "loss": 2.7366, "step": 2644 }, { "epoch": 0.3032909070060773, "grad_norm": 0.2792001134825715, "learning_rate": 0.0008166355882163296, "loss": 2.6292, "step": 2645 }, { "epoch": 0.30340557275541796, "grad_norm": 0.24986772439521734, "learning_rate": 0.0008164918512422715, "loss": 2.5339, "step": 2646 }, { "epoch": 0.3035202385047586, "grad_norm": 0.2671803016762137, "learning_rate": 0.000816348070614195, "loss": 2.6087, "step": 2647 }, { "epoch": 0.3036349042540993, "grad_norm": 0.2622006373092917, "learning_rate": 0.0008162042463519326, "loss": 2.6436, "step": 2648 }, { "epoch": 0.30374957000344, "grad_norm": 0.2620055852396241, "learning_rate": 0.0008160603784753217, "loss": 2.6013, "step": 2649 }, { "epoch": 0.30386423575278065, "grad_norm": 0.2546561832347683, "learning_rate": 0.0008159164670042062, "loss": 2.4997, "step": 2650 }, { "epoch": 0.3039789015021213, "grad_norm": 0.24637347391066008, "learning_rate": 0.000815772511958436, "loss": 2.4108, "step": 2651 }, { "epoch": 0.30409356725146197, "grad_norm": 0.25959646568885353, "learning_rate": 0.000815628513357867, "loss": 2.5015, "step": 2652 }, { "epoch": 0.3042082330008027, "grad_norm": 0.24838959079694223, "learning_rate": 0.000815484471222361, "loss": 2.5001, "step": 2653 }, { "epoch": 0.30432289875014334, "grad_norm": 0.2705195954772799, "learning_rate": 0.0008153403855717858, "loss": 2.4347, "step": 2654 }, { "epoch": 0.304437564499484, "grad_norm": 0.2528392991176851, "learning_rate": 0.0008151962564260153, "loss": 2.5035, "step": 2655 }, { "epoch": 0.30455223024882466, "grad_norm": 0.25160209300453745, "learning_rate": 0.0008150520838049297, "loss": 2.5245, "step": 2656 }, { "epoch": 0.3046668959981653, "grad_norm": 0.2600276091220383, "learning_rate": 0.0008149078677284143, "loss": 2.5614, "step": 2657 }, { "epoch": 0.30478156174750604, "grad_norm": 0.260708029582181, "learning_rate": 0.0008147636082163614, "loss": 2.5877, "step": 2658 }, { "epoch": 0.3048962274968467, "grad_norm": 0.2729979278760044, "learning_rate": 0.0008146193052886685, "loss": 2.5036, "step": 2659 }, { "epoch": 0.30501089324618735, "grad_norm": 0.2629217107333538, "learning_rate": 0.0008144749589652398, "loss": 2.3489, "step": 2660 }, { "epoch": 0.305125558995528, "grad_norm": 0.24482058659224318, "learning_rate": 0.0008143305692659849, "loss": 2.5435, "step": 2661 }, { "epoch": 0.3052402247448687, "grad_norm": 0.2875927971134496, "learning_rate": 0.0008141861362108196, "loss": 2.5491, "step": 2662 }, { "epoch": 0.3053548904942094, "grad_norm": 0.28872340256237156, "learning_rate": 0.0008140416598196659, "loss": 2.5559, "step": 2663 }, { "epoch": 0.30546955624355004, "grad_norm": 0.26513107891437365, "learning_rate": 0.0008138971401124513, "loss": 2.5457, "step": 2664 }, { "epoch": 0.3055842219928907, "grad_norm": 0.25649235122659414, "learning_rate": 0.0008137525771091097, "loss": 2.4689, "step": 2665 }, { "epoch": 0.3056988877422314, "grad_norm": 0.23955248174996402, "learning_rate": 0.0008136079708295807, "loss": 2.6119, "step": 2666 }, { "epoch": 0.3058135534915721, "grad_norm": 0.25677393793355757, "learning_rate": 0.00081346332129381, "loss": 2.6494, "step": 2667 }, { "epoch": 0.30592821924091274, "grad_norm": 0.25144938577487663, "learning_rate": 0.0008133186285217493, "loss": 2.5477, "step": 2668 }, { "epoch": 0.3060428849902534, "grad_norm": 0.2417079255669188, "learning_rate": 0.0008131738925333563, "loss": 2.528, "step": 2669 }, { "epoch": 0.3061575507395941, "grad_norm": 0.2585545639496042, "learning_rate": 0.0008130291133485943, "loss": 2.6565, "step": 2670 }, { "epoch": 0.30627221648893477, "grad_norm": 0.2567159322259357, "learning_rate": 0.000812884290987433, "loss": 2.562, "step": 2671 }, { "epoch": 0.3063868822382754, "grad_norm": 0.2270050528058689, "learning_rate": 0.0008127394254698479, "loss": 2.5468, "step": 2672 }, { "epoch": 0.3065015479876161, "grad_norm": 0.2367038454114927, "learning_rate": 0.0008125945168158205, "loss": 2.6484, "step": 2673 }, { "epoch": 0.30661621373695674, "grad_norm": 0.26456186919946745, "learning_rate": 0.000812449565045338, "loss": 2.4856, "step": 2674 }, { "epoch": 0.30673087948629746, "grad_norm": 0.2860237512600303, "learning_rate": 0.000812304570178394, "loss": 2.5322, "step": 2675 }, { "epoch": 0.3068455452356381, "grad_norm": 0.2560239071956316, "learning_rate": 0.0008121595322349875, "loss": 2.5024, "step": 2676 }, { "epoch": 0.3069602109849788, "grad_norm": 0.26663122374546655, "learning_rate": 0.0008120144512351237, "loss": 2.4211, "step": 2677 }, { "epoch": 0.30707487673431944, "grad_norm": 0.24580009466119174, "learning_rate": 0.0008118693271988142, "loss": 2.4617, "step": 2678 }, { "epoch": 0.30718954248366015, "grad_norm": 0.27027706800165135, "learning_rate": 0.0008117241601460755, "loss": 2.5046, "step": 2679 }, { "epoch": 0.3073042082330008, "grad_norm": 0.2472294981545326, "learning_rate": 0.0008115789500969309, "loss": 2.4227, "step": 2680 }, { "epoch": 0.30741887398234147, "grad_norm": 0.25270660724312616, "learning_rate": 0.0008114336970714096, "loss": 2.6043, "step": 2681 }, { "epoch": 0.3075335397316821, "grad_norm": 0.2767914231370437, "learning_rate": 0.0008112884010895461, "loss": 2.4796, "step": 2682 }, { "epoch": 0.30764820548102284, "grad_norm": 0.27987696269915713, "learning_rate": 0.0008111430621713814, "loss": 2.5603, "step": 2683 }, { "epoch": 0.3077628712303635, "grad_norm": 0.2784712651880593, "learning_rate": 0.0008109976803369623, "loss": 2.5321, "step": 2684 }, { "epoch": 0.30787753697970416, "grad_norm": 0.249073994299987, "learning_rate": 0.0008108522556063411, "loss": 2.7551, "step": 2685 }, { "epoch": 0.3079922027290448, "grad_norm": 0.24350790496860594, "learning_rate": 0.0008107067879995768, "loss": 2.5378, "step": 2686 }, { "epoch": 0.30810686847838553, "grad_norm": 0.2510122305622537, "learning_rate": 0.0008105612775367337, "loss": 2.6519, "step": 2687 }, { "epoch": 0.3082215342277262, "grad_norm": 0.23305079242844132, "learning_rate": 0.0008104157242378821, "loss": 2.519, "step": 2688 }, { "epoch": 0.30833619997706685, "grad_norm": 0.2591300012711176, "learning_rate": 0.0008102701281230985, "loss": 2.6377, "step": 2689 }, { "epoch": 0.3084508657264075, "grad_norm": 0.25422273581982097, "learning_rate": 0.0008101244892124651, "loss": 2.5365, "step": 2690 }, { "epoch": 0.30856553147574817, "grad_norm": 0.2532654516377513, "learning_rate": 0.0008099788075260698, "loss": 2.5476, "step": 2691 }, { "epoch": 0.3086801972250889, "grad_norm": 0.26042341344518805, "learning_rate": 0.0008098330830840066, "loss": 2.4158, "step": 2692 }, { "epoch": 0.30879486297442954, "grad_norm": 0.26647153041776145, "learning_rate": 0.0008096873159063758, "loss": 2.506, "step": 2693 }, { "epoch": 0.3089095287237702, "grad_norm": 0.2678328220401086, "learning_rate": 0.0008095415060132829, "loss": 2.5318, "step": 2694 }, { "epoch": 0.30902419447311086, "grad_norm": 0.26848607307906336, "learning_rate": 0.0008093956534248395, "loss": 2.5806, "step": 2695 }, { "epoch": 0.3091388602224516, "grad_norm": 0.2981021101253301, "learning_rate": 0.0008092497581611636, "loss": 2.5648, "step": 2696 }, { "epoch": 0.30925352597179223, "grad_norm": 0.2994617926537688, "learning_rate": 0.0008091038202423781, "loss": 2.6669, "step": 2697 }, { "epoch": 0.3093681917211329, "grad_norm": 0.2746069578867281, "learning_rate": 0.0008089578396886128, "loss": 2.5688, "step": 2698 }, { "epoch": 0.30948285747047355, "grad_norm": 0.29261380374451496, "learning_rate": 0.0008088118165200026, "loss": 2.6666, "step": 2699 }, { "epoch": 0.30959752321981426, "grad_norm": 0.2539384341265657, "learning_rate": 0.000808665750756689, "loss": 2.5422, "step": 2700 }, { "epoch": 0.3097121889691549, "grad_norm": 0.25576990037691166, "learning_rate": 0.0008085196424188188, "loss": 2.5166, "step": 2701 }, { "epoch": 0.3098268547184956, "grad_norm": 0.22904459733372143, "learning_rate": 0.0008083734915265448, "loss": 2.5619, "step": 2702 }, { "epoch": 0.30994152046783624, "grad_norm": 0.24781701338787185, "learning_rate": 0.0008082272981000258, "loss": 2.6609, "step": 2703 }, { "epoch": 0.31005618621717695, "grad_norm": 0.24955570645334638, "learning_rate": 0.0008080810621594264, "loss": 2.5858, "step": 2704 }, { "epoch": 0.3101708519665176, "grad_norm": 0.2578692584024744, "learning_rate": 0.0008079347837249168, "loss": 2.5622, "step": 2705 }, { "epoch": 0.31028551771585827, "grad_norm": 0.23184892457891496, "learning_rate": 0.0008077884628166738, "loss": 2.617, "step": 2706 }, { "epoch": 0.31040018346519893, "grad_norm": 0.2684669265473634, "learning_rate": 0.0008076420994548792, "loss": 2.5964, "step": 2707 }, { "epoch": 0.3105148492145396, "grad_norm": 0.2624183159586972, "learning_rate": 0.0008074956936597213, "loss": 2.6638, "step": 2708 }, { "epoch": 0.3106295149638803, "grad_norm": 0.25487151867290747, "learning_rate": 0.0008073492454513938, "loss": 2.5498, "step": 2709 }, { "epoch": 0.31074418071322096, "grad_norm": 0.24230756186544447, "learning_rate": 0.0008072027548500964, "loss": 2.4159, "step": 2710 }, { "epoch": 0.3108588464625616, "grad_norm": 0.2742094328852907, "learning_rate": 0.0008070562218760349, "loss": 2.5136, "step": 2711 }, { "epoch": 0.3109735122119023, "grad_norm": 0.2326028214877704, "learning_rate": 0.0008069096465494205, "loss": 2.4249, "step": 2712 }, { "epoch": 0.311088177961243, "grad_norm": 0.2522300416371095, "learning_rate": 0.0008067630288904708, "loss": 2.6436, "step": 2713 }, { "epoch": 0.31120284371058365, "grad_norm": 0.2518281576806681, "learning_rate": 0.0008066163689194086, "loss": 2.5271, "step": 2714 }, { "epoch": 0.3113175094599243, "grad_norm": 0.2502315045755176, "learning_rate": 0.0008064696666564631, "loss": 2.5947, "step": 2715 }, { "epoch": 0.31143217520926497, "grad_norm": 0.2694345853205722, "learning_rate": 0.0008063229221218686, "loss": 2.6579, "step": 2716 }, { "epoch": 0.3115468409586057, "grad_norm": 0.2521574112192198, "learning_rate": 0.0008061761353358663, "loss": 2.4974, "step": 2717 }, { "epoch": 0.31166150670794635, "grad_norm": 0.26179401038550065, "learning_rate": 0.0008060293063187023, "loss": 2.5257, "step": 2718 }, { "epoch": 0.311776172457287, "grad_norm": 0.26546986535215866, "learning_rate": 0.0008058824350906288, "loss": 2.5105, "step": 2719 }, { "epoch": 0.31189083820662766, "grad_norm": 0.2668851846980171, "learning_rate": 0.0008057355216719043, "loss": 2.5784, "step": 2720 }, { "epoch": 0.3120055039559684, "grad_norm": 0.251776404799129, "learning_rate": 0.0008055885660827922, "loss": 2.5008, "step": 2721 }, { "epoch": 0.31212016970530904, "grad_norm": 0.2842423454495024, "learning_rate": 0.0008054415683435625, "loss": 2.7241, "step": 2722 }, { "epoch": 0.3122348354546497, "grad_norm": 0.2701201811750471, "learning_rate": 0.0008052945284744909, "loss": 2.5242, "step": 2723 }, { "epoch": 0.31234950120399035, "grad_norm": 0.25862254861137246, "learning_rate": 0.0008051474464958584, "loss": 2.5886, "step": 2724 }, { "epoch": 0.312464166953331, "grad_norm": 0.27070114497362996, "learning_rate": 0.0008050003224279521, "loss": 2.5218, "step": 2725 }, { "epoch": 0.3125788327026717, "grad_norm": 0.25952894474072247, "learning_rate": 0.0008048531562910655, "loss": 2.5008, "step": 2726 }, { "epoch": 0.3126934984520124, "grad_norm": 0.24486997316840947, "learning_rate": 0.0008047059481054967, "loss": 2.5288, "step": 2727 }, { "epoch": 0.31280816420135305, "grad_norm": 0.293489170532409, "learning_rate": 0.0008045586978915508, "loss": 2.6463, "step": 2728 }, { "epoch": 0.3129228299506937, "grad_norm": 0.26298020601946676, "learning_rate": 0.0008044114056695379, "loss": 2.5373, "step": 2729 }, { "epoch": 0.3130374957000344, "grad_norm": 0.281979728365036, "learning_rate": 0.0008042640714597741, "loss": 2.5573, "step": 2730 }, { "epoch": 0.3131521614493751, "grad_norm": 0.29281781934445444, "learning_rate": 0.0008041166952825816, "loss": 2.5292, "step": 2731 }, { "epoch": 0.31326682719871574, "grad_norm": 0.2658127679136106, "learning_rate": 0.0008039692771582878, "loss": 2.4615, "step": 2732 }, { "epoch": 0.3133814929480564, "grad_norm": 0.28768141583692936, "learning_rate": 0.0008038218171072264, "loss": 2.6701, "step": 2733 }, { "epoch": 0.3134961586973971, "grad_norm": 0.25830144294692137, "learning_rate": 0.000803674315149737, "loss": 2.5464, "step": 2734 }, { "epoch": 0.31361082444673777, "grad_norm": 0.2852846254635785, "learning_rate": 0.0008035267713061641, "loss": 2.5928, "step": 2735 }, { "epoch": 0.3137254901960784, "grad_norm": 0.23472164721643343, "learning_rate": 0.0008033791855968589, "loss": 2.4265, "step": 2736 }, { "epoch": 0.3138401559454191, "grad_norm": 0.25493818323981204, "learning_rate": 0.000803231558042178, "loss": 2.6247, "step": 2737 }, { "epoch": 0.3139548216947598, "grad_norm": 0.28023293351777306, "learning_rate": 0.0008030838886624838, "loss": 2.5063, "step": 2738 }, { "epoch": 0.31406948744410046, "grad_norm": 0.2630246056169887, "learning_rate": 0.0008029361774781446, "loss": 2.394, "step": 2739 }, { "epoch": 0.3141841531934411, "grad_norm": 0.2560965038303245, "learning_rate": 0.0008027884245095338, "loss": 2.4202, "step": 2740 }, { "epoch": 0.3142988189427818, "grad_norm": 0.24807465840054496, "learning_rate": 0.0008026406297770319, "loss": 2.4311, "step": 2741 }, { "epoch": 0.31441348469212244, "grad_norm": 0.2887535322603709, "learning_rate": 0.0008024927933010237, "loss": 2.3472, "step": 2742 }, { "epoch": 0.31452815044146315, "grad_norm": 0.26549045528082954, "learning_rate": 0.0008023449151019009, "loss": 2.5869, "step": 2743 }, { "epoch": 0.3146428161908038, "grad_norm": 0.2604158522416407, "learning_rate": 0.0008021969952000603, "loss": 2.4567, "step": 2744 }, { "epoch": 0.31475748194014447, "grad_norm": 0.2904746956641578, "learning_rate": 0.0008020490336159045, "loss": 2.4965, "step": 2745 }, { "epoch": 0.3148721476894851, "grad_norm": 0.27773170736459274, "learning_rate": 0.0008019010303698422, "loss": 2.6165, "step": 2746 }, { "epoch": 0.31498681343882584, "grad_norm": 0.27519260312158306, "learning_rate": 0.0008017529854822873, "loss": 2.6651, "step": 2747 }, { "epoch": 0.3151014791881665, "grad_norm": 0.3125738281926727, "learning_rate": 0.0008016048989736602, "loss": 2.6902, "step": 2748 }, { "epoch": 0.31521614493750716, "grad_norm": 0.25423221436240917, "learning_rate": 0.0008014567708643864, "loss": 2.5691, "step": 2749 }, { "epoch": 0.3153308106868478, "grad_norm": 0.28187838067164245, "learning_rate": 0.0008013086011748972, "loss": 2.5893, "step": 2750 }, { "epoch": 0.31544547643618853, "grad_norm": 0.25368645867522216, "learning_rate": 0.0008011603899256301, "loss": 2.7142, "step": 2751 }, { "epoch": 0.3155601421855292, "grad_norm": 0.27491745562039316, "learning_rate": 0.0008010121371370277, "loss": 2.5609, "step": 2752 }, { "epoch": 0.31567480793486985, "grad_norm": 0.2781293228817274, "learning_rate": 0.000800863842829539, "loss": 2.3923, "step": 2753 }, { "epoch": 0.3157894736842105, "grad_norm": 0.27348538943752926, "learning_rate": 0.000800715507023618, "loss": 2.4618, "step": 2754 }, { "epoch": 0.3159041394335512, "grad_norm": 0.2667352028282389, "learning_rate": 0.0008005671297397248, "loss": 2.6094, "step": 2755 }, { "epoch": 0.3160188051828919, "grad_norm": 0.26176336754292906, "learning_rate": 0.0008004187109983257, "loss": 2.495, "step": 2756 }, { "epoch": 0.31613347093223254, "grad_norm": 0.25856853438779276, "learning_rate": 0.0008002702508198918, "loss": 2.5271, "step": 2757 }, { "epoch": 0.3162481366815732, "grad_norm": 0.2697515561619566, "learning_rate": 0.0008001217492249004, "loss": 2.7077, "step": 2758 }, { "epoch": 0.31636280243091386, "grad_norm": 0.2727689479532433, "learning_rate": 0.0007999732062338347, "loss": 2.6964, "step": 2759 }, { "epoch": 0.3164774681802546, "grad_norm": 0.26509726297860264, "learning_rate": 0.0007998246218671829, "loss": 2.6899, "step": 2760 }, { "epoch": 0.31659213392959523, "grad_norm": 0.2688068616215317, "learning_rate": 0.00079967599614544, "loss": 2.5834, "step": 2761 }, { "epoch": 0.3167067996789359, "grad_norm": 0.25015862351846097, "learning_rate": 0.0007995273290891056, "loss": 2.4707, "step": 2762 }, { "epoch": 0.31682146542827655, "grad_norm": 0.29203197185339563, "learning_rate": 0.0007993786207186859, "loss": 2.6624, "step": 2763 }, { "epoch": 0.31693613117761726, "grad_norm": 0.21693830768901595, "learning_rate": 0.000799229871054692, "loss": 2.6257, "step": 2764 }, { "epoch": 0.3170507969269579, "grad_norm": 0.24463681816752275, "learning_rate": 0.0007990810801176411, "loss": 2.6584, "step": 2765 }, { "epoch": 0.3171654626762986, "grad_norm": 0.23992130071722484, "learning_rate": 0.0007989322479280564, "loss": 2.5523, "step": 2766 }, { "epoch": 0.31728012842563924, "grad_norm": 0.2523002662428207, "learning_rate": 0.000798783374506466, "loss": 2.4852, "step": 2767 }, { "epoch": 0.31739479417497996, "grad_norm": 0.24347098972075343, "learning_rate": 0.0007986344598734048, "loss": 2.454, "step": 2768 }, { "epoch": 0.3175094599243206, "grad_norm": 0.2262762888043256, "learning_rate": 0.0007984855040494122, "loss": 2.473, "step": 2769 }, { "epoch": 0.3176241256736613, "grad_norm": 0.2417610616217539, "learning_rate": 0.0007983365070550339, "loss": 2.5634, "step": 2770 }, { "epoch": 0.31773879142300193, "grad_norm": 0.2632834339460108, "learning_rate": 0.0007981874689108213, "loss": 2.655, "step": 2771 }, { "epoch": 0.31785345717234265, "grad_norm": 0.26639872155513394, "learning_rate": 0.0007980383896373312, "loss": 2.4897, "step": 2772 }, { "epoch": 0.3179681229216833, "grad_norm": 0.2765302746988034, "learning_rate": 0.0007978892692551265, "loss": 2.4632, "step": 2773 }, { "epoch": 0.31808278867102396, "grad_norm": 0.2834677864598343, "learning_rate": 0.0007977401077847755, "loss": 2.8824, "step": 2774 }, { "epoch": 0.3181974544203646, "grad_norm": 0.2663689655277662, "learning_rate": 0.0007975909052468518, "loss": 2.5171, "step": 2775 }, { "epoch": 0.3183121201697053, "grad_norm": 0.2449206846226601, "learning_rate": 0.0007974416616619355, "loss": 2.5471, "step": 2776 }, { "epoch": 0.318426785919046, "grad_norm": 0.25571279625488785, "learning_rate": 0.0007972923770506118, "loss": 2.6343, "step": 2777 }, { "epoch": 0.31854145166838665, "grad_norm": 0.24758240123761288, "learning_rate": 0.0007971430514334715, "loss": 2.4911, "step": 2778 }, { "epoch": 0.3186561174177273, "grad_norm": 0.2822630117194901, "learning_rate": 0.0007969936848311113, "loss": 2.5072, "step": 2779 }, { "epoch": 0.318770783167068, "grad_norm": 0.2793599018988377, "learning_rate": 0.0007968442772641334, "loss": 2.5835, "step": 2780 }, { "epoch": 0.3188854489164087, "grad_norm": 0.27403182746574295, "learning_rate": 0.000796694828753146, "loss": 2.5953, "step": 2781 }, { "epoch": 0.31900011466574935, "grad_norm": 0.24014038943632512, "learning_rate": 0.0007965453393187624, "loss": 2.6227, "step": 2782 }, { "epoch": 0.31911478041509, "grad_norm": 0.2763432628460241, "learning_rate": 0.000796395808981602, "loss": 2.559, "step": 2783 }, { "epoch": 0.31922944616443066, "grad_norm": 0.23799734984909474, "learning_rate": 0.0007962462377622895, "loss": 2.3974, "step": 2784 }, { "epoch": 0.3193441119137714, "grad_norm": 0.283903704983406, "learning_rate": 0.0007960966256814555, "loss": 2.6166, "step": 2785 }, { "epoch": 0.31945877766311204, "grad_norm": 0.2437832505668744, "learning_rate": 0.0007959469727597359, "loss": 2.5275, "step": 2786 }, { "epoch": 0.3195734434124527, "grad_norm": 0.273707121802684, "learning_rate": 0.0007957972790177729, "loss": 2.5866, "step": 2787 }, { "epoch": 0.31968810916179335, "grad_norm": 0.28004690217837547, "learning_rate": 0.0007956475444762137, "loss": 2.3797, "step": 2788 }, { "epoch": 0.31980277491113407, "grad_norm": 0.26827576369003925, "learning_rate": 0.0007954977691557112, "loss": 2.6477, "step": 2789 }, { "epoch": 0.31991744066047473, "grad_norm": 0.26214519874774866, "learning_rate": 0.0007953479530769241, "loss": 2.4592, "step": 2790 }, { "epoch": 0.3200321064098154, "grad_norm": 0.23303009260242052, "learning_rate": 0.0007951980962605168, "loss": 2.4281, "step": 2791 }, { "epoch": 0.32014677215915605, "grad_norm": 0.25066238317430645, "learning_rate": 0.000795048198727159, "loss": 2.4801, "step": 2792 }, { "epoch": 0.3202614379084967, "grad_norm": 0.2836330464348542, "learning_rate": 0.0007948982604975264, "loss": 2.4841, "step": 2793 }, { "epoch": 0.3203761036578374, "grad_norm": 0.29358355746930076, "learning_rate": 0.0007947482815923001, "loss": 2.6007, "step": 2794 }, { "epoch": 0.3204907694071781, "grad_norm": 0.28705019498155937, "learning_rate": 0.0007945982620321666, "loss": 2.6222, "step": 2795 }, { "epoch": 0.32060543515651874, "grad_norm": 0.2483678228089555, "learning_rate": 0.0007944482018378185, "loss": 2.6724, "step": 2796 }, { "epoch": 0.3207201009058594, "grad_norm": 0.26782423867168276, "learning_rate": 0.0007942981010299537, "loss": 2.6557, "step": 2797 }, { "epoch": 0.3208347666552001, "grad_norm": 0.2407086954012889, "learning_rate": 0.0007941479596292756, "loss": 2.5861, "step": 2798 }, { "epoch": 0.32094943240454077, "grad_norm": 0.24774972243949178, "learning_rate": 0.0007939977776564935, "loss": 2.5715, "step": 2799 }, { "epoch": 0.3210640981538814, "grad_norm": 0.26441328332595304, "learning_rate": 0.0007938475551323221, "loss": 2.5076, "step": 2800 }, { "epoch": 0.3211787639032221, "grad_norm": 0.2394643466809427, "learning_rate": 0.0007936972920774817, "loss": 2.4372, "step": 2801 }, { "epoch": 0.3212934296525628, "grad_norm": 0.26451877179185185, "learning_rate": 0.000793546988512698, "loss": 2.6105, "step": 2802 }, { "epoch": 0.32140809540190346, "grad_norm": 0.25785014674822554, "learning_rate": 0.0007933966444587031, "loss": 2.5864, "step": 2803 }, { "epoch": 0.3215227611512441, "grad_norm": 0.2693742148968166, "learning_rate": 0.0007932462599362335, "loss": 2.6268, "step": 2804 }, { "epoch": 0.3216374269005848, "grad_norm": 0.2446786164322382, "learning_rate": 0.0007930958349660323, "loss": 2.5137, "step": 2805 }, { "epoch": 0.3217520926499255, "grad_norm": 0.25502467829752334, "learning_rate": 0.0007929453695688475, "loss": 2.4641, "step": 2806 }, { "epoch": 0.32186675839926615, "grad_norm": 0.2720720902427651, "learning_rate": 0.000792794863765433, "loss": 2.7138, "step": 2807 }, { "epoch": 0.3219814241486068, "grad_norm": 0.3041731071757791, "learning_rate": 0.0007926443175765483, "loss": 2.5614, "step": 2808 }, { "epoch": 0.32209608989794747, "grad_norm": 0.24676312579365384, "learning_rate": 0.0007924937310229583, "loss": 2.3599, "step": 2809 }, { "epoch": 0.3222107556472881, "grad_norm": 0.2785274434895631, "learning_rate": 0.0007923431041254335, "loss": 2.4711, "step": 2810 }, { "epoch": 0.32232542139662884, "grad_norm": 0.27688080833143375, "learning_rate": 0.00079219243690475, "loss": 2.5351, "step": 2811 }, { "epoch": 0.3224400871459695, "grad_norm": 0.2883791278677757, "learning_rate": 0.0007920417293816895, "loss": 2.5583, "step": 2812 }, { "epoch": 0.32255475289531016, "grad_norm": 0.2864312738508502, "learning_rate": 0.0007918909815770394, "loss": 2.6237, "step": 2813 }, { "epoch": 0.3226694186446508, "grad_norm": 0.2650966063170184, "learning_rate": 0.0007917401935115923, "loss": 2.6403, "step": 2814 }, { "epoch": 0.32278408439399153, "grad_norm": 0.24246663942191735, "learning_rate": 0.0007915893652061466, "loss": 2.584, "step": 2815 }, { "epoch": 0.3228987501433322, "grad_norm": 0.24740388510383968, "learning_rate": 0.000791438496681506, "loss": 2.4981, "step": 2816 }, { "epoch": 0.32301341589267285, "grad_norm": 0.275669739600022, "learning_rate": 0.0007912875879584802, "loss": 2.4866, "step": 2817 }, { "epoch": 0.3231280816420135, "grad_norm": 0.26084347006336706, "learning_rate": 0.0007911366390578841, "loss": 2.3569, "step": 2818 }, { "epoch": 0.3232427473913542, "grad_norm": 0.2344061901438677, "learning_rate": 0.0007909856500005382, "loss": 2.4777, "step": 2819 }, { "epoch": 0.3233574131406949, "grad_norm": 0.2439786015431069, "learning_rate": 0.0007908346208072686, "loss": 2.4736, "step": 2820 }, { "epoch": 0.32347207889003554, "grad_norm": 0.24218638585601474, "learning_rate": 0.0007906835514989068, "loss": 2.3964, "step": 2821 }, { "epoch": 0.3235867446393762, "grad_norm": 0.23865145209032212, "learning_rate": 0.0007905324420962901, "loss": 2.4186, "step": 2822 }, { "epoch": 0.3237014103887169, "grad_norm": 0.28132928295466353, "learning_rate": 0.0007903812926202611, "loss": 2.5713, "step": 2823 }, { "epoch": 0.3238160761380576, "grad_norm": 0.31046520165071356, "learning_rate": 0.0007902301030916679, "loss": 2.6242, "step": 2824 }, { "epoch": 0.32393074188739823, "grad_norm": 0.27325255005130056, "learning_rate": 0.0007900788735313642, "loss": 2.5407, "step": 2825 }, { "epoch": 0.3240454076367389, "grad_norm": 0.25140306649157157, "learning_rate": 0.0007899276039602094, "loss": 2.5342, "step": 2826 }, { "epoch": 0.3241600733860796, "grad_norm": 0.28537929220734265, "learning_rate": 0.000789776294399068, "loss": 2.6859, "step": 2827 }, { "epoch": 0.32427473913542026, "grad_norm": 0.2671271911444393, "learning_rate": 0.0007896249448688106, "loss": 2.5106, "step": 2828 }, { "epoch": 0.3243894048847609, "grad_norm": 0.25409723441991316, "learning_rate": 0.0007894735553903127, "loss": 2.5361, "step": 2829 }, { "epoch": 0.3245040706341016, "grad_norm": 0.26652403393815277, "learning_rate": 0.0007893221259844558, "loss": 2.5533, "step": 2830 }, { "epoch": 0.32461873638344224, "grad_norm": 0.27379868085659836, "learning_rate": 0.0007891706566721266, "loss": 2.6198, "step": 2831 }, { "epoch": 0.32473340213278296, "grad_norm": 0.22681342881836883, "learning_rate": 0.0007890191474742173, "loss": 2.3829, "step": 2832 }, { "epoch": 0.3248480678821236, "grad_norm": 0.23892736322183392, "learning_rate": 0.0007888675984116258, "loss": 2.5828, "step": 2833 }, { "epoch": 0.3249627336314643, "grad_norm": 0.24048152724183752, "learning_rate": 0.0007887160095052555, "loss": 2.4545, "step": 2834 }, { "epoch": 0.32507739938080493, "grad_norm": 0.25472926845466093, "learning_rate": 0.000788564380776015, "loss": 2.5582, "step": 2835 }, { "epoch": 0.32519206513014565, "grad_norm": 0.2260893058046602, "learning_rate": 0.000788412712244819, "loss": 2.5609, "step": 2836 }, { "epoch": 0.3253067308794863, "grad_norm": 0.28064529785732356, "learning_rate": 0.0007882610039325867, "loss": 2.5482, "step": 2837 }, { "epoch": 0.32542139662882696, "grad_norm": 0.22439188050475212, "learning_rate": 0.0007881092558602437, "loss": 2.5169, "step": 2838 }, { "epoch": 0.3255360623781676, "grad_norm": 0.24008033615671384, "learning_rate": 0.0007879574680487209, "loss": 2.6281, "step": 2839 }, { "epoch": 0.32565072812750834, "grad_norm": 0.27526431123506817, "learning_rate": 0.0007878056405189542, "loss": 2.6883, "step": 2840 }, { "epoch": 0.325765393876849, "grad_norm": 0.2744979686760225, "learning_rate": 0.0007876537732918855, "loss": 2.607, "step": 2841 }, { "epoch": 0.32588005962618966, "grad_norm": 0.22289891469328385, "learning_rate": 0.0007875018663884619, "loss": 2.3838, "step": 2842 }, { "epoch": 0.3259947253755303, "grad_norm": 0.2798770864686912, "learning_rate": 0.0007873499198296361, "loss": 2.5933, "step": 2843 }, { "epoch": 0.32610939112487103, "grad_norm": 0.2575396719658231, "learning_rate": 0.0007871979336363664, "loss": 2.5121, "step": 2844 }, { "epoch": 0.3262240568742117, "grad_norm": 0.26922923653053504, "learning_rate": 0.0007870459078296162, "loss": 2.528, "step": 2845 }, { "epoch": 0.32633872262355235, "grad_norm": 0.25322752844208946, "learning_rate": 0.0007868938424303545, "loss": 2.4416, "step": 2846 }, { "epoch": 0.326453388372893, "grad_norm": 0.2538662258094524, "learning_rate": 0.000786741737459556, "loss": 2.4377, "step": 2847 }, { "epoch": 0.32656805412223366, "grad_norm": 0.24189717973684755, "learning_rate": 0.0007865895929382007, "loss": 2.5253, "step": 2848 }, { "epoch": 0.3266827198715744, "grad_norm": 0.2688373634533083, "learning_rate": 0.0007864374088872739, "loss": 2.5152, "step": 2849 }, { "epoch": 0.32679738562091504, "grad_norm": 0.28012949856809244, "learning_rate": 0.0007862851853277664, "loss": 2.5565, "step": 2850 }, { "epoch": 0.3269120513702557, "grad_norm": 0.25456421485604575, "learning_rate": 0.0007861329222806748, "loss": 2.4943, "step": 2851 }, { "epoch": 0.32702671711959636, "grad_norm": 0.22615743663347057, "learning_rate": 0.0007859806197670007, "loss": 2.5001, "step": 2852 }, { "epoch": 0.32714138286893707, "grad_norm": 0.2589506373369928, "learning_rate": 0.0007858282778077513, "loss": 2.5648, "step": 2853 }, { "epoch": 0.32725604861827773, "grad_norm": 0.26710779443082944, "learning_rate": 0.0007856758964239396, "loss": 2.4572, "step": 2854 }, { "epoch": 0.3273707143676184, "grad_norm": 0.26219531440983723, "learning_rate": 0.0007855234756365832, "loss": 2.5781, "step": 2855 }, { "epoch": 0.32748538011695905, "grad_norm": 0.2475063160137756, "learning_rate": 0.0007853710154667062, "loss": 2.3766, "step": 2856 }, { "epoch": 0.32760004586629976, "grad_norm": 0.26762627190769117, "learning_rate": 0.0007852185159353371, "loss": 2.6682, "step": 2857 }, { "epoch": 0.3277147116156404, "grad_norm": 0.30475359791353324, "learning_rate": 0.0007850659770635104, "loss": 2.5329, "step": 2858 }, { "epoch": 0.3278293773649811, "grad_norm": 0.26807150913198946, "learning_rate": 0.0007849133988722663, "loss": 2.6813, "step": 2859 }, { "epoch": 0.32794404311432174, "grad_norm": 0.2790196442806882, "learning_rate": 0.0007847607813826496, "loss": 2.6527, "step": 2860 }, { "epoch": 0.32805870886366245, "grad_norm": 0.2764591667273137, "learning_rate": 0.0007846081246157111, "loss": 2.6602, "step": 2861 }, { "epoch": 0.3281733746130031, "grad_norm": 0.2531657367419072, "learning_rate": 0.000784455428592507, "loss": 2.5534, "step": 2862 }, { "epoch": 0.32828804036234377, "grad_norm": 0.24112452993303823, "learning_rate": 0.0007843026933340988, "loss": 2.487, "step": 2863 }, { "epoch": 0.32840270611168443, "grad_norm": 0.27201995237372106, "learning_rate": 0.0007841499188615533, "loss": 2.5236, "step": 2864 }, { "epoch": 0.3285173718610251, "grad_norm": 0.25679596670328025, "learning_rate": 0.0007839971051959427, "loss": 2.373, "step": 2865 }, { "epoch": 0.3286320376103658, "grad_norm": 0.25598158059681514, "learning_rate": 0.0007838442523583451, "loss": 2.6375, "step": 2866 }, { "epoch": 0.32874670335970646, "grad_norm": 0.24157278762459178, "learning_rate": 0.0007836913603698434, "loss": 2.4772, "step": 2867 }, { "epoch": 0.3288613691090471, "grad_norm": 0.24300549690134113, "learning_rate": 0.0007835384292515263, "loss": 2.5608, "step": 2868 }, { "epoch": 0.3289760348583878, "grad_norm": 0.23423658065328476, "learning_rate": 0.0007833854590244875, "loss": 2.5183, "step": 2869 }, { "epoch": 0.3290907006077285, "grad_norm": 0.2633010515284913, "learning_rate": 0.0007832324497098266, "loss": 2.5514, "step": 2870 }, { "epoch": 0.32920536635706915, "grad_norm": 0.25078891805469994, "learning_rate": 0.0007830794013286479, "loss": 2.4767, "step": 2871 }, { "epoch": 0.3293200321064098, "grad_norm": 0.25320941166244093, "learning_rate": 0.0007829263139020619, "loss": 2.6326, "step": 2872 }, { "epoch": 0.32943469785575047, "grad_norm": 0.2622802972303187, "learning_rate": 0.0007827731874511841, "loss": 2.5479, "step": 2873 }, { "epoch": 0.3295493636050912, "grad_norm": 0.25869669665015843, "learning_rate": 0.0007826200219971352, "loss": 2.5387, "step": 2874 }, { "epoch": 0.32966402935443184, "grad_norm": 0.2470348670177471, "learning_rate": 0.0007824668175610412, "loss": 2.5624, "step": 2875 }, { "epoch": 0.3297786951037725, "grad_norm": 0.24449306216271172, "learning_rate": 0.0007823135741640343, "loss": 2.5067, "step": 2876 }, { "epoch": 0.32989336085311316, "grad_norm": 0.258631219954575, "learning_rate": 0.0007821602918272512, "loss": 2.4725, "step": 2877 }, { "epoch": 0.3300080266024539, "grad_norm": 0.2676561394074989, "learning_rate": 0.0007820069705718342, "loss": 2.6316, "step": 2878 }, { "epoch": 0.33012269235179453, "grad_norm": 0.2565330499417993, "learning_rate": 0.0007818536104189313, "loss": 2.5035, "step": 2879 }, { "epoch": 0.3302373581011352, "grad_norm": 0.22433423312559314, "learning_rate": 0.0007817002113896954, "loss": 2.4204, "step": 2880 }, { "epoch": 0.33035202385047585, "grad_norm": 0.25392861009320566, "learning_rate": 0.000781546773505285, "loss": 2.4006, "step": 2881 }, { "epoch": 0.3304666895998165, "grad_norm": 0.2762052958517115, "learning_rate": 0.000781393296786864, "loss": 2.5937, "step": 2882 }, { "epoch": 0.3305813553491572, "grad_norm": 0.25738716911641596, "learning_rate": 0.0007812397812556015, "loss": 2.4689, "step": 2883 }, { "epoch": 0.3306960210984979, "grad_norm": 0.2816323881872482, "learning_rate": 0.0007810862269326722, "loss": 2.5029, "step": 2884 }, { "epoch": 0.33081068684783854, "grad_norm": 0.2469754387458031, "learning_rate": 0.0007809326338392557, "loss": 2.56, "step": 2885 }, { "epoch": 0.3309253525971792, "grad_norm": 0.286507441268205, "learning_rate": 0.0007807790019965376, "loss": 2.5209, "step": 2886 }, { "epoch": 0.3310400183465199, "grad_norm": 0.26586167785692216, "learning_rate": 0.0007806253314257082, "loss": 2.5099, "step": 2887 }, { "epoch": 0.3311546840958606, "grad_norm": 0.2579354015847745, "learning_rate": 0.0007804716221479637, "loss": 2.6918, "step": 2888 }, { "epoch": 0.33126934984520123, "grad_norm": 0.2467974256258055, "learning_rate": 0.000780317874184505, "loss": 2.5583, "step": 2889 }, { "epoch": 0.3313840155945419, "grad_norm": 0.2485631789784952, "learning_rate": 0.000780164087556539, "loss": 2.5398, "step": 2890 }, { "epoch": 0.3314986813438826, "grad_norm": 0.26586979682912076, "learning_rate": 0.0007800102622852776, "loss": 2.5726, "step": 2891 }, { "epoch": 0.33161334709322327, "grad_norm": 0.24974367660962007, "learning_rate": 0.0007798563983919379, "loss": 2.4688, "step": 2892 }, { "epoch": 0.3317280128425639, "grad_norm": 0.23674130435754995, "learning_rate": 0.0007797024958977425, "loss": 2.6279, "step": 2893 }, { "epoch": 0.3318426785919046, "grad_norm": 0.2600862837713367, "learning_rate": 0.0007795485548239196, "loss": 2.3539, "step": 2894 }, { "epoch": 0.3319573443412453, "grad_norm": 0.26076885681071876, "learning_rate": 0.0007793945751917022, "loss": 2.5097, "step": 2895 }, { "epoch": 0.33207201009058596, "grad_norm": 0.25478438547384136, "learning_rate": 0.0007792405570223289, "loss": 2.4871, "step": 2896 }, { "epoch": 0.3321866758399266, "grad_norm": 0.23152418019005155, "learning_rate": 0.0007790865003370434, "loss": 2.3685, "step": 2897 }, { "epoch": 0.3323013415892673, "grad_norm": 0.250470056201604, "learning_rate": 0.0007789324051570951, "loss": 2.4857, "step": 2898 }, { "epoch": 0.33241600733860793, "grad_norm": 0.25092723731338534, "learning_rate": 0.0007787782715037387, "loss": 2.5326, "step": 2899 }, { "epoch": 0.33253067308794865, "grad_norm": 0.2597737706886953, "learning_rate": 0.0007786240993982335, "loss": 2.5676, "step": 2900 }, { "epoch": 0.3326453388372893, "grad_norm": 0.2785390958471021, "learning_rate": 0.0007784698888618449, "loss": 2.5627, "step": 2901 }, { "epoch": 0.33276000458662996, "grad_norm": 0.2470441938924738, "learning_rate": 0.0007783156399158433, "loss": 2.6115, "step": 2902 }, { "epoch": 0.3328746703359706, "grad_norm": 0.25079928940062823, "learning_rate": 0.0007781613525815043, "loss": 2.4622, "step": 2903 }, { "epoch": 0.33298933608531134, "grad_norm": 0.25997731792901885, "learning_rate": 0.000778007026880109, "loss": 2.544, "step": 2904 }, { "epoch": 0.333104001834652, "grad_norm": 0.26888709544463824, "learning_rate": 0.0007778526628329436, "loss": 2.5282, "step": 2905 }, { "epoch": 0.33321866758399266, "grad_norm": 0.2541356331798265, "learning_rate": 0.0007776982604612996, "loss": 2.6122, "step": 2906 }, { "epoch": 0.3333333333333333, "grad_norm": 0.23375030562616259, "learning_rate": 0.0007775438197864743, "loss": 2.4158, "step": 2907 }, { "epoch": 0.33344799908267403, "grad_norm": 0.24713240742152193, "learning_rate": 0.0007773893408297692, "loss": 2.4595, "step": 2908 }, { "epoch": 0.3335626648320147, "grad_norm": 0.23394362750954667, "learning_rate": 0.0007772348236124922, "loss": 2.5929, "step": 2909 }, { "epoch": 0.33367733058135535, "grad_norm": 0.22579589201461356, "learning_rate": 0.0007770802681559558, "loss": 2.4603, "step": 2910 }, { "epoch": 0.333791996330696, "grad_norm": 0.22074776591913334, "learning_rate": 0.0007769256744814781, "loss": 2.5446, "step": 2911 }, { "epoch": 0.3339066620800367, "grad_norm": 0.2575889827970145, "learning_rate": 0.0007767710426103822, "loss": 2.6346, "step": 2912 }, { "epoch": 0.3340213278293774, "grad_norm": 0.24263932705014443, "learning_rate": 0.0007766163725639967, "loss": 2.5663, "step": 2913 }, { "epoch": 0.33413599357871804, "grad_norm": 0.28500909641827976, "learning_rate": 0.0007764616643636557, "loss": 2.4592, "step": 2914 }, { "epoch": 0.3342506593280587, "grad_norm": 0.276958263549589, "learning_rate": 0.0007763069180306976, "loss": 2.7844, "step": 2915 }, { "epoch": 0.33436532507739936, "grad_norm": 0.26777068898211687, "learning_rate": 0.0007761521335864672, "loss": 2.5052, "step": 2916 }, { "epoch": 0.33447999082674007, "grad_norm": 0.2654534216979825, "learning_rate": 0.0007759973110523137, "loss": 2.5486, "step": 2917 }, { "epoch": 0.33459465657608073, "grad_norm": 0.24357040836165986, "learning_rate": 0.0007758424504495925, "loss": 2.5203, "step": 2918 }, { "epoch": 0.3347093223254214, "grad_norm": 0.2519007125891757, "learning_rate": 0.000775687551799663, "loss": 2.6177, "step": 2919 }, { "epoch": 0.33482398807476205, "grad_norm": 0.24111344799593543, "learning_rate": 0.0007755326151238908, "loss": 2.4844, "step": 2920 }, { "epoch": 0.33493865382410276, "grad_norm": 0.2658739967907742, "learning_rate": 0.0007753776404436466, "loss": 2.7989, "step": 2921 }, { "epoch": 0.3350533195734434, "grad_norm": 0.2755928882664132, "learning_rate": 0.000775222627780306, "loss": 2.6179, "step": 2922 }, { "epoch": 0.3351679853227841, "grad_norm": 0.2689571432359613, "learning_rate": 0.0007750675771552502, "loss": 2.438, "step": 2923 }, { "epoch": 0.33528265107212474, "grad_norm": 0.2789348927192148, "learning_rate": 0.0007749124885898654, "loss": 2.4638, "step": 2924 }, { "epoch": 0.33539731682146545, "grad_norm": 0.26687965480680065, "learning_rate": 0.0007747573621055431, "loss": 2.4063, "step": 2925 }, { "epoch": 0.3355119825708061, "grad_norm": 0.2530200537716329, "learning_rate": 0.00077460219772368, "loss": 2.5039, "step": 2926 }, { "epoch": 0.33562664832014677, "grad_norm": 0.2507589320010678, "learning_rate": 0.0007744469954656781, "loss": 2.4869, "step": 2927 }, { "epoch": 0.33574131406948743, "grad_norm": 0.24559990788066305, "learning_rate": 0.0007742917553529447, "loss": 2.4736, "step": 2928 }, { "epoch": 0.33585597981882814, "grad_norm": 0.2804129707274445, "learning_rate": 0.000774136477406892, "loss": 2.5437, "step": 2929 }, { "epoch": 0.3359706455681688, "grad_norm": 0.2666963945378615, "learning_rate": 0.0007739811616489378, "loss": 2.5724, "step": 2930 }, { "epoch": 0.33608531131750946, "grad_norm": 0.24127313344187443, "learning_rate": 0.0007738258081005049, "loss": 2.5016, "step": 2931 }, { "epoch": 0.3361999770668501, "grad_norm": 0.2540536743753185, "learning_rate": 0.0007736704167830216, "loss": 2.5301, "step": 2932 }, { "epoch": 0.3363146428161908, "grad_norm": 0.258622248942266, "learning_rate": 0.0007735149877179206, "loss": 2.5181, "step": 2933 }, { "epoch": 0.3364293085655315, "grad_norm": 0.27092779076837875, "learning_rate": 0.0007733595209266408, "loss": 2.5372, "step": 2934 }, { "epoch": 0.33654397431487215, "grad_norm": 0.2643764416319443, "learning_rate": 0.000773204016430626, "loss": 2.4051, "step": 2935 }, { "epoch": 0.3366586400642128, "grad_norm": 0.2436467722822796, "learning_rate": 0.0007730484742513247, "loss": 2.5717, "step": 2936 }, { "epoch": 0.33677330581355347, "grad_norm": 0.2691065214152806, "learning_rate": 0.0007728928944101912, "loss": 2.6144, "step": 2937 }, { "epoch": 0.3368879715628942, "grad_norm": 0.26660740097316216, "learning_rate": 0.0007727372769286846, "loss": 2.5468, "step": 2938 }, { "epoch": 0.33700263731223484, "grad_norm": 0.24292099877583428, "learning_rate": 0.0007725816218282697, "loss": 2.665, "step": 2939 }, { "epoch": 0.3371173030615755, "grad_norm": 0.2559916866013264, "learning_rate": 0.0007724259291304159, "loss": 2.6007, "step": 2940 }, { "epoch": 0.33723196881091616, "grad_norm": 0.2658481145495126, "learning_rate": 0.0007722701988565979, "loss": 2.6243, "step": 2941 }, { "epoch": 0.3373466345602569, "grad_norm": 0.24887045515215772, "learning_rate": 0.0007721144310282961, "loss": 2.6149, "step": 2942 }, { "epoch": 0.33746130030959753, "grad_norm": 0.24105938825348663, "learning_rate": 0.0007719586256669955, "loss": 2.3526, "step": 2943 }, { "epoch": 0.3375759660589382, "grad_norm": 0.253880925837504, "learning_rate": 0.0007718027827941865, "loss": 2.4064, "step": 2944 }, { "epoch": 0.33769063180827885, "grad_norm": 0.25307183867774163, "learning_rate": 0.0007716469024313645, "loss": 2.4774, "step": 2945 }, { "epoch": 0.33780529755761957, "grad_norm": 0.25485812836908117, "learning_rate": 0.0007714909846000304, "loss": 2.4907, "step": 2946 }, { "epoch": 0.3379199633069602, "grad_norm": 0.24256073978103967, "learning_rate": 0.0007713350293216903, "loss": 2.5684, "step": 2947 }, { "epoch": 0.3380346290563009, "grad_norm": 0.2663710823480498, "learning_rate": 0.0007711790366178548, "loss": 2.4919, "step": 2948 }, { "epoch": 0.33814929480564154, "grad_norm": 0.24078042823645204, "learning_rate": 0.0007710230065100404, "loss": 2.577, "step": 2949 }, { "epoch": 0.3382639605549822, "grad_norm": 0.27333028735444287, "learning_rate": 0.0007708669390197683, "loss": 2.5028, "step": 2950 }, { "epoch": 0.3383786263043229, "grad_norm": 0.28078291062776034, "learning_rate": 0.0007707108341685654, "loss": 2.4797, "step": 2951 }, { "epoch": 0.3384932920536636, "grad_norm": 0.26571437910235857, "learning_rate": 0.000770554691977963, "loss": 2.4723, "step": 2952 }, { "epoch": 0.33860795780300423, "grad_norm": 0.2595220390992353, "learning_rate": 0.0007703985124694981, "loss": 2.6255, "step": 2953 }, { "epoch": 0.3387226235523449, "grad_norm": 0.2661329018190098, "learning_rate": 0.0007702422956647126, "loss": 2.3811, "step": 2954 }, { "epoch": 0.3388372893016856, "grad_norm": 0.24383355167946313, "learning_rate": 0.0007700860415851538, "loss": 2.4187, "step": 2955 }, { "epoch": 0.33895195505102627, "grad_norm": 0.2549238872091199, "learning_rate": 0.0007699297502523739, "loss": 2.6562, "step": 2956 }, { "epoch": 0.3390666208003669, "grad_norm": 0.25393262292628055, "learning_rate": 0.0007697734216879302, "loss": 2.5924, "step": 2957 }, { "epoch": 0.3391812865497076, "grad_norm": 0.27513140320631274, "learning_rate": 0.0007696170559133853, "loss": 2.5301, "step": 2958 }, { "epoch": 0.3392959522990483, "grad_norm": 0.2555350968067638, "learning_rate": 0.000769460652950307, "loss": 2.562, "step": 2959 }, { "epoch": 0.33941061804838896, "grad_norm": 0.24454674157758655, "learning_rate": 0.0007693042128202679, "loss": 2.5924, "step": 2960 }, { "epoch": 0.3395252837977296, "grad_norm": 0.28580779354578895, "learning_rate": 0.0007691477355448461, "loss": 2.6632, "step": 2961 }, { "epoch": 0.3396399495470703, "grad_norm": 0.24727391212859587, "learning_rate": 0.0007689912211456247, "loss": 2.6048, "step": 2962 }, { "epoch": 0.339754615296411, "grad_norm": 0.22307183281991919, "learning_rate": 0.0007688346696441917, "loss": 2.6055, "step": 2963 }, { "epoch": 0.33986928104575165, "grad_norm": 0.2654186989873798, "learning_rate": 0.0007686780810621406, "loss": 2.5193, "step": 2964 }, { "epoch": 0.3399839467950923, "grad_norm": 0.25401760459979744, "learning_rate": 0.0007685214554210693, "loss": 2.5376, "step": 2965 }, { "epoch": 0.34009861254443297, "grad_norm": 0.24695574170682183, "learning_rate": 0.0007683647927425821, "loss": 2.5818, "step": 2966 }, { "epoch": 0.3402132782937736, "grad_norm": 0.2475300189385152, "learning_rate": 0.0007682080930482871, "loss": 2.5006, "step": 2967 }, { "epoch": 0.34032794404311434, "grad_norm": 0.2500474595151521, "learning_rate": 0.0007680513563597982, "loss": 2.5712, "step": 2968 }, { "epoch": 0.340442609792455, "grad_norm": 0.26243643607979755, "learning_rate": 0.0007678945826987343, "loss": 2.5314, "step": 2969 }, { "epoch": 0.34055727554179566, "grad_norm": 0.2450371235674348, "learning_rate": 0.0007677377720867189, "loss": 2.5319, "step": 2970 }, { "epoch": 0.3406719412911363, "grad_norm": 0.2562268478490193, "learning_rate": 0.0007675809245453818, "loss": 2.5706, "step": 2971 }, { "epoch": 0.34078660704047703, "grad_norm": 0.22696883125296485, "learning_rate": 0.0007674240400963566, "loss": 2.593, "step": 2972 }, { "epoch": 0.3409012727898177, "grad_norm": 0.2519680268261953, "learning_rate": 0.0007672671187612826, "loss": 2.5785, "step": 2973 }, { "epoch": 0.34101593853915835, "grad_norm": 0.2871220551382858, "learning_rate": 0.0007671101605618041, "loss": 2.4925, "step": 2974 }, { "epoch": 0.341130604288499, "grad_norm": 0.27811607011491035, "learning_rate": 0.0007669531655195705, "loss": 2.642, "step": 2975 }, { "epoch": 0.3412452700378397, "grad_norm": 0.265498662506417, "learning_rate": 0.0007667961336562364, "loss": 2.5985, "step": 2976 }, { "epoch": 0.3413599357871804, "grad_norm": 0.24148283234660736, "learning_rate": 0.0007666390649934612, "loss": 2.5031, "step": 2977 }, { "epoch": 0.34147460153652104, "grad_norm": 0.2663542440394786, "learning_rate": 0.0007664819595529095, "loss": 2.5747, "step": 2978 }, { "epoch": 0.3415892672858617, "grad_norm": 0.24736492781882619, "learning_rate": 0.0007663248173562513, "loss": 2.4586, "step": 2979 }, { "epoch": 0.3417039330352024, "grad_norm": 0.3000220046303528, "learning_rate": 0.000766167638425161, "loss": 2.6917, "step": 2980 }, { "epoch": 0.34181859878454307, "grad_norm": 0.2514162263534708, "learning_rate": 0.0007660104227813188, "loss": 2.5457, "step": 2981 }, { "epoch": 0.34193326453388373, "grad_norm": 0.24549564078531316, "learning_rate": 0.0007658531704464092, "loss": 2.4773, "step": 2982 }, { "epoch": 0.3420479302832244, "grad_norm": 0.25615240120524024, "learning_rate": 0.0007656958814421225, "loss": 2.4922, "step": 2983 }, { "epoch": 0.34216259603256505, "grad_norm": 0.38849471317410544, "learning_rate": 0.0007655385557901534, "loss": 2.4524, "step": 2984 }, { "epoch": 0.34227726178190576, "grad_norm": 0.2664877711008682, "learning_rate": 0.0007653811935122022, "loss": 2.3778, "step": 2985 }, { "epoch": 0.3423919275312464, "grad_norm": 0.2757237160329828, "learning_rate": 0.0007652237946299741, "loss": 2.4821, "step": 2986 }, { "epoch": 0.3425065932805871, "grad_norm": 0.22540035763140626, "learning_rate": 0.000765066359165179, "loss": 2.3811, "step": 2987 }, { "epoch": 0.34262125902992774, "grad_norm": 0.24671826215786202, "learning_rate": 0.0007649088871395324, "loss": 2.5621, "step": 2988 }, { "epoch": 0.34273592477926845, "grad_norm": 0.2573451800082805, "learning_rate": 0.0007647513785747545, "loss": 2.4763, "step": 2989 }, { "epoch": 0.3428505905286091, "grad_norm": 0.2624394179226302, "learning_rate": 0.0007645938334925704, "loss": 2.6558, "step": 2990 }, { "epoch": 0.34296525627794977, "grad_norm": 0.2644386131436197, "learning_rate": 0.0007644362519147106, "loss": 2.6265, "step": 2991 }, { "epoch": 0.34307992202729043, "grad_norm": 0.25490853028229826, "learning_rate": 0.0007642786338629106, "loss": 2.4722, "step": 2992 }, { "epoch": 0.34319458777663114, "grad_norm": 0.2752647679231488, "learning_rate": 0.0007641209793589105, "loss": 2.4972, "step": 2993 }, { "epoch": 0.3433092535259718, "grad_norm": 0.30573805652931685, "learning_rate": 0.0007639632884244561, "loss": 2.5842, "step": 2994 }, { "epoch": 0.34342391927531246, "grad_norm": 0.28279249326815376, "learning_rate": 0.0007638055610812974, "loss": 2.564, "step": 2995 }, { "epoch": 0.3435385850246531, "grad_norm": 0.25922334773156375, "learning_rate": 0.0007636477973511903, "loss": 2.5169, "step": 2996 }, { "epoch": 0.34365325077399383, "grad_norm": 0.24460716789736825, "learning_rate": 0.0007634899972558951, "loss": 2.4617, "step": 2997 }, { "epoch": 0.3437679165233345, "grad_norm": 0.25940833264309615, "learning_rate": 0.0007633321608171774, "loss": 2.519, "step": 2998 }, { "epoch": 0.34388258227267515, "grad_norm": 0.2280352684905453, "learning_rate": 0.0007631742880568075, "loss": 2.6092, "step": 2999 }, { "epoch": 0.3439972480220158, "grad_norm": 0.26456777333686393, "learning_rate": 0.0007630163789965614, "loss": 2.5571, "step": 3000 }, { "epoch": 0.34411191377135647, "grad_norm": 0.24853708531929045, "learning_rate": 0.0007628584336582192, "loss": 2.6016, "step": 3001 }, { "epoch": 0.3442265795206972, "grad_norm": 0.2724459120312537, "learning_rate": 0.0007627004520635666, "loss": 2.5635, "step": 3002 }, { "epoch": 0.34434124527003784, "grad_norm": 0.250540945215181, "learning_rate": 0.0007625424342343943, "loss": 2.5375, "step": 3003 }, { "epoch": 0.3444559110193785, "grad_norm": 0.25383084293124786, "learning_rate": 0.0007623843801924977, "loss": 2.4034, "step": 3004 }, { "epoch": 0.34457057676871916, "grad_norm": 0.247241570571851, "learning_rate": 0.0007622262899596772, "loss": 2.5627, "step": 3005 }, { "epoch": 0.3446852425180599, "grad_norm": 0.2518870979872506, "learning_rate": 0.0007620681635577386, "loss": 2.7534, "step": 3006 }, { "epoch": 0.34479990826740053, "grad_norm": 0.31003686086764876, "learning_rate": 0.0007619100010084923, "loss": 2.5854, "step": 3007 }, { "epoch": 0.3449145740167412, "grad_norm": 0.25970709221011673, "learning_rate": 0.0007617518023337538, "loss": 2.6026, "step": 3008 }, { "epoch": 0.34502923976608185, "grad_norm": 0.2528730828336293, "learning_rate": 0.0007615935675553436, "loss": 2.5886, "step": 3009 }, { "epoch": 0.34514390551542257, "grad_norm": 0.25850658491346484, "learning_rate": 0.0007614352966950871, "loss": 2.3926, "step": 3010 }, { "epoch": 0.3452585712647632, "grad_norm": 0.24742109870443613, "learning_rate": 0.000761276989774815, "loss": 2.4262, "step": 3011 }, { "epoch": 0.3453732370141039, "grad_norm": 0.26757324110838554, "learning_rate": 0.0007611186468163625, "loss": 2.4976, "step": 3012 }, { "epoch": 0.34548790276344454, "grad_norm": 0.27267669632638913, "learning_rate": 0.0007609602678415699, "loss": 2.5956, "step": 3013 }, { "epoch": 0.34560256851278526, "grad_norm": 0.24636746760184622, "learning_rate": 0.0007608018528722829, "loss": 2.5059, "step": 3014 }, { "epoch": 0.3457172342621259, "grad_norm": 0.25430587581224134, "learning_rate": 0.0007606434019303514, "loss": 2.5686, "step": 3015 }, { "epoch": 0.3458319000114666, "grad_norm": 0.2465568613738964, "learning_rate": 0.0007604849150376311, "loss": 2.5116, "step": 3016 }, { "epoch": 0.34594656576080723, "grad_norm": 0.24350998852489353, "learning_rate": 0.0007603263922159822, "loss": 2.5396, "step": 3017 }, { "epoch": 0.3460612315101479, "grad_norm": 0.2601429319251723, "learning_rate": 0.0007601678334872695, "loss": 2.4812, "step": 3018 }, { "epoch": 0.3461758972594886, "grad_norm": 0.2509550834724402, "learning_rate": 0.0007600092388733635, "loss": 2.5338, "step": 3019 }, { "epoch": 0.34629056300882927, "grad_norm": 0.24654957398410943, "learning_rate": 0.0007598506083961394, "loss": 2.5427, "step": 3020 }, { "epoch": 0.3464052287581699, "grad_norm": 0.26482297319793857, "learning_rate": 0.0007596919420774768, "loss": 2.4525, "step": 3021 }, { "epoch": 0.3465198945075106, "grad_norm": 0.23488542310921207, "learning_rate": 0.0007595332399392611, "loss": 2.6023, "step": 3022 }, { "epoch": 0.3466345602568513, "grad_norm": 0.271611728371233, "learning_rate": 0.0007593745020033822, "loss": 2.427, "step": 3023 }, { "epoch": 0.34674922600619196, "grad_norm": 0.26388422875120565, "learning_rate": 0.0007592157282917347, "loss": 2.6405, "step": 3024 }, { "epoch": 0.3468638917555326, "grad_norm": 0.2551837231129936, "learning_rate": 0.0007590569188262186, "loss": 2.4518, "step": 3025 }, { "epoch": 0.3469785575048733, "grad_norm": 0.23604713964510807, "learning_rate": 0.0007588980736287389, "loss": 2.5128, "step": 3026 }, { "epoch": 0.347093223254214, "grad_norm": 0.25082333103527926, "learning_rate": 0.0007587391927212046, "loss": 2.6474, "step": 3027 }, { "epoch": 0.34720788900355465, "grad_norm": 0.2599287492152934, "learning_rate": 0.0007585802761255309, "loss": 2.483, "step": 3028 }, { "epoch": 0.3473225547528953, "grad_norm": 0.24360808791440647, "learning_rate": 0.0007584213238636372, "loss": 2.5432, "step": 3029 }, { "epoch": 0.34743722050223597, "grad_norm": 0.2581195754027987, "learning_rate": 0.0007582623359574476, "loss": 2.6509, "step": 3030 }, { "epoch": 0.3475518862515767, "grad_norm": 0.24709694268970234, "learning_rate": 0.0007581033124288918, "loss": 2.4277, "step": 3031 }, { "epoch": 0.34766655200091734, "grad_norm": 0.24540447347796454, "learning_rate": 0.0007579442532999039, "loss": 2.666, "step": 3032 }, { "epoch": 0.347781217750258, "grad_norm": 0.2384026030227758, "learning_rate": 0.0007577851585924231, "loss": 2.5315, "step": 3033 }, { "epoch": 0.34789588349959866, "grad_norm": 0.2604710275234728, "learning_rate": 0.0007576260283283935, "loss": 2.6304, "step": 3034 }, { "epoch": 0.3480105492489393, "grad_norm": 0.28006978530364646, "learning_rate": 0.0007574668625297641, "loss": 2.5378, "step": 3035 }, { "epoch": 0.34812521499828003, "grad_norm": 0.24665911761772866, "learning_rate": 0.000757307661218489, "loss": 2.4881, "step": 3036 }, { "epoch": 0.3482398807476207, "grad_norm": 0.2586897352334971, "learning_rate": 0.0007571484244165266, "loss": 2.5092, "step": 3037 }, { "epoch": 0.34835454649696135, "grad_norm": 0.25338860279966463, "learning_rate": 0.0007569891521458405, "loss": 2.539, "step": 3038 }, { "epoch": 0.348469212246302, "grad_norm": 0.238887311418351, "learning_rate": 0.0007568298444283999, "loss": 2.5425, "step": 3039 }, { "epoch": 0.3485838779956427, "grad_norm": 0.2710474392251138, "learning_rate": 0.0007566705012861777, "loss": 2.5659, "step": 3040 }, { "epoch": 0.3486985437449834, "grad_norm": 0.2689009419089519, "learning_rate": 0.0007565111227411524, "loss": 2.5482, "step": 3041 }, { "epoch": 0.34881320949432404, "grad_norm": 0.25264475299225364, "learning_rate": 0.0007563517088153074, "loss": 2.5167, "step": 3042 }, { "epoch": 0.3489278752436647, "grad_norm": 0.2722166270629138, "learning_rate": 0.0007561922595306305, "loss": 2.4338, "step": 3043 }, { "epoch": 0.3490425409930054, "grad_norm": 0.24868386124446248, "learning_rate": 0.000756032774909115, "loss": 2.65, "step": 3044 }, { "epoch": 0.34915720674234607, "grad_norm": 0.2548102050097299, "learning_rate": 0.0007558732549727586, "loss": 2.6326, "step": 3045 }, { "epoch": 0.34927187249168673, "grad_norm": 0.23867235509564047, "learning_rate": 0.0007557136997435641, "loss": 2.6259, "step": 3046 }, { "epoch": 0.3493865382410274, "grad_norm": 0.24935824921190605, "learning_rate": 0.000755554109243539, "loss": 2.4261, "step": 3047 }, { "epoch": 0.3495012039903681, "grad_norm": 0.294644462224729, "learning_rate": 0.0007553944834946958, "loss": 2.5452, "step": 3048 }, { "epoch": 0.34961586973970876, "grad_norm": 0.26575038988103444, "learning_rate": 0.0007552348225190519, "loss": 2.5476, "step": 3049 }, { "epoch": 0.3497305354890494, "grad_norm": 0.2497505957206534, "learning_rate": 0.0007550751263386295, "loss": 2.4672, "step": 3050 }, { "epoch": 0.3498452012383901, "grad_norm": 0.2925600416503518, "learning_rate": 0.0007549153949754557, "loss": 2.4983, "step": 3051 }, { "epoch": 0.34995986698773074, "grad_norm": 0.28910989034936235, "learning_rate": 0.0007547556284515621, "loss": 2.425, "step": 3052 }, { "epoch": 0.35007453273707145, "grad_norm": 0.24001250617812137, "learning_rate": 0.0007545958267889856, "loss": 2.612, "step": 3053 }, { "epoch": 0.3501891984864121, "grad_norm": 0.2482439816866991, "learning_rate": 0.0007544359900097681, "loss": 2.5203, "step": 3054 }, { "epoch": 0.35030386423575277, "grad_norm": 0.2735427601812003, "learning_rate": 0.0007542761181359556, "loss": 2.524, "step": 3055 }, { "epoch": 0.35041852998509343, "grad_norm": 0.24853383551240452, "learning_rate": 0.0007541162111895994, "loss": 2.5842, "step": 3056 }, { "epoch": 0.35053319573443414, "grad_norm": 0.22286100485411198, "learning_rate": 0.000753956269192756, "loss": 2.4441, "step": 3057 }, { "epoch": 0.3506478614837748, "grad_norm": 0.2541443833706939, "learning_rate": 0.0007537962921674861, "loss": 2.572, "step": 3058 }, { "epoch": 0.35076252723311546, "grad_norm": 0.24377239883658305, "learning_rate": 0.0007536362801358554, "loss": 2.5892, "step": 3059 }, { "epoch": 0.3508771929824561, "grad_norm": 0.26127626455922626, "learning_rate": 0.0007534762331199345, "loss": 2.6009, "step": 3060 }, { "epoch": 0.35099185873179684, "grad_norm": 0.2756143796026638, "learning_rate": 0.0007533161511417992, "loss": 2.549, "step": 3061 }, { "epoch": 0.3511065244811375, "grad_norm": 0.21981769867096165, "learning_rate": 0.0007531560342235293, "loss": 2.4849, "step": 3062 }, { "epoch": 0.35122119023047815, "grad_norm": 0.2553058340087511, "learning_rate": 0.0007529958823872101, "loss": 2.5306, "step": 3063 }, { "epoch": 0.3513358559798188, "grad_norm": 0.23016603320069604, "learning_rate": 0.0007528356956549316, "loss": 2.4898, "step": 3064 }, { "epoch": 0.3514505217291595, "grad_norm": 0.2512041926921274, "learning_rate": 0.0007526754740487881, "loss": 2.5357, "step": 3065 }, { "epoch": 0.3515651874785002, "grad_norm": 0.26328016476043703, "learning_rate": 0.0007525152175908796, "loss": 2.4987, "step": 3066 }, { "epoch": 0.35167985322784084, "grad_norm": 0.26189030475327185, "learning_rate": 0.0007523549263033103, "loss": 2.5318, "step": 3067 }, { "epoch": 0.3517945189771815, "grad_norm": 0.2583804657276663, "learning_rate": 0.000752194600208189, "loss": 2.5268, "step": 3068 }, { "epoch": 0.35190918472652216, "grad_norm": 0.27073690091360375, "learning_rate": 0.00075203423932763, "loss": 2.3489, "step": 3069 }, { "epoch": 0.3520238504758629, "grad_norm": 0.25978555372581147, "learning_rate": 0.0007518738436837518, "loss": 2.5029, "step": 3070 }, { "epoch": 0.35213851622520354, "grad_norm": 0.2560339117060919, "learning_rate": 0.000751713413298678, "loss": 2.6239, "step": 3071 }, { "epoch": 0.3522531819745442, "grad_norm": 0.2606779990326901, "learning_rate": 0.0007515529481945372, "loss": 2.5985, "step": 3072 }, { "epoch": 0.35236784772388485, "grad_norm": 0.23552578199006022, "learning_rate": 0.000751392448393462, "loss": 2.551, "step": 3073 }, { "epoch": 0.35248251347322557, "grad_norm": 0.2272784036750427, "learning_rate": 0.0007512319139175905, "loss": 2.367, "step": 3074 }, { "epoch": 0.3525971792225662, "grad_norm": 0.2649427614841162, "learning_rate": 0.0007510713447890653, "loss": 2.5089, "step": 3075 }, { "epoch": 0.3527118449719069, "grad_norm": 0.2462458394809935, "learning_rate": 0.0007509107410300342, "loss": 2.5211, "step": 3076 }, { "epoch": 0.35282651072124754, "grad_norm": 0.2718851883736064, "learning_rate": 0.0007507501026626491, "loss": 2.3798, "step": 3077 }, { "epoch": 0.35294117647058826, "grad_norm": 0.244551511956551, "learning_rate": 0.000750589429709067, "loss": 2.459, "step": 3078 }, { "epoch": 0.3530558422199289, "grad_norm": 0.24192850420680795, "learning_rate": 0.0007504287221914499, "loss": 2.5986, "step": 3079 }, { "epoch": 0.3531705079692696, "grad_norm": 0.22681378685831374, "learning_rate": 0.0007502679801319641, "loss": 2.5195, "step": 3080 }, { "epoch": 0.35328517371861023, "grad_norm": 0.26785347958701206, "learning_rate": 0.0007501072035527807, "loss": 2.6528, "step": 3081 }, { "epoch": 0.35339983946795095, "grad_norm": 0.2433764983441952, "learning_rate": 0.0007499463924760764, "loss": 2.5756, "step": 3082 }, { "epoch": 0.3535145052172916, "grad_norm": 0.24064123909052318, "learning_rate": 0.0007497855469240316, "loss": 2.6152, "step": 3083 }, { "epoch": 0.35362917096663227, "grad_norm": 0.24396816597769877, "learning_rate": 0.0007496246669188319, "loss": 2.5216, "step": 3084 }, { "epoch": 0.3537438367159729, "grad_norm": 0.24642070592575777, "learning_rate": 0.0007494637524826677, "loss": 2.4653, "step": 3085 }, { "epoch": 0.3538585024653136, "grad_norm": 0.23056413802132453, "learning_rate": 0.000749302803637734, "loss": 2.569, "step": 3086 }, { "epoch": 0.3539731682146543, "grad_norm": 0.2718753377162669, "learning_rate": 0.0007491418204062307, "loss": 2.6735, "step": 3087 }, { "epoch": 0.35408783396399496, "grad_norm": 0.23621785272136475, "learning_rate": 0.0007489808028103622, "loss": 2.4658, "step": 3088 }, { "epoch": 0.3542024997133356, "grad_norm": 0.2441662715414475, "learning_rate": 0.0007488197508723383, "loss": 2.4597, "step": 3089 }, { "epoch": 0.3543171654626763, "grad_norm": 0.24450045183394317, "learning_rate": 0.0007486586646143725, "loss": 2.5648, "step": 3090 }, { "epoch": 0.354431831212017, "grad_norm": 0.26829804338749536, "learning_rate": 0.0007484975440586838, "loss": 2.535, "step": 3091 }, { "epoch": 0.35454649696135765, "grad_norm": 0.26462531583581067, "learning_rate": 0.0007483363892274958, "loss": 2.5631, "step": 3092 }, { "epoch": 0.3546611627106983, "grad_norm": 0.2620082907421261, "learning_rate": 0.0007481752001430364, "loss": 2.6509, "step": 3093 }, { "epoch": 0.35477582846003897, "grad_norm": 0.25095729846041204, "learning_rate": 0.000748013976827539, "loss": 2.5392, "step": 3094 }, { "epoch": 0.3548904942093797, "grad_norm": 0.24126590301942513, "learning_rate": 0.0007478527193032409, "loss": 2.6683, "step": 3095 }, { "epoch": 0.35500515995872034, "grad_norm": 0.27080000600568355, "learning_rate": 0.0007476914275923848, "loss": 2.5882, "step": 3096 }, { "epoch": 0.355119825708061, "grad_norm": 0.2567890732043505, "learning_rate": 0.0007475301017172177, "loss": 2.605, "step": 3097 }, { "epoch": 0.35523449145740166, "grad_norm": 0.2701327770313484, "learning_rate": 0.0007473687416999913, "loss": 2.6006, "step": 3098 }, { "epoch": 0.35534915720674237, "grad_norm": 0.25779980883817855, "learning_rate": 0.0007472073475629624, "loss": 2.5761, "step": 3099 }, { "epoch": 0.35546382295608303, "grad_norm": 0.26403933252099265, "learning_rate": 0.000747045919328392, "loss": 2.4724, "step": 3100 }, { "epoch": 0.3555784887054237, "grad_norm": 0.2335864184272574, "learning_rate": 0.0007468844570185462, "loss": 2.5974, "step": 3101 }, { "epoch": 0.35569315445476435, "grad_norm": 0.24983549330983407, "learning_rate": 0.0007467229606556955, "loss": 2.5246, "step": 3102 }, { "epoch": 0.355807820204105, "grad_norm": 0.28539044203481995, "learning_rate": 0.0007465614302621155, "loss": 2.7229, "step": 3103 }, { "epoch": 0.3559224859534457, "grad_norm": 0.23687183625235972, "learning_rate": 0.0007463998658600861, "loss": 2.5572, "step": 3104 }, { "epoch": 0.3560371517027864, "grad_norm": 0.26135377648656194, "learning_rate": 0.0007462382674718919, "loss": 2.5261, "step": 3105 }, { "epoch": 0.35615181745212704, "grad_norm": 0.2312464893201061, "learning_rate": 0.0007460766351198225, "loss": 2.5296, "step": 3106 }, { "epoch": 0.3562664832014677, "grad_norm": 0.2602927730487249, "learning_rate": 0.0007459149688261719, "loss": 2.4481, "step": 3107 }, { "epoch": 0.3563811489508084, "grad_norm": 0.2242617653451122, "learning_rate": 0.0007457532686132389, "loss": 2.5021, "step": 3108 }, { "epoch": 0.35649581470014907, "grad_norm": 0.2542063379751769, "learning_rate": 0.0007455915345033271, "loss": 2.5574, "step": 3109 }, { "epoch": 0.35661048044948973, "grad_norm": 0.2795169365549323, "learning_rate": 0.0007454297665187442, "loss": 2.5236, "step": 3110 }, { "epoch": 0.3567251461988304, "grad_norm": 0.25933121948296556, "learning_rate": 0.0007452679646818037, "loss": 2.5367, "step": 3111 }, { "epoch": 0.3568398119481711, "grad_norm": 0.23722468774965372, "learning_rate": 0.0007451061290148224, "loss": 2.3433, "step": 3112 }, { "epoch": 0.35695447769751176, "grad_norm": 0.24217836537842158, "learning_rate": 0.000744944259540123, "loss": 2.51, "step": 3113 }, { "epoch": 0.3570691434468524, "grad_norm": 0.25039696908830755, "learning_rate": 0.000744782356280032, "loss": 2.4594, "step": 3114 }, { "epoch": 0.3571838091961931, "grad_norm": 0.2428749110976045, "learning_rate": 0.0007446204192568807, "loss": 2.5136, "step": 3115 }, { "epoch": 0.3572984749455338, "grad_norm": 0.2488758501385027, "learning_rate": 0.0007444584484930057, "loss": 2.482, "step": 3116 }, { "epoch": 0.35741314069487445, "grad_norm": 0.261497838893265, "learning_rate": 0.0007442964440107476, "loss": 2.5315, "step": 3117 }, { "epoch": 0.3575278064442151, "grad_norm": 0.26693339938227467, "learning_rate": 0.0007441344058324515, "loss": 2.4584, "step": 3118 }, { "epoch": 0.35764247219355577, "grad_norm": 0.2561420382489682, "learning_rate": 0.0007439723339804679, "loss": 2.6031, "step": 3119 }, { "epoch": 0.35775713794289643, "grad_norm": 0.2562933807935695, "learning_rate": 0.0007438102284771513, "loss": 2.5643, "step": 3120 }, { "epoch": 0.35787180369223714, "grad_norm": 0.2536944027439819, "learning_rate": 0.0007436480893448611, "loss": 2.3698, "step": 3121 }, { "epoch": 0.3579864694415778, "grad_norm": 0.2786183871586788, "learning_rate": 0.0007434859166059616, "loss": 2.5176, "step": 3122 }, { "epoch": 0.35810113519091846, "grad_norm": 0.27828097414799635, "learning_rate": 0.0007433237102828209, "loss": 2.4585, "step": 3123 }, { "epoch": 0.3582158009402591, "grad_norm": 0.2772068944608024, "learning_rate": 0.0007431614703978125, "loss": 2.5602, "step": 3124 }, { "epoch": 0.35833046668959984, "grad_norm": 0.23336193353175308, "learning_rate": 0.0007429991969733144, "loss": 2.6417, "step": 3125 }, { "epoch": 0.3584451324389405, "grad_norm": 0.2511228245756997, "learning_rate": 0.0007428368900317092, "loss": 2.4885, "step": 3126 }, { "epoch": 0.35855979818828115, "grad_norm": 0.2583519132978108, "learning_rate": 0.0007426745495953838, "loss": 2.6091, "step": 3127 }, { "epoch": 0.3586744639376218, "grad_norm": 0.2524640976963248, "learning_rate": 0.00074251217568673, "loss": 2.5446, "step": 3128 }, { "epoch": 0.3587891296869625, "grad_norm": 0.23365453582426485, "learning_rate": 0.0007423497683281444, "loss": 2.4352, "step": 3129 }, { "epoch": 0.3589037954363032, "grad_norm": 0.25032972853542, "learning_rate": 0.0007421873275420277, "loss": 2.5963, "step": 3130 }, { "epoch": 0.35901846118564384, "grad_norm": 0.26103965283631186, "learning_rate": 0.0007420248533507858, "loss": 2.6379, "step": 3131 }, { "epoch": 0.3591331269349845, "grad_norm": 0.291924118554959, "learning_rate": 0.000741862345776829, "loss": 2.521, "step": 3132 }, { "epoch": 0.3592477926843252, "grad_norm": 0.2714307171327002, "learning_rate": 0.0007416998048425716, "loss": 2.5741, "step": 3133 }, { "epoch": 0.3593624584336659, "grad_norm": 0.25127692416718456, "learning_rate": 0.0007415372305704334, "loss": 2.4767, "step": 3134 }, { "epoch": 0.35947712418300654, "grad_norm": 0.271734111623691, "learning_rate": 0.0007413746229828384, "loss": 2.5986, "step": 3135 }, { "epoch": 0.3595917899323472, "grad_norm": 0.2533711797925703, "learning_rate": 0.0007412119821022153, "loss": 2.4182, "step": 3136 }, { "epoch": 0.35970645568168785, "grad_norm": 0.24779051516424047, "learning_rate": 0.000741049307950997, "loss": 2.5297, "step": 3137 }, { "epoch": 0.35982112143102857, "grad_norm": 0.270192011798646, "learning_rate": 0.0007408866005516215, "loss": 2.6237, "step": 3138 }, { "epoch": 0.3599357871803692, "grad_norm": 0.2406907088169097, "learning_rate": 0.0007407238599265313, "loss": 2.3516, "step": 3139 }, { "epoch": 0.3600504529297099, "grad_norm": 0.2677697900180917, "learning_rate": 0.0007405610860981731, "loss": 2.6636, "step": 3140 }, { "epoch": 0.36016511867905054, "grad_norm": 0.26232909617419553, "learning_rate": 0.0007403982790889987, "loss": 2.4711, "step": 3141 }, { "epoch": 0.36027978442839126, "grad_norm": 0.26266834866067457, "learning_rate": 0.0007402354389214642, "loss": 2.562, "step": 3142 }, { "epoch": 0.3603944501777319, "grad_norm": 0.2617115661948789, "learning_rate": 0.0007400725656180298, "loss": 2.518, "step": 3143 }, { "epoch": 0.3605091159270726, "grad_norm": 0.24209658973999842, "learning_rate": 0.0007399096592011616, "loss": 2.5176, "step": 3144 }, { "epoch": 0.36062378167641324, "grad_norm": 0.24993664326035045, "learning_rate": 0.0007397467196933287, "loss": 2.5033, "step": 3145 }, { "epoch": 0.36073844742575395, "grad_norm": 0.24526713087173183, "learning_rate": 0.000739583747117006, "loss": 2.5369, "step": 3146 }, { "epoch": 0.3608531131750946, "grad_norm": 0.22896931674997084, "learning_rate": 0.0007394207414946723, "loss": 2.4551, "step": 3147 }, { "epoch": 0.36096777892443527, "grad_norm": 0.2774416856452001, "learning_rate": 0.0007392577028488109, "loss": 2.4096, "step": 3148 }, { "epoch": 0.3610824446737759, "grad_norm": 0.24102181101106812, "learning_rate": 0.0007390946312019102, "loss": 2.5575, "step": 3149 }, { "epoch": 0.36119711042311664, "grad_norm": 0.242008286580097, "learning_rate": 0.0007389315265764626, "loss": 2.5322, "step": 3150 }, { "epoch": 0.3613117761724573, "grad_norm": 0.25490807445818875, "learning_rate": 0.0007387683889949655, "loss": 2.5105, "step": 3151 }, { "epoch": 0.36142644192179796, "grad_norm": 0.22616711936224826, "learning_rate": 0.0007386052184799204, "loss": 2.4694, "step": 3152 }, { "epoch": 0.3615411076711386, "grad_norm": 0.261212111431264, "learning_rate": 0.0007384420150538336, "loss": 2.5564, "step": 3153 }, { "epoch": 0.3616557734204793, "grad_norm": 0.2339104101510996, "learning_rate": 0.000738278778739216, "loss": 2.5533, "step": 3154 }, { "epoch": 0.36177043916982, "grad_norm": 0.23432215359359915, "learning_rate": 0.0007381155095585827, "loss": 2.5362, "step": 3155 }, { "epoch": 0.36188510491916065, "grad_norm": 0.24958540801299817, "learning_rate": 0.000737952207534454, "loss": 2.5934, "step": 3156 }, { "epoch": 0.3619997706685013, "grad_norm": 0.2586432324025952, "learning_rate": 0.000737788872689354, "loss": 2.525, "step": 3157 }, { "epoch": 0.36211443641784197, "grad_norm": 0.25110139790699343, "learning_rate": 0.0007376255050458116, "loss": 2.5845, "step": 3158 }, { "epoch": 0.3622291021671827, "grad_norm": 0.23297146131186758, "learning_rate": 0.0007374621046263603, "loss": 2.4288, "step": 3159 }, { "epoch": 0.36234376791652334, "grad_norm": 0.2413993292073154, "learning_rate": 0.0007372986714535381, "loss": 2.5158, "step": 3160 }, { "epoch": 0.362458433665864, "grad_norm": 0.26260594317519703, "learning_rate": 0.0007371352055498876, "loss": 2.5239, "step": 3161 }, { "epoch": 0.36257309941520466, "grad_norm": 0.26102502591721705, "learning_rate": 0.0007369717069379558, "loss": 2.4975, "step": 3162 }, { "epoch": 0.3626877651645454, "grad_norm": 0.2636504912807116, "learning_rate": 0.0007368081756402939, "loss": 2.6207, "step": 3163 }, { "epoch": 0.36280243091388603, "grad_norm": 0.25344038533017216, "learning_rate": 0.0007366446116794583, "loss": 2.6166, "step": 3164 }, { "epoch": 0.3629170966632267, "grad_norm": 0.2525686570130536, "learning_rate": 0.0007364810150780091, "loss": 2.5102, "step": 3165 }, { "epoch": 0.36303176241256735, "grad_norm": 0.25523542114331427, "learning_rate": 0.0007363173858585119, "loss": 2.6065, "step": 3166 }, { "epoch": 0.36314642816190806, "grad_norm": 0.22510147332865776, "learning_rate": 0.000736153724043536, "loss": 2.5973, "step": 3167 }, { "epoch": 0.3632610939112487, "grad_norm": 0.2705255182844913, "learning_rate": 0.000735990029655655, "loss": 2.4549, "step": 3168 }, { "epoch": 0.3633757596605894, "grad_norm": 0.263845768160062, "learning_rate": 0.0007358263027174481, "loss": 2.4938, "step": 3169 }, { "epoch": 0.36349042540993004, "grad_norm": 0.22700588945144862, "learning_rate": 0.0007356625432514979, "loss": 2.4462, "step": 3170 }, { "epoch": 0.3636050911592707, "grad_norm": 0.23398545061393705, "learning_rate": 0.000735498751280392, "loss": 2.4971, "step": 3171 }, { "epoch": 0.3637197569086114, "grad_norm": 0.24765427397815926, "learning_rate": 0.0007353349268267224, "loss": 2.547, "step": 3172 }, { "epoch": 0.3638344226579521, "grad_norm": 0.25692269905414156, "learning_rate": 0.0007351710699130856, "loss": 2.6207, "step": 3173 }, { "epoch": 0.36394908840729273, "grad_norm": 0.26636382433693206, "learning_rate": 0.0007350071805620823, "loss": 2.5768, "step": 3174 }, { "epoch": 0.3640637541566334, "grad_norm": 0.24393471039972586, "learning_rate": 0.000734843258796318, "loss": 2.5278, "step": 3175 }, { "epoch": 0.3641784199059741, "grad_norm": 0.2575181026348928, "learning_rate": 0.0007346793046384031, "loss": 2.4372, "step": 3176 }, { "epoch": 0.36429308565531476, "grad_norm": 0.2566293234573205, "learning_rate": 0.0007345153181109511, "loss": 2.5635, "step": 3177 }, { "epoch": 0.3644077514046554, "grad_norm": 0.2632488479659653, "learning_rate": 0.0007343512992365815, "loss": 2.5153, "step": 3178 }, { "epoch": 0.3645224171539961, "grad_norm": 0.2632026448900477, "learning_rate": 0.0007341872480379172, "loss": 2.5599, "step": 3179 }, { "epoch": 0.3646370829033368, "grad_norm": 0.2502691935441752, "learning_rate": 0.0007340231645375861, "loss": 2.4435, "step": 3180 }, { "epoch": 0.36475174865267745, "grad_norm": 0.2722110248376904, "learning_rate": 0.0007338590487582202, "loss": 2.5919, "step": 3181 }, { "epoch": 0.3648664144020181, "grad_norm": 0.2554796875525832, "learning_rate": 0.0007336949007224565, "loss": 2.5571, "step": 3182 }, { "epoch": 0.36498108015135877, "grad_norm": 0.22032470629196668, "learning_rate": 0.0007335307204529356, "loss": 2.5323, "step": 3183 }, { "epoch": 0.3650957459006995, "grad_norm": 0.24047608808170987, "learning_rate": 0.0007333665079723035, "loss": 2.5185, "step": 3184 }, { "epoch": 0.36521041165004015, "grad_norm": 0.26194170074787787, "learning_rate": 0.00073320226330321, "loss": 2.5665, "step": 3185 }, { "epoch": 0.3653250773993808, "grad_norm": 0.24549036575306524, "learning_rate": 0.0007330379864683096, "loss": 2.4265, "step": 3186 }, { "epoch": 0.36543974314872146, "grad_norm": 0.24583555473668625, "learning_rate": 0.0007328736774902609, "loss": 2.6855, "step": 3187 }, { "epoch": 0.3655544088980621, "grad_norm": 0.2523588950172083, "learning_rate": 0.0007327093363917274, "loss": 2.4945, "step": 3188 }, { "epoch": 0.36566907464740284, "grad_norm": 0.267660448077342, "learning_rate": 0.0007325449631953769, "loss": 2.4409, "step": 3189 }, { "epoch": 0.3657837403967435, "grad_norm": 0.24664680897560612, "learning_rate": 0.0007323805579238812, "loss": 2.448, "step": 3190 }, { "epoch": 0.36589840614608415, "grad_norm": 0.2583086638800528, "learning_rate": 0.0007322161205999173, "loss": 2.5507, "step": 3191 }, { "epoch": 0.3660130718954248, "grad_norm": 0.2773166536605837, "learning_rate": 0.000732051651246166, "loss": 2.461, "step": 3192 }, { "epoch": 0.3661277376447655, "grad_norm": 0.25899012348965544, "learning_rate": 0.0007318871498853126, "loss": 2.5581, "step": 3193 }, { "epoch": 0.3662424033941062, "grad_norm": 0.24444413567267353, "learning_rate": 0.0007317226165400473, "loss": 2.6294, "step": 3194 }, { "epoch": 0.36635706914344685, "grad_norm": 0.26457645257840207, "learning_rate": 0.0007315580512330638, "loss": 2.3897, "step": 3195 }, { "epoch": 0.3664717348927875, "grad_norm": 0.2646956862078107, "learning_rate": 0.000731393453987061, "loss": 2.4889, "step": 3196 }, { "epoch": 0.3665864006421282, "grad_norm": 0.22422556577523256, "learning_rate": 0.0007312288248247423, "loss": 2.4349, "step": 3197 }, { "epoch": 0.3667010663914689, "grad_norm": 0.26564650084219754, "learning_rate": 0.0007310641637688147, "loss": 2.5783, "step": 3198 }, { "epoch": 0.36681573214080954, "grad_norm": 0.24384006103754421, "learning_rate": 0.0007308994708419901, "loss": 2.4479, "step": 3199 }, { "epoch": 0.3669303978901502, "grad_norm": 0.2415774468850789, "learning_rate": 0.0007307347460669849, "loss": 2.5699, "step": 3200 }, { "epoch": 0.3670450636394909, "grad_norm": 0.26267245780875026, "learning_rate": 0.0007305699894665196, "loss": 2.5368, "step": 3201 }, { "epoch": 0.36715972938883157, "grad_norm": 0.2801556207979414, "learning_rate": 0.0007304052010633195, "loss": 2.4933, "step": 3202 }, { "epoch": 0.3672743951381722, "grad_norm": 0.2666971938531318, "learning_rate": 0.0007302403808801136, "loss": 2.5245, "step": 3203 }, { "epoch": 0.3673890608875129, "grad_norm": 0.25236476566392807, "learning_rate": 0.0007300755289396362, "loss": 2.4845, "step": 3204 }, { "epoch": 0.36750372663685354, "grad_norm": 0.2623797754877452, "learning_rate": 0.000729910645264625, "loss": 2.6673, "step": 3205 }, { "epoch": 0.36761839238619426, "grad_norm": 0.24395328534456118, "learning_rate": 0.000729745729877823, "loss": 2.5293, "step": 3206 }, { "epoch": 0.3677330581355349, "grad_norm": 0.25853618255640115, "learning_rate": 0.0007295807828019767, "loss": 2.6044, "step": 3207 }, { "epoch": 0.3678477238848756, "grad_norm": 0.23019980342081223, "learning_rate": 0.0007294158040598378, "loss": 2.4894, "step": 3208 }, { "epoch": 0.36796238963421624, "grad_norm": 0.25609561532275954, "learning_rate": 0.0007292507936741616, "loss": 2.5574, "step": 3209 }, { "epoch": 0.36807705538355695, "grad_norm": 0.29422786273248425, "learning_rate": 0.0007290857516677084, "loss": 2.5858, "step": 3210 }, { "epoch": 0.3681917211328976, "grad_norm": 0.2740218549789989, "learning_rate": 0.0007289206780632427, "loss": 2.4877, "step": 3211 }, { "epoch": 0.36830638688223827, "grad_norm": 0.2675330483448109, "learning_rate": 0.0007287555728835329, "loss": 2.5066, "step": 3212 }, { "epoch": 0.3684210526315789, "grad_norm": 0.25298404141476627, "learning_rate": 0.0007285904361513522, "loss": 2.5734, "step": 3213 }, { "epoch": 0.36853571838091964, "grad_norm": 0.23717492208207117, "learning_rate": 0.0007284252678894785, "loss": 2.5246, "step": 3214 }, { "epoch": 0.3686503841302603, "grad_norm": 0.2464028436615288, "learning_rate": 0.0007282600681206929, "loss": 2.5411, "step": 3215 }, { "epoch": 0.36876504987960096, "grad_norm": 0.22870968620181448, "learning_rate": 0.0007280948368677822, "loss": 2.4523, "step": 3216 }, { "epoch": 0.3688797156289416, "grad_norm": 0.24914657658671732, "learning_rate": 0.0007279295741535367, "loss": 2.5281, "step": 3217 }, { "epoch": 0.36899438137828233, "grad_norm": 0.2345395972347272, "learning_rate": 0.0007277642800007509, "loss": 2.4514, "step": 3218 }, { "epoch": 0.369109047127623, "grad_norm": 0.24618260636673986, "learning_rate": 0.0007275989544322244, "loss": 2.5058, "step": 3219 }, { "epoch": 0.36922371287696365, "grad_norm": 0.246210083517168, "learning_rate": 0.0007274335974707606, "loss": 2.5728, "step": 3220 }, { "epoch": 0.3693383786263043, "grad_norm": 0.27387977449719164, "learning_rate": 0.0007272682091391671, "loss": 2.4722, "step": 3221 }, { "epoch": 0.36945304437564497, "grad_norm": 0.2668782157906226, "learning_rate": 0.0007271027894602567, "loss": 2.4624, "step": 3222 }, { "epoch": 0.3695677101249857, "grad_norm": 0.24270577338292615, "learning_rate": 0.0007269373384568451, "loss": 2.3549, "step": 3223 }, { "epoch": 0.36968237587432634, "grad_norm": 0.4314882449595344, "learning_rate": 0.0007267718561517535, "loss": 2.4365, "step": 3224 }, { "epoch": 0.369797041623667, "grad_norm": 0.2669270447000771, "learning_rate": 0.0007266063425678071, "loss": 2.4471, "step": 3225 }, { "epoch": 0.36991170737300766, "grad_norm": 0.2877288765648235, "learning_rate": 0.0007264407977278354, "loss": 2.4115, "step": 3226 }, { "epoch": 0.3700263731223484, "grad_norm": 0.27120779434849474, "learning_rate": 0.0007262752216546718, "loss": 2.5605, "step": 3227 }, { "epoch": 0.37014103887168903, "grad_norm": 0.2850911905268917, "learning_rate": 0.0007261096143711545, "loss": 2.4066, "step": 3228 }, { "epoch": 0.3702557046210297, "grad_norm": 0.23631875609104616, "learning_rate": 0.0007259439759001262, "loss": 2.5284, "step": 3229 }, { "epoch": 0.37037037037037035, "grad_norm": 0.2348722450075581, "learning_rate": 0.0007257783062644333, "loss": 2.4982, "step": 3230 }, { "epoch": 0.37048503611971106, "grad_norm": 0.24340610980812102, "learning_rate": 0.0007256126054869265, "loss": 2.3834, "step": 3231 }, { "epoch": 0.3705997018690517, "grad_norm": 0.26540718268036606, "learning_rate": 0.0007254468735904616, "loss": 2.5349, "step": 3232 }, { "epoch": 0.3707143676183924, "grad_norm": 0.25597993580404177, "learning_rate": 0.0007252811105978977, "loss": 2.6193, "step": 3233 }, { "epoch": 0.37082903336773304, "grad_norm": 0.25188527250381537, "learning_rate": 0.000725115316532099, "loss": 2.5288, "step": 3234 }, { "epoch": 0.37094369911707376, "grad_norm": 0.26164758334789684, "learning_rate": 0.0007249494914159332, "loss": 2.586, "step": 3235 }, { "epoch": 0.3710583648664144, "grad_norm": 0.23298366594038572, "learning_rate": 0.0007247836352722733, "loss": 2.4729, "step": 3236 }, { "epoch": 0.3711730306157551, "grad_norm": 0.24603293215721309, "learning_rate": 0.0007246177481239956, "loss": 2.504, "step": 3237 }, { "epoch": 0.37128769636509573, "grad_norm": 0.27903410293486464, "learning_rate": 0.0007244518299939811, "loss": 2.6111, "step": 3238 }, { "epoch": 0.3714023621144364, "grad_norm": 0.2474475409142102, "learning_rate": 0.0007242858809051152, "loss": 2.5273, "step": 3239 }, { "epoch": 0.3715170278637771, "grad_norm": 0.25753646906554684, "learning_rate": 0.000724119900880287, "loss": 2.5063, "step": 3240 }, { "epoch": 0.37163169361311776, "grad_norm": 0.24352827498873375, "learning_rate": 0.000723953889942391, "loss": 2.4917, "step": 3241 }, { "epoch": 0.3717463593624584, "grad_norm": 0.23353627447301376, "learning_rate": 0.0007237878481143246, "loss": 2.6147, "step": 3242 }, { "epoch": 0.3718610251117991, "grad_norm": 0.24812268917828073, "learning_rate": 0.0007236217754189903, "loss": 2.5877, "step": 3243 }, { "epoch": 0.3719756908611398, "grad_norm": 0.2537598939287037, "learning_rate": 0.0007234556718792948, "loss": 2.6926, "step": 3244 }, { "epoch": 0.37209035661048045, "grad_norm": 0.22557097894618303, "learning_rate": 0.0007232895375181488, "loss": 2.4396, "step": 3245 }, { "epoch": 0.3722050223598211, "grad_norm": 0.2630671910890379, "learning_rate": 0.0007231233723584674, "loss": 2.6861, "step": 3246 }, { "epoch": 0.3723196881091618, "grad_norm": 0.2402911148950178, "learning_rate": 0.0007229571764231699, "loss": 2.4917, "step": 3247 }, { "epoch": 0.3724343538585025, "grad_norm": 0.26405903773815587, "learning_rate": 0.0007227909497351799, "loss": 2.4953, "step": 3248 }, { "epoch": 0.37254901960784315, "grad_norm": 0.26365237594792307, "learning_rate": 0.000722624692317425, "loss": 2.4865, "step": 3249 }, { "epoch": 0.3726636853571838, "grad_norm": 0.2950275571909416, "learning_rate": 0.0007224584041928374, "loss": 2.5685, "step": 3250 }, { "epoch": 0.37277835110652446, "grad_norm": 0.2507892023120786, "learning_rate": 0.0007222920853843538, "loss": 2.434, "step": 3251 }, { "epoch": 0.3728930168558652, "grad_norm": 0.2753633085575958, "learning_rate": 0.0007221257359149139, "loss": 2.4592, "step": 3252 }, { "epoch": 0.37300768260520584, "grad_norm": 0.24953854409271162, "learning_rate": 0.0007219593558074629, "loss": 2.5606, "step": 3253 }, { "epoch": 0.3731223483545465, "grad_norm": 0.25734073797470497, "learning_rate": 0.0007217929450849497, "loss": 2.4357, "step": 3254 }, { "epoch": 0.37323701410388715, "grad_norm": 0.23523324818467595, "learning_rate": 0.0007216265037703276, "loss": 2.453, "step": 3255 }, { "epoch": 0.3733516798532278, "grad_norm": 0.2630094457018264, "learning_rate": 0.0007214600318865538, "loss": 2.5151, "step": 3256 }, { "epoch": 0.37346634560256853, "grad_norm": 0.2630306631139327, "learning_rate": 0.00072129352945659, "loss": 2.4308, "step": 3257 }, { "epoch": 0.3735810113519092, "grad_norm": 0.239780504673349, "learning_rate": 0.000721126996503402, "loss": 2.4762, "step": 3258 }, { "epoch": 0.37369567710124985, "grad_norm": 0.24981416550407418, "learning_rate": 0.0007209604330499599, "loss": 2.6404, "step": 3259 }, { "epoch": 0.3738103428505905, "grad_norm": 0.2292347240679528, "learning_rate": 0.000720793839119238, "loss": 2.6289, "step": 3260 }, { "epoch": 0.3739250085999312, "grad_norm": 0.24431923119452312, "learning_rate": 0.0007206272147342147, "loss": 2.4456, "step": 3261 }, { "epoch": 0.3740396743492719, "grad_norm": 0.2235047476141396, "learning_rate": 0.0007204605599178728, "loss": 2.4433, "step": 3262 }, { "epoch": 0.37415434009861254, "grad_norm": 0.2558017428134492, "learning_rate": 0.0007202938746931988, "loss": 2.4333, "step": 3263 }, { "epoch": 0.3742690058479532, "grad_norm": 0.27186364589481404, "learning_rate": 0.0007201271590831841, "loss": 2.5518, "step": 3264 }, { "epoch": 0.3743836715972939, "grad_norm": 0.2577681055124668, "learning_rate": 0.0007199604131108237, "loss": 2.4012, "step": 3265 }, { "epoch": 0.37449833734663457, "grad_norm": 0.281984562414622, "learning_rate": 0.0007197936367991174, "loss": 2.6898, "step": 3266 }, { "epoch": 0.3746130030959752, "grad_norm": 0.2476130337581546, "learning_rate": 0.0007196268301710684, "loss": 2.5822, "step": 3267 }, { "epoch": 0.3747276688453159, "grad_norm": 0.2585415917956713, "learning_rate": 0.0007194599932496845, "loss": 2.5309, "step": 3268 }, { "epoch": 0.3748423345946566, "grad_norm": 0.25735917392389623, "learning_rate": 0.000719293126057978, "loss": 2.4814, "step": 3269 }, { "epoch": 0.37495700034399726, "grad_norm": 0.24819746876095786, "learning_rate": 0.0007191262286189649, "loss": 2.4799, "step": 3270 }, { "epoch": 0.3750716660933379, "grad_norm": 0.25917638685699074, "learning_rate": 0.0007189593009556651, "loss": 2.4459, "step": 3271 }, { "epoch": 0.3751863318426786, "grad_norm": 0.24224178365774368, "learning_rate": 0.0007187923430911039, "loss": 2.4042, "step": 3272 }, { "epoch": 0.37530099759201924, "grad_norm": 0.25414993525651836, "learning_rate": 0.0007186253550483094, "loss": 2.5821, "step": 3273 }, { "epoch": 0.37541566334135995, "grad_norm": 0.2637319042643623, "learning_rate": 0.0007184583368503146, "loss": 2.4998, "step": 3274 }, { "epoch": 0.3755303290907006, "grad_norm": 0.25101210864732776, "learning_rate": 0.0007182912885201563, "loss": 2.3071, "step": 3275 }, { "epoch": 0.37564499484004127, "grad_norm": 0.2551360116957763, "learning_rate": 0.0007181242100808759, "loss": 2.4781, "step": 3276 }, { "epoch": 0.3757596605893819, "grad_norm": 0.24886997041682357, "learning_rate": 0.0007179571015555184, "loss": 2.4702, "step": 3277 }, { "epoch": 0.37587432633872264, "grad_norm": 0.28515522678036376, "learning_rate": 0.0007177899629671335, "loss": 2.5128, "step": 3278 }, { "epoch": 0.3759889920880633, "grad_norm": 0.23683921806412583, "learning_rate": 0.0007176227943387747, "loss": 2.4161, "step": 3279 }, { "epoch": 0.37610365783740396, "grad_norm": 0.2782393439960414, "learning_rate": 0.0007174555956934996, "loss": 2.4495, "step": 3280 }, { "epoch": 0.3762183235867446, "grad_norm": 0.24963479356192292, "learning_rate": 0.00071728836705437, "loss": 2.4749, "step": 3281 }, { "epoch": 0.37633298933608533, "grad_norm": 0.2842593860215262, "learning_rate": 0.0007171211084444525, "loss": 2.4916, "step": 3282 }, { "epoch": 0.376447655085426, "grad_norm": 0.28217433601905123, "learning_rate": 0.0007169538198868164, "loss": 2.4329, "step": 3283 }, { "epoch": 0.37656232083476665, "grad_norm": 0.2472455826753323, "learning_rate": 0.0007167865014045365, "loss": 2.4198, "step": 3284 }, { "epoch": 0.3766769865841073, "grad_norm": 0.2804890030616936, "learning_rate": 0.0007166191530206909, "loss": 2.5317, "step": 3285 }, { "epoch": 0.376791652333448, "grad_norm": 0.2926108196531338, "learning_rate": 0.0007164517747583625, "loss": 2.5855, "step": 3286 }, { "epoch": 0.3769063180827887, "grad_norm": 0.2675069136044682, "learning_rate": 0.0007162843666406376, "loss": 2.4876, "step": 3287 }, { "epoch": 0.37702098383212934, "grad_norm": 0.2683306624872134, "learning_rate": 0.0007161169286906071, "loss": 2.5256, "step": 3288 }, { "epoch": 0.37713564958147, "grad_norm": 0.2400710929245723, "learning_rate": 0.0007159494609313659, "loss": 2.4482, "step": 3289 }, { "epoch": 0.37725031533081066, "grad_norm": 0.29350463104060703, "learning_rate": 0.0007157819633860129, "loss": 2.4206, "step": 3290 }, { "epoch": 0.3773649810801514, "grad_norm": 0.27010895531891316, "learning_rate": 0.0007156144360776514, "loss": 2.4724, "step": 3291 }, { "epoch": 0.37747964682949203, "grad_norm": 0.2714227386542217, "learning_rate": 0.0007154468790293882, "loss": 2.4909, "step": 3292 }, { "epoch": 0.3775943125788327, "grad_norm": 0.2561517048708508, "learning_rate": 0.0007152792922643348, "loss": 2.4928, "step": 3293 }, { "epoch": 0.37770897832817335, "grad_norm": 0.26605625160005275, "learning_rate": 0.0007151116758056066, "loss": 2.6399, "step": 3294 }, { "epoch": 0.37782364407751406, "grad_norm": 0.25441285432841027, "learning_rate": 0.0007149440296763234, "loss": 2.4553, "step": 3295 }, { "epoch": 0.3779383098268547, "grad_norm": 0.24323779990089406, "learning_rate": 0.0007147763538996083, "loss": 2.4778, "step": 3296 }, { "epoch": 0.3780529755761954, "grad_norm": 0.2868573711085698, "learning_rate": 0.0007146086484985892, "loss": 2.5466, "step": 3297 }, { "epoch": 0.37816764132553604, "grad_norm": 0.24877904328385905, "learning_rate": 0.000714440913496398, "loss": 2.5608, "step": 3298 }, { "epoch": 0.37828230707487676, "grad_norm": 0.2384267621750337, "learning_rate": 0.0007142731489161703, "loss": 2.5078, "step": 3299 }, { "epoch": 0.3783969728242174, "grad_norm": 0.27211266765664244, "learning_rate": 0.0007141053547810459, "loss": 2.4924, "step": 3300 }, { "epoch": 0.3785116385735581, "grad_norm": 0.2363945689063107, "learning_rate": 0.0007139375311141693, "loss": 2.5695, "step": 3301 }, { "epoch": 0.37862630432289873, "grad_norm": 0.25944902731150676, "learning_rate": 0.0007137696779386883, "loss": 2.5451, "step": 3302 }, { "epoch": 0.37874097007223945, "grad_norm": 0.2436715019095733, "learning_rate": 0.0007136017952777549, "loss": 2.6362, "step": 3303 }, { "epoch": 0.3788556358215801, "grad_norm": 0.24953043135186878, "learning_rate": 0.0007134338831545257, "loss": 2.4352, "step": 3304 }, { "epoch": 0.37897030157092076, "grad_norm": 0.2495108751703956, "learning_rate": 0.0007132659415921605, "loss": 2.6006, "step": 3305 }, { "epoch": 0.3790849673202614, "grad_norm": 0.22230315546549498, "learning_rate": 0.0007130979706138241, "loss": 2.535, "step": 3306 }, { "epoch": 0.3791996330696021, "grad_norm": 0.23781559852240367, "learning_rate": 0.0007129299702426845, "loss": 2.4698, "step": 3307 }, { "epoch": 0.3793142988189428, "grad_norm": 0.2298514437584039, "learning_rate": 0.0007127619405019143, "loss": 2.4339, "step": 3308 }, { "epoch": 0.37942896456828346, "grad_norm": 0.23273957183336502, "learning_rate": 0.0007125938814146901, "loss": 2.6607, "step": 3309 }, { "epoch": 0.3795436303176241, "grad_norm": 0.22860370399677724, "learning_rate": 0.0007124257930041924, "loss": 2.559, "step": 3310 }, { "epoch": 0.3796582960669648, "grad_norm": 0.26940035801422235, "learning_rate": 0.0007122576752936058, "loss": 2.5228, "step": 3311 }, { "epoch": 0.3797729618163055, "grad_norm": 0.3138729181103564, "learning_rate": 0.0007120895283061187, "loss": 2.508, "step": 3312 }, { "epoch": 0.37988762756564615, "grad_norm": 0.2531077710613849, "learning_rate": 0.000711921352064924, "loss": 2.6082, "step": 3313 }, { "epoch": 0.3800022933149868, "grad_norm": 0.2528142608022852, "learning_rate": 0.0007117531465932185, "loss": 2.4632, "step": 3314 }, { "epoch": 0.38011695906432746, "grad_norm": 0.2607900089624971, "learning_rate": 0.0007115849119142026, "loss": 2.4894, "step": 3315 }, { "epoch": 0.3802316248136682, "grad_norm": 0.24983596822560497, "learning_rate": 0.0007114166480510815, "loss": 2.5723, "step": 3316 }, { "epoch": 0.38034629056300884, "grad_norm": 0.23058757652317827, "learning_rate": 0.0007112483550270639, "loss": 2.5334, "step": 3317 }, { "epoch": 0.3804609563123495, "grad_norm": 0.2537451053345036, "learning_rate": 0.000711080032865362, "loss": 2.3385, "step": 3318 }, { "epoch": 0.38057562206169016, "grad_norm": 0.2716560183977039, "learning_rate": 0.0007109116815891936, "loss": 2.608, "step": 3319 }, { "epoch": 0.38069028781103087, "grad_norm": 0.24624924368343779, "learning_rate": 0.0007107433012217788, "loss": 2.4975, "step": 3320 }, { "epoch": 0.38080495356037153, "grad_norm": 0.26167266229930625, "learning_rate": 0.0007105748917863427, "loss": 2.523, "step": 3321 }, { "epoch": 0.3809196193097122, "grad_norm": 0.24446805610781225, "learning_rate": 0.0007104064533061144, "loss": 2.512, "step": 3322 }, { "epoch": 0.38103428505905285, "grad_norm": 0.22864118414425114, "learning_rate": 0.0007102379858043264, "loss": 2.4643, "step": 3323 }, { "epoch": 0.3811489508083935, "grad_norm": 0.22163741342962576, "learning_rate": 0.0007100694893042159, "loss": 2.4351, "step": 3324 }, { "epoch": 0.3812636165577342, "grad_norm": 0.24346505461253604, "learning_rate": 0.0007099009638290235, "loss": 2.5602, "step": 3325 }, { "epoch": 0.3813782823070749, "grad_norm": 0.25406309761678675, "learning_rate": 0.0007097324094019943, "loss": 2.5139, "step": 3326 }, { "epoch": 0.38149294805641554, "grad_norm": 0.24958389959018776, "learning_rate": 0.000709563826046377, "loss": 2.5542, "step": 3327 }, { "epoch": 0.3816076138057562, "grad_norm": 0.25530414864736733, "learning_rate": 0.0007093952137854247, "loss": 2.4556, "step": 3328 }, { "epoch": 0.3817222795550969, "grad_norm": 0.2596914308737866, "learning_rate": 0.0007092265726423941, "loss": 2.3767, "step": 3329 }, { "epoch": 0.38183694530443757, "grad_norm": 0.26460260558623416, "learning_rate": 0.0007090579026405458, "loss": 2.505, "step": 3330 }, { "epoch": 0.38195161105377823, "grad_norm": 0.25373030800212676, "learning_rate": 0.0007088892038031449, "loss": 2.6526, "step": 3331 }, { "epoch": 0.3820662768031189, "grad_norm": 0.24950715628023037, "learning_rate": 0.0007087204761534603, "loss": 2.5383, "step": 3332 }, { "epoch": 0.3821809425524596, "grad_norm": 0.23607081532048016, "learning_rate": 0.0007085517197147645, "loss": 2.4631, "step": 3333 }, { "epoch": 0.38229560830180026, "grad_norm": 0.25358651989888337, "learning_rate": 0.0007083829345103343, "loss": 2.3509, "step": 3334 }, { "epoch": 0.3824102740511409, "grad_norm": 0.2585353368431906, "learning_rate": 0.0007082141205634505, "loss": 2.5429, "step": 3335 }, { "epoch": 0.3825249398004816, "grad_norm": 0.2674027725811041, "learning_rate": 0.0007080452778973976, "loss": 2.4928, "step": 3336 }, { "epoch": 0.3826396055498223, "grad_norm": 0.24283100874926547, "learning_rate": 0.0007078764065354643, "loss": 2.441, "step": 3337 }, { "epoch": 0.38275427129916295, "grad_norm": 0.25387582088542593, "learning_rate": 0.0007077075065009433, "loss": 2.4478, "step": 3338 }, { "epoch": 0.3828689370485036, "grad_norm": 0.2456607941012106, "learning_rate": 0.000707538577817131, "loss": 2.6036, "step": 3339 }, { "epoch": 0.38298360279784427, "grad_norm": 0.23931052301147723, "learning_rate": 0.0007073696205073278, "loss": 2.544, "step": 3340 }, { "epoch": 0.383098268547185, "grad_norm": 0.2736046249965013, "learning_rate": 0.0007072006345948385, "loss": 2.5929, "step": 3341 }, { "epoch": 0.38321293429652564, "grad_norm": 0.23083186537481612, "learning_rate": 0.0007070316201029711, "loss": 2.551, "step": 3342 }, { "epoch": 0.3833276000458663, "grad_norm": 0.23528617571556937, "learning_rate": 0.0007068625770550381, "loss": 2.5754, "step": 3343 }, { "epoch": 0.38344226579520696, "grad_norm": 0.22540987100634446, "learning_rate": 0.0007066935054743559, "loss": 2.5924, "step": 3344 }, { "epoch": 0.3835569315445476, "grad_norm": 0.26224861677081374, "learning_rate": 0.0007065244053842444, "loss": 2.6004, "step": 3345 }, { "epoch": 0.38367159729388833, "grad_norm": 0.24170142545142867, "learning_rate": 0.0007063552768080279, "loss": 2.4819, "step": 3346 }, { "epoch": 0.383786263043229, "grad_norm": 0.24283165869625842, "learning_rate": 0.0007061861197690347, "loss": 2.5367, "step": 3347 }, { "epoch": 0.38390092879256965, "grad_norm": 0.2513011323788103, "learning_rate": 0.0007060169342905962, "loss": 2.6464, "step": 3348 }, { "epoch": 0.3840155945419103, "grad_norm": 0.2560748726793121, "learning_rate": 0.0007058477203960488, "loss": 2.6137, "step": 3349 }, { "epoch": 0.384130260291251, "grad_norm": 0.2571614124123322, "learning_rate": 0.0007056784781087322, "loss": 2.4735, "step": 3350 }, { "epoch": 0.3842449260405917, "grad_norm": 0.2677970324336668, "learning_rate": 0.0007055092074519903, "loss": 2.3965, "step": 3351 }, { "epoch": 0.38435959178993234, "grad_norm": 0.28981485398703594, "learning_rate": 0.0007053399084491703, "loss": 2.4689, "step": 3352 }, { "epoch": 0.384474257539273, "grad_norm": 0.2788058522678535, "learning_rate": 0.0007051705811236241, "loss": 2.4455, "step": 3353 }, { "epoch": 0.3845889232886137, "grad_norm": 0.26058513884293627, "learning_rate": 0.0007050012254987073, "loss": 2.5213, "step": 3354 }, { "epoch": 0.3847035890379544, "grad_norm": 0.2574722865085449, "learning_rate": 0.000704831841597779, "loss": 2.4439, "step": 3355 }, { "epoch": 0.38481825478729503, "grad_norm": 0.2684976977094693, "learning_rate": 0.0007046624294442026, "loss": 2.5769, "step": 3356 }, { "epoch": 0.3849329205366357, "grad_norm": 0.2686916145778126, "learning_rate": 0.0007044929890613454, "loss": 2.5885, "step": 3357 }, { "epoch": 0.3850475862859764, "grad_norm": 0.26210879128715947, "learning_rate": 0.0007043235204725783, "loss": 2.4613, "step": 3358 }, { "epoch": 0.38516225203531707, "grad_norm": 0.2336636478780257, "learning_rate": 0.0007041540237012762, "loss": 2.4964, "step": 3359 }, { "epoch": 0.3852769177846577, "grad_norm": 0.24694928705261687, "learning_rate": 0.0007039844987708182, "loss": 2.5592, "step": 3360 }, { "epoch": 0.3853915835339984, "grad_norm": 0.2533691713941954, "learning_rate": 0.0007038149457045868, "loss": 2.5027, "step": 3361 }, { "epoch": 0.38550624928333904, "grad_norm": 0.270405223648937, "learning_rate": 0.0007036453645259688, "loss": 2.5241, "step": 3362 }, { "epoch": 0.38562091503267976, "grad_norm": 0.22962353767848545, "learning_rate": 0.0007034757552583545, "loss": 2.6364, "step": 3363 }, { "epoch": 0.3857355807820204, "grad_norm": 0.28689541682222786, "learning_rate": 0.0007033061179251385, "loss": 2.7531, "step": 3364 }, { "epoch": 0.3858502465313611, "grad_norm": 0.264123249959899, "learning_rate": 0.0007031364525497187, "loss": 2.4431, "step": 3365 }, { "epoch": 0.38596491228070173, "grad_norm": 0.23635801444780508, "learning_rate": 0.0007029667591554975, "loss": 2.458, "step": 3366 }, { "epoch": 0.38607957803004245, "grad_norm": 0.23520158403652955, "learning_rate": 0.0007027970377658809, "loss": 2.5543, "step": 3367 }, { "epoch": 0.3861942437793831, "grad_norm": 0.2588827576846166, "learning_rate": 0.0007026272884042784, "loss": 2.5377, "step": 3368 }, { "epoch": 0.38630890952872377, "grad_norm": 0.25949575503752687, "learning_rate": 0.0007024575110941041, "loss": 2.4821, "step": 3369 }, { "epoch": 0.3864235752780644, "grad_norm": 0.26892930430821027, "learning_rate": 0.0007022877058587751, "loss": 2.5521, "step": 3370 }, { "epoch": 0.38653824102740514, "grad_norm": 0.2594161084094227, "learning_rate": 0.0007021178727217131, "loss": 2.4754, "step": 3371 }, { "epoch": 0.3866529067767458, "grad_norm": 0.24833004195939623, "learning_rate": 0.0007019480117063433, "loss": 2.4935, "step": 3372 }, { "epoch": 0.38676757252608646, "grad_norm": 0.24074450862409577, "learning_rate": 0.0007017781228360948, "loss": 2.5899, "step": 3373 }, { "epoch": 0.3868822382754271, "grad_norm": 0.2398259014342528, "learning_rate": 0.0007016082061344005, "loss": 2.6236, "step": 3374 }, { "epoch": 0.38699690402476783, "grad_norm": 0.24888152024426077, "learning_rate": 0.000701438261624697, "loss": 2.4112, "step": 3375 }, { "epoch": 0.3871115697741085, "grad_norm": 0.24400361753616306, "learning_rate": 0.0007012682893304254, "loss": 2.5067, "step": 3376 }, { "epoch": 0.38722623552344915, "grad_norm": 0.26077018264134383, "learning_rate": 0.0007010982892750296, "loss": 2.5201, "step": 3377 }, { "epoch": 0.3873409012727898, "grad_norm": 0.2602928750228207, "learning_rate": 0.0007009282614819581, "loss": 2.4977, "step": 3378 }, { "epoch": 0.38745556702213046, "grad_norm": 0.27300911599832545, "learning_rate": 0.000700758205974663, "loss": 2.5493, "step": 3379 }, { "epoch": 0.3875702327714712, "grad_norm": 0.2539265346184591, "learning_rate": 0.0007005881227766001, "loss": 2.4786, "step": 3380 }, { "epoch": 0.38768489852081184, "grad_norm": 0.23683777016482896, "learning_rate": 0.0007004180119112293, "loss": 2.3898, "step": 3381 }, { "epoch": 0.3877995642701525, "grad_norm": 0.24763603992682945, "learning_rate": 0.0007002478734020141, "loss": 2.4997, "step": 3382 }, { "epoch": 0.38791423001949316, "grad_norm": 0.22645043594248754, "learning_rate": 0.0007000777072724218, "loss": 2.5235, "step": 3383 }, { "epoch": 0.38802889576883387, "grad_norm": 0.2683503216602483, "learning_rate": 0.0006999075135459235, "loss": 2.6094, "step": 3384 }, { "epoch": 0.38814356151817453, "grad_norm": 0.24670818833312091, "learning_rate": 0.0006997372922459944, "loss": 2.5092, "step": 3385 }, { "epoch": 0.3882582272675152, "grad_norm": 0.25397577423702644, "learning_rate": 0.0006995670433961132, "loss": 2.5657, "step": 3386 }, { "epoch": 0.38837289301685585, "grad_norm": 0.2652067494326082, "learning_rate": 0.0006993967670197624, "loss": 2.4736, "step": 3387 }, { "epoch": 0.38848755876619656, "grad_norm": 0.23549094916148403, "learning_rate": 0.0006992264631404284, "loss": 2.3756, "step": 3388 }, { "epoch": 0.3886022245155372, "grad_norm": 0.25191899809067586, "learning_rate": 0.0006990561317816016, "loss": 2.5228, "step": 3389 }, { "epoch": 0.3887168902648779, "grad_norm": 0.22677073268131437, "learning_rate": 0.0006988857729667754, "loss": 2.3681, "step": 3390 }, { "epoch": 0.38883155601421854, "grad_norm": 0.24297554904185603, "learning_rate": 0.0006987153867194484, "loss": 2.5997, "step": 3391 }, { "epoch": 0.38894622176355925, "grad_norm": 0.2716224793322931, "learning_rate": 0.0006985449730631215, "loss": 2.5984, "step": 3392 }, { "epoch": 0.3890608875128999, "grad_norm": 0.3064722635186922, "learning_rate": 0.0006983745320212998, "loss": 2.4407, "step": 3393 }, { "epoch": 0.38917555326224057, "grad_norm": 0.26051324921512525, "learning_rate": 0.0006982040636174932, "loss": 2.4945, "step": 3394 }, { "epoch": 0.38929021901158123, "grad_norm": 0.2697298584349351, "learning_rate": 0.0006980335678752141, "loss": 2.4284, "step": 3395 }, { "epoch": 0.3894048847609219, "grad_norm": 0.27946306299529206, "learning_rate": 0.000697863044817979, "loss": 2.5016, "step": 3396 }, { "epoch": 0.3895195505102626, "grad_norm": 0.2868389543045102, "learning_rate": 0.0006976924944693086, "loss": 2.6699, "step": 3397 }, { "epoch": 0.38963421625960326, "grad_norm": 0.237027763218194, "learning_rate": 0.0006975219168527269, "loss": 2.4717, "step": 3398 }, { "epoch": 0.3897488820089439, "grad_norm": 0.23429753676065615, "learning_rate": 0.000697351311991762, "loss": 2.5792, "step": 3399 }, { "epoch": 0.3898635477582846, "grad_norm": 0.23140582421065567, "learning_rate": 0.0006971806799099452, "loss": 2.4249, "step": 3400 }, { "epoch": 0.3899782135076253, "grad_norm": 0.256279317365194, "learning_rate": 0.0006970100206308126, "loss": 2.7002, "step": 3401 }, { "epoch": 0.39009287925696595, "grad_norm": 0.27720273391679867, "learning_rate": 0.0006968393341779027, "loss": 2.4539, "step": 3402 }, { "epoch": 0.3902075450063066, "grad_norm": 0.2534174190871776, "learning_rate": 0.0006966686205747588, "loss": 2.446, "step": 3403 }, { "epoch": 0.39032221075564727, "grad_norm": 0.2711533442650797, "learning_rate": 0.0006964978798449276, "loss": 2.5753, "step": 3404 }, { "epoch": 0.390436876504988, "grad_norm": 0.2530624955289696, "learning_rate": 0.0006963271120119594, "loss": 2.4999, "step": 3405 }, { "epoch": 0.39055154225432864, "grad_norm": 0.2369098573350872, "learning_rate": 0.0006961563170994085, "loss": 2.4111, "step": 3406 }, { "epoch": 0.3906662080036693, "grad_norm": 0.24942123435146613, "learning_rate": 0.0006959854951308328, "loss": 2.4199, "step": 3407 }, { "epoch": 0.39078087375300996, "grad_norm": 0.2625258239123289, "learning_rate": 0.0006958146461297938, "loss": 2.5919, "step": 3408 }, { "epoch": 0.3908955395023507, "grad_norm": 0.23449644844918574, "learning_rate": 0.0006956437701198568, "loss": 2.5345, "step": 3409 }, { "epoch": 0.39101020525169133, "grad_norm": 0.28390174516792493, "learning_rate": 0.0006954728671245911, "loss": 2.6242, "step": 3410 }, { "epoch": 0.391124871001032, "grad_norm": 0.2833875103169099, "learning_rate": 0.0006953019371675695, "loss": 2.5364, "step": 3411 }, { "epoch": 0.39123953675037265, "grad_norm": 0.25935329274473834, "learning_rate": 0.0006951309802723685, "loss": 2.428, "step": 3412 }, { "epoch": 0.3913542024997133, "grad_norm": 0.22963252659029773, "learning_rate": 0.0006949599964625682, "loss": 2.4956, "step": 3413 }, { "epoch": 0.391468868249054, "grad_norm": 0.23401401530948931, "learning_rate": 0.0006947889857617526, "loss": 2.537, "step": 3414 }, { "epoch": 0.3915835339983947, "grad_norm": 0.27170989784952354, "learning_rate": 0.0006946179481935095, "loss": 2.4996, "step": 3415 }, { "epoch": 0.39169819974773534, "grad_norm": 0.254465599905227, "learning_rate": 0.0006944468837814302, "loss": 2.4143, "step": 3416 }, { "epoch": 0.391812865497076, "grad_norm": 0.2371091358321495, "learning_rate": 0.00069427579254911, "loss": 2.5095, "step": 3417 }, { "epoch": 0.3919275312464167, "grad_norm": 0.23945438402459795, "learning_rate": 0.0006941046745201472, "loss": 2.445, "step": 3418 }, { "epoch": 0.3920421969957574, "grad_norm": 0.24635620659937724, "learning_rate": 0.0006939335297181447, "loss": 2.5096, "step": 3419 }, { "epoch": 0.39215686274509803, "grad_norm": 0.25276313256154886, "learning_rate": 0.0006937623581667082, "loss": 2.5869, "step": 3420 }, { "epoch": 0.3922715284944387, "grad_norm": 0.2532248805180257, "learning_rate": 0.000693591159889448, "loss": 2.6309, "step": 3421 }, { "epoch": 0.3923861942437794, "grad_norm": 0.24398906472736517, "learning_rate": 0.0006934199349099775, "loss": 2.5085, "step": 3422 }, { "epoch": 0.39250085999312007, "grad_norm": 0.22956572760697563, "learning_rate": 0.0006932486832519139, "loss": 2.4465, "step": 3423 }, { "epoch": 0.3926155257424607, "grad_norm": 0.2538271743899817, "learning_rate": 0.000693077404938878, "loss": 2.4636, "step": 3424 }, { "epoch": 0.3927301914918014, "grad_norm": 0.24382766978951204, "learning_rate": 0.0006929060999944945, "loss": 2.4959, "step": 3425 }, { "epoch": 0.3928448572411421, "grad_norm": 0.27106006418220685, "learning_rate": 0.0006927347684423918, "loss": 2.447, "step": 3426 }, { "epoch": 0.39295952299048276, "grad_norm": 0.243147548846084, "learning_rate": 0.0006925634103062015, "loss": 2.4928, "step": 3427 }, { "epoch": 0.3930741887398234, "grad_norm": 0.2595088625397045, "learning_rate": 0.0006923920256095594, "loss": 2.581, "step": 3428 }, { "epoch": 0.3931888544891641, "grad_norm": 0.24535816753156603, "learning_rate": 0.0006922206143761047, "loss": 2.5249, "step": 3429 }, { "epoch": 0.39330352023850473, "grad_norm": 0.23466974526859236, "learning_rate": 0.0006920491766294803, "loss": 2.4399, "step": 3430 }, { "epoch": 0.39341818598784545, "grad_norm": 0.22392533723702146, "learning_rate": 0.0006918777123933326, "loss": 2.5055, "step": 3431 }, { "epoch": 0.3935328517371861, "grad_norm": 0.24880237902738353, "learning_rate": 0.0006917062216913123, "loss": 2.5228, "step": 3432 }, { "epoch": 0.39364751748652677, "grad_norm": 0.22670082143900633, "learning_rate": 0.0006915347045470728, "loss": 2.4754, "step": 3433 }, { "epoch": 0.3937621832358674, "grad_norm": 0.24247326165471242, "learning_rate": 0.0006913631609842718, "loss": 2.5569, "step": 3434 }, { "epoch": 0.39387684898520814, "grad_norm": 0.24460390931253906, "learning_rate": 0.0006911915910265703, "loss": 2.4534, "step": 3435 }, { "epoch": 0.3939915147345488, "grad_norm": 0.24531734119030324, "learning_rate": 0.0006910199946976337, "loss": 2.4521, "step": 3436 }, { "epoch": 0.39410618048388946, "grad_norm": 0.2211504094104953, "learning_rate": 0.0006908483720211296, "loss": 2.4809, "step": 3437 }, { "epoch": 0.3942208462332301, "grad_norm": 0.21903256213047728, "learning_rate": 0.0006906767230207306, "loss": 2.3215, "step": 3438 }, { "epoch": 0.39433551198257083, "grad_norm": 0.2556524374140686, "learning_rate": 0.0006905050477201124, "loss": 2.5591, "step": 3439 }, { "epoch": 0.3944501777319115, "grad_norm": 0.23604180320237061, "learning_rate": 0.0006903333461429539, "loss": 2.4818, "step": 3440 }, { "epoch": 0.39456484348125215, "grad_norm": 0.2414555034243416, "learning_rate": 0.0006901616183129386, "loss": 2.5066, "step": 3441 }, { "epoch": 0.3946795092305928, "grad_norm": 0.2516564418903787, "learning_rate": 0.0006899898642537531, "loss": 2.6177, "step": 3442 }, { "epoch": 0.3947941749799335, "grad_norm": 0.22462701663331472, "learning_rate": 0.000689818083989087, "loss": 2.5115, "step": 3443 }, { "epoch": 0.3949088407292742, "grad_norm": 0.24372804352616514, "learning_rate": 0.0006896462775426346, "loss": 2.4744, "step": 3444 }, { "epoch": 0.39502350647861484, "grad_norm": 0.2424426429532698, "learning_rate": 0.0006894744449380932, "loss": 2.3823, "step": 3445 }, { "epoch": 0.3951381722279555, "grad_norm": 0.2650612312537781, "learning_rate": 0.0006893025861991639, "loss": 2.4716, "step": 3446 }, { "epoch": 0.39525283797729616, "grad_norm": 0.2544222718183807, "learning_rate": 0.0006891307013495513, "loss": 2.4273, "step": 3447 }, { "epoch": 0.39536750372663687, "grad_norm": 0.2524410260924627, "learning_rate": 0.0006889587904129634, "loss": 2.3621, "step": 3448 }, { "epoch": 0.39548216947597753, "grad_norm": 0.2756653013225916, "learning_rate": 0.0006887868534131124, "loss": 2.5206, "step": 3449 }, { "epoch": 0.3955968352253182, "grad_norm": 0.2383878767816064, "learning_rate": 0.0006886148903737135, "loss": 2.4146, "step": 3450 }, { "epoch": 0.39571150097465885, "grad_norm": 0.25308982538327024, "learning_rate": 0.0006884429013184858, "loss": 2.5237, "step": 3451 }, { "epoch": 0.39582616672399956, "grad_norm": 0.2202443264895399, "learning_rate": 0.000688270886271152, "loss": 2.4316, "step": 3452 }, { "epoch": 0.3959408324733402, "grad_norm": 0.27338372851270565, "learning_rate": 0.0006880988452554382, "loss": 2.4589, "step": 3453 }, { "epoch": 0.3960554982226809, "grad_norm": 0.2648600848676643, "learning_rate": 0.0006879267782950742, "loss": 2.4844, "step": 3454 }, { "epoch": 0.39617016397202154, "grad_norm": 0.240593248731978, "learning_rate": 0.0006877546854137933, "loss": 2.445, "step": 3455 }, { "epoch": 0.39628482972136225, "grad_norm": 0.23166279368435969, "learning_rate": 0.0006875825666353324, "loss": 2.4602, "step": 3456 }, { "epoch": 0.3963994954707029, "grad_norm": 0.25207666298206705, "learning_rate": 0.0006874104219834322, "loss": 2.4716, "step": 3457 }, { "epoch": 0.39651416122004357, "grad_norm": 0.23325698193040936, "learning_rate": 0.0006872382514818365, "loss": 2.5047, "step": 3458 }, { "epoch": 0.39662882696938423, "grad_norm": 0.25142353316306637, "learning_rate": 0.0006870660551542932, "loss": 2.7074, "step": 3459 }, { "epoch": 0.39674349271872494, "grad_norm": 0.26247366942673633, "learning_rate": 0.0006868938330245534, "loss": 2.537, "step": 3460 }, { "epoch": 0.3968581584680656, "grad_norm": 0.21995668989746636, "learning_rate": 0.000686721585116372, "loss": 2.5214, "step": 3461 }, { "epoch": 0.39697282421740626, "grad_norm": 0.2336001441475452, "learning_rate": 0.0006865493114535068, "loss": 2.4334, "step": 3462 }, { "epoch": 0.3970874899667469, "grad_norm": 0.27731185119659035, "learning_rate": 0.0006863770120597204, "loss": 2.4409, "step": 3463 }, { "epoch": 0.3972021557160876, "grad_norm": 0.25157418462555853, "learning_rate": 0.0006862046869587777, "loss": 2.4784, "step": 3464 }, { "epoch": 0.3973168214654283, "grad_norm": 0.22525231345997476, "learning_rate": 0.0006860323361744477, "loss": 2.5486, "step": 3465 }, { "epoch": 0.39743148721476895, "grad_norm": 0.2364383170061946, "learning_rate": 0.0006858599597305033, "loss": 2.4256, "step": 3466 }, { "epoch": 0.3975461529641096, "grad_norm": 0.2551873672426385, "learning_rate": 0.0006856875576507201, "loss": 2.4882, "step": 3467 }, { "epoch": 0.39766081871345027, "grad_norm": 0.24986593911146548, "learning_rate": 0.0006855151299588778, "loss": 2.4913, "step": 3468 }, { "epoch": 0.397775484462791, "grad_norm": 0.2536870339259371, "learning_rate": 0.0006853426766787597, "loss": 2.3994, "step": 3469 }, { "epoch": 0.39789015021213164, "grad_norm": 0.2525393496704512, "learning_rate": 0.0006851701978341522, "loss": 2.442, "step": 3470 }, { "epoch": 0.3980048159614723, "grad_norm": 0.2757037585261663, "learning_rate": 0.0006849976934488456, "loss": 2.5837, "step": 3471 }, { "epoch": 0.39811948171081296, "grad_norm": 0.28814508659361027, "learning_rate": 0.0006848251635466336, "loss": 2.5607, "step": 3472 }, { "epoch": 0.3982341474601537, "grad_norm": 0.2534401532808621, "learning_rate": 0.0006846526081513134, "loss": 2.4603, "step": 3473 }, { "epoch": 0.39834881320949433, "grad_norm": 0.26584622023382487, "learning_rate": 0.0006844800272866856, "loss": 2.5796, "step": 3474 }, { "epoch": 0.398463478958835, "grad_norm": 0.2518383117018804, "learning_rate": 0.0006843074209765545, "loss": 2.4725, "step": 3475 }, { "epoch": 0.39857814470817565, "grad_norm": 0.2739666290635124, "learning_rate": 0.0006841347892447281, "loss": 2.5288, "step": 3476 }, { "epoch": 0.39869281045751637, "grad_norm": 0.2734179509054692, "learning_rate": 0.0006839621321150174, "loss": 2.3651, "step": 3477 }, { "epoch": 0.398807476206857, "grad_norm": 0.260977339350182, "learning_rate": 0.0006837894496112371, "loss": 2.6469, "step": 3478 }, { "epoch": 0.3989221419561977, "grad_norm": 0.26027170895926666, "learning_rate": 0.0006836167417572056, "loss": 2.5171, "step": 3479 }, { "epoch": 0.39903680770553834, "grad_norm": 0.25059843801228093, "learning_rate": 0.0006834440085767447, "loss": 2.5446, "step": 3480 }, { "epoch": 0.399151473454879, "grad_norm": 0.24360400055498876, "learning_rate": 0.0006832712500936795, "loss": 2.3917, "step": 3481 }, { "epoch": 0.3992661392042197, "grad_norm": 0.2314381083023964, "learning_rate": 0.0006830984663318391, "loss": 2.4277, "step": 3482 }, { "epoch": 0.3993808049535604, "grad_norm": 0.23477266980800515, "learning_rate": 0.0006829256573150551, "loss": 2.5436, "step": 3483 }, { "epoch": 0.39949547070290103, "grad_norm": 0.26287394114319207, "learning_rate": 0.0006827528230671636, "loss": 2.4673, "step": 3484 }, { "epoch": 0.3996101364522417, "grad_norm": 0.22709965455363912, "learning_rate": 0.0006825799636120038, "loss": 2.3757, "step": 3485 }, { "epoch": 0.3997248022015824, "grad_norm": 0.23607424202524577, "learning_rate": 0.0006824070789734184, "loss": 2.4539, "step": 3486 }, { "epoch": 0.39983946795092307, "grad_norm": 0.24428311751606094, "learning_rate": 0.0006822341691752532, "loss": 2.4794, "step": 3487 }, { "epoch": 0.3999541337002637, "grad_norm": 0.23607446147673483, "learning_rate": 0.0006820612342413583, "loss": 2.3958, "step": 3488 }, { "epoch": 0.4000687994496044, "grad_norm": 0.22663082098054665, "learning_rate": 0.0006818882741955866, "loss": 2.5992, "step": 3489 }, { "epoch": 0.4001834651989451, "grad_norm": 0.2433514403086265, "learning_rate": 0.0006817152890617943, "loss": 2.5298, "step": 3490 }, { "epoch": 0.40029813094828576, "grad_norm": 0.23454574950120408, "learning_rate": 0.0006815422788638418, "loss": 2.3761, "step": 3491 }, { "epoch": 0.4004127966976264, "grad_norm": 0.25589519711579845, "learning_rate": 0.0006813692436255926, "loss": 2.4322, "step": 3492 }, { "epoch": 0.4005274624469671, "grad_norm": 0.2749015801238749, "learning_rate": 0.0006811961833709132, "loss": 2.4968, "step": 3493 }, { "epoch": 0.4006421281963078, "grad_norm": 0.23676183333455014, "learning_rate": 0.0006810230981236743, "loss": 2.6439, "step": 3494 }, { "epoch": 0.40075679394564845, "grad_norm": 0.26372049753974247, "learning_rate": 0.0006808499879077496, "loss": 2.5209, "step": 3495 }, { "epoch": 0.4008714596949891, "grad_norm": 0.27357719851324697, "learning_rate": 0.0006806768527470165, "loss": 2.6535, "step": 3496 }, { "epoch": 0.40098612544432977, "grad_norm": 0.2412935658786769, "learning_rate": 0.0006805036926653556, "loss": 2.5792, "step": 3497 }, { "epoch": 0.4011007911936704, "grad_norm": 0.25358931216611796, "learning_rate": 0.0006803305076866509, "loss": 2.5416, "step": 3498 }, { "epoch": 0.40121545694301114, "grad_norm": 0.2801363104409284, "learning_rate": 0.0006801572978347901, "loss": 2.5074, "step": 3499 }, { "epoch": 0.4013301226923518, "grad_norm": 0.26628841587940444, "learning_rate": 0.0006799840631336642, "loss": 2.5701, "step": 3500 }, { "epoch": 0.40144478844169246, "grad_norm": 0.2455082170331015, "learning_rate": 0.0006798108036071677, "loss": 2.4808, "step": 3501 }, { "epoch": 0.4015594541910331, "grad_norm": 0.2462018854171415, "learning_rate": 0.0006796375192791982, "loss": 2.4976, "step": 3502 }, { "epoch": 0.40167411994037383, "grad_norm": 0.2598295656209238, "learning_rate": 0.000679464210173657, "loss": 2.5126, "step": 3503 }, { "epoch": 0.4017887856897145, "grad_norm": 0.2698869037579159, "learning_rate": 0.0006792908763144492, "loss": 2.5185, "step": 3504 }, { "epoch": 0.40190345143905515, "grad_norm": 0.24562462027196158, "learning_rate": 0.0006791175177254825, "loss": 2.6687, "step": 3505 }, { "epoch": 0.4020181171883958, "grad_norm": 0.2533011231506365, "learning_rate": 0.0006789441344306684, "loss": 2.6081, "step": 3506 }, { "epoch": 0.4021327829377365, "grad_norm": 0.2151082870041164, "learning_rate": 0.0006787707264539222, "loss": 2.3799, "step": 3507 }, { "epoch": 0.4022474486870772, "grad_norm": 0.2249546366806936, "learning_rate": 0.0006785972938191617, "loss": 2.4518, "step": 3508 }, { "epoch": 0.40236211443641784, "grad_norm": 0.2410686366347411, "learning_rate": 0.0006784238365503089, "loss": 2.3536, "step": 3509 }, { "epoch": 0.4024767801857585, "grad_norm": 0.2510781475727969, "learning_rate": 0.000678250354671289, "loss": 2.5769, "step": 3510 }, { "epoch": 0.4025914459350992, "grad_norm": 0.270632605685379, "learning_rate": 0.0006780768482060305, "loss": 2.6068, "step": 3511 }, { "epoch": 0.40270611168443987, "grad_norm": 0.27269221596085413, "learning_rate": 0.0006779033171784652, "loss": 2.5001, "step": 3512 }, { "epoch": 0.40282077743378053, "grad_norm": 0.24945750276100276, "learning_rate": 0.0006777297616125283, "loss": 2.5231, "step": 3513 }, { "epoch": 0.4029354431831212, "grad_norm": 0.2825273753769796, "learning_rate": 0.0006775561815321589, "loss": 2.6388, "step": 3514 }, { "epoch": 0.40305010893246185, "grad_norm": 0.2707006795252678, "learning_rate": 0.0006773825769612984, "loss": 2.5987, "step": 3515 }, { "epoch": 0.40316477468180256, "grad_norm": 0.26793658776465445, "learning_rate": 0.000677208947923893, "loss": 2.4096, "step": 3516 }, { "epoch": 0.4032794404311432, "grad_norm": 0.24159784148288316, "learning_rate": 0.0006770352944438911, "loss": 2.4356, "step": 3517 }, { "epoch": 0.4033941061804839, "grad_norm": 0.24652558370778413, "learning_rate": 0.0006768616165452447, "loss": 2.5892, "step": 3518 }, { "epoch": 0.40350877192982454, "grad_norm": 0.22357933453286, "learning_rate": 0.0006766879142519098, "loss": 2.5574, "step": 3519 }, { "epoch": 0.40362343767916525, "grad_norm": 0.270288053307248, "learning_rate": 0.0006765141875878449, "loss": 2.5329, "step": 3520 }, { "epoch": 0.4037381034285059, "grad_norm": 0.24454405274851926, "learning_rate": 0.0006763404365770126, "loss": 2.4554, "step": 3521 }, { "epoch": 0.40385276917784657, "grad_norm": 0.24533602135902838, "learning_rate": 0.0006761666612433786, "loss": 2.4467, "step": 3522 }, { "epoch": 0.40396743492718723, "grad_norm": 0.26331189250185794, "learning_rate": 0.0006759928616109115, "loss": 2.3823, "step": 3523 }, { "epoch": 0.40408210067652794, "grad_norm": 0.26912352846662463, "learning_rate": 0.0006758190377035839, "loss": 2.5483, "step": 3524 }, { "epoch": 0.4041967664258686, "grad_norm": 0.2575097892956589, "learning_rate": 0.0006756451895453715, "loss": 2.4343, "step": 3525 }, { "epoch": 0.40431143217520926, "grad_norm": 0.2327774492911776, "learning_rate": 0.0006754713171602533, "loss": 2.3842, "step": 3526 }, { "epoch": 0.4044260979245499, "grad_norm": 0.2956897531261791, "learning_rate": 0.0006752974205722117, "loss": 2.4856, "step": 3527 }, { "epoch": 0.40454076367389064, "grad_norm": 0.22320166242892028, "learning_rate": 0.0006751234998052324, "loss": 2.4065, "step": 3528 }, { "epoch": 0.4046554294232313, "grad_norm": 0.2536882302132791, "learning_rate": 0.0006749495548833044, "loss": 2.4622, "step": 3529 }, { "epoch": 0.40477009517257195, "grad_norm": 0.2575695863121073, "learning_rate": 0.0006747755858304203, "loss": 2.4104, "step": 3530 }, { "epoch": 0.4048847609219126, "grad_norm": 0.2497124793145354, "learning_rate": 0.0006746015926705755, "loss": 2.5257, "step": 3531 }, { "epoch": 0.40499942667125327, "grad_norm": 0.25799276291059586, "learning_rate": 0.0006744275754277694, "loss": 2.4628, "step": 3532 }, { "epoch": 0.405114092420594, "grad_norm": 0.21248725076372407, "learning_rate": 0.0006742535341260038, "loss": 2.528, "step": 3533 }, { "epoch": 0.40522875816993464, "grad_norm": 0.2527115637799467, "learning_rate": 0.000674079468789285, "loss": 2.5432, "step": 3534 }, { "epoch": 0.4053434239192753, "grad_norm": 0.24634507453950957, "learning_rate": 0.0006739053794416217, "loss": 2.5107, "step": 3535 }, { "epoch": 0.40545808966861596, "grad_norm": 0.2387080978589809, "learning_rate": 0.0006737312661070263, "loss": 2.4136, "step": 3536 }, { "epoch": 0.4055727554179567, "grad_norm": 0.26738517616855206, "learning_rate": 0.0006735571288095144, "loss": 2.3522, "step": 3537 }, { "epoch": 0.40568742116729734, "grad_norm": 0.2551480931225143, "learning_rate": 0.0006733829675731047, "loss": 2.5338, "step": 3538 }, { "epoch": 0.405802086916638, "grad_norm": 0.27697632795609856, "learning_rate": 0.0006732087824218197, "loss": 2.5844, "step": 3539 }, { "epoch": 0.40591675266597865, "grad_norm": 0.24403603088332065, "learning_rate": 0.0006730345733796847, "loss": 2.4592, "step": 3540 }, { "epoch": 0.40603141841531937, "grad_norm": 0.27218519247192985, "learning_rate": 0.0006728603404707288, "loss": 2.4427, "step": 3541 }, { "epoch": 0.40614608416466, "grad_norm": 0.2485417122820804, "learning_rate": 0.0006726860837189839, "loss": 2.5799, "step": 3542 }, { "epoch": 0.4062607499140007, "grad_norm": 0.26977814709661535, "learning_rate": 0.0006725118031484855, "loss": 2.6267, "step": 3543 }, { "epoch": 0.40637541566334134, "grad_norm": 0.27824614542165405, "learning_rate": 0.0006723374987832722, "loss": 2.6168, "step": 3544 }, { "epoch": 0.40649008141268206, "grad_norm": 0.220886661938332, "learning_rate": 0.000672163170647386, "loss": 2.348, "step": 3545 }, { "epoch": 0.4066047471620227, "grad_norm": 0.24014108784041616, "learning_rate": 0.0006719888187648721, "loss": 2.6736, "step": 3546 }, { "epoch": 0.4067194129113634, "grad_norm": 0.24548970801057948, "learning_rate": 0.000671814443159779, "loss": 2.5024, "step": 3547 }, { "epoch": 0.40683407866070403, "grad_norm": 0.27866067147075346, "learning_rate": 0.0006716400438561588, "loss": 2.5526, "step": 3548 }, { "epoch": 0.4069487444100447, "grad_norm": 0.24828534859770182, "learning_rate": 0.000671465620878066, "loss": 2.457, "step": 3549 }, { "epoch": 0.4070634101593854, "grad_norm": 0.24418153140688964, "learning_rate": 0.0006712911742495593, "loss": 2.558, "step": 3550 }, { "epoch": 0.40717807590872607, "grad_norm": 0.24879033242886733, "learning_rate": 0.0006711167039947003, "loss": 2.6623, "step": 3551 }, { "epoch": 0.4072927416580667, "grad_norm": 0.23076712626394916, "learning_rate": 0.0006709422101375537, "loss": 2.382, "step": 3552 }, { "epoch": 0.4074074074074074, "grad_norm": 0.233241823757704, "learning_rate": 0.0006707676927021878, "loss": 2.4201, "step": 3553 }, { "epoch": 0.4075220731567481, "grad_norm": 0.2518317952991465, "learning_rate": 0.0006705931517126738, "loss": 2.4629, "step": 3554 }, { "epoch": 0.40763673890608876, "grad_norm": 0.218267423793303, "learning_rate": 0.0006704185871930861, "loss": 2.4346, "step": 3555 }, { "epoch": 0.4077514046554294, "grad_norm": 0.24477868515496426, "learning_rate": 0.0006702439991675029, "loss": 2.4029, "step": 3556 }, { "epoch": 0.4078660704047701, "grad_norm": 0.24641100097704066, "learning_rate": 0.0006700693876600052, "loss": 2.5767, "step": 3557 }, { "epoch": 0.4079807361541108, "grad_norm": 0.27800826495805664, "learning_rate": 0.0006698947526946774, "loss": 2.5602, "step": 3558 }, { "epoch": 0.40809540190345145, "grad_norm": 0.2629583839524337, "learning_rate": 0.0006697200942956068, "loss": 2.55, "step": 3559 }, { "epoch": 0.4082100676527921, "grad_norm": 0.2788832333582697, "learning_rate": 0.0006695454124868843, "loss": 2.572, "step": 3560 }, { "epoch": 0.40832473340213277, "grad_norm": 0.25564654251794505, "learning_rate": 0.0006693707072926041, "loss": 2.3577, "step": 3561 }, { "epoch": 0.4084393991514735, "grad_norm": 0.23763690261754442, "learning_rate": 0.0006691959787368633, "loss": 2.5355, "step": 3562 }, { "epoch": 0.40855406490081414, "grad_norm": 0.2644517325209059, "learning_rate": 0.0006690212268437623, "loss": 2.6073, "step": 3563 }, { "epoch": 0.4086687306501548, "grad_norm": 0.257926060801365, "learning_rate": 0.000668846451637405, "loss": 2.5808, "step": 3564 }, { "epoch": 0.40878339639949546, "grad_norm": 0.27320232523302707, "learning_rate": 0.0006686716531418981, "loss": 2.4574, "step": 3565 }, { "epoch": 0.4088980621488361, "grad_norm": 0.23680436642003821, "learning_rate": 0.000668496831381352, "loss": 2.5671, "step": 3566 }, { "epoch": 0.40901272789817683, "grad_norm": 0.2504073988783255, "learning_rate": 0.0006683219863798797, "loss": 2.4922, "step": 3567 }, { "epoch": 0.4091273936475175, "grad_norm": 0.24519806424163562, "learning_rate": 0.0006681471181615979, "loss": 2.5479, "step": 3568 }, { "epoch": 0.40924205939685815, "grad_norm": 0.22703365616722754, "learning_rate": 0.0006679722267506265, "loss": 2.5293, "step": 3569 }, { "epoch": 0.4093567251461988, "grad_norm": 0.23784346246107996, "learning_rate": 0.0006677973121710881, "loss": 2.5702, "step": 3570 }, { "epoch": 0.4094713908955395, "grad_norm": 0.2526280044762824, "learning_rate": 0.000667622374447109, "loss": 2.5173, "step": 3571 }, { "epoch": 0.4095860566448802, "grad_norm": 0.24647167452323582, "learning_rate": 0.0006674474136028186, "loss": 2.6485, "step": 3572 }, { "epoch": 0.40970072239422084, "grad_norm": 0.2410423997947158, "learning_rate": 0.0006672724296623492, "loss": 2.4487, "step": 3573 }, { "epoch": 0.4098153881435615, "grad_norm": 0.2219595850749613, "learning_rate": 0.0006670974226498367, "loss": 2.5494, "step": 3574 }, { "epoch": 0.4099300538929022, "grad_norm": 0.2672669502068693, "learning_rate": 0.0006669223925894199, "loss": 2.5907, "step": 3575 }, { "epoch": 0.41004471964224287, "grad_norm": 0.25574896327663926, "learning_rate": 0.0006667473395052411, "loss": 2.4905, "step": 3576 }, { "epoch": 0.41015938539158353, "grad_norm": 0.27217312100435875, "learning_rate": 0.000666572263421445, "loss": 2.4457, "step": 3577 }, { "epoch": 0.4102740511409242, "grad_norm": 0.23218071631371606, "learning_rate": 0.0006663971643621803, "loss": 2.6276, "step": 3578 }, { "epoch": 0.4103887168902649, "grad_norm": 0.24049867652940113, "learning_rate": 0.0006662220423515987, "loss": 2.487, "step": 3579 }, { "epoch": 0.41050338263960556, "grad_norm": 0.25520560062515957, "learning_rate": 0.0006660468974138549, "loss": 2.5403, "step": 3580 }, { "epoch": 0.4106180483889462, "grad_norm": 0.26877062694512405, "learning_rate": 0.0006658717295731067, "loss": 2.5569, "step": 3581 }, { "epoch": 0.4107327141382869, "grad_norm": 0.2656020202464822, "learning_rate": 0.0006656965388535153, "loss": 2.5911, "step": 3582 }, { "epoch": 0.41084737988762754, "grad_norm": 0.21984036435657617, "learning_rate": 0.0006655213252792446, "loss": 2.5534, "step": 3583 }, { "epoch": 0.41096204563696825, "grad_norm": 0.2507994646409042, "learning_rate": 0.0006653460888744625, "loss": 2.4291, "step": 3584 }, { "epoch": 0.4110767113863089, "grad_norm": 0.25558310988053434, "learning_rate": 0.000665170829663339, "loss": 2.4945, "step": 3585 }, { "epoch": 0.41119137713564957, "grad_norm": 0.23885816357006967, "learning_rate": 0.000664995547670048, "loss": 2.4038, "step": 3586 }, { "epoch": 0.41130604288499023, "grad_norm": 0.23752428725937302, "learning_rate": 0.0006648202429187664, "loss": 2.4505, "step": 3587 }, { "epoch": 0.41142070863433094, "grad_norm": 0.25642889873560293, "learning_rate": 0.0006646449154336739, "loss": 2.5711, "step": 3588 }, { "epoch": 0.4115353743836716, "grad_norm": 0.2494335750827775, "learning_rate": 0.0006644695652389538, "loss": 2.643, "step": 3589 }, { "epoch": 0.41165004013301226, "grad_norm": 0.2308929404375352, "learning_rate": 0.0006642941923587923, "loss": 2.4435, "step": 3590 }, { "epoch": 0.4117647058823529, "grad_norm": 0.2833753046214647, "learning_rate": 0.0006641187968173788, "loss": 2.5904, "step": 3591 }, { "epoch": 0.41187937163169364, "grad_norm": 0.2623056127707425, "learning_rate": 0.0006639433786389054, "loss": 2.4975, "step": 3592 }, { "epoch": 0.4119940373810343, "grad_norm": 0.2563522846831175, "learning_rate": 0.0006637679378475681, "loss": 2.5313, "step": 3593 }, { "epoch": 0.41210870313037495, "grad_norm": 0.23622099991246473, "learning_rate": 0.0006635924744675656, "loss": 2.4567, "step": 3594 }, { "epoch": 0.4122233688797156, "grad_norm": 0.2557073747048083, "learning_rate": 0.0006634169885230994, "loss": 2.5882, "step": 3595 }, { "epoch": 0.4123380346290563, "grad_norm": 0.23446513582048864, "learning_rate": 0.0006632414800383747, "loss": 2.607, "step": 3596 }, { "epoch": 0.412452700378397, "grad_norm": 0.27078964868869115, "learning_rate": 0.0006630659490375996, "loss": 2.4946, "step": 3597 }, { "epoch": 0.41256736612773764, "grad_norm": 0.2510349246887973, "learning_rate": 0.000662890395544985, "loss": 2.4983, "step": 3598 }, { "epoch": 0.4126820318770783, "grad_norm": 0.2624048298693548, "learning_rate": 0.0006627148195847455, "loss": 2.6635, "step": 3599 }, { "epoch": 0.41279669762641896, "grad_norm": 0.23455693596253335, "learning_rate": 0.000662539221181098, "loss": 2.5564, "step": 3600 }, { "epoch": 0.4129113633757597, "grad_norm": 0.25613737118542623, "learning_rate": 0.0006623636003582633, "loss": 2.4028, "step": 3601 }, { "epoch": 0.41302602912510034, "grad_norm": 0.24004788742980104, "learning_rate": 0.0006621879571404649, "loss": 2.3777, "step": 3602 }, { "epoch": 0.413140694874441, "grad_norm": 0.23932669434737375, "learning_rate": 0.0006620122915519295, "loss": 2.4375, "step": 3603 }, { "epoch": 0.41325536062378165, "grad_norm": 0.24911292126860454, "learning_rate": 0.0006618366036168867, "loss": 2.6164, "step": 3604 }, { "epoch": 0.41337002637312237, "grad_norm": 0.25841433158259774, "learning_rate": 0.0006616608933595692, "loss": 2.3431, "step": 3605 }, { "epoch": 0.413484692122463, "grad_norm": 0.269625998866878, "learning_rate": 0.0006614851608042131, "loss": 2.5073, "step": 3606 }, { "epoch": 0.4135993578718037, "grad_norm": 0.24972738773855116, "learning_rate": 0.0006613094059750573, "loss": 2.5458, "step": 3607 }, { "epoch": 0.41371402362114434, "grad_norm": 0.26382713628262017, "learning_rate": 0.0006611336288963436, "loss": 2.5807, "step": 3608 }, { "epoch": 0.41382868937048506, "grad_norm": 0.22421890646451972, "learning_rate": 0.0006609578295923174, "loss": 2.4474, "step": 3609 }, { "epoch": 0.4139433551198257, "grad_norm": 0.2465673718023568, "learning_rate": 0.0006607820080872267, "loss": 2.4774, "step": 3610 }, { "epoch": 0.4140580208691664, "grad_norm": 0.24079104017880662, "learning_rate": 0.0006606061644053227, "loss": 2.5858, "step": 3611 }, { "epoch": 0.41417268661850704, "grad_norm": 0.2683715937947471, "learning_rate": 0.0006604302985708599, "loss": 2.4917, "step": 3612 }, { "epoch": 0.41428735236784775, "grad_norm": 0.24963967133269516, "learning_rate": 0.0006602544106080955, "loss": 2.5565, "step": 3613 }, { "epoch": 0.4144020181171884, "grad_norm": 0.24601157467161391, "learning_rate": 0.0006600785005412897, "loss": 2.4923, "step": 3614 }, { "epoch": 0.41451668386652907, "grad_norm": 0.26679595250387034, "learning_rate": 0.0006599025683947062, "loss": 2.5377, "step": 3615 }, { "epoch": 0.4146313496158697, "grad_norm": 0.2390027519268333, "learning_rate": 0.0006597266141926115, "loss": 2.4857, "step": 3616 }, { "epoch": 0.4147460153652104, "grad_norm": 0.25384683547884496, "learning_rate": 0.000659550637959275, "loss": 2.4733, "step": 3617 }, { "epoch": 0.4148606811145511, "grad_norm": 0.23825501486501358, "learning_rate": 0.0006593746397189692, "loss": 2.5325, "step": 3618 }, { "epoch": 0.41497534686389176, "grad_norm": 0.23378454436654672, "learning_rate": 0.00065919861949597, "loss": 2.4638, "step": 3619 }, { "epoch": 0.4150900126132324, "grad_norm": 0.22843532370253755, "learning_rate": 0.0006590225773145556, "loss": 2.346, "step": 3620 }, { "epoch": 0.4152046783625731, "grad_norm": 0.2422495616373672, "learning_rate": 0.000658846513199008, "loss": 2.4214, "step": 3621 }, { "epoch": 0.4153193441119138, "grad_norm": 0.2568047936888267, "learning_rate": 0.0006586704271736119, "loss": 2.5909, "step": 3622 }, { "epoch": 0.41543400986125445, "grad_norm": 0.2531581575864393, "learning_rate": 0.0006584943192626549, "loss": 2.3493, "step": 3623 }, { "epoch": 0.4155486756105951, "grad_norm": 0.254469352707778, "learning_rate": 0.0006583181894904277, "loss": 2.4973, "step": 3624 }, { "epoch": 0.41566334135993577, "grad_norm": 0.2414600144394221, "learning_rate": 0.000658142037881224, "loss": 2.5058, "step": 3625 }, { "epoch": 0.4157780071092765, "grad_norm": 0.24872457122834565, "learning_rate": 0.0006579658644593407, "loss": 2.575, "step": 3626 }, { "epoch": 0.41589267285861714, "grad_norm": 0.25032493262135647, "learning_rate": 0.0006577896692490775, "loss": 2.3848, "step": 3627 }, { "epoch": 0.4160073386079578, "grad_norm": 0.22987268254333176, "learning_rate": 0.0006576134522747371, "loss": 2.5329, "step": 3628 }, { "epoch": 0.41612200435729846, "grad_norm": 0.2525234213833264, "learning_rate": 0.0006574372135606254, "loss": 2.4661, "step": 3629 }, { "epoch": 0.4162366701066392, "grad_norm": 0.26996768285629813, "learning_rate": 0.0006572609531310511, "loss": 2.5118, "step": 3630 }, { "epoch": 0.41635133585597983, "grad_norm": 0.23804295742950773, "learning_rate": 0.0006570846710103261, "loss": 2.4934, "step": 3631 }, { "epoch": 0.4164660016053205, "grad_norm": 0.25968217599653287, "learning_rate": 0.0006569083672227649, "loss": 2.5503, "step": 3632 }, { "epoch": 0.41658066735466115, "grad_norm": 0.22898437997056312, "learning_rate": 0.0006567320417926855, "loss": 2.3892, "step": 3633 }, { "epoch": 0.4166953331040018, "grad_norm": 0.2344040277561669, "learning_rate": 0.0006565556947444085, "loss": 2.5231, "step": 3634 }, { "epoch": 0.4168099988533425, "grad_norm": 0.25379413443016824, "learning_rate": 0.0006563793261022575, "loss": 2.43, "step": 3635 }, { "epoch": 0.4169246646026832, "grad_norm": 0.259298949060862, "learning_rate": 0.0006562029358905593, "loss": 2.6198, "step": 3636 }, { "epoch": 0.41703933035202384, "grad_norm": 0.24669975065654337, "learning_rate": 0.0006560265241336438, "loss": 2.4646, "step": 3637 }, { "epoch": 0.4171539961013645, "grad_norm": 0.24109463815641527, "learning_rate": 0.0006558500908558434, "loss": 2.4667, "step": 3638 }, { "epoch": 0.4172686618507052, "grad_norm": 0.2338632750540656, "learning_rate": 0.0006556736360814935, "loss": 2.5078, "step": 3639 }, { "epoch": 0.4173833276000459, "grad_norm": 0.2581976408432289, "learning_rate": 0.000655497159834933, "loss": 2.5959, "step": 3640 }, { "epoch": 0.41749799334938653, "grad_norm": 0.24082886457624678, "learning_rate": 0.0006553206621405037, "loss": 2.5883, "step": 3641 }, { "epoch": 0.4176126590987272, "grad_norm": 0.24071361211778752, "learning_rate": 0.0006551441430225493, "loss": 2.4476, "step": 3642 }, { "epoch": 0.4177273248480679, "grad_norm": 0.2306028421093017, "learning_rate": 0.0006549676025054179, "loss": 2.4317, "step": 3643 }, { "epoch": 0.41784199059740856, "grad_norm": 0.28742091641305784, "learning_rate": 0.0006547910406134597, "loss": 2.5942, "step": 3644 }, { "epoch": 0.4179566563467492, "grad_norm": 0.2565525780662309, "learning_rate": 0.000654614457371028, "loss": 2.4474, "step": 3645 }, { "epoch": 0.4180713220960899, "grad_norm": 0.24571613791616312, "learning_rate": 0.000654437852802479, "loss": 2.4489, "step": 3646 }, { "epoch": 0.4181859878454306, "grad_norm": 0.2605831659380862, "learning_rate": 0.0006542612269321722, "loss": 2.3085, "step": 3647 }, { "epoch": 0.41830065359477125, "grad_norm": 0.2849931032812933, "learning_rate": 0.0006540845797844697, "loss": 2.5407, "step": 3648 }, { "epoch": 0.4184153193441119, "grad_norm": 0.24949532036004737, "learning_rate": 0.0006539079113837363, "loss": 2.4224, "step": 3649 }, { "epoch": 0.4185299850934526, "grad_norm": 0.2343269251897938, "learning_rate": 0.0006537312217543404, "loss": 2.3046, "step": 3650 }, { "epoch": 0.41864465084279323, "grad_norm": 0.2514037358280559, "learning_rate": 0.0006535545109206529, "loss": 2.4381, "step": 3651 }, { "epoch": 0.41875931659213395, "grad_norm": 0.29762551092127837, "learning_rate": 0.0006533777789070475, "loss": 2.4484, "step": 3652 }, { "epoch": 0.4188739823414746, "grad_norm": 0.30377877711475104, "learning_rate": 0.0006532010257379013, "loss": 2.4685, "step": 3653 }, { "epoch": 0.41898864809081526, "grad_norm": 0.28711003071855085, "learning_rate": 0.0006530242514375938, "loss": 2.5352, "step": 3654 }, { "epoch": 0.4191033138401559, "grad_norm": 0.2641730946140604, "learning_rate": 0.0006528474560305076, "loss": 2.4769, "step": 3655 }, { "epoch": 0.41921797958949664, "grad_norm": 0.26220968808717726, "learning_rate": 0.0006526706395410287, "loss": 2.5616, "step": 3656 }, { "epoch": 0.4193326453388373, "grad_norm": 0.24579250232458383, "learning_rate": 0.000652493801993545, "loss": 2.5398, "step": 3657 }, { "epoch": 0.41944731108817795, "grad_norm": 0.2584288253945437, "learning_rate": 0.0006523169434124481, "loss": 2.5222, "step": 3658 }, { "epoch": 0.4195619768375186, "grad_norm": 0.27235083051415143, "learning_rate": 0.0006521400638221324, "loss": 2.3881, "step": 3659 }, { "epoch": 0.4196766425868593, "grad_norm": 0.27625268398247943, "learning_rate": 0.0006519631632469949, "loss": 2.6397, "step": 3660 }, { "epoch": 0.4197913083362, "grad_norm": 0.2515383665546757, "learning_rate": 0.0006517862417114356, "loss": 2.624, "step": 3661 }, { "epoch": 0.41990597408554065, "grad_norm": 0.2460270979163395, "learning_rate": 0.0006516092992398578, "loss": 2.4939, "step": 3662 }, { "epoch": 0.4200206398348813, "grad_norm": 0.23099606469858047, "learning_rate": 0.000651432335856667, "loss": 2.4019, "step": 3663 }, { "epoch": 0.420135305584222, "grad_norm": 0.2552418157454789, "learning_rate": 0.0006512553515862718, "loss": 2.4975, "step": 3664 }, { "epoch": 0.4202499713335627, "grad_norm": 0.23242353915986808, "learning_rate": 0.0006510783464530842, "loss": 2.626, "step": 3665 }, { "epoch": 0.42036463708290334, "grad_norm": 0.22278426422229544, "learning_rate": 0.0006509013204815188, "loss": 2.5481, "step": 3666 }, { "epoch": 0.420479302832244, "grad_norm": 0.23955265080111315, "learning_rate": 0.0006507242736959923, "loss": 2.4875, "step": 3667 }, { "epoch": 0.42059396858158465, "grad_norm": 0.21981000800173509, "learning_rate": 0.0006505472061209255, "loss": 2.5418, "step": 3668 }, { "epoch": 0.42070863433092537, "grad_norm": 0.24129292094650165, "learning_rate": 0.0006503701177807413, "loss": 2.5035, "step": 3669 }, { "epoch": 0.420823300080266, "grad_norm": 0.21527620418362667, "learning_rate": 0.0006501930086998656, "loss": 2.5824, "step": 3670 }, { "epoch": 0.4209379658296067, "grad_norm": 0.24351315526142064, "learning_rate": 0.0006500158789027273, "loss": 2.468, "step": 3671 }, { "epoch": 0.42105263157894735, "grad_norm": 0.22488655184067774, "learning_rate": 0.0006498387284137582, "loss": 2.7689, "step": 3672 }, { "epoch": 0.42116729732828806, "grad_norm": 0.2480513170812118, "learning_rate": 0.0006496615572573925, "loss": 2.331, "step": 3673 }, { "epoch": 0.4212819630776287, "grad_norm": 0.244318439562019, "learning_rate": 0.0006494843654580678, "loss": 2.4074, "step": 3674 }, { "epoch": 0.4213966288269694, "grad_norm": 0.2540218256842001, "learning_rate": 0.0006493071530402244, "loss": 2.414, "step": 3675 }, { "epoch": 0.42151129457631004, "grad_norm": 0.23502327944606952, "learning_rate": 0.0006491299200283054, "loss": 2.592, "step": 3676 }, { "epoch": 0.42162596032565075, "grad_norm": 0.2681510368523747, "learning_rate": 0.0006489526664467565, "loss": 2.638, "step": 3677 }, { "epoch": 0.4217406260749914, "grad_norm": 0.24727921577216871, "learning_rate": 0.0006487753923200264, "loss": 2.5062, "step": 3678 }, { "epoch": 0.42185529182433207, "grad_norm": 0.2331083222229182, "learning_rate": 0.0006485980976725671, "loss": 2.4754, "step": 3679 }, { "epoch": 0.4219699575736727, "grad_norm": 0.24080329141980003, "learning_rate": 0.0006484207825288326, "loss": 2.4949, "step": 3680 }, { "epoch": 0.42208462332301344, "grad_norm": 0.2574465613492795, "learning_rate": 0.0006482434469132803, "loss": 2.5269, "step": 3681 }, { "epoch": 0.4221992890723541, "grad_norm": 0.2649815871035571, "learning_rate": 0.0006480660908503704, "loss": 2.5162, "step": 3682 }, { "epoch": 0.42231395482169476, "grad_norm": 0.25100304750393715, "learning_rate": 0.0006478887143645655, "loss": 2.571, "step": 3683 }, { "epoch": 0.4224286205710354, "grad_norm": 0.28004326570790655, "learning_rate": 0.0006477113174803317, "loss": 2.5961, "step": 3684 }, { "epoch": 0.4225432863203761, "grad_norm": 0.2614567496956879, "learning_rate": 0.0006475339002221371, "loss": 2.5287, "step": 3685 }, { "epoch": 0.4226579520697168, "grad_norm": 0.23950864239161568, "learning_rate": 0.0006473564626144532, "loss": 2.5136, "step": 3686 }, { "epoch": 0.42277261781905745, "grad_norm": 0.25308605542563317, "learning_rate": 0.0006471790046817542, "loss": 2.424, "step": 3687 }, { "epoch": 0.4228872835683981, "grad_norm": 0.23941390075116217, "learning_rate": 0.0006470015264485172, "loss": 2.4019, "step": 3688 }, { "epoch": 0.42300194931773877, "grad_norm": 0.22787922982155834, "learning_rate": 0.0006468240279392214, "loss": 2.5225, "step": 3689 }, { "epoch": 0.4231166150670795, "grad_norm": 0.2261014479388271, "learning_rate": 0.0006466465091783497, "loss": 2.5607, "step": 3690 }, { "epoch": 0.42323128081642014, "grad_norm": 0.24999187077182888, "learning_rate": 0.0006464689701903876, "loss": 2.4943, "step": 3691 }, { "epoch": 0.4233459465657608, "grad_norm": 0.22771112716645384, "learning_rate": 0.000646291410999823, "loss": 2.444, "step": 3692 }, { "epoch": 0.42346061231510146, "grad_norm": 0.25846594342301527, "learning_rate": 0.0006461138316311467, "loss": 2.5091, "step": 3693 }, { "epoch": 0.4235752780644422, "grad_norm": 0.25574418375764013, "learning_rate": 0.0006459362321088527, "loss": 2.4611, "step": 3694 }, { "epoch": 0.42368994381378283, "grad_norm": 0.2575415996951374, "learning_rate": 0.000645758612457437, "loss": 2.5979, "step": 3695 }, { "epoch": 0.4238046095631235, "grad_norm": 0.23552465592246552, "learning_rate": 0.0006455809727013992, "loss": 2.4927, "step": 3696 }, { "epoch": 0.42391927531246415, "grad_norm": 0.24466490532846727, "learning_rate": 0.0006454033128652414, "loss": 2.4182, "step": 3697 }, { "epoch": 0.42403394106180486, "grad_norm": 0.2338675433072939, "learning_rate": 0.0006452256329734682, "loss": 2.457, "step": 3698 }, { "epoch": 0.4241486068111455, "grad_norm": 0.24434853884220542, "learning_rate": 0.0006450479330505869, "loss": 2.4482, "step": 3699 }, { "epoch": 0.4242632725604862, "grad_norm": 0.24994143018382756, "learning_rate": 0.0006448702131211084, "loss": 2.484, "step": 3700 }, { "epoch": 0.42437793830982684, "grad_norm": 0.270087048825776, "learning_rate": 0.0006446924732095455, "loss": 2.5821, "step": 3701 }, { "epoch": 0.4244926040591675, "grad_norm": 0.2663004701614018, "learning_rate": 0.0006445147133404139, "loss": 2.6339, "step": 3702 }, { "epoch": 0.4246072698085082, "grad_norm": 0.2571666460587385, "learning_rate": 0.0006443369335382322, "loss": 2.5783, "step": 3703 }, { "epoch": 0.4247219355578489, "grad_norm": 0.24683490224213048, "learning_rate": 0.000644159133827522, "loss": 2.5038, "step": 3704 }, { "epoch": 0.42483660130718953, "grad_norm": 0.23592983938425593, "learning_rate": 0.000643981314232807, "loss": 2.4197, "step": 3705 }, { "epoch": 0.4249512670565302, "grad_norm": 0.2572591280936109, "learning_rate": 0.0006438034747786144, "loss": 2.5235, "step": 3706 }, { "epoch": 0.4250659328058709, "grad_norm": 0.2640370097849018, "learning_rate": 0.0006436256154894737, "loss": 2.6165, "step": 3707 }, { "epoch": 0.42518059855521156, "grad_norm": 0.23384258285351509, "learning_rate": 0.0006434477363899168, "loss": 2.5107, "step": 3708 }, { "epoch": 0.4252952643045522, "grad_norm": 0.23186840917901988, "learning_rate": 0.0006432698375044793, "loss": 2.4481, "step": 3709 }, { "epoch": 0.4254099300538929, "grad_norm": 0.2443600239659201, "learning_rate": 0.0006430919188576986, "loss": 2.6296, "step": 3710 }, { "epoch": 0.4255245958032336, "grad_norm": 0.2356305614139818, "learning_rate": 0.0006429139804741151, "loss": 2.3981, "step": 3711 }, { "epoch": 0.42563926155257426, "grad_norm": 0.24227539571917678, "learning_rate": 0.0006427360223782726, "loss": 2.5319, "step": 3712 }, { "epoch": 0.4257539273019149, "grad_norm": 0.2645723066921694, "learning_rate": 0.0006425580445947163, "loss": 2.492, "step": 3713 }, { "epoch": 0.4258685930512556, "grad_norm": 0.23120728446033437, "learning_rate": 0.000642380047147995, "loss": 2.67, "step": 3714 }, { "epoch": 0.4259832588005963, "grad_norm": 0.2648993174355372, "learning_rate": 0.0006422020300626604, "loss": 2.3976, "step": 3715 }, { "epoch": 0.42609792454993695, "grad_norm": 0.2484545470537326, "learning_rate": 0.0006420239933632666, "loss": 2.4172, "step": 3716 }, { "epoch": 0.4262125902992776, "grad_norm": 0.27046625234251764, "learning_rate": 0.0006418459370743698, "loss": 2.5206, "step": 3717 }, { "epoch": 0.42632725604861826, "grad_norm": 0.31105171709620916, "learning_rate": 0.0006416678612205298, "loss": 2.4857, "step": 3718 }, { "epoch": 0.4264419217979589, "grad_norm": 0.2581816029461363, "learning_rate": 0.000641489765826309, "loss": 2.5993, "step": 3719 }, { "epoch": 0.42655658754729964, "grad_norm": 0.2781239094791762, "learning_rate": 0.0006413116509162719, "loss": 2.4373, "step": 3720 }, { "epoch": 0.4266712532966403, "grad_norm": 0.2292560634301624, "learning_rate": 0.0006411335165149863, "loss": 2.2859, "step": 3721 }, { "epoch": 0.42678591904598095, "grad_norm": 0.2456432796770211, "learning_rate": 0.0006409553626470223, "loss": 2.4036, "step": 3722 }, { "epoch": 0.4269005847953216, "grad_norm": 0.2629152475483945, "learning_rate": 0.0006407771893369529, "loss": 2.4365, "step": 3723 }, { "epoch": 0.42701525054466233, "grad_norm": 0.3050686891133825, "learning_rate": 0.0006405989966093536, "loss": 2.4266, "step": 3724 }, { "epoch": 0.427129916294003, "grad_norm": 0.2631024282904743, "learning_rate": 0.0006404207844888029, "loss": 2.4018, "step": 3725 }, { "epoch": 0.42724458204334365, "grad_norm": 0.26369223526007296, "learning_rate": 0.0006402425529998816, "loss": 2.5805, "step": 3726 }, { "epoch": 0.4273592477926843, "grad_norm": 0.23744506051488645, "learning_rate": 0.0006400643021671734, "loss": 2.4739, "step": 3727 }, { "epoch": 0.427473913542025, "grad_norm": 0.24404333486975455, "learning_rate": 0.0006398860320152645, "loss": 2.5921, "step": 3728 }, { "epoch": 0.4275885792913657, "grad_norm": 0.29878729898666484, "learning_rate": 0.0006397077425687441, "loss": 2.6021, "step": 3729 }, { "epoch": 0.42770324504070634, "grad_norm": 0.22748280688714, "learning_rate": 0.0006395294338522034, "loss": 2.4298, "step": 3730 }, { "epoch": 0.427817910790047, "grad_norm": 0.23387627875344763, "learning_rate": 0.0006393511058902373, "loss": 2.3872, "step": 3731 }, { "epoch": 0.4279325765393877, "grad_norm": 0.2835332758032488, "learning_rate": 0.0006391727587074423, "loss": 2.5757, "step": 3732 }, { "epoch": 0.42804724228872837, "grad_norm": 0.2312958461587959, "learning_rate": 0.000638994392328418, "loss": 2.396, "step": 3733 }, { "epoch": 0.42816190803806903, "grad_norm": 0.25887357672841227, "learning_rate": 0.0006388160067777669, "loss": 2.5799, "step": 3734 }, { "epoch": 0.4282765737874097, "grad_norm": 0.307914208229826, "learning_rate": 0.0006386376020800936, "loss": 2.5125, "step": 3735 }, { "epoch": 0.42839123953675035, "grad_norm": 0.2680163434022882, "learning_rate": 0.0006384591782600058, "loss": 2.4314, "step": 3736 }, { "epoch": 0.42850590528609106, "grad_norm": 0.28050719326701523, "learning_rate": 0.0006382807353421138, "loss": 2.7169, "step": 3737 }, { "epoch": 0.4286205710354317, "grad_norm": 0.24226221061292363, "learning_rate": 0.0006381022733510299, "loss": 2.6226, "step": 3738 }, { "epoch": 0.4287352367847724, "grad_norm": 0.24580847946134887, "learning_rate": 0.0006379237923113701, "loss": 2.5466, "step": 3739 }, { "epoch": 0.42884990253411304, "grad_norm": 0.25087045518254625, "learning_rate": 0.0006377452922477523, "loss": 2.4611, "step": 3740 }, { "epoch": 0.42896456828345375, "grad_norm": 0.23830860797130302, "learning_rate": 0.0006375667731847969, "loss": 2.4343, "step": 3741 }, { "epoch": 0.4290792340327944, "grad_norm": 0.25650783675335387, "learning_rate": 0.0006373882351471275, "loss": 2.5961, "step": 3742 }, { "epoch": 0.42919389978213507, "grad_norm": 0.24559551902348492, "learning_rate": 0.0006372096781593699, "loss": 2.4247, "step": 3743 }, { "epoch": 0.4293085655314757, "grad_norm": 0.2446883345321881, "learning_rate": 0.0006370311022461528, "loss": 2.6487, "step": 3744 }, { "epoch": 0.42942323128081644, "grad_norm": 0.24647327899397467, "learning_rate": 0.000636852507432107, "loss": 2.5216, "step": 3745 }, { "epoch": 0.4295378970301571, "grad_norm": 0.265291680237914, "learning_rate": 0.0006366738937418664, "loss": 2.4052, "step": 3746 }, { "epoch": 0.42965256277949776, "grad_norm": 0.24621810515357678, "learning_rate": 0.0006364952612000676, "loss": 2.5064, "step": 3747 }, { "epoch": 0.4297672285288384, "grad_norm": 0.2378835341393557, "learning_rate": 0.0006363166098313492, "loss": 2.4359, "step": 3748 }, { "epoch": 0.42988189427817913, "grad_norm": 0.2267593299771046, "learning_rate": 0.0006361379396603529, "loss": 2.3247, "step": 3749 }, { "epoch": 0.4299965600275198, "grad_norm": 0.24720079023203084, "learning_rate": 0.0006359592507117229, "loss": 2.3598, "step": 3750 }, { "epoch": 0.43011122577686045, "grad_norm": 0.26042743558804593, "learning_rate": 0.000635780543010106, "loss": 2.4743, "step": 3751 }, { "epoch": 0.4302258915262011, "grad_norm": 0.26468915150417194, "learning_rate": 0.0006356018165801511, "loss": 2.4608, "step": 3752 }, { "epoch": 0.43034055727554177, "grad_norm": 0.2305352737816054, "learning_rate": 0.0006354230714465107, "loss": 2.5031, "step": 3753 }, { "epoch": 0.4304552230248825, "grad_norm": 0.2496842595140439, "learning_rate": 0.000635244307633839, "loss": 2.4778, "step": 3754 }, { "epoch": 0.43056988877422314, "grad_norm": 0.2590041276695064, "learning_rate": 0.0006350655251667927, "loss": 2.4273, "step": 3755 }, { "epoch": 0.4306845545235638, "grad_norm": 0.2648184856956326, "learning_rate": 0.0006348867240700321, "loss": 2.4532, "step": 3756 }, { "epoch": 0.43079922027290446, "grad_norm": 0.2545433220533618, "learning_rate": 0.0006347079043682191, "loss": 2.6002, "step": 3757 }, { "epoch": 0.4309138860222452, "grad_norm": 0.2465823638818654, "learning_rate": 0.0006345290660860184, "loss": 2.4923, "step": 3758 }, { "epoch": 0.43102855177158583, "grad_norm": 0.23451123863473178, "learning_rate": 0.0006343502092480973, "loss": 2.4748, "step": 3759 }, { "epoch": 0.4311432175209265, "grad_norm": 0.2338368793288413, "learning_rate": 0.0006341713338791258, "loss": 2.4136, "step": 3760 }, { "epoch": 0.43125788327026715, "grad_norm": 0.2324315784484788, "learning_rate": 0.0006339924400037765, "loss": 2.5137, "step": 3761 }, { "epoch": 0.43137254901960786, "grad_norm": 0.2281217999996604, "learning_rate": 0.0006338135276467241, "loss": 2.463, "step": 3762 }, { "epoch": 0.4314872147689485, "grad_norm": 0.24134162249044894, "learning_rate": 0.0006336345968326462, "loss": 2.4698, "step": 3763 }, { "epoch": 0.4316018805182892, "grad_norm": 0.24365597122225233, "learning_rate": 0.0006334556475862231, "loss": 2.4225, "step": 3764 }, { "epoch": 0.43171654626762984, "grad_norm": 0.22942000523899764, "learning_rate": 0.0006332766799321372, "loss": 2.6032, "step": 3765 }, { "epoch": 0.43183121201697056, "grad_norm": 0.2296419363673588, "learning_rate": 0.000633097693895074, "loss": 2.5136, "step": 3766 }, { "epoch": 0.4319458777663112, "grad_norm": 0.23517661546603696, "learning_rate": 0.0006329186894997208, "loss": 2.528, "step": 3767 }, { "epoch": 0.4320605435156519, "grad_norm": 0.25223796241212604, "learning_rate": 0.0006327396667707682, "loss": 2.4296, "step": 3768 }, { "epoch": 0.43217520926499253, "grad_norm": 0.30022384597663754, "learning_rate": 0.0006325606257329086, "loss": 2.5669, "step": 3769 }, { "epoch": 0.4322898750143332, "grad_norm": 0.2245703909708394, "learning_rate": 0.0006323815664108376, "loss": 2.356, "step": 3770 }, { "epoch": 0.4324045407636739, "grad_norm": 0.2606408643947649, "learning_rate": 0.000632202488829253, "loss": 2.5251, "step": 3771 }, { "epoch": 0.43251920651301456, "grad_norm": 0.23537474111510084, "learning_rate": 0.0006320233930128551, "loss": 2.4366, "step": 3772 }, { "epoch": 0.4326338722623552, "grad_norm": 0.2356086924610653, "learning_rate": 0.0006318442789863466, "loss": 2.5444, "step": 3773 }, { "epoch": 0.4327485380116959, "grad_norm": 0.2531315608513024, "learning_rate": 0.0006316651467744332, "loss": 2.3707, "step": 3774 }, { "epoch": 0.4328632037610366, "grad_norm": 0.2594569588939986, "learning_rate": 0.0006314859964018224, "loss": 2.5726, "step": 3775 }, { "epoch": 0.43297786951037726, "grad_norm": 0.24059963468784043, "learning_rate": 0.0006313068278932248, "loss": 2.7038, "step": 3776 }, { "epoch": 0.4330925352597179, "grad_norm": 0.25555491876004655, "learning_rate": 0.0006311276412733532, "loss": 2.5235, "step": 3777 }, { "epoch": 0.4332072010090586, "grad_norm": 0.25295838276705923, "learning_rate": 0.000630948436566923, "loss": 2.4533, "step": 3778 }, { "epoch": 0.4333218667583993, "grad_norm": 0.2688805844503762, "learning_rate": 0.0006307692137986522, "loss": 2.4243, "step": 3779 }, { "epoch": 0.43343653250773995, "grad_norm": 0.2188599506955828, "learning_rate": 0.0006305899729932608, "loss": 2.475, "step": 3780 }, { "epoch": 0.4335511982570806, "grad_norm": 0.24833190551796147, "learning_rate": 0.0006304107141754721, "loss": 2.4303, "step": 3781 }, { "epoch": 0.43366586400642126, "grad_norm": 0.24515110713446844, "learning_rate": 0.0006302314373700113, "loss": 2.4447, "step": 3782 }, { "epoch": 0.433780529755762, "grad_norm": 0.23527766995876795, "learning_rate": 0.000630052142601606, "loss": 2.4665, "step": 3783 }, { "epoch": 0.43389519550510264, "grad_norm": 0.26971669511913365, "learning_rate": 0.0006298728298949866, "loss": 2.5148, "step": 3784 }, { "epoch": 0.4340098612544433, "grad_norm": 0.24451204525007422, "learning_rate": 0.0006296934992748859, "loss": 2.4205, "step": 3785 }, { "epoch": 0.43412452700378396, "grad_norm": 0.2578467598176871, "learning_rate": 0.0006295141507660394, "loss": 2.4938, "step": 3786 }, { "epoch": 0.4342391927531246, "grad_norm": 0.23212579148929557, "learning_rate": 0.0006293347843931844, "loss": 2.4657, "step": 3787 }, { "epoch": 0.43435385850246533, "grad_norm": 0.25282789182567583, "learning_rate": 0.0006291554001810612, "loss": 2.4794, "step": 3788 }, { "epoch": 0.434468524251806, "grad_norm": 0.24879253120626094, "learning_rate": 0.0006289759981544126, "loss": 2.4861, "step": 3789 }, { "epoch": 0.43458319000114665, "grad_norm": 0.24143234228504765, "learning_rate": 0.0006287965783379834, "loss": 2.497, "step": 3790 }, { "epoch": 0.4346978557504873, "grad_norm": 0.2355557012793554, "learning_rate": 0.0006286171407565214, "loss": 2.3984, "step": 3791 }, { "epoch": 0.434812521499828, "grad_norm": 0.25739912543839666, "learning_rate": 0.0006284376854347766, "loss": 2.3254, "step": 3792 }, { "epoch": 0.4349271872491687, "grad_norm": 0.24402508892393346, "learning_rate": 0.0006282582123975011, "loss": 2.5377, "step": 3793 }, { "epoch": 0.43504185299850934, "grad_norm": 0.2773765602416316, "learning_rate": 0.0006280787216694502, "loss": 2.5083, "step": 3794 }, { "epoch": 0.43515651874785, "grad_norm": 0.2446188541949575, "learning_rate": 0.000627899213275381, "loss": 2.4669, "step": 3795 }, { "epoch": 0.4352711844971907, "grad_norm": 0.25016913690541104, "learning_rate": 0.0006277196872400534, "loss": 2.4943, "step": 3796 }, { "epoch": 0.43538585024653137, "grad_norm": 0.24296763525600565, "learning_rate": 0.0006275401435882294, "loss": 2.6248, "step": 3797 }, { "epoch": 0.43550051599587203, "grad_norm": 0.23540469014095888, "learning_rate": 0.0006273605823446738, "loss": 2.5115, "step": 3798 }, { "epoch": 0.4356151817452127, "grad_norm": 0.24191755703757348, "learning_rate": 0.0006271810035341534, "loss": 2.4802, "step": 3799 }, { "epoch": 0.4357298474945534, "grad_norm": 0.24929082712908535, "learning_rate": 0.000627001407181438, "loss": 2.5123, "step": 3800 }, { "epoch": 0.43584451324389406, "grad_norm": 0.2607264207787983, "learning_rate": 0.0006268217933112994, "loss": 2.4693, "step": 3801 }, { "epoch": 0.4359591789932347, "grad_norm": 0.22791731122897538, "learning_rate": 0.0006266421619485116, "loss": 2.5408, "step": 3802 }, { "epoch": 0.4360738447425754, "grad_norm": 0.2613530812527927, "learning_rate": 0.0006264625131178519, "loss": 2.5761, "step": 3803 }, { "epoch": 0.43618851049191604, "grad_norm": 0.24247552865664587, "learning_rate": 0.000626282846844099, "loss": 2.5135, "step": 3804 }, { "epoch": 0.43630317624125675, "grad_norm": 0.2497939969879368, "learning_rate": 0.0006261031631520345, "loss": 2.556, "step": 3805 }, { "epoch": 0.4364178419905974, "grad_norm": 0.2444041005359748, "learning_rate": 0.0006259234620664423, "loss": 2.5556, "step": 3806 }, { "epoch": 0.43653250773993807, "grad_norm": 0.26548794040209045, "learning_rate": 0.0006257437436121091, "loss": 2.6427, "step": 3807 }, { "epoch": 0.43664717348927873, "grad_norm": 0.25278498904062957, "learning_rate": 0.0006255640078138229, "loss": 2.5058, "step": 3808 }, { "epoch": 0.43676183923861944, "grad_norm": 0.22627951201911523, "learning_rate": 0.0006253842546963757, "loss": 2.4026, "step": 3809 }, { "epoch": 0.4368765049879601, "grad_norm": 0.22829330932828906, "learning_rate": 0.0006252044842845606, "loss": 2.4813, "step": 3810 }, { "epoch": 0.43699117073730076, "grad_norm": 0.22259102899055572, "learning_rate": 0.0006250246966031733, "loss": 2.5642, "step": 3811 }, { "epoch": 0.4371058364866414, "grad_norm": 0.2381572298918821, "learning_rate": 0.0006248448916770124, "loss": 2.434, "step": 3812 }, { "epoch": 0.43722050223598213, "grad_norm": 0.22236994168538668, "learning_rate": 0.0006246650695308784, "loss": 2.4427, "step": 3813 }, { "epoch": 0.4373351679853228, "grad_norm": 0.22964165361230704, "learning_rate": 0.0006244852301895745, "loss": 2.4248, "step": 3814 }, { "epoch": 0.43744983373466345, "grad_norm": 0.24227387661447042, "learning_rate": 0.0006243053736779058, "loss": 2.4845, "step": 3815 }, { "epoch": 0.4375644994840041, "grad_norm": 0.26371652912392507, "learning_rate": 0.0006241255000206806, "loss": 2.617, "step": 3816 }, { "epoch": 0.4376791652333448, "grad_norm": 0.2569499544365948, "learning_rate": 0.0006239456092427085, "loss": 2.4847, "step": 3817 }, { "epoch": 0.4377938309826855, "grad_norm": 0.25737505633385255, "learning_rate": 0.0006237657013688022, "loss": 2.3512, "step": 3818 }, { "epoch": 0.43790849673202614, "grad_norm": 0.28322089965060304, "learning_rate": 0.0006235857764237767, "loss": 2.419, "step": 3819 }, { "epoch": 0.4380231624813668, "grad_norm": 0.25947777793560933, "learning_rate": 0.0006234058344324491, "loss": 2.4968, "step": 3820 }, { "epoch": 0.43813782823070746, "grad_norm": 0.22628442212614977, "learning_rate": 0.0006232258754196388, "loss": 2.5635, "step": 3821 }, { "epoch": 0.4382524939800482, "grad_norm": 0.26789431207644143, "learning_rate": 0.0006230458994101681, "loss": 2.4917, "step": 3822 }, { "epoch": 0.43836715972938883, "grad_norm": 0.25978882136460274, "learning_rate": 0.0006228659064288609, "loss": 2.6976, "step": 3823 }, { "epoch": 0.4384818254787295, "grad_norm": 0.22690374013611359, "learning_rate": 0.0006226858965005439, "loss": 2.448, "step": 3824 }, { "epoch": 0.43859649122807015, "grad_norm": 0.22572923677405254, "learning_rate": 0.0006225058696500462, "loss": 2.4013, "step": 3825 }, { "epoch": 0.43871115697741087, "grad_norm": 0.2446367548888773, "learning_rate": 0.000622325825902199, "loss": 2.453, "step": 3826 }, { "epoch": 0.4388258227267515, "grad_norm": 0.21250331424107224, "learning_rate": 0.0006221457652818357, "loss": 2.5505, "step": 3827 }, { "epoch": 0.4389404884760922, "grad_norm": 0.2221859098836378, "learning_rate": 0.0006219656878137925, "loss": 2.5177, "step": 3828 }, { "epoch": 0.43905515422543284, "grad_norm": 0.2258421138052024, "learning_rate": 0.0006217855935229075, "loss": 2.4143, "step": 3829 }, { "epoch": 0.43916981997477356, "grad_norm": 0.2359458275586472, "learning_rate": 0.0006216054824340212, "loss": 2.5349, "step": 3830 }, { "epoch": 0.4392844857241142, "grad_norm": 0.26109347273243705, "learning_rate": 0.0006214253545719768, "loss": 2.6663, "step": 3831 }, { "epoch": 0.4393991514734549, "grad_norm": 0.22306737299137816, "learning_rate": 0.0006212452099616194, "loss": 2.6206, "step": 3832 }, { "epoch": 0.43951381722279553, "grad_norm": 0.25070728465884073, "learning_rate": 0.0006210650486277961, "loss": 2.4855, "step": 3833 }, { "epoch": 0.43962848297213625, "grad_norm": 0.26101690754790224, "learning_rate": 0.0006208848705953573, "loss": 2.5547, "step": 3834 }, { "epoch": 0.4397431487214769, "grad_norm": 0.25975082801274285, "learning_rate": 0.0006207046758891548, "loss": 2.4832, "step": 3835 }, { "epoch": 0.43985781447081757, "grad_norm": 0.2393787817938355, "learning_rate": 0.0006205244645340431, "loss": 2.4284, "step": 3836 }, { "epoch": 0.4399724802201582, "grad_norm": 0.23961291386277636, "learning_rate": 0.0006203442365548791, "loss": 2.4275, "step": 3837 }, { "epoch": 0.4400871459694989, "grad_norm": 0.26333085205637896, "learning_rate": 0.0006201639919765214, "loss": 2.4963, "step": 3838 }, { "epoch": 0.4402018117188396, "grad_norm": 0.25231724731244304, "learning_rate": 0.0006199837308238315, "loss": 2.4523, "step": 3839 }, { "epoch": 0.44031647746818026, "grad_norm": 0.2633268161070358, "learning_rate": 0.0006198034531216731, "loss": 2.7312, "step": 3840 }, { "epoch": 0.4404311432175209, "grad_norm": 0.2748456169816904, "learning_rate": 0.0006196231588949121, "loss": 2.4758, "step": 3841 }, { "epoch": 0.4405458089668616, "grad_norm": 0.28739378399684495, "learning_rate": 0.0006194428481684166, "loss": 2.6028, "step": 3842 }, { "epoch": 0.4406604747162023, "grad_norm": 0.27796273392966664, "learning_rate": 0.0006192625209670568, "loss": 2.3836, "step": 3843 }, { "epoch": 0.44077514046554295, "grad_norm": 0.2892768369562647, "learning_rate": 0.0006190821773157058, "loss": 2.5094, "step": 3844 }, { "epoch": 0.4408898062148836, "grad_norm": 0.24011257302760441, "learning_rate": 0.0006189018172392382, "loss": 2.5442, "step": 3845 }, { "epoch": 0.44100447196422426, "grad_norm": 0.2632337942022086, "learning_rate": 0.0006187214407625313, "loss": 2.4544, "step": 3846 }, { "epoch": 0.441119137713565, "grad_norm": 0.24290546998360119, "learning_rate": 0.000618541047910465, "loss": 2.4866, "step": 3847 }, { "epoch": 0.44123380346290564, "grad_norm": 0.25689002083208545, "learning_rate": 0.0006183606387079205, "loss": 2.6536, "step": 3848 }, { "epoch": 0.4413484692122463, "grad_norm": 0.2637469231521172, "learning_rate": 0.0006181802131797821, "loss": 2.5916, "step": 3849 }, { "epoch": 0.44146313496158696, "grad_norm": 0.22675212961957192, "learning_rate": 0.0006179997713509359, "loss": 2.5034, "step": 3850 }, { "epoch": 0.44157780071092767, "grad_norm": 0.2384649535056212, "learning_rate": 0.0006178193132462706, "loss": 2.4278, "step": 3851 }, { "epoch": 0.44169246646026833, "grad_norm": 0.2349726456516134, "learning_rate": 0.000617638838890677, "loss": 2.6831, "step": 3852 }, { "epoch": 0.441807132209609, "grad_norm": 0.2633477014569964, "learning_rate": 0.0006174583483090478, "loss": 2.4221, "step": 3853 }, { "epoch": 0.44192179795894965, "grad_norm": 0.2332608319041885, "learning_rate": 0.0006172778415262785, "loss": 2.3146, "step": 3854 }, { "epoch": 0.44203646370829036, "grad_norm": 0.30116217089939906, "learning_rate": 0.0006170973185672664, "loss": 2.345, "step": 3855 }, { "epoch": 0.442151129457631, "grad_norm": 0.2368935097053184, "learning_rate": 0.0006169167794569114, "loss": 2.5089, "step": 3856 }, { "epoch": 0.4422657952069717, "grad_norm": 0.27957531433546395, "learning_rate": 0.0006167362242201153, "loss": 2.4535, "step": 3857 }, { "epoch": 0.44238046095631234, "grad_norm": 0.27333998451105695, "learning_rate": 0.000616555652881782, "loss": 2.5352, "step": 3858 }, { "epoch": 0.442495126705653, "grad_norm": 0.25179900070696376, "learning_rate": 0.0006163750654668186, "loss": 2.533, "step": 3859 }, { "epoch": 0.4426097924549937, "grad_norm": 0.24506089272052226, "learning_rate": 0.0006161944620001328, "loss": 2.591, "step": 3860 }, { "epoch": 0.44272445820433437, "grad_norm": 0.2488847921021115, "learning_rate": 0.0006160138425066361, "loss": 2.4796, "step": 3861 }, { "epoch": 0.44283912395367503, "grad_norm": 0.2604219886655084, "learning_rate": 0.0006158332070112412, "loss": 2.4917, "step": 3862 }, { "epoch": 0.4429537897030157, "grad_norm": 0.258590520124936, "learning_rate": 0.0006156525555388633, "loss": 2.5954, "step": 3863 }, { "epoch": 0.4430684554523564, "grad_norm": 0.2387580872999713, "learning_rate": 0.0006154718881144199, "loss": 2.4027, "step": 3864 }, { "epoch": 0.44318312120169706, "grad_norm": 0.24549099971447785, "learning_rate": 0.0006152912047628307, "loss": 2.4684, "step": 3865 }, { "epoch": 0.4432977869510377, "grad_norm": 0.2508850192649552, "learning_rate": 0.0006151105055090174, "loss": 2.5822, "step": 3866 }, { "epoch": 0.4434124527003784, "grad_norm": 0.2194730128716753, "learning_rate": 0.0006149297903779042, "loss": 2.6273, "step": 3867 }, { "epoch": 0.4435271184497191, "grad_norm": 0.24156538980703018, "learning_rate": 0.000614749059394417, "loss": 2.4084, "step": 3868 }, { "epoch": 0.44364178419905975, "grad_norm": 0.24259527687767338, "learning_rate": 0.0006145683125834846, "loss": 2.5858, "step": 3869 }, { "epoch": 0.4437564499484004, "grad_norm": 0.25413084606845093, "learning_rate": 0.000614387549970037, "loss": 2.6548, "step": 3870 }, { "epoch": 0.44387111569774107, "grad_norm": 0.19819001733668784, "learning_rate": 0.0006142067715790077, "loss": 2.4568, "step": 3871 }, { "epoch": 0.4439857814470818, "grad_norm": 0.24614956490109688, "learning_rate": 0.000614025977435331, "loss": 2.5309, "step": 3872 }, { "epoch": 0.44410044719642244, "grad_norm": 0.22962040823978638, "learning_rate": 0.0006138451675639443, "loss": 2.4202, "step": 3873 }, { "epoch": 0.4442151129457631, "grad_norm": 0.23898955761888835, "learning_rate": 0.0006136643419897867, "loss": 2.5688, "step": 3874 }, { "epoch": 0.44432977869510376, "grad_norm": 0.2434298411416163, "learning_rate": 0.0006134835007377999, "loss": 2.6224, "step": 3875 }, { "epoch": 0.4444444444444444, "grad_norm": 0.2552399708065581, "learning_rate": 0.0006133026438329274, "loss": 2.426, "step": 3876 }, { "epoch": 0.44455911019378513, "grad_norm": 0.24162588918886282, "learning_rate": 0.0006131217713001148, "loss": 2.5712, "step": 3877 }, { "epoch": 0.4446737759431258, "grad_norm": 0.23899440037493203, "learning_rate": 0.0006129408831643102, "loss": 2.4479, "step": 3878 }, { "epoch": 0.44478844169246645, "grad_norm": 0.23280427428474385, "learning_rate": 0.0006127599794504637, "loss": 2.5349, "step": 3879 }, { "epoch": 0.4449031074418071, "grad_norm": 0.22526148695649914, "learning_rate": 0.0006125790601835272, "loss": 2.6287, "step": 3880 }, { "epoch": 0.4450177731911478, "grad_norm": 0.23862405763550582, "learning_rate": 0.0006123981253884556, "loss": 2.4412, "step": 3881 }, { "epoch": 0.4451324389404885, "grad_norm": 0.27119703023534425, "learning_rate": 0.000612217175090205, "loss": 2.5351, "step": 3882 }, { "epoch": 0.44524710468982914, "grad_norm": 0.23155040221695772, "learning_rate": 0.000612036209313734, "loss": 2.4429, "step": 3883 }, { "epoch": 0.4453617704391698, "grad_norm": 0.26284288531812217, "learning_rate": 0.0006118552280840037, "loss": 2.5161, "step": 3884 }, { "epoch": 0.4454764361885105, "grad_norm": 0.22384431229291182, "learning_rate": 0.0006116742314259769, "loss": 2.4255, "step": 3885 }, { "epoch": 0.4455911019378512, "grad_norm": 0.25200180465874367, "learning_rate": 0.0006114932193646184, "loss": 2.3996, "step": 3886 }, { "epoch": 0.44570576768719183, "grad_norm": 0.24758407089851703, "learning_rate": 0.0006113121919248957, "loss": 2.509, "step": 3887 }, { "epoch": 0.4458204334365325, "grad_norm": 0.24533149947496877, "learning_rate": 0.0006111311491317778, "loss": 2.4881, "step": 3888 }, { "epoch": 0.4459350991858732, "grad_norm": 0.2395786161317771, "learning_rate": 0.0006109500910102362, "loss": 2.3821, "step": 3889 }, { "epoch": 0.44604976493521387, "grad_norm": 0.22882911491575342, "learning_rate": 0.0006107690175852445, "loss": 2.5027, "step": 3890 }, { "epoch": 0.4461644306845545, "grad_norm": 0.2246763906574688, "learning_rate": 0.0006105879288817784, "loss": 2.4849, "step": 3891 }, { "epoch": 0.4462790964338952, "grad_norm": 0.26695889144441043, "learning_rate": 0.0006104068249248154, "loss": 2.6179, "step": 3892 }, { "epoch": 0.44639376218323584, "grad_norm": 0.2549769365322054, "learning_rate": 0.0006102257057393354, "loss": 2.5787, "step": 3893 }, { "epoch": 0.44650842793257656, "grad_norm": 0.23786294401581556, "learning_rate": 0.0006100445713503206, "loss": 2.4304, "step": 3894 }, { "epoch": 0.4466230936819172, "grad_norm": 0.2319978596995435, "learning_rate": 0.0006098634217827545, "loss": 2.5445, "step": 3895 }, { "epoch": 0.4467377594312579, "grad_norm": 0.2788302410289403, "learning_rate": 0.0006096822570616237, "loss": 2.5701, "step": 3896 }, { "epoch": 0.44685242518059853, "grad_norm": 0.25301233180829713, "learning_rate": 0.0006095010772119164, "loss": 2.3952, "step": 3897 }, { "epoch": 0.44696709092993925, "grad_norm": 0.26091805107399546, "learning_rate": 0.0006093198822586226, "loss": 2.5388, "step": 3898 }, { "epoch": 0.4470817566792799, "grad_norm": 0.2466995792807347, "learning_rate": 0.000609138672226735, "loss": 2.4445, "step": 3899 }, { "epoch": 0.44719642242862057, "grad_norm": 0.23208189183317335, "learning_rate": 0.0006089574471412478, "loss": 2.4613, "step": 3900 }, { "epoch": 0.4473110881779612, "grad_norm": 0.2790907575989822, "learning_rate": 0.0006087762070271578, "loss": 2.4901, "step": 3901 }, { "epoch": 0.44742575392730194, "grad_norm": 0.23807506556601798, "learning_rate": 0.0006085949519094633, "loss": 2.4675, "step": 3902 }, { "epoch": 0.4475404196766426, "grad_norm": 0.2563582711324039, "learning_rate": 0.0006084136818131654, "loss": 2.4278, "step": 3903 }, { "epoch": 0.44765508542598326, "grad_norm": 0.26126836016239624, "learning_rate": 0.0006082323967632667, "loss": 2.5002, "step": 3904 }, { "epoch": 0.4477697511753239, "grad_norm": 0.271403256976357, "learning_rate": 0.0006080510967847717, "loss": 2.4677, "step": 3905 }, { "epoch": 0.44788441692466463, "grad_norm": 0.2681036625460401, "learning_rate": 0.0006078697819026878, "loss": 2.5847, "step": 3906 }, { "epoch": 0.4479990826740053, "grad_norm": 0.2693740921353591, "learning_rate": 0.0006076884521420236, "loss": 2.5738, "step": 3907 }, { "epoch": 0.44811374842334595, "grad_norm": 0.26215981276277794, "learning_rate": 0.00060750710752779, "loss": 2.4214, "step": 3908 }, { "epoch": 0.4482284141726866, "grad_norm": 0.2860964131000218, "learning_rate": 0.0006073257480850004, "loss": 2.4278, "step": 3909 }, { "epoch": 0.44834307992202727, "grad_norm": 0.24765137731736517, "learning_rate": 0.0006071443738386697, "loss": 2.5565, "step": 3910 }, { "epoch": 0.448457745671368, "grad_norm": 0.2585403997360567, "learning_rate": 0.0006069629848138148, "loss": 2.6081, "step": 3911 }, { "epoch": 0.44857241142070864, "grad_norm": 0.257597783934508, "learning_rate": 0.0006067815810354551, "loss": 2.5092, "step": 3912 }, { "epoch": 0.4486870771700493, "grad_norm": 0.2372115874551654, "learning_rate": 0.0006066001625286118, "loss": 2.3958, "step": 3913 }, { "epoch": 0.44880174291938996, "grad_norm": 0.24870830577203515, "learning_rate": 0.0006064187293183078, "loss": 2.4428, "step": 3914 }, { "epoch": 0.44891640866873067, "grad_norm": 0.28603906753009717, "learning_rate": 0.0006062372814295689, "loss": 2.5037, "step": 3915 }, { "epoch": 0.44903107441807133, "grad_norm": 0.22456715997685556, "learning_rate": 0.0006060558188874222, "loss": 2.4613, "step": 3916 }, { "epoch": 0.449145740167412, "grad_norm": 0.2386837408228895, "learning_rate": 0.0006058743417168966, "loss": 2.4513, "step": 3917 }, { "epoch": 0.44926040591675265, "grad_norm": 0.24303256281060853, "learning_rate": 0.0006056928499430237, "loss": 2.4909, "step": 3918 }, { "epoch": 0.44937507166609336, "grad_norm": 0.21746086090275502, "learning_rate": 0.0006055113435908372, "loss": 2.4478, "step": 3919 }, { "epoch": 0.449489737415434, "grad_norm": 0.23063285317287116, "learning_rate": 0.0006053298226853717, "loss": 2.4453, "step": 3920 }, { "epoch": 0.4496044031647747, "grad_norm": 0.25474637989298565, "learning_rate": 0.0006051482872516652, "loss": 2.4666, "step": 3921 }, { "epoch": 0.44971906891411534, "grad_norm": 0.25808627323979777, "learning_rate": 0.0006049667373147566, "loss": 2.5451, "step": 3922 }, { "epoch": 0.44983373466345605, "grad_norm": 0.2449051572113027, "learning_rate": 0.0006047851728996875, "loss": 2.5724, "step": 3923 }, { "epoch": 0.4499484004127967, "grad_norm": 0.27452360169006523, "learning_rate": 0.0006046035940315011, "loss": 2.4163, "step": 3924 }, { "epoch": 0.45006306616213737, "grad_norm": 0.25584978753401827, "learning_rate": 0.0006044220007352429, "loss": 2.5012, "step": 3925 }, { "epoch": 0.45017773191147803, "grad_norm": 0.2527483904493502, "learning_rate": 0.0006042403930359603, "loss": 2.575, "step": 3926 }, { "epoch": 0.4502923976608187, "grad_norm": 0.2645945218118985, "learning_rate": 0.0006040587709587024, "loss": 2.4448, "step": 3927 }, { "epoch": 0.4504070634101594, "grad_norm": 0.2504694488306309, "learning_rate": 0.0006038771345285204, "loss": 2.4461, "step": 3928 }, { "epoch": 0.45052172915950006, "grad_norm": 0.2621190909534613, "learning_rate": 0.000603695483770468, "loss": 2.635, "step": 3929 }, { "epoch": 0.4506363949088407, "grad_norm": 0.23075822405655771, "learning_rate": 0.0006035138187096, "loss": 2.5651, "step": 3930 }, { "epoch": 0.4507510606581814, "grad_norm": 0.2601474395698221, "learning_rate": 0.0006033321393709741, "loss": 2.4761, "step": 3931 }, { "epoch": 0.4508657264075221, "grad_norm": 0.2350743451551013, "learning_rate": 0.0006031504457796493, "loss": 2.4331, "step": 3932 }, { "epoch": 0.45098039215686275, "grad_norm": 0.2397324484573778, "learning_rate": 0.0006029687379606864, "loss": 2.4914, "step": 3933 }, { "epoch": 0.4510950579062034, "grad_norm": 0.23647721962641163, "learning_rate": 0.0006027870159391491, "loss": 2.4892, "step": 3934 }, { "epoch": 0.45120972365554407, "grad_norm": 0.2536743555375813, "learning_rate": 0.0006026052797401022, "loss": 2.5464, "step": 3935 }, { "epoch": 0.4513243894048848, "grad_norm": 0.2292260255227131, "learning_rate": 0.0006024235293886127, "loss": 2.5472, "step": 3936 }, { "epoch": 0.45143905515422544, "grad_norm": 0.24976893414891224, "learning_rate": 0.0006022417649097499, "loss": 2.4496, "step": 3937 }, { "epoch": 0.4515537209035661, "grad_norm": 0.21049273864184484, "learning_rate": 0.0006020599863285845, "loss": 2.4127, "step": 3938 }, { "epoch": 0.45166838665290676, "grad_norm": 0.24644349924514333, "learning_rate": 0.0006018781936701893, "loss": 2.5237, "step": 3939 }, { "epoch": 0.4517830524022475, "grad_norm": 0.22943860141941533, "learning_rate": 0.0006016963869596392, "loss": 2.359, "step": 3940 }, { "epoch": 0.45189771815158813, "grad_norm": 0.22909120652737902, "learning_rate": 0.0006015145662220113, "loss": 2.3493, "step": 3941 }, { "epoch": 0.4520123839009288, "grad_norm": 0.2775648807420886, "learning_rate": 0.0006013327314823838, "loss": 2.4402, "step": 3942 }, { "epoch": 0.45212704965026945, "grad_norm": 0.23361955558017575, "learning_rate": 0.0006011508827658376, "loss": 2.4639, "step": 3943 }, { "epoch": 0.4522417153996101, "grad_norm": 0.2622691237723836, "learning_rate": 0.0006009690200974553, "loss": 2.6042, "step": 3944 }, { "epoch": 0.4523563811489508, "grad_norm": 0.24593252243237382, "learning_rate": 0.0006007871435023213, "loss": 2.3898, "step": 3945 }, { "epoch": 0.4524710468982915, "grad_norm": 0.2364488838696429, "learning_rate": 0.0006006052530055221, "loss": 2.567, "step": 3946 }, { "epoch": 0.45258571264763214, "grad_norm": 0.2698985054027099, "learning_rate": 0.0006004233486321459, "loss": 2.5669, "step": 3947 }, { "epoch": 0.4527003783969728, "grad_norm": 0.2660980300059388, "learning_rate": 0.000600241430407283, "loss": 2.4717, "step": 3948 }, { "epoch": 0.4528150441463135, "grad_norm": 0.24842296062626618, "learning_rate": 0.0006000594983560255, "loss": 2.5179, "step": 3949 }, { "epoch": 0.4529297098956542, "grad_norm": 0.263252003583487, "learning_rate": 0.0005998775525034675, "loss": 2.4295, "step": 3950 }, { "epoch": 0.45304437564499483, "grad_norm": 0.22590002470299006, "learning_rate": 0.0005996955928747051, "loss": 2.433, "step": 3951 }, { "epoch": 0.4531590413943355, "grad_norm": 0.23613619835138924, "learning_rate": 0.0005995136194948359, "loss": 2.4345, "step": 3952 }, { "epoch": 0.4532737071436762, "grad_norm": 0.24913655879233865, "learning_rate": 0.0005993316323889598, "loss": 2.4798, "step": 3953 }, { "epoch": 0.45338837289301687, "grad_norm": 0.2250205008799578, "learning_rate": 0.0005991496315821786, "loss": 2.4742, "step": 3954 }, { "epoch": 0.4535030386423575, "grad_norm": 0.2364726356552407, "learning_rate": 0.0005989676170995954, "loss": 2.5198, "step": 3955 }, { "epoch": 0.4536177043916982, "grad_norm": 0.2324341528537448, "learning_rate": 0.0005987855889663163, "loss": 2.5988, "step": 3956 }, { "epoch": 0.4537323701410389, "grad_norm": 0.24049310020696812, "learning_rate": 0.000598603547207448, "loss": 2.4445, "step": 3957 }, { "epoch": 0.45384703589037956, "grad_norm": 0.23565677178868943, "learning_rate": 0.0005984214918480999, "loss": 2.4665, "step": 3958 }, { "epoch": 0.4539617016397202, "grad_norm": 0.25642489670638774, "learning_rate": 0.0005982394229133832, "loss": 2.6918, "step": 3959 }, { "epoch": 0.4540763673890609, "grad_norm": 0.2410093420291846, "learning_rate": 0.0005980573404284107, "loss": 2.4633, "step": 3960 }, { "epoch": 0.45419103313840153, "grad_norm": 0.23350412053138167, "learning_rate": 0.000597875244418297, "loss": 2.5599, "step": 3961 }, { "epoch": 0.45430569888774225, "grad_norm": 0.24087131854138213, "learning_rate": 0.0005976931349081593, "loss": 2.3308, "step": 3962 }, { "epoch": 0.4544203646370829, "grad_norm": 0.23713647194721107, "learning_rate": 0.0005975110119231157, "loss": 2.564, "step": 3963 }, { "epoch": 0.45453503038642357, "grad_norm": 0.22472626844348545, "learning_rate": 0.0005973288754882867, "loss": 2.561, "step": 3964 }, { "epoch": 0.4546496961357642, "grad_norm": 0.27797479606783915, "learning_rate": 0.0005971467256287947, "loss": 2.4504, "step": 3965 }, { "epoch": 0.45476436188510494, "grad_norm": 0.24716802967188817, "learning_rate": 0.0005969645623697636, "loss": 2.5725, "step": 3966 }, { "epoch": 0.4548790276344456, "grad_norm": 0.25772233146269496, "learning_rate": 0.0005967823857363195, "loss": 2.5397, "step": 3967 }, { "epoch": 0.45499369338378626, "grad_norm": 0.23219086909346606, "learning_rate": 0.0005966001957535901, "loss": 2.5535, "step": 3968 }, { "epoch": 0.4551083591331269, "grad_norm": 0.22606169938490384, "learning_rate": 0.000596417992446705, "loss": 2.4102, "step": 3969 }, { "epoch": 0.45522302488246763, "grad_norm": 0.2613332214463567, "learning_rate": 0.0005962357758407958, "loss": 2.4485, "step": 3970 }, { "epoch": 0.4553376906318083, "grad_norm": 0.25739259406632836, "learning_rate": 0.0005960535459609957, "loss": 2.4801, "step": 3971 }, { "epoch": 0.45545235638114895, "grad_norm": 0.253512675124322, "learning_rate": 0.00059587130283244, "loss": 2.4935, "step": 3972 }, { "epoch": 0.4555670221304896, "grad_norm": 0.2601760453329737, "learning_rate": 0.0005956890464802654, "loss": 2.4514, "step": 3973 }, { "epoch": 0.4556816878798303, "grad_norm": 0.26276500057400415, "learning_rate": 0.0005955067769296109, "loss": 2.3884, "step": 3974 }, { "epoch": 0.455796353629171, "grad_norm": 0.21460375373954713, "learning_rate": 0.0005953244942056171, "loss": 2.4364, "step": 3975 }, { "epoch": 0.45591101937851164, "grad_norm": 0.2862866908494329, "learning_rate": 0.0005951421983334263, "loss": 2.5374, "step": 3976 }, { "epoch": 0.4560256851278523, "grad_norm": 0.239740462331181, "learning_rate": 0.0005949598893381828, "loss": 2.5119, "step": 3977 }, { "epoch": 0.45614035087719296, "grad_norm": 0.23932017674871514, "learning_rate": 0.0005947775672450326, "loss": 2.4051, "step": 3978 }, { "epoch": 0.45625501662653367, "grad_norm": 0.25772753388996095, "learning_rate": 0.0005945952320791239, "loss": 2.5401, "step": 3979 }, { "epoch": 0.45636968237587433, "grad_norm": 0.235613442920123, "learning_rate": 0.0005944128838656059, "loss": 2.4094, "step": 3980 }, { "epoch": 0.456484348125215, "grad_norm": 0.24132322076811574, "learning_rate": 0.0005942305226296302, "loss": 2.5086, "step": 3981 }, { "epoch": 0.45659901387455565, "grad_norm": 0.24971451976323805, "learning_rate": 0.0005940481483963502, "loss": 2.538, "step": 3982 }, { "epoch": 0.45671367962389636, "grad_norm": 0.24986706023905864, "learning_rate": 0.0005938657611909206, "loss": 2.4424, "step": 3983 }, { "epoch": 0.456828345373237, "grad_norm": 0.2501549109312565, "learning_rate": 0.0005936833610384988, "loss": 2.3045, "step": 3984 }, { "epoch": 0.4569430111225777, "grad_norm": 0.2392833009886116, "learning_rate": 0.0005935009479642431, "loss": 2.4197, "step": 3985 }, { "epoch": 0.45705767687191834, "grad_norm": 0.23392399533363353, "learning_rate": 0.0005933185219933137, "loss": 2.5313, "step": 3986 }, { "epoch": 0.45717234262125905, "grad_norm": 0.24513047609038893, "learning_rate": 0.0005931360831508732, "loss": 2.5182, "step": 3987 }, { "epoch": 0.4572870083705997, "grad_norm": 0.23490837286542676, "learning_rate": 0.0005929536314620852, "loss": 2.4514, "step": 3988 }, { "epoch": 0.45740167411994037, "grad_norm": 0.2352393406286705, "learning_rate": 0.0005927711669521156, "loss": 2.4493, "step": 3989 }, { "epoch": 0.45751633986928103, "grad_norm": 0.24756796586114393, "learning_rate": 0.0005925886896461321, "loss": 2.4291, "step": 3990 }, { "epoch": 0.45763100561862174, "grad_norm": 0.29996769307345406, "learning_rate": 0.0005924061995693036, "loss": 2.3033, "step": 3991 }, { "epoch": 0.4577456713679624, "grad_norm": 0.24497351739374512, "learning_rate": 0.0005922236967468013, "loss": 2.3321, "step": 3992 }, { "epoch": 0.45786033711730306, "grad_norm": 0.23903044983833396, "learning_rate": 0.000592041181203798, "loss": 2.4554, "step": 3993 }, { "epoch": 0.4579750028666437, "grad_norm": 0.25883812390045613, "learning_rate": 0.0005918586529654684, "loss": 2.5333, "step": 3994 }, { "epoch": 0.4580896686159844, "grad_norm": 0.2836977633084591, "learning_rate": 0.0005916761120569883, "loss": 2.5951, "step": 3995 }, { "epoch": 0.4582043343653251, "grad_norm": 0.2611893556043827, "learning_rate": 0.0005914935585035363, "loss": 2.5358, "step": 3996 }, { "epoch": 0.45831900011466575, "grad_norm": 0.24492121390836463, "learning_rate": 0.0005913109923302919, "loss": 2.5196, "step": 3997 }, { "epoch": 0.4584336658640064, "grad_norm": 0.259712987879203, "learning_rate": 0.0005911284135624365, "loss": 2.5814, "step": 3998 }, { "epoch": 0.45854833161334707, "grad_norm": 0.24640690971169735, "learning_rate": 0.0005909458222251536, "loss": 2.3768, "step": 3999 }, { "epoch": 0.4586629973626878, "grad_norm": 0.2710985881162735, "learning_rate": 0.0005907632183436281, "loss": 2.2765, "step": 4000 }, { "epoch": 0.45877766311202844, "grad_norm": 0.24117229763682085, "learning_rate": 0.0005905806019430468, "loss": 2.4958, "step": 4001 }, { "epoch": 0.4588923288613691, "grad_norm": 0.24304433426590974, "learning_rate": 0.0005903979730485979, "loss": 2.508, "step": 4002 }, { "epoch": 0.45900699461070976, "grad_norm": 0.22195366824526383, "learning_rate": 0.0005902153316854718, "loss": 2.5461, "step": 4003 }, { "epoch": 0.4591216603600505, "grad_norm": 0.3022572093007704, "learning_rate": 0.0005900326778788605, "loss": 2.4491, "step": 4004 }, { "epoch": 0.45923632610939114, "grad_norm": 0.23712500714320361, "learning_rate": 0.0005898500116539575, "loss": 2.4224, "step": 4005 }, { "epoch": 0.4593509918587318, "grad_norm": 0.2276446315279386, "learning_rate": 0.0005896673330359579, "loss": 2.4692, "step": 4006 }, { "epoch": 0.45946565760807245, "grad_norm": 0.24160217840238002, "learning_rate": 0.000589484642050059, "loss": 2.5746, "step": 4007 }, { "epoch": 0.45958032335741317, "grad_norm": 0.21556816317647048, "learning_rate": 0.0005893019387214595, "loss": 2.3802, "step": 4008 }, { "epoch": 0.4596949891067538, "grad_norm": 0.2585601183500471, "learning_rate": 0.0005891192230753597, "loss": 2.677, "step": 4009 }, { "epoch": 0.4598096548560945, "grad_norm": 0.25481247858543454, "learning_rate": 0.000588936495136962, "loss": 2.4946, "step": 4010 }, { "epoch": 0.45992432060543514, "grad_norm": 0.25821269187748086, "learning_rate": 0.0005887537549314699, "loss": 2.4564, "step": 4011 }, { "epoch": 0.4600389863547758, "grad_norm": 0.2783909170668665, "learning_rate": 0.0005885710024840893, "loss": 2.4776, "step": 4012 }, { "epoch": 0.4601536521041165, "grad_norm": 0.24701354913323206, "learning_rate": 0.000588388237820027, "loss": 2.4524, "step": 4013 }, { "epoch": 0.4602683178534572, "grad_norm": 0.2587736756076629, "learning_rate": 0.0005882054609644923, "loss": 2.4944, "step": 4014 }, { "epoch": 0.46038298360279784, "grad_norm": 0.2777160543424753, "learning_rate": 0.0005880226719426956, "loss": 2.3666, "step": 4015 }, { "epoch": 0.4604976493521385, "grad_norm": 0.27470829566841637, "learning_rate": 0.0005878398707798491, "loss": 2.4321, "step": 4016 }, { "epoch": 0.4606123151014792, "grad_norm": 0.2515435455577454, "learning_rate": 0.0005876570575011668, "loss": 2.4091, "step": 4017 }, { "epoch": 0.46072698085081987, "grad_norm": 0.2622692983818868, "learning_rate": 0.0005874742321318643, "loss": 2.5589, "step": 4018 }, { "epoch": 0.4608416466001605, "grad_norm": 0.26305709035978064, "learning_rate": 0.0005872913946971591, "loss": 2.4523, "step": 4019 }, { "epoch": 0.4609563123495012, "grad_norm": 0.24299542114107053, "learning_rate": 0.0005871085452222697, "loss": 2.5156, "step": 4020 }, { "epoch": 0.4610709780988419, "grad_norm": 0.2525557451145121, "learning_rate": 0.000586925683732417, "loss": 2.4831, "step": 4021 }, { "epoch": 0.46118564384818256, "grad_norm": 0.23241168067750473, "learning_rate": 0.0005867428102528233, "loss": 2.5007, "step": 4022 }, { "epoch": 0.4613003095975232, "grad_norm": 0.21674378350997456, "learning_rate": 0.0005865599248087122, "loss": 2.445, "step": 4023 }, { "epoch": 0.4614149753468639, "grad_norm": 0.2619977477590639, "learning_rate": 0.0005863770274253095, "loss": 2.5395, "step": 4024 }, { "epoch": 0.4615296410962046, "grad_norm": 0.21657052303508098, "learning_rate": 0.0005861941181278425, "loss": 2.457, "step": 4025 }, { "epoch": 0.46164430684554525, "grad_norm": 0.2336017436641503, "learning_rate": 0.0005860111969415397, "loss": 2.5186, "step": 4026 }, { "epoch": 0.4617589725948859, "grad_norm": 0.2449664358939737, "learning_rate": 0.0005858282638916319, "loss": 2.4698, "step": 4027 }, { "epoch": 0.46187363834422657, "grad_norm": 0.21640288697634297, "learning_rate": 0.0005856453190033512, "loss": 2.4507, "step": 4028 }, { "epoch": 0.4619883040935672, "grad_norm": 0.22687931653126772, "learning_rate": 0.0005854623623019313, "loss": 2.5422, "step": 4029 }, { "epoch": 0.46210296984290794, "grad_norm": 0.22559651454310134, "learning_rate": 0.0005852793938126074, "loss": 2.4375, "step": 4030 }, { "epoch": 0.4622176355922486, "grad_norm": 0.24831776755149493, "learning_rate": 0.0005850964135606169, "loss": 2.4207, "step": 4031 }, { "epoch": 0.46233230134158926, "grad_norm": 0.26743306223850294, "learning_rate": 0.000584913421571198, "loss": 2.3825, "step": 4032 }, { "epoch": 0.4624469670909299, "grad_norm": 0.25457242928236234, "learning_rate": 0.0005847304178695914, "loss": 2.502, "step": 4033 }, { "epoch": 0.46256163284027063, "grad_norm": 0.25912940667538276, "learning_rate": 0.0005845474024810387, "loss": 2.5471, "step": 4034 }, { "epoch": 0.4626762985896113, "grad_norm": 0.24394943247383544, "learning_rate": 0.0005843643754307834, "loss": 2.3433, "step": 4035 }, { "epoch": 0.46279096433895195, "grad_norm": 0.28551047448527844, "learning_rate": 0.0005841813367440707, "loss": 2.5346, "step": 4036 }, { "epoch": 0.4629056300882926, "grad_norm": 0.30348234511021444, "learning_rate": 0.0005839982864461473, "loss": 2.5535, "step": 4037 }, { "epoch": 0.4630202958376333, "grad_norm": 0.3006559693647635, "learning_rate": 0.0005838152245622614, "loss": 2.5015, "step": 4038 }, { "epoch": 0.463134961586974, "grad_norm": 0.26931888177659113, "learning_rate": 0.0005836321511176628, "loss": 2.5331, "step": 4039 }, { "epoch": 0.46324962733631464, "grad_norm": 0.26874848305500937, "learning_rate": 0.0005834490661376033, "loss": 2.5138, "step": 4040 }, { "epoch": 0.4633642930856553, "grad_norm": 0.25467707896578495, "learning_rate": 0.0005832659696473356, "loss": 2.5055, "step": 4041 }, { "epoch": 0.463478958834996, "grad_norm": 0.2512393029246634, "learning_rate": 0.0005830828616721148, "loss": 2.414, "step": 4042 }, { "epoch": 0.46359362458433667, "grad_norm": 0.2707415766615185, "learning_rate": 0.0005828997422371967, "loss": 2.4702, "step": 4043 }, { "epoch": 0.46370829033367733, "grad_norm": 0.24811277836871373, "learning_rate": 0.0005827166113678398, "loss": 2.5298, "step": 4044 }, { "epoch": 0.463822956083018, "grad_norm": 0.22288366279505703, "learning_rate": 0.0005825334690893028, "loss": 2.5795, "step": 4045 }, { "epoch": 0.46393762183235865, "grad_norm": 0.23955363484670503, "learning_rate": 0.0005823503154268468, "loss": 2.6075, "step": 4046 }, { "epoch": 0.46405228758169936, "grad_norm": 0.25359801528817916, "learning_rate": 0.0005821671504057348, "loss": 2.5652, "step": 4047 }, { "epoch": 0.46416695333104, "grad_norm": 0.23369015862046746, "learning_rate": 0.0005819839740512305, "loss": 2.5, "step": 4048 }, { "epoch": 0.4642816190803807, "grad_norm": 0.24502240622352708, "learning_rate": 0.0005818007863885998, "loss": 2.5694, "step": 4049 }, { "epoch": 0.46439628482972134, "grad_norm": 0.24052848907085234, "learning_rate": 0.00058161758744311, "loss": 2.3116, "step": 4050 }, { "epoch": 0.46451095057906205, "grad_norm": 0.25976262827275237, "learning_rate": 0.0005814343772400296, "loss": 2.59, "step": 4051 }, { "epoch": 0.4646256163284027, "grad_norm": 0.23473119492495118, "learning_rate": 0.0005812511558046291, "loss": 2.6317, "step": 4052 }, { "epoch": 0.46474028207774337, "grad_norm": 0.24156652517910596, "learning_rate": 0.0005810679231621806, "loss": 2.3444, "step": 4053 }, { "epoch": 0.46485494782708403, "grad_norm": 0.25752262646360335, "learning_rate": 0.0005808846793379575, "loss": 2.4572, "step": 4054 }, { "epoch": 0.46496961357642475, "grad_norm": 0.22791304663471093, "learning_rate": 0.0005807014243572343, "loss": 2.486, "step": 4055 }, { "epoch": 0.4650842793257654, "grad_norm": 0.2415570771674224, "learning_rate": 0.0005805181582452882, "loss": 2.4783, "step": 4056 }, { "epoch": 0.46519894507510606, "grad_norm": 0.27076805445126584, "learning_rate": 0.0005803348810273969, "loss": 2.3277, "step": 4057 }, { "epoch": 0.4653136108244467, "grad_norm": 0.24976209707713065, "learning_rate": 0.0005801515927288401, "loss": 2.4968, "step": 4058 }, { "epoch": 0.46542827657378744, "grad_norm": 0.27699544269951104, "learning_rate": 0.000579968293374899, "loss": 2.5445, "step": 4059 }, { "epoch": 0.4655429423231281, "grad_norm": 0.25349821394296557, "learning_rate": 0.0005797849829908561, "loss": 2.5464, "step": 4060 }, { "epoch": 0.46565760807246875, "grad_norm": 0.2561071030111794, "learning_rate": 0.0005796016616019955, "loss": 2.404, "step": 4061 }, { "epoch": 0.4657722738218094, "grad_norm": 0.26941566199940703, "learning_rate": 0.0005794183292336032, "loss": 2.5623, "step": 4062 }, { "epoch": 0.46588693957115007, "grad_norm": 0.2656433122784312, "learning_rate": 0.0005792349859109662, "loss": 2.4062, "step": 4063 }, { "epoch": 0.4660016053204908, "grad_norm": 0.23417855297483753, "learning_rate": 0.0005790516316593733, "loss": 2.5016, "step": 4064 }, { "epoch": 0.46611627106983144, "grad_norm": 0.27376676814228473, "learning_rate": 0.0005788682665041146, "loss": 2.5389, "step": 4065 }, { "epoch": 0.4662309368191721, "grad_norm": 0.27298158316833904, "learning_rate": 0.0005786848904704818, "loss": 2.4707, "step": 4066 }, { "epoch": 0.46634560256851276, "grad_norm": 0.24304253791162964, "learning_rate": 0.0005785015035837684, "loss": 2.3979, "step": 4067 }, { "epoch": 0.4664602683178535, "grad_norm": 0.2278037832499448, "learning_rate": 0.000578318105869269, "loss": 2.4998, "step": 4068 }, { "epoch": 0.46657493406719414, "grad_norm": 0.22367781495391703, "learning_rate": 0.0005781346973522797, "loss": 2.2515, "step": 4069 }, { "epoch": 0.4666895998165348, "grad_norm": 0.24845967795308857, "learning_rate": 0.0005779512780580981, "loss": 2.4585, "step": 4070 }, { "epoch": 0.46680426556587545, "grad_norm": 0.22475780584585855, "learning_rate": 0.0005777678480120237, "loss": 2.5699, "step": 4071 }, { "epoch": 0.46691893131521617, "grad_norm": 0.23761655436314588, "learning_rate": 0.0005775844072393573, "loss": 2.564, "step": 4072 }, { "epoch": 0.4670335970645568, "grad_norm": 0.2569776784275677, "learning_rate": 0.0005774009557654006, "loss": 2.5437, "step": 4073 }, { "epoch": 0.4671482628138975, "grad_norm": 0.22783050644097697, "learning_rate": 0.0005772174936154573, "loss": 2.5144, "step": 4074 }, { "epoch": 0.46726292856323814, "grad_norm": 0.24813561223739344, "learning_rate": 0.000577034020814833, "loss": 2.5319, "step": 4075 }, { "epoch": 0.46737759431257886, "grad_norm": 0.20983291977362017, "learning_rate": 0.0005768505373888337, "loss": 2.4116, "step": 4076 }, { "epoch": 0.4674922600619195, "grad_norm": 0.25789828576346707, "learning_rate": 0.0005766670433627677, "loss": 2.5806, "step": 4077 }, { "epoch": 0.4676069258112602, "grad_norm": 0.2464997740378045, "learning_rate": 0.0005764835387619444, "loss": 2.4314, "step": 4078 }, { "epoch": 0.46772159156060084, "grad_norm": 0.24757597445739538, "learning_rate": 0.000576300023611675, "loss": 2.5836, "step": 4079 }, { "epoch": 0.4678362573099415, "grad_norm": 0.23818417194069333, "learning_rate": 0.0005761164979372716, "loss": 2.3464, "step": 4080 }, { "epoch": 0.4679509230592822, "grad_norm": 0.24755121328919102, "learning_rate": 0.0005759329617640483, "loss": 2.4426, "step": 4081 }, { "epoch": 0.46806558880862287, "grad_norm": 0.23916563763164692, "learning_rate": 0.0005757494151173204, "loss": 2.4868, "step": 4082 }, { "epoch": 0.4681802545579635, "grad_norm": 0.2612887135226204, "learning_rate": 0.0005755658580224043, "loss": 2.5336, "step": 4083 }, { "epoch": 0.4682949203073042, "grad_norm": 0.25747175587157045, "learning_rate": 0.0005753822905046189, "loss": 2.5981, "step": 4084 }, { "epoch": 0.4684095860566449, "grad_norm": 0.2423193432486496, "learning_rate": 0.0005751987125892833, "loss": 2.3855, "step": 4085 }, { "epoch": 0.46852425180598556, "grad_norm": 0.25026805400259666, "learning_rate": 0.0005750151243017187, "loss": 2.6247, "step": 4086 }, { "epoch": 0.4686389175553262, "grad_norm": 0.23501021503952846, "learning_rate": 0.0005748315256672476, "loss": 2.3819, "step": 4087 }, { "epoch": 0.4687535833046669, "grad_norm": 0.2800629810545975, "learning_rate": 0.000574647916711194, "loss": 2.6783, "step": 4088 }, { "epoch": 0.4688682490540076, "grad_norm": 0.25150584813890065, "learning_rate": 0.000574464297458883, "loss": 2.5303, "step": 4089 }, { "epoch": 0.46898291480334825, "grad_norm": 0.3058588158795674, "learning_rate": 0.000574280667935642, "loss": 2.3786, "step": 4090 }, { "epoch": 0.4690975805526889, "grad_norm": 0.23932394408537352, "learning_rate": 0.0005740970281667984, "loss": 2.41, "step": 4091 }, { "epoch": 0.46921224630202957, "grad_norm": 0.2566441254792209, "learning_rate": 0.0005739133781776824, "loss": 2.551, "step": 4092 }, { "epoch": 0.4693269120513703, "grad_norm": 0.25718589538971726, "learning_rate": 0.0005737297179936247, "loss": 2.4187, "step": 4093 }, { "epoch": 0.46944157780071094, "grad_norm": 0.23482095526672317, "learning_rate": 0.0005735460476399579, "loss": 2.4443, "step": 4094 }, { "epoch": 0.4695562435500516, "grad_norm": 0.25081607485848917, "learning_rate": 0.0005733623671420156, "loss": 2.4984, "step": 4095 }, { "epoch": 0.46967090929939226, "grad_norm": 0.23490462008074964, "learning_rate": 0.0005731786765251333, "loss": 2.3803, "step": 4096 }, { "epoch": 0.4697855750487329, "grad_norm": 0.24706699653589545, "learning_rate": 0.0005729949758146475, "loss": 2.6328, "step": 4097 }, { "epoch": 0.46990024079807363, "grad_norm": 0.23957953264753773, "learning_rate": 0.0005728112650358961, "loss": 2.509, "step": 4098 }, { "epoch": 0.4700149065474143, "grad_norm": 0.23415303858141057, "learning_rate": 0.0005726275442142185, "loss": 2.4905, "step": 4099 }, { "epoch": 0.47012957229675495, "grad_norm": 0.2348783567025958, "learning_rate": 0.0005724438133749559, "loss": 2.4462, "step": 4100 }, { "epoch": 0.4702442380460956, "grad_norm": 0.24221795688984288, "learning_rate": 0.0005722600725434499, "loss": 2.5664, "step": 4101 }, { "epoch": 0.4703589037954363, "grad_norm": 0.24700780962568017, "learning_rate": 0.0005720763217450443, "loss": 2.4485, "step": 4102 }, { "epoch": 0.470473569544777, "grad_norm": 0.2395912443385863, "learning_rate": 0.000571892561005084, "loss": 2.6206, "step": 4103 }, { "epoch": 0.47058823529411764, "grad_norm": 0.25004202397048964, "learning_rate": 0.0005717087903489155, "loss": 2.4654, "step": 4104 }, { "epoch": 0.4707029010434583, "grad_norm": 0.23686223407279985, "learning_rate": 0.000571525009801886, "loss": 2.3086, "step": 4105 }, { "epoch": 0.470817566792799, "grad_norm": 0.27117640531275145, "learning_rate": 0.0005713412193893451, "loss": 2.6296, "step": 4106 }, { "epoch": 0.4709322325421397, "grad_norm": 0.2292084382065695, "learning_rate": 0.0005711574191366427, "loss": 2.4905, "step": 4107 }, { "epoch": 0.47104689829148033, "grad_norm": 0.2440423126972062, "learning_rate": 0.0005709736090691305, "loss": 2.4408, "step": 4108 }, { "epoch": 0.471161564040821, "grad_norm": 0.2319575387284643, "learning_rate": 0.0005707897892121621, "loss": 2.5366, "step": 4109 }, { "epoch": 0.4712762297901617, "grad_norm": 0.25557685494323185, "learning_rate": 0.0005706059595910918, "loss": 2.5395, "step": 4110 }, { "epoch": 0.47139089553950236, "grad_norm": 0.2418003712874159, "learning_rate": 0.0005704221202312748, "loss": 2.372, "step": 4111 }, { "epoch": 0.471505561288843, "grad_norm": 0.246028321079148, "learning_rate": 0.000570238271158069, "loss": 2.5977, "step": 4112 }, { "epoch": 0.4716202270381837, "grad_norm": 0.2405902488996967, "learning_rate": 0.0005700544123968325, "loss": 2.4233, "step": 4113 }, { "epoch": 0.47173489278752434, "grad_norm": 0.3004289086723318, "learning_rate": 0.0005698705439729251, "loss": 2.3543, "step": 4114 }, { "epoch": 0.47184955853686505, "grad_norm": 0.23741141665197765, "learning_rate": 0.0005696866659117081, "loss": 2.46, "step": 4115 }, { "epoch": 0.4719642242862057, "grad_norm": 0.21233013484141894, "learning_rate": 0.0005695027782385438, "loss": 2.3237, "step": 4116 }, { "epoch": 0.4720788900355464, "grad_norm": 0.24478340844456165, "learning_rate": 0.0005693188809787961, "loss": 2.5672, "step": 4117 }, { "epoch": 0.47219355578488703, "grad_norm": 0.2300892030179047, "learning_rate": 0.00056913497415783, "loss": 2.414, "step": 4118 }, { "epoch": 0.47230822153422775, "grad_norm": 0.2485024049875083, "learning_rate": 0.0005689510578010123, "loss": 2.532, "step": 4119 }, { "epoch": 0.4724228872835684, "grad_norm": 0.22393404248639082, "learning_rate": 0.0005687671319337103, "loss": 2.5336, "step": 4120 }, { "epoch": 0.47253755303290906, "grad_norm": 0.2433928478306489, "learning_rate": 0.0005685831965812933, "loss": 2.4394, "step": 4121 }, { "epoch": 0.4726522187822497, "grad_norm": 0.2346018230669044, "learning_rate": 0.0005683992517691318, "loss": 2.4222, "step": 4122 }, { "epoch": 0.47276688453159044, "grad_norm": 0.24427067208209588, "learning_rate": 0.0005682152975225972, "loss": 2.37, "step": 4123 }, { "epoch": 0.4728815502809311, "grad_norm": 0.22716393868440118, "learning_rate": 0.0005680313338670627, "loss": 2.5162, "step": 4124 }, { "epoch": 0.47299621603027175, "grad_norm": 0.23690201275609046, "learning_rate": 0.0005678473608279024, "loss": 2.4383, "step": 4125 }, { "epoch": 0.4731108817796124, "grad_norm": 0.2536670048753871, "learning_rate": 0.000567663378430492, "loss": 2.472, "step": 4126 }, { "epoch": 0.4732255475289531, "grad_norm": 0.25796315385799085, "learning_rate": 0.0005674793867002083, "loss": 2.4826, "step": 4127 }, { "epoch": 0.4733402132782938, "grad_norm": 0.25690528032225146, "learning_rate": 0.0005672953856624294, "loss": 2.4395, "step": 4128 }, { "epoch": 0.47345487902763445, "grad_norm": 0.23938403695296154, "learning_rate": 0.000567111375342535, "loss": 2.5153, "step": 4129 }, { "epoch": 0.4735695447769751, "grad_norm": 0.2642814905579894, "learning_rate": 0.0005669273557659055, "loss": 2.3523, "step": 4130 }, { "epoch": 0.47368421052631576, "grad_norm": 0.2523240847978188, "learning_rate": 0.000566743326957923, "loss": 2.5267, "step": 4131 }, { "epoch": 0.4737988762756565, "grad_norm": 0.2558946186781651, "learning_rate": 0.0005665592889439709, "loss": 2.3615, "step": 4132 }, { "epoch": 0.47391354202499714, "grad_norm": 0.27073208018057404, "learning_rate": 0.0005663752417494334, "loss": 2.5466, "step": 4133 }, { "epoch": 0.4740282077743378, "grad_norm": 0.2858518708997128, "learning_rate": 0.0005661911853996969, "loss": 2.5796, "step": 4134 }, { "epoch": 0.47414287352367845, "grad_norm": 0.22950780578974697, "learning_rate": 0.0005660071199201479, "loss": 2.4335, "step": 4135 }, { "epoch": 0.47425753927301917, "grad_norm": 0.22233527700134328, "learning_rate": 0.0005658230453361748, "loss": 2.4029, "step": 4136 }, { "epoch": 0.4743722050223598, "grad_norm": 0.28034842991877434, "learning_rate": 0.0005656389616731675, "loss": 2.6054, "step": 4137 }, { "epoch": 0.4744868707717005, "grad_norm": 0.2570318663817991, "learning_rate": 0.0005654548689565164, "loss": 2.4566, "step": 4138 }, { "epoch": 0.47460153652104115, "grad_norm": 0.23051429074322416, "learning_rate": 0.000565270767211614, "loss": 2.4399, "step": 4139 }, { "epoch": 0.47471620227038186, "grad_norm": 0.22875454841555037, "learning_rate": 0.0005650866564638535, "loss": 2.506, "step": 4140 }, { "epoch": 0.4748308680197225, "grad_norm": 0.25345988244530293, "learning_rate": 0.0005649025367386292, "loss": 2.3816, "step": 4141 }, { "epoch": 0.4749455337690632, "grad_norm": 0.22454371472156445, "learning_rate": 0.0005647184080613371, "loss": 2.4985, "step": 4142 }, { "epoch": 0.47506019951840384, "grad_norm": 0.24890509324488688, "learning_rate": 0.0005645342704573744, "loss": 2.5377, "step": 4143 }, { "epoch": 0.47517486526774455, "grad_norm": 0.25006798776180694, "learning_rate": 0.0005643501239521393, "loss": 2.4799, "step": 4144 }, { "epoch": 0.4752895310170852, "grad_norm": 0.23803095789544196, "learning_rate": 0.0005641659685710309, "loss": 2.4267, "step": 4145 }, { "epoch": 0.47540419676642587, "grad_norm": 0.2567255849652182, "learning_rate": 0.0005639818043394506, "loss": 2.5436, "step": 4146 }, { "epoch": 0.4755188625157665, "grad_norm": 0.2438244139619499, "learning_rate": 0.0005637976312827998, "loss": 2.5394, "step": 4147 }, { "epoch": 0.4756335282651072, "grad_norm": 0.2538153128820238, "learning_rate": 0.000563613449426482, "loss": 2.5064, "step": 4148 }, { "epoch": 0.4757481940144479, "grad_norm": 0.28925088969359114, "learning_rate": 0.0005634292587959012, "loss": 2.4247, "step": 4149 }, { "epoch": 0.47586285976378856, "grad_norm": 0.27083006504708856, "learning_rate": 0.0005632450594164635, "loss": 2.5274, "step": 4150 }, { "epoch": 0.4759775255131292, "grad_norm": 0.2622811849663945, "learning_rate": 0.0005630608513135752, "loss": 2.367, "step": 4151 }, { "epoch": 0.4760921912624699, "grad_norm": 0.2880598981528039, "learning_rate": 0.0005628766345126446, "loss": 2.5864, "step": 4152 }, { "epoch": 0.4762068570118106, "grad_norm": 0.2660879487401973, "learning_rate": 0.0005626924090390808, "loss": 2.4833, "step": 4153 }, { "epoch": 0.47632152276115125, "grad_norm": 0.24466872053368477, "learning_rate": 0.0005625081749182942, "loss": 2.5885, "step": 4154 }, { "epoch": 0.4764361885104919, "grad_norm": 0.24965560831163044, "learning_rate": 0.0005623239321756963, "loss": 2.4572, "step": 4155 }, { "epoch": 0.47655085425983257, "grad_norm": 0.25674691599470817, "learning_rate": 0.0005621396808367001, "loss": 2.4566, "step": 4156 }, { "epoch": 0.4766655200091733, "grad_norm": 0.2185965287018234, "learning_rate": 0.0005619554209267193, "loss": 2.3968, "step": 4157 }, { "epoch": 0.47678018575851394, "grad_norm": 0.2498996348556679, "learning_rate": 0.0005617711524711691, "loss": 2.5938, "step": 4158 }, { "epoch": 0.4768948515078546, "grad_norm": 0.25388160847277025, "learning_rate": 0.0005615868754954661, "loss": 2.442, "step": 4159 }, { "epoch": 0.47700951725719526, "grad_norm": 0.2740946707734979, "learning_rate": 0.0005614025900250276, "loss": 2.3021, "step": 4160 }, { "epoch": 0.477124183006536, "grad_norm": 0.2752682076295188, "learning_rate": 0.000561218296085272, "loss": 2.4521, "step": 4161 }, { "epoch": 0.47723884875587663, "grad_norm": 0.21940153849400076, "learning_rate": 0.0005610339937016195, "loss": 2.5537, "step": 4162 }, { "epoch": 0.4773535145052173, "grad_norm": 0.23835519195762034, "learning_rate": 0.0005608496828994911, "loss": 2.4207, "step": 4163 }, { "epoch": 0.47746818025455795, "grad_norm": 0.24430055114641686, "learning_rate": 0.0005606653637043087, "loss": 2.635, "step": 4164 }, { "epoch": 0.4775828460038986, "grad_norm": 0.27245225007613466, "learning_rate": 0.0005604810361414958, "loss": 2.5286, "step": 4165 }, { "epoch": 0.4776975117532393, "grad_norm": 0.22963128586497952, "learning_rate": 0.0005602967002364768, "loss": 2.5396, "step": 4166 }, { "epoch": 0.47781217750258, "grad_norm": 0.24359699584858852, "learning_rate": 0.0005601123560146774, "loss": 2.4934, "step": 4167 }, { "epoch": 0.47792684325192064, "grad_norm": 0.24688195138327487, "learning_rate": 0.0005599280035015243, "loss": 2.4549, "step": 4168 }, { "epoch": 0.4780415090012613, "grad_norm": 0.24096815241092795, "learning_rate": 0.0005597436427224455, "loss": 2.5376, "step": 4169 }, { "epoch": 0.478156174750602, "grad_norm": 0.2665731500867811, "learning_rate": 0.00055955927370287, "loss": 2.6084, "step": 4170 }, { "epoch": 0.4782708404999427, "grad_norm": 0.2737497553961168, "learning_rate": 0.000559374896468228, "loss": 2.5465, "step": 4171 }, { "epoch": 0.47838550624928333, "grad_norm": 0.25154056533526276, "learning_rate": 0.0005591905110439507, "loss": 2.4183, "step": 4172 }, { "epoch": 0.478500171998624, "grad_norm": 0.23683953615149914, "learning_rate": 0.0005590061174554708, "loss": 2.4335, "step": 4173 }, { "epoch": 0.4786148377479647, "grad_norm": 0.24164204542028214, "learning_rate": 0.0005588217157282217, "loss": 2.4426, "step": 4174 }, { "epoch": 0.47872950349730536, "grad_norm": 0.23853892285568642, "learning_rate": 0.0005586373058876383, "loss": 2.41, "step": 4175 }, { "epoch": 0.478844169246646, "grad_norm": 0.22510349155277046, "learning_rate": 0.0005584528879591562, "loss": 2.4898, "step": 4176 }, { "epoch": 0.4789588349959867, "grad_norm": 0.20392768209206685, "learning_rate": 0.0005582684619682123, "loss": 2.5063, "step": 4177 }, { "epoch": 0.4790735007453274, "grad_norm": 0.24279545892432144, "learning_rate": 0.0005580840279402448, "loss": 2.5717, "step": 4178 }, { "epoch": 0.47918816649466806, "grad_norm": 0.2403223707295445, "learning_rate": 0.000557899585900693, "loss": 2.4519, "step": 4179 }, { "epoch": 0.4793028322440087, "grad_norm": 0.2617493685594155, "learning_rate": 0.0005577151358749968, "loss": 2.5576, "step": 4180 }, { "epoch": 0.4794174979933494, "grad_norm": 0.24416194030712499, "learning_rate": 0.0005575306778885978, "loss": 2.4738, "step": 4181 }, { "epoch": 0.47953216374269003, "grad_norm": 0.21831416887248878, "learning_rate": 0.0005573462119669386, "loss": 2.3628, "step": 4182 }, { "epoch": 0.47964682949203075, "grad_norm": 0.24711339501712612, "learning_rate": 0.0005571617381354622, "loss": 2.4892, "step": 4183 }, { "epoch": 0.4797614952413714, "grad_norm": 0.2856610249724389, "learning_rate": 0.0005569772564196139, "loss": 2.5061, "step": 4184 }, { "epoch": 0.47987616099071206, "grad_norm": 0.24541557651021437, "learning_rate": 0.0005567927668448392, "loss": 2.4748, "step": 4185 }, { "epoch": 0.4799908267400527, "grad_norm": 0.266935338012606, "learning_rate": 0.0005566082694365847, "loss": 2.4486, "step": 4186 }, { "epoch": 0.48010549248939344, "grad_norm": 0.32799397814201664, "learning_rate": 0.0005564237642202987, "loss": 2.3662, "step": 4187 }, { "epoch": 0.4802201582387341, "grad_norm": 0.2218832535166006, "learning_rate": 0.0005562392512214299, "loss": 2.4748, "step": 4188 }, { "epoch": 0.48033482398807475, "grad_norm": 0.23240099261642552, "learning_rate": 0.0005560547304654282, "loss": 2.53, "step": 4189 }, { "epoch": 0.4804494897374154, "grad_norm": 0.23974538682350902, "learning_rate": 0.0005558702019777452, "loss": 2.4599, "step": 4190 }, { "epoch": 0.48056415548675613, "grad_norm": 0.2429648295185892, "learning_rate": 0.0005556856657838327, "loss": 2.5631, "step": 4191 }, { "epoch": 0.4806788212360968, "grad_norm": 0.2690553335467335, "learning_rate": 0.000555501121909144, "loss": 2.4736, "step": 4192 }, { "epoch": 0.48079348698543745, "grad_norm": 0.24231355856882744, "learning_rate": 0.0005553165703791335, "loss": 2.4327, "step": 4193 }, { "epoch": 0.4809081527347781, "grad_norm": 0.24067285009075667, "learning_rate": 0.0005551320112192567, "loss": 2.4934, "step": 4194 }, { "epoch": 0.4810228184841188, "grad_norm": 0.244330097894077, "learning_rate": 0.0005549474444549698, "loss": 2.3214, "step": 4195 }, { "epoch": 0.4811374842334595, "grad_norm": 0.24740247424614512, "learning_rate": 0.0005547628701117303, "loss": 2.521, "step": 4196 }, { "epoch": 0.48125214998280014, "grad_norm": 0.262990618117095, "learning_rate": 0.0005545782882149968, "loss": 2.5422, "step": 4197 }, { "epoch": 0.4813668157321408, "grad_norm": 0.24598372162153725, "learning_rate": 0.0005543936987902287, "loss": 2.4684, "step": 4198 }, { "epoch": 0.48148148148148145, "grad_norm": 0.22410309874476436, "learning_rate": 0.0005542091018628867, "loss": 2.4555, "step": 4199 }, { "epoch": 0.48159614723082217, "grad_norm": 0.2734295047046339, "learning_rate": 0.0005540244974584325, "loss": 2.5799, "step": 4200 }, { "epoch": 0.48171081298016283, "grad_norm": 0.24319467954381446, "learning_rate": 0.0005538398856023285, "loss": 2.4253, "step": 4201 }, { "epoch": 0.4818254787295035, "grad_norm": 0.27044248933350296, "learning_rate": 0.0005536552663200387, "loss": 2.5299, "step": 4202 }, { "epoch": 0.48194014447884415, "grad_norm": 0.2505844276806928, "learning_rate": 0.0005534706396370277, "loss": 2.5111, "step": 4203 }, { "epoch": 0.48205481022818486, "grad_norm": 0.24297564590093956, "learning_rate": 0.0005532860055787611, "loss": 2.4284, "step": 4204 }, { "epoch": 0.4821694759775255, "grad_norm": 0.2665742312700134, "learning_rate": 0.0005531013641707059, "loss": 2.438, "step": 4205 }, { "epoch": 0.4822841417268662, "grad_norm": 0.2677745265775324, "learning_rate": 0.0005529167154383296, "loss": 2.4421, "step": 4206 }, { "epoch": 0.48239880747620684, "grad_norm": 0.2670371489279821, "learning_rate": 0.0005527320594071012, "loss": 2.5502, "step": 4207 }, { "epoch": 0.48251347322554755, "grad_norm": 0.21826003909240005, "learning_rate": 0.0005525473961024901, "loss": 2.4324, "step": 4208 }, { "epoch": 0.4826281389748882, "grad_norm": 0.23030028735105276, "learning_rate": 0.0005523627255499677, "loss": 2.4573, "step": 4209 }, { "epoch": 0.48274280472422887, "grad_norm": 0.23932718133943717, "learning_rate": 0.0005521780477750054, "loss": 2.296, "step": 4210 }, { "epoch": 0.48285747047356953, "grad_norm": 0.24105268850172284, "learning_rate": 0.0005519933628030757, "loss": 2.5126, "step": 4211 }, { "epoch": 0.48297213622291024, "grad_norm": 0.25143757053984317, "learning_rate": 0.000551808670659653, "loss": 2.4582, "step": 4212 }, { "epoch": 0.4830868019722509, "grad_norm": 0.25384382760074164, "learning_rate": 0.0005516239713702116, "loss": 2.5686, "step": 4213 }, { "epoch": 0.48320146772159156, "grad_norm": 0.2916862086286595, "learning_rate": 0.0005514392649602273, "loss": 2.4804, "step": 4214 }, { "epoch": 0.4833161334709322, "grad_norm": 0.2729425916295906, "learning_rate": 0.0005512545514551772, "loss": 2.6418, "step": 4215 }, { "epoch": 0.4834307992202729, "grad_norm": 0.25862163737338417, "learning_rate": 0.0005510698308805385, "loss": 2.3957, "step": 4216 }, { "epoch": 0.4835454649696136, "grad_norm": 0.26077714663841295, "learning_rate": 0.0005508851032617902, "loss": 2.4755, "step": 4217 }, { "epoch": 0.48366013071895425, "grad_norm": 0.2555993867906374, "learning_rate": 0.0005507003686244119, "loss": 2.5443, "step": 4218 }, { "epoch": 0.4837747964682949, "grad_norm": 0.23200621062770846, "learning_rate": 0.0005505156269938841, "loss": 2.545, "step": 4219 }, { "epoch": 0.48388946221763557, "grad_norm": 0.23766825825499693, "learning_rate": 0.0005503308783956886, "loss": 2.5025, "step": 4220 }, { "epoch": 0.4840041279669763, "grad_norm": 0.24549516255760703, "learning_rate": 0.0005501461228553075, "loss": 2.3714, "step": 4221 }, { "epoch": 0.48411879371631694, "grad_norm": 0.2399244918309305, "learning_rate": 0.000549961360398225, "loss": 2.6042, "step": 4222 }, { "epoch": 0.4842334594656576, "grad_norm": 0.23892995641309037, "learning_rate": 0.0005497765910499249, "loss": 2.4644, "step": 4223 }, { "epoch": 0.48434812521499826, "grad_norm": 0.2305873569472715, "learning_rate": 0.0005495918148358931, "loss": 2.5181, "step": 4224 }, { "epoch": 0.484462790964339, "grad_norm": 0.21161925391713776, "learning_rate": 0.0005494070317816157, "loss": 2.3756, "step": 4225 }, { "epoch": 0.48457745671367963, "grad_norm": 0.19553756490611487, "learning_rate": 0.00054922224191258, "loss": 2.4679, "step": 4226 }, { "epoch": 0.4846921224630203, "grad_norm": 0.2509488475806269, "learning_rate": 0.0005490374452542743, "loss": 2.4931, "step": 4227 }, { "epoch": 0.48480678821236095, "grad_norm": 0.2032686860834027, "learning_rate": 0.0005488526418321877, "loss": 2.5107, "step": 4228 }, { "epoch": 0.48492145396170167, "grad_norm": 0.2570530359428502, "learning_rate": 0.0005486678316718106, "loss": 2.4928, "step": 4229 }, { "epoch": 0.4850361197110423, "grad_norm": 0.21508363925198118, "learning_rate": 0.0005484830147986336, "loss": 2.5095, "step": 4230 }, { "epoch": 0.485150785460383, "grad_norm": 0.23659902635576155, "learning_rate": 0.0005482981912381488, "loss": 2.4119, "step": 4231 }, { "epoch": 0.48526545120972364, "grad_norm": 0.2701606249619523, "learning_rate": 0.0005481133610158494, "loss": 2.5991, "step": 4232 }, { "epoch": 0.4853801169590643, "grad_norm": 0.24778337449965787, "learning_rate": 0.0005479285241572288, "loss": 2.6576, "step": 4233 }, { "epoch": 0.485494782708405, "grad_norm": 0.236888107365057, "learning_rate": 0.000547743680687782, "loss": 2.3645, "step": 4234 }, { "epoch": 0.4856094484577457, "grad_norm": 0.23407375060186117, "learning_rate": 0.0005475588306330046, "loss": 2.4974, "step": 4235 }, { "epoch": 0.48572411420708633, "grad_norm": 0.22629806638267463, "learning_rate": 0.0005473739740183928, "loss": 2.531, "step": 4236 }, { "epoch": 0.485838779956427, "grad_norm": 0.24855762162652337, "learning_rate": 0.0005471891108694446, "loss": 2.4044, "step": 4237 }, { "epoch": 0.4859534457057677, "grad_norm": 0.2564242889606033, "learning_rate": 0.0005470042412116579, "loss": 2.464, "step": 4238 }, { "epoch": 0.48606811145510836, "grad_norm": 0.27857970126644244, "learning_rate": 0.0005468193650705321, "loss": 2.423, "step": 4239 }, { "epoch": 0.486182777204449, "grad_norm": 0.24478707403159253, "learning_rate": 0.0005466344824715674, "loss": 2.4987, "step": 4240 }, { "epoch": 0.4862974429537897, "grad_norm": 0.2536700361415309, "learning_rate": 0.0005464495934402648, "loss": 2.4268, "step": 4241 }, { "epoch": 0.4864121087031304, "grad_norm": 0.26261690434204255, "learning_rate": 0.0005462646980021262, "loss": 2.4507, "step": 4242 }, { "epoch": 0.48652677445247106, "grad_norm": 0.36019749984865407, "learning_rate": 0.0005460797961826542, "loss": 2.4636, "step": 4243 }, { "epoch": 0.4866414402018117, "grad_norm": 0.2445501595451593, "learning_rate": 0.000545894888007353, "loss": 2.5122, "step": 4244 }, { "epoch": 0.4867561059511524, "grad_norm": 0.2291999464700877, "learning_rate": 0.0005457099735017267, "loss": 2.5294, "step": 4245 }, { "epoch": 0.4868707717004931, "grad_norm": 0.24812820036897562, "learning_rate": 0.0005455250526912807, "loss": 2.6283, "step": 4246 }, { "epoch": 0.48698543744983375, "grad_norm": 0.2231409092844094, "learning_rate": 0.0005453401256015218, "loss": 2.4978, "step": 4247 }, { "epoch": 0.4871001031991744, "grad_norm": 0.2512465108857026, "learning_rate": 0.0005451551922579566, "loss": 2.5082, "step": 4248 }, { "epoch": 0.48721476894851506, "grad_norm": 0.2066376412129163, "learning_rate": 0.0005449702526860934, "loss": 2.2685, "step": 4249 }, { "epoch": 0.4873294346978557, "grad_norm": 0.22974447559988523, "learning_rate": 0.0005447853069114411, "loss": 2.529, "step": 4250 }, { "epoch": 0.48744410044719644, "grad_norm": 0.2740874949110293, "learning_rate": 0.0005446003549595093, "loss": 2.4813, "step": 4251 }, { "epoch": 0.4875587661965371, "grad_norm": 0.2566153116365614, "learning_rate": 0.0005444153968558088, "loss": 2.4179, "step": 4252 }, { "epoch": 0.48767343194587776, "grad_norm": 0.23473142848904294, "learning_rate": 0.0005442304326258508, "loss": 2.5479, "step": 4253 }, { "epoch": 0.4877880976952184, "grad_norm": 0.2374375579176573, "learning_rate": 0.000544045462295148, "loss": 2.3556, "step": 4254 }, { "epoch": 0.48790276344455913, "grad_norm": 0.24071576626632016, "learning_rate": 0.000543860485889213, "loss": 2.627, "step": 4255 }, { "epoch": 0.4880174291938998, "grad_norm": 0.23503959632370858, "learning_rate": 0.00054367550343356, "loss": 2.4079, "step": 4256 }, { "epoch": 0.48813209494324045, "grad_norm": 0.22458094518413046, "learning_rate": 0.000543490514953704, "loss": 2.423, "step": 4257 }, { "epoch": 0.4882467606925811, "grad_norm": 0.2398448295756372, "learning_rate": 0.0005433055204751604, "loss": 2.5793, "step": 4258 }, { "epoch": 0.4883614264419218, "grad_norm": 0.23464049198236497, "learning_rate": 0.0005431205200234457, "loss": 2.5672, "step": 4259 }, { "epoch": 0.4884760921912625, "grad_norm": 0.2518782650815768, "learning_rate": 0.0005429355136240773, "loss": 2.4267, "step": 4260 }, { "epoch": 0.48859075794060314, "grad_norm": 0.23715372959684425, "learning_rate": 0.0005427505013025731, "loss": 2.4568, "step": 4261 }, { "epoch": 0.4887054236899438, "grad_norm": 0.24645576035430528, "learning_rate": 0.0005425654830844523, "loss": 2.5417, "step": 4262 }, { "epoch": 0.4888200894392845, "grad_norm": 0.24759509493121049, "learning_rate": 0.0005423804589952344, "loss": 2.5087, "step": 4263 }, { "epoch": 0.48893475518862517, "grad_norm": 0.2598287931736392, "learning_rate": 0.0005421954290604399, "loss": 2.5177, "step": 4264 }, { "epoch": 0.48904942093796583, "grad_norm": 0.2556963448716178, "learning_rate": 0.0005420103933055906, "loss": 2.4589, "step": 4265 }, { "epoch": 0.4891640866873065, "grad_norm": 0.26849628655194163, "learning_rate": 0.0005418253517562079, "loss": 2.4733, "step": 4266 }, { "epoch": 0.48927875243664715, "grad_norm": 0.26067681780266044, "learning_rate": 0.0005416403044378156, "loss": 2.5143, "step": 4267 }, { "epoch": 0.48939341818598786, "grad_norm": 0.2404985866767127, "learning_rate": 0.0005414552513759368, "loss": 2.3637, "step": 4268 }, { "epoch": 0.4895080839353285, "grad_norm": 0.2548735428180745, "learning_rate": 0.0005412701925960964, "loss": 2.4836, "step": 4269 }, { "epoch": 0.4896227496846692, "grad_norm": 0.23476625803310852, "learning_rate": 0.0005410851281238195, "loss": 2.5129, "step": 4270 }, { "epoch": 0.48973741543400984, "grad_norm": 0.23508413597369218, "learning_rate": 0.0005409000579846324, "loss": 2.6133, "step": 4271 }, { "epoch": 0.48985208118335055, "grad_norm": 0.24962041487693615, "learning_rate": 0.0005407149822040619, "loss": 2.3489, "step": 4272 }, { "epoch": 0.4899667469326912, "grad_norm": 0.25083219597405193, "learning_rate": 0.0005405299008076357, "loss": 2.6215, "step": 4273 }, { "epoch": 0.49008141268203187, "grad_norm": 0.2426135980413356, "learning_rate": 0.0005403448138208823, "loss": 2.4241, "step": 4274 }, { "epoch": 0.49019607843137253, "grad_norm": 0.23926376804241262, "learning_rate": 0.0005401597212693308, "loss": 2.4401, "step": 4275 }, { "epoch": 0.49031074418071324, "grad_norm": 0.23207931903684031, "learning_rate": 0.0005399746231785113, "loss": 2.4661, "step": 4276 }, { "epoch": 0.4904254099300539, "grad_norm": 0.27293068142181676, "learning_rate": 0.0005397895195739545, "loss": 2.383, "step": 4277 }, { "epoch": 0.49054007567939456, "grad_norm": 0.2537311051258877, "learning_rate": 0.0005396044104811921, "loss": 2.3339, "step": 4278 }, { "epoch": 0.4906547414287352, "grad_norm": 0.24632938681352637, "learning_rate": 0.000539419295925756, "loss": 2.3931, "step": 4279 }, { "epoch": 0.49076940717807593, "grad_norm": 0.22728189776320773, "learning_rate": 0.0005392341759331795, "loss": 2.521, "step": 4280 }, { "epoch": 0.4908840729274166, "grad_norm": 0.2674274932067508, "learning_rate": 0.0005390490505289962, "loss": 2.5069, "step": 4281 }, { "epoch": 0.49099873867675725, "grad_norm": 0.2563514920239442, "learning_rate": 0.0005388639197387409, "loss": 2.5872, "step": 4282 }, { "epoch": 0.4911134044260979, "grad_norm": 0.2502072809615758, "learning_rate": 0.0005386787835879486, "loss": 2.4592, "step": 4283 }, { "epoch": 0.49122807017543857, "grad_norm": 0.2765230776312634, "learning_rate": 0.0005384936421021553, "loss": 2.5667, "step": 4284 }, { "epoch": 0.4913427359247793, "grad_norm": 0.2200257965411627, "learning_rate": 0.0005383084953068981, "loss": 2.4286, "step": 4285 }, { "epoch": 0.49145740167411994, "grad_norm": 0.25761105066310963, "learning_rate": 0.0005381233432277139, "loss": 2.4997, "step": 4286 }, { "epoch": 0.4915720674234606, "grad_norm": 0.25414238638468956, "learning_rate": 0.0005379381858901413, "loss": 2.4525, "step": 4287 }, { "epoch": 0.49168673317280126, "grad_norm": 0.2511874865540193, "learning_rate": 0.0005377530233197191, "loss": 2.5808, "step": 4288 }, { "epoch": 0.491801398922142, "grad_norm": 0.20720540273143123, "learning_rate": 0.000537567855541987, "loss": 2.3213, "step": 4289 }, { "epoch": 0.49191606467148263, "grad_norm": 0.2432710960792619, "learning_rate": 0.0005373826825824854, "loss": 2.453, "step": 4290 }, { "epoch": 0.4920307304208233, "grad_norm": 0.2595504598432154, "learning_rate": 0.0005371975044667553, "loss": 2.4501, "step": 4291 }, { "epoch": 0.49214539617016395, "grad_norm": 0.20816762225845506, "learning_rate": 0.0005370123212203384, "loss": 2.5177, "step": 4292 }, { "epoch": 0.49226006191950467, "grad_norm": 0.22714299356906906, "learning_rate": 0.0005368271328687774, "loss": 2.515, "step": 4293 }, { "epoch": 0.4923747276688453, "grad_norm": 0.22210753118075688, "learning_rate": 0.0005366419394376154, "loss": 2.2833, "step": 4294 }, { "epoch": 0.492489393418186, "grad_norm": 0.25128822091647535, "learning_rate": 0.0005364567409523963, "loss": 2.4628, "step": 4295 }, { "epoch": 0.49260405916752664, "grad_norm": 0.2602511270742645, "learning_rate": 0.0005362715374386646, "loss": 2.4961, "step": 4296 }, { "epoch": 0.49271872491686736, "grad_norm": 0.24531584186410146, "learning_rate": 0.0005360863289219659, "loss": 2.5136, "step": 4297 }, { "epoch": 0.492833390666208, "grad_norm": 0.2613062806439916, "learning_rate": 0.000535901115427846, "loss": 2.4943, "step": 4298 }, { "epoch": 0.4929480564155487, "grad_norm": 0.26172691646817, "learning_rate": 0.0005357158969818514, "loss": 2.4155, "step": 4299 }, { "epoch": 0.49306272216488933, "grad_norm": 0.2331435011732198, "learning_rate": 0.0005355306736095298, "loss": 2.5172, "step": 4300 }, { "epoch": 0.49317738791423, "grad_norm": 0.2530558766681741, "learning_rate": 0.000535345445336429, "loss": 2.5656, "step": 4301 }, { "epoch": 0.4932920536635707, "grad_norm": 0.24530525666215877, "learning_rate": 0.0005351602121880976, "loss": 2.5785, "step": 4302 }, { "epoch": 0.49340671941291137, "grad_norm": 0.2655244967781446, "learning_rate": 0.0005349749741900853, "loss": 2.4208, "step": 4303 }, { "epoch": 0.493521385162252, "grad_norm": 0.2249974826944688, "learning_rate": 0.0005347897313679419, "loss": 2.4915, "step": 4304 }, { "epoch": 0.4936360509115927, "grad_norm": 0.29188190162051086, "learning_rate": 0.0005346044837472182, "loss": 2.5387, "step": 4305 }, { "epoch": 0.4937507166609334, "grad_norm": 0.24289980749343582, "learning_rate": 0.0005344192313534657, "loss": 2.484, "step": 4306 }, { "epoch": 0.49386538241027406, "grad_norm": 0.2249088135742346, "learning_rate": 0.0005342339742122363, "loss": 2.5179, "step": 4307 }, { "epoch": 0.4939800481596147, "grad_norm": 0.23733012950216503, "learning_rate": 0.0005340487123490826, "loss": 2.4911, "step": 4308 }, { "epoch": 0.4940947139089554, "grad_norm": 0.2340885346788916, "learning_rate": 0.0005338634457895582, "loss": 2.4287, "step": 4309 }, { "epoch": 0.4942093796582961, "grad_norm": 0.25535947570363887, "learning_rate": 0.000533678174559217, "loss": 2.4026, "step": 4310 }, { "epoch": 0.49432404540763675, "grad_norm": 0.24377776070634471, "learning_rate": 0.0005334928986836133, "loss": 2.426, "step": 4311 }, { "epoch": 0.4944387111569774, "grad_norm": 0.2581042010118711, "learning_rate": 0.000533307618188303, "loss": 2.3824, "step": 4312 }, { "epoch": 0.49455337690631807, "grad_norm": 0.2661210215017931, "learning_rate": 0.0005331223330988414, "loss": 2.4082, "step": 4313 }, { "epoch": 0.4946680426556588, "grad_norm": 0.23815720206257696, "learning_rate": 0.0005329370434407854, "loss": 2.5449, "step": 4314 }, { "epoch": 0.49478270840499944, "grad_norm": 0.24444229151729496, "learning_rate": 0.0005327517492396922, "loss": 2.5335, "step": 4315 }, { "epoch": 0.4948973741543401, "grad_norm": 0.23806688669118478, "learning_rate": 0.0005325664505211194, "loss": 2.551, "step": 4316 }, { "epoch": 0.49501203990368076, "grad_norm": 0.2504947960283395, "learning_rate": 0.0005323811473106256, "loss": 2.4301, "step": 4317 }, { "epoch": 0.4951267056530214, "grad_norm": 0.22534763705446684, "learning_rate": 0.0005321958396337696, "loss": 2.4637, "step": 4318 }, { "epoch": 0.49524137140236213, "grad_norm": 0.2618821137478578, "learning_rate": 0.0005320105275161115, "loss": 2.4316, "step": 4319 }, { "epoch": 0.4953560371517028, "grad_norm": 0.2558620385536041, "learning_rate": 0.0005318252109832111, "loss": 2.6486, "step": 4320 }, { "epoch": 0.49547070290104345, "grad_norm": 0.2521347483261181, "learning_rate": 0.0005316398900606296, "loss": 2.5281, "step": 4321 }, { "epoch": 0.4955853686503841, "grad_norm": 0.22726251697872826, "learning_rate": 0.0005314545647739283, "loss": 2.5423, "step": 4322 }, { "epoch": 0.4957000343997248, "grad_norm": 0.22684565656876438, "learning_rate": 0.0005312692351486693, "loss": 2.4285, "step": 4323 }, { "epoch": 0.4958147001490655, "grad_norm": 0.23929360932044078, "learning_rate": 0.0005310839012104155, "loss": 2.351, "step": 4324 }, { "epoch": 0.49592936589840614, "grad_norm": 0.23307035068356424, "learning_rate": 0.00053089856298473, "loss": 2.4303, "step": 4325 }, { "epoch": 0.4960440316477468, "grad_norm": 0.24511968533819883, "learning_rate": 0.0005307132204971768, "loss": 2.5271, "step": 4326 }, { "epoch": 0.4961586973970875, "grad_norm": 0.23896102011573656, "learning_rate": 0.00053052787377332, "loss": 2.3507, "step": 4327 }, { "epoch": 0.49627336314642817, "grad_norm": 0.2611983246520042, "learning_rate": 0.0005303425228387251, "loss": 2.4173, "step": 4328 }, { "epoch": 0.49638802889576883, "grad_norm": 0.26947685251238335, "learning_rate": 0.0005301571677189576, "loss": 2.5653, "step": 4329 }, { "epoch": 0.4965026946451095, "grad_norm": 0.2615342277467975, "learning_rate": 0.0005299718084395837, "loss": 2.4597, "step": 4330 }, { "epoch": 0.4966173603944502, "grad_norm": 0.26557108328446577, "learning_rate": 0.00052978644502617, "loss": 2.4971, "step": 4331 }, { "epoch": 0.49673202614379086, "grad_norm": 0.2198060610422783, "learning_rate": 0.0005296010775042841, "loss": 2.3134, "step": 4332 }, { "epoch": 0.4968466918931315, "grad_norm": 0.29288098044585603, "learning_rate": 0.0005294157058994936, "loss": 2.4945, "step": 4333 }, { "epoch": 0.4969613576424722, "grad_norm": 0.23740508081886666, "learning_rate": 0.0005292303302373674, "loss": 2.6261, "step": 4334 }, { "epoch": 0.49707602339181284, "grad_norm": 0.24642642766795028, "learning_rate": 0.0005290449505434744, "loss": 2.5387, "step": 4335 }, { "epoch": 0.49719068914115355, "grad_norm": 0.2432609178735972, "learning_rate": 0.0005288595668433839, "loss": 2.476, "step": 4336 }, { "epoch": 0.4973053548904942, "grad_norm": 0.22396146015071064, "learning_rate": 0.0005286741791626664, "loss": 2.4127, "step": 4337 }, { "epoch": 0.49742002063983487, "grad_norm": 0.2138523084117149, "learning_rate": 0.0005284887875268925, "loss": 2.378, "step": 4338 }, { "epoch": 0.49753468638917553, "grad_norm": 0.2290172302443223, "learning_rate": 0.0005283033919616331, "loss": 2.4073, "step": 4339 }, { "epoch": 0.49764935213851624, "grad_norm": 0.23100085948497584, "learning_rate": 0.0005281179924924608, "loss": 2.5252, "step": 4340 }, { "epoch": 0.4977640178878569, "grad_norm": 0.2792997905698276, "learning_rate": 0.000527932589144947, "loss": 2.4047, "step": 4341 }, { "epoch": 0.49787868363719756, "grad_norm": 0.23418684662042613, "learning_rate": 0.0005277471819446651, "loss": 2.4912, "step": 4342 }, { "epoch": 0.4979933493865382, "grad_norm": 0.2640122099667919, "learning_rate": 0.0005275617709171882, "loss": 2.5087, "step": 4343 }, { "epoch": 0.49810801513587893, "grad_norm": 0.2347998866606567, "learning_rate": 0.0005273763560880907, "loss": 2.4046, "step": 4344 }, { "epoch": 0.4982226808852196, "grad_norm": 0.2563839596862694, "learning_rate": 0.0005271909374829466, "loss": 2.5381, "step": 4345 }, { "epoch": 0.49833734663456025, "grad_norm": 0.23775486581077201, "learning_rate": 0.0005270055151273309, "loss": 2.4436, "step": 4346 }, { "epoch": 0.4984520123839009, "grad_norm": 0.2378905835063179, "learning_rate": 0.0005268200890468192, "loss": 2.3399, "step": 4347 }, { "epoch": 0.4985666781332416, "grad_norm": 0.23686736558516328, "learning_rate": 0.0005266346592669875, "loss": 2.4547, "step": 4348 }, { "epoch": 0.4986813438825823, "grad_norm": 0.29314887510902804, "learning_rate": 0.0005264492258134121, "loss": 2.483, "step": 4349 }, { "epoch": 0.49879600963192294, "grad_norm": 0.24834169641333712, "learning_rate": 0.0005262637887116703, "loss": 2.3696, "step": 4350 }, { "epoch": 0.4989106753812636, "grad_norm": 0.24855628444251068, "learning_rate": 0.0005260783479873396, "loss": 2.4848, "step": 4351 }, { "epoch": 0.49902534113060426, "grad_norm": 0.28044919871995994, "learning_rate": 0.0005258929036659976, "loss": 2.381, "step": 4352 }, { "epoch": 0.499140006879945, "grad_norm": 0.24000288919255766, "learning_rate": 0.0005257074557732232, "loss": 2.4622, "step": 4353 }, { "epoch": 0.49925467262928563, "grad_norm": 0.2402070588094161, "learning_rate": 0.0005255220043345956, "loss": 2.378, "step": 4354 }, { "epoch": 0.4993693383786263, "grad_norm": 0.24815694349582698, "learning_rate": 0.0005253365493756936, "loss": 2.4035, "step": 4355 }, { "epoch": 0.49948400412796695, "grad_norm": 0.23350687870001346, "learning_rate": 0.0005251510909220977, "loss": 2.4617, "step": 4356 }, { "epoch": 0.49959866987730767, "grad_norm": 0.25447443417865584, "learning_rate": 0.0005249656289993883, "loss": 2.5667, "step": 4357 }, { "epoch": 0.4997133356266483, "grad_norm": 0.22263325311493223, "learning_rate": 0.0005247801636331462, "loss": 2.4553, "step": 4358 }, { "epoch": 0.499828001375989, "grad_norm": 0.26593953278004306, "learning_rate": 0.000524594694848953, "loss": 2.5588, "step": 4359 }, { "epoch": 0.49994266712532964, "grad_norm": 0.24116554696319004, "learning_rate": 0.0005244092226723903, "loss": 2.5647, "step": 4360 }, { "epoch": 0.5000573328746704, "grad_norm": 0.2154257618620745, "learning_rate": 0.0005242237471290407, "loss": 2.5087, "step": 4361 }, { "epoch": 0.500171998624011, "grad_norm": 0.24405985841943548, "learning_rate": 0.0005240382682444868, "loss": 2.5069, "step": 4362 }, { "epoch": 0.5002866643733517, "grad_norm": 0.24672393776592733, "learning_rate": 0.0005238527860443122, "loss": 2.4768, "step": 4363 }, { "epoch": 0.5004013301226924, "grad_norm": 0.24480548735357616, "learning_rate": 0.0005236673005541003, "loss": 2.4839, "step": 4364 }, { "epoch": 0.500515995872033, "grad_norm": 0.23641511146009606, "learning_rate": 0.0005234818117994355, "loss": 2.4447, "step": 4365 }, { "epoch": 0.5006306616213737, "grad_norm": 0.23033054122489685, "learning_rate": 0.0005232963198059024, "loss": 2.4205, "step": 4366 }, { "epoch": 0.5007453273707144, "grad_norm": 0.23471865686219945, "learning_rate": 0.000523110824599086, "loss": 2.4934, "step": 4367 }, { "epoch": 0.500859993120055, "grad_norm": 0.22465373613646814, "learning_rate": 0.0005229253262045719, "loss": 2.377, "step": 4368 }, { "epoch": 0.5009746588693957, "grad_norm": 0.2559427369580352, "learning_rate": 0.000522739824647946, "loss": 2.3529, "step": 4369 }, { "epoch": 0.5010893246187363, "grad_norm": 0.2704878009102001, "learning_rate": 0.0005225543199547948, "loss": 2.4102, "step": 4370 }, { "epoch": 0.5012039903680771, "grad_norm": 0.2570878713750787, "learning_rate": 0.0005223688121507051, "loss": 2.6003, "step": 4371 }, { "epoch": 0.5013186561174178, "grad_norm": 0.24828334581651285, "learning_rate": 0.0005221833012612642, "loss": 2.3542, "step": 4372 }, { "epoch": 0.5014333218667584, "grad_norm": 0.24665577293376492, "learning_rate": 0.0005219977873120596, "loss": 2.5724, "step": 4373 }, { "epoch": 0.5015479876160991, "grad_norm": 0.24817184883593726, "learning_rate": 0.0005218122703286797, "loss": 2.5644, "step": 4374 }, { "epoch": 0.5016626533654397, "grad_norm": 0.26257739917337064, "learning_rate": 0.0005216267503367127, "loss": 2.2543, "step": 4375 }, { "epoch": 0.5017773191147804, "grad_norm": 0.26173233982638006, "learning_rate": 0.0005214412273617478, "loss": 2.4989, "step": 4376 }, { "epoch": 0.5018919848641211, "grad_norm": 0.22745211643805005, "learning_rate": 0.0005212557014293744, "loss": 2.2826, "step": 4377 }, { "epoch": 0.5020066506134617, "grad_norm": 0.26922479109104175, "learning_rate": 0.0005210701725651821, "loss": 2.489, "step": 4378 }, { "epoch": 0.5021213163628024, "grad_norm": 0.246191625954822, "learning_rate": 0.0005208846407947612, "loss": 2.4276, "step": 4379 }, { "epoch": 0.5022359821121432, "grad_norm": 0.22786411001936432, "learning_rate": 0.000520699106143702, "loss": 2.4541, "step": 4380 }, { "epoch": 0.5023506478614838, "grad_norm": 0.24946120802188787, "learning_rate": 0.0005205135686375958, "loss": 2.4557, "step": 4381 }, { "epoch": 0.5024653136108245, "grad_norm": 0.24385283179877318, "learning_rate": 0.0005203280283020338, "loss": 2.5944, "step": 4382 }, { "epoch": 0.5025799793601651, "grad_norm": 0.2508890244482735, "learning_rate": 0.0005201424851626078, "loss": 2.4113, "step": 4383 }, { "epoch": 0.5026946451095058, "grad_norm": 0.23851510829480885, "learning_rate": 0.00051995693924491, "loss": 2.5417, "step": 4384 }, { "epoch": 0.5028093108588465, "grad_norm": 0.25325972274514374, "learning_rate": 0.0005197713905745328, "loss": 2.5853, "step": 4385 }, { "epoch": 0.5029239766081871, "grad_norm": 0.26702654435678297, "learning_rate": 0.0005195858391770689, "loss": 2.6262, "step": 4386 }, { "epoch": 0.5030386423575278, "grad_norm": 0.23603443757491216, "learning_rate": 0.0005194002850781122, "loss": 2.6366, "step": 4387 }, { "epoch": 0.5031533081068684, "grad_norm": 0.23710005345337615, "learning_rate": 0.0005192147283032557, "loss": 2.4218, "step": 4388 }, { "epoch": 0.5032679738562091, "grad_norm": 0.25943668770551953, "learning_rate": 0.000519029168878094, "loss": 2.4113, "step": 4389 }, { "epoch": 0.5033826396055499, "grad_norm": 0.22831516495904508, "learning_rate": 0.000518843606828221, "loss": 2.503, "step": 4390 }, { "epoch": 0.5034973053548905, "grad_norm": 0.21658715154696095, "learning_rate": 0.0005186580421792315, "loss": 2.3754, "step": 4391 }, { "epoch": 0.5036119711042312, "grad_norm": 0.2547942756977791, "learning_rate": 0.0005184724749567209, "loss": 2.3781, "step": 4392 }, { "epoch": 0.5037266368535719, "grad_norm": 0.24651508559680219, "learning_rate": 0.0005182869051862844, "loss": 2.35, "step": 4393 }, { "epoch": 0.5038413026029125, "grad_norm": 0.2600624583993239, "learning_rate": 0.0005181013328935181, "loss": 2.4508, "step": 4394 }, { "epoch": 0.5039559683522532, "grad_norm": 0.2606676702325652, "learning_rate": 0.0005179157581040178, "loss": 2.6062, "step": 4395 }, { "epoch": 0.5040706341015938, "grad_norm": 0.2772535738480416, "learning_rate": 0.0005177301808433802, "loss": 2.512, "step": 4396 }, { "epoch": 0.5041852998509345, "grad_norm": 0.24186459735077867, "learning_rate": 0.0005175446011372022, "loss": 2.3872, "step": 4397 }, { "epoch": 0.5042999656002752, "grad_norm": 0.2558804130529151, "learning_rate": 0.0005173590190110808, "loss": 2.4312, "step": 4398 }, { "epoch": 0.5044146313496158, "grad_norm": 0.24112551217120248, "learning_rate": 0.0005171734344906136, "loss": 2.3829, "step": 4399 }, { "epoch": 0.5045292970989566, "grad_norm": 0.2415728109942904, "learning_rate": 0.0005169878476013986, "loss": 2.3493, "step": 4400 }, { "epoch": 0.5046439628482973, "grad_norm": 0.246232785858063, "learning_rate": 0.0005168022583690339, "loss": 2.5167, "step": 4401 }, { "epoch": 0.5047586285976379, "grad_norm": 0.26085823164738764, "learning_rate": 0.0005166166668191176, "loss": 2.4536, "step": 4402 }, { "epoch": 0.5048732943469786, "grad_norm": 0.22342981142176194, "learning_rate": 0.0005164310729772492, "loss": 2.4591, "step": 4403 }, { "epoch": 0.5049879600963192, "grad_norm": 0.2376840349946497, "learning_rate": 0.0005162454768690274, "loss": 2.4508, "step": 4404 }, { "epoch": 0.5051026258456599, "grad_norm": 0.25095568889004893, "learning_rate": 0.0005160598785200515, "loss": 2.5652, "step": 4405 }, { "epoch": 0.5052172915950006, "grad_norm": 0.20828091036306182, "learning_rate": 0.0005158742779559217, "loss": 2.3737, "step": 4406 }, { "epoch": 0.5053319573443412, "grad_norm": 0.2440456995035093, "learning_rate": 0.0005156886752022379, "loss": 2.4314, "step": 4407 }, { "epoch": 0.5054466230936819, "grad_norm": 0.23002148398894387, "learning_rate": 0.0005155030702846002, "loss": 2.2855, "step": 4408 }, { "epoch": 0.5055612888430225, "grad_norm": 0.24588306016190825, "learning_rate": 0.0005153174632286097, "loss": 2.4195, "step": 4409 }, { "epoch": 0.5056759545923633, "grad_norm": 0.22560849928427396, "learning_rate": 0.000515131854059867, "loss": 2.4525, "step": 4410 }, { "epoch": 0.505790620341704, "grad_norm": 0.21367860373916914, "learning_rate": 0.0005149462428039734, "loss": 2.4725, "step": 4411 }, { "epoch": 0.5059052860910446, "grad_norm": 0.23780094065203372, "learning_rate": 0.0005147606294865307, "loss": 2.3423, "step": 4412 }, { "epoch": 0.5060199518403853, "grad_norm": 0.2282695418919905, "learning_rate": 0.0005145750141331405, "loss": 2.4634, "step": 4413 }, { "epoch": 0.506134617589726, "grad_norm": 0.26447227568405257, "learning_rate": 0.0005143893967694047, "loss": 2.3778, "step": 4414 }, { "epoch": 0.5062492833390666, "grad_norm": 0.24072813448579483, "learning_rate": 0.0005142037774209262, "loss": 2.4324, "step": 4415 }, { "epoch": 0.5063639490884073, "grad_norm": 0.2540354720690453, "learning_rate": 0.0005140181561133072, "loss": 2.5933, "step": 4416 }, { "epoch": 0.5064786148377479, "grad_norm": 0.2427770695912424, "learning_rate": 0.0005138325328721507, "loss": 2.4489, "step": 4417 }, { "epoch": 0.5065932805870886, "grad_norm": 0.22934548472730665, "learning_rate": 0.00051364690772306, "loss": 2.5259, "step": 4418 }, { "epoch": 0.5067079463364293, "grad_norm": 0.2549041071744384, "learning_rate": 0.0005134612806916387, "loss": 2.4151, "step": 4419 }, { "epoch": 0.50682261208577, "grad_norm": 0.2659909621739589, "learning_rate": 0.0005132756518034901, "loss": 2.4278, "step": 4420 }, { "epoch": 0.5069372778351107, "grad_norm": 0.23455112783123422, "learning_rate": 0.0005130900210842185, "loss": 2.509, "step": 4421 }, { "epoch": 0.5070519435844513, "grad_norm": 0.2664959095586685, "learning_rate": 0.000512904388559428, "loss": 2.54, "step": 4422 }, { "epoch": 0.507166609333792, "grad_norm": 0.2547826840573038, "learning_rate": 0.000512718754254723, "loss": 2.5208, "step": 4423 }, { "epoch": 0.5072812750831327, "grad_norm": 0.2441892015538959, "learning_rate": 0.0005125331181957083, "loss": 2.2279, "step": 4424 }, { "epoch": 0.5073959408324733, "grad_norm": 0.21239476219175457, "learning_rate": 0.000512347480407989, "loss": 2.4623, "step": 4425 }, { "epoch": 0.507510606581814, "grad_norm": 0.2745595924956919, "learning_rate": 0.00051216184091717, "loss": 2.4629, "step": 4426 }, { "epoch": 0.5076252723311547, "grad_norm": 0.22795539508952897, "learning_rate": 0.0005119761997488569, "loss": 2.3366, "step": 4427 }, { "epoch": 0.5077399380804953, "grad_norm": 0.2593414101848472, "learning_rate": 0.0005117905569286552, "loss": 2.438, "step": 4428 }, { "epoch": 0.507854603829836, "grad_norm": 0.2336111029956309, "learning_rate": 0.0005116049124821713, "loss": 2.4158, "step": 4429 }, { "epoch": 0.5079692695791767, "grad_norm": 0.24040709718790787, "learning_rate": 0.0005114192664350107, "loss": 2.505, "step": 4430 }, { "epoch": 0.5080839353285174, "grad_norm": 0.26136585074887025, "learning_rate": 0.0005112336188127798, "loss": 2.438, "step": 4431 }, { "epoch": 0.5081986010778581, "grad_norm": 0.26678426255053617, "learning_rate": 0.0005110479696410857, "loss": 2.4769, "step": 4432 }, { "epoch": 0.5083132668271987, "grad_norm": 0.24903560889938164, "learning_rate": 0.0005108623189455343, "loss": 2.5166, "step": 4433 }, { "epoch": 0.5084279325765394, "grad_norm": 0.25636261895291235, "learning_rate": 0.0005106766667517335, "loss": 2.3231, "step": 4434 }, { "epoch": 0.5085425983258801, "grad_norm": 0.261907056277633, "learning_rate": 0.0005104910130852899, "loss": 2.5382, "step": 4435 }, { "epoch": 0.5086572640752207, "grad_norm": 0.23130714531705523, "learning_rate": 0.0005103053579718109, "loss": 2.4024, "step": 4436 }, { "epoch": 0.5087719298245614, "grad_norm": 0.25426433593529624, "learning_rate": 0.0005101197014369043, "loss": 2.5182, "step": 4437 }, { "epoch": 0.508886595573902, "grad_norm": 0.2305563415556242, "learning_rate": 0.0005099340435061778, "loss": 2.364, "step": 4438 }, { "epoch": 0.5090012613232427, "grad_norm": 0.2676155459551082, "learning_rate": 0.0005097483842052393, "loss": 2.4152, "step": 4439 }, { "epoch": 0.5091159270725835, "grad_norm": 0.23044463646881486, "learning_rate": 0.000509562723559697, "loss": 2.48, "step": 4440 }, { "epoch": 0.5092305928219241, "grad_norm": 0.24413787166101544, "learning_rate": 0.0005093770615951594, "loss": 2.3764, "step": 4441 }, { "epoch": 0.5093452585712648, "grad_norm": 0.24532958513089856, "learning_rate": 0.0005091913983372347, "loss": 2.4351, "step": 4442 }, { "epoch": 0.5094599243206054, "grad_norm": 0.22810286833879911, "learning_rate": 0.0005090057338115319, "loss": 2.4722, "step": 4443 }, { "epoch": 0.5095745900699461, "grad_norm": 0.24140086287401355, "learning_rate": 0.0005088200680436599, "loss": 2.4526, "step": 4444 }, { "epoch": 0.5096892558192868, "grad_norm": 0.25546042318538686, "learning_rate": 0.0005086344010592275, "loss": 2.5041, "step": 4445 }, { "epoch": 0.5098039215686274, "grad_norm": 0.28499420258810065, "learning_rate": 0.0005084487328838441, "loss": 2.5198, "step": 4446 }, { "epoch": 0.5099185873179681, "grad_norm": 0.23295272539280099, "learning_rate": 0.0005082630635431191, "loss": 2.3797, "step": 4447 }, { "epoch": 0.5100332530673088, "grad_norm": 0.2585233925785146, "learning_rate": 0.0005080773930626619, "loss": 2.4864, "step": 4448 }, { "epoch": 0.5101479188166494, "grad_norm": 0.241014494544708, "learning_rate": 0.0005078917214680822, "loss": 2.577, "step": 4449 }, { "epoch": 0.5102625845659902, "grad_norm": 0.2636168725975801, "learning_rate": 0.0005077060487849903, "loss": 2.5293, "step": 4450 }, { "epoch": 0.5103772503153308, "grad_norm": 0.2552041124364305, "learning_rate": 0.0005075203750389956, "loss": 2.5916, "step": 4451 }, { "epoch": 0.5104919160646715, "grad_norm": 0.24462926706054902, "learning_rate": 0.0005073347002557085, "loss": 2.4131, "step": 4452 }, { "epoch": 0.5106065818140122, "grad_norm": 0.24262747945724658, "learning_rate": 0.0005071490244607395, "loss": 2.3664, "step": 4453 }, { "epoch": 0.5107212475633528, "grad_norm": 0.26114618047415733, "learning_rate": 0.0005069633476796986, "loss": 2.4137, "step": 4454 }, { "epoch": 0.5108359133126935, "grad_norm": 0.2492033942841191, "learning_rate": 0.0005067776699381969, "loss": 2.5549, "step": 4455 }, { "epoch": 0.5109505790620341, "grad_norm": 0.24389153957071577, "learning_rate": 0.0005065919912618446, "loss": 2.4683, "step": 4456 }, { "epoch": 0.5110652448113748, "grad_norm": 0.23813897541708795, "learning_rate": 0.0005064063116762529, "loss": 2.3981, "step": 4457 }, { "epoch": 0.5111799105607155, "grad_norm": 0.2341094797223545, "learning_rate": 0.0005062206312070323, "loss": 2.4831, "step": 4458 }, { "epoch": 0.5112945763100561, "grad_norm": 0.2618214948187654, "learning_rate": 0.0005060349498797945, "loss": 2.4423, "step": 4459 }, { "epoch": 0.5114092420593969, "grad_norm": 0.23977210626257703, "learning_rate": 0.0005058492677201505, "loss": 2.3942, "step": 4460 }, { "epoch": 0.5115239078087376, "grad_norm": 0.25745009131596475, "learning_rate": 0.0005056635847537112, "loss": 2.464, "step": 4461 }, { "epoch": 0.5116385735580782, "grad_norm": 0.24561943932029004, "learning_rate": 0.0005054779010060886, "loss": 2.4204, "step": 4462 }, { "epoch": 0.5117532393074189, "grad_norm": 0.2417337496842403, "learning_rate": 0.0005052922165028939, "loss": 2.5649, "step": 4463 }, { "epoch": 0.5118679050567595, "grad_norm": 0.24649837892767223, "learning_rate": 0.0005051065312697387, "loss": 2.331, "step": 4464 }, { "epoch": 0.5119825708061002, "grad_norm": 0.236471136853237, "learning_rate": 0.0005049208453322352, "loss": 2.451, "step": 4465 }, { "epoch": 0.5120972365554409, "grad_norm": 0.2644533385796049, "learning_rate": 0.0005047351587159945, "loss": 2.3906, "step": 4466 }, { "epoch": 0.5122119023047815, "grad_norm": 0.24228618654172104, "learning_rate": 0.000504549471446629, "loss": 2.4267, "step": 4467 }, { "epoch": 0.5123265680541222, "grad_norm": 0.24852514379482032, "learning_rate": 0.0005043637835497507, "loss": 2.54, "step": 4468 }, { "epoch": 0.512441233803463, "grad_norm": 0.22740614956590777, "learning_rate": 0.0005041780950509716, "loss": 2.5298, "step": 4469 }, { "epoch": 0.5125558995528036, "grad_norm": 0.2529305558200204, "learning_rate": 0.000503992405975904, "loss": 2.4577, "step": 4470 }, { "epoch": 0.5126705653021443, "grad_norm": 0.26806820702665496, "learning_rate": 0.00050380671635016, "loss": 2.3439, "step": 4471 }, { "epoch": 0.5127852310514849, "grad_norm": 0.2420338252787076, "learning_rate": 0.0005036210261993523, "loss": 2.2924, "step": 4472 }, { "epoch": 0.5128998968008256, "grad_norm": 0.2539610864806883, "learning_rate": 0.0005034353355490927, "loss": 2.4823, "step": 4473 }, { "epoch": 0.5130145625501663, "grad_norm": 0.27065266869163895, "learning_rate": 0.0005032496444249943, "loss": 2.5463, "step": 4474 }, { "epoch": 0.5131292282995069, "grad_norm": 0.2608645867523156, "learning_rate": 0.0005030639528526693, "loss": 2.414, "step": 4475 }, { "epoch": 0.5132438940488476, "grad_norm": 0.23714210659653825, "learning_rate": 0.0005028782608577304, "loss": 2.4351, "step": 4476 }, { "epoch": 0.5133585597981882, "grad_norm": 0.2662937404700151, "learning_rate": 0.0005026925684657902, "loss": 2.4784, "step": 4477 }, { "epoch": 0.5134732255475289, "grad_norm": 0.24344217044384162, "learning_rate": 0.0005025068757024616, "loss": 2.4705, "step": 4478 }, { "epoch": 0.5135878912968697, "grad_norm": 0.23555082809672234, "learning_rate": 0.0005023211825933572, "loss": 2.4809, "step": 4479 }, { "epoch": 0.5137025570462103, "grad_norm": 0.2347572306100918, "learning_rate": 0.00050213548916409, "loss": 2.4747, "step": 4480 }, { "epoch": 0.513817222795551, "grad_norm": 0.27004970693695995, "learning_rate": 0.0005019497954402728, "loss": 2.4364, "step": 4481 }, { "epoch": 0.5139318885448917, "grad_norm": 0.2235431060459389, "learning_rate": 0.0005017641014475184, "loss": 2.4231, "step": 4482 }, { "epoch": 0.5140465542942323, "grad_norm": 0.26006520453402177, "learning_rate": 0.0005015784072114397, "loss": 2.3703, "step": 4483 }, { "epoch": 0.514161220043573, "grad_norm": 0.28524316064969185, "learning_rate": 0.0005013927127576501, "loss": 2.5153, "step": 4484 }, { "epoch": 0.5142758857929136, "grad_norm": 0.2549869709717568, "learning_rate": 0.0005012070181117621, "loss": 2.5243, "step": 4485 }, { "epoch": 0.5143905515422543, "grad_norm": 0.2646476906263981, "learning_rate": 0.0005010213232993891, "loss": 2.4464, "step": 4486 }, { "epoch": 0.514505217291595, "grad_norm": 0.30204712882569773, "learning_rate": 0.000500835628346144, "loss": 2.4255, "step": 4487 }, { "epoch": 0.5146198830409356, "grad_norm": 0.2623138365463618, "learning_rate": 0.00050064993327764, "loss": 2.3483, "step": 4488 }, { "epoch": 0.5147345487902764, "grad_norm": 0.24885624094290695, "learning_rate": 0.0005004642381194899, "loss": 2.4325, "step": 4489 }, { "epoch": 0.514849214539617, "grad_norm": 0.25972020461232, "learning_rate": 0.0005002785428973071, "loss": 2.4662, "step": 4490 }, { "epoch": 0.5149638802889577, "grad_norm": 0.26984783925429645, "learning_rate": 0.0005000928476367046, "loss": 2.3857, "step": 4491 }, { "epoch": 0.5150785460382984, "grad_norm": 0.22819276381571366, "learning_rate": 0.0004999071523632954, "loss": 2.4421, "step": 4492 }, { "epoch": 0.515193211787639, "grad_norm": 0.24383476682286662, "learning_rate": 0.000499721457102693, "loss": 2.5803, "step": 4493 }, { "epoch": 0.5153078775369797, "grad_norm": 0.2609559036463876, "learning_rate": 0.0004995357618805102, "loss": 2.505, "step": 4494 }, { "epoch": 0.5154225432863204, "grad_norm": 0.26360640862036133, "learning_rate": 0.0004993500667223601, "loss": 2.4429, "step": 4495 }, { "epoch": 0.515537209035661, "grad_norm": 0.23544066023381935, "learning_rate": 0.0004991643716538561, "loss": 2.3813, "step": 4496 }, { "epoch": 0.5156518747850017, "grad_norm": 0.2231454455377136, "learning_rate": 0.0004989786767006109, "loss": 2.3387, "step": 4497 }, { "epoch": 0.5157665405343423, "grad_norm": 0.24232797548502388, "learning_rate": 0.0004987929818882379, "loss": 2.4783, "step": 4498 }, { "epoch": 0.5158812062836831, "grad_norm": 0.2373161126301491, "learning_rate": 0.0004986072872423499, "loss": 2.447, "step": 4499 }, { "epoch": 0.5159958720330238, "grad_norm": 0.23269584834054896, "learning_rate": 0.0004984215927885603, "loss": 2.4573, "step": 4500 }, { "epoch": 0.5161105377823644, "grad_norm": 0.2603155372143211, "learning_rate": 0.0004982358985524817, "loss": 2.5778, "step": 4501 }, { "epoch": 0.5162252035317051, "grad_norm": 0.22523328111648705, "learning_rate": 0.0004980502045597272, "loss": 2.421, "step": 4502 }, { "epoch": 0.5163398692810458, "grad_norm": 0.2650253885494348, "learning_rate": 0.00049786451083591, "loss": 2.5238, "step": 4503 }, { "epoch": 0.5164545350303864, "grad_norm": 0.25352827961005536, "learning_rate": 0.0004976788174066428, "loss": 2.4048, "step": 4504 }, { "epoch": 0.5165692007797271, "grad_norm": 0.27475361726706465, "learning_rate": 0.0004974931242975385, "loss": 2.3863, "step": 4505 }, { "epoch": 0.5166838665290677, "grad_norm": 0.27467705445347046, "learning_rate": 0.0004973074315342101, "loss": 2.4789, "step": 4506 }, { "epoch": 0.5167985322784084, "grad_norm": 0.2454587108006379, "learning_rate": 0.0004971217391422697, "loss": 2.4899, "step": 4507 }, { "epoch": 0.5169131980277492, "grad_norm": 0.24434211971417166, "learning_rate": 0.0004969360471473309, "loss": 2.4738, "step": 4508 }, { "epoch": 0.5170278637770898, "grad_norm": 0.2534261479393847, "learning_rate": 0.0004967503555750059, "loss": 2.4561, "step": 4509 }, { "epoch": 0.5171425295264305, "grad_norm": 0.24998579604378462, "learning_rate": 0.0004965646644509074, "loss": 2.4068, "step": 4510 }, { "epoch": 0.5172571952757711, "grad_norm": 0.24228415885608864, "learning_rate": 0.000496378973800648, "loss": 2.4844, "step": 4511 }, { "epoch": 0.5173718610251118, "grad_norm": 0.2723702188999539, "learning_rate": 0.00049619328364984, "loss": 2.3476, "step": 4512 }, { "epoch": 0.5174865267744525, "grad_norm": 0.23970791015710996, "learning_rate": 0.0004960075940240961, "loss": 2.4233, "step": 4513 }, { "epoch": 0.5176011925237931, "grad_norm": 0.22936563410271157, "learning_rate": 0.0004958219049490284, "loss": 2.4077, "step": 4514 }, { "epoch": 0.5177158582731338, "grad_norm": 0.24015611618425878, "learning_rate": 0.0004956362164502495, "loss": 2.5292, "step": 4515 }, { "epoch": 0.5178305240224745, "grad_norm": 0.22340294335669278, "learning_rate": 0.0004954505285533711, "loss": 2.4698, "step": 4516 }, { "epoch": 0.5179451897718151, "grad_norm": 0.25484182375316994, "learning_rate": 0.0004952648412840056, "loss": 2.4738, "step": 4517 }, { "epoch": 0.5180598555211559, "grad_norm": 0.24063549968310993, "learning_rate": 0.000495079154667765, "loss": 2.4566, "step": 4518 }, { "epoch": 0.5181745212704965, "grad_norm": 0.2241071631857714, "learning_rate": 0.0004948934687302614, "loss": 2.418, "step": 4519 }, { "epoch": 0.5182891870198372, "grad_norm": 0.21984351432177796, "learning_rate": 0.0004947077834971061, "loss": 2.4392, "step": 4520 }, { "epoch": 0.5184038527691779, "grad_norm": 0.2298693333756246, "learning_rate": 0.0004945220989939115, "loss": 2.5411, "step": 4521 }, { "epoch": 0.5185185185185185, "grad_norm": 0.23693366586345208, "learning_rate": 0.0004943364152462887, "loss": 2.4331, "step": 4522 }, { "epoch": 0.5186331842678592, "grad_norm": 0.2414301085737016, "learning_rate": 0.0004941507322798496, "loss": 2.5474, "step": 4523 }, { "epoch": 0.5187478500171998, "grad_norm": 0.2555304546357254, "learning_rate": 0.0004939650501202055, "loss": 2.5069, "step": 4524 }, { "epoch": 0.5188625157665405, "grad_norm": 0.24726059620171312, "learning_rate": 0.0004937793687929677, "loss": 2.6135, "step": 4525 }, { "epoch": 0.5189771815158812, "grad_norm": 0.24491133895399492, "learning_rate": 0.0004935936883237474, "loss": 2.3745, "step": 4526 }, { "epoch": 0.5190918472652218, "grad_norm": 0.2651742163404057, "learning_rate": 0.0004934080087381555, "loss": 2.4878, "step": 4527 }, { "epoch": 0.5192065130145626, "grad_norm": 0.2681261190960247, "learning_rate": 0.0004932223300618033, "loss": 2.5889, "step": 4528 }, { "epoch": 0.5193211787639033, "grad_norm": 0.255648140766007, "learning_rate": 0.0004930366523203014, "loss": 2.5325, "step": 4529 }, { "epoch": 0.5194358445132439, "grad_norm": 0.2572738217569529, "learning_rate": 0.0004928509755392607, "loss": 2.4573, "step": 4530 }, { "epoch": 0.5195505102625846, "grad_norm": 0.24624602540049972, "learning_rate": 0.0004926652997442917, "loss": 2.3551, "step": 4531 }, { "epoch": 0.5196651760119252, "grad_norm": 0.24281949106866502, "learning_rate": 0.0004924796249610045, "loss": 2.3984, "step": 4532 }, { "epoch": 0.5197798417612659, "grad_norm": 0.23781891944197128, "learning_rate": 0.0004922939512150098, "loss": 2.3681, "step": 4533 }, { "epoch": 0.5198945075106066, "grad_norm": 0.26569260499395503, "learning_rate": 0.0004921082785319178, "loss": 2.4346, "step": 4534 }, { "epoch": 0.5200091732599472, "grad_norm": 0.2510981904280255, "learning_rate": 0.0004919226069373382, "loss": 2.384, "step": 4535 }, { "epoch": 0.5201238390092879, "grad_norm": 0.23339062801106183, "learning_rate": 0.0004917369364568811, "loss": 2.3784, "step": 4536 }, { "epoch": 0.5202385047586287, "grad_norm": 0.2490741506929699, "learning_rate": 0.0004915512671161559, "loss": 2.4568, "step": 4537 }, { "epoch": 0.5203531705079693, "grad_norm": 0.23328243174384444, "learning_rate": 0.0004913655989407726, "loss": 2.4152, "step": 4538 }, { "epoch": 0.52046783625731, "grad_norm": 0.23473355129308804, "learning_rate": 0.0004911799319563402, "loss": 2.3595, "step": 4539 }, { "epoch": 0.5205825020066506, "grad_norm": 0.2405559171009568, "learning_rate": 0.0004909942661884681, "loss": 2.6628, "step": 4540 }, { "epoch": 0.5206971677559913, "grad_norm": 0.24631988397109497, "learning_rate": 0.0004908086016627653, "loss": 2.5678, "step": 4541 }, { "epoch": 0.520811833505332, "grad_norm": 0.23938338252289218, "learning_rate": 0.0004906229384048407, "loss": 2.382, "step": 4542 }, { "epoch": 0.5209264992546726, "grad_norm": 0.277794012808303, "learning_rate": 0.000490437276440303, "loss": 2.4525, "step": 4543 }, { "epoch": 0.5210411650040133, "grad_norm": 0.2774102729627632, "learning_rate": 0.0004902516157947608, "loss": 2.5504, "step": 4544 }, { "epoch": 0.5211558307533539, "grad_norm": 0.24894939743213101, "learning_rate": 0.0004900659564938223, "loss": 2.4834, "step": 4545 }, { "epoch": 0.5212704965026946, "grad_norm": 0.25074321101055985, "learning_rate": 0.000489880298563096, "loss": 2.4665, "step": 4546 }, { "epoch": 0.5213851622520354, "grad_norm": 0.2571422675666554, "learning_rate": 0.0004896946420281891, "loss": 2.449, "step": 4547 }, { "epoch": 0.521499828001376, "grad_norm": 0.2562677574020682, "learning_rate": 0.0004895089869147102, "loss": 2.5506, "step": 4548 }, { "epoch": 0.5216144937507167, "grad_norm": 0.25596615454633465, "learning_rate": 0.0004893233332482666, "loss": 2.5829, "step": 4549 }, { "epoch": 0.5217291595000574, "grad_norm": 0.28954517699155546, "learning_rate": 0.0004891376810544657, "loss": 2.5289, "step": 4550 }, { "epoch": 0.521843825249398, "grad_norm": 0.2737888164371717, "learning_rate": 0.0004889520303589146, "loss": 2.4603, "step": 4551 }, { "epoch": 0.5219584909987387, "grad_norm": 0.24085932037693727, "learning_rate": 0.0004887663811872201, "loss": 2.4901, "step": 4552 }, { "epoch": 0.5220731567480793, "grad_norm": 0.271399142591164, "learning_rate": 0.0004885807335649894, "loss": 2.5674, "step": 4553 }, { "epoch": 0.52218782249742, "grad_norm": 0.22342220561876536, "learning_rate": 0.0004883950875178288, "loss": 2.4757, "step": 4554 }, { "epoch": 0.5223024882467607, "grad_norm": 0.2850591445903701, "learning_rate": 0.0004882094430713447, "loss": 2.4897, "step": 4555 }, { "epoch": 0.5224171539961013, "grad_norm": 0.24639292969715829, "learning_rate": 0.00048802380025114326, "loss": 2.4004, "step": 4556 }, { "epoch": 0.522531819745442, "grad_norm": 0.2370433655434203, "learning_rate": 0.00048783815908283, "loss": 2.5665, "step": 4557 }, { "epoch": 0.5226464854947827, "grad_norm": 0.23498357577667503, "learning_rate": 0.00048765251959201106, "loss": 2.6257, "step": 4558 }, { "epoch": 0.5227611512441234, "grad_norm": 0.2358855001733295, "learning_rate": 0.00048746688180429173, "loss": 2.4232, "step": 4559 }, { "epoch": 0.5228758169934641, "grad_norm": 0.22815090782560007, "learning_rate": 0.00048728124574527705, "loss": 2.5505, "step": 4560 }, { "epoch": 0.5229904827428047, "grad_norm": 0.25662112315324975, "learning_rate": 0.00048709561144057216, "loss": 2.4265, "step": 4561 }, { "epoch": 0.5231051484921454, "grad_norm": 0.22889284817229524, "learning_rate": 0.00048690997891578155, "loss": 2.4584, "step": 4562 }, { "epoch": 0.5232198142414861, "grad_norm": 0.23901989624374834, "learning_rate": 0.00048672434819651, "loss": 2.4387, "step": 4563 }, { "epoch": 0.5233344799908267, "grad_norm": 0.22800068464520587, "learning_rate": 0.0004865387193083615, "loss": 2.2004, "step": 4564 }, { "epoch": 0.5234491457401674, "grad_norm": 0.25736316087959404, "learning_rate": 0.0004863530922769401, "loss": 2.5902, "step": 4565 }, { "epoch": 0.523563811489508, "grad_norm": 0.25884220968363986, "learning_rate": 0.0004861674671278494, "loss": 2.428, "step": 4566 }, { "epoch": 0.5236784772388487, "grad_norm": 0.24226088930103568, "learning_rate": 0.0004859818438866928, "loss": 2.4733, "step": 4567 }, { "epoch": 0.5237931429881895, "grad_norm": 0.23995797685399362, "learning_rate": 0.0004857962225790739, "loss": 2.2392, "step": 4568 }, { "epoch": 0.5239078087375301, "grad_norm": 0.2674956781571016, "learning_rate": 0.00048561060323059534, "loss": 2.4999, "step": 4569 }, { "epoch": 0.5240224744868708, "grad_norm": 0.30466370962613226, "learning_rate": 0.0004854249858668597, "loss": 2.4818, "step": 4570 }, { "epoch": 0.5241371402362115, "grad_norm": 0.2815696982899956, "learning_rate": 0.0004852393705134695, "loss": 2.5881, "step": 4571 }, { "epoch": 0.5242518059855521, "grad_norm": 0.23526110362789063, "learning_rate": 0.0004850537571960266, "loss": 2.4455, "step": 4572 }, { "epoch": 0.5243664717348928, "grad_norm": 0.2195908363006188, "learning_rate": 0.00048486814594013303, "loss": 2.4739, "step": 4573 }, { "epoch": 0.5244811374842334, "grad_norm": 0.24518754064148213, "learning_rate": 0.0004846825367713904, "loss": 2.5003, "step": 4574 }, { "epoch": 0.5245958032335741, "grad_norm": 0.22563465792622345, "learning_rate": 0.0004844969297153999, "loss": 2.4513, "step": 4575 }, { "epoch": 0.5247104689829148, "grad_norm": 0.22005795205127815, "learning_rate": 0.00048431132479776227, "loss": 2.4664, "step": 4576 }, { "epoch": 0.5248251347322554, "grad_norm": 0.2510371583678022, "learning_rate": 0.00048412572204407825, "loss": 2.5061, "step": 4577 }, { "epoch": 0.5249398004815962, "grad_norm": 0.2275422833707922, "learning_rate": 0.00048394012147994853, "loss": 2.3534, "step": 4578 }, { "epoch": 0.5250544662309368, "grad_norm": 0.25397444999470525, "learning_rate": 0.0004837545231309728, "loss": 2.562, "step": 4579 }, { "epoch": 0.5251691319802775, "grad_norm": 0.22283918138111716, "learning_rate": 0.000483568927022751, "loss": 2.4377, "step": 4580 }, { "epoch": 0.5252837977296182, "grad_norm": 0.25149505028307895, "learning_rate": 0.00048338333318088256, "loss": 2.6205, "step": 4581 }, { "epoch": 0.5253984634789588, "grad_norm": 0.24888288620746105, "learning_rate": 0.0004831977416309663, "loss": 2.5287, "step": 4582 }, { "epoch": 0.5255131292282995, "grad_norm": 0.23885559322688754, "learning_rate": 0.00048301215239860145, "loss": 2.4114, "step": 4583 }, { "epoch": 0.5256277949776402, "grad_norm": 0.23613567646283098, "learning_rate": 0.0004828265655093865, "loss": 2.32, "step": 4584 }, { "epoch": 0.5257424607269808, "grad_norm": 0.269064674806033, "learning_rate": 0.0004826409809889193, "loss": 2.5452, "step": 4585 }, { "epoch": 0.5258571264763215, "grad_norm": 0.2583331961057893, "learning_rate": 0.000482455398862798, "loss": 2.438, "step": 4586 }, { "epoch": 0.5259717922256621, "grad_norm": 0.2609596836655053, "learning_rate": 0.00048226981915661983, "loss": 2.4499, "step": 4587 }, { "epoch": 0.5260864579750029, "grad_norm": 0.25685355186972914, "learning_rate": 0.00048208424189598233, "loss": 2.3919, "step": 4588 }, { "epoch": 0.5262011237243436, "grad_norm": 0.2584225980272253, "learning_rate": 0.000481898667106482, "loss": 2.461, "step": 4589 }, { "epoch": 0.5263157894736842, "grad_norm": 0.24486046482854876, "learning_rate": 0.0004817130948137157, "loss": 2.505, "step": 4590 }, { "epoch": 0.5264304552230249, "grad_norm": 0.22703577614670903, "learning_rate": 0.0004815275250432792, "loss": 2.5083, "step": 4591 }, { "epoch": 0.5265451209723655, "grad_norm": 0.23781930715467273, "learning_rate": 0.0004813419578207684, "loss": 2.4713, "step": 4592 }, { "epoch": 0.5266597867217062, "grad_norm": 0.26518777475859917, "learning_rate": 0.0004811563931717791, "loss": 2.4179, "step": 4593 }, { "epoch": 0.5267744524710469, "grad_norm": 0.27447614968338707, "learning_rate": 0.0004809708311219062, "loss": 2.4915, "step": 4594 }, { "epoch": 0.5268891182203875, "grad_norm": 0.25602481102122937, "learning_rate": 0.00048078527169674427, "loss": 2.4177, "step": 4595 }, { "epoch": 0.5270037839697282, "grad_norm": 0.26484903981347924, "learning_rate": 0.000480599714921888, "loss": 2.5462, "step": 4596 }, { "epoch": 0.527118449719069, "grad_norm": 0.23971267398623883, "learning_rate": 0.000480414160822931, "loss": 2.5884, "step": 4597 }, { "epoch": 0.5272331154684096, "grad_norm": 0.2862140274867441, "learning_rate": 0.0004802286094254673, "loss": 2.4251, "step": 4598 }, { "epoch": 0.5273477812177503, "grad_norm": 0.23638962581935394, "learning_rate": 0.0004800430607550901, "loss": 2.3957, "step": 4599 }, { "epoch": 0.5274624469670909, "grad_norm": 0.2520730205747328, "learning_rate": 0.0004798575148373923, "loss": 2.4731, "step": 4600 }, { "epoch": 0.5275771127164316, "grad_norm": 0.26626703650233674, "learning_rate": 0.0004796719716979663, "loss": 2.4797, "step": 4601 }, { "epoch": 0.5276917784657723, "grad_norm": 0.24101367822785053, "learning_rate": 0.00047948643136240423, "loss": 2.35, "step": 4602 }, { "epoch": 0.5278064442151129, "grad_norm": 0.2669703688442846, "learning_rate": 0.00047930089385629806, "loss": 2.5013, "step": 4603 }, { "epoch": 0.5279211099644536, "grad_norm": 0.24304588593133702, "learning_rate": 0.00047911535920523897, "loss": 2.413, "step": 4604 }, { "epoch": 0.5280357757137943, "grad_norm": 0.26784235250295524, "learning_rate": 0.00047892982743481805, "loss": 2.493, "step": 4605 }, { "epoch": 0.528150441463135, "grad_norm": 0.26398050116651967, "learning_rate": 0.0004787442985706259, "loss": 2.4383, "step": 4606 }, { "epoch": 0.5282651072124757, "grad_norm": 0.2436180626760646, "learning_rate": 0.00047855877263825223, "loss": 2.5951, "step": 4607 }, { "epoch": 0.5283797729618163, "grad_norm": 0.25481246291855764, "learning_rate": 0.0004783732496632873, "loss": 2.52, "step": 4608 }, { "epoch": 0.528494438711157, "grad_norm": 0.22740958414390638, "learning_rate": 0.0004781877296713205, "loss": 2.5384, "step": 4609 }, { "epoch": 0.5286091044604977, "grad_norm": 0.24362780151568794, "learning_rate": 0.00047800221268794055, "loss": 2.4873, "step": 4610 }, { "epoch": 0.5287237702098383, "grad_norm": 0.23237228129130977, "learning_rate": 0.0004778166987387361, "loss": 2.5083, "step": 4611 }, { "epoch": 0.528838435959179, "grad_norm": 0.259790858297508, "learning_rate": 0.00047763118784929494, "loss": 2.5232, "step": 4612 }, { "epoch": 0.5289531017085196, "grad_norm": 0.25936212780231427, "learning_rate": 0.00047744568004520527, "loss": 2.4998, "step": 4613 }, { "epoch": 0.5290677674578603, "grad_norm": 0.24899769400572558, "learning_rate": 0.000477260175352054, "loss": 2.5072, "step": 4614 }, { "epoch": 0.529182433207201, "grad_norm": 0.22906021723849657, "learning_rate": 0.0004770746737954282, "loss": 2.4629, "step": 4615 }, { "epoch": 0.5292970989565416, "grad_norm": 0.23034056511374007, "learning_rate": 0.0004768891754009141, "loss": 2.4814, "step": 4616 }, { "epoch": 0.5294117647058824, "grad_norm": 0.24303195738732264, "learning_rate": 0.00047670368019409753, "loss": 2.4799, "step": 4617 }, { "epoch": 0.5295264304552231, "grad_norm": 0.2980009056936441, "learning_rate": 0.00047651818820056445, "loss": 2.4894, "step": 4618 }, { "epoch": 0.5296410962045637, "grad_norm": 0.24330631067832084, "learning_rate": 0.00047633269944589974, "loss": 2.5226, "step": 4619 }, { "epoch": 0.5297557619539044, "grad_norm": 0.25841065191585433, "learning_rate": 0.00047614721395568786, "loss": 2.5071, "step": 4620 }, { "epoch": 0.529870427703245, "grad_norm": 0.2911977868436059, "learning_rate": 0.0004759617317555133, "loss": 2.4488, "step": 4621 }, { "epoch": 0.5299850934525857, "grad_norm": 0.25110400322852106, "learning_rate": 0.0004757762528709594, "loss": 2.4924, "step": 4622 }, { "epoch": 0.5300997592019264, "grad_norm": 0.25753205652346495, "learning_rate": 0.0004755907773276097, "loss": 2.4445, "step": 4623 }, { "epoch": 0.530214424951267, "grad_norm": 0.2539497173241698, "learning_rate": 0.0004754053051510472, "loss": 2.4681, "step": 4624 }, { "epoch": 0.5303290907006077, "grad_norm": 0.25438807158825577, "learning_rate": 0.00047521983636685395, "loss": 2.5243, "step": 4625 }, { "epoch": 0.5304437564499485, "grad_norm": 0.24364219609792495, "learning_rate": 0.00047503437100061184, "loss": 2.2718, "step": 4626 }, { "epoch": 0.5305584221992891, "grad_norm": 0.2671552847826482, "learning_rate": 0.00047484890907790225, "loss": 2.4255, "step": 4627 }, { "epoch": 0.5306730879486298, "grad_norm": 0.2480661028907191, "learning_rate": 0.0004746634506243065, "loss": 2.4395, "step": 4628 }, { "epoch": 0.5307877536979704, "grad_norm": 0.2534163928921084, "learning_rate": 0.0004744779956654046, "loss": 2.4363, "step": 4629 }, { "epoch": 0.5309024194473111, "grad_norm": 0.24981080646140344, "learning_rate": 0.00047429254422677684, "loss": 2.3862, "step": 4630 }, { "epoch": 0.5310170851966518, "grad_norm": 0.27459672072737173, "learning_rate": 0.00047410709633400255, "loss": 2.3953, "step": 4631 }, { "epoch": 0.5311317509459924, "grad_norm": 0.24851827610296273, "learning_rate": 0.0004739216520126606, "loss": 2.5256, "step": 4632 }, { "epoch": 0.5312464166953331, "grad_norm": 0.26508677962431915, "learning_rate": 0.0004737362112883297, "loss": 2.4789, "step": 4633 }, { "epoch": 0.5313610824446737, "grad_norm": 0.2691679682286342, "learning_rate": 0.00047355077418658793, "loss": 2.3447, "step": 4634 }, { "epoch": 0.5314757481940144, "grad_norm": 0.2416074009930367, "learning_rate": 0.0004733653407330126, "loss": 2.4471, "step": 4635 }, { "epoch": 0.5315904139433552, "grad_norm": 0.25135398914157114, "learning_rate": 0.00047317991095318095, "loss": 2.4862, "step": 4636 }, { "epoch": 0.5317050796926958, "grad_norm": 0.24549141594066873, "learning_rate": 0.0004729944848726691, "loss": 2.4782, "step": 4637 }, { "epoch": 0.5318197454420365, "grad_norm": 0.26046236298012326, "learning_rate": 0.0004728090625170535, "loss": 2.3666, "step": 4638 }, { "epoch": 0.5319344111913772, "grad_norm": 0.2605265027565567, "learning_rate": 0.0004726236439119094, "loss": 2.494, "step": 4639 }, { "epoch": 0.5320490769407178, "grad_norm": 0.24291911666018393, "learning_rate": 0.0004724382290828118, "loss": 2.4268, "step": 4640 }, { "epoch": 0.5321637426900585, "grad_norm": 0.2588075201949955, "learning_rate": 0.00047225281805533505, "loss": 2.4908, "step": 4641 }, { "epoch": 0.5322784084393991, "grad_norm": 0.24160792531525202, "learning_rate": 0.000472067410855053, "loss": 2.361, "step": 4642 }, { "epoch": 0.5323930741887398, "grad_norm": 0.23306628718340477, "learning_rate": 0.0004718820075075394, "loss": 2.3776, "step": 4643 }, { "epoch": 0.5325077399380805, "grad_norm": 0.2386203373139482, "learning_rate": 0.0004716966080383669, "loss": 2.3073, "step": 4644 }, { "epoch": 0.5326224056874211, "grad_norm": 0.2510296549276965, "learning_rate": 0.00047151121247310766, "loss": 2.3556, "step": 4645 }, { "epoch": 0.5327370714367619, "grad_norm": 0.25314235060277473, "learning_rate": 0.0004713258208373338, "loss": 2.4995, "step": 4646 }, { "epoch": 0.5328517371861025, "grad_norm": 0.2547876646634815, "learning_rate": 0.0004711404331566161, "loss": 2.3924, "step": 4647 }, { "epoch": 0.5329664029354432, "grad_norm": 0.25854629192530687, "learning_rate": 0.0004709550494565257, "loss": 2.4152, "step": 4648 }, { "epoch": 0.5330810686847839, "grad_norm": 0.2949806201028242, "learning_rate": 0.0004707696697626326, "loss": 2.4248, "step": 4649 }, { "epoch": 0.5331957344341245, "grad_norm": 0.25274410886762294, "learning_rate": 0.00047058429410050645, "loss": 2.3734, "step": 4650 }, { "epoch": 0.5333104001834652, "grad_norm": 0.2573701840427738, "learning_rate": 0.00047039892249571606, "loss": 2.5985, "step": 4651 }, { "epoch": 0.5334250659328059, "grad_norm": 0.23772344840999685, "learning_rate": 0.00047021355497383, "loss": 2.4164, "step": 4652 }, { "epoch": 0.5335397316821465, "grad_norm": 0.23416193221939205, "learning_rate": 0.00047002819156041645, "loss": 2.4223, "step": 4653 }, { "epoch": 0.5336543974314872, "grad_norm": 0.24830676232138943, "learning_rate": 0.0004698428322810424, "loss": 2.2769, "step": 4654 }, { "epoch": 0.5337690631808278, "grad_norm": 0.2355305122469701, "learning_rate": 0.000469657477161275, "loss": 2.4686, "step": 4655 }, { "epoch": 0.5338837289301686, "grad_norm": 0.25859001001994364, "learning_rate": 0.00046947212622668017, "loss": 2.487, "step": 4656 }, { "epoch": 0.5339983946795093, "grad_norm": 0.24884032352594732, "learning_rate": 0.00046928677950282337, "loss": 2.4796, "step": 4657 }, { "epoch": 0.5341130604288499, "grad_norm": 0.26646456108513084, "learning_rate": 0.0004691014370152701, "loss": 2.4837, "step": 4658 }, { "epoch": 0.5342277261781906, "grad_norm": 0.24023186587407666, "learning_rate": 0.00046891609878958463, "loss": 2.4584, "step": 4659 }, { "epoch": 0.5343423919275313, "grad_norm": 0.25585799558015776, "learning_rate": 0.00046873076485133075, "loss": 2.4911, "step": 4660 }, { "epoch": 0.5344570576768719, "grad_norm": 0.24853595574876833, "learning_rate": 0.0004685454352260719, "loss": 2.4854, "step": 4661 }, { "epoch": 0.5345717234262126, "grad_norm": 0.2411603503433762, "learning_rate": 0.0004683601099393705, "loss": 2.4212, "step": 4662 }, { "epoch": 0.5346863891755532, "grad_norm": 0.2247806080422925, "learning_rate": 0.000468174789016789, "loss": 2.3824, "step": 4663 }, { "epoch": 0.5348010549248939, "grad_norm": 0.2847055992246278, "learning_rate": 0.00046798947248388864, "loss": 2.4849, "step": 4664 }, { "epoch": 0.5349157206742347, "grad_norm": 0.252731978857553, "learning_rate": 0.0004678041603662305, "loss": 2.4092, "step": 4665 }, { "epoch": 0.5350303864235753, "grad_norm": 0.23539401991227432, "learning_rate": 0.00046761885268937456, "loss": 2.3521, "step": 4666 }, { "epoch": 0.535145052172916, "grad_norm": 0.2578942169881139, "learning_rate": 0.00046743354947888054, "loss": 2.4373, "step": 4667 }, { "epoch": 0.5352597179222566, "grad_norm": 0.2243803182369507, "learning_rate": 0.00046724825076030783, "loss": 2.4011, "step": 4668 }, { "epoch": 0.5353743836715973, "grad_norm": 0.28175014377806196, "learning_rate": 0.00046706295655921467, "loss": 2.59, "step": 4669 }, { "epoch": 0.535489049420938, "grad_norm": 0.26552379246847013, "learning_rate": 0.00046687766690115863, "loss": 2.3609, "step": 4670 }, { "epoch": 0.5356037151702786, "grad_norm": 0.28233356636688, "learning_rate": 0.00046669238181169727, "loss": 2.486, "step": 4671 }, { "epoch": 0.5357183809196193, "grad_norm": 0.24823490688650404, "learning_rate": 0.0004665071013163866, "loss": 2.4991, "step": 4672 }, { "epoch": 0.53583304666896, "grad_norm": 0.2570614725073706, "learning_rate": 0.0004663218254407831, "loss": 2.4058, "step": 4673 }, { "epoch": 0.5359477124183006, "grad_norm": 0.25187742805139063, "learning_rate": 0.0004661365542104419, "loss": 2.4474, "step": 4674 }, { "epoch": 0.5360623781676414, "grad_norm": 0.2745169691112177, "learning_rate": 0.0004659512876509175, "loss": 2.437, "step": 4675 }, { "epoch": 0.536177043916982, "grad_norm": 0.25768263770745614, "learning_rate": 0.00046576602578776385, "loss": 2.4054, "step": 4676 }, { "epoch": 0.5362917096663227, "grad_norm": 0.23882942207105035, "learning_rate": 0.00046558076864653433, "loss": 2.3685, "step": 4677 }, { "epoch": 0.5364063754156634, "grad_norm": 0.25091348128675345, "learning_rate": 0.0004653955162527818, "loss": 2.5181, "step": 4678 }, { "epoch": 0.536521041165004, "grad_norm": 0.25672793862912213, "learning_rate": 0.00046521026863205814, "loss": 2.5716, "step": 4679 }, { "epoch": 0.5366357069143447, "grad_norm": 0.21882746115729437, "learning_rate": 0.00046502502580991485, "loss": 2.458, "step": 4680 }, { "epoch": 0.5367503726636853, "grad_norm": 0.24961594745227172, "learning_rate": 0.0004648397878119026, "loss": 2.451, "step": 4681 }, { "epoch": 0.536865038413026, "grad_norm": 0.2311012025906882, "learning_rate": 0.0004646545546635712, "loss": 2.5847, "step": 4682 }, { "epoch": 0.5369797041623667, "grad_norm": 0.23745200022888582, "learning_rate": 0.0004644693263904703, "loss": 2.5883, "step": 4683 }, { "epoch": 0.5370943699117073, "grad_norm": 0.21413096050419006, "learning_rate": 0.0004642841030181487, "loss": 2.4016, "step": 4684 }, { "epoch": 0.537209035661048, "grad_norm": 0.2162963520194744, "learning_rate": 0.0004640988845721541, "loss": 2.3175, "step": 4685 }, { "epoch": 0.5373237014103888, "grad_norm": 0.22919499165847834, "learning_rate": 0.0004639136710780342, "loss": 2.3872, "step": 4686 }, { "epoch": 0.5374383671597294, "grad_norm": 0.2623444224014999, "learning_rate": 0.00046372846256133527, "loss": 2.4355, "step": 4687 }, { "epoch": 0.5375530329090701, "grad_norm": 0.24698989586847614, "learning_rate": 0.0004635432590476038, "loss": 2.3981, "step": 4688 }, { "epoch": 0.5376676986584107, "grad_norm": 0.24739849099741557, "learning_rate": 0.0004633580605623847, "loss": 2.361, "step": 4689 }, { "epoch": 0.5377823644077514, "grad_norm": 0.24418558000822083, "learning_rate": 0.00046317286713122276, "loss": 2.3681, "step": 4690 }, { "epoch": 0.5378970301570921, "grad_norm": 0.2676032618808143, "learning_rate": 0.0004629876787796617, "loss": 2.3675, "step": 4691 }, { "epoch": 0.5380116959064327, "grad_norm": 0.2647078977128044, "learning_rate": 0.0004628024955332447, "loss": 2.4199, "step": 4692 }, { "epoch": 0.5381263616557734, "grad_norm": 0.2660521501892031, "learning_rate": 0.00046261731741751466, "loss": 2.5033, "step": 4693 }, { "epoch": 0.5382410274051141, "grad_norm": 0.2818862046270914, "learning_rate": 0.0004624321444580131, "loss": 2.4484, "step": 4694 }, { "epoch": 0.5383556931544548, "grad_norm": 0.26238181049862547, "learning_rate": 0.00046224697668028095, "loss": 2.5447, "step": 4695 }, { "epoch": 0.5384703589037955, "grad_norm": 0.2594144196868433, "learning_rate": 0.0004620618141098589, "loss": 2.4122, "step": 4696 }, { "epoch": 0.5385850246531361, "grad_norm": 0.24504914839549968, "learning_rate": 0.00046187665677228614, "loss": 2.4263, "step": 4697 }, { "epoch": 0.5386996904024768, "grad_norm": 0.2612884973621422, "learning_rate": 0.000461691504693102, "loss": 2.431, "step": 4698 }, { "epoch": 0.5388143561518175, "grad_norm": 0.26756207763517587, "learning_rate": 0.00046150635789784475, "loss": 2.3662, "step": 4699 }, { "epoch": 0.5389290219011581, "grad_norm": 0.23674732847557112, "learning_rate": 0.0004613212164120516, "loss": 2.5086, "step": 4700 }, { "epoch": 0.5390436876504988, "grad_norm": 0.2778632206003852, "learning_rate": 0.00046113608026125924, "loss": 2.4822, "step": 4701 }, { "epoch": 0.5391583533998394, "grad_norm": 0.23541433221025979, "learning_rate": 0.00046095094947100373, "loss": 2.5762, "step": 4702 }, { "epoch": 0.5392730191491801, "grad_norm": 0.227255928058899, "learning_rate": 0.0004607658240668206, "loss": 2.4016, "step": 4703 }, { "epoch": 0.5393876848985208, "grad_norm": 0.23534395298788063, "learning_rate": 0.0004605807040742441, "loss": 2.3824, "step": 4704 }, { "epoch": 0.5395023506478615, "grad_norm": 0.23285254134888833, "learning_rate": 0.0004603955895188081, "loss": 2.5895, "step": 4705 }, { "epoch": 0.5396170163972022, "grad_norm": 0.25154874708570435, "learning_rate": 0.00046021048042604564, "loss": 2.5103, "step": 4706 }, { "epoch": 0.5397316821465429, "grad_norm": 0.22828270997892236, "learning_rate": 0.0004600253768214887, "loss": 2.4274, "step": 4707 }, { "epoch": 0.5398463478958835, "grad_norm": 0.24654957134655425, "learning_rate": 0.0004598402787306692, "loss": 2.3992, "step": 4708 }, { "epoch": 0.5399610136452242, "grad_norm": 0.24316804112084903, "learning_rate": 0.00045965518617911786, "loss": 2.3994, "step": 4709 }, { "epoch": 0.5400756793945648, "grad_norm": 0.25898987785664634, "learning_rate": 0.00045947009919236435, "loss": 2.4992, "step": 4710 }, { "epoch": 0.5401903451439055, "grad_norm": 0.24206928430250374, "learning_rate": 0.0004592850177959383, "loss": 2.5685, "step": 4711 }, { "epoch": 0.5403050108932462, "grad_norm": 0.2398433788231947, "learning_rate": 0.00045909994201536765, "loss": 2.3811, "step": 4712 }, { "epoch": 0.5404196766425868, "grad_norm": 0.24803970957932098, "learning_rate": 0.00045891487187618056, "loss": 2.4484, "step": 4713 }, { "epoch": 0.5405343423919275, "grad_norm": 0.2894925089842307, "learning_rate": 0.0004587298074039037, "loss": 2.4669, "step": 4714 }, { "epoch": 0.5406490081412682, "grad_norm": 0.25899000261603516, "learning_rate": 0.00045854474862406337, "loss": 2.5324, "step": 4715 }, { "epoch": 0.5407636738906089, "grad_norm": 0.2416786979782219, "learning_rate": 0.0004583596955621846, "loss": 2.4619, "step": 4716 }, { "epoch": 0.5408783396399496, "grad_norm": 0.25436326823287436, "learning_rate": 0.000458174648243792, "loss": 2.5906, "step": 4717 }, { "epoch": 0.5409930053892902, "grad_norm": 0.25829377279017485, "learning_rate": 0.00045798960669440956, "loss": 2.4339, "step": 4718 }, { "epoch": 0.5411076711386309, "grad_norm": 0.24624154910659335, "learning_rate": 0.0004578045709395602, "loss": 2.3872, "step": 4719 }, { "epoch": 0.5412223368879716, "grad_norm": 0.2570962744073782, "learning_rate": 0.00045761954100476576, "loss": 2.553, "step": 4720 }, { "epoch": 0.5413370026373122, "grad_norm": 0.2356959898736343, "learning_rate": 0.00045743451691554796, "loss": 2.4378, "step": 4721 }, { "epoch": 0.5414516683866529, "grad_norm": 0.255177822487017, "learning_rate": 0.0004572494986974269, "loss": 2.4139, "step": 4722 }, { "epoch": 0.5415663341359935, "grad_norm": 0.23103776261627548, "learning_rate": 0.00045706448637592275, "loss": 2.4921, "step": 4723 }, { "epoch": 0.5416809998853342, "grad_norm": 0.2660353078328097, "learning_rate": 0.00045687947997655437, "loss": 2.4574, "step": 4724 }, { "epoch": 0.541795665634675, "grad_norm": 0.27301421696313993, "learning_rate": 0.00045669447952483976, "loss": 2.4992, "step": 4725 }, { "epoch": 0.5419103313840156, "grad_norm": 0.2580928453299047, "learning_rate": 0.0004565094850462961, "loss": 2.485, "step": 4726 }, { "epoch": 0.5420249971333563, "grad_norm": 0.2482778535546584, "learning_rate": 0.0004563244965664399, "loss": 2.4401, "step": 4727 }, { "epoch": 0.542139662882697, "grad_norm": 0.2768493469736648, "learning_rate": 0.00045613951411078714, "loss": 2.4039, "step": 4728 }, { "epoch": 0.5422543286320376, "grad_norm": 0.258993958289199, "learning_rate": 0.00045595453770485217, "loss": 2.4362, "step": 4729 }, { "epoch": 0.5423689943813783, "grad_norm": 0.2558474728423535, "learning_rate": 0.00045576956737414926, "loss": 2.418, "step": 4730 }, { "epoch": 0.5424836601307189, "grad_norm": 0.2586065491994409, "learning_rate": 0.00045558460314419147, "loss": 2.417, "step": 4731 }, { "epoch": 0.5425983258800596, "grad_norm": 0.23259075145338187, "learning_rate": 0.00045539964504049074, "loss": 2.3852, "step": 4732 }, { "epoch": 0.5427129916294003, "grad_norm": 0.23663969363633175, "learning_rate": 0.000455214693088559, "loss": 2.3989, "step": 4733 }, { "epoch": 0.542827657378741, "grad_norm": 0.24490929969464034, "learning_rate": 0.00045502974731390674, "loss": 2.3706, "step": 4734 }, { "epoch": 0.5429423231280817, "grad_norm": 0.2765656448914929, "learning_rate": 0.0004548448077420435, "loss": 2.4253, "step": 4735 }, { "epoch": 0.5430569888774223, "grad_norm": 0.27089406484540574, "learning_rate": 0.0004546598743984784, "loss": 2.4422, "step": 4736 }, { "epoch": 0.543171654626763, "grad_norm": 0.27995017804393396, "learning_rate": 0.00045447494730871917, "loss": 2.4289, "step": 4737 }, { "epoch": 0.5432863203761037, "grad_norm": 0.2567006542265347, "learning_rate": 0.00045429002649827345, "loss": 2.5729, "step": 4738 }, { "epoch": 0.5434009861254443, "grad_norm": 0.23974558643316918, "learning_rate": 0.00045410511199264704, "loss": 2.5268, "step": 4739 }, { "epoch": 0.543515651874785, "grad_norm": 0.2591546779227263, "learning_rate": 0.0004539202038173458, "loss": 2.4038, "step": 4740 }, { "epoch": 0.5436303176241257, "grad_norm": 0.2304362580459881, "learning_rate": 0.00045373530199787395, "loss": 2.3838, "step": 4741 }, { "epoch": 0.5437449833734663, "grad_norm": 0.2519189436407284, "learning_rate": 0.00045355040655973514, "loss": 2.4927, "step": 4742 }, { "epoch": 0.543859649122807, "grad_norm": 0.2418096188692322, "learning_rate": 0.0004533655175284326, "loss": 2.4134, "step": 4743 }, { "epoch": 0.5439743148721476, "grad_norm": 0.23585058714850035, "learning_rate": 0.00045318063492946796, "loss": 2.5538, "step": 4744 }, { "epoch": 0.5440889806214884, "grad_norm": 0.28307757995039556, "learning_rate": 0.0004529957587883422, "loss": 2.3316, "step": 4745 }, { "epoch": 0.5442036463708291, "grad_norm": 0.22610493230476106, "learning_rate": 0.00045281088913055564, "loss": 2.4689, "step": 4746 }, { "epoch": 0.5443183121201697, "grad_norm": 0.238522643635277, "learning_rate": 0.00045262602598160714, "loss": 2.408, "step": 4747 }, { "epoch": 0.5444329778695104, "grad_norm": 0.2281824899491178, "learning_rate": 0.00045244116936699546, "loss": 2.5617, "step": 4748 }, { "epoch": 0.544547643618851, "grad_norm": 0.22842107452931468, "learning_rate": 0.00045225631931221805, "loss": 2.4238, "step": 4749 }, { "epoch": 0.5446623093681917, "grad_norm": 0.23344664141633537, "learning_rate": 0.0004520714758427713, "loss": 2.608, "step": 4750 }, { "epoch": 0.5447769751175324, "grad_norm": 0.25368918330674783, "learning_rate": 0.0004518866389841507, "loss": 2.4255, "step": 4751 }, { "epoch": 0.544891640866873, "grad_norm": 0.2742339288505932, "learning_rate": 0.00045170180876185115, "loss": 2.4858, "step": 4752 }, { "epoch": 0.5450063066162137, "grad_norm": 0.24339335141298596, "learning_rate": 0.0004515169852013665, "loss": 2.4808, "step": 4753 }, { "epoch": 0.5451209723655545, "grad_norm": 0.2465756717617404, "learning_rate": 0.0004513321683281896, "loss": 2.4352, "step": 4754 }, { "epoch": 0.5452356381148951, "grad_norm": 0.2748192596502225, "learning_rate": 0.0004511473581678124, "loss": 2.3027, "step": 4755 }, { "epoch": 0.5453503038642358, "grad_norm": 0.23259066108431095, "learning_rate": 0.00045096255474572593, "loss": 2.4893, "step": 4756 }, { "epoch": 0.5454649696135764, "grad_norm": 0.25961116490693203, "learning_rate": 0.0004507777580874201, "loss": 2.4449, "step": 4757 }, { "epoch": 0.5455796353629171, "grad_norm": 0.25020733335143996, "learning_rate": 0.00045059296821838436, "loss": 2.4756, "step": 4758 }, { "epoch": 0.5456943011122578, "grad_norm": 0.2497629700019959, "learning_rate": 0.00045040818516410704, "loss": 2.4609, "step": 4759 }, { "epoch": 0.5458089668615984, "grad_norm": 0.2538232845707576, "learning_rate": 0.0004502234089500751, "loss": 2.5547, "step": 4760 }, { "epoch": 0.5459236326109391, "grad_norm": 0.23587438113223083, "learning_rate": 0.00045003863960177523, "loss": 2.4537, "step": 4761 }, { "epoch": 0.5460382983602798, "grad_norm": 0.24172843415629053, "learning_rate": 0.0004498538771446924, "loss": 2.5872, "step": 4762 }, { "epoch": 0.5461529641096204, "grad_norm": 0.23480705121712697, "learning_rate": 0.0004496691216043116, "loss": 2.3763, "step": 4763 }, { "epoch": 0.5462676298589612, "grad_norm": 0.2431695497729364, "learning_rate": 0.000449484373006116, "loss": 2.4154, "step": 4764 }, { "epoch": 0.5463822956083018, "grad_norm": 0.24406782807309, "learning_rate": 0.00044929963137558827, "loss": 2.4362, "step": 4765 }, { "epoch": 0.5464969613576425, "grad_norm": 0.2757843448513498, "learning_rate": 0.00044911489673820986, "loss": 2.466, "step": 4766 }, { "epoch": 0.5466116271069832, "grad_norm": 0.26371548926167176, "learning_rate": 0.00044893016911946144, "loss": 2.3774, "step": 4767 }, { "epoch": 0.5467262928563238, "grad_norm": 0.245669806763138, "learning_rate": 0.0004487454485448229, "loss": 2.4632, "step": 4768 }, { "epoch": 0.5468409586056645, "grad_norm": 0.27469767429938763, "learning_rate": 0.0004485607350397727, "loss": 2.4439, "step": 4769 }, { "epoch": 0.5469556243550051, "grad_norm": 0.2694434982423201, "learning_rate": 0.0004483760286297885, "loss": 2.6263, "step": 4770 }, { "epoch": 0.5470702901043458, "grad_norm": 0.24308423521874656, "learning_rate": 0.00044819132934034717, "loss": 2.3985, "step": 4771 }, { "epoch": 0.5471849558536865, "grad_norm": 0.21250812520089382, "learning_rate": 0.0004480066371969243, "loss": 2.4223, "step": 4772 }, { "epoch": 0.5472996216030271, "grad_norm": 0.23765566721697534, "learning_rate": 0.00044782195222499465, "loss": 2.4876, "step": 4773 }, { "epoch": 0.5474142873523679, "grad_norm": 0.2542241916576706, "learning_rate": 0.00044763727445003233, "loss": 2.4335, "step": 4774 }, { "epoch": 0.5475289531017086, "grad_norm": 0.24101558982538643, "learning_rate": 0.00044745260389750984, "loss": 2.4833, "step": 4775 }, { "epoch": 0.5476436188510492, "grad_norm": 0.2588164553192838, "learning_rate": 0.0004472679405928989, "loss": 2.4122, "step": 4776 }, { "epoch": 0.5477582846003899, "grad_norm": 0.2523151241094458, "learning_rate": 0.00044708328456167037, "loss": 2.4763, "step": 4777 }, { "epoch": 0.5478729503497305, "grad_norm": 0.2497135084125547, "learning_rate": 0.00044689863582929415, "loss": 2.4549, "step": 4778 }, { "epoch": 0.5479876160990712, "grad_norm": 0.2296860926274922, "learning_rate": 0.0004467139944212388, "loss": 2.4242, "step": 4779 }, { "epoch": 0.5481022818484119, "grad_norm": 0.23376805407061785, "learning_rate": 0.00044652936036297235, "loss": 2.5057, "step": 4780 }, { "epoch": 0.5482169475977525, "grad_norm": 0.2319956687226215, "learning_rate": 0.0004463447336799614, "loss": 2.4607, "step": 4781 }, { "epoch": 0.5483316133470932, "grad_norm": 0.22590822955338075, "learning_rate": 0.0004461601143976715, "loss": 2.4667, "step": 4782 }, { "epoch": 0.5484462790964338, "grad_norm": 0.238851169957579, "learning_rate": 0.00044597550254156753, "loss": 2.4381, "step": 4783 }, { "epoch": 0.5485609448457746, "grad_norm": 0.25303266738789826, "learning_rate": 0.0004457908981371134, "loss": 2.6781, "step": 4784 }, { "epoch": 0.5486756105951153, "grad_norm": 0.24889484938798123, "learning_rate": 0.0004456063012097714, "loss": 2.2309, "step": 4785 }, { "epoch": 0.5487902763444559, "grad_norm": 0.23691959164838405, "learning_rate": 0.0004454217117850034, "loss": 2.4626, "step": 4786 }, { "epoch": 0.5489049420937966, "grad_norm": 0.22827573812750077, "learning_rate": 0.0004452371298882697, "loss": 2.4969, "step": 4787 }, { "epoch": 0.5490196078431373, "grad_norm": 0.21800070714855738, "learning_rate": 0.0004450525555450303, "loss": 2.3879, "step": 4788 }, { "epoch": 0.5491342735924779, "grad_norm": 0.22492028767103975, "learning_rate": 0.00044486798878074337, "loss": 2.442, "step": 4789 }, { "epoch": 0.5492489393418186, "grad_norm": 0.2620256696018022, "learning_rate": 0.0004446834296208665, "loss": 2.4345, "step": 4790 }, { "epoch": 0.5493636050911592, "grad_norm": 0.28449624626526093, "learning_rate": 0.00044449887809085603, "loss": 2.5797, "step": 4791 }, { "epoch": 0.5494782708404999, "grad_norm": 0.2552071158199875, "learning_rate": 0.0004443143342161673, "loss": 2.3689, "step": 4792 }, { "epoch": 0.5495929365898407, "grad_norm": 0.26099397472656904, "learning_rate": 0.0004441297980222549, "loss": 2.4704, "step": 4793 }, { "epoch": 0.5497076023391813, "grad_norm": 0.2533246002464952, "learning_rate": 0.00044394526953457186, "loss": 2.3271, "step": 4794 }, { "epoch": 0.549822268088522, "grad_norm": 0.2540510707069286, "learning_rate": 0.00044376074877857026, "loss": 2.4818, "step": 4795 }, { "epoch": 0.5499369338378627, "grad_norm": 0.25624462145109506, "learning_rate": 0.00044357623577970153, "loss": 2.533, "step": 4796 }, { "epoch": 0.5500515995872033, "grad_norm": 0.28852682268350405, "learning_rate": 0.0004433917305634153, "loss": 2.5023, "step": 4797 }, { "epoch": 0.550166265336544, "grad_norm": 0.2721323457078935, "learning_rate": 0.0004432072331551608, "loss": 2.5043, "step": 4798 }, { "epoch": 0.5502809310858846, "grad_norm": 0.25130469887505796, "learning_rate": 0.00044302274358038607, "loss": 2.4418, "step": 4799 }, { "epoch": 0.5503955968352253, "grad_norm": 0.24052669587058323, "learning_rate": 0.00044283826186453784, "loss": 2.4507, "step": 4800 }, { "epoch": 0.550510262584566, "grad_norm": 0.2417444454044004, "learning_rate": 0.00044265378803306165, "loss": 2.502, "step": 4801 }, { "epoch": 0.5506249283339066, "grad_norm": 0.26511687754838104, "learning_rate": 0.0004424693221114022, "loss": 2.7135, "step": 4802 }, { "epoch": 0.5507395940832474, "grad_norm": 0.25608699706345367, "learning_rate": 0.00044228486412500325, "loss": 2.4244, "step": 4803 }, { "epoch": 0.550854259832588, "grad_norm": 0.2349821935992536, "learning_rate": 0.0004421004140993071, "loss": 2.3313, "step": 4804 }, { "epoch": 0.5509689255819287, "grad_norm": 0.24163751408062434, "learning_rate": 0.00044191597205975525, "loss": 2.2961, "step": 4805 }, { "epoch": 0.5510835913312694, "grad_norm": 0.22484079336824905, "learning_rate": 0.0004417315380317879, "loss": 2.4006, "step": 4806 }, { "epoch": 0.55119825708061, "grad_norm": 0.22689110579999638, "learning_rate": 0.000441547112040844, "loss": 2.518, "step": 4807 }, { "epoch": 0.5513129228299507, "grad_norm": 0.24610240863174646, "learning_rate": 0.0004413626941123618, "loss": 2.4755, "step": 4808 }, { "epoch": 0.5514275885792914, "grad_norm": 0.2346237024557651, "learning_rate": 0.00044117828427177834, "loss": 2.5628, "step": 4809 }, { "epoch": 0.551542254328632, "grad_norm": 0.2603374765072642, "learning_rate": 0.00044099388254452925, "loss": 2.5665, "step": 4810 }, { "epoch": 0.5516569200779727, "grad_norm": 0.2532558830398724, "learning_rate": 0.0004408094889560494, "loss": 2.4171, "step": 4811 }, { "epoch": 0.5517715858273133, "grad_norm": 0.269002790605217, "learning_rate": 0.00044062510353177207, "loss": 2.3689, "step": 4812 }, { "epoch": 0.551886251576654, "grad_norm": 0.26534334527646547, "learning_rate": 0.00044044072629713014, "loss": 2.6981, "step": 4813 }, { "epoch": 0.5520009173259948, "grad_norm": 0.2412155199833553, "learning_rate": 0.0004402563572775546, "loss": 2.3746, "step": 4814 }, { "epoch": 0.5521155830753354, "grad_norm": 0.2265862651280693, "learning_rate": 0.00044007199649847585, "loss": 2.3744, "step": 4815 }, { "epoch": 0.5522302488246761, "grad_norm": 0.2528429983471182, "learning_rate": 0.0004398876439853227, "loss": 2.3356, "step": 4816 }, { "epoch": 0.5523449145740167, "grad_norm": 0.2507067343275455, "learning_rate": 0.0004397032997635232, "loss": 2.5152, "step": 4817 }, { "epoch": 0.5524595803233574, "grad_norm": 0.282679817104617, "learning_rate": 0.00043951896385850426, "loss": 2.4426, "step": 4818 }, { "epoch": 0.5525742460726981, "grad_norm": 0.2858474799198362, "learning_rate": 0.0004393346362956915, "loss": 2.4098, "step": 4819 }, { "epoch": 0.5526889118220387, "grad_norm": 0.25931048098294573, "learning_rate": 0.00043915031710050907, "loss": 2.5586, "step": 4820 }, { "epoch": 0.5528035775713794, "grad_norm": 0.2975961876359454, "learning_rate": 0.00043896600629838065, "loss": 2.6139, "step": 4821 }, { "epoch": 0.5529182433207201, "grad_norm": 0.23907077911512128, "learning_rate": 0.000438781703914728, "loss": 2.46, "step": 4822 }, { "epoch": 0.5530329090700608, "grad_norm": 0.24174098678420827, "learning_rate": 0.00043859740997497247, "loss": 2.42, "step": 4823 }, { "epoch": 0.5531475748194015, "grad_norm": 0.24615933250790234, "learning_rate": 0.0004384131245045339, "loss": 2.3337, "step": 4824 }, { "epoch": 0.5532622405687421, "grad_norm": 0.26674061460191334, "learning_rate": 0.0004382288475288309, "loss": 2.41, "step": 4825 }, { "epoch": 0.5533769063180828, "grad_norm": 0.24982233893714156, "learning_rate": 0.00043804457907328076, "loss": 2.4534, "step": 4826 }, { "epoch": 0.5534915720674235, "grad_norm": 0.23021842315923646, "learning_rate": 0.0004378603191632999, "loss": 2.4235, "step": 4827 }, { "epoch": 0.5536062378167641, "grad_norm": 0.286827671122779, "learning_rate": 0.0004376760678243037, "loss": 2.4163, "step": 4828 }, { "epoch": 0.5537209035661048, "grad_norm": 0.24414778434140147, "learning_rate": 0.0004374918250817059, "loss": 2.4851, "step": 4829 }, { "epoch": 0.5538355693154455, "grad_norm": 0.2427909785001659, "learning_rate": 0.0004373075909609193, "loss": 2.4781, "step": 4830 }, { "epoch": 0.5539502350647861, "grad_norm": 0.253167953669717, "learning_rate": 0.0004371233654873556, "loss": 2.4978, "step": 4831 }, { "epoch": 0.5540649008141268, "grad_norm": 0.24145368104312298, "learning_rate": 0.0004369391486864249, "loss": 2.5038, "step": 4832 }, { "epoch": 0.5541795665634675, "grad_norm": 0.23578787823468977, "learning_rate": 0.0004367549405835366, "loss": 2.37, "step": 4833 }, { "epoch": 0.5542942323128082, "grad_norm": 0.2920386891151897, "learning_rate": 0.00043657074120409886, "loss": 2.4679, "step": 4834 }, { "epoch": 0.5544088980621489, "grad_norm": 0.24244960685159794, "learning_rate": 0.0004363865505735182, "loss": 2.5356, "step": 4835 }, { "epoch": 0.5545235638114895, "grad_norm": 0.24090774482848445, "learning_rate": 0.00043620236871720034, "loss": 2.5388, "step": 4836 }, { "epoch": 0.5546382295608302, "grad_norm": 0.2454130544457023, "learning_rate": 0.00043601819566054943, "loss": 2.5311, "step": 4837 }, { "epoch": 0.5547528953101708, "grad_norm": 0.2376177636837363, "learning_rate": 0.00043583403142896903, "loss": 2.3974, "step": 4838 }, { "epoch": 0.5548675610595115, "grad_norm": 0.2559091897614342, "learning_rate": 0.00043564987604786086, "loss": 2.3667, "step": 4839 }, { "epoch": 0.5549822268088522, "grad_norm": 0.2671649276817025, "learning_rate": 0.0004354657295426257, "loss": 2.3363, "step": 4840 }, { "epoch": 0.5550968925581928, "grad_norm": 0.24775055897088888, "learning_rate": 0.0004352815919386629, "loss": 2.4832, "step": 4841 }, { "epoch": 0.5552115583075335, "grad_norm": 0.2645627712034118, "learning_rate": 0.0004350974632613708, "loss": 2.4937, "step": 4842 }, { "epoch": 0.5553262240568743, "grad_norm": 0.2738756110755357, "learning_rate": 0.0004349133435361466, "loss": 2.4529, "step": 4843 }, { "epoch": 0.5554408898062149, "grad_norm": 0.25294932230726797, "learning_rate": 0.00043472923278838613, "loss": 2.3918, "step": 4844 }, { "epoch": 0.5555555555555556, "grad_norm": 0.26641653131424764, "learning_rate": 0.0004345451310434836, "loss": 2.4804, "step": 4845 }, { "epoch": 0.5556702213048962, "grad_norm": 0.25896322684829565, "learning_rate": 0.0004343610383268327, "loss": 2.5174, "step": 4846 }, { "epoch": 0.5557848870542369, "grad_norm": 0.2446832849067908, "learning_rate": 0.00043417695466382524, "loss": 2.4287, "step": 4847 }, { "epoch": 0.5558995528035776, "grad_norm": 0.2622762843559269, "learning_rate": 0.00043399288007985216, "loss": 2.4142, "step": 4848 }, { "epoch": 0.5560142185529182, "grad_norm": 0.2645581899981976, "learning_rate": 0.00043380881460030327, "loss": 2.3993, "step": 4849 }, { "epoch": 0.5561288843022589, "grad_norm": 0.2945858120371693, "learning_rate": 0.00043362475825056663, "loss": 2.4832, "step": 4850 }, { "epoch": 0.5562435500515995, "grad_norm": 0.2542062804006728, "learning_rate": 0.00043344071105602927, "loss": 2.4325, "step": 4851 }, { "epoch": 0.5563582158009402, "grad_norm": 0.25524041626340704, "learning_rate": 0.00043325667304207696, "loss": 2.3308, "step": 4852 }, { "epoch": 0.556472881550281, "grad_norm": 0.24942235264108364, "learning_rate": 0.00043307264423409457, "loss": 2.3737, "step": 4853 }, { "epoch": 0.5565875472996216, "grad_norm": 0.2882378611895105, "learning_rate": 0.0004328886246574651, "loss": 2.2909, "step": 4854 }, { "epoch": 0.5567022130489623, "grad_norm": 0.2546627066954701, "learning_rate": 0.0004327046143375707, "loss": 2.4884, "step": 4855 }, { "epoch": 0.556816878798303, "grad_norm": 0.26281990293380664, "learning_rate": 0.00043252061329979196, "loss": 2.4826, "step": 4856 }, { "epoch": 0.5569315445476436, "grad_norm": 0.2488906194697834, "learning_rate": 0.0004323366215695081, "loss": 2.4684, "step": 4857 }, { "epoch": 0.5570462102969843, "grad_norm": 0.2165027454939841, "learning_rate": 0.0004321526391720977, "loss": 2.4967, "step": 4858 }, { "epoch": 0.5571608760463249, "grad_norm": 0.25771986599903696, "learning_rate": 0.00043196866613293746, "loss": 2.5132, "step": 4859 }, { "epoch": 0.5572755417956656, "grad_norm": 0.23557432991120528, "learning_rate": 0.00043178470247740285, "loss": 2.4724, "step": 4860 }, { "epoch": 0.5573902075450063, "grad_norm": 0.2638220588035415, "learning_rate": 0.00043160074823086835, "loss": 2.4175, "step": 4861 }, { "epoch": 0.557504873294347, "grad_norm": 0.2478846332566774, "learning_rate": 0.00043141680341870657, "loss": 2.4862, "step": 4862 }, { "epoch": 0.5576195390436877, "grad_norm": 0.24989382796014142, "learning_rate": 0.0004312328680662897, "loss": 2.548, "step": 4863 }, { "epoch": 0.5577342047930284, "grad_norm": 0.23417787595455894, "learning_rate": 0.0004310489421989878, "loss": 2.4768, "step": 4864 }, { "epoch": 0.557848870542369, "grad_norm": 0.25759721254830326, "learning_rate": 0.00043086502584217004, "loss": 2.4369, "step": 4865 }, { "epoch": 0.5579635362917097, "grad_norm": 0.25896098994381306, "learning_rate": 0.000430681119021204, "loss": 2.4641, "step": 4866 }, { "epoch": 0.5580782020410503, "grad_norm": 0.2258031219498759, "learning_rate": 0.0004304972217614562, "loss": 2.5096, "step": 4867 }, { "epoch": 0.558192867790391, "grad_norm": 0.23478104341848002, "learning_rate": 0.000430313334088292, "loss": 2.5601, "step": 4868 }, { "epoch": 0.5583075335397317, "grad_norm": 0.23128315269199018, "learning_rate": 0.00043012945602707506, "loss": 2.4953, "step": 4869 }, { "epoch": 0.5584221992890723, "grad_norm": 0.2235934607120747, "learning_rate": 0.00042994558760316767, "loss": 2.4664, "step": 4870 }, { "epoch": 0.558536865038413, "grad_norm": 0.25388743267954794, "learning_rate": 0.0004297617288419312, "loss": 2.4863, "step": 4871 }, { "epoch": 0.5586515307877536, "grad_norm": 0.2483914620988069, "learning_rate": 0.00042957787976872515, "loss": 2.4015, "step": 4872 }, { "epoch": 0.5587661965370944, "grad_norm": 0.24851778591337426, "learning_rate": 0.0004293940404089084, "loss": 2.4096, "step": 4873 }, { "epoch": 0.5588808622864351, "grad_norm": 0.253936578497908, "learning_rate": 0.00042921021078783794, "loss": 2.5402, "step": 4874 }, { "epoch": 0.5589955280357757, "grad_norm": 0.2553858357488869, "learning_rate": 0.00042902639093086954, "loss": 2.4461, "step": 4875 }, { "epoch": 0.5591101937851164, "grad_norm": 0.2372255160378026, "learning_rate": 0.0004288425808633575, "loss": 2.4038, "step": 4876 }, { "epoch": 0.5592248595344571, "grad_norm": 0.23724388962521775, "learning_rate": 0.000428658780610655, "loss": 2.5145, "step": 4877 }, { "epoch": 0.5593395252837977, "grad_norm": 0.2505327135657401, "learning_rate": 0.00042847499019811396, "loss": 2.3965, "step": 4878 }, { "epoch": 0.5594541910331384, "grad_norm": 0.24082436548360425, "learning_rate": 0.0004282912096510846, "loss": 2.4938, "step": 4879 }, { "epoch": 0.559568856782479, "grad_norm": 0.22894455485668871, "learning_rate": 0.00042810743899491605, "loss": 2.4597, "step": 4880 }, { "epoch": 0.5596835225318197, "grad_norm": 0.27021313263866303, "learning_rate": 0.0004279236782549559, "loss": 2.3288, "step": 4881 }, { "epoch": 0.5597981882811605, "grad_norm": 0.22730225284093525, "learning_rate": 0.0004277399274565502, "loss": 2.3843, "step": 4882 }, { "epoch": 0.5599128540305011, "grad_norm": 0.2649108397364408, "learning_rate": 0.00042755618662504425, "loss": 2.5714, "step": 4883 }, { "epoch": 0.5600275197798418, "grad_norm": 0.23838060448196088, "learning_rate": 0.00042737245578578154, "loss": 2.4044, "step": 4884 }, { "epoch": 0.5601421855291824, "grad_norm": 0.2638573793088811, "learning_rate": 0.00042718873496410407, "loss": 2.4816, "step": 4885 }, { "epoch": 0.5602568512785231, "grad_norm": 0.2648290928472831, "learning_rate": 0.00042700502418535277, "loss": 2.4739, "step": 4886 }, { "epoch": 0.5603715170278638, "grad_norm": 0.23894492566449418, "learning_rate": 0.00042682132347486676, "loss": 2.3284, "step": 4887 }, { "epoch": 0.5604861827772044, "grad_norm": 0.2402670088342951, "learning_rate": 0.00042663763285798444, "loss": 2.3162, "step": 4888 }, { "epoch": 0.5606008485265451, "grad_norm": 0.2652815362614118, "learning_rate": 0.00042645395236004226, "loss": 2.6069, "step": 4889 }, { "epoch": 0.5607155142758858, "grad_norm": 0.2559776449159992, "learning_rate": 0.0004262702820063754, "loss": 2.5234, "step": 4890 }, { "epoch": 0.5608301800252264, "grad_norm": 0.2426786195464696, "learning_rate": 0.0004260866218223177, "loss": 2.399, "step": 4891 }, { "epoch": 0.5609448457745672, "grad_norm": 0.26698254481411127, "learning_rate": 0.00042590297183320156, "loss": 2.3718, "step": 4892 }, { "epoch": 0.5610595115239078, "grad_norm": 0.26883105127140416, "learning_rate": 0.00042571933206435813, "loss": 2.439, "step": 4893 }, { "epoch": 0.5611741772732485, "grad_norm": 0.2524544550876167, "learning_rate": 0.00042553570254111697, "loss": 2.4446, "step": 4894 }, { "epoch": 0.5612888430225892, "grad_norm": 0.25464092400980126, "learning_rate": 0.0004253520832888061, "loss": 2.4696, "step": 4895 }, { "epoch": 0.5614035087719298, "grad_norm": 0.24338251152511772, "learning_rate": 0.00042516847433275257, "loss": 2.395, "step": 4896 }, { "epoch": 0.5615181745212705, "grad_norm": 0.2563180763101968, "learning_rate": 0.00042498487569828135, "loss": 2.4257, "step": 4897 }, { "epoch": 0.5616328402706112, "grad_norm": 0.24530434972226864, "learning_rate": 0.00042480128741071674, "loss": 2.3181, "step": 4898 }, { "epoch": 0.5617475060199518, "grad_norm": 0.24984142600840645, "learning_rate": 0.0004246177094953812, "loss": 2.4263, "step": 4899 }, { "epoch": 0.5618621717692925, "grad_norm": 0.29386776057788194, "learning_rate": 0.00042443414197759565, "loss": 2.5327, "step": 4900 }, { "epoch": 0.5619768375186331, "grad_norm": 0.22677102868342894, "learning_rate": 0.0004242505848826798, "loss": 2.4321, "step": 4901 }, { "epoch": 0.5620915032679739, "grad_norm": 0.25922261468985014, "learning_rate": 0.0004240670382359517, "loss": 2.4383, "step": 4902 }, { "epoch": 0.5622061690173146, "grad_norm": 0.2507186314083744, "learning_rate": 0.0004238835020627285, "loss": 2.3512, "step": 4903 }, { "epoch": 0.5623208347666552, "grad_norm": 0.2476373779137207, "learning_rate": 0.00042369997638832515, "loss": 2.3509, "step": 4904 }, { "epoch": 0.5624355005159959, "grad_norm": 0.2692043134895928, "learning_rate": 0.0004235164612380557, "loss": 2.4558, "step": 4905 }, { "epoch": 0.5625501662653365, "grad_norm": 0.24333623662612258, "learning_rate": 0.0004233329566372326, "loss": 2.4246, "step": 4906 }, { "epoch": 0.5626648320146772, "grad_norm": 0.23950960330645835, "learning_rate": 0.0004231494626111665, "loss": 2.4713, "step": 4907 }, { "epoch": 0.5627794977640179, "grad_norm": 0.2758028885336742, "learning_rate": 0.00042296597918516717, "loss": 2.5374, "step": 4908 }, { "epoch": 0.5628941635133585, "grad_norm": 0.27789722969499764, "learning_rate": 0.0004227825063845427, "loss": 2.5723, "step": 4909 }, { "epoch": 0.5630088292626992, "grad_norm": 0.24353642476783302, "learning_rate": 0.0004225990442345996, "loss": 2.3756, "step": 4910 }, { "epoch": 0.56312349501204, "grad_norm": 0.2458334667688009, "learning_rate": 0.00042241559276064297, "loss": 2.4076, "step": 4911 }, { "epoch": 0.5632381607613806, "grad_norm": 0.2729030042695683, "learning_rate": 0.0004222321519879762, "loss": 2.5115, "step": 4912 }, { "epoch": 0.5633528265107213, "grad_norm": 0.252502709404377, "learning_rate": 0.00042204872194190194, "loss": 2.4485, "step": 4913 }, { "epoch": 0.5634674922600619, "grad_norm": 0.24861345475897326, "learning_rate": 0.0004218653026477204, "loss": 2.5666, "step": 4914 }, { "epoch": 0.5635821580094026, "grad_norm": 0.22254850372656043, "learning_rate": 0.00042168189413073124, "loss": 2.5101, "step": 4915 }, { "epoch": 0.5636968237587433, "grad_norm": 0.24672798872409168, "learning_rate": 0.00042149849641623166, "loss": 2.4821, "step": 4916 }, { "epoch": 0.5638114895080839, "grad_norm": 0.24014874610772252, "learning_rate": 0.00042131510952951806, "loss": 2.4685, "step": 4917 }, { "epoch": 0.5639261552574246, "grad_norm": 0.22741422591953592, "learning_rate": 0.00042113173349588544, "loss": 2.3497, "step": 4918 }, { "epoch": 0.5640408210067652, "grad_norm": 0.2273740470363072, "learning_rate": 0.00042094836834062686, "loss": 2.4727, "step": 4919 }, { "epoch": 0.5641554867561059, "grad_norm": 0.2387965910549213, "learning_rate": 0.0004207650140890339, "loss": 2.5299, "step": 4920 }, { "epoch": 0.5642701525054467, "grad_norm": 0.23733339658162125, "learning_rate": 0.0004205816707663969, "loss": 2.4013, "step": 4921 }, { "epoch": 0.5643848182547873, "grad_norm": 0.26553282989404914, "learning_rate": 0.00042039833839800444, "loss": 2.3392, "step": 4922 }, { "epoch": 0.564499484004128, "grad_norm": 0.24471383608341513, "learning_rate": 0.0004202150170091439, "loss": 2.4644, "step": 4923 }, { "epoch": 0.5646141497534687, "grad_norm": 0.23393592612213054, "learning_rate": 0.0004200317066251011, "loss": 2.4007, "step": 4924 }, { "epoch": 0.5647288155028093, "grad_norm": 0.24215179716579294, "learning_rate": 0.00041984840727116, "loss": 2.4905, "step": 4925 }, { "epoch": 0.56484348125215, "grad_norm": 0.23905396601960657, "learning_rate": 0.0004196651189726032, "loss": 2.4086, "step": 4926 }, { "epoch": 0.5649581470014906, "grad_norm": 0.24883208810305038, "learning_rate": 0.00041948184175471175, "loss": 2.6536, "step": 4927 }, { "epoch": 0.5650728127508313, "grad_norm": 0.2515775101440606, "learning_rate": 0.00041929857564276574, "loss": 2.5192, "step": 4928 }, { "epoch": 0.565187478500172, "grad_norm": 0.24490707878154433, "learning_rate": 0.0004191153206620427, "loss": 2.5293, "step": 4929 }, { "epoch": 0.5653021442495126, "grad_norm": 0.22906064671648874, "learning_rate": 0.00041893207683781953, "loss": 2.5218, "step": 4930 }, { "epoch": 0.5654168099988534, "grad_norm": 0.24325593525300496, "learning_rate": 0.0004187488441953711, "loss": 2.3544, "step": 4931 }, { "epoch": 0.5655314757481941, "grad_norm": 0.2558655745278075, "learning_rate": 0.0004185656227599705, "loss": 2.4883, "step": 4932 }, { "epoch": 0.5656461414975347, "grad_norm": 0.3085505469798391, "learning_rate": 0.00041838241255689015, "loss": 2.6035, "step": 4933 }, { "epoch": 0.5657608072468754, "grad_norm": 0.2453178686534555, "learning_rate": 0.0004181992136114003, "loss": 2.459, "step": 4934 }, { "epoch": 0.565875472996216, "grad_norm": 0.24832127561749337, "learning_rate": 0.0004180160259487695, "loss": 2.4957, "step": 4935 }, { "epoch": 0.5659901387455567, "grad_norm": 0.2325191296115635, "learning_rate": 0.00041783284959426536, "loss": 2.4064, "step": 4936 }, { "epoch": 0.5661048044948974, "grad_norm": 0.23004709154776268, "learning_rate": 0.00041764968457315313, "loss": 2.3672, "step": 4937 }, { "epoch": 0.566219470244238, "grad_norm": 0.23130771927941238, "learning_rate": 0.0004174665309106974, "loss": 2.4628, "step": 4938 }, { "epoch": 0.5663341359935787, "grad_norm": 0.2492399193196304, "learning_rate": 0.00041728338863216037, "loss": 2.424, "step": 4939 }, { "epoch": 0.5664488017429193, "grad_norm": 0.2387036290167383, "learning_rate": 0.00041710025776280324, "loss": 2.4757, "step": 4940 }, { "epoch": 0.56656346749226, "grad_norm": 0.2579005160141031, "learning_rate": 0.0004169171383278853, "loss": 2.4225, "step": 4941 }, { "epoch": 0.5666781332416008, "grad_norm": 0.24058982313737814, "learning_rate": 0.00041673403035266427, "loss": 2.3818, "step": 4942 }, { "epoch": 0.5667927989909414, "grad_norm": 0.2670992961083317, "learning_rate": 0.00041655093386239677, "loss": 2.4502, "step": 4943 }, { "epoch": 0.5669074647402821, "grad_norm": 0.23070627090023876, "learning_rate": 0.0004163678488823373, "loss": 2.5496, "step": 4944 }, { "epoch": 0.5670221304896228, "grad_norm": 0.249895098141157, "learning_rate": 0.00041618477543773876, "loss": 2.5463, "step": 4945 }, { "epoch": 0.5671367962389634, "grad_norm": 0.24158548854674405, "learning_rate": 0.00041600171355385293, "loss": 2.483, "step": 4946 }, { "epoch": 0.5672514619883041, "grad_norm": 0.23560677576318195, "learning_rate": 0.0004158186632559293, "loss": 2.425, "step": 4947 }, { "epoch": 0.5673661277376447, "grad_norm": 0.24915892407762452, "learning_rate": 0.0004156356245692166, "loss": 2.5013, "step": 4948 }, { "epoch": 0.5674807934869854, "grad_norm": 0.23654270998193125, "learning_rate": 0.00041545259751896136, "loss": 2.4007, "step": 4949 }, { "epoch": 0.5675954592363261, "grad_norm": 0.2220732795267781, "learning_rate": 0.0004152695821304088, "loss": 2.4143, "step": 4950 }, { "epoch": 0.5677101249856668, "grad_norm": 0.26851873014468003, "learning_rate": 0.00041508657842880204, "loss": 2.4724, "step": 4951 }, { "epoch": 0.5678247907350075, "grad_norm": 0.2478712374942227, "learning_rate": 0.0004149035864393832, "loss": 2.448, "step": 4952 }, { "epoch": 0.5679394564843481, "grad_norm": 0.24972375616551537, "learning_rate": 0.00041472060618739264, "loss": 2.3692, "step": 4953 }, { "epoch": 0.5680541222336888, "grad_norm": 0.2603092447141002, "learning_rate": 0.00041453763769806886, "loss": 2.3871, "step": 4954 }, { "epoch": 0.5681687879830295, "grad_norm": 0.24006965038814282, "learning_rate": 0.00041435468099664896, "loss": 2.3907, "step": 4955 }, { "epoch": 0.5682834537323701, "grad_norm": 0.2487257553112925, "learning_rate": 0.0004141717361083683, "loss": 2.4635, "step": 4956 }, { "epoch": 0.5683981194817108, "grad_norm": 0.2465090302221992, "learning_rate": 0.00041398880305846034, "loss": 2.6166, "step": 4957 }, { "epoch": 0.5685127852310515, "grad_norm": 0.23274574072710982, "learning_rate": 0.0004138058818721576, "loss": 2.5487, "step": 4958 }, { "epoch": 0.5686274509803921, "grad_norm": 0.28852515261643535, "learning_rate": 0.00041362297257469053, "loss": 2.4145, "step": 4959 }, { "epoch": 0.5687421167297328, "grad_norm": 0.29588218355642887, "learning_rate": 0.00041344007519128793, "loss": 2.5522, "step": 4960 }, { "epoch": 0.5688567824790735, "grad_norm": 0.22777146641677498, "learning_rate": 0.0004132571897471769, "loss": 2.5213, "step": 4961 }, { "epoch": 0.5689714482284142, "grad_norm": 0.27159006231697913, "learning_rate": 0.00041307431626758296, "loss": 2.4479, "step": 4962 }, { "epoch": 0.5690861139777549, "grad_norm": 0.22502095375227055, "learning_rate": 0.0004128914547777303, "loss": 2.3659, "step": 4963 }, { "epoch": 0.5692007797270955, "grad_norm": 0.25411900618427435, "learning_rate": 0.00041270860530284097, "loss": 2.5514, "step": 4964 }, { "epoch": 0.5693154454764362, "grad_norm": 0.2482581321113893, "learning_rate": 0.0004125257678681357, "loss": 2.4987, "step": 4965 }, { "epoch": 0.5694301112257769, "grad_norm": 0.25103544233672387, "learning_rate": 0.0004123429424988332, "loss": 2.3657, "step": 4966 }, { "epoch": 0.5695447769751175, "grad_norm": 0.2551137577516324, "learning_rate": 0.00041216012922015076, "loss": 2.4499, "step": 4967 }, { "epoch": 0.5696594427244582, "grad_norm": 0.240734279180522, "learning_rate": 0.0004119773280573044, "loss": 2.5309, "step": 4968 }, { "epoch": 0.5697741084737988, "grad_norm": 0.23234678420881874, "learning_rate": 0.0004117945390355078, "loss": 2.465, "step": 4969 }, { "epoch": 0.5698887742231395, "grad_norm": 0.22087348076078123, "learning_rate": 0.000411611762179973, "loss": 2.5495, "step": 4970 }, { "epoch": 0.5700034399724803, "grad_norm": 0.2515343611435465, "learning_rate": 0.0004114289975159109, "loss": 2.357, "step": 4971 }, { "epoch": 0.5701181057218209, "grad_norm": 0.23111997926693914, "learning_rate": 0.00041124624506853, "loss": 2.433, "step": 4972 }, { "epoch": 0.5702327714711616, "grad_norm": 0.25073422304041754, "learning_rate": 0.000411063504863038, "loss": 2.5441, "step": 4973 }, { "epoch": 0.5703474372205022, "grad_norm": 0.2369632413734191, "learning_rate": 0.0004108807769246403, "loss": 2.5702, "step": 4974 }, { "epoch": 0.5704621029698429, "grad_norm": 0.2356751599811318, "learning_rate": 0.0004106980612785407, "loss": 2.452, "step": 4975 }, { "epoch": 0.5705767687191836, "grad_norm": 0.23679153328467326, "learning_rate": 0.0004105153579499411, "loss": 2.517, "step": 4976 }, { "epoch": 0.5706914344685242, "grad_norm": 0.24373090189496652, "learning_rate": 0.0004103326669640421, "loss": 2.4005, "step": 4977 }, { "epoch": 0.5708061002178649, "grad_norm": 0.24078361012545765, "learning_rate": 0.00041014998834604265, "loss": 2.5166, "step": 4978 }, { "epoch": 0.5709207659672056, "grad_norm": 0.23285832180597507, "learning_rate": 0.0004099673221211395, "loss": 2.4188, "step": 4979 }, { "epoch": 0.5710354317165462, "grad_norm": 0.24794252343421705, "learning_rate": 0.00040978466831452824, "loss": 2.3682, "step": 4980 }, { "epoch": 0.571150097465887, "grad_norm": 0.2664849311181591, "learning_rate": 0.00040960202695140233, "loss": 2.3678, "step": 4981 }, { "epoch": 0.5712647632152276, "grad_norm": 0.25371214019794397, "learning_rate": 0.0004094193980569534, "loss": 2.3604, "step": 4982 }, { "epoch": 0.5713794289645683, "grad_norm": 0.25233694550672614, "learning_rate": 0.00040923678165637195, "loss": 2.3862, "step": 4983 }, { "epoch": 0.571494094713909, "grad_norm": 0.23428120722197104, "learning_rate": 0.00040905417777484655, "loss": 2.4834, "step": 4984 }, { "epoch": 0.5716087604632496, "grad_norm": 0.24161821402865863, "learning_rate": 0.0004088715864375636, "loss": 2.3369, "step": 4985 }, { "epoch": 0.5717234262125903, "grad_norm": 0.27425058296629806, "learning_rate": 0.00040868900766970835, "loss": 2.4576, "step": 4986 }, { "epoch": 0.5718380919619309, "grad_norm": 0.2570451729392658, "learning_rate": 0.0004085064414964638, "loss": 2.6059, "step": 4987 }, { "epoch": 0.5719527577112716, "grad_norm": 0.2617067632594886, "learning_rate": 0.0004083238879430117, "loss": 2.5504, "step": 4988 }, { "epoch": 0.5720674234606123, "grad_norm": 0.25595958583870637, "learning_rate": 0.0004081413470345317, "loss": 2.4119, "step": 4989 }, { "epoch": 0.572182089209953, "grad_norm": 0.24897617526276666, "learning_rate": 0.000407958818796202, "loss": 2.4577, "step": 4990 }, { "epoch": 0.5722967549592937, "grad_norm": 0.2408816396388078, "learning_rate": 0.0004077763032531987, "loss": 2.5488, "step": 4991 }, { "epoch": 0.5724114207086344, "grad_norm": 0.24116116110685146, "learning_rate": 0.0004075938004306963, "loss": 2.4115, "step": 4992 }, { "epoch": 0.572526086457975, "grad_norm": 0.22783101352931112, "learning_rate": 0.0004074113103538679, "loss": 2.5377, "step": 4993 }, { "epoch": 0.5726407522073157, "grad_norm": 0.24684727131855588, "learning_rate": 0.0004072288330478844, "loss": 2.4513, "step": 4994 }, { "epoch": 0.5727554179566563, "grad_norm": 0.2575865935143395, "learning_rate": 0.00040704636853791486, "loss": 2.4231, "step": 4995 }, { "epoch": 0.572870083705997, "grad_norm": 0.244373473530503, "learning_rate": 0.00040686391684912704, "loss": 2.3674, "step": 4996 }, { "epoch": 0.5729847494553377, "grad_norm": 0.2291286251653978, "learning_rate": 0.0004066814780066863, "loss": 2.4735, "step": 4997 }, { "epoch": 0.5730994152046783, "grad_norm": 0.2510218254496051, "learning_rate": 0.000406499052035757, "loss": 2.4592, "step": 4998 }, { "epoch": 0.573214080954019, "grad_norm": 0.2618742572643672, "learning_rate": 0.00040631663896150127, "loss": 2.4264, "step": 4999 }, { "epoch": 0.5733287467033598, "grad_norm": 0.24508574516627288, "learning_rate": 0.0004061342388090794, "loss": 2.4508, "step": 5000 }, { "epoch": 0.5734434124527004, "grad_norm": 0.26517361701772874, "learning_rate": 0.00040595185160365, "loss": 2.4589, "step": 5001 }, { "epoch": 0.5735580782020411, "grad_norm": 0.2584045114623395, "learning_rate": 0.00040576947737036985, "loss": 2.3148, "step": 5002 }, { "epoch": 0.5736727439513817, "grad_norm": 0.26357680107745307, "learning_rate": 0.00040558711613439426, "loss": 2.4467, "step": 5003 }, { "epoch": 0.5737874097007224, "grad_norm": 0.2815497403049391, "learning_rate": 0.0004054047679208762, "loss": 2.6143, "step": 5004 }, { "epoch": 0.5739020754500631, "grad_norm": 0.2943102244378706, "learning_rate": 0.0004052224327549674, "loss": 2.5064, "step": 5005 }, { "epoch": 0.5740167411994037, "grad_norm": 0.2500933055883957, "learning_rate": 0.0004050401106618174, "loss": 2.3959, "step": 5006 }, { "epoch": 0.5741314069487444, "grad_norm": 0.22715994894760586, "learning_rate": 0.00040485780166657384, "loss": 2.3756, "step": 5007 }, { "epoch": 0.574246072698085, "grad_norm": 0.250134571121391, "learning_rate": 0.00040467550579438295, "loss": 2.4686, "step": 5008 }, { "epoch": 0.5743607384474257, "grad_norm": 0.26218010014355303, "learning_rate": 0.0004044932230703892, "loss": 2.4689, "step": 5009 }, { "epoch": 0.5744754041967665, "grad_norm": 0.27574039824673974, "learning_rate": 0.0004043109535197347, "loss": 2.4893, "step": 5010 }, { "epoch": 0.5745900699461071, "grad_norm": 0.2738689805186912, "learning_rate": 0.0004041286971675602, "loss": 2.3798, "step": 5011 }, { "epoch": 0.5747047356954478, "grad_norm": 0.2544767575793516, "learning_rate": 0.0004039464540390043, "loss": 2.4219, "step": 5012 }, { "epoch": 0.5748194014447885, "grad_norm": 0.2540632078588226, "learning_rate": 0.00040376422415920425, "loss": 2.3558, "step": 5013 }, { "epoch": 0.5749340671941291, "grad_norm": 0.24574294133316146, "learning_rate": 0.00040358200755329505, "loss": 2.5075, "step": 5014 }, { "epoch": 0.5750487329434698, "grad_norm": 0.26746431676640986, "learning_rate": 0.0004033998042464101, "loss": 2.2936, "step": 5015 }, { "epoch": 0.5751633986928104, "grad_norm": 0.26172140562254753, "learning_rate": 0.00040321761426368065, "loss": 2.3932, "step": 5016 }, { "epoch": 0.5752780644421511, "grad_norm": 0.2439308984364897, "learning_rate": 0.0004030354376302363, "loss": 2.318, "step": 5017 }, { "epoch": 0.5753927301914918, "grad_norm": 0.2542153649695607, "learning_rate": 0.00040285327437120533, "loss": 2.5466, "step": 5018 }, { "epoch": 0.5755073959408324, "grad_norm": 0.2728987046727881, "learning_rate": 0.0004026711245117134, "loss": 2.5047, "step": 5019 }, { "epoch": 0.5756220616901732, "grad_norm": 0.2362835383886875, "learning_rate": 0.00040248898807688436, "loss": 2.6127, "step": 5020 }, { "epoch": 0.5757367274395138, "grad_norm": 0.23374921880662256, "learning_rate": 0.0004023068650918409, "loss": 2.4577, "step": 5021 }, { "epoch": 0.5758513931888545, "grad_norm": 0.2478615945335469, "learning_rate": 0.0004021247555817029, "loss": 2.3813, "step": 5022 }, { "epoch": 0.5759660589381952, "grad_norm": 0.2680961036638813, "learning_rate": 0.00040194265957158937, "loss": 2.5342, "step": 5023 }, { "epoch": 0.5760807246875358, "grad_norm": 0.2703612385876938, "learning_rate": 0.0004017605770866169, "loss": 2.4698, "step": 5024 }, { "epoch": 0.5761953904368765, "grad_norm": 0.26580634980699064, "learning_rate": 0.0004015785081519002, "loss": 2.4616, "step": 5025 }, { "epoch": 0.5763100561862172, "grad_norm": 0.24193585503969292, "learning_rate": 0.0004013964527925521, "loss": 2.4514, "step": 5026 }, { "epoch": 0.5764247219355578, "grad_norm": 0.2554291481177588, "learning_rate": 0.0004012144110336837, "loss": 2.4022, "step": 5027 }, { "epoch": 0.5765393876848985, "grad_norm": 0.24575255516281505, "learning_rate": 0.0004010323829004045, "loss": 2.4387, "step": 5028 }, { "epoch": 0.5766540534342391, "grad_norm": 0.24829422060193634, "learning_rate": 0.0004008503684178214, "loss": 2.3711, "step": 5029 }, { "epoch": 0.5767687191835799, "grad_norm": 0.22745846231294545, "learning_rate": 0.0004006683676110402, "loss": 2.4349, "step": 5030 }, { "epoch": 0.5768833849329206, "grad_norm": 0.25414273888483707, "learning_rate": 0.00040048638050516424, "loss": 2.3732, "step": 5031 }, { "epoch": 0.5769980506822612, "grad_norm": 0.24394908602608312, "learning_rate": 0.00040030440712529494, "loss": 2.484, "step": 5032 }, { "epoch": 0.5771127164316019, "grad_norm": 0.2522149488774059, "learning_rate": 0.00040012244749653244, "loss": 2.5205, "step": 5033 }, { "epoch": 0.5772273821809426, "grad_norm": 0.2471588564838673, "learning_rate": 0.00039994050164397457, "loss": 2.4099, "step": 5034 }, { "epoch": 0.5773420479302832, "grad_norm": 0.24311856279452515, "learning_rate": 0.00039975856959271707, "loss": 2.4746, "step": 5035 }, { "epoch": 0.5774567136796239, "grad_norm": 0.2830515290716525, "learning_rate": 0.00039957665136785425, "loss": 2.4548, "step": 5036 }, { "epoch": 0.5775713794289645, "grad_norm": 0.26662539141638475, "learning_rate": 0.0003993947469944779, "loss": 2.4929, "step": 5037 }, { "epoch": 0.5776860451783052, "grad_norm": 0.2584142809831617, "learning_rate": 0.0003992128564976787, "loss": 2.4212, "step": 5038 }, { "epoch": 0.577800710927646, "grad_norm": 0.24505748413684836, "learning_rate": 0.00039903097990254467, "loss": 2.308, "step": 5039 }, { "epoch": 0.5779153766769866, "grad_norm": 0.2738466558146099, "learning_rate": 0.00039884911723416243, "loss": 2.4598, "step": 5040 }, { "epoch": 0.5780300424263273, "grad_norm": 0.26009202116066776, "learning_rate": 0.00039866726851761625, "loss": 2.4846, "step": 5041 }, { "epoch": 0.5781447081756679, "grad_norm": 0.25752906127190345, "learning_rate": 0.0003984854337779887, "loss": 2.5153, "step": 5042 }, { "epoch": 0.5782593739250086, "grad_norm": 0.23544412902635597, "learning_rate": 0.00039830361304036074, "loss": 2.4548, "step": 5043 }, { "epoch": 0.5783740396743493, "grad_norm": 0.24210025081083186, "learning_rate": 0.00039812180632981084, "loss": 2.4365, "step": 5044 }, { "epoch": 0.5784887054236899, "grad_norm": 0.24423491119591487, "learning_rate": 0.0003979400136714156, "loss": 2.3371, "step": 5045 }, { "epoch": 0.5786033711730306, "grad_norm": 0.2565386641593241, "learning_rate": 0.00039775823509025023, "loss": 2.3662, "step": 5046 }, { "epoch": 0.5787180369223713, "grad_norm": 0.2456841947874958, "learning_rate": 0.00039757647061138717, "loss": 2.4482, "step": 5047 }, { "epoch": 0.5788327026717119, "grad_norm": 0.25895394305836317, "learning_rate": 0.00039739472025989776, "loss": 2.4652, "step": 5048 }, { "epoch": 0.5789473684210527, "grad_norm": 0.2508965260081382, "learning_rate": 0.00039721298406085096, "loss": 2.4232, "step": 5049 }, { "epoch": 0.5790620341703933, "grad_norm": 0.24100756074686788, "learning_rate": 0.0003970312620393137, "loss": 2.3737, "step": 5050 }, { "epoch": 0.579176699919734, "grad_norm": 0.25636627096187176, "learning_rate": 0.00039684955422035094, "loss": 2.3714, "step": 5051 }, { "epoch": 0.5792913656690747, "grad_norm": 0.24493546915612638, "learning_rate": 0.00039666786062902585, "loss": 2.536, "step": 5052 }, { "epoch": 0.5794060314184153, "grad_norm": 0.24890900174574493, "learning_rate": 0.0003964861812904, "loss": 2.3702, "step": 5053 }, { "epoch": 0.579520697167756, "grad_norm": 0.2465741681949579, "learning_rate": 0.00039630451622953204, "loss": 2.5329, "step": 5054 }, { "epoch": 0.5796353629170966, "grad_norm": 0.23526520178097005, "learning_rate": 0.0003961228654714797, "loss": 2.3701, "step": 5055 }, { "epoch": 0.5797500286664373, "grad_norm": 0.23174315551798486, "learning_rate": 0.00039594122904129794, "loss": 2.3131, "step": 5056 }, { "epoch": 0.579864694415778, "grad_norm": 0.23478348321827575, "learning_rate": 0.00039575960696403977, "loss": 2.4803, "step": 5057 }, { "epoch": 0.5799793601651186, "grad_norm": 0.2673030411901626, "learning_rate": 0.000395577999264757, "loss": 2.4607, "step": 5058 }, { "epoch": 0.5800940259144594, "grad_norm": 0.24203746326427245, "learning_rate": 0.0003953964059684989, "loss": 2.5467, "step": 5059 }, { "epoch": 0.5802086916638001, "grad_norm": 0.2704064739299854, "learning_rate": 0.00039521482710031257, "loss": 2.3754, "step": 5060 }, { "epoch": 0.5803233574131407, "grad_norm": 0.2599606850637984, "learning_rate": 0.00039503326268524355, "loss": 2.4309, "step": 5061 }, { "epoch": 0.5804380231624814, "grad_norm": 0.2523235674404862, "learning_rate": 0.00039485171274833484, "loss": 2.4223, "step": 5062 }, { "epoch": 0.580552688911822, "grad_norm": 0.252881143135449, "learning_rate": 0.0003946701773146283, "loss": 2.4115, "step": 5063 }, { "epoch": 0.5806673546611627, "grad_norm": 0.2361170998519746, "learning_rate": 0.00039448865640916294, "loss": 2.4754, "step": 5064 }, { "epoch": 0.5807820204105034, "grad_norm": 0.2723019477140788, "learning_rate": 0.0003943071500569763, "loss": 2.4067, "step": 5065 }, { "epoch": 0.580896686159844, "grad_norm": 0.2677907972770214, "learning_rate": 0.0003941256582831035, "loss": 2.4862, "step": 5066 }, { "epoch": 0.5810113519091847, "grad_norm": 0.23811825442679022, "learning_rate": 0.00039394418111257786, "loss": 2.5868, "step": 5067 }, { "epoch": 0.5811260176585255, "grad_norm": 0.23395874909472864, "learning_rate": 0.00039376271857043105, "loss": 2.472, "step": 5068 }, { "epoch": 0.581240683407866, "grad_norm": 0.27622535593774344, "learning_rate": 0.0003935812706816921, "loss": 2.489, "step": 5069 }, { "epoch": 0.5813553491572068, "grad_norm": 0.23427250282300477, "learning_rate": 0.00039339983747138836, "loss": 2.2698, "step": 5070 }, { "epoch": 0.5814700149065474, "grad_norm": 0.26246258835864084, "learning_rate": 0.00039321841896454505, "loss": 2.5501, "step": 5071 }, { "epoch": 0.5815846806558881, "grad_norm": 0.2355133164645691, "learning_rate": 0.00039303701518618526, "loss": 2.5025, "step": 5072 }, { "epoch": 0.5816993464052288, "grad_norm": 0.22468195801257737, "learning_rate": 0.0003928556261613304, "loss": 2.3843, "step": 5073 }, { "epoch": 0.5818140121545694, "grad_norm": 0.25635137594889784, "learning_rate": 0.0003926742519149997, "loss": 2.477, "step": 5074 }, { "epoch": 0.5819286779039101, "grad_norm": 0.25737719771148077, "learning_rate": 0.00039249289247221005, "loss": 2.512, "step": 5075 }, { "epoch": 0.5820433436532507, "grad_norm": 0.20689402098413232, "learning_rate": 0.0003923115478579765, "loss": 2.3257, "step": 5076 }, { "epoch": 0.5821580094025914, "grad_norm": 0.23565201934851593, "learning_rate": 0.0003921302180973122, "loss": 2.4065, "step": 5077 }, { "epoch": 0.5822726751519322, "grad_norm": 0.25220776282812907, "learning_rate": 0.00039194890321522823, "loss": 2.4514, "step": 5078 }, { "epoch": 0.5823873409012728, "grad_norm": 0.24143421815285085, "learning_rate": 0.00039176760323673336, "loss": 2.3765, "step": 5079 }, { "epoch": 0.5825020066506135, "grad_norm": 0.2636850660172274, "learning_rate": 0.00039158631818683466, "loss": 2.5025, "step": 5080 }, { "epoch": 0.5826166723999542, "grad_norm": 0.2665380235270446, "learning_rate": 0.0003914050480905368, "loss": 2.5385, "step": 5081 }, { "epoch": 0.5827313381492948, "grad_norm": 0.24059664363062083, "learning_rate": 0.0003912237929728423, "loss": 2.5053, "step": 5082 }, { "epoch": 0.5828460038986355, "grad_norm": 0.25706087748751005, "learning_rate": 0.00039104255285875224, "loss": 2.3346, "step": 5083 }, { "epoch": 0.5829606696479761, "grad_norm": 0.26485229627628987, "learning_rate": 0.0003908613277732652, "loss": 2.5231, "step": 5084 }, { "epoch": 0.5830753353973168, "grad_norm": 0.22552195086138882, "learning_rate": 0.0003906801177413775, "loss": 2.4214, "step": 5085 }, { "epoch": 0.5831900011466575, "grad_norm": 0.258545708869077, "learning_rate": 0.0003904989227880838, "loss": 2.4413, "step": 5086 }, { "epoch": 0.5833046668959981, "grad_norm": 0.25195722959452027, "learning_rate": 0.00039031774293837623, "loss": 2.5531, "step": 5087 }, { "epoch": 0.5834193326453389, "grad_norm": 0.2442888082596338, "learning_rate": 0.0003901365782172456, "loss": 2.5232, "step": 5088 }, { "epoch": 0.5835339983946795, "grad_norm": 0.22462173176703037, "learning_rate": 0.0003899554286496796, "loss": 2.4378, "step": 5089 }, { "epoch": 0.5836486641440202, "grad_norm": 0.24443331795574558, "learning_rate": 0.0003897742942606647, "loss": 2.4465, "step": 5090 }, { "epoch": 0.5837633298933609, "grad_norm": 0.2617752188904536, "learning_rate": 0.0003895931750751847, "loss": 2.5525, "step": 5091 }, { "epoch": 0.5838779956427015, "grad_norm": 0.2357586763821419, "learning_rate": 0.00038941207111822156, "loss": 2.4645, "step": 5092 }, { "epoch": 0.5839926613920422, "grad_norm": 0.23462237796448132, "learning_rate": 0.0003892309824147554, "loss": 2.3949, "step": 5093 }, { "epoch": 0.5841073271413829, "grad_norm": 0.22403263868852266, "learning_rate": 0.0003890499089897638, "loss": 2.4275, "step": 5094 }, { "epoch": 0.5842219928907235, "grad_norm": 0.2262607106441164, "learning_rate": 0.0003888688508682223, "loss": 2.484, "step": 5095 }, { "epoch": 0.5843366586400642, "grad_norm": 0.22706550788083926, "learning_rate": 0.0003886878080751045, "loss": 2.3659, "step": 5096 }, { "epoch": 0.5844513243894048, "grad_norm": 0.2904966655390043, "learning_rate": 0.0003885067806353816, "loss": 2.4453, "step": 5097 }, { "epoch": 0.5845659901387456, "grad_norm": 0.233395204038724, "learning_rate": 0.00038832576857402313, "loss": 2.3913, "step": 5098 }, { "epoch": 0.5846806558880863, "grad_norm": 0.27260707595832806, "learning_rate": 0.0003881447719159963, "loss": 2.4847, "step": 5099 }, { "epoch": 0.5847953216374269, "grad_norm": 0.3157668319774191, "learning_rate": 0.00038796379068626604, "loss": 2.6039, "step": 5100 }, { "epoch": 0.5849099873867676, "grad_norm": 0.2601864630542841, "learning_rate": 0.00038778282490979513, "loss": 2.3642, "step": 5101 }, { "epoch": 0.5850246531361083, "grad_norm": 0.25849911496167405, "learning_rate": 0.0003876018746115443, "loss": 2.5578, "step": 5102 }, { "epoch": 0.5851393188854489, "grad_norm": 0.23673207521087006, "learning_rate": 0.0003874209398164727, "loss": 2.5212, "step": 5103 }, { "epoch": 0.5852539846347896, "grad_norm": 0.24963372403303236, "learning_rate": 0.0003872400205495363, "loss": 2.4637, "step": 5104 }, { "epoch": 0.5853686503841302, "grad_norm": 0.25841570086865334, "learning_rate": 0.0003870591168356898, "loss": 2.4079, "step": 5105 }, { "epoch": 0.5854833161334709, "grad_norm": 0.22831346063674995, "learning_rate": 0.0003868782286998853, "loss": 2.3969, "step": 5106 }, { "epoch": 0.5855979818828116, "grad_norm": 0.2539261123956983, "learning_rate": 0.0003866973561670727, "loss": 2.3606, "step": 5107 }, { "epoch": 0.5857126476321523, "grad_norm": 0.2543712059027437, "learning_rate": 0.00038651649926220005, "loss": 2.4078, "step": 5108 }, { "epoch": 0.585827313381493, "grad_norm": 0.2582872878528295, "learning_rate": 0.0003863356580102133, "loss": 2.6518, "step": 5109 }, { "epoch": 0.5859419791308336, "grad_norm": 0.24359303332375276, "learning_rate": 0.00038615483243605584, "loss": 2.4019, "step": 5110 }, { "epoch": 0.5860566448801743, "grad_norm": 0.2389561542487212, "learning_rate": 0.0003859740225646692, "loss": 2.639, "step": 5111 }, { "epoch": 0.586171310629515, "grad_norm": 0.23655965261489872, "learning_rate": 0.0003857932284209924, "loss": 2.593, "step": 5112 }, { "epoch": 0.5862859763788556, "grad_norm": 0.2389807607585272, "learning_rate": 0.00038561245002996297, "loss": 2.5735, "step": 5113 }, { "epoch": 0.5864006421281963, "grad_norm": 0.24123560674734348, "learning_rate": 0.00038543168741651557, "loss": 2.4884, "step": 5114 }, { "epoch": 0.586515307877537, "grad_norm": 0.2570912547735875, "learning_rate": 0.00038525094060558307, "loss": 2.4702, "step": 5115 }, { "epoch": 0.5866299736268776, "grad_norm": 0.21642849264555775, "learning_rate": 0.00038507020962209594, "loss": 2.4302, "step": 5116 }, { "epoch": 0.5867446393762183, "grad_norm": 0.25150177391554046, "learning_rate": 0.00038488949449098247, "loss": 2.5713, "step": 5117 }, { "epoch": 0.586859305125559, "grad_norm": 0.22825456955090323, "learning_rate": 0.0003847087952371693, "loss": 2.4753, "step": 5118 }, { "epoch": 0.5869739708748997, "grad_norm": 0.2324367981284907, "learning_rate": 0.00038452811188558015, "loss": 2.3943, "step": 5119 }, { "epoch": 0.5870886366242404, "grad_norm": 0.23053754454441402, "learning_rate": 0.0003843474444611368, "loss": 2.5536, "step": 5120 }, { "epoch": 0.587203302373581, "grad_norm": 0.24336870442232017, "learning_rate": 0.000384166792988759, "loss": 2.4241, "step": 5121 }, { "epoch": 0.5873179681229217, "grad_norm": 0.26445927468055774, "learning_rate": 0.0003839861574933639, "loss": 2.4166, "step": 5122 }, { "epoch": 0.5874326338722623, "grad_norm": 0.2375854637209677, "learning_rate": 0.0003838055379998671, "loss": 2.4852, "step": 5123 }, { "epoch": 0.587547299621603, "grad_norm": 0.21839824598756138, "learning_rate": 0.0003836249345331816, "loss": 2.3611, "step": 5124 }, { "epoch": 0.5876619653709437, "grad_norm": 0.24070482123619102, "learning_rate": 0.000383444347118218, "loss": 2.3565, "step": 5125 }, { "epoch": 0.5877766311202843, "grad_norm": 0.25694149981460424, "learning_rate": 0.0003832637757798849, "loss": 2.3863, "step": 5126 }, { "epoch": 0.587891296869625, "grad_norm": 0.2548723885866467, "learning_rate": 0.00038308322054308867, "loss": 2.4829, "step": 5127 }, { "epoch": 0.5880059626189658, "grad_norm": 0.25401275538990925, "learning_rate": 0.00038290268143273366, "loss": 2.3436, "step": 5128 }, { "epoch": 0.5881206283683064, "grad_norm": 0.2593290721715852, "learning_rate": 0.00038272215847372156, "loss": 2.3948, "step": 5129 }, { "epoch": 0.5882352941176471, "grad_norm": 0.24015610468909754, "learning_rate": 0.00038254165169095236, "loss": 2.375, "step": 5130 }, { "epoch": 0.5883499598669877, "grad_norm": 0.2784877153979556, "learning_rate": 0.00038236116110932326, "loss": 2.5675, "step": 5131 }, { "epoch": 0.5884646256163284, "grad_norm": 0.26807598371237545, "learning_rate": 0.00038218068675372944, "loss": 2.6188, "step": 5132 }, { "epoch": 0.5885792913656691, "grad_norm": 0.25456400319575573, "learning_rate": 0.00038200022864906416, "loss": 2.4575, "step": 5133 }, { "epoch": 0.5886939571150097, "grad_norm": 0.2633685375489386, "learning_rate": 0.00038181978682021807, "loss": 2.5152, "step": 5134 }, { "epoch": 0.5888086228643504, "grad_norm": 0.2279632026284544, "learning_rate": 0.00038163936129207966, "loss": 2.3716, "step": 5135 }, { "epoch": 0.5889232886136911, "grad_norm": 0.22647178921101202, "learning_rate": 0.00038145895208953524, "loss": 2.3648, "step": 5136 }, { "epoch": 0.5890379543630317, "grad_norm": 0.28524370005834754, "learning_rate": 0.0003812785592374686, "loss": 2.3491, "step": 5137 }, { "epoch": 0.5891526201123725, "grad_norm": 0.23828153082710252, "learning_rate": 0.0003810981827607619, "loss": 2.3706, "step": 5138 }, { "epoch": 0.5892672858617131, "grad_norm": 0.22684081710844117, "learning_rate": 0.0003809178226842943, "loss": 2.3649, "step": 5139 }, { "epoch": 0.5893819516110538, "grad_norm": 0.2480258561568927, "learning_rate": 0.0003807374790329432, "loss": 2.3319, "step": 5140 }, { "epoch": 0.5894966173603945, "grad_norm": 0.2538901759850134, "learning_rate": 0.0003805571518315835, "loss": 2.4634, "step": 5141 }, { "epoch": 0.5896112831097351, "grad_norm": 0.2620400671794665, "learning_rate": 0.00038037684110508774, "loss": 2.5996, "step": 5142 }, { "epoch": 0.5897259488590758, "grad_norm": 0.25339051014132397, "learning_rate": 0.00038019654687832677, "loss": 2.4585, "step": 5143 }, { "epoch": 0.5898406146084164, "grad_norm": 0.24830535467373968, "learning_rate": 0.0003800162691761685, "loss": 2.3516, "step": 5144 }, { "epoch": 0.5899552803577571, "grad_norm": 0.23001181752788724, "learning_rate": 0.00037983600802347873, "loss": 2.3587, "step": 5145 }, { "epoch": 0.5900699461070978, "grad_norm": 0.26866400418278735, "learning_rate": 0.00037965576344512115, "loss": 2.4302, "step": 5146 }, { "epoch": 0.5901846118564384, "grad_norm": 0.253672309672238, "learning_rate": 0.0003794755354659569, "loss": 2.4196, "step": 5147 }, { "epoch": 0.5902992776057792, "grad_norm": 0.2936446606417755, "learning_rate": 0.0003792953241108452, "loss": 2.4583, "step": 5148 }, { "epoch": 0.5904139433551199, "grad_norm": 0.259918351178376, "learning_rate": 0.0003791151294046428, "loss": 2.5675, "step": 5149 }, { "epoch": 0.5905286091044605, "grad_norm": 0.2492783641204892, "learning_rate": 0.000378934951372204, "loss": 2.2789, "step": 5150 }, { "epoch": 0.5906432748538012, "grad_norm": 0.230375421930286, "learning_rate": 0.0003787547900383808, "loss": 2.5946, "step": 5151 }, { "epoch": 0.5907579406031418, "grad_norm": 0.23813157043930716, "learning_rate": 0.0003785746454280231, "loss": 2.5842, "step": 5152 }, { "epoch": 0.5908726063524825, "grad_norm": 0.28499346791973534, "learning_rate": 0.00037839451756597873, "loss": 2.4785, "step": 5153 }, { "epoch": 0.5909872721018232, "grad_norm": 0.23008861987223928, "learning_rate": 0.0003782144064770925, "loss": 2.33, "step": 5154 }, { "epoch": 0.5911019378511638, "grad_norm": 0.24500450507557636, "learning_rate": 0.0003780343121862077, "loss": 2.486, "step": 5155 }, { "epoch": 0.5912166036005045, "grad_norm": 0.22027311168219074, "learning_rate": 0.0003778542347181645, "loss": 2.2973, "step": 5156 }, { "epoch": 0.5913312693498453, "grad_norm": 0.23704132328570804, "learning_rate": 0.00037767417409780116, "loss": 2.3339, "step": 5157 }, { "epoch": 0.5914459350991859, "grad_norm": 0.2656755154419864, "learning_rate": 0.00037749413034995384, "loss": 2.3659, "step": 5158 }, { "epoch": 0.5915606008485266, "grad_norm": 0.23435159851520573, "learning_rate": 0.00037731410349945614, "loss": 2.3299, "step": 5159 }, { "epoch": 0.5916752665978672, "grad_norm": 0.24382278785597827, "learning_rate": 0.0003771340935711392, "loss": 2.4224, "step": 5160 }, { "epoch": 0.5917899323472079, "grad_norm": 0.23004772697730544, "learning_rate": 0.00037695410058983213, "loss": 2.4473, "step": 5161 }, { "epoch": 0.5919045980965486, "grad_norm": 0.2384115129277421, "learning_rate": 0.0003767741245803612, "loss": 2.4481, "step": 5162 }, { "epoch": 0.5920192638458892, "grad_norm": 0.2668427586192323, "learning_rate": 0.0003765941655675511, "loss": 2.4544, "step": 5163 }, { "epoch": 0.5921339295952299, "grad_norm": 0.2948163050877117, "learning_rate": 0.0003764142235762234, "loss": 2.5963, "step": 5164 }, { "epoch": 0.5922485953445705, "grad_norm": 0.251970855727244, "learning_rate": 0.0003762342986311979, "loss": 2.369, "step": 5165 }, { "epoch": 0.5923632610939112, "grad_norm": 0.2540598628161775, "learning_rate": 0.00037605439075729166, "loss": 2.4363, "step": 5166 }, { "epoch": 0.592477926843252, "grad_norm": 0.26671291686672455, "learning_rate": 0.00037587449997931945, "loss": 2.386, "step": 5167 }, { "epoch": 0.5925925925925926, "grad_norm": 0.23963713349476787, "learning_rate": 0.0003756946263220941, "loss": 2.611, "step": 5168 }, { "epoch": 0.5927072583419333, "grad_norm": 0.2567121271517095, "learning_rate": 0.0003755147698104256, "loss": 2.4759, "step": 5169 }, { "epoch": 0.592821924091274, "grad_norm": 0.25353372198431134, "learning_rate": 0.0003753349304691216, "loss": 2.3632, "step": 5170 }, { "epoch": 0.5929365898406146, "grad_norm": 0.2600916161516471, "learning_rate": 0.00037515510832298774, "loss": 2.5963, "step": 5171 }, { "epoch": 0.5930512555899553, "grad_norm": 0.25872364979107476, "learning_rate": 0.0003749753033968267, "loss": 2.4614, "step": 5172 }, { "epoch": 0.5931659213392959, "grad_norm": 0.2529198813004184, "learning_rate": 0.00037479551571543946, "loss": 2.3794, "step": 5173 }, { "epoch": 0.5932805870886366, "grad_norm": 0.2572923042974584, "learning_rate": 0.0003746157453036243, "loss": 2.2531, "step": 5174 }, { "epoch": 0.5933952528379773, "grad_norm": 0.22537207848971327, "learning_rate": 0.0003744359921861771, "loss": 2.4603, "step": 5175 }, { "epoch": 0.5935099185873179, "grad_norm": 0.2646339168110124, "learning_rate": 0.0003742562563878911, "loss": 2.3739, "step": 5176 }, { "epoch": 0.5936245843366587, "grad_norm": 0.249340296652455, "learning_rate": 0.00037407653793355766, "loss": 2.5506, "step": 5177 }, { "epoch": 0.5937392500859993, "grad_norm": 0.24853961395488436, "learning_rate": 0.0003738968368479656, "loss": 2.5452, "step": 5178 }, { "epoch": 0.59385391583534, "grad_norm": 0.23898825980332986, "learning_rate": 0.00037371715315590113, "loss": 2.3822, "step": 5179 }, { "epoch": 0.5939685815846807, "grad_norm": 0.23979273772098084, "learning_rate": 0.00037353748688214827, "loss": 2.3917, "step": 5180 }, { "epoch": 0.5940832473340213, "grad_norm": 0.23304283742061266, "learning_rate": 0.0003733578380514885, "loss": 2.4465, "step": 5181 }, { "epoch": 0.594197913083362, "grad_norm": 0.244749265371565, "learning_rate": 0.0003731782066887007, "loss": 2.3779, "step": 5182 }, { "epoch": 0.5943125788327027, "grad_norm": 0.24056692933959664, "learning_rate": 0.000372998592818562, "loss": 2.4192, "step": 5183 }, { "epoch": 0.5944272445820433, "grad_norm": 0.22937043339566887, "learning_rate": 0.00037281899646584665, "loss": 2.3419, "step": 5184 }, { "epoch": 0.594541910331384, "grad_norm": 0.2612073378271702, "learning_rate": 0.00037263941765532636, "loss": 2.3747, "step": 5185 }, { "epoch": 0.5946565760807246, "grad_norm": 0.2317889381707554, "learning_rate": 0.0003724598564117708, "loss": 2.3791, "step": 5186 }, { "epoch": 0.5947712418300654, "grad_norm": 0.23536942526013294, "learning_rate": 0.0003722803127599467, "loss": 2.4637, "step": 5187 }, { "epoch": 0.5948859075794061, "grad_norm": 0.23659790097348804, "learning_rate": 0.00037210078672461904, "loss": 2.3089, "step": 5188 }, { "epoch": 0.5950005733287467, "grad_norm": 0.2656512664794035, "learning_rate": 0.00037192127833054984, "loss": 2.4447, "step": 5189 }, { "epoch": 0.5951152390780874, "grad_norm": 0.24772561533391785, "learning_rate": 0.000371741787602499, "loss": 2.4652, "step": 5190 }, { "epoch": 0.5952299048274281, "grad_norm": 0.2704458006417386, "learning_rate": 0.00037156231456522356, "loss": 2.5326, "step": 5191 }, { "epoch": 0.5953445705767687, "grad_norm": 0.23577741711198177, "learning_rate": 0.00037138285924347847, "loss": 2.4445, "step": 5192 }, { "epoch": 0.5954592363261094, "grad_norm": 0.25107323311785995, "learning_rate": 0.00037120342166201657, "loss": 2.3266, "step": 5193 }, { "epoch": 0.59557390207545, "grad_norm": 0.2639120249856345, "learning_rate": 0.00037102400184558756, "loss": 2.5847, "step": 5194 }, { "epoch": 0.5956885678247907, "grad_norm": 0.23016154688523172, "learning_rate": 0.0003708445998189388, "loss": 2.2762, "step": 5195 }, { "epoch": 0.5958032335741315, "grad_norm": 0.21968766876680054, "learning_rate": 0.00037066521560681577, "loss": 2.4486, "step": 5196 }, { "epoch": 0.5959178993234721, "grad_norm": 0.2474525986223619, "learning_rate": 0.00037048584923396065, "loss": 2.4081, "step": 5197 }, { "epoch": 0.5960325650728128, "grad_norm": 0.26720726176898263, "learning_rate": 0.000370306500725114, "loss": 2.4163, "step": 5198 }, { "epoch": 0.5961472308221534, "grad_norm": 0.25818794110757337, "learning_rate": 0.00037012717010501345, "loss": 2.5431, "step": 5199 }, { "epoch": 0.5962618965714941, "grad_norm": 0.262054617895628, "learning_rate": 0.0003699478573983942, "loss": 2.3955, "step": 5200 }, { "epoch": 0.5963765623208348, "grad_norm": 0.253990997349075, "learning_rate": 0.0003697685626299889, "loss": 2.371, "step": 5201 }, { "epoch": 0.5964912280701754, "grad_norm": 0.2414532342476216, "learning_rate": 0.00036958928582452787, "loss": 2.5223, "step": 5202 }, { "epoch": 0.5966058938195161, "grad_norm": 0.2664248820803913, "learning_rate": 0.0003694100270067392, "loss": 2.5165, "step": 5203 }, { "epoch": 0.5967205595688568, "grad_norm": 0.26658400291592627, "learning_rate": 0.00036923078620134785, "loss": 2.4906, "step": 5204 }, { "epoch": 0.5968352253181974, "grad_norm": 0.23903412500930632, "learning_rate": 0.0003690515634330771, "loss": 2.3778, "step": 5205 }, { "epoch": 0.5969498910675382, "grad_norm": 0.23694566185799668, "learning_rate": 0.000368872358726647, "loss": 2.5241, "step": 5206 }, { "epoch": 0.5970645568168788, "grad_norm": 0.27354646591190823, "learning_rate": 0.00036869317210677524, "loss": 2.4304, "step": 5207 }, { "epoch": 0.5971792225662195, "grad_norm": 0.277008459080636, "learning_rate": 0.0003685140035981776, "loss": 2.3558, "step": 5208 }, { "epoch": 0.5972938883155602, "grad_norm": 0.24723880611501, "learning_rate": 0.0003683348532255669, "loss": 2.4497, "step": 5209 }, { "epoch": 0.5974085540649008, "grad_norm": 0.24899880573157604, "learning_rate": 0.00036815572101365335, "loss": 2.4871, "step": 5210 }, { "epoch": 0.5975232198142415, "grad_norm": 0.2524386294370823, "learning_rate": 0.000367976606987145, "loss": 2.3075, "step": 5211 }, { "epoch": 0.5976378855635821, "grad_norm": 0.23611411339056015, "learning_rate": 0.00036779751117074695, "loss": 2.4105, "step": 5212 }, { "epoch": 0.5977525513129228, "grad_norm": 0.24400823344718622, "learning_rate": 0.00036761843358916235, "loss": 2.5683, "step": 5213 }, { "epoch": 0.5978672170622635, "grad_norm": 0.24481773572436466, "learning_rate": 0.0003674393742670914, "loss": 2.49, "step": 5214 }, { "epoch": 0.5979818828116041, "grad_norm": 0.2655993211569076, "learning_rate": 0.000367260333229232, "loss": 2.3868, "step": 5215 }, { "epoch": 0.5980965485609449, "grad_norm": 0.25755946919280404, "learning_rate": 0.00036708131050027925, "loss": 2.522, "step": 5216 }, { "epoch": 0.5982112143102856, "grad_norm": 0.25282300496118976, "learning_rate": 0.000366902306104926, "loss": 2.4832, "step": 5217 }, { "epoch": 0.5983258800596262, "grad_norm": 0.26455383199806126, "learning_rate": 0.00036672332006786274, "loss": 2.3676, "step": 5218 }, { "epoch": 0.5984405458089669, "grad_norm": 0.24852381927720799, "learning_rate": 0.00036654435241377704, "loss": 2.4214, "step": 5219 }, { "epoch": 0.5985552115583075, "grad_norm": 0.25550939683738993, "learning_rate": 0.00036636540316735384, "loss": 2.3789, "step": 5220 }, { "epoch": 0.5986698773076482, "grad_norm": 0.23369424677905018, "learning_rate": 0.0003661864723532761, "loss": 2.4665, "step": 5221 }, { "epoch": 0.5987845430569889, "grad_norm": 0.2511536869835362, "learning_rate": 0.0003660075599962236, "loss": 2.2889, "step": 5222 }, { "epoch": 0.5988992088063295, "grad_norm": 0.2546526074279082, "learning_rate": 0.0003658286661208741, "loss": 2.5958, "step": 5223 }, { "epoch": 0.5990138745556702, "grad_norm": 0.2545174935018608, "learning_rate": 0.00036564979075190277, "loss": 2.4371, "step": 5224 }, { "epoch": 0.599128540305011, "grad_norm": 0.24902735732294498, "learning_rate": 0.0003654709339139818, "loss": 2.5822, "step": 5225 }, { "epoch": 0.5992432060543516, "grad_norm": 0.2560034123376561, "learning_rate": 0.000365292095631781, "loss": 2.5191, "step": 5226 }, { "epoch": 0.5993578718036923, "grad_norm": 0.2701721320301501, "learning_rate": 0.0003651132759299678, "loss": 2.4605, "step": 5227 }, { "epoch": 0.5994725375530329, "grad_norm": 0.23574831544721947, "learning_rate": 0.0003649344748332072, "loss": 2.4539, "step": 5228 }, { "epoch": 0.5995872033023736, "grad_norm": 0.24842497262509552, "learning_rate": 0.0003647556923661611, "loss": 2.4594, "step": 5229 }, { "epoch": 0.5997018690517143, "grad_norm": 0.2471358150526401, "learning_rate": 0.0003645769285534894, "loss": 2.2773, "step": 5230 }, { "epoch": 0.5998165348010549, "grad_norm": 0.25336000889568916, "learning_rate": 0.00036439818341984896, "loss": 2.4491, "step": 5231 }, { "epoch": 0.5999312005503956, "grad_norm": 0.24038133706702142, "learning_rate": 0.0003642194569898941, "loss": 2.3441, "step": 5232 }, { "epoch": 0.6000458662997362, "grad_norm": 0.23827485018902722, "learning_rate": 0.0003640407492882771, "loss": 2.3761, "step": 5233 }, { "epoch": 0.6001605320490769, "grad_norm": 0.25009552268642, "learning_rate": 0.0003638620603396472, "loss": 2.5022, "step": 5234 }, { "epoch": 0.6002751977984176, "grad_norm": 0.26892645124857023, "learning_rate": 0.00036368339016865087, "loss": 2.5234, "step": 5235 }, { "epoch": 0.6003898635477583, "grad_norm": 0.24086734630253276, "learning_rate": 0.00036350473879993264, "loss": 2.5373, "step": 5236 }, { "epoch": 0.600504529297099, "grad_norm": 0.26438194766427175, "learning_rate": 0.00036332610625813356, "loss": 2.3789, "step": 5237 }, { "epoch": 0.6006191950464397, "grad_norm": 0.2503044947664895, "learning_rate": 0.00036314749256789313, "loss": 2.3734, "step": 5238 }, { "epoch": 0.6007338607957803, "grad_norm": 0.25485476236541055, "learning_rate": 0.00036296889775384736, "loss": 2.3764, "step": 5239 }, { "epoch": 0.600848526545121, "grad_norm": 0.23020168719237788, "learning_rate": 0.0003627903218406302, "loss": 2.4581, "step": 5240 }, { "epoch": 0.6009631922944616, "grad_norm": 0.2390960560803545, "learning_rate": 0.00036261176485287254, "loss": 2.4428, "step": 5241 }, { "epoch": 0.6010778580438023, "grad_norm": 0.2443200328671786, "learning_rate": 0.00036243322681520296, "loss": 2.4613, "step": 5242 }, { "epoch": 0.601192523793143, "grad_norm": 0.21772933236602846, "learning_rate": 0.00036225470775224776, "loss": 2.3255, "step": 5243 }, { "epoch": 0.6013071895424836, "grad_norm": 0.2410248479107569, "learning_rate": 0.00036207620768862993, "loss": 2.5717, "step": 5244 }, { "epoch": 0.6014218552918243, "grad_norm": 0.2701897960180836, "learning_rate": 0.00036189772664897, "loss": 2.521, "step": 5245 }, { "epoch": 0.601536521041165, "grad_norm": 0.2696818783052496, "learning_rate": 0.0003617192646578864, "loss": 2.5238, "step": 5246 }, { "epoch": 0.6016511867905057, "grad_norm": 0.25830125228983225, "learning_rate": 0.0003615408217399941, "loss": 2.5162, "step": 5247 }, { "epoch": 0.6017658525398464, "grad_norm": 0.24510156602320524, "learning_rate": 0.0003613623979199064, "loss": 2.4919, "step": 5248 }, { "epoch": 0.601880518289187, "grad_norm": 0.27127744347076677, "learning_rate": 0.00036118399322223325, "loss": 2.3951, "step": 5249 }, { "epoch": 0.6019951840385277, "grad_norm": 0.24790545510913592, "learning_rate": 0.0003610056076715821, "loss": 2.3373, "step": 5250 }, { "epoch": 0.6021098497878684, "grad_norm": 0.2395050306206691, "learning_rate": 0.0003608272412925579, "loss": 2.4889, "step": 5251 }, { "epoch": 0.602224515537209, "grad_norm": 0.24885710111219678, "learning_rate": 0.00036064889410976277, "loss": 2.4734, "step": 5252 }, { "epoch": 0.6023391812865497, "grad_norm": 0.22501903944497442, "learning_rate": 0.0003604705661477966, "loss": 2.4403, "step": 5253 }, { "epoch": 0.6024538470358903, "grad_norm": 0.22437609639941897, "learning_rate": 0.00036029225743125607, "loss": 2.3677, "step": 5254 }, { "epoch": 0.602568512785231, "grad_norm": 0.2378894472366075, "learning_rate": 0.0003601139679847356, "loss": 2.3169, "step": 5255 }, { "epoch": 0.6026831785345718, "grad_norm": 0.25234647245825503, "learning_rate": 0.00035993569783282687, "loss": 2.5075, "step": 5256 }, { "epoch": 0.6027978442839124, "grad_norm": 0.23323413610208354, "learning_rate": 0.0003597574470001185, "loss": 2.4357, "step": 5257 }, { "epoch": 0.6029125100332531, "grad_norm": 0.24073819978385405, "learning_rate": 0.0003595792155111971, "loss": 2.4144, "step": 5258 }, { "epoch": 0.6030271757825938, "grad_norm": 0.2651961054183406, "learning_rate": 0.0003594010033906464, "loss": 2.5571, "step": 5259 }, { "epoch": 0.6031418415319344, "grad_norm": 0.23949874324184325, "learning_rate": 0.0003592228106630472, "loss": 2.4422, "step": 5260 }, { "epoch": 0.6032565072812751, "grad_norm": 0.26359831191054023, "learning_rate": 0.00035904463735297784, "loss": 2.3899, "step": 5261 }, { "epoch": 0.6033711730306157, "grad_norm": 0.2086565184507233, "learning_rate": 0.00035886648348501364, "loss": 2.4416, "step": 5262 }, { "epoch": 0.6034858387799564, "grad_norm": 0.23061913271872425, "learning_rate": 0.0003586883490837281, "loss": 2.3939, "step": 5263 }, { "epoch": 0.6036005045292971, "grad_norm": 0.223308440355955, "learning_rate": 0.000358510234173691, "loss": 2.5251, "step": 5264 }, { "epoch": 0.6037151702786377, "grad_norm": 0.25036887415783954, "learning_rate": 0.00035833213877947017, "loss": 2.4599, "step": 5265 }, { "epoch": 0.6038298360279785, "grad_norm": 0.2628068287257144, "learning_rate": 0.0003581540629256303, "loss": 2.6148, "step": 5266 }, { "epoch": 0.6039445017773191, "grad_norm": 0.2586578251399628, "learning_rate": 0.00035797600663673346, "loss": 2.5151, "step": 5267 }, { "epoch": 0.6040591675266598, "grad_norm": 0.2397139805977986, "learning_rate": 0.00035779796993733946, "loss": 2.4285, "step": 5268 }, { "epoch": 0.6041738332760005, "grad_norm": 0.2826388600170993, "learning_rate": 0.000357619952852005, "loss": 2.5821, "step": 5269 }, { "epoch": 0.6042884990253411, "grad_norm": 0.22960536477199134, "learning_rate": 0.0003574419554052839, "loss": 2.4805, "step": 5270 }, { "epoch": 0.6044031647746818, "grad_norm": 0.25788793428903745, "learning_rate": 0.0003572639776217277, "loss": 2.5264, "step": 5271 }, { "epoch": 0.6045178305240225, "grad_norm": 0.2517578499262476, "learning_rate": 0.0003570860195258848, "loss": 2.4289, "step": 5272 }, { "epoch": 0.6046324962733631, "grad_norm": 0.22030637604971712, "learning_rate": 0.00035690808114230145, "loss": 2.2829, "step": 5273 }, { "epoch": 0.6047471620227038, "grad_norm": 0.2703497515528306, "learning_rate": 0.0003567301624955208, "loss": 2.3831, "step": 5274 }, { "epoch": 0.6048618277720444, "grad_norm": 0.25363086414629726, "learning_rate": 0.00035655226361008325, "loss": 2.567, "step": 5275 }, { "epoch": 0.6049764935213852, "grad_norm": 0.24746203728924887, "learning_rate": 0.0003563743845105265, "loss": 2.408, "step": 5276 }, { "epoch": 0.6050911592707259, "grad_norm": 0.24375154873142385, "learning_rate": 0.0003561965252213855, "loss": 2.3578, "step": 5277 }, { "epoch": 0.6052058250200665, "grad_norm": 0.23743623433277636, "learning_rate": 0.00035601868576719297, "loss": 2.4494, "step": 5278 }, { "epoch": 0.6053204907694072, "grad_norm": 0.2566299983399756, "learning_rate": 0.0003558408661724781, "loss": 2.4442, "step": 5279 }, { "epoch": 0.6054351565187478, "grad_norm": 0.25666514634085624, "learning_rate": 0.0003556630664617679, "loss": 2.384, "step": 5280 }, { "epoch": 0.6055498222680885, "grad_norm": 0.2354687967004414, "learning_rate": 0.00035548528665958637, "loss": 2.4164, "step": 5281 }, { "epoch": 0.6056644880174292, "grad_norm": 0.26590811114037705, "learning_rate": 0.00035530752679045465, "loss": 2.4596, "step": 5282 }, { "epoch": 0.6057791537667698, "grad_norm": 0.27734069168319, "learning_rate": 0.00035512978687889156, "loss": 2.485, "step": 5283 }, { "epoch": 0.6058938195161105, "grad_norm": 0.2697654086713395, "learning_rate": 0.00035495206694941304, "loss": 2.4415, "step": 5284 }, { "epoch": 0.6060084852654513, "grad_norm": 0.23765136557901423, "learning_rate": 0.0003547743670265319, "loss": 2.4124, "step": 5285 }, { "epoch": 0.6061231510147919, "grad_norm": 0.2840736867979609, "learning_rate": 0.00035459668713475875, "loss": 2.3979, "step": 5286 }, { "epoch": 0.6062378167641326, "grad_norm": 0.25820808907061493, "learning_rate": 0.0003544190272986007, "loss": 2.3158, "step": 5287 }, { "epoch": 0.6063524825134732, "grad_norm": 0.25055694869910355, "learning_rate": 0.0003542413875425631, "loss": 2.4006, "step": 5288 }, { "epoch": 0.6064671482628139, "grad_norm": 0.2759542549345469, "learning_rate": 0.0003540637678911475, "loss": 2.4224, "step": 5289 }, { "epoch": 0.6065818140121546, "grad_norm": 0.24797148858259108, "learning_rate": 0.00035388616836885334, "loss": 2.5718, "step": 5290 }, { "epoch": 0.6066964797614952, "grad_norm": 0.26304279965926813, "learning_rate": 0.00035370858900017713, "loss": 2.5309, "step": 5291 }, { "epoch": 0.6068111455108359, "grad_norm": 0.25200293855260864, "learning_rate": 0.00035353102980961227, "loss": 2.3644, "step": 5292 }, { "epoch": 0.6069258112601766, "grad_norm": 0.24138956654637037, "learning_rate": 0.00035335349082165013, "loss": 2.4127, "step": 5293 }, { "epoch": 0.6070404770095172, "grad_norm": 0.22749679120027289, "learning_rate": 0.00035317597206077866, "loss": 2.4741, "step": 5294 }, { "epoch": 0.607155142758858, "grad_norm": 0.2519236486163959, "learning_rate": 0.00035299847355148295, "loss": 2.2825, "step": 5295 }, { "epoch": 0.6072698085081986, "grad_norm": 0.2387713141570366, "learning_rate": 0.00035282099531824585, "loss": 2.3967, "step": 5296 }, { "epoch": 0.6073844742575393, "grad_norm": 0.24280946925227198, "learning_rate": 0.0003526435373855468, "loss": 2.4135, "step": 5297 }, { "epoch": 0.60749914000688, "grad_norm": 0.23181557819701756, "learning_rate": 0.0003524660997778629, "loss": 2.4425, "step": 5298 }, { "epoch": 0.6076138057562206, "grad_norm": 0.23932708120345042, "learning_rate": 0.0003522886825196684, "loss": 2.636, "step": 5299 }, { "epoch": 0.6077284715055613, "grad_norm": 0.24252249074982363, "learning_rate": 0.0003521112856354346, "loss": 2.4565, "step": 5300 }, { "epoch": 0.6078431372549019, "grad_norm": 0.22905014202754032, "learning_rate": 0.00035193390914962973, "loss": 2.3795, "step": 5301 }, { "epoch": 0.6079578030042426, "grad_norm": 0.22919836267462645, "learning_rate": 0.0003517565530867196, "loss": 2.414, "step": 5302 }, { "epoch": 0.6080724687535833, "grad_norm": 0.23040647159033725, "learning_rate": 0.0003515792174711675, "loss": 2.4626, "step": 5303 }, { "epoch": 0.6081871345029239, "grad_norm": 0.2765265900062124, "learning_rate": 0.00035140190232743303, "loss": 2.4132, "step": 5304 }, { "epoch": 0.6083018002522647, "grad_norm": 0.25576890938940805, "learning_rate": 0.00035122460767997364, "loss": 2.4717, "step": 5305 }, { "epoch": 0.6084164660016054, "grad_norm": 0.23646117768922026, "learning_rate": 0.0003510473335532438, "loss": 2.4564, "step": 5306 }, { "epoch": 0.608531131750946, "grad_norm": 0.2524754209515238, "learning_rate": 0.0003508700799716947, "loss": 2.5803, "step": 5307 }, { "epoch": 0.6086457975002867, "grad_norm": 0.24059152236415793, "learning_rate": 0.0003506928469597756, "loss": 2.5121, "step": 5308 }, { "epoch": 0.6087604632496273, "grad_norm": 0.27906973337441204, "learning_rate": 0.00035051563454193226, "loss": 2.445, "step": 5309 }, { "epoch": 0.608875128998968, "grad_norm": 0.2494819472270163, "learning_rate": 0.0003503384427426076, "loss": 2.4065, "step": 5310 }, { "epoch": 0.6089897947483087, "grad_norm": 0.24190555133948896, "learning_rate": 0.00035016127158624203, "loss": 2.4636, "step": 5311 }, { "epoch": 0.6091044604976493, "grad_norm": 0.2431297784236629, "learning_rate": 0.00034998412109727274, "loss": 2.4355, "step": 5312 }, { "epoch": 0.60921912624699, "grad_norm": 0.22485520845922946, "learning_rate": 0.0003498069913001345, "loss": 2.4302, "step": 5313 }, { "epoch": 0.6093337919963306, "grad_norm": 0.25101323798351055, "learning_rate": 0.0003496298822192588, "loss": 2.4538, "step": 5314 }, { "epoch": 0.6094484577456714, "grad_norm": 0.26425404006436537, "learning_rate": 0.00034945279387907466, "loss": 2.3939, "step": 5315 }, { "epoch": 0.6095631234950121, "grad_norm": 0.2314290867886926, "learning_rate": 0.00034927572630400773, "loss": 2.4765, "step": 5316 }, { "epoch": 0.6096777892443527, "grad_norm": 0.2500960088918181, "learning_rate": 0.0003490986795184813, "loss": 2.4621, "step": 5317 }, { "epoch": 0.6097924549936934, "grad_norm": 0.22868225792125754, "learning_rate": 0.0003489216535469157, "loss": 2.4038, "step": 5318 }, { "epoch": 0.6099071207430341, "grad_norm": 0.24570568204533658, "learning_rate": 0.0003487446484137282, "loss": 2.4946, "step": 5319 }, { "epoch": 0.6100217864923747, "grad_norm": 0.24154158676179366, "learning_rate": 0.0003485676641433332, "loss": 2.5139, "step": 5320 }, { "epoch": 0.6101364522417154, "grad_norm": 0.22998705878680656, "learning_rate": 0.0003483907007601425, "loss": 2.4438, "step": 5321 }, { "epoch": 0.610251117991056, "grad_norm": 0.24924259417746333, "learning_rate": 0.00034821375828856437, "loss": 2.3711, "step": 5322 }, { "epoch": 0.6103657837403967, "grad_norm": 0.25715444458758147, "learning_rate": 0.00034803683675300513, "loss": 2.5113, "step": 5323 }, { "epoch": 0.6104804494897375, "grad_norm": 0.23253186839330517, "learning_rate": 0.0003478599361778677, "loss": 2.4714, "step": 5324 }, { "epoch": 0.6105951152390781, "grad_norm": 0.24962603018695653, "learning_rate": 0.000347683056587552, "loss": 2.4384, "step": 5325 }, { "epoch": 0.6107097809884188, "grad_norm": 0.2651151403688997, "learning_rate": 0.0003475061980064551, "loss": 2.3589, "step": 5326 }, { "epoch": 0.6108244467377595, "grad_norm": 0.2497386988672167, "learning_rate": 0.00034732936045897133, "loss": 2.3886, "step": 5327 }, { "epoch": 0.6109391124871001, "grad_norm": 0.22820889102795114, "learning_rate": 0.0003471525439694924, "loss": 2.4139, "step": 5328 }, { "epoch": 0.6110537782364408, "grad_norm": 0.2476447657138765, "learning_rate": 0.0003469757485624062, "loss": 2.6063, "step": 5329 }, { "epoch": 0.6111684439857814, "grad_norm": 0.2573423269290262, "learning_rate": 0.00034679897426209883, "loss": 2.458, "step": 5330 }, { "epoch": 0.6112831097351221, "grad_norm": 0.29318343669114416, "learning_rate": 0.0003466222210929526, "loss": 2.3243, "step": 5331 }, { "epoch": 0.6113977754844628, "grad_norm": 0.28252808873586965, "learning_rate": 0.00034644548907934714, "loss": 2.3889, "step": 5332 }, { "epoch": 0.6115124412338034, "grad_norm": 0.2648378042282343, "learning_rate": 0.0003462687782456596, "loss": 2.4225, "step": 5333 }, { "epoch": 0.6116271069831442, "grad_norm": 0.24412091446145343, "learning_rate": 0.00034609208861626373, "loss": 2.5899, "step": 5334 }, { "epoch": 0.6117417727324848, "grad_norm": 0.24491556753776308, "learning_rate": 0.0003459154202155305, "loss": 2.3439, "step": 5335 }, { "epoch": 0.6118564384818255, "grad_norm": 0.26291881972271297, "learning_rate": 0.0003457387730678279, "loss": 2.3998, "step": 5336 }, { "epoch": 0.6119711042311662, "grad_norm": 0.24365504385917092, "learning_rate": 0.000345562147197521, "loss": 2.4626, "step": 5337 }, { "epoch": 0.6120857699805068, "grad_norm": 0.23634335306218962, "learning_rate": 0.00034538554262897217, "loss": 2.4631, "step": 5338 }, { "epoch": 0.6122004357298475, "grad_norm": 0.24538168385464576, "learning_rate": 0.0003452089593865404, "loss": 2.341, "step": 5339 }, { "epoch": 0.6123151014791882, "grad_norm": 0.230633745840467, "learning_rate": 0.0003450323974945822, "loss": 2.2428, "step": 5340 }, { "epoch": 0.6124297672285288, "grad_norm": 0.262455514925542, "learning_rate": 0.0003448558569774507, "loss": 2.4891, "step": 5341 }, { "epoch": 0.6125444329778695, "grad_norm": 0.25328951010068307, "learning_rate": 0.0003446793378594963, "loss": 2.4043, "step": 5342 }, { "epoch": 0.6126590987272101, "grad_norm": 0.26598490696861865, "learning_rate": 0.0003445028401650668, "loss": 2.5008, "step": 5343 }, { "epoch": 0.6127737644765509, "grad_norm": 0.23458522331521278, "learning_rate": 0.00034432636391850655, "loss": 2.5524, "step": 5344 }, { "epoch": 0.6128884302258916, "grad_norm": 0.2478789298453673, "learning_rate": 0.0003441499091441568, "loss": 2.3756, "step": 5345 }, { "epoch": 0.6130030959752322, "grad_norm": 0.27509912124723396, "learning_rate": 0.00034397347586635643, "loss": 2.3928, "step": 5346 }, { "epoch": 0.6131177617245729, "grad_norm": 0.279331823798233, "learning_rate": 0.00034379706410944064, "loss": 2.2535, "step": 5347 }, { "epoch": 0.6132324274739135, "grad_norm": 0.257585247726897, "learning_rate": 0.00034362067389774256, "loss": 2.5285, "step": 5348 }, { "epoch": 0.6133470932232542, "grad_norm": 0.23506507696650417, "learning_rate": 0.0003434443052555917, "loss": 2.3818, "step": 5349 }, { "epoch": 0.6134617589725949, "grad_norm": 0.24023361477867655, "learning_rate": 0.00034326795820731473, "loss": 2.3018, "step": 5350 }, { "epoch": 0.6135764247219355, "grad_norm": 0.23772648096040327, "learning_rate": 0.0003430916327772352, "loss": 2.4359, "step": 5351 }, { "epoch": 0.6136910904712762, "grad_norm": 0.2532255201833486, "learning_rate": 0.00034291532898967394, "loss": 2.4785, "step": 5352 }, { "epoch": 0.613805756220617, "grad_norm": 0.2434741852604548, "learning_rate": 0.0003427390468689489, "loss": 2.46, "step": 5353 }, { "epoch": 0.6139204219699576, "grad_norm": 0.2515945637004177, "learning_rate": 0.00034256278643937454, "loss": 2.3422, "step": 5354 }, { "epoch": 0.6140350877192983, "grad_norm": 0.26528098987973076, "learning_rate": 0.000342386547725263, "loss": 2.5322, "step": 5355 }, { "epoch": 0.6141497534686389, "grad_norm": 0.2609598632082265, "learning_rate": 0.00034221033075092265, "loss": 2.4962, "step": 5356 }, { "epoch": 0.6142644192179796, "grad_norm": 0.25369520182966604, "learning_rate": 0.00034203413554065933, "loss": 2.5861, "step": 5357 }, { "epoch": 0.6143790849673203, "grad_norm": 0.23665506797778044, "learning_rate": 0.000341857962118776, "loss": 2.3958, "step": 5358 }, { "epoch": 0.6144937507166609, "grad_norm": 0.2441334740332325, "learning_rate": 0.00034168181050957247, "loss": 2.3437, "step": 5359 }, { "epoch": 0.6146084164660016, "grad_norm": 0.22214658520071795, "learning_rate": 0.0003415056807373452, "loss": 2.2619, "step": 5360 }, { "epoch": 0.6147230822153423, "grad_norm": 0.23875081726655506, "learning_rate": 0.00034132957282638824, "loss": 2.3703, "step": 5361 }, { "epoch": 0.6148377479646829, "grad_norm": 0.26443556683443215, "learning_rate": 0.0003411534868009919, "loss": 2.4505, "step": 5362 }, { "epoch": 0.6149524137140236, "grad_norm": 0.2687047986855505, "learning_rate": 0.0003409774226854444, "loss": 2.3973, "step": 5363 }, { "epoch": 0.6150670794633643, "grad_norm": 0.24301725854488332, "learning_rate": 0.0003408013805040301, "loss": 2.2918, "step": 5364 }, { "epoch": 0.615181745212705, "grad_norm": 0.24735351836206526, "learning_rate": 0.00034062536028103087, "loss": 2.4766, "step": 5365 }, { "epoch": 0.6152964109620457, "grad_norm": 0.246171574088853, "learning_rate": 0.0003404493620407252, "loss": 2.3657, "step": 5366 }, { "epoch": 0.6154110767113863, "grad_norm": 0.2718113017641868, "learning_rate": 0.0003402733858073885, "loss": 2.5335, "step": 5367 }, { "epoch": 0.615525742460727, "grad_norm": 0.2372415201337167, "learning_rate": 0.0003400974316052938, "loss": 2.4527, "step": 5368 }, { "epoch": 0.6156404082100676, "grad_norm": 0.2417647084620309, "learning_rate": 0.0003399214994587104, "loss": 2.3643, "step": 5369 }, { "epoch": 0.6157550739594083, "grad_norm": 0.262513289649278, "learning_rate": 0.0003397455893919047, "loss": 2.4304, "step": 5370 }, { "epoch": 0.615869739708749, "grad_norm": 0.24577929035906254, "learning_rate": 0.00033956970142914026, "loss": 2.3848, "step": 5371 }, { "epoch": 0.6159844054580896, "grad_norm": 0.259299308815684, "learning_rate": 0.0003393938355946773, "loss": 2.4103, "step": 5372 }, { "epoch": 0.6160990712074303, "grad_norm": 0.25697462288424944, "learning_rate": 0.0003392179919127734, "loss": 2.4373, "step": 5373 }, { "epoch": 0.6162137369567711, "grad_norm": 0.26246140448745964, "learning_rate": 0.0003390421704076827, "loss": 2.5576, "step": 5374 }, { "epoch": 0.6163284027061117, "grad_norm": 0.2397669094543542, "learning_rate": 0.0003388663711036566, "loss": 2.5079, "step": 5375 }, { "epoch": 0.6164430684554524, "grad_norm": 0.30014075123792505, "learning_rate": 0.00033869059402494295, "loss": 2.4433, "step": 5376 }, { "epoch": 0.616557734204793, "grad_norm": 0.2333488140283522, "learning_rate": 0.0003385148391957869, "loss": 2.3793, "step": 5377 }, { "epoch": 0.6166723999541337, "grad_norm": 0.24448568344986832, "learning_rate": 0.0003383391066404308, "loss": 2.4711, "step": 5378 }, { "epoch": 0.6167870657034744, "grad_norm": 0.25274471680646865, "learning_rate": 0.00033816339638311334, "loss": 2.5064, "step": 5379 }, { "epoch": 0.616901731452815, "grad_norm": 0.22863551919023348, "learning_rate": 0.0003379877084480706, "loss": 2.3477, "step": 5380 }, { "epoch": 0.6170163972021557, "grad_norm": 0.23754900820380526, "learning_rate": 0.00033781204285953515, "loss": 2.488, "step": 5381 }, { "epoch": 0.6171310629514963, "grad_norm": 0.2472998426717085, "learning_rate": 0.00033763639964173663, "loss": 2.4095, "step": 5382 }, { "epoch": 0.617245728700837, "grad_norm": 0.24491155008254736, "learning_rate": 0.000337460778818902, "loss": 2.3917, "step": 5383 }, { "epoch": 0.6173603944501778, "grad_norm": 0.25190449317129016, "learning_rate": 0.00033728518041525467, "loss": 2.4115, "step": 5384 }, { "epoch": 0.6174750601995184, "grad_norm": 0.23093202841879926, "learning_rate": 0.00033710960445501505, "loss": 2.3739, "step": 5385 }, { "epoch": 0.6175897259488591, "grad_norm": 0.24569168772200303, "learning_rate": 0.0003369340509624006, "loss": 2.4419, "step": 5386 }, { "epoch": 0.6177043916981998, "grad_norm": 0.2562113725187753, "learning_rate": 0.0003367585199616253, "loss": 2.3976, "step": 5387 }, { "epoch": 0.6178190574475404, "grad_norm": 0.22528454890243937, "learning_rate": 0.0003365830114769007, "loss": 2.3696, "step": 5388 }, { "epoch": 0.6179337231968811, "grad_norm": 0.22612204732915472, "learning_rate": 0.0003364075255324345, "loss": 2.4453, "step": 5389 }, { "epoch": 0.6180483889462217, "grad_norm": 0.23281230148204482, "learning_rate": 0.00033623206215243197, "loss": 2.4307, "step": 5390 }, { "epoch": 0.6181630546955624, "grad_norm": 0.23161343870385326, "learning_rate": 0.0003360566213610947, "loss": 2.3729, "step": 5391 }, { "epoch": 0.6182777204449031, "grad_norm": 0.2324230132405558, "learning_rate": 0.0003358812031826213, "loss": 2.424, "step": 5392 }, { "epoch": 0.6183923861942437, "grad_norm": 0.2572644201045394, "learning_rate": 0.0003357058076412077, "loss": 2.4777, "step": 5393 }, { "epoch": 0.6185070519435845, "grad_norm": 0.2308997959575474, "learning_rate": 0.0003355304347610463, "loss": 2.4043, "step": 5394 }, { "epoch": 0.6186217176929252, "grad_norm": 0.25205588247545285, "learning_rate": 0.0003353550845663261, "loss": 2.2769, "step": 5395 }, { "epoch": 0.6187363834422658, "grad_norm": 0.2669851319857069, "learning_rate": 0.0003351797570812338, "loss": 2.36, "step": 5396 }, { "epoch": 0.6188510491916065, "grad_norm": 0.22808200599646938, "learning_rate": 0.00033500445232995205, "loss": 2.4196, "step": 5397 }, { "epoch": 0.6189657149409471, "grad_norm": 0.24670492708992056, "learning_rate": 0.00033482917033666104, "loss": 2.4569, "step": 5398 }, { "epoch": 0.6190803806902878, "grad_norm": 0.24139017048679554, "learning_rate": 0.00033465391112553767, "loss": 2.4359, "step": 5399 }, { "epoch": 0.6191950464396285, "grad_norm": 0.24756109501414555, "learning_rate": 0.0003344786747207555, "loss": 2.4111, "step": 5400 }, { "epoch": 0.6193097121889691, "grad_norm": 0.2866634266847848, "learning_rate": 0.0003343034611464849, "loss": 2.5018, "step": 5401 }, { "epoch": 0.6194243779383098, "grad_norm": 0.24863575240457936, "learning_rate": 0.0003341282704268933, "loss": 2.3661, "step": 5402 }, { "epoch": 0.6195390436876504, "grad_norm": 0.23477959265776807, "learning_rate": 0.0003339531025861451, "loss": 2.3637, "step": 5403 }, { "epoch": 0.6196537094369912, "grad_norm": 0.24253278868488765, "learning_rate": 0.0003337779576484012, "loss": 2.6492, "step": 5404 }, { "epoch": 0.6197683751863319, "grad_norm": 0.26010857383535524, "learning_rate": 0.00033360283563781975, "loss": 2.5186, "step": 5405 }, { "epoch": 0.6198830409356725, "grad_norm": 0.21839527200976241, "learning_rate": 0.0003334277365785551, "loss": 2.413, "step": 5406 }, { "epoch": 0.6199977066850132, "grad_norm": 0.27046315936372234, "learning_rate": 0.0003332526604947591, "loss": 2.3708, "step": 5407 }, { "epoch": 0.6201123724343539, "grad_norm": 0.24933712577442851, "learning_rate": 0.00033307760741058004, "loss": 2.561, "step": 5408 }, { "epoch": 0.6202270381836945, "grad_norm": 0.24644868132629197, "learning_rate": 0.0003329025773501634, "loss": 2.3822, "step": 5409 }, { "epoch": 0.6203417039330352, "grad_norm": 0.2607458816152741, "learning_rate": 0.0003327275703376508, "loss": 2.3869, "step": 5410 }, { "epoch": 0.6204563696823758, "grad_norm": 0.2426621633219935, "learning_rate": 0.0003325525863971816, "loss": 2.3628, "step": 5411 }, { "epoch": 0.6205710354317165, "grad_norm": 0.2537107141174077, "learning_rate": 0.000332377625552891, "loss": 2.5197, "step": 5412 }, { "epoch": 0.6206857011810573, "grad_norm": 0.2524824562248918, "learning_rate": 0.00033220268782891197, "loss": 2.4506, "step": 5413 }, { "epoch": 0.6208003669303979, "grad_norm": 0.2452999470591215, "learning_rate": 0.0003320277732493736, "loss": 2.4518, "step": 5414 }, { "epoch": 0.6209150326797386, "grad_norm": 0.2640588940936114, "learning_rate": 0.0003318528818384021, "loss": 2.4165, "step": 5415 }, { "epoch": 0.6210296984290792, "grad_norm": 0.23576368038103548, "learning_rate": 0.00033167801362012035, "loss": 2.4621, "step": 5416 }, { "epoch": 0.6211443641784199, "grad_norm": 0.2510023379825873, "learning_rate": 0.00033150316861864793, "loss": 2.3299, "step": 5417 }, { "epoch": 0.6212590299277606, "grad_norm": 0.26690381175729844, "learning_rate": 0.00033132834685810185, "loss": 2.4362, "step": 5418 }, { "epoch": 0.6213736956771012, "grad_norm": 0.27413337620667927, "learning_rate": 0.0003311535483625951, "loss": 2.5005, "step": 5419 }, { "epoch": 0.6214883614264419, "grad_norm": 0.22795907387295283, "learning_rate": 0.00033097877315623776, "loss": 2.4265, "step": 5420 }, { "epoch": 0.6216030271757826, "grad_norm": 0.23039072247904063, "learning_rate": 0.0003308040212631369, "loss": 2.5533, "step": 5421 }, { "epoch": 0.6217176929251232, "grad_norm": 0.24550036024223942, "learning_rate": 0.00033062929270739595, "loss": 2.4412, "step": 5422 }, { "epoch": 0.621832358674464, "grad_norm": 0.2706890173584199, "learning_rate": 0.0003304545875131157, "loss": 2.4122, "step": 5423 }, { "epoch": 0.6219470244238046, "grad_norm": 0.22197097543097835, "learning_rate": 0.0003302799057043934, "loss": 2.571, "step": 5424 }, { "epoch": 0.6220616901731453, "grad_norm": 0.24134249971579047, "learning_rate": 0.0003301052473053228, "loss": 2.3304, "step": 5425 }, { "epoch": 0.622176355922486, "grad_norm": 0.258388470223987, "learning_rate": 0.0003299306123399949, "loss": 2.4532, "step": 5426 }, { "epoch": 0.6222910216718266, "grad_norm": 0.2593944358738077, "learning_rate": 0.000329756000832497, "loss": 2.4445, "step": 5427 }, { "epoch": 0.6224056874211673, "grad_norm": 0.2656660588069641, "learning_rate": 0.0003295814128069139, "loss": 2.3967, "step": 5428 }, { "epoch": 0.622520353170508, "grad_norm": 0.231867502400429, "learning_rate": 0.00032940684828732637, "loss": 2.4026, "step": 5429 }, { "epoch": 0.6226350189198486, "grad_norm": 0.25092649502049413, "learning_rate": 0.00032923230729781235, "loss": 2.3368, "step": 5430 }, { "epoch": 0.6227496846691893, "grad_norm": 0.25358475929739666, "learning_rate": 0.00032905778986244634, "loss": 2.4898, "step": 5431 }, { "epoch": 0.6228643504185299, "grad_norm": 0.26379407018434853, "learning_rate": 0.0003288832960052998, "loss": 2.3477, "step": 5432 }, { "epoch": 0.6229790161678707, "grad_norm": 0.25057578476317127, "learning_rate": 0.0003287088257504407, "loss": 2.325, "step": 5433 }, { "epoch": 0.6230936819172114, "grad_norm": 0.26982041231049947, "learning_rate": 0.0003285343791219341, "loss": 2.4719, "step": 5434 }, { "epoch": 0.623208347666552, "grad_norm": 0.2614465627258252, "learning_rate": 0.0003283599561438414, "loss": 2.446, "step": 5435 }, { "epoch": 0.6233230134158927, "grad_norm": 0.25195433707880965, "learning_rate": 0.0003281855568402211, "loss": 2.421, "step": 5436 }, { "epoch": 0.6234376791652333, "grad_norm": 0.22649154864278348, "learning_rate": 0.00032801118123512794, "loss": 2.4118, "step": 5437 }, { "epoch": 0.623552344914574, "grad_norm": 0.24825349844754543, "learning_rate": 0.00032783682935261417, "loss": 2.4357, "step": 5438 }, { "epoch": 0.6236670106639147, "grad_norm": 0.24996870611544156, "learning_rate": 0.00032766250121672784, "loss": 2.2983, "step": 5439 }, { "epoch": 0.6237816764132553, "grad_norm": 0.2701061304116632, "learning_rate": 0.00032748819685151465, "loss": 2.4118, "step": 5440 }, { "epoch": 0.623896342162596, "grad_norm": 0.25082489388495977, "learning_rate": 0.00032731391628101615, "loss": 2.3431, "step": 5441 }, { "epoch": 0.6240110079119368, "grad_norm": 0.253547312280989, "learning_rate": 0.00032713965952927115, "loss": 2.4363, "step": 5442 }, { "epoch": 0.6241256736612774, "grad_norm": 0.23819635344120613, "learning_rate": 0.00032696542662031527, "loss": 2.3073, "step": 5443 }, { "epoch": 0.6242403394106181, "grad_norm": 0.24724090571936858, "learning_rate": 0.0003267912175781803, "loss": 2.6248, "step": 5444 }, { "epoch": 0.6243550051599587, "grad_norm": 0.23203628175832056, "learning_rate": 0.0003266170324268954, "loss": 2.4089, "step": 5445 }, { "epoch": 0.6244696709092994, "grad_norm": 0.2551391047297994, "learning_rate": 0.0003264428711904859, "loss": 2.3243, "step": 5446 }, { "epoch": 0.6245843366586401, "grad_norm": 0.2328920030844889, "learning_rate": 0.00032626873389297374, "loss": 2.5156, "step": 5447 }, { "epoch": 0.6246990024079807, "grad_norm": 0.24468383494260626, "learning_rate": 0.00032609462055837826, "loss": 2.5453, "step": 5448 }, { "epoch": 0.6248136681573214, "grad_norm": 0.2509440208584527, "learning_rate": 0.000325920531210715, "loss": 2.493, "step": 5449 }, { "epoch": 0.624928333906662, "grad_norm": 0.21954094729802456, "learning_rate": 0.00032574646587399614, "loss": 2.3892, "step": 5450 }, { "epoch": 0.6250429996560027, "grad_norm": 0.2555694687744024, "learning_rate": 0.0003255724245722308, "loss": 2.4452, "step": 5451 }, { "epoch": 0.6251576654053435, "grad_norm": 0.2410474378451684, "learning_rate": 0.0003253984073294245, "loss": 2.5267, "step": 5452 }, { "epoch": 0.6252723311546841, "grad_norm": 0.23963320541046418, "learning_rate": 0.0003252244141695798, "loss": 2.4459, "step": 5453 }, { "epoch": 0.6253869969040248, "grad_norm": 0.2351413068638118, "learning_rate": 0.0003250504451166956, "loss": 2.4315, "step": 5454 }, { "epoch": 0.6255016626533655, "grad_norm": 0.23543347380739077, "learning_rate": 0.0003248765001947677, "loss": 2.4486, "step": 5455 }, { "epoch": 0.6256163284027061, "grad_norm": 0.24500196711101976, "learning_rate": 0.00032470257942778837, "loss": 2.3563, "step": 5456 }, { "epoch": 0.6257309941520468, "grad_norm": 0.26353541801449626, "learning_rate": 0.00032452868283974675, "loss": 2.4245, "step": 5457 }, { "epoch": 0.6258456599013874, "grad_norm": 0.21686078378451998, "learning_rate": 0.00032435481045462854, "loss": 2.2888, "step": 5458 }, { "epoch": 0.6259603256507281, "grad_norm": 0.2415018207485043, "learning_rate": 0.0003241809622964162, "loss": 2.469, "step": 5459 }, { "epoch": 0.6260749914000688, "grad_norm": 0.23086990660280785, "learning_rate": 0.00032400713838908856, "loss": 2.4293, "step": 5460 }, { "epoch": 0.6261896571494094, "grad_norm": 0.23851945675743613, "learning_rate": 0.00032383333875662165, "loss": 2.5474, "step": 5461 }, { "epoch": 0.6263043228987502, "grad_norm": 0.2690089692349101, "learning_rate": 0.00032365956342298737, "loss": 2.6645, "step": 5462 }, { "epoch": 0.6264189886480909, "grad_norm": 0.24439181347511676, "learning_rate": 0.0003234858124121551, "loss": 2.51, "step": 5463 }, { "epoch": 0.6265336543974315, "grad_norm": 0.26903258539880825, "learning_rate": 0.00032331208574809035, "loss": 2.5185, "step": 5464 }, { "epoch": 0.6266483201467722, "grad_norm": 0.26012570838808835, "learning_rate": 0.0003231383834547554, "loss": 2.3846, "step": 5465 }, { "epoch": 0.6267629858961128, "grad_norm": 0.2296438192908017, "learning_rate": 0.0003229647055561091, "loss": 2.3762, "step": 5466 }, { "epoch": 0.6268776516454535, "grad_norm": 0.23473058490373952, "learning_rate": 0.000322791052076107, "loss": 2.376, "step": 5467 }, { "epoch": 0.6269923173947942, "grad_norm": 0.23961258660261542, "learning_rate": 0.00032261742303870154, "loss": 2.6897, "step": 5468 }, { "epoch": 0.6271069831441348, "grad_norm": 0.2600271461463504, "learning_rate": 0.0003224438184678412, "loss": 2.4828, "step": 5469 }, { "epoch": 0.6272216488934755, "grad_norm": 0.2652892508562063, "learning_rate": 0.0003222702383874717, "loss": 2.4042, "step": 5470 }, { "epoch": 0.6273363146428161, "grad_norm": 0.2797844459288448, "learning_rate": 0.00032209668282153506, "loss": 2.5332, "step": 5471 }, { "epoch": 0.6274509803921569, "grad_norm": 0.28364195810296916, "learning_rate": 0.0003219231517939696, "loss": 2.4018, "step": 5472 }, { "epoch": 0.6275656461414976, "grad_norm": 0.25439327998842626, "learning_rate": 0.00032174964532871096, "loss": 2.3814, "step": 5473 }, { "epoch": 0.6276803118908382, "grad_norm": 0.26109929265176657, "learning_rate": 0.00032157616344969113, "loss": 2.4081, "step": 5474 }, { "epoch": 0.6277949776401789, "grad_norm": 0.23569691371734813, "learning_rate": 0.00032140270618083845, "loss": 2.4352, "step": 5475 }, { "epoch": 0.6279096433895196, "grad_norm": 0.2520229030454903, "learning_rate": 0.0003212292735460781, "loss": 2.3584, "step": 5476 }, { "epoch": 0.6280243091388602, "grad_norm": 0.23998395655732369, "learning_rate": 0.0003210558655693316, "loss": 2.4266, "step": 5477 }, { "epoch": 0.6281389748882009, "grad_norm": 0.25625259390238025, "learning_rate": 0.00032088248227451767, "loss": 2.5287, "step": 5478 }, { "epoch": 0.6282536406375415, "grad_norm": 0.2501529640672899, "learning_rate": 0.00032070912368555086, "loss": 2.482, "step": 5479 }, { "epoch": 0.6283683063868822, "grad_norm": 0.2345353127462096, "learning_rate": 0.000320535789826343, "loss": 2.1879, "step": 5480 }, { "epoch": 0.628482972136223, "grad_norm": 0.23688116286080194, "learning_rate": 0.0003203624807208019, "loss": 2.3143, "step": 5481 }, { "epoch": 0.6285976378855636, "grad_norm": 0.22545522103322052, "learning_rate": 0.00032018919639283246, "loss": 2.4015, "step": 5482 }, { "epoch": 0.6287123036349043, "grad_norm": 0.21959471011646534, "learning_rate": 0.0003200159368663358, "loss": 2.4177, "step": 5483 }, { "epoch": 0.6288269693842449, "grad_norm": 0.2501554885623485, "learning_rate": 0.00031984270216520996, "loss": 2.2608, "step": 5484 }, { "epoch": 0.6289416351335856, "grad_norm": 0.24316059208074314, "learning_rate": 0.0003196694923133491, "loss": 2.3335, "step": 5485 }, { "epoch": 0.6290563008829263, "grad_norm": 0.23623194923116173, "learning_rate": 0.0003194963073346445, "loss": 2.491, "step": 5486 }, { "epoch": 0.6291709666322669, "grad_norm": 0.24110661172697564, "learning_rate": 0.00031932314725298345, "loss": 2.399, "step": 5487 }, { "epoch": 0.6292856323816076, "grad_norm": 0.26376468003678255, "learning_rate": 0.00031915001209225025, "loss": 2.557, "step": 5488 }, { "epoch": 0.6294002981309483, "grad_norm": 0.26543914747271163, "learning_rate": 0.00031897690187632574, "loss": 2.4134, "step": 5489 }, { "epoch": 0.6295149638802889, "grad_norm": 0.2508087983585551, "learning_rate": 0.00031880381662908693, "loss": 2.5225, "step": 5490 }, { "epoch": 0.6296296296296297, "grad_norm": 0.26452554061035183, "learning_rate": 0.00031863075637440764, "loss": 2.3985, "step": 5491 }, { "epoch": 0.6297442953789703, "grad_norm": 0.25791049454827547, "learning_rate": 0.00031845772113615813, "loss": 2.565, "step": 5492 }, { "epoch": 0.629858961128311, "grad_norm": 0.2578357698457459, "learning_rate": 0.00031828471093820577, "loss": 2.3649, "step": 5493 }, { "epoch": 0.6299736268776517, "grad_norm": 0.2694390128545404, "learning_rate": 0.0003181117258044136, "loss": 2.4125, "step": 5494 }, { "epoch": 0.6300882926269923, "grad_norm": 0.25791065427027676, "learning_rate": 0.0003179387657586418, "loss": 2.4542, "step": 5495 }, { "epoch": 0.630202958376333, "grad_norm": 0.2423263581930325, "learning_rate": 0.00031776583082474685, "loss": 2.4937, "step": 5496 }, { "epoch": 0.6303176241256737, "grad_norm": 0.2916520850055632, "learning_rate": 0.0003175929210265817, "loss": 2.3076, "step": 5497 }, { "epoch": 0.6304322898750143, "grad_norm": 0.24161540883828006, "learning_rate": 0.0003174200363879962, "loss": 2.3997, "step": 5498 }, { "epoch": 0.630546955624355, "grad_norm": 0.23804532370719833, "learning_rate": 0.00031724717693283643, "loss": 2.4723, "step": 5499 }, { "epoch": 0.6306616213736956, "grad_norm": 0.2467822792884135, "learning_rate": 0.000317074342684945, "loss": 2.4242, "step": 5500 }, { "epoch": 0.6307762871230363, "grad_norm": 0.23737049206667943, "learning_rate": 0.0003169015336681612, "loss": 2.4923, "step": 5501 }, { "epoch": 0.6308909528723771, "grad_norm": 0.2506743641223088, "learning_rate": 0.00031672874990632037, "loss": 2.3795, "step": 5502 }, { "epoch": 0.6310056186217177, "grad_norm": 0.23909620289245787, "learning_rate": 0.0003165559914232553, "loss": 2.3816, "step": 5503 }, { "epoch": 0.6311202843710584, "grad_norm": 0.2375816680017304, "learning_rate": 0.00031638325824279433, "loss": 2.4596, "step": 5504 }, { "epoch": 0.631234950120399, "grad_norm": 0.26140138403476304, "learning_rate": 0.00031621055038876303, "loss": 2.4322, "step": 5505 }, { "epoch": 0.6313496158697397, "grad_norm": 0.22467097639761296, "learning_rate": 0.00031603786788498265, "loss": 2.5442, "step": 5506 }, { "epoch": 0.6314642816190804, "grad_norm": 0.24217797317589312, "learning_rate": 0.0003158652107552719, "loss": 2.5105, "step": 5507 }, { "epoch": 0.631578947368421, "grad_norm": 0.23632741961744183, "learning_rate": 0.0003156925790234454, "loss": 2.3843, "step": 5508 }, { "epoch": 0.6316936131177617, "grad_norm": 0.2383480290351181, "learning_rate": 0.00031551997271331446, "loss": 2.2803, "step": 5509 }, { "epoch": 0.6318082788671024, "grad_norm": 0.26298746529120715, "learning_rate": 0.0003153473918486867, "loss": 2.5991, "step": 5510 }, { "epoch": 0.631922944616443, "grad_norm": 0.2504096133030129, "learning_rate": 0.0003151748364533665, "loss": 2.4849, "step": 5511 }, { "epoch": 0.6320376103657838, "grad_norm": 0.23958811871731764, "learning_rate": 0.00031500230655115435, "loss": 2.2696, "step": 5512 }, { "epoch": 0.6321522761151244, "grad_norm": 0.25787432311562236, "learning_rate": 0.0003148298021658478, "loss": 2.4625, "step": 5513 }, { "epoch": 0.6322669418644651, "grad_norm": 0.23531505375305742, "learning_rate": 0.0003146573233212404, "loss": 2.3895, "step": 5514 }, { "epoch": 0.6323816076138058, "grad_norm": 0.2261254023625664, "learning_rate": 0.0003144848700411223, "loss": 2.4434, "step": 5515 }, { "epoch": 0.6324962733631464, "grad_norm": 0.23953155573569931, "learning_rate": 0.00031431244234928, "loss": 2.4295, "step": 5516 }, { "epoch": 0.6326109391124871, "grad_norm": 0.28301790978497304, "learning_rate": 0.0003141400402694967, "loss": 2.3678, "step": 5517 }, { "epoch": 0.6327256048618277, "grad_norm": 0.25936219853458653, "learning_rate": 0.00031396766382555226, "loss": 2.4512, "step": 5518 }, { "epoch": 0.6328402706111684, "grad_norm": 0.26562696973589034, "learning_rate": 0.00031379531304122235, "loss": 2.5733, "step": 5519 }, { "epoch": 0.6329549363605091, "grad_norm": 0.26699292220508375, "learning_rate": 0.00031362298794027977, "loss": 2.4473, "step": 5520 }, { "epoch": 0.6330696021098497, "grad_norm": 0.2558328254177126, "learning_rate": 0.00031345068854649326, "loss": 2.5546, "step": 5521 }, { "epoch": 0.6331842678591905, "grad_norm": 0.23306523667239934, "learning_rate": 0.00031327841488362816, "loss": 2.6028, "step": 5522 }, { "epoch": 0.6332989336085312, "grad_norm": 0.2681128333970615, "learning_rate": 0.0003131061669754466, "loss": 2.3875, "step": 5523 }, { "epoch": 0.6334135993578718, "grad_norm": 0.24744012009539318, "learning_rate": 0.0003129339448457069, "loss": 2.3353, "step": 5524 }, { "epoch": 0.6335282651072125, "grad_norm": 0.26523398461921827, "learning_rate": 0.0003127617485181635, "loss": 2.4449, "step": 5525 }, { "epoch": 0.6336429308565531, "grad_norm": 0.23182057506235487, "learning_rate": 0.000312589578016568, "loss": 2.4889, "step": 5526 }, { "epoch": 0.6337575966058938, "grad_norm": 0.25115581753960103, "learning_rate": 0.00031241743336466767, "loss": 2.4399, "step": 5527 }, { "epoch": 0.6338722623552345, "grad_norm": 0.26182569183589843, "learning_rate": 0.00031224531458620686, "loss": 2.4776, "step": 5528 }, { "epoch": 0.6339869281045751, "grad_norm": 0.24567204287346073, "learning_rate": 0.0003120732217049259, "loss": 2.3643, "step": 5529 }, { "epoch": 0.6341015938539158, "grad_norm": 0.21424622793344852, "learning_rate": 0.00031190115474456195, "loss": 2.4366, "step": 5530 }, { "epoch": 0.6342162596032566, "grad_norm": 0.23698383033086595, "learning_rate": 0.00031172911372884807, "loss": 2.3229, "step": 5531 }, { "epoch": 0.6343309253525972, "grad_norm": 0.25363311166436553, "learning_rate": 0.0003115570986815141, "loss": 2.4634, "step": 5532 }, { "epoch": 0.6344455911019379, "grad_norm": 0.24652759686856138, "learning_rate": 0.0003113851096262865, "loss": 2.5557, "step": 5533 }, { "epoch": 0.6345602568512785, "grad_norm": 0.24408343214048267, "learning_rate": 0.0003112131465868877, "loss": 2.4431, "step": 5534 }, { "epoch": 0.6346749226006192, "grad_norm": 0.23515488338936574, "learning_rate": 0.00031104120958703666, "loss": 2.4399, "step": 5535 }, { "epoch": 0.6347895883499599, "grad_norm": 0.24296324443275436, "learning_rate": 0.00031086929865044896, "loss": 2.3646, "step": 5536 }, { "epoch": 0.6349042540993005, "grad_norm": 0.268929637905429, "learning_rate": 0.0003106974138008362, "loss": 2.3471, "step": 5537 }, { "epoch": 0.6350189198486412, "grad_norm": 0.25946396242332137, "learning_rate": 0.00031052555506190673, "loss": 2.4154, "step": 5538 }, { "epoch": 0.6351335855979818, "grad_norm": 0.2287616870704528, "learning_rate": 0.00031035372245736544, "loss": 2.2702, "step": 5539 }, { "epoch": 0.6352482513473225, "grad_norm": 0.2479208663589942, "learning_rate": 0.0003101819160109131, "loss": 2.3541, "step": 5540 }, { "epoch": 0.6353629170966633, "grad_norm": 0.24710559483474936, "learning_rate": 0.00031001013574624714, "loss": 2.364, "step": 5541 }, { "epoch": 0.6354775828460039, "grad_norm": 0.2486665537378549, "learning_rate": 0.00030983838168706124, "loss": 2.3355, "step": 5542 }, { "epoch": 0.6355922485953446, "grad_norm": 0.24808884454052754, "learning_rate": 0.0003096666538570461, "loss": 2.4294, "step": 5543 }, { "epoch": 0.6357069143446853, "grad_norm": 0.25631861026758807, "learning_rate": 0.0003094949522798877, "loss": 2.4378, "step": 5544 }, { "epoch": 0.6358215800940259, "grad_norm": 0.25654630331148426, "learning_rate": 0.0003093232769792695, "loss": 2.4426, "step": 5545 }, { "epoch": 0.6359362458433666, "grad_norm": 0.25625823062406006, "learning_rate": 0.0003091516279788706, "loss": 2.5253, "step": 5546 }, { "epoch": 0.6360509115927072, "grad_norm": 0.2447769550132519, "learning_rate": 0.0003089800053023665, "loss": 2.4636, "step": 5547 }, { "epoch": 0.6361655773420479, "grad_norm": 0.279545014330734, "learning_rate": 0.00030880840897342955, "loss": 2.5956, "step": 5548 }, { "epoch": 0.6362802430913886, "grad_norm": 0.2742271169893888, "learning_rate": 0.0003086368390157283, "loss": 2.3668, "step": 5549 }, { "epoch": 0.6363949088407292, "grad_norm": 0.2560739523412276, "learning_rate": 0.0003084652954529273, "loss": 2.388, "step": 5550 }, { "epoch": 0.63650957459007, "grad_norm": 0.2725102637306408, "learning_rate": 0.00030829377830868785, "loss": 2.4671, "step": 5551 }, { "epoch": 0.6366242403394106, "grad_norm": 0.27811919686798214, "learning_rate": 0.0003081222876066675, "loss": 2.3782, "step": 5552 }, { "epoch": 0.6367389060887513, "grad_norm": 0.2278704699688382, "learning_rate": 0.00030795082337051985, "loss": 2.4658, "step": 5553 }, { "epoch": 0.636853571838092, "grad_norm": 0.2343502685248253, "learning_rate": 0.0003077793856238954, "loss": 2.337, "step": 5554 }, { "epoch": 0.6369682375874326, "grad_norm": 0.2418894524517346, "learning_rate": 0.0003076079743904407, "loss": 2.3272, "step": 5555 }, { "epoch": 0.6370829033367733, "grad_norm": 0.2674808932411423, "learning_rate": 0.00030743658969379864, "loss": 2.448, "step": 5556 }, { "epoch": 0.637197569086114, "grad_norm": 0.26549930620050916, "learning_rate": 0.0003072652315576084, "loss": 2.4675, "step": 5557 }, { "epoch": 0.6373122348354546, "grad_norm": 0.23494644688036828, "learning_rate": 0.00030709390000550543, "loss": 2.427, "step": 5558 }, { "epoch": 0.6374269005847953, "grad_norm": 0.2360840166240721, "learning_rate": 0.00030692259506112207, "loss": 2.562, "step": 5559 }, { "epoch": 0.637541566334136, "grad_norm": 0.2561945452470874, "learning_rate": 0.0003067513167480862, "loss": 2.4625, "step": 5560 }, { "epoch": 0.6376562320834767, "grad_norm": 0.2606740278367096, "learning_rate": 0.00030658006509002265, "loss": 2.3394, "step": 5561 }, { "epoch": 0.6377708978328174, "grad_norm": 0.255192048020537, "learning_rate": 0.0003064088401105521, "loss": 2.551, "step": 5562 }, { "epoch": 0.637885563582158, "grad_norm": 0.22265055027920913, "learning_rate": 0.00030623764183329176, "loss": 2.3916, "step": 5563 }, { "epoch": 0.6380002293314987, "grad_norm": 0.2538054116207791, "learning_rate": 0.0003060664702818555, "loss": 2.4574, "step": 5564 }, { "epoch": 0.6381148950808394, "grad_norm": 0.2381936565759509, "learning_rate": 0.000305895325479853, "loss": 2.4271, "step": 5565 }, { "epoch": 0.63822956083018, "grad_norm": 0.2391724091021789, "learning_rate": 0.00030572420745089024, "loss": 2.3996, "step": 5566 }, { "epoch": 0.6383442265795207, "grad_norm": 0.22596674411763887, "learning_rate": 0.00030555311621856984, "loss": 2.3073, "step": 5567 }, { "epoch": 0.6384588923288613, "grad_norm": 0.23910245204790712, "learning_rate": 0.0003053820518064905, "loss": 2.4449, "step": 5568 }, { "epoch": 0.638573558078202, "grad_norm": 0.24863720160124406, "learning_rate": 0.00030521101423824736, "loss": 2.3923, "step": 5569 }, { "epoch": 0.6386882238275428, "grad_norm": 0.23848836686992558, "learning_rate": 0.0003050400035374319, "loss": 2.5183, "step": 5570 }, { "epoch": 0.6388028895768834, "grad_norm": 0.23244563722155503, "learning_rate": 0.0003048690197276318, "loss": 2.3985, "step": 5571 }, { "epoch": 0.6389175553262241, "grad_norm": 0.23980346381708229, "learning_rate": 0.0003046980628324306, "loss": 2.4689, "step": 5572 }, { "epoch": 0.6390322210755647, "grad_norm": 0.24048800239971505, "learning_rate": 0.0003045271328754089, "loss": 2.4929, "step": 5573 }, { "epoch": 0.6391468868249054, "grad_norm": 0.2833472758316603, "learning_rate": 0.00030435622988014333, "loss": 2.5184, "step": 5574 }, { "epoch": 0.6392615525742461, "grad_norm": 0.24345676726661816, "learning_rate": 0.0003041853538702064, "loss": 2.3298, "step": 5575 }, { "epoch": 0.6393762183235867, "grad_norm": 0.23737334691864767, "learning_rate": 0.00030401450486916743, "loss": 2.4799, "step": 5576 }, { "epoch": 0.6394908840729274, "grad_norm": 0.2456564074696597, "learning_rate": 0.00030384368290059175, "loss": 2.6203, "step": 5577 }, { "epoch": 0.6396055498222681, "grad_norm": 0.2433604148140225, "learning_rate": 0.00030367288798804063, "loss": 2.4013, "step": 5578 }, { "epoch": 0.6397202155716087, "grad_norm": 0.23804416385711794, "learning_rate": 0.0003035021201550725, "loss": 2.4361, "step": 5579 }, { "epoch": 0.6398348813209495, "grad_norm": 0.24675106347917813, "learning_rate": 0.0003033313794252413, "loss": 2.4898, "step": 5580 }, { "epoch": 0.6399495470702901, "grad_norm": 0.2877622192447196, "learning_rate": 0.00030316066582209745, "loss": 2.5742, "step": 5581 }, { "epoch": 0.6400642128196308, "grad_norm": 0.2690196002395355, "learning_rate": 0.0003029899793691877, "loss": 2.4379, "step": 5582 }, { "epoch": 0.6401788785689715, "grad_norm": 0.24370696613004647, "learning_rate": 0.00030281932009005474, "loss": 2.3857, "step": 5583 }, { "epoch": 0.6402935443183121, "grad_norm": 0.25075507329585384, "learning_rate": 0.0003026486880082382, "loss": 2.2878, "step": 5584 }, { "epoch": 0.6404082100676528, "grad_norm": 0.2712703315860943, "learning_rate": 0.00030247808314727315, "loss": 2.4744, "step": 5585 }, { "epoch": 0.6405228758169934, "grad_norm": 0.2433559738733202, "learning_rate": 0.0003023075055306915, "loss": 2.387, "step": 5586 }, { "epoch": 0.6406375415663341, "grad_norm": 0.22891710661314116, "learning_rate": 0.0003021369551820211, "loss": 2.3227, "step": 5587 }, { "epoch": 0.6407522073156748, "grad_norm": 0.25020110788364225, "learning_rate": 0.0003019664321247859, "loss": 2.5849, "step": 5588 }, { "epoch": 0.6408668730650154, "grad_norm": 0.24850155323763826, "learning_rate": 0.0003017959363825068, "loss": 2.4952, "step": 5589 }, { "epoch": 0.6409815388143562, "grad_norm": 0.25391151201367884, "learning_rate": 0.00030162546797870014, "loss": 2.4506, "step": 5590 }, { "epoch": 0.6410962045636969, "grad_norm": 0.25703356684184364, "learning_rate": 0.0003014550269368788, "loss": 2.3004, "step": 5591 }, { "epoch": 0.6412108703130375, "grad_norm": 0.2466140134033812, "learning_rate": 0.0003012846132805519, "loss": 2.4447, "step": 5592 }, { "epoch": 0.6413255360623782, "grad_norm": 0.22938962171974508, "learning_rate": 0.00030111422703322447, "loss": 2.4067, "step": 5593 }, { "epoch": 0.6414402018117188, "grad_norm": 0.2210941104555438, "learning_rate": 0.00030094386821839846, "loss": 2.3416, "step": 5594 }, { "epoch": 0.6415548675610595, "grad_norm": 0.21476503437330766, "learning_rate": 0.00030077353685957167, "loss": 2.4579, "step": 5595 }, { "epoch": 0.6416695333104002, "grad_norm": 0.2448900982724273, "learning_rate": 0.0003006032329802378, "loss": 2.4469, "step": 5596 }, { "epoch": 0.6417841990597408, "grad_norm": 0.3042977985089514, "learning_rate": 0.000300432956603887, "loss": 2.4168, "step": 5597 }, { "epoch": 0.6418988648090815, "grad_norm": 0.23066407889979565, "learning_rate": 0.0003002627077540056, "loss": 2.499, "step": 5598 }, { "epoch": 0.6420135305584223, "grad_norm": 0.2546313088370773, "learning_rate": 0.0003000924864540766, "loss": 2.3875, "step": 5599 }, { "epoch": 0.6421281963077629, "grad_norm": 0.22364361841055, "learning_rate": 0.00029992229272757833, "loss": 2.4302, "step": 5600 }, { "epoch": 0.6422428620571036, "grad_norm": 0.255260045539605, "learning_rate": 0.0002997521265979861, "loss": 2.4009, "step": 5601 }, { "epoch": 0.6423575278064442, "grad_norm": 0.27093319642463676, "learning_rate": 0.0002995819880887709, "loss": 2.3329, "step": 5602 }, { "epoch": 0.6424721935557849, "grad_norm": 0.26454849215149356, "learning_rate": 0.0002994118772233999, "loss": 2.3926, "step": 5603 }, { "epoch": 0.6425868593051256, "grad_norm": 0.2965449710769386, "learning_rate": 0.0002992417940253371, "loss": 2.3461, "step": 5604 }, { "epoch": 0.6427015250544662, "grad_norm": 0.25930391110901724, "learning_rate": 0.00029907173851804206, "loss": 2.4265, "step": 5605 }, { "epoch": 0.6428161908038069, "grad_norm": 0.29076442987596607, "learning_rate": 0.00029890171072497054, "loss": 2.477, "step": 5606 }, { "epoch": 0.6429308565531475, "grad_norm": 0.2657461234756996, "learning_rate": 0.0002987317106695748, "loss": 2.4292, "step": 5607 }, { "epoch": 0.6430455223024882, "grad_norm": 0.2762738606372087, "learning_rate": 0.0002985617383753029, "loss": 2.4491, "step": 5608 }, { "epoch": 0.643160188051829, "grad_norm": 0.25762550425728215, "learning_rate": 0.00029839179386559957, "loss": 2.3368, "step": 5609 }, { "epoch": 0.6432748538011696, "grad_norm": 0.23622645143233845, "learning_rate": 0.00029822187716390525, "loss": 2.367, "step": 5610 }, { "epoch": 0.6433895195505103, "grad_norm": 0.25078313472020947, "learning_rate": 0.0002980519882936568, "loss": 2.5408, "step": 5611 }, { "epoch": 0.643504185299851, "grad_norm": 0.2355015951503166, "learning_rate": 0.000297882127278287, "loss": 2.2902, "step": 5612 }, { "epoch": 0.6436188510491916, "grad_norm": 0.20815988634004784, "learning_rate": 0.00029771229414122494, "loss": 2.4703, "step": 5613 }, { "epoch": 0.6437335167985323, "grad_norm": 0.26240585088274326, "learning_rate": 0.0002975424889058961, "loss": 2.4254, "step": 5614 }, { "epoch": 0.6438481825478729, "grad_norm": 0.23871563351535682, "learning_rate": 0.00029737271159572176, "loss": 2.43, "step": 5615 }, { "epoch": 0.6439628482972136, "grad_norm": 0.23387369770608432, "learning_rate": 0.0002972029622341193, "loss": 2.3656, "step": 5616 }, { "epoch": 0.6440775140465543, "grad_norm": 0.2763505678650222, "learning_rate": 0.0002970332408445027, "loss": 2.5593, "step": 5617 }, { "epoch": 0.6441921797958949, "grad_norm": 0.2639255756398446, "learning_rate": 0.0002968635474502813, "loss": 2.4779, "step": 5618 }, { "epoch": 0.6443068455452357, "grad_norm": 0.23528628447469446, "learning_rate": 0.0002966938820748616, "loss": 2.231, "step": 5619 }, { "epoch": 0.6444215112945763, "grad_norm": 0.24616959166218327, "learning_rate": 0.00029652424474164557, "loss": 2.2724, "step": 5620 }, { "epoch": 0.644536177043917, "grad_norm": 0.26708752812841563, "learning_rate": 0.0002963546354740314, "loss": 2.4256, "step": 5621 }, { "epoch": 0.6446508427932577, "grad_norm": 0.26427323317525886, "learning_rate": 0.0002961850542954133, "loss": 2.3504, "step": 5622 }, { "epoch": 0.6447655085425983, "grad_norm": 0.26998655568855934, "learning_rate": 0.0002960155012291818, "loss": 2.5164, "step": 5623 }, { "epoch": 0.644880174291939, "grad_norm": 0.250084754133724, "learning_rate": 0.0002958459762987238, "loss": 2.4739, "step": 5624 }, { "epoch": 0.6449948400412797, "grad_norm": 0.28786685738260465, "learning_rate": 0.00029567647952742176, "loss": 2.3899, "step": 5625 }, { "epoch": 0.6451095057906203, "grad_norm": 0.26916469411413324, "learning_rate": 0.00029550701093865474, "loss": 2.428, "step": 5626 }, { "epoch": 0.645224171539961, "grad_norm": 0.274447756838796, "learning_rate": 0.0002953375705557975, "loss": 2.4073, "step": 5627 }, { "epoch": 0.6453388372893016, "grad_norm": 0.27906643516140783, "learning_rate": 0.00029516815840222103, "loss": 2.4642, "step": 5628 }, { "epoch": 0.6454535030386424, "grad_norm": 0.23396250522532427, "learning_rate": 0.0002949987745012928, "loss": 2.3474, "step": 5629 }, { "epoch": 0.6455681687879831, "grad_norm": 0.2557116841351349, "learning_rate": 0.00029482941887637595, "loss": 2.4611, "step": 5630 }, { "epoch": 0.6456828345373237, "grad_norm": 0.23600051243122336, "learning_rate": 0.00029466009155082977, "loss": 2.4435, "step": 5631 }, { "epoch": 0.6457975002866644, "grad_norm": 0.24745280890026364, "learning_rate": 0.00029449079254801, "loss": 2.3555, "step": 5632 }, { "epoch": 0.6459121660360051, "grad_norm": 0.26967046403946093, "learning_rate": 0.00029432152189126784, "loss": 2.3924, "step": 5633 }, { "epoch": 0.6460268317853457, "grad_norm": 0.2582628980879432, "learning_rate": 0.00029415227960395126, "loss": 2.4175, "step": 5634 }, { "epoch": 0.6461414975346864, "grad_norm": 0.2667329007189647, "learning_rate": 0.0002939830657094038, "loss": 2.4359, "step": 5635 }, { "epoch": 0.646256163284027, "grad_norm": 0.2584403054067718, "learning_rate": 0.00029381388023096556, "loss": 2.4777, "step": 5636 }, { "epoch": 0.6463708290333677, "grad_norm": 0.2448074439887825, "learning_rate": 0.0002936447231919721, "loss": 2.4026, "step": 5637 }, { "epoch": 0.6464854947827084, "grad_norm": 0.23506794710498322, "learning_rate": 0.0002934755946157556, "loss": 2.5527, "step": 5638 }, { "epoch": 0.646600160532049, "grad_norm": 0.229589813860594, "learning_rate": 0.0002933064945256442, "loss": 2.3899, "step": 5639 }, { "epoch": 0.6467148262813898, "grad_norm": 0.22711133093913352, "learning_rate": 0.00029313742294496194, "loss": 2.2789, "step": 5640 }, { "epoch": 0.6468294920307304, "grad_norm": 0.24589402616403436, "learning_rate": 0.00029296837989702893, "loss": 2.4373, "step": 5641 }, { "epoch": 0.6469441577800711, "grad_norm": 0.23318551826503237, "learning_rate": 0.0002927993654051617, "loss": 2.3886, "step": 5642 }, { "epoch": 0.6470588235294118, "grad_norm": 0.23603384018727594, "learning_rate": 0.0002926303794926722, "loss": 2.5977, "step": 5643 }, { "epoch": 0.6471734892787524, "grad_norm": 0.23520160798590653, "learning_rate": 0.0002924614221828691, "loss": 2.3885, "step": 5644 }, { "epoch": 0.6472881550280931, "grad_norm": 0.23387142584112652, "learning_rate": 0.0002922924934990568, "loss": 2.4888, "step": 5645 }, { "epoch": 0.6474028207774338, "grad_norm": 0.2348625852895707, "learning_rate": 0.00029212359346453585, "loss": 2.3976, "step": 5646 }, { "epoch": 0.6475174865267744, "grad_norm": 0.24575923868493688, "learning_rate": 0.00029195472210260257, "loss": 2.4553, "step": 5647 }, { "epoch": 0.6476321522761151, "grad_norm": 0.2529606429639754, "learning_rate": 0.00029178587943654965, "loss": 2.4127, "step": 5648 }, { "epoch": 0.6477468180254558, "grad_norm": 0.25988253427881813, "learning_rate": 0.00029161706548966576, "loss": 2.2999, "step": 5649 }, { "epoch": 0.6478614837747965, "grad_norm": 0.24219340207708312, "learning_rate": 0.0002914482802852356, "loss": 2.4196, "step": 5650 }, { "epoch": 0.6479761495241372, "grad_norm": 0.2696814415673247, "learning_rate": 0.0002912795238465399, "loss": 2.4996, "step": 5651 }, { "epoch": 0.6480908152734778, "grad_norm": 0.2541434058301903, "learning_rate": 0.0002911107961968552, "loss": 2.3113, "step": 5652 }, { "epoch": 0.6482054810228185, "grad_norm": 0.2516095556348608, "learning_rate": 0.0002909420973594541, "loss": 2.5481, "step": 5653 }, { "epoch": 0.6483201467721592, "grad_norm": 0.25464964305242477, "learning_rate": 0.00029077342735760615, "loss": 2.5334, "step": 5654 }, { "epoch": 0.6484348125214998, "grad_norm": 0.27157018764617435, "learning_rate": 0.0002906047862145754, "loss": 2.394, "step": 5655 }, { "epoch": 0.6485494782708405, "grad_norm": 0.29350822765479045, "learning_rate": 0.00029043617395362297, "loss": 2.5096, "step": 5656 }, { "epoch": 0.6486641440201811, "grad_norm": 0.23958297168633486, "learning_rate": 0.00029026759059800597, "loss": 2.5002, "step": 5657 }, { "epoch": 0.6487788097695218, "grad_norm": 0.25212140529203575, "learning_rate": 0.00029009903617097647, "loss": 2.4031, "step": 5658 }, { "epoch": 0.6488934755188626, "grad_norm": 0.2801407147918183, "learning_rate": 0.00028993051069578415, "loss": 2.4639, "step": 5659 }, { "epoch": 0.6490081412682032, "grad_norm": 0.26439529783461496, "learning_rate": 0.0002897620141956737, "loss": 2.5516, "step": 5660 }, { "epoch": 0.6491228070175439, "grad_norm": 0.23585350633934601, "learning_rate": 0.00028959354669388584, "loss": 2.4125, "step": 5661 }, { "epoch": 0.6492374727668845, "grad_norm": 0.22659632561400014, "learning_rate": 0.0002894251082136574, "loss": 2.2204, "step": 5662 }, { "epoch": 0.6493521385162252, "grad_norm": 0.24286476485274064, "learning_rate": 0.0002892566987782213, "loss": 2.2555, "step": 5663 }, { "epoch": 0.6494668042655659, "grad_norm": 0.24946976485492503, "learning_rate": 0.0002890883184108065, "loss": 2.4342, "step": 5664 }, { "epoch": 0.6495814700149065, "grad_norm": 0.24937815568346353, "learning_rate": 0.0002889199671346379, "loss": 2.4295, "step": 5665 }, { "epoch": 0.6496961357642472, "grad_norm": 0.27192849803477626, "learning_rate": 0.0002887516449729365, "loss": 2.4252, "step": 5666 }, { "epoch": 0.6498108015135879, "grad_norm": 0.22347661255198248, "learning_rate": 0.0002885833519489186, "loss": 2.4658, "step": 5667 }, { "epoch": 0.6499254672629285, "grad_norm": 0.23576867180901842, "learning_rate": 0.0002884150880857972, "loss": 2.4509, "step": 5668 }, { "epoch": 0.6500401330122693, "grad_norm": 0.23434289413172396, "learning_rate": 0.00028824685340678163, "loss": 2.377, "step": 5669 }, { "epoch": 0.6501547987616099, "grad_norm": 0.23397172846403394, "learning_rate": 0.000288078647935076, "loss": 2.3541, "step": 5670 }, { "epoch": 0.6502694645109506, "grad_norm": 0.22515335259006614, "learning_rate": 0.0002879104716938814, "loss": 2.2812, "step": 5671 }, { "epoch": 0.6503841302602913, "grad_norm": 0.2538860788105143, "learning_rate": 0.00028774232470639454, "loss": 2.5628, "step": 5672 }, { "epoch": 0.6504987960096319, "grad_norm": 0.2300478455823693, "learning_rate": 0.0002875742069958076, "loss": 2.3377, "step": 5673 }, { "epoch": 0.6506134617589726, "grad_norm": 0.2605896453240586, "learning_rate": 0.00028740611858530984, "loss": 2.3875, "step": 5674 }, { "epoch": 0.6507281275083132, "grad_norm": 0.24941822668389896, "learning_rate": 0.0002872380594980858, "loss": 2.4525, "step": 5675 }, { "epoch": 0.6508427932576539, "grad_norm": 0.2790859475463254, "learning_rate": 0.00028707002975731564, "loss": 2.3956, "step": 5676 }, { "epoch": 0.6509574590069946, "grad_norm": 0.2791239206128264, "learning_rate": 0.00028690202938617607, "loss": 2.5136, "step": 5677 }, { "epoch": 0.6510721247563352, "grad_norm": 0.22798766210742447, "learning_rate": 0.0002867340584078395, "loss": 2.2897, "step": 5678 }, { "epoch": 0.651186790505676, "grad_norm": 0.2512337476378715, "learning_rate": 0.0002865661168454744, "loss": 2.3616, "step": 5679 }, { "epoch": 0.6513014562550167, "grad_norm": 0.24514082837191367, "learning_rate": 0.000286398204722245, "loss": 2.4086, "step": 5680 }, { "epoch": 0.6514161220043573, "grad_norm": 0.2632389911596468, "learning_rate": 0.0002862303220613118, "loss": 2.3422, "step": 5681 }, { "epoch": 0.651530787753698, "grad_norm": 0.2534026760685791, "learning_rate": 0.0002860624688858308, "loss": 2.4974, "step": 5682 }, { "epoch": 0.6516454535030386, "grad_norm": 0.2596882211351948, "learning_rate": 0.00028589464521895414, "loss": 2.4496, "step": 5683 }, { "epoch": 0.6517601192523793, "grad_norm": 0.26204431894852764, "learning_rate": 0.0002857268510838299, "loss": 2.4679, "step": 5684 }, { "epoch": 0.65187478500172, "grad_norm": 0.24101427205039994, "learning_rate": 0.0002855590865036022, "loss": 2.4391, "step": 5685 }, { "epoch": 0.6519894507510606, "grad_norm": 0.24282564442262652, "learning_rate": 0.00028539135150141084, "loss": 2.4821, "step": 5686 }, { "epoch": 0.6521041165004013, "grad_norm": 0.25533332270224457, "learning_rate": 0.0002852236461003919, "loss": 2.598, "step": 5687 }, { "epoch": 0.6522187822497421, "grad_norm": 0.25363442918670664, "learning_rate": 0.00028505597032367665, "loss": 2.4296, "step": 5688 }, { "epoch": 0.6523334479990827, "grad_norm": 0.2824175622123652, "learning_rate": 0.00028488832419439346, "loss": 2.6137, "step": 5689 }, { "epoch": 0.6524481137484234, "grad_norm": 0.2637758170660184, "learning_rate": 0.0002847207077356654, "loss": 2.4239, "step": 5690 }, { "epoch": 0.652562779497764, "grad_norm": 0.24076632651416974, "learning_rate": 0.00028455312097061205, "loss": 2.4514, "step": 5691 }, { "epoch": 0.6526774452471047, "grad_norm": 0.22477164297578944, "learning_rate": 0.0002843855639223488, "loss": 2.4554, "step": 5692 }, { "epoch": 0.6527921109964454, "grad_norm": 0.2652591228536832, "learning_rate": 0.00028421803661398716, "loss": 2.3119, "step": 5693 }, { "epoch": 0.652906776745786, "grad_norm": 0.251732929770671, "learning_rate": 0.00028405053906863407, "loss": 2.576, "step": 5694 }, { "epoch": 0.6530214424951267, "grad_norm": 0.2567026618906436, "learning_rate": 0.00028388307130939303, "loss": 2.4603, "step": 5695 }, { "epoch": 0.6531361082444673, "grad_norm": 0.22930826067858937, "learning_rate": 0.0002837156333593625, "loss": 2.2955, "step": 5696 }, { "epoch": 0.653250773993808, "grad_norm": 0.2585253162135586, "learning_rate": 0.0002835482252416376, "loss": 2.4466, "step": 5697 }, { "epoch": 0.6533654397431488, "grad_norm": 0.23976899563172563, "learning_rate": 0.00028338084697930913, "loss": 2.4875, "step": 5698 }, { "epoch": 0.6534801054924894, "grad_norm": 0.2665870704519859, "learning_rate": 0.0002832134985954636, "loss": 2.4702, "step": 5699 }, { "epoch": 0.6535947712418301, "grad_norm": 0.2629597209701206, "learning_rate": 0.0002830461801131837, "loss": 2.3997, "step": 5700 }, { "epoch": 0.6537094369911708, "grad_norm": 0.25377673367996123, "learning_rate": 0.0002828788915555479, "loss": 2.4565, "step": 5701 }, { "epoch": 0.6538241027405114, "grad_norm": 0.24075963857151567, "learning_rate": 0.0002827116329456301, "loss": 2.3954, "step": 5702 }, { "epoch": 0.6539387684898521, "grad_norm": 0.2833079796240083, "learning_rate": 0.0002825444043065004, "loss": 2.4201, "step": 5703 }, { "epoch": 0.6540534342391927, "grad_norm": 0.24875200053756577, "learning_rate": 0.0002823772056612255, "loss": 2.5976, "step": 5704 }, { "epoch": 0.6541680999885334, "grad_norm": 0.22378414377365588, "learning_rate": 0.00028221003703286665, "loss": 2.396, "step": 5705 }, { "epoch": 0.6542827657378741, "grad_norm": 0.2709371367628395, "learning_rate": 0.0002820428984444816, "loss": 2.2661, "step": 5706 }, { "epoch": 0.6543974314872147, "grad_norm": 0.2757182974005147, "learning_rate": 0.00028187578991912437, "loss": 2.3632, "step": 5707 }, { "epoch": 0.6545120972365555, "grad_norm": 0.23508535568675532, "learning_rate": 0.00028170871147984366, "loss": 2.3696, "step": 5708 }, { "epoch": 0.6546267629858961, "grad_norm": 0.24594163908158714, "learning_rate": 0.00028154166314968545, "loss": 2.3751, "step": 5709 }, { "epoch": 0.6547414287352368, "grad_norm": 0.25833428537587416, "learning_rate": 0.0002813746449516907, "loss": 2.3903, "step": 5710 }, { "epoch": 0.6548560944845775, "grad_norm": 0.27484711465138284, "learning_rate": 0.0002812076569088962, "loss": 2.322, "step": 5711 }, { "epoch": 0.6549707602339181, "grad_norm": 0.2322907765790648, "learning_rate": 0.0002810406990443348, "loss": 2.5325, "step": 5712 }, { "epoch": 0.6550854259832588, "grad_norm": 0.24776938688191574, "learning_rate": 0.00028087377138103533, "loss": 2.3297, "step": 5713 }, { "epoch": 0.6552000917325995, "grad_norm": 0.23158261012027856, "learning_rate": 0.0002807068739420221, "loss": 2.4271, "step": 5714 }, { "epoch": 0.6553147574819401, "grad_norm": 0.23479974406412335, "learning_rate": 0.0002805400067503155, "loss": 2.5268, "step": 5715 }, { "epoch": 0.6554294232312808, "grad_norm": 0.2507778250433927, "learning_rate": 0.0002803731698289319, "loss": 2.3899, "step": 5716 }, { "epoch": 0.6555440889806214, "grad_norm": 0.2583038963802231, "learning_rate": 0.00028020636320088286, "loss": 2.4044, "step": 5717 }, { "epoch": 0.6556587547299622, "grad_norm": 0.2648132659645557, "learning_rate": 0.00028003958688917614, "loss": 2.3401, "step": 5718 }, { "epoch": 0.6557734204793029, "grad_norm": 0.23900663822637008, "learning_rate": 0.00027987284091681596, "loss": 2.4322, "step": 5719 }, { "epoch": 0.6558880862286435, "grad_norm": 0.23544551274215067, "learning_rate": 0.0002797061253068012, "loss": 2.2931, "step": 5720 }, { "epoch": 0.6560027519779842, "grad_norm": 0.253469442142768, "learning_rate": 0.0002795394400821273, "loss": 2.3632, "step": 5721 }, { "epoch": 0.6561174177273249, "grad_norm": 0.24782072259899357, "learning_rate": 0.00027937278526578546, "loss": 2.3532, "step": 5722 }, { "epoch": 0.6562320834766655, "grad_norm": 0.2430005383429055, "learning_rate": 0.0002792061608807619, "loss": 2.3572, "step": 5723 }, { "epoch": 0.6563467492260062, "grad_norm": 0.25354276627142075, "learning_rate": 0.00027903956695004, "loss": 2.4653, "step": 5724 }, { "epoch": 0.6564614149753468, "grad_norm": 0.2557591005337568, "learning_rate": 0.00027887300349659815, "loss": 2.3297, "step": 5725 }, { "epoch": 0.6565760807246875, "grad_norm": 0.26731713672166413, "learning_rate": 0.00027870647054341016, "loss": 2.3034, "step": 5726 }, { "epoch": 0.6566907464740283, "grad_norm": 0.24931412951962512, "learning_rate": 0.0002785399681134464, "loss": 2.3791, "step": 5727 }, { "epoch": 0.6568054122233689, "grad_norm": 0.25883674133520823, "learning_rate": 0.0002783734962296726, "loss": 2.442, "step": 5728 }, { "epoch": 0.6569200779727096, "grad_norm": 0.25334229322011337, "learning_rate": 0.00027820705491505025, "loss": 2.5225, "step": 5729 }, { "epoch": 0.6570347437220502, "grad_norm": 0.25985002381941585, "learning_rate": 0.0002780406441925371, "loss": 2.2936, "step": 5730 }, { "epoch": 0.6571494094713909, "grad_norm": 0.25005410924447674, "learning_rate": 0.0002778742640850863, "loss": 2.3953, "step": 5731 }, { "epoch": 0.6572640752207316, "grad_norm": 0.2748524723040095, "learning_rate": 0.0002777079146156465, "loss": 2.3931, "step": 5732 }, { "epoch": 0.6573787409700722, "grad_norm": 0.25164991841180934, "learning_rate": 0.0002775415958071625, "loss": 2.4199, "step": 5733 }, { "epoch": 0.6574934067194129, "grad_norm": 0.2307592088245544, "learning_rate": 0.000277375307682575, "loss": 2.5075, "step": 5734 }, { "epoch": 0.6576080724687536, "grad_norm": 0.23280048460380875, "learning_rate": 0.00027720905026482024, "loss": 2.3895, "step": 5735 }, { "epoch": 0.6577227382180942, "grad_norm": 0.26153464164864837, "learning_rate": 0.00027704282357683013, "loss": 2.4235, "step": 5736 }, { "epoch": 0.657837403967435, "grad_norm": 0.24546149653701257, "learning_rate": 0.0002768766276415328, "loss": 2.353, "step": 5737 }, { "epoch": 0.6579520697167756, "grad_norm": 0.2552156430418072, "learning_rate": 0.00027671046248185115, "loss": 2.3778, "step": 5738 }, { "epoch": 0.6580667354661163, "grad_norm": 0.22784909879818593, "learning_rate": 0.0002765443281207053, "loss": 2.5407, "step": 5739 }, { "epoch": 0.658181401215457, "grad_norm": 0.2380033394835606, "learning_rate": 0.00027637822458100974, "loss": 2.337, "step": 5740 }, { "epoch": 0.6582960669647976, "grad_norm": 0.24508608122322872, "learning_rate": 0.0002762121518856755, "loss": 2.5339, "step": 5741 }, { "epoch": 0.6584107327141383, "grad_norm": 0.23613795002851157, "learning_rate": 0.00027604611005760914, "loss": 2.3751, "step": 5742 }, { "epoch": 0.6585253984634789, "grad_norm": 0.22844835784567327, "learning_rate": 0.0002758800991197129, "loss": 2.4891, "step": 5743 }, { "epoch": 0.6586400642128196, "grad_norm": 0.2425499749283864, "learning_rate": 0.00027571411909488487, "loss": 2.3546, "step": 5744 }, { "epoch": 0.6587547299621603, "grad_norm": 0.23198365567729787, "learning_rate": 0.00027554817000601905, "loss": 2.5144, "step": 5745 }, { "epoch": 0.6588693957115009, "grad_norm": 0.23813006346784443, "learning_rate": 0.00027538225187600455, "loss": 2.4779, "step": 5746 }, { "epoch": 0.6589840614608417, "grad_norm": 0.2532566653616931, "learning_rate": 0.0002752163647277268, "loss": 2.4193, "step": 5747 }, { "epoch": 0.6590987272101824, "grad_norm": 0.25236499765016984, "learning_rate": 0.0002750505085840668, "loss": 2.5391, "step": 5748 }, { "epoch": 0.659213392959523, "grad_norm": 0.2637220533699274, "learning_rate": 0.0002748846834679012, "loss": 2.3598, "step": 5749 }, { "epoch": 0.6593280587088637, "grad_norm": 0.24911036304341752, "learning_rate": 0.0002747188894021024, "loss": 2.4712, "step": 5750 }, { "epoch": 0.6594427244582043, "grad_norm": 0.28459948709122834, "learning_rate": 0.00027455312640953873, "loss": 2.3872, "step": 5751 }, { "epoch": 0.659557390207545, "grad_norm": 0.2909386573438485, "learning_rate": 0.0002743873945130737, "loss": 2.4084, "step": 5752 }, { "epoch": 0.6596720559568857, "grad_norm": 0.242725157572406, "learning_rate": 0.0002742216937355668, "loss": 2.5349, "step": 5753 }, { "epoch": 0.6597867217062263, "grad_norm": 0.23820125358539773, "learning_rate": 0.00027405602409987396, "loss": 2.509, "step": 5754 }, { "epoch": 0.659901387455567, "grad_norm": 0.24448818374823061, "learning_rate": 0.0002738903856288455, "loss": 2.3564, "step": 5755 }, { "epoch": 0.6600160532049077, "grad_norm": 0.27903598930521567, "learning_rate": 0.0002737247783453283, "loss": 2.3885, "step": 5756 }, { "epoch": 0.6601307189542484, "grad_norm": 0.2315742638326828, "learning_rate": 0.00027355920227216493, "loss": 2.3174, "step": 5757 }, { "epoch": 0.6602453847035891, "grad_norm": 0.25829538078419223, "learning_rate": 0.00027339365743219286, "loss": 2.3934, "step": 5758 }, { "epoch": 0.6603600504529297, "grad_norm": 0.26011231980751, "learning_rate": 0.00027322814384824645, "loss": 2.448, "step": 5759 }, { "epoch": 0.6604747162022704, "grad_norm": 0.25564062319716885, "learning_rate": 0.0002730626615431551, "loss": 2.4743, "step": 5760 }, { "epoch": 0.6605893819516111, "grad_norm": 0.2258519372107094, "learning_rate": 0.0002728972105397436, "loss": 2.4715, "step": 5761 }, { "epoch": 0.6607040477009517, "grad_norm": 0.2474470814769647, "learning_rate": 0.0002727317908608329, "loss": 2.4973, "step": 5762 }, { "epoch": 0.6608187134502924, "grad_norm": 0.2521469502411131, "learning_rate": 0.0002725664025292395, "loss": 2.4968, "step": 5763 }, { "epoch": 0.660933379199633, "grad_norm": 0.22188981807127156, "learning_rate": 0.00027240104556777565, "loss": 2.4193, "step": 5764 }, { "epoch": 0.6610480449489737, "grad_norm": 0.26844795854860976, "learning_rate": 0.00027223571999924914, "loss": 2.4611, "step": 5765 }, { "epoch": 0.6611627106983144, "grad_norm": 0.2615122837564423, "learning_rate": 0.00027207042584646363, "loss": 2.4015, "step": 5766 }, { "epoch": 0.661277376447655, "grad_norm": 0.24321498550275653, "learning_rate": 0.000271905163132218, "loss": 2.3518, "step": 5767 }, { "epoch": 0.6613920421969958, "grad_norm": 0.24328017601829807, "learning_rate": 0.00027173993187930696, "loss": 2.4403, "step": 5768 }, { "epoch": 0.6615067079463365, "grad_norm": 0.26482781169651587, "learning_rate": 0.00027157473211052174, "loss": 2.4241, "step": 5769 }, { "epoch": 0.6616213736956771, "grad_norm": 0.24617465758440277, "learning_rate": 0.0002714095638486478, "loss": 2.4408, "step": 5770 }, { "epoch": 0.6617360394450178, "grad_norm": 0.25793754815303105, "learning_rate": 0.0002712444271164672, "loss": 2.4667, "step": 5771 }, { "epoch": 0.6618507051943584, "grad_norm": 0.2666174470617704, "learning_rate": 0.00027107932193675766, "loss": 2.3816, "step": 5772 }, { "epoch": 0.6619653709436991, "grad_norm": 0.2600776433261486, "learning_rate": 0.00027091424833229155, "loss": 2.3813, "step": 5773 }, { "epoch": 0.6620800366930398, "grad_norm": 0.26017058961566003, "learning_rate": 0.0002707492063258384, "loss": 2.4452, "step": 5774 }, { "epoch": 0.6621947024423804, "grad_norm": 0.24664364353920692, "learning_rate": 0.00027058419594016246, "loss": 2.5027, "step": 5775 }, { "epoch": 0.6623093681917211, "grad_norm": 0.2768164244078615, "learning_rate": 0.00027041921719802344, "loss": 2.4844, "step": 5776 }, { "epoch": 0.6624240339410618, "grad_norm": 0.2611758363815006, "learning_rate": 0.00027025427012217717, "loss": 2.3916, "step": 5777 }, { "epoch": 0.6625386996904025, "grad_norm": 0.26198707558714446, "learning_rate": 0.00027008935473537497, "loss": 2.535, "step": 5778 }, { "epoch": 0.6626533654397432, "grad_norm": 0.24101031711537704, "learning_rate": 0.0002699244710603639, "loss": 2.33, "step": 5779 }, { "epoch": 0.6627680311890838, "grad_norm": 0.26695295253175233, "learning_rate": 0.00026975961911988633, "loss": 2.3467, "step": 5780 }, { "epoch": 0.6628826969384245, "grad_norm": 0.2523948482778285, "learning_rate": 0.0002695947989366807, "loss": 2.5101, "step": 5781 }, { "epoch": 0.6629973626877652, "grad_norm": 0.24880321366928507, "learning_rate": 0.0002694300105334805, "loss": 2.5354, "step": 5782 }, { "epoch": 0.6631120284371058, "grad_norm": 0.2586214705167903, "learning_rate": 0.00026926525393301527, "loss": 2.3773, "step": 5783 }, { "epoch": 0.6632266941864465, "grad_norm": 0.2260697621130077, "learning_rate": 0.00026910052915801003, "loss": 2.4925, "step": 5784 }, { "epoch": 0.6633413599357871, "grad_norm": 0.22549051760704042, "learning_rate": 0.00026893583623118547, "loss": 2.4412, "step": 5785 }, { "epoch": 0.6634560256851278, "grad_norm": 0.25216009994666233, "learning_rate": 0.00026877117517525777, "loss": 2.5138, "step": 5786 }, { "epoch": 0.6635706914344686, "grad_norm": 0.2289944579339804, "learning_rate": 0.0002686065460129391, "loss": 2.442, "step": 5787 }, { "epoch": 0.6636853571838092, "grad_norm": 0.2268511314711116, "learning_rate": 0.0002684419487669362, "loss": 2.2968, "step": 5788 }, { "epoch": 0.6638000229331499, "grad_norm": 0.2517235733942, "learning_rate": 0.000268277383459953, "loss": 2.4164, "step": 5789 }, { "epoch": 0.6639146886824906, "grad_norm": 0.24532212714591592, "learning_rate": 0.00026811285011468745, "loss": 2.6298, "step": 5790 }, { "epoch": 0.6640293544318312, "grad_norm": 0.2511226033114938, "learning_rate": 0.00026794834875383414, "loss": 2.2943, "step": 5791 }, { "epoch": 0.6641440201811719, "grad_norm": 0.2296211956024753, "learning_rate": 0.00026778387940008276, "loss": 2.3497, "step": 5792 }, { "epoch": 0.6642586859305125, "grad_norm": 0.2579091474860311, "learning_rate": 0.0002676194420761188, "loss": 2.5644, "step": 5793 }, { "epoch": 0.6643733516798532, "grad_norm": 0.2845383516106061, "learning_rate": 0.00026745503680462325, "loss": 2.3564, "step": 5794 }, { "epoch": 0.664488017429194, "grad_norm": 0.23801234657020823, "learning_rate": 0.0002672906636082728, "loss": 2.4536, "step": 5795 }, { "epoch": 0.6646026831785345, "grad_norm": 0.26748769531200295, "learning_rate": 0.0002671263225097393, "loss": 2.4308, "step": 5796 }, { "epoch": 0.6647173489278753, "grad_norm": 0.231109445136059, "learning_rate": 0.0002669620135316906, "loss": 2.3721, "step": 5797 }, { "epoch": 0.6648320146772159, "grad_norm": 0.27985227682006536, "learning_rate": 0.0002667977366967901, "loss": 2.386, "step": 5798 }, { "epoch": 0.6649466804265566, "grad_norm": 0.25301374917522906, "learning_rate": 0.0002666334920276965, "loss": 2.5929, "step": 5799 }, { "epoch": 0.6650613461758973, "grad_norm": 0.2481723864450886, "learning_rate": 0.00026646927954706434, "loss": 2.4493, "step": 5800 }, { "epoch": 0.6651760119252379, "grad_norm": 0.2571158282055341, "learning_rate": 0.00026630509927754375, "loss": 2.466, "step": 5801 }, { "epoch": 0.6652906776745786, "grad_norm": 0.2579527850583459, "learning_rate": 0.00026614095124177995, "loss": 2.4284, "step": 5802 }, { "epoch": 0.6654053434239193, "grad_norm": 0.2622022799194773, "learning_rate": 0.0002659768354624139, "loss": 2.4838, "step": 5803 }, { "epoch": 0.6655200091732599, "grad_norm": 0.2568510598247887, "learning_rate": 0.0002658127519620829, "loss": 2.5334, "step": 5804 }, { "epoch": 0.6656346749226006, "grad_norm": 0.2514932722055232, "learning_rate": 0.00026564870076341865, "loss": 2.3849, "step": 5805 }, { "epoch": 0.6657493406719412, "grad_norm": 0.25904607420170395, "learning_rate": 0.0002654846818890489, "loss": 2.4995, "step": 5806 }, { "epoch": 0.665864006421282, "grad_norm": 0.25564826804265495, "learning_rate": 0.0002653206953615972, "loss": 2.371, "step": 5807 }, { "epoch": 0.6659786721706227, "grad_norm": 0.24061487582797872, "learning_rate": 0.0002651567412036818, "loss": 2.3945, "step": 5808 }, { "epoch": 0.6660933379199633, "grad_norm": 0.24164705421958846, "learning_rate": 0.0002649928194379177, "loss": 2.4871, "step": 5809 }, { "epoch": 0.666208003669304, "grad_norm": 0.24557890888663322, "learning_rate": 0.00026482893008691465, "loss": 2.5372, "step": 5810 }, { "epoch": 0.6663226694186446, "grad_norm": 0.25386631532968834, "learning_rate": 0.0002646650731732777, "loss": 2.3595, "step": 5811 }, { "epoch": 0.6664373351679853, "grad_norm": 0.24488325798568564, "learning_rate": 0.00026450124871960805, "loss": 2.4271, "step": 5812 }, { "epoch": 0.666552000917326, "grad_norm": 0.2429752532187373, "learning_rate": 0.0002643374567485022, "loss": 2.3799, "step": 5813 }, { "epoch": 0.6666666666666666, "grad_norm": 0.2546202728494158, "learning_rate": 0.00026417369728255194, "loss": 2.4456, "step": 5814 }, { "epoch": 0.6667813324160073, "grad_norm": 0.26078461944946285, "learning_rate": 0.0002640099703443449, "loss": 2.4225, "step": 5815 }, { "epoch": 0.6668959981653481, "grad_norm": 0.23396062942653115, "learning_rate": 0.00026384627595646426, "loss": 2.3843, "step": 5816 }, { "epoch": 0.6670106639146887, "grad_norm": 0.27587231849473237, "learning_rate": 0.0002636826141414882, "loss": 2.4722, "step": 5817 }, { "epoch": 0.6671253296640294, "grad_norm": 0.23880852441174227, "learning_rate": 0.0002635189849219907, "loss": 2.2687, "step": 5818 }, { "epoch": 0.66723999541337, "grad_norm": 0.2385612950245045, "learning_rate": 0.0002633553883205419, "loss": 2.4924, "step": 5819 }, { "epoch": 0.6673546611627107, "grad_norm": 0.2257002183425713, "learning_rate": 0.0002631918243597062, "loss": 2.4018, "step": 5820 }, { "epoch": 0.6674693269120514, "grad_norm": 0.24560813459014622, "learning_rate": 0.00026302829306204436, "loss": 2.3393, "step": 5821 }, { "epoch": 0.667583992661392, "grad_norm": 0.26520992093483164, "learning_rate": 0.0002628647944501126, "loss": 2.434, "step": 5822 }, { "epoch": 0.6676986584107327, "grad_norm": 0.2483553760706253, "learning_rate": 0.0002627013285464618, "loss": 2.489, "step": 5823 }, { "epoch": 0.6678133241600734, "grad_norm": 0.255928078299804, "learning_rate": 0.0002625378953736396, "loss": 2.5033, "step": 5824 }, { "epoch": 0.667927989909414, "grad_norm": 0.24211697108188346, "learning_rate": 0.0002623744949541886, "loss": 2.3963, "step": 5825 }, { "epoch": 0.6680426556587548, "grad_norm": 0.2647890559606929, "learning_rate": 0.0002622111273106462, "loss": 2.3193, "step": 5826 }, { "epoch": 0.6681573214080954, "grad_norm": 0.277788007715186, "learning_rate": 0.0002620477924655461, "loss": 2.4702, "step": 5827 }, { "epoch": 0.6682719871574361, "grad_norm": 0.2460607124802482, "learning_rate": 0.0002618844904414173, "loss": 2.369, "step": 5828 }, { "epoch": 0.6683866529067768, "grad_norm": 0.24410150939426625, "learning_rate": 0.0002617212212607841, "loss": 2.4882, "step": 5829 }, { "epoch": 0.6685013186561174, "grad_norm": 0.26404685752331813, "learning_rate": 0.00026155798494616645, "loss": 2.2492, "step": 5830 }, { "epoch": 0.6686159844054581, "grad_norm": 0.2450358696295318, "learning_rate": 0.0002613947815200798, "loss": 2.3861, "step": 5831 }, { "epoch": 0.6687306501547987, "grad_norm": 0.256508753509733, "learning_rate": 0.00026123161100503466, "loss": 2.5351, "step": 5832 }, { "epoch": 0.6688453159041394, "grad_norm": 0.26715591941633976, "learning_rate": 0.00026106847342353745, "loss": 2.478, "step": 5833 }, { "epoch": 0.6689599816534801, "grad_norm": 0.2574387525470375, "learning_rate": 0.00026090536879808986, "loss": 2.3845, "step": 5834 }, { "epoch": 0.6690746474028207, "grad_norm": 0.2903925699704349, "learning_rate": 0.0002607422971511891, "loss": 2.4275, "step": 5835 }, { "epoch": 0.6691893131521615, "grad_norm": 0.2804494104622146, "learning_rate": 0.00026057925850532775, "loss": 2.5161, "step": 5836 }, { "epoch": 0.6693039789015022, "grad_norm": 0.22938752247390093, "learning_rate": 0.00026041625288299414, "loss": 2.4119, "step": 5837 }, { "epoch": 0.6694186446508428, "grad_norm": 0.2813340993743448, "learning_rate": 0.00026025328030667116, "loss": 2.5075, "step": 5838 }, { "epoch": 0.6695333104001835, "grad_norm": 0.2451700677017847, "learning_rate": 0.00026009034079883857, "loss": 2.3208, "step": 5839 }, { "epoch": 0.6696479761495241, "grad_norm": 0.2444477972459729, "learning_rate": 0.0002599274343819702, "loss": 2.5459, "step": 5840 }, { "epoch": 0.6697626418988648, "grad_norm": 0.2717160189980225, "learning_rate": 0.000259764561078536, "loss": 2.2996, "step": 5841 }, { "epoch": 0.6698773076482055, "grad_norm": 0.2264375927879016, "learning_rate": 0.0002596017209110013, "loss": 2.356, "step": 5842 }, { "epoch": 0.6699919733975461, "grad_norm": 0.22376510689130266, "learning_rate": 0.0002594389139018269, "loss": 2.4212, "step": 5843 }, { "epoch": 0.6701066391468868, "grad_norm": 0.25114334735399785, "learning_rate": 0.00025927614007346874, "loss": 2.5236, "step": 5844 }, { "epoch": 0.6702213048962274, "grad_norm": 0.263593600167426, "learning_rate": 0.0002591133994483786, "loss": 2.3822, "step": 5845 }, { "epoch": 0.6703359706455682, "grad_norm": 0.21509395210204282, "learning_rate": 0.0002589506920490031, "loss": 2.3675, "step": 5846 }, { "epoch": 0.6704506363949089, "grad_norm": 0.26201572383920296, "learning_rate": 0.0002587880178977849, "loss": 2.2854, "step": 5847 }, { "epoch": 0.6705653021442495, "grad_norm": 0.25025803403755875, "learning_rate": 0.00025862537701716164, "loss": 2.5073, "step": 5848 }, { "epoch": 0.6706799678935902, "grad_norm": 0.23987446527945874, "learning_rate": 0.0002584627694295666, "loss": 2.287, "step": 5849 }, { "epoch": 0.6707946336429309, "grad_norm": 0.25166852219440267, "learning_rate": 0.0002583001951574284, "loss": 2.4286, "step": 5850 }, { "epoch": 0.6709092993922715, "grad_norm": 0.2532772960086042, "learning_rate": 0.0002581376542231713, "loss": 2.4012, "step": 5851 }, { "epoch": 0.6710239651416122, "grad_norm": 0.24124231838592292, "learning_rate": 0.0002579751466492143, "loss": 2.4907, "step": 5852 }, { "epoch": 0.6711386308909528, "grad_norm": 0.28441037507188494, "learning_rate": 0.0002578126724579721, "loss": 2.4885, "step": 5853 }, { "epoch": 0.6712532966402935, "grad_norm": 0.2549522000471048, "learning_rate": 0.00025765023167185575, "loss": 2.4948, "step": 5854 }, { "epoch": 0.6713679623896343, "grad_norm": 0.25523779783028466, "learning_rate": 0.00025748782431327013, "loss": 2.2664, "step": 5855 }, { "epoch": 0.6714826281389749, "grad_norm": 0.21679774321743178, "learning_rate": 0.0002573254504046163, "loss": 2.3899, "step": 5856 }, { "epoch": 0.6715972938883156, "grad_norm": 0.2603769775819472, "learning_rate": 0.00025716310996829096, "loss": 2.4745, "step": 5857 }, { "epoch": 0.6717119596376563, "grad_norm": 0.23727766944641868, "learning_rate": 0.0002570008030266856, "loss": 2.4293, "step": 5858 }, { "epoch": 0.6718266253869969, "grad_norm": 0.25059253054758934, "learning_rate": 0.00025683852960218747, "loss": 2.32, "step": 5859 }, { "epoch": 0.6719412911363376, "grad_norm": 0.23024222526876786, "learning_rate": 0.0002566762897171794, "loss": 2.3824, "step": 5860 }, { "epoch": 0.6720559568856782, "grad_norm": 0.2700322663754082, "learning_rate": 0.0002565140833940387, "loss": 2.4785, "step": 5861 }, { "epoch": 0.6721706226350189, "grad_norm": 0.2566635676578736, "learning_rate": 0.00025635191065513896, "loss": 2.4051, "step": 5862 }, { "epoch": 0.6722852883843596, "grad_norm": 0.2662057413338458, "learning_rate": 0.00025618977152284874, "loss": 2.3916, "step": 5863 }, { "epoch": 0.6723999541337002, "grad_norm": 0.2439899583835536, "learning_rate": 0.0002560276660195322, "loss": 2.406, "step": 5864 }, { "epoch": 0.672514619883041, "grad_norm": 0.2408159922043852, "learning_rate": 0.0002558655941675485, "loss": 2.304, "step": 5865 }, { "epoch": 0.6726292856323816, "grad_norm": 0.24346162156251644, "learning_rate": 0.00025570355598925266, "loss": 2.4549, "step": 5866 }, { "epoch": 0.6727439513817223, "grad_norm": 0.27339626600201794, "learning_rate": 0.00025554155150699445, "loss": 2.3451, "step": 5867 }, { "epoch": 0.672858617131063, "grad_norm": 0.2621980925090863, "learning_rate": 0.0002553795807431192, "loss": 2.4296, "step": 5868 }, { "epoch": 0.6729732828804036, "grad_norm": 0.28952244705526664, "learning_rate": 0.0002552176437199682, "loss": 2.5053, "step": 5869 }, { "epoch": 0.6730879486297443, "grad_norm": 0.25947310150940167, "learning_rate": 0.00025505574045987713, "loss": 2.4611, "step": 5870 }, { "epoch": 0.673202614379085, "grad_norm": 0.23732402243097353, "learning_rate": 0.0002548938709851776, "loss": 2.4868, "step": 5871 }, { "epoch": 0.6733172801284256, "grad_norm": 0.2462447821195796, "learning_rate": 0.00025473203531819653, "loss": 2.4188, "step": 5872 }, { "epoch": 0.6734319458777663, "grad_norm": 0.2569346102223702, "learning_rate": 0.0002545702334812557, "loss": 2.4894, "step": 5873 }, { "epoch": 0.6735466116271069, "grad_norm": 0.2649165498663336, "learning_rate": 0.00025440846549667297, "loss": 2.3394, "step": 5874 }, { "epoch": 0.6736612773764477, "grad_norm": 0.26382775321129204, "learning_rate": 0.00025424673138676123, "loss": 2.4476, "step": 5875 }, { "epoch": 0.6737759431257884, "grad_norm": 0.23108671893421445, "learning_rate": 0.0002540850311738282, "loss": 2.4266, "step": 5876 }, { "epoch": 0.673890608875129, "grad_norm": 0.2335579654361149, "learning_rate": 0.00025392336488017764, "loss": 2.4573, "step": 5877 }, { "epoch": 0.6740052746244697, "grad_norm": 0.23736260011359034, "learning_rate": 0.00025376173252810813, "loss": 2.4134, "step": 5878 }, { "epoch": 0.6741199403738103, "grad_norm": 0.21910237059644166, "learning_rate": 0.00025360013413991395, "loss": 2.38, "step": 5879 }, { "epoch": 0.674234606123151, "grad_norm": 0.23505053392981368, "learning_rate": 0.0002534385697378845, "loss": 2.377, "step": 5880 }, { "epoch": 0.6743492718724917, "grad_norm": 0.24578701409554926, "learning_rate": 0.00025327703934430456, "loss": 2.4878, "step": 5881 }, { "epoch": 0.6744639376218323, "grad_norm": 0.22919335517878578, "learning_rate": 0.0002531155429814539, "loss": 2.5216, "step": 5882 }, { "epoch": 0.674578603371173, "grad_norm": 0.27020361971866275, "learning_rate": 0.00025295408067160807, "loss": 2.3832, "step": 5883 }, { "epoch": 0.6746932691205138, "grad_norm": 0.2166826344627892, "learning_rate": 0.0002527926524370378, "loss": 2.4304, "step": 5884 }, { "epoch": 0.6748079348698544, "grad_norm": 0.23714389448091572, "learning_rate": 0.00025263125830000873, "loss": 2.3412, "step": 5885 }, { "epoch": 0.6749226006191951, "grad_norm": 0.28015954831863465, "learning_rate": 0.00025246989828278237, "loss": 2.5086, "step": 5886 }, { "epoch": 0.6750372663685357, "grad_norm": 0.22883550320154789, "learning_rate": 0.0002523085724076154, "loss": 2.4369, "step": 5887 }, { "epoch": 0.6751519321178764, "grad_norm": 0.2537664035537909, "learning_rate": 0.00025214728069675906, "loss": 2.4068, "step": 5888 }, { "epoch": 0.6752665978672171, "grad_norm": 0.23571029398729612, "learning_rate": 0.00025198602317246123, "loss": 2.4127, "step": 5889 }, { "epoch": 0.6753812636165577, "grad_norm": 0.2811325464767358, "learning_rate": 0.0002518247998569637, "loss": 2.561, "step": 5890 }, { "epoch": 0.6754959293658984, "grad_norm": 0.22473587153980507, "learning_rate": 0.0002516636107725044, "loss": 2.5216, "step": 5891 }, { "epoch": 0.6756105951152391, "grad_norm": 0.24642430135370247, "learning_rate": 0.00025150245594131624, "loss": 2.4197, "step": 5892 }, { "epoch": 0.6757252608645797, "grad_norm": 0.250258597634242, "learning_rate": 0.00025134133538562756, "loss": 2.4669, "step": 5893 }, { "epoch": 0.6758399266139204, "grad_norm": 0.2635142679032383, "learning_rate": 0.00025118024912766174, "loss": 2.354, "step": 5894 }, { "epoch": 0.675954592363261, "grad_norm": 0.25277435164073214, "learning_rate": 0.0002510191971896376, "loss": 2.3408, "step": 5895 }, { "epoch": 0.6760692581126018, "grad_norm": 0.2578933627793284, "learning_rate": 0.00025085817959376945, "loss": 2.4225, "step": 5896 }, { "epoch": 0.6761839238619425, "grad_norm": 0.2675630706458878, "learning_rate": 0.00025069719636226616, "loss": 2.3976, "step": 5897 }, { "epoch": 0.6762985896112831, "grad_norm": 0.25339599527676654, "learning_rate": 0.0002505362475173325, "loss": 2.4903, "step": 5898 }, { "epoch": 0.6764132553606238, "grad_norm": 0.2387584991554649, "learning_rate": 0.0002503753330811682, "loss": 2.4778, "step": 5899 }, { "epoch": 0.6765279211099644, "grad_norm": 0.24591141481484705, "learning_rate": 0.00025021445307596847, "loss": 2.3851, "step": 5900 }, { "epoch": 0.6766425868593051, "grad_norm": 0.23155630142824957, "learning_rate": 0.0002500536075239236, "loss": 2.3125, "step": 5901 }, { "epoch": 0.6767572526086458, "grad_norm": 0.232707155713306, "learning_rate": 0.00024989279644721936, "loss": 2.4229, "step": 5902 }, { "epoch": 0.6768719183579864, "grad_norm": 0.26805417164901, "learning_rate": 0.000249732019868036, "loss": 2.3585, "step": 5903 }, { "epoch": 0.6769865841073271, "grad_norm": 0.24279181077830103, "learning_rate": 0.00024957127780855033, "loss": 2.347, "step": 5904 }, { "epoch": 0.6771012498566679, "grad_norm": 0.272616109737779, "learning_rate": 0.00024941057029093306, "loss": 2.5327, "step": 5905 }, { "epoch": 0.6772159156060085, "grad_norm": 0.2551979228619229, "learning_rate": 0.0002492498973373509, "loss": 2.4281, "step": 5906 }, { "epoch": 0.6773305813553492, "grad_norm": 0.24572448791223372, "learning_rate": 0.00024908925896996583, "loss": 2.4757, "step": 5907 }, { "epoch": 0.6774452471046898, "grad_norm": 0.26693233352896395, "learning_rate": 0.00024892865521093454, "loss": 2.3794, "step": 5908 }, { "epoch": 0.6775599128540305, "grad_norm": 0.2773657909438781, "learning_rate": 0.0002487680860824095, "loss": 2.3026, "step": 5909 }, { "epoch": 0.6776745786033712, "grad_norm": 0.2176424816177569, "learning_rate": 0.0002486075516065382, "loss": 2.4033, "step": 5910 }, { "epoch": 0.6777892443527118, "grad_norm": 0.24747854142718895, "learning_rate": 0.000248447051805463, "loss": 2.3994, "step": 5911 }, { "epoch": 0.6779039101020525, "grad_norm": 0.27009090810666164, "learning_rate": 0.000248286586701322, "loss": 2.4939, "step": 5912 }, { "epoch": 0.6780185758513931, "grad_norm": 0.26388707263814826, "learning_rate": 0.00024812615631624824, "loss": 2.4566, "step": 5913 }, { "epoch": 0.6781332416007338, "grad_norm": 0.24910976625115885, "learning_rate": 0.0002479657606723701, "loss": 2.4372, "step": 5914 }, { "epoch": 0.6782479073500746, "grad_norm": 0.2559728568280635, "learning_rate": 0.00024780539979181107, "loss": 2.4859, "step": 5915 }, { "epoch": 0.6783625730994152, "grad_norm": 0.2648773513855566, "learning_rate": 0.00024764507369669, "loss": 2.3807, "step": 5916 }, { "epoch": 0.6784772388487559, "grad_norm": 0.24838837270309236, "learning_rate": 0.00024748478240912053, "loss": 2.2707, "step": 5917 }, { "epoch": 0.6785919045980966, "grad_norm": 0.27010994079675804, "learning_rate": 0.0002473245259512118, "loss": 2.4327, "step": 5918 }, { "epoch": 0.6787065703474372, "grad_norm": 0.2436045844203098, "learning_rate": 0.0002471643043450686, "loss": 2.2979, "step": 5919 }, { "epoch": 0.6788212360967779, "grad_norm": 0.2617427159749266, "learning_rate": 0.00024700411761278995, "loss": 2.3195, "step": 5920 }, { "epoch": 0.6789359018461185, "grad_norm": 0.24559457889463165, "learning_rate": 0.0002468439657764708, "loss": 2.3256, "step": 5921 }, { "epoch": 0.6790505675954592, "grad_norm": 0.2719261131528808, "learning_rate": 0.0002466838488582011, "loss": 2.4911, "step": 5922 }, { "epoch": 0.6791652333448, "grad_norm": 0.27087362183702196, "learning_rate": 0.00024652376688006543, "loss": 2.4066, "step": 5923 }, { "epoch": 0.6792798990941405, "grad_norm": 0.27600723664610594, "learning_rate": 0.0002463637198641446, "loss": 2.3256, "step": 5924 }, { "epoch": 0.6793945648434813, "grad_norm": 0.2652221131227717, "learning_rate": 0.00024620370783251413, "loss": 2.3651, "step": 5925 }, { "epoch": 0.679509230592822, "grad_norm": 0.25463130838323844, "learning_rate": 0.00024604373080724414, "loss": 2.3179, "step": 5926 }, { "epoch": 0.6796238963421626, "grad_norm": 0.2382997507235637, "learning_rate": 0.00024588378881040064, "loss": 2.3115, "step": 5927 }, { "epoch": 0.6797385620915033, "grad_norm": 0.264290060030737, "learning_rate": 0.00024572388186404456, "loss": 2.3649, "step": 5928 }, { "epoch": 0.6798532278408439, "grad_norm": 0.26962157118235613, "learning_rate": 0.000245564009990232, "loss": 2.5403, "step": 5929 }, { "epoch": 0.6799678935901846, "grad_norm": 0.25068342091440526, "learning_rate": 0.00024540417321101434, "loss": 2.3642, "step": 5930 }, { "epoch": 0.6800825593395253, "grad_norm": 0.2786768365078446, "learning_rate": 0.0002452443715484381, "loss": 2.4606, "step": 5931 }, { "epoch": 0.6801972250888659, "grad_norm": 0.27548258901410927, "learning_rate": 0.00024508460502454453, "loss": 2.3439, "step": 5932 }, { "epoch": 0.6803118908382066, "grad_norm": 0.23455765887383576, "learning_rate": 0.00024492487366137056, "loss": 2.4906, "step": 5933 }, { "epoch": 0.6804265565875472, "grad_norm": 0.25440478626484697, "learning_rate": 0.0002447651774809481, "loss": 2.3705, "step": 5934 }, { "epoch": 0.680541222336888, "grad_norm": 0.25749006959661125, "learning_rate": 0.0002446055165053042, "loss": 2.3652, "step": 5935 }, { "epoch": 0.6806558880862287, "grad_norm": 0.2521559261381318, "learning_rate": 0.0002444458907564611, "loss": 2.3948, "step": 5936 }, { "epoch": 0.6807705538355693, "grad_norm": 0.27495783942904434, "learning_rate": 0.00024428630025643617, "loss": 2.2922, "step": 5937 }, { "epoch": 0.68088521958491, "grad_norm": 0.2837050212860146, "learning_rate": 0.00024412674502724142, "loss": 2.3345, "step": 5938 }, { "epoch": 0.6809998853342507, "grad_norm": 0.2395640420658184, "learning_rate": 0.00024396722509088497, "loss": 2.4224, "step": 5939 }, { "epoch": 0.6811145510835913, "grad_norm": 0.2268010809022975, "learning_rate": 0.0002438077404693696, "loss": 2.3514, "step": 5940 }, { "epoch": 0.681229216832932, "grad_norm": 0.2679908185901851, "learning_rate": 0.0002436482911846928, "loss": 2.4255, "step": 5941 }, { "epoch": 0.6813438825822726, "grad_norm": 0.26518135787583974, "learning_rate": 0.00024348887725884766, "loss": 2.4844, "step": 5942 }, { "epoch": 0.6814585483316133, "grad_norm": 0.26472901173473073, "learning_rate": 0.00024332949871382238, "loss": 2.547, "step": 5943 }, { "epoch": 0.6815732140809541, "grad_norm": 0.23726327026324298, "learning_rate": 0.00024317015557160022, "loss": 2.5146, "step": 5944 }, { "epoch": 0.6816878798302947, "grad_norm": 0.2764433058310707, "learning_rate": 0.00024301084785415938, "loss": 2.3204, "step": 5945 }, { "epoch": 0.6818025455796354, "grad_norm": 0.25050205161613665, "learning_rate": 0.00024285157558347372, "loss": 2.5113, "step": 5946 }, { "epoch": 0.681917211328976, "grad_norm": 0.27296474692877987, "learning_rate": 0.00024269233878151125, "loss": 2.4571, "step": 5947 }, { "epoch": 0.6820318770783167, "grad_norm": 0.25927578895452735, "learning_rate": 0.00024253313747023592, "loss": 2.3687, "step": 5948 }, { "epoch": 0.6821465428276574, "grad_norm": 0.22678160139598205, "learning_rate": 0.00024237397167160653, "loss": 2.5486, "step": 5949 }, { "epoch": 0.682261208576998, "grad_norm": 0.2659364625002715, "learning_rate": 0.00024221484140757692, "loss": 2.375, "step": 5950 }, { "epoch": 0.6823758743263387, "grad_norm": 0.2529910655021268, "learning_rate": 0.00024205574670009618, "loss": 2.4485, "step": 5951 }, { "epoch": 0.6824905400756794, "grad_norm": 0.2426440602915467, "learning_rate": 0.00024189668757110844, "loss": 2.4361, "step": 5952 }, { "epoch": 0.68260520582502, "grad_norm": 0.25004059774554244, "learning_rate": 0.00024173766404255237, "loss": 2.6313, "step": 5953 }, { "epoch": 0.6827198715743608, "grad_norm": 0.2357702364768859, "learning_rate": 0.00024157867613636304, "loss": 2.3629, "step": 5954 }, { "epoch": 0.6828345373237014, "grad_norm": 0.2531489651126013, "learning_rate": 0.00024141972387446914, "loss": 2.5759, "step": 5955 }, { "epoch": 0.6829492030730421, "grad_norm": 0.2484231619181877, "learning_rate": 0.0002412608072787954, "loss": 2.3616, "step": 5956 }, { "epoch": 0.6830638688223828, "grad_norm": 0.2948318936902473, "learning_rate": 0.00024110192637126122, "loss": 2.5161, "step": 5957 }, { "epoch": 0.6831785345717234, "grad_norm": 0.28413265794348663, "learning_rate": 0.00024094308117378127, "loss": 2.3751, "step": 5958 }, { "epoch": 0.6832932003210641, "grad_norm": 0.24185703249314555, "learning_rate": 0.00024078427170826523, "loss": 2.2914, "step": 5959 }, { "epoch": 0.6834078660704048, "grad_norm": 0.270538450484696, "learning_rate": 0.000240625497996618, "loss": 2.4241, "step": 5960 }, { "epoch": 0.6835225318197454, "grad_norm": 0.2584247070936721, "learning_rate": 0.00024046676006073898, "loss": 2.3998, "step": 5961 }, { "epoch": 0.6836371975690861, "grad_norm": 0.24547427202955674, "learning_rate": 0.00024030805792252325, "loss": 2.4195, "step": 5962 }, { "epoch": 0.6837518633184267, "grad_norm": 0.24529444483689977, "learning_rate": 0.00024014939160386074, "loss": 2.4509, "step": 5963 }, { "epoch": 0.6838665290677675, "grad_norm": 0.2520558691476565, "learning_rate": 0.00023999076112663647, "loss": 2.3378, "step": 5964 }, { "epoch": 0.6839811948171082, "grad_norm": 0.2535389506748618, "learning_rate": 0.00023983216651273043, "loss": 2.4644, "step": 5965 }, { "epoch": 0.6840958605664488, "grad_norm": 0.2451114482287227, "learning_rate": 0.00023967360778401803, "loss": 2.4831, "step": 5966 }, { "epoch": 0.6842105263157895, "grad_norm": 0.25715563112976164, "learning_rate": 0.00023951508496236897, "loss": 2.2813, "step": 5967 }, { "epoch": 0.6843251920651301, "grad_norm": 0.2439105770800984, "learning_rate": 0.00023935659806964844, "loss": 2.3361, "step": 5968 }, { "epoch": 0.6844398578144708, "grad_norm": 0.24473107374652628, "learning_rate": 0.00023919814712771725, "loss": 2.6024, "step": 5969 }, { "epoch": 0.6845545235638115, "grad_norm": 0.22765717686941522, "learning_rate": 0.0002390397321584301, "loss": 2.3261, "step": 5970 }, { "epoch": 0.6846691893131521, "grad_norm": 0.2649468817745486, "learning_rate": 0.00023888135318363764, "loss": 2.4759, "step": 5971 }, { "epoch": 0.6847838550624928, "grad_norm": 0.2424686437415284, "learning_rate": 0.00023872301022518527, "loss": 2.4309, "step": 5972 }, { "epoch": 0.6848985208118336, "grad_norm": 0.23988201752151206, "learning_rate": 0.00023856470330491287, "loss": 2.3969, "step": 5973 }, { "epoch": 0.6850131865611742, "grad_norm": 0.2733974010482567, "learning_rate": 0.00023840643244465643, "loss": 2.2964, "step": 5974 }, { "epoch": 0.6851278523105149, "grad_norm": 0.263365702608744, "learning_rate": 0.0002382481976662464, "loss": 2.4853, "step": 5975 }, { "epoch": 0.6852425180598555, "grad_norm": 0.24022707045594938, "learning_rate": 0.0002380899989915079, "loss": 2.3139, "step": 5976 }, { "epoch": 0.6853571838091962, "grad_norm": 0.25887098517347007, "learning_rate": 0.00023793183644226152, "loss": 2.4615, "step": 5977 }, { "epoch": 0.6854718495585369, "grad_norm": 0.26789157388600776, "learning_rate": 0.00023777371004032289, "loss": 2.4133, "step": 5978 }, { "epoch": 0.6855865153078775, "grad_norm": 0.23428414183566829, "learning_rate": 0.0002376156198075024, "loss": 2.3758, "step": 5979 }, { "epoch": 0.6857011810572182, "grad_norm": 0.24782346397380386, "learning_rate": 0.00023745756576560572, "loss": 2.4501, "step": 5980 }, { "epoch": 0.6858158468065588, "grad_norm": 0.25427364735193364, "learning_rate": 0.00023729954793643348, "loss": 2.4936, "step": 5981 }, { "epoch": 0.6859305125558995, "grad_norm": 0.2592222653160551, "learning_rate": 0.0002371415663417809, "loss": 2.4839, "step": 5982 }, { "epoch": 0.6860451783052403, "grad_norm": 0.25862299505307546, "learning_rate": 0.00023698362100343858, "loss": 2.4485, "step": 5983 }, { "epoch": 0.6861598440545809, "grad_norm": 0.25706240565145433, "learning_rate": 0.00023682571194319247, "loss": 2.354, "step": 5984 }, { "epoch": 0.6862745098039216, "grad_norm": 0.25473879671946764, "learning_rate": 0.0002366678391828227, "loss": 2.3497, "step": 5985 }, { "epoch": 0.6863891755532623, "grad_norm": 0.259146785436911, "learning_rate": 0.00023651000274410493, "loss": 2.4152, "step": 5986 }, { "epoch": 0.6865038413026029, "grad_norm": 0.26580596935281386, "learning_rate": 0.0002363522026488099, "loss": 2.397, "step": 5987 }, { "epoch": 0.6866185070519436, "grad_norm": 0.2637236203569202, "learning_rate": 0.00023619443891870258, "loss": 2.2371, "step": 5988 }, { "epoch": 0.6867331728012842, "grad_norm": 0.3099883403327138, "learning_rate": 0.00023603671157554397, "loss": 2.4009, "step": 5989 }, { "epoch": 0.6868478385506249, "grad_norm": 0.2717404622692867, "learning_rate": 0.00023587902064108962, "loss": 2.4706, "step": 5990 }, { "epoch": 0.6869625042999656, "grad_norm": 0.27227966807734244, "learning_rate": 0.00023572136613708955, "loss": 2.3522, "step": 5991 }, { "epoch": 0.6870771700493062, "grad_norm": 0.25207348136714425, "learning_rate": 0.0002355637480852894, "loss": 2.3323, "step": 5992 }, { "epoch": 0.687191835798647, "grad_norm": 0.2323301108521448, "learning_rate": 0.00023540616650742963, "loss": 2.2851, "step": 5993 }, { "epoch": 0.6873065015479877, "grad_norm": 0.2556784604113841, "learning_rate": 0.00023524862142524557, "loss": 2.338, "step": 5994 }, { "epoch": 0.6874211672973283, "grad_norm": 0.256515269018604, "learning_rate": 0.00023509111286046752, "loss": 2.5354, "step": 5995 }, { "epoch": 0.687535833046669, "grad_norm": 0.22087769681952038, "learning_rate": 0.00023493364083482105, "loss": 2.3333, "step": 5996 }, { "epoch": 0.6876504987960096, "grad_norm": 0.2551928427023443, "learning_rate": 0.00023477620537002604, "loss": 2.3491, "step": 5997 }, { "epoch": 0.6877651645453503, "grad_norm": 0.2426545704312682, "learning_rate": 0.00023461880648779782, "loss": 2.4617, "step": 5998 }, { "epoch": 0.687879830294691, "grad_norm": 0.28228693609092514, "learning_rate": 0.00023446144420984666, "loss": 2.4737, "step": 5999 }, { "epoch": 0.6879944960440316, "grad_norm": 0.2660732786067178, "learning_rate": 0.00023430411855787764, "loss": 2.5198, "step": 6000 }, { "epoch": 0.6881091617933723, "grad_norm": 0.2682803624536527, "learning_rate": 0.00023414682955359085, "loss": 2.285, "step": 6001 }, { "epoch": 0.6882238275427129, "grad_norm": 0.25371106817736977, "learning_rate": 0.00023398957721868147, "loss": 2.4452, "step": 6002 }, { "epoch": 0.6883384932920537, "grad_norm": 0.2684407354524099, "learning_rate": 0.0002338323615748389, "loss": 2.3738, "step": 6003 }, { "epoch": 0.6884531590413944, "grad_norm": 0.2718203646472465, "learning_rate": 0.0002336751826437488, "loss": 2.4163, "step": 6004 }, { "epoch": 0.688567824790735, "grad_norm": 0.25776345692107555, "learning_rate": 0.00023351804044709052, "loss": 2.3195, "step": 6005 }, { "epoch": 0.6886824905400757, "grad_norm": 0.28017188060966125, "learning_rate": 0.00023336093500653888, "loss": 2.5245, "step": 6006 }, { "epoch": 0.6887971562894164, "grad_norm": 0.2828575163434155, "learning_rate": 0.00023320386634376368, "loss": 2.3966, "step": 6007 }, { "epoch": 0.688911822038757, "grad_norm": 0.27808779909933234, "learning_rate": 0.00023304683448042957, "loss": 2.4189, "step": 6008 }, { "epoch": 0.6890264877880977, "grad_norm": 0.27887565906848655, "learning_rate": 0.00023288983943819597, "loss": 2.4518, "step": 6009 }, { "epoch": 0.6891411535374383, "grad_norm": 0.2932893899820623, "learning_rate": 0.00023273288123871767, "loss": 2.4071, "step": 6010 }, { "epoch": 0.689255819286779, "grad_norm": 0.23137782650760233, "learning_rate": 0.00023257595990364366, "loss": 2.3965, "step": 6011 }, { "epoch": 0.6893704850361198, "grad_norm": 0.26637552574109913, "learning_rate": 0.00023241907545461837, "loss": 2.4212, "step": 6012 }, { "epoch": 0.6894851507854604, "grad_norm": 0.2504518103977167, "learning_rate": 0.0002322622279132811, "loss": 2.3614, "step": 6013 }, { "epoch": 0.6895998165348011, "grad_norm": 0.24774369044488778, "learning_rate": 0.0002321054173012659, "loss": 2.4518, "step": 6014 }, { "epoch": 0.6897144822841417, "grad_norm": 0.25556935747182274, "learning_rate": 0.0002319486436402019, "loss": 2.3862, "step": 6015 }, { "epoch": 0.6898291480334824, "grad_norm": 0.2304107534739128, "learning_rate": 0.00023179190695171316, "loss": 2.3165, "step": 6016 }, { "epoch": 0.6899438137828231, "grad_norm": 0.25369502599549154, "learning_rate": 0.0002316352072574181, "loss": 2.544, "step": 6017 }, { "epoch": 0.6900584795321637, "grad_norm": 0.25800459517868296, "learning_rate": 0.00023147854457893058, "loss": 2.5028, "step": 6018 }, { "epoch": 0.6901731452815044, "grad_norm": 0.2599825953979226, "learning_rate": 0.0002313219189378597, "loss": 2.5127, "step": 6019 }, { "epoch": 0.6902878110308451, "grad_norm": 0.27956996653213906, "learning_rate": 0.00023116533035580844, "loss": 2.3869, "step": 6020 }, { "epoch": 0.6904024767801857, "grad_norm": 0.25144555438504407, "learning_rate": 0.00023100877885437544, "loss": 2.4285, "step": 6021 }, { "epoch": 0.6905171425295265, "grad_norm": 0.26793971796127836, "learning_rate": 0.00023085226445515412, "loss": 2.5434, "step": 6022 }, { "epoch": 0.690631808278867, "grad_norm": 0.2524798530443962, "learning_rate": 0.0002306957871797321, "loss": 2.3822, "step": 6023 }, { "epoch": 0.6907464740282078, "grad_norm": 0.25414619201184835, "learning_rate": 0.00023053934704969303, "loss": 2.3981, "step": 6024 }, { "epoch": 0.6908611397775485, "grad_norm": 0.2680085705854303, "learning_rate": 0.00023038294408661488, "loss": 2.3863, "step": 6025 }, { "epoch": 0.6909758055268891, "grad_norm": 0.2523206307534517, "learning_rate": 0.00023022657831207, "loss": 2.4861, "step": 6026 }, { "epoch": 0.6910904712762298, "grad_norm": 0.2211428267397594, "learning_rate": 0.00023007024974762625, "loss": 2.3405, "step": 6027 }, { "epoch": 0.6912051370255705, "grad_norm": 0.21711186694922344, "learning_rate": 0.0002299139584148463, "loss": 2.3254, "step": 6028 }, { "epoch": 0.6913198027749111, "grad_norm": 0.2687171898704878, "learning_rate": 0.00022975770433528742, "loss": 2.4673, "step": 6029 }, { "epoch": 0.6914344685242518, "grad_norm": 0.2526553915969993, "learning_rate": 0.00022960148753050198, "loss": 2.3097, "step": 6030 }, { "epoch": 0.6915491342735924, "grad_norm": 0.2400595220874449, "learning_rate": 0.00022944530802203723, "loss": 2.3928, "step": 6031 }, { "epoch": 0.6916638000229332, "grad_norm": 0.24070170839143495, "learning_rate": 0.00022928916583143483, "loss": 2.3958, "step": 6032 }, { "epoch": 0.6917784657722739, "grad_norm": 0.261669425278507, "learning_rate": 0.00022913306098023157, "loss": 2.3152, "step": 6033 }, { "epoch": 0.6918931315216145, "grad_norm": 0.2611638711887135, "learning_rate": 0.00022897699348995977, "loss": 2.5859, "step": 6034 }, { "epoch": 0.6920077972709552, "grad_norm": 0.2420817711503355, "learning_rate": 0.00022882096338214532, "loss": 2.4329, "step": 6035 }, { "epoch": 0.6921224630202958, "grad_norm": 0.2535777943157481, "learning_rate": 0.00022866497067830982, "loss": 2.5085, "step": 6036 }, { "epoch": 0.6922371287696365, "grad_norm": 0.2582391630889939, "learning_rate": 0.00022850901539996971, "loss": 2.4327, "step": 6037 }, { "epoch": 0.6923517945189772, "grad_norm": 0.242483854221785, "learning_rate": 0.00022835309756863548, "loss": 2.4958, "step": 6038 }, { "epoch": 0.6924664602683178, "grad_norm": 0.2586813358047851, "learning_rate": 0.00022819721720581355, "loss": 2.4243, "step": 6039 }, { "epoch": 0.6925811260176585, "grad_norm": 0.24531309423144862, "learning_rate": 0.00022804137433300465, "loss": 2.3632, "step": 6040 }, { "epoch": 0.6926957917669992, "grad_norm": 0.2499376248500328, "learning_rate": 0.00022788556897170397, "loss": 2.5065, "step": 6041 }, { "epoch": 0.6928104575163399, "grad_norm": 0.23896445264405852, "learning_rate": 0.00022772980114340208, "loss": 2.3797, "step": 6042 }, { "epoch": 0.6929251232656806, "grad_norm": 0.23312729924962428, "learning_rate": 0.0002275740708695842, "loss": 2.2866, "step": 6043 }, { "epoch": 0.6930397890150212, "grad_norm": 0.23391535979741276, "learning_rate": 0.00022741837817173032, "loss": 2.451, "step": 6044 }, { "epoch": 0.6931544547643619, "grad_norm": 0.2655517868447734, "learning_rate": 0.0002272627230713153, "loss": 2.5203, "step": 6045 }, { "epoch": 0.6932691205137026, "grad_norm": 0.25581189120813497, "learning_rate": 0.00022710710558980897, "loss": 2.4073, "step": 6046 }, { "epoch": 0.6933837862630432, "grad_norm": 0.25545915385073414, "learning_rate": 0.00022695152574867545, "loss": 2.3968, "step": 6047 }, { "epoch": 0.6934984520123839, "grad_norm": 0.2536850588934234, "learning_rate": 0.00022679598356937415, "loss": 2.4762, "step": 6048 }, { "epoch": 0.6936131177617245, "grad_norm": 0.23372803142015164, "learning_rate": 0.00022664047907335917, "loss": 2.3904, "step": 6049 }, { "epoch": 0.6937277835110652, "grad_norm": 0.23391094359989, "learning_rate": 0.00022648501228207942, "loss": 2.4163, "step": 6050 }, { "epoch": 0.693842449260406, "grad_norm": 0.2840064279502021, "learning_rate": 0.0002263295832169785, "loss": 2.5135, "step": 6051 }, { "epoch": 0.6939571150097466, "grad_norm": 0.23929194980645876, "learning_rate": 0.00022617419189949518, "loss": 2.4789, "step": 6052 }, { "epoch": 0.6940717807590873, "grad_norm": 0.27090188683192695, "learning_rate": 0.00022601883835106208, "loss": 2.2868, "step": 6053 }, { "epoch": 0.694186446508428, "grad_norm": 0.2624206903796548, "learning_rate": 0.0002258635225931081, "loss": 2.3578, "step": 6054 }, { "epoch": 0.6943011122577686, "grad_norm": 0.2123345810483625, "learning_rate": 0.0002257082446470554, "loss": 2.3363, "step": 6055 }, { "epoch": 0.6944157780071093, "grad_norm": 0.23344845397915376, "learning_rate": 0.00022555300453432192, "loss": 2.3605, "step": 6056 }, { "epoch": 0.6945304437564499, "grad_norm": 0.23884032436175504, "learning_rate": 0.00022539780227632007, "loss": 2.4283, "step": 6057 }, { "epoch": 0.6946451095057906, "grad_norm": 0.2542484066507928, "learning_rate": 0.00022524263789445694, "loss": 2.3803, "step": 6058 }, { "epoch": 0.6947597752551313, "grad_norm": 0.2413493138490213, "learning_rate": 0.00022508751141013463, "loss": 2.4603, "step": 6059 }, { "epoch": 0.6948744410044719, "grad_norm": 0.24658475187835932, "learning_rate": 0.00022493242284474992, "loss": 2.1539, "step": 6060 }, { "epoch": 0.6949891067538126, "grad_norm": 0.25866980424589175, "learning_rate": 0.00022477737221969407, "loss": 2.2465, "step": 6061 }, { "epoch": 0.6951037725031534, "grad_norm": 0.2909943831876313, "learning_rate": 0.00022462235955635347, "loss": 2.6218, "step": 6062 }, { "epoch": 0.695218438252494, "grad_norm": 0.2507038658433087, "learning_rate": 0.00022446738487610923, "loss": 2.4541, "step": 6063 }, { "epoch": 0.6953331040018347, "grad_norm": 0.2484650058685007, "learning_rate": 0.0002243124482003371, "loss": 2.5083, "step": 6064 }, { "epoch": 0.6954477697511753, "grad_norm": 0.26319575810254775, "learning_rate": 0.00022415754955040762, "loss": 2.4375, "step": 6065 }, { "epoch": 0.695562435500516, "grad_norm": 0.2548088509715983, "learning_rate": 0.0002240026889476864, "loss": 2.4561, "step": 6066 }, { "epoch": 0.6956771012498567, "grad_norm": 0.2392221857286319, "learning_rate": 0.00022384786641353299, "loss": 2.3351, "step": 6067 }, { "epoch": 0.6957917669991973, "grad_norm": 0.2526821430350191, "learning_rate": 0.00022369308196930237, "loss": 2.376, "step": 6068 }, { "epoch": 0.695906432748538, "grad_norm": 0.2723388980766227, "learning_rate": 0.00022353833563634452, "loss": 2.4998, "step": 6069 }, { "epoch": 0.6960210984978786, "grad_norm": 0.2631895448480991, "learning_rate": 0.00022338362743600326, "loss": 2.2407, "step": 6070 }, { "epoch": 0.6961357642472193, "grad_norm": 0.2637832750776056, "learning_rate": 0.0002232289573896178, "loss": 2.5123, "step": 6071 }, { "epoch": 0.6962504299965601, "grad_norm": 0.2720490386181197, "learning_rate": 0.00022307432551852213, "loss": 2.3557, "step": 6072 }, { "epoch": 0.6963650957459007, "grad_norm": 0.27838838904850055, "learning_rate": 0.00022291973184404419, "loss": 2.3983, "step": 6073 }, { "epoch": 0.6964797614952414, "grad_norm": 0.2512977583300789, "learning_rate": 0.00022276517638750783, "loss": 2.4426, "step": 6074 }, { "epoch": 0.6965944272445821, "grad_norm": 0.2820055149496258, "learning_rate": 0.00022261065917023094, "loss": 2.3921, "step": 6075 }, { "epoch": 0.6967090929939227, "grad_norm": 0.2539027499816261, "learning_rate": 0.00022245618021352593, "loss": 2.2748, "step": 6076 }, { "epoch": 0.6968237587432634, "grad_norm": 0.2643284120615805, "learning_rate": 0.00022230173953870043, "loss": 2.3921, "step": 6077 }, { "epoch": 0.696938424492604, "grad_norm": 0.2818843205409747, "learning_rate": 0.0002221473371670565, "loss": 2.6005, "step": 6078 }, { "epoch": 0.6970530902419447, "grad_norm": 0.24661314052114677, "learning_rate": 0.00022199297311989108, "loss": 2.4509, "step": 6079 }, { "epoch": 0.6971677559912854, "grad_norm": 0.2475283108277519, "learning_rate": 0.00022183864741849569, "loss": 2.4362, "step": 6080 }, { "epoch": 0.697282421740626, "grad_norm": 0.2576912035458726, "learning_rate": 0.00022168436008415687, "loss": 2.5478, "step": 6081 }, { "epoch": 0.6973970874899668, "grad_norm": 0.24904693317698942, "learning_rate": 0.00022153011113815519, "loss": 2.3927, "step": 6082 }, { "epoch": 0.6975117532393074, "grad_norm": 0.24650944303798017, "learning_rate": 0.00022137590060176637, "loss": 2.4531, "step": 6083 }, { "epoch": 0.6976264189886481, "grad_norm": 0.28935174808384223, "learning_rate": 0.00022122172849626143, "loss": 2.2884, "step": 6084 }, { "epoch": 0.6977410847379888, "grad_norm": 0.24574798396334227, "learning_rate": 0.00022106759484290483, "loss": 2.3338, "step": 6085 }, { "epoch": 0.6978557504873294, "grad_norm": 0.25787007344997465, "learning_rate": 0.0002209134996629566, "loss": 2.3664, "step": 6086 }, { "epoch": 0.6979704162366701, "grad_norm": 0.25545711015600603, "learning_rate": 0.00022075944297767137, "loss": 2.4057, "step": 6087 }, { "epoch": 0.6980850819860108, "grad_norm": 0.27204289605649984, "learning_rate": 0.00022060542480829782, "loss": 2.5412, "step": 6088 }, { "epoch": 0.6981997477353514, "grad_norm": 0.2348975156654947, "learning_rate": 0.00022045144517608033, "loss": 2.4248, "step": 6089 }, { "epoch": 0.6983144134846921, "grad_norm": 0.2534379627374989, "learning_rate": 0.00022029750410225752, "loss": 2.4142, "step": 6090 }, { "epoch": 0.6984290792340327, "grad_norm": 0.25886380375220064, "learning_rate": 0.00022014360160806218, "loss": 2.4203, "step": 6091 }, { "epoch": 0.6985437449833735, "grad_norm": 0.2529740471850219, "learning_rate": 0.00021998973771472248, "loss": 2.361, "step": 6092 }, { "epoch": 0.6986584107327142, "grad_norm": 0.27199155463472, "learning_rate": 0.00021983591244346097, "loss": 2.4755, "step": 6093 }, { "epoch": 0.6987730764820548, "grad_norm": 0.2783746824463265, "learning_rate": 0.00021968212581549495, "loss": 2.4007, "step": 6094 }, { "epoch": 0.6988877422313955, "grad_norm": 0.25293474977809655, "learning_rate": 0.00021952837785203634, "loss": 2.5032, "step": 6095 }, { "epoch": 0.6990024079807362, "grad_norm": 0.2529987731629939, "learning_rate": 0.00021937466857429184, "loss": 2.5169, "step": 6096 }, { "epoch": 0.6991170737300768, "grad_norm": 0.24316642182432266, "learning_rate": 0.00021922099800346246, "loss": 2.3067, "step": 6097 }, { "epoch": 0.6992317394794175, "grad_norm": 0.2542560577671439, "learning_rate": 0.00021906736616074428, "loss": 2.4223, "step": 6098 }, { "epoch": 0.6993464052287581, "grad_norm": 0.2544461721743063, "learning_rate": 0.0002189137730673279, "loss": 2.3984, "step": 6099 }, { "epoch": 0.6994610709780988, "grad_norm": 0.23192157333733684, "learning_rate": 0.0002187602187443985, "loss": 2.4086, "step": 6100 }, { "epoch": 0.6995757367274396, "grad_norm": 0.2549273280267901, "learning_rate": 0.00021860670321313604, "loss": 2.3486, "step": 6101 }, { "epoch": 0.6996904024767802, "grad_norm": 0.24624058290959622, "learning_rate": 0.00021845322649471517, "loss": 2.5079, "step": 6102 }, { "epoch": 0.6998050682261209, "grad_norm": 0.2608300303069196, "learning_rate": 0.00021829978861030459, "loss": 2.4843, "step": 6103 }, { "epoch": 0.6999197339754615, "grad_norm": 0.2475642543440845, "learning_rate": 0.0002181463895810688, "loss": 2.2747, "step": 6104 }, { "epoch": 0.7000343997248022, "grad_norm": 0.2464811384869547, "learning_rate": 0.00021799302942816578, "loss": 2.2378, "step": 6105 }, { "epoch": 0.7001490654741429, "grad_norm": 0.273222174520215, "learning_rate": 0.00021783970817274885, "loss": 2.3874, "step": 6106 }, { "epoch": 0.7002637312234835, "grad_norm": 0.2528763674954494, "learning_rate": 0.0002176864258359657, "loss": 2.2565, "step": 6107 }, { "epoch": 0.7003783969728242, "grad_norm": 0.2769712007325071, "learning_rate": 0.0002175331824389587, "loss": 2.4165, "step": 6108 }, { "epoch": 0.7004930627221649, "grad_norm": 0.25442949628112366, "learning_rate": 0.0002173799780028649, "loss": 2.3315, "step": 6109 }, { "epoch": 0.7006077284715055, "grad_norm": 0.2735310373855664, "learning_rate": 0.00021722681254881604, "loss": 2.514, "step": 6110 }, { "epoch": 0.7007223942208463, "grad_norm": 0.26606288204312917, "learning_rate": 0.00021707368609793815, "loss": 2.2473, "step": 6111 }, { "epoch": 0.7008370599701869, "grad_norm": 0.25193752688170873, "learning_rate": 0.0002169205986713521, "loss": 2.4051, "step": 6112 }, { "epoch": 0.7009517257195276, "grad_norm": 0.2312633852700128, "learning_rate": 0.00021676755029017354, "loss": 2.4278, "step": 6113 }, { "epoch": 0.7010663914688683, "grad_norm": 0.2468576856485859, "learning_rate": 0.00021661454097551247, "loss": 2.5959, "step": 6114 }, { "epoch": 0.7011810572182089, "grad_norm": 0.2302460985510695, "learning_rate": 0.00021646157074847372, "loss": 2.3457, "step": 6115 }, { "epoch": 0.7012957229675496, "grad_norm": 0.24866278183761428, "learning_rate": 0.0002163086396301567, "loss": 2.4546, "step": 6116 }, { "epoch": 0.7014103887168902, "grad_norm": 0.23669034397686303, "learning_rate": 0.00021615574764165496, "loss": 2.3993, "step": 6117 }, { "epoch": 0.7015250544662309, "grad_norm": 0.24069332240785304, "learning_rate": 0.00021600289480405715, "loss": 2.3245, "step": 6118 }, { "epoch": 0.7016397202155716, "grad_norm": 0.2787556975542305, "learning_rate": 0.0002158500811384469, "loss": 2.511, "step": 6119 }, { "epoch": 0.7017543859649122, "grad_norm": 0.24245597655539464, "learning_rate": 0.00021569730666590132, "loss": 2.4537, "step": 6120 }, { "epoch": 0.701869051714253, "grad_norm": 0.25192496185685076, "learning_rate": 0.000215544571407493, "loss": 2.4153, "step": 6121 }, { "epoch": 0.7019837174635937, "grad_norm": 0.249582614256594, "learning_rate": 0.00021539187538428906, "loss": 2.3925, "step": 6122 }, { "epoch": 0.7020983832129343, "grad_norm": 0.240094425234144, "learning_rate": 0.0002152392186173504, "loss": 2.3414, "step": 6123 }, { "epoch": 0.702213048962275, "grad_norm": 0.2505059477568348, "learning_rate": 0.0002150866011277337, "loss": 2.4865, "step": 6124 }, { "epoch": 0.7023277147116156, "grad_norm": 0.27627480796233034, "learning_rate": 0.0002149340229364896, "loss": 2.57, "step": 6125 }, { "epoch": 0.7024423804609563, "grad_norm": 0.24354523082493298, "learning_rate": 0.00021478148406466302, "loss": 2.4071, "step": 6126 }, { "epoch": 0.702557046210297, "grad_norm": 0.26369275833503175, "learning_rate": 0.00021462898453329394, "loss": 2.3926, "step": 6127 }, { "epoch": 0.7026717119596376, "grad_norm": 0.2724997102298509, "learning_rate": 0.00021447652436341674, "loss": 2.5047, "step": 6128 }, { "epoch": 0.7027863777089783, "grad_norm": 0.2538795470584879, "learning_rate": 0.00021432410357606046, "loss": 2.5197, "step": 6129 }, { "epoch": 0.702901043458319, "grad_norm": 0.23910144882538228, "learning_rate": 0.0002141717221922486, "loss": 2.4812, "step": 6130 }, { "epoch": 0.7030157092076597, "grad_norm": 0.23914988826423383, "learning_rate": 0.0002140193802329995, "loss": 2.4265, "step": 6131 }, { "epoch": 0.7031303749570004, "grad_norm": 0.2510558133713407, "learning_rate": 0.0002138670777193254, "loss": 2.3365, "step": 6132 }, { "epoch": 0.703245040706341, "grad_norm": 0.2906966616706873, "learning_rate": 0.00021371481467223358, "loss": 2.3397, "step": 6133 }, { "epoch": 0.7033597064556817, "grad_norm": 0.25808191233504235, "learning_rate": 0.0002135625911127263, "loss": 2.5402, "step": 6134 }, { "epoch": 0.7034743722050224, "grad_norm": 0.23651133898314605, "learning_rate": 0.00021341040706179942, "loss": 2.5378, "step": 6135 }, { "epoch": 0.703589037954363, "grad_norm": 0.24692749896526253, "learning_rate": 0.000213258262540444, "loss": 2.4893, "step": 6136 }, { "epoch": 0.7037037037037037, "grad_norm": 0.25971551793412295, "learning_rate": 0.0002131061575696457, "loss": 2.386, "step": 6137 }, { "epoch": 0.7038183694530443, "grad_norm": 0.2258367772341615, "learning_rate": 0.0002129540921703838, "loss": 2.4342, "step": 6138 }, { "epoch": 0.703933035202385, "grad_norm": 0.2380350894943928, "learning_rate": 0.00021280206636363358, "loss": 2.3896, "step": 6139 }, { "epoch": 0.7040477009517258, "grad_norm": 0.24301671728180796, "learning_rate": 0.0002126500801703639, "loss": 2.484, "step": 6140 }, { "epoch": 0.7041623667010664, "grad_norm": 0.2586781392192157, "learning_rate": 0.00021249813361153819, "loss": 2.4672, "step": 6141 }, { "epoch": 0.7042770324504071, "grad_norm": 0.2645662491101431, "learning_rate": 0.0002123462267081146, "loss": 2.3119, "step": 6142 }, { "epoch": 0.7043916981997478, "grad_norm": 0.2591620900363445, "learning_rate": 0.00021219435948104586, "loss": 2.3664, "step": 6143 }, { "epoch": 0.7045063639490884, "grad_norm": 0.26698700705465, "learning_rate": 0.00021204253195127916, "loss": 2.4214, "step": 6144 }, { "epoch": 0.7046210296984291, "grad_norm": 0.2575902621445413, "learning_rate": 0.00021189074413975622, "loss": 2.2619, "step": 6145 }, { "epoch": 0.7047356954477697, "grad_norm": 0.2294340362426549, "learning_rate": 0.00021173899606741342, "loss": 2.2688, "step": 6146 }, { "epoch": 0.7048503611971104, "grad_norm": 0.23319524984019208, "learning_rate": 0.0002115872877551812, "loss": 2.2491, "step": 6147 }, { "epoch": 0.7049650269464511, "grad_norm": 0.24685819642436638, "learning_rate": 0.00021143561922398497, "loss": 2.3206, "step": 6148 }, { "epoch": 0.7050796926957917, "grad_norm": 0.24236108209047455, "learning_rate": 0.00021128399049474456, "loss": 2.5078, "step": 6149 }, { "epoch": 0.7051943584451325, "grad_norm": 0.23443693361503415, "learning_rate": 0.0002111324015883742, "loss": 2.3633, "step": 6150 }, { "epoch": 0.705309024194473, "grad_norm": 0.2687937375866934, "learning_rate": 0.00021098085252578276, "loss": 2.3525, "step": 6151 }, { "epoch": 0.7054236899438138, "grad_norm": 0.2588047585068749, "learning_rate": 0.00021082934332787367, "loss": 2.3487, "step": 6152 }, { "epoch": 0.7055383556931545, "grad_norm": 0.23730068914264443, "learning_rate": 0.00021067787401554423, "loss": 2.4015, "step": 6153 }, { "epoch": 0.7056530214424951, "grad_norm": 0.2816192620347726, "learning_rate": 0.0002105264446096874, "loss": 2.4742, "step": 6154 }, { "epoch": 0.7057676871918358, "grad_norm": 0.28149055521360683, "learning_rate": 0.00021037505513118955, "loss": 2.4745, "step": 6155 }, { "epoch": 0.7058823529411765, "grad_norm": 0.2544194421912847, "learning_rate": 0.000210223705600932, "loss": 2.4349, "step": 6156 }, { "epoch": 0.7059970186905171, "grad_norm": 0.25789215153767386, "learning_rate": 0.00021007239603979073, "loss": 2.3918, "step": 6157 }, { "epoch": 0.7061116844398578, "grad_norm": 0.24298011585530466, "learning_rate": 0.00020992112646863581, "loss": 2.4304, "step": 6158 }, { "epoch": 0.7062263501891984, "grad_norm": 0.2277606776129831, "learning_rate": 0.00020976989690833214, "loss": 2.3179, "step": 6159 }, { "epoch": 0.7063410159385392, "grad_norm": 0.2763483069212914, "learning_rate": 0.0002096187073797391, "loss": 2.4563, "step": 6160 }, { "epoch": 0.7064556816878799, "grad_norm": 0.284254106250262, "learning_rate": 0.00020946755790371004, "loss": 2.4251, "step": 6161 }, { "epoch": 0.7065703474372205, "grad_norm": 0.2605713308098253, "learning_rate": 0.00020931644850109322, "loss": 2.3959, "step": 6162 }, { "epoch": 0.7066850131865612, "grad_norm": 0.24540620666098126, "learning_rate": 0.00020916537919273148, "loss": 2.3294, "step": 6163 }, { "epoch": 0.7067996789359019, "grad_norm": 0.29653632633471655, "learning_rate": 0.00020901434999946183, "loss": 2.4973, "step": 6164 }, { "epoch": 0.7069143446852425, "grad_norm": 0.2562281535222324, "learning_rate": 0.00020886336094211595, "loss": 2.2602, "step": 6165 }, { "epoch": 0.7070290104345832, "grad_norm": 0.2625016806204164, "learning_rate": 0.00020871241204151997, "loss": 2.3983, "step": 6166 }, { "epoch": 0.7071436761839238, "grad_norm": 0.26457796389038596, "learning_rate": 0.00020856150331849415, "loss": 2.5286, "step": 6167 }, { "epoch": 0.7072583419332645, "grad_norm": 0.24165276782952522, "learning_rate": 0.00020841063479385346, "loss": 2.3958, "step": 6168 }, { "epoch": 0.7073730076826052, "grad_norm": 0.24464755491755444, "learning_rate": 0.00020825980648840788, "loss": 2.389, "step": 6169 }, { "epoch": 0.7074876734319459, "grad_norm": 0.28052073661345367, "learning_rate": 0.00020810901842296072, "loss": 2.4862, "step": 6170 }, { "epoch": 0.7076023391812866, "grad_norm": 0.2706153663700323, "learning_rate": 0.00020795827061831045, "loss": 2.4206, "step": 6171 }, { "epoch": 0.7077170049306272, "grad_norm": 0.2405468956448794, "learning_rate": 0.00020780756309525024, "loss": 2.4842, "step": 6172 }, { "epoch": 0.7078316706799679, "grad_norm": 0.2707652751124375, "learning_rate": 0.00020765689587456655, "loss": 2.4222, "step": 6173 }, { "epoch": 0.7079463364293086, "grad_norm": 0.22660517580232212, "learning_rate": 0.00020750626897704172, "loss": 2.3964, "step": 6174 }, { "epoch": 0.7080610021786492, "grad_norm": 0.27332411494008946, "learning_rate": 0.0002073556824234518, "loss": 2.4166, "step": 6175 }, { "epoch": 0.7081756679279899, "grad_norm": 0.2471906328371607, "learning_rate": 0.00020720513623456705, "loss": 2.4128, "step": 6176 }, { "epoch": 0.7082903336773306, "grad_norm": 0.24240664026656653, "learning_rate": 0.0002070546304311525, "loss": 2.4684, "step": 6177 }, { "epoch": 0.7084049994266712, "grad_norm": 0.2605443583995794, "learning_rate": 0.00020690416503396774, "loss": 2.4241, "step": 6178 }, { "epoch": 0.708519665176012, "grad_norm": 0.22567155238789033, "learning_rate": 0.00020675374006376646, "loss": 2.4143, "step": 6179 }, { "epoch": 0.7086343309253526, "grad_norm": 0.24694175963077794, "learning_rate": 0.00020660335554129695, "loss": 2.4058, "step": 6180 }, { "epoch": 0.7087489966746933, "grad_norm": 0.23363024279092437, "learning_rate": 0.00020645301148730205, "loss": 2.4078, "step": 6181 }, { "epoch": 0.708863662424034, "grad_norm": 0.23511955165853576, "learning_rate": 0.00020630270792251854, "loss": 2.352, "step": 6182 }, { "epoch": 0.7089783281733746, "grad_norm": 0.2731031809677535, "learning_rate": 0.00020615244486767793, "loss": 2.3433, "step": 6183 }, { "epoch": 0.7090929939227153, "grad_norm": 0.2440709017597821, "learning_rate": 0.00020600222234350664, "loss": 2.3585, "step": 6184 }, { "epoch": 0.709207659672056, "grad_norm": 0.2609437795357541, "learning_rate": 0.00020585204037072447, "loss": 2.448, "step": 6185 }, { "epoch": 0.7093223254213966, "grad_norm": 0.25063624602538603, "learning_rate": 0.00020570189897004638, "loss": 2.3258, "step": 6186 }, { "epoch": 0.7094369911707373, "grad_norm": 0.26227554894561483, "learning_rate": 0.00020555179816218166, "loss": 2.4188, "step": 6187 }, { "epoch": 0.7095516569200779, "grad_norm": 0.24933635997827105, "learning_rate": 0.0002054017379678334, "loss": 2.4555, "step": 6188 }, { "epoch": 0.7096663226694186, "grad_norm": 0.282008584290258, "learning_rate": 0.00020525171840769996, "loss": 2.4421, "step": 6189 }, { "epoch": 0.7097809884187594, "grad_norm": 0.24081906831505961, "learning_rate": 0.00020510173950247373, "loss": 2.4536, "step": 6190 }, { "epoch": 0.7098956541681, "grad_norm": 0.2537676245957966, "learning_rate": 0.00020495180127284108, "loss": 2.4773, "step": 6191 }, { "epoch": 0.7100103199174407, "grad_norm": 0.25246782289630526, "learning_rate": 0.00020480190373948332, "loss": 2.4319, "step": 6192 }, { "epoch": 0.7101249856667813, "grad_norm": 0.25982914474746366, "learning_rate": 0.00020465204692307598, "loss": 2.3953, "step": 6193 }, { "epoch": 0.710239651416122, "grad_norm": 0.24891645510139418, "learning_rate": 0.00020450223084428888, "loss": 2.4694, "step": 6194 }, { "epoch": 0.7103543171654627, "grad_norm": 0.2794990509879442, "learning_rate": 0.00020435245552378635, "loss": 2.4143, "step": 6195 }, { "epoch": 0.7104689829148033, "grad_norm": 0.24623750048276213, "learning_rate": 0.00020420272098222724, "loss": 2.4088, "step": 6196 }, { "epoch": 0.710583648664144, "grad_norm": 0.24521645346691387, "learning_rate": 0.00020405302724026415, "loss": 2.5882, "step": 6197 }, { "epoch": 0.7106983144134847, "grad_norm": 0.24331008381446623, "learning_rate": 0.00020390337431854466, "loss": 2.3338, "step": 6198 }, { "epoch": 0.7108129801628253, "grad_norm": 0.2337402647747093, "learning_rate": 0.00020375376223771062, "loss": 2.3866, "step": 6199 }, { "epoch": 0.7109276459121661, "grad_norm": 0.2632061636402899, "learning_rate": 0.00020360419101839812, "loss": 2.3893, "step": 6200 }, { "epoch": 0.7110423116615067, "grad_norm": 0.2535343911901774, "learning_rate": 0.00020345466068123758, "loss": 2.4511, "step": 6201 }, { "epoch": 0.7111569774108474, "grad_norm": 0.24851256140778466, "learning_rate": 0.00020330517124685416, "loss": 2.458, "step": 6202 }, { "epoch": 0.7112716431601881, "grad_norm": 0.28028099997603184, "learning_rate": 0.00020315572273586646, "loss": 2.4785, "step": 6203 }, { "epoch": 0.7113863089095287, "grad_norm": 0.264445909212145, "learning_rate": 0.00020300631516888883, "loss": 2.4593, "step": 6204 }, { "epoch": 0.7115009746588694, "grad_norm": 0.25340463036099214, "learning_rate": 0.00020285694856652864, "loss": 2.5138, "step": 6205 }, { "epoch": 0.71161564040821, "grad_norm": 0.26817628504477736, "learning_rate": 0.00020270762294938833, "loss": 2.3434, "step": 6206 }, { "epoch": 0.7117303061575507, "grad_norm": 0.2518760529370294, "learning_rate": 0.00020255833833806452, "loss": 2.4225, "step": 6207 }, { "epoch": 0.7118449719068914, "grad_norm": 0.23481911222833723, "learning_rate": 0.00020240909475314816, "loss": 2.4746, "step": 6208 }, { "epoch": 0.711959637656232, "grad_norm": 0.26712626193944317, "learning_rate": 0.00020225989221522462, "loss": 2.4006, "step": 6209 }, { "epoch": 0.7120743034055728, "grad_norm": 0.29652177995680157, "learning_rate": 0.00020211073074487363, "loss": 2.4567, "step": 6210 }, { "epoch": 0.7121889691549135, "grad_norm": 0.2477749072997504, "learning_rate": 0.00020196161036266892, "loss": 2.5365, "step": 6211 }, { "epoch": 0.7123036349042541, "grad_norm": 0.24869835536299567, "learning_rate": 0.0002018125310891789, "loss": 2.3424, "step": 6212 }, { "epoch": 0.7124183006535948, "grad_norm": 0.2826178818608545, "learning_rate": 0.00020166349294496622, "loss": 2.5115, "step": 6213 }, { "epoch": 0.7125329664029354, "grad_norm": 0.24591263641471692, "learning_rate": 0.0002015144959505879, "loss": 2.5305, "step": 6214 }, { "epoch": 0.7126476321522761, "grad_norm": 0.27625355600767626, "learning_rate": 0.00020136554012659526, "loss": 2.3493, "step": 6215 }, { "epoch": 0.7127622979016168, "grad_norm": 0.2597629111832175, "learning_rate": 0.00020121662549353402, "loss": 2.4192, "step": 6216 }, { "epoch": 0.7128769636509574, "grad_norm": 0.24194281370495624, "learning_rate": 0.0002010677520719438, "loss": 2.4677, "step": 6217 }, { "epoch": 0.7129916294002981, "grad_norm": 0.2508260808037649, "learning_rate": 0.00020091891988235878, "loss": 2.4265, "step": 6218 }, { "epoch": 0.7131062951496389, "grad_norm": 0.2774659156109727, "learning_rate": 0.00020077012894530817, "loss": 2.436, "step": 6219 }, { "epoch": 0.7132209608989795, "grad_norm": 0.2717706387504273, "learning_rate": 0.00020062137928131418, "loss": 2.385, "step": 6220 }, { "epoch": 0.7133356266483202, "grad_norm": 0.24577502463146314, "learning_rate": 0.00020047267091089432, "loss": 2.4795, "step": 6221 }, { "epoch": 0.7134502923976608, "grad_norm": 0.2724709289924197, "learning_rate": 0.0002003240038545602, "loss": 2.4866, "step": 6222 }, { "epoch": 0.7135649581470015, "grad_norm": 0.23771636484757336, "learning_rate": 0.00020017537813281694, "loss": 2.295, "step": 6223 }, { "epoch": 0.7136796238963422, "grad_norm": 0.23673611553423884, "learning_rate": 0.00020002679376616533, "loss": 2.4352, "step": 6224 }, { "epoch": 0.7137942896456828, "grad_norm": 0.2450469193955017, "learning_rate": 0.0001998782507750997, "loss": 2.3508, "step": 6225 }, { "epoch": 0.7139089553950235, "grad_norm": 0.23537014443521556, "learning_rate": 0.00019972974918010833, "loss": 2.4463, "step": 6226 }, { "epoch": 0.7140236211443641, "grad_norm": 0.25208541887548586, "learning_rate": 0.00019958128900167437, "loss": 2.5648, "step": 6227 }, { "epoch": 0.7141382868937048, "grad_norm": 0.22977789869581988, "learning_rate": 0.00019943287026027518, "loss": 2.2609, "step": 6228 }, { "epoch": 0.7142529526430456, "grad_norm": 0.23599264727608193, "learning_rate": 0.0001992844929763821, "loss": 2.4706, "step": 6229 }, { "epoch": 0.7143676183923862, "grad_norm": 0.2498939136285986, "learning_rate": 0.00019913615717046112, "loss": 2.3906, "step": 6230 }, { "epoch": 0.7144822841417269, "grad_norm": 0.25055903426110343, "learning_rate": 0.00019898786286297242, "loss": 2.3875, "step": 6231 }, { "epoch": 0.7145969498910676, "grad_norm": 0.25410998216773506, "learning_rate": 0.00019883961007437006, "loss": 2.4278, "step": 6232 }, { "epoch": 0.7147116156404082, "grad_norm": 0.23318905116867952, "learning_rate": 0.0001986913988251027, "loss": 2.3679, "step": 6233 }, { "epoch": 0.7148262813897489, "grad_norm": 0.2736715339408412, "learning_rate": 0.00019854322913561372, "loss": 2.4014, "step": 6234 }, { "epoch": 0.7149409471390895, "grad_norm": 0.27218399871702403, "learning_rate": 0.00019839510102633985, "loss": 2.3683, "step": 6235 }, { "epoch": 0.7150556128884302, "grad_norm": 0.24468006633495115, "learning_rate": 0.00019824701451771266, "loss": 2.2595, "step": 6236 }, { "epoch": 0.7151702786377709, "grad_norm": 0.25197957782058705, "learning_rate": 0.00019809896963015805, "loss": 2.5135, "step": 6237 }, { "epoch": 0.7152849443871115, "grad_norm": 0.2906670527447589, "learning_rate": 0.00019795096638409548, "loss": 2.4263, "step": 6238 }, { "epoch": 0.7153996101364523, "grad_norm": 0.2389474328017037, "learning_rate": 0.0001978030047999397, "loss": 2.3709, "step": 6239 }, { "epoch": 0.7155142758857929, "grad_norm": 0.25059375558875024, "learning_rate": 0.00019765508489809913, "loss": 2.4834, "step": 6240 }, { "epoch": 0.7156289416351336, "grad_norm": 0.2485915415453671, "learning_rate": 0.00019750720669897627, "loss": 2.2594, "step": 6241 }, { "epoch": 0.7157436073844743, "grad_norm": 0.2782216358327332, "learning_rate": 0.00019735937022296817, "loss": 2.3428, "step": 6242 }, { "epoch": 0.7158582731338149, "grad_norm": 0.2443229624657192, "learning_rate": 0.00019721157549046613, "loss": 2.4073, "step": 6243 }, { "epoch": 0.7159729388831556, "grad_norm": 0.26841422563857065, "learning_rate": 0.00019706382252185556, "loss": 2.54, "step": 6244 }, { "epoch": 0.7160876046324963, "grad_norm": 0.2590504279642476, "learning_rate": 0.0001969161113375162, "loss": 2.3342, "step": 6245 }, { "epoch": 0.7162022703818369, "grad_norm": 0.2502875754748808, "learning_rate": 0.00019676844195782217, "loss": 2.4302, "step": 6246 }, { "epoch": 0.7163169361311776, "grad_norm": 0.2587318390869682, "learning_rate": 0.00019662081440314122, "loss": 2.3865, "step": 6247 }, { "epoch": 0.7164316018805182, "grad_norm": 0.2432238886924314, "learning_rate": 0.000196473228693836, "loss": 2.4883, "step": 6248 }, { "epoch": 0.716546267629859, "grad_norm": 0.23979503693380924, "learning_rate": 0.00019632568485026313, "loss": 2.4638, "step": 6249 }, { "epoch": 0.7166609333791997, "grad_norm": 0.25445703718254303, "learning_rate": 0.00019617818289277355, "loss": 2.4098, "step": 6250 }, { "epoch": 0.7167755991285403, "grad_norm": 0.25880728685465315, "learning_rate": 0.0001960307228417122, "loss": 2.5151, "step": 6251 }, { "epoch": 0.716890264877881, "grad_norm": 0.251278764904902, "learning_rate": 0.00019588330471741866, "loss": 2.257, "step": 6252 }, { "epoch": 0.7170049306272217, "grad_norm": 0.2510446734340795, "learning_rate": 0.00019573592854022588, "loss": 2.351, "step": 6253 }, { "epoch": 0.7171195963765623, "grad_norm": 0.2866824381754139, "learning_rate": 0.00019558859433046227, "loss": 2.4946, "step": 6254 }, { "epoch": 0.717234262125903, "grad_norm": 0.23490741112180144, "learning_rate": 0.00019544130210844934, "loss": 2.3133, "step": 6255 }, { "epoch": 0.7173489278752436, "grad_norm": 0.2391580626887255, "learning_rate": 0.00019529405189450333, "loss": 2.4463, "step": 6256 }, { "epoch": 0.7174635936245843, "grad_norm": 0.21821070999377562, "learning_rate": 0.00019514684370893466, "loss": 2.387, "step": 6257 }, { "epoch": 0.717578259373925, "grad_norm": 0.2510103018797439, "learning_rate": 0.0001949996775720479, "loss": 2.4026, "step": 6258 }, { "epoch": 0.7176929251232657, "grad_norm": 0.26956321145554135, "learning_rate": 0.00019485255350414171, "loss": 2.3856, "step": 6259 }, { "epoch": 0.7178075908726064, "grad_norm": 0.24787895222937587, "learning_rate": 0.00019470547152550938, "loss": 2.5316, "step": 6260 }, { "epoch": 0.717922256621947, "grad_norm": 0.26518199109472634, "learning_rate": 0.00019455843165643754, "loss": 2.2918, "step": 6261 }, { "epoch": 0.7180369223712877, "grad_norm": 0.25193365039969184, "learning_rate": 0.0001944114339172079, "loss": 2.4664, "step": 6262 }, { "epoch": 0.7181515881206284, "grad_norm": 0.24643153521798708, "learning_rate": 0.00019426447832809586, "loss": 2.4356, "step": 6263 }, { "epoch": 0.718266253869969, "grad_norm": 0.2744099837412709, "learning_rate": 0.00019411756490937115, "loss": 2.5434, "step": 6264 }, { "epoch": 0.7183809196193097, "grad_norm": 0.2452611451323808, "learning_rate": 0.00019397069368129777, "loss": 2.1427, "step": 6265 }, { "epoch": 0.7184955853686504, "grad_norm": 0.24903537160430875, "learning_rate": 0.00019382386466413394, "loss": 2.4363, "step": 6266 }, { "epoch": 0.718610251117991, "grad_norm": 0.2573727411348979, "learning_rate": 0.0001936770778781315, "loss": 2.3807, "step": 6267 }, { "epoch": 0.7187249168673318, "grad_norm": 0.25108334600727267, "learning_rate": 0.000193530333343537, "loss": 2.3154, "step": 6268 }, { "epoch": 0.7188395826166724, "grad_norm": 0.2593871158355924, "learning_rate": 0.00019338363108059142, "loss": 2.4615, "step": 6269 }, { "epoch": 0.7189542483660131, "grad_norm": 0.26274250915099306, "learning_rate": 0.0001932369711095292, "loss": 2.4868, "step": 6270 }, { "epoch": 0.7190689141153538, "grad_norm": 0.2419829542783949, "learning_rate": 0.00019309035345057942, "loss": 2.4262, "step": 6271 }, { "epoch": 0.7191835798646944, "grad_norm": 0.24149502549488625, "learning_rate": 0.00019294377812396524, "loss": 2.3419, "step": 6272 }, { "epoch": 0.7192982456140351, "grad_norm": 0.2383070200164491, "learning_rate": 0.00019279724514990356, "loss": 2.501, "step": 6273 }, { "epoch": 0.7194129113633757, "grad_norm": 0.265126603393842, "learning_rate": 0.0001926507545486062, "loss": 2.5041, "step": 6274 }, { "epoch": 0.7195275771127164, "grad_norm": 0.3003548243124077, "learning_rate": 0.00019250430634027883, "loss": 2.4454, "step": 6275 }, { "epoch": 0.7196422428620571, "grad_norm": 0.2316324848199238, "learning_rate": 0.00019235790054512086, "loss": 2.3529, "step": 6276 }, { "epoch": 0.7197569086113977, "grad_norm": 0.2472042958246401, "learning_rate": 0.0001922115371833263, "loss": 2.354, "step": 6277 }, { "epoch": 0.7198715743607385, "grad_norm": 0.29767880618529297, "learning_rate": 0.0001920652162750832, "loss": 2.3726, "step": 6278 }, { "epoch": 0.7199862401100792, "grad_norm": 0.2529227827924357, "learning_rate": 0.00019191893784057374, "loss": 2.4425, "step": 6279 }, { "epoch": 0.7201009058594198, "grad_norm": 0.26951910727094114, "learning_rate": 0.00019177270189997425, "loss": 2.3759, "step": 6280 }, { "epoch": 0.7202155716087605, "grad_norm": 0.29237078754309104, "learning_rate": 0.00019162650847345542, "loss": 2.4389, "step": 6281 }, { "epoch": 0.7203302373581011, "grad_norm": 0.295590979124636, "learning_rate": 0.00019148035758118138, "loss": 2.47, "step": 6282 }, { "epoch": 0.7204449031074418, "grad_norm": 0.27554983238441616, "learning_rate": 0.0001913342492433109, "loss": 2.3924, "step": 6283 }, { "epoch": 0.7205595688567825, "grad_norm": 0.27155205442555047, "learning_rate": 0.0001911881834799974, "loss": 2.4521, "step": 6284 }, { "epoch": 0.7206742346061231, "grad_norm": 0.2698033299533288, "learning_rate": 0.0001910421603113873, "loss": 2.419, "step": 6285 }, { "epoch": 0.7207889003554638, "grad_norm": 0.27553646680522675, "learning_rate": 0.00019089617975762198, "loss": 2.5869, "step": 6286 }, { "epoch": 0.7209035661048045, "grad_norm": 0.25486455796290763, "learning_rate": 0.00019075024183883672, "loss": 2.4118, "step": 6287 }, { "epoch": 0.7210182318541452, "grad_norm": 0.2685407220536627, "learning_rate": 0.00019060434657516045, "loss": 2.3728, "step": 6288 }, { "epoch": 0.7211328976034859, "grad_norm": 0.27426053160859754, "learning_rate": 0.0001904584939867171, "loss": 2.4482, "step": 6289 }, { "epoch": 0.7212475633528265, "grad_norm": 0.26708925021555474, "learning_rate": 0.0001903126840936243, "loss": 2.3926, "step": 6290 }, { "epoch": 0.7213622291021672, "grad_norm": 0.23446968936020765, "learning_rate": 0.00019016691691599335, "loss": 2.3789, "step": 6291 }, { "epoch": 0.7214768948515079, "grad_norm": 0.25998850130756773, "learning_rate": 0.00019002119247393028, "loss": 2.3509, "step": 6292 }, { "epoch": 0.7215915606008485, "grad_norm": 0.29050952876957775, "learning_rate": 0.000189875510787535, "loss": 2.4089, "step": 6293 }, { "epoch": 0.7217062263501892, "grad_norm": 0.2525993141090884, "learning_rate": 0.00018972987187690149, "loss": 2.3216, "step": 6294 }, { "epoch": 0.7218208920995298, "grad_norm": 0.25212862423752697, "learning_rate": 0.00018958427576211783, "loss": 2.4313, "step": 6295 }, { "epoch": 0.7219355578488705, "grad_norm": 0.29600205599174806, "learning_rate": 0.00018943872246326643, "loss": 2.4789, "step": 6296 }, { "epoch": 0.7220502235982112, "grad_norm": 0.27153656854742864, "learning_rate": 0.0001892932120004233, "loss": 2.4877, "step": 6297 }, { "epoch": 0.7221648893475519, "grad_norm": 0.27301803270981717, "learning_rate": 0.00018914774439365896, "loss": 2.4937, "step": 6298 }, { "epoch": 0.7222795550968926, "grad_norm": 0.27959387007577485, "learning_rate": 0.0001890023196630379, "loss": 2.3753, "step": 6299 }, { "epoch": 0.7223942208462333, "grad_norm": 0.24717647338007923, "learning_rate": 0.0001888569378286187, "loss": 2.3679, "step": 6300 }, { "epoch": 0.7225088865955739, "grad_norm": 0.25098079037604293, "learning_rate": 0.0001887115989104539, "loss": 2.5038, "step": 6301 }, { "epoch": 0.7226235523449146, "grad_norm": 0.27908070611136876, "learning_rate": 0.00018856630292859062, "loss": 2.5158, "step": 6302 }, { "epoch": 0.7227382180942552, "grad_norm": 0.2668373810654318, "learning_rate": 0.00018842104990306896, "loss": 2.4835, "step": 6303 }, { "epoch": 0.7228528838435959, "grad_norm": 0.2841194733746334, "learning_rate": 0.00018827583985392467, "loss": 2.4008, "step": 6304 }, { "epoch": 0.7229675495929366, "grad_norm": 0.265193063923442, "learning_rate": 0.000188130672801186, "loss": 2.5068, "step": 6305 }, { "epoch": 0.7230822153422772, "grad_norm": 0.2547161387389508, "learning_rate": 0.00018798554876487628, "loss": 2.3686, "step": 6306 }, { "epoch": 0.723196881091618, "grad_norm": 0.27225987870501694, "learning_rate": 0.00018784046776501261, "loss": 2.5141, "step": 6307 }, { "epoch": 0.7233115468409586, "grad_norm": 0.2264798497366811, "learning_rate": 0.00018769542982160608, "loss": 2.5625, "step": 6308 }, { "epoch": 0.7234262125902993, "grad_norm": 0.25793381300077495, "learning_rate": 0.00018755043495466195, "loss": 2.4379, "step": 6309 }, { "epoch": 0.72354087833964, "grad_norm": 0.24650215639070633, "learning_rate": 0.0001874054831841796, "loss": 2.4676, "step": 6310 }, { "epoch": 0.7236555440889806, "grad_norm": 0.2439435786702376, "learning_rate": 0.00018726057453015215, "loss": 2.4076, "step": 6311 }, { "epoch": 0.7237702098383213, "grad_norm": 0.2897404988533681, "learning_rate": 0.00018711570901256707, "loss": 2.5332, "step": 6312 }, { "epoch": 0.723884875587662, "grad_norm": 0.2414452327815872, "learning_rate": 0.0001869708866514058, "loss": 2.3204, "step": 6313 }, { "epoch": 0.7239995413370026, "grad_norm": 0.24871506945495983, "learning_rate": 0.00018682610746664386, "loss": 2.4646, "step": 6314 }, { "epoch": 0.7241142070863433, "grad_norm": 0.24184806579039836, "learning_rate": 0.00018668137147825076, "loss": 2.3158, "step": 6315 }, { "epoch": 0.7242288728356839, "grad_norm": 0.2757096431387559, "learning_rate": 0.00018653667870619024, "loss": 2.4439, "step": 6316 }, { "epoch": 0.7243435385850246, "grad_norm": 0.23700227749077915, "learning_rate": 0.00018639202917041954, "loss": 2.3628, "step": 6317 }, { "epoch": 0.7244582043343654, "grad_norm": 0.25116409805069695, "learning_rate": 0.0001862474228908903, "loss": 2.4153, "step": 6318 }, { "epoch": 0.724572870083706, "grad_norm": 0.2445765787977314, "learning_rate": 0.00018610285988754883, "loss": 2.5184, "step": 6319 }, { "epoch": 0.7246875358330467, "grad_norm": 0.2804681948833104, "learning_rate": 0.0001859583401803342, "loss": 2.3973, "step": 6320 }, { "epoch": 0.7248022015823874, "grad_norm": 0.257869270058341, "learning_rate": 0.00018581386378918035, "loss": 2.4286, "step": 6321 }, { "epoch": 0.724916867331728, "grad_norm": 0.2506811720828909, "learning_rate": 0.00018566943073401526, "loss": 2.4745, "step": 6322 }, { "epoch": 0.7250315330810687, "grad_norm": 0.2461873243266885, "learning_rate": 0.00018552504103476014, "loss": 2.3219, "step": 6323 }, { "epoch": 0.7251461988304093, "grad_norm": 0.28801032880318717, "learning_rate": 0.0001853806947113314, "loss": 2.3951, "step": 6324 }, { "epoch": 0.72526086457975, "grad_norm": 0.25259642828538714, "learning_rate": 0.00018523639178363876, "loss": 2.3336, "step": 6325 }, { "epoch": 0.7253755303290907, "grad_norm": 0.23776146399876796, "learning_rate": 0.00018509213227158578, "loss": 2.409, "step": 6326 }, { "epoch": 0.7254901960784313, "grad_norm": 0.2692431768410158, "learning_rate": 0.00018494791619507044, "loss": 2.3954, "step": 6327 }, { "epoch": 0.7256048618277721, "grad_norm": 0.23501032557711746, "learning_rate": 0.0001848037435739846, "loss": 2.466, "step": 6328 }, { "epoch": 0.7257195275771127, "grad_norm": 0.23494667264566466, "learning_rate": 0.00018465961442821422, "loss": 2.3036, "step": 6329 }, { "epoch": 0.7258341933264534, "grad_norm": 0.2525351815852477, "learning_rate": 0.00018451552877763906, "loss": 2.3913, "step": 6330 }, { "epoch": 0.7259488590757941, "grad_norm": 0.2411813687212816, "learning_rate": 0.00018437148664213315, "loss": 2.2822, "step": 6331 }, { "epoch": 0.7260635248251347, "grad_norm": 0.2596968333743399, "learning_rate": 0.00018422748804156414, "loss": 2.4222, "step": 6332 }, { "epoch": 0.7261781905744754, "grad_norm": 0.2603268655437906, "learning_rate": 0.00018408353299579377, "loss": 2.513, "step": 6333 }, { "epoch": 0.7262928563238161, "grad_norm": 0.263041376017191, "learning_rate": 0.00018393962152467847, "loss": 2.3774, "step": 6334 }, { "epoch": 0.7264075220731567, "grad_norm": 0.2448367694477783, "learning_rate": 0.0001837957536480675, "loss": 2.35, "step": 6335 }, { "epoch": 0.7265221878224974, "grad_norm": 0.24224252505582225, "learning_rate": 0.00018365192938580495, "loss": 2.3577, "step": 6336 }, { "epoch": 0.726636853571838, "grad_norm": 0.25752866169736355, "learning_rate": 0.00018350814875772874, "loss": 2.4191, "step": 6337 }, { "epoch": 0.7267515193211788, "grad_norm": 0.248726891606585, "learning_rate": 0.00018336441178367025, "loss": 2.4851, "step": 6338 }, { "epoch": 0.7268661850705195, "grad_norm": 0.26251566921382435, "learning_rate": 0.00018322071848345567, "loss": 2.4883, "step": 6339 }, { "epoch": 0.7269808508198601, "grad_norm": 0.24581560194528682, "learning_rate": 0.00018307706887690477, "loss": 2.1997, "step": 6340 }, { "epoch": 0.7270955165692008, "grad_norm": 0.27848330607461524, "learning_rate": 0.00018293346298383097, "loss": 2.5403, "step": 6341 }, { "epoch": 0.7272101823185414, "grad_norm": 0.2560841098437568, "learning_rate": 0.00018278990082404206, "loss": 2.4487, "step": 6342 }, { "epoch": 0.7273248480678821, "grad_norm": 0.25605803918238557, "learning_rate": 0.00018264638241733983, "loss": 2.4992, "step": 6343 }, { "epoch": 0.7274395138172228, "grad_norm": 0.25471666680789784, "learning_rate": 0.0001825029077835198, "loss": 2.4528, "step": 6344 }, { "epoch": 0.7275541795665634, "grad_norm": 0.2702709415218117, "learning_rate": 0.0001823594769423716, "loss": 2.3383, "step": 6345 }, { "epoch": 0.7276688453159041, "grad_norm": 0.26978626842915426, "learning_rate": 0.0001822160899136789, "loss": 2.4416, "step": 6346 }, { "epoch": 0.7277835110652449, "grad_norm": 0.28201002769361216, "learning_rate": 0.00018207274671721896, "loss": 2.4747, "step": 6347 }, { "epoch": 0.7278981768145855, "grad_norm": 0.25739206792869423, "learning_rate": 0.00018192944737276335, "loss": 2.3456, "step": 6348 }, { "epoch": 0.7280128425639262, "grad_norm": 0.24048059538881492, "learning_rate": 0.0001817861919000775, "loss": 2.5114, "step": 6349 }, { "epoch": 0.7281275083132668, "grad_norm": 0.24741594863916858, "learning_rate": 0.0001816429803189208, "loss": 2.3827, "step": 6350 }, { "epoch": 0.7282421740626075, "grad_norm": 0.22575820325453494, "learning_rate": 0.00018149981264904653, "loss": 2.462, "step": 6351 }, { "epoch": 0.7283568398119482, "grad_norm": 0.2599262102245639, "learning_rate": 0.00018135668891020208, "loss": 2.3489, "step": 6352 }, { "epoch": 0.7284715055612888, "grad_norm": 0.2529561409075528, "learning_rate": 0.00018121360912212814, "loss": 2.5248, "step": 6353 }, { "epoch": 0.7285861713106295, "grad_norm": 0.23192061333535038, "learning_rate": 0.00018107057330456056, "loss": 2.3605, "step": 6354 }, { "epoch": 0.7287008370599702, "grad_norm": 0.22693905714388501, "learning_rate": 0.00018092758147722794, "loss": 2.3762, "step": 6355 }, { "epoch": 0.7288155028093108, "grad_norm": 0.25769585498734093, "learning_rate": 0.00018078463365985332, "loss": 2.5354, "step": 6356 }, { "epoch": 0.7289301685586516, "grad_norm": 0.22427009533700526, "learning_rate": 0.00018064172987215365, "loss": 2.4918, "step": 6357 }, { "epoch": 0.7290448343079922, "grad_norm": 0.21141639241388008, "learning_rate": 0.00018049887013383986, "loss": 2.2762, "step": 6358 }, { "epoch": 0.7291595000573329, "grad_norm": 0.25417417583245894, "learning_rate": 0.00018035605446461671, "loss": 2.4098, "step": 6359 }, { "epoch": 0.7292741658066736, "grad_norm": 0.24630905597078248, "learning_rate": 0.00018021328288418304, "loss": 2.4107, "step": 6360 }, { "epoch": 0.7293888315560142, "grad_norm": 0.2562281266007565, "learning_rate": 0.0001800705554122311, "loss": 2.3002, "step": 6361 }, { "epoch": 0.7295034973053549, "grad_norm": 0.25053144427494145, "learning_rate": 0.00017992787206844767, "loss": 2.4666, "step": 6362 }, { "epoch": 0.7296181630546955, "grad_norm": 0.2707093903145125, "learning_rate": 0.00017978523287251314, "loss": 2.3509, "step": 6363 }, { "epoch": 0.7297328288040362, "grad_norm": 0.23546626627246298, "learning_rate": 0.0001796426378441019, "loss": 2.3344, "step": 6364 }, { "epoch": 0.7298474945533769, "grad_norm": 0.26456970685911196, "learning_rate": 0.00017950008700288222, "loss": 2.4178, "step": 6365 }, { "epoch": 0.7299621603027175, "grad_norm": 0.2700147073907764, "learning_rate": 0.0001793575803685164, "loss": 2.4863, "step": 6366 }, { "epoch": 0.7300768260520583, "grad_norm": 0.23060516066151862, "learning_rate": 0.0001792151179606602, "loss": 2.2841, "step": 6367 }, { "epoch": 0.730191491801399, "grad_norm": 0.27001929445015477, "learning_rate": 0.00017907269979896358, "loss": 2.4686, "step": 6368 }, { "epoch": 0.7303061575507396, "grad_norm": 0.2738402082052355, "learning_rate": 0.00017893032590307097, "loss": 2.4409, "step": 6369 }, { "epoch": 0.7304208233000803, "grad_norm": 0.2633449863703587, "learning_rate": 0.00017878799629261955, "loss": 2.4848, "step": 6370 }, { "epoch": 0.7305354890494209, "grad_norm": 0.2530087326873093, "learning_rate": 0.0001786457109872412, "loss": 2.3913, "step": 6371 }, { "epoch": 0.7306501547987616, "grad_norm": 0.23594224576003306, "learning_rate": 0.00017850347000656158, "loss": 2.3928, "step": 6372 }, { "epoch": 0.7307648205481023, "grad_norm": 0.2504171887142438, "learning_rate": 0.0001783612733701997, "loss": 2.2967, "step": 6373 }, { "epoch": 0.7308794862974429, "grad_norm": 0.28876013344220103, "learning_rate": 0.00017821912109776929, "loss": 2.4997, "step": 6374 }, { "epoch": 0.7309941520467836, "grad_norm": 0.2566375347483386, "learning_rate": 0.00017807701320887766, "loss": 2.3467, "step": 6375 }, { "epoch": 0.7311088177961242, "grad_norm": 0.2559084099750586, "learning_rate": 0.0001779349497231254, "loss": 2.3992, "step": 6376 }, { "epoch": 0.731223483545465, "grad_norm": 0.24455758879156603, "learning_rate": 0.00017779293066010776, "loss": 2.58, "step": 6377 }, { "epoch": 0.7313381492948057, "grad_norm": 0.32298626457033325, "learning_rate": 0.0001776509560394135, "loss": 2.4417, "step": 6378 }, { "epoch": 0.7314528150441463, "grad_norm": 0.2705516887674042, "learning_rate": 0.00017750902588062534, "loss": 2.2464, "step": 6379 }, { "epoch": 0.731567480793487, "grad_norm": 0.24950170430154092, "learning_rate": 0.0001773671402033198, "loss": 2.2936, "step": 6380 }, { "epoch": 0.7316821465428277, "grad_norm": 0.2616864632231311, "learning_rate": 0.00017722529902706747, "loss": 2.3761, "step": 6381 }, { "epoch": 0.7317968122921683, "grad_norm": 0.24255816239243266, "learning_rate": 0.00017708350237143235, "loss": 2.5065, "step": 6382 }, { "epoch": 0.731911478041509, "grad_norm": 0.26166732735161835, "learning_rate": 0.0001769417502559726, "loss": 2.4196, "step": 6383 }, { "epoch": 0.7320261437908496, "grad_norm": 0.27835620739194583, "learning_rate": 0.00017680004270024063, "loss": 2.4478, "step": 6384 }, { "epoch": 0.7321408095401903, "grad_norm": 0.2742563062833354, "learning_rate": 0.00017665837972378185, "loss": 2.4594, "step": 6385 }, { "epoch": 0.732255475289531, "grad_norm": 0.25214509190746215, "learning_rate": 0.0001765167613461362, "loss": 2.327, "step": 6386 }, { "epoch": 0.7323701410388717, "grad_norm": 0.23972928643385047, "learning_rate": 0.0001763751875868373, "loss": 2.5061, "step": 6387 }, { "epoch": 0.7324848067882124, "grad_norm": 0.2405970862554129, "learning_rate": 0.00017623365846541206, "loss": 2.4211, "step": 6388 }, { "epoch": 0.7325994725375531, "grad_norm": 0.26278832972145627, "learning_rate": 0.00017609217400138227, "loss": 2.4494, "step": 6389 }, { "epoch": 0.7327141382868937, "grad_norm": 0.29854968307617197, "learning_rate": 0.000175950734214263, "loss": 2.4794, "step": 6390 }, { "epoch": 0.7328288040362344, "grad_norm": 0.22982503979857025, "learning_rate": 0.00017580933912356283, "loss": 2.4716, "step": 6391 }, { "epoch": 0.732943469785575, "grad_norm": 0.2711480861666599, "learning_rate": 0.0001756679887487847, "loss": 2.4777, "step": 6392 }, { "epoch": 0.7330581355349157, "grad_norm": 0.2693472933179573, "learning_rate": 0.0001755266831094252, "loss": 2.4635, "step": 6393 }, { "epoch": 0.7331728012842564, "grad_norm": 0.28335585644181865, "learning_rate": 0.00017538542222497478, "loss": 2.4173, "step": 6394 }, { "epoch": 0.733287467033597, "grad_norm": 0.2567275570973392, "learning_rate": 0.0001752442061149176, "loss": 2.39, "step": 6395 }, { "epoch": 0.7334021327829378, "grad_norm": 0.25372657583174985, "learning_rate": 0.0001751030347987319, "loss": 2.453, "step": 6396 }, { "epoch": 0.7335167985322784, "grad_norm": 0.25051814611341433, "learning_rate": 0.00017496190829588938, "loss": 2.5713, "step": 6397 }, { "epoch": 0.7336314642816191, "grad_norm": 0.274209797280249, "learning_rate": 0.00017482082662585575, "loss": 2.3274, "step": 6398 }, { "epoch": 0.7337461300309598, "grad_norm": 0.23327299851833, "learning_rate": 0.00017467978980809063, "loss": 2.462, "step": 6399 }, { "epoch": 0.7338607957803004, "grad_norm": 0.2695485987745949, "learning_rate": 0.00017453879786204736, "loss": 2.4806, "step": 6400 }, { "epoch": 0.7339754615296411, "grad_norm": 0.25397866037272276, "learning_rate": 0.00017439785080717302, "loss": 2.4387, "step": 6401 }, { "epoch": 0.7340901272789818, "grad_norm": 0.23029430975258247, "learning_rate": 0.00017425694866290886, "loss": 2.3079, "step": 6402 }, { "epoch": 0.7342047930283224, "grad_norm": 0.2618202206264615, "learning_rate": 0.00017411609144868902, "loss": 2.2745, "step": 6403 }, { "epoch": 0.7343194587776631, "grad_norm": 0.257225924005901, "learning_rate": 0.0001739752791839428, "loss": 2.556, "step": 6404 }, { "epoch": 0.7344341245270037, "grad_norm": 0.27276206623449895, "learning_rate": 0.00017383451188809202, "loss": 2.4432, "step": 6405 }, { "epoch": 0.7345487902763445, "grad_norm": 0.2737758710908553, "learning_rate": 0.00017369378958055309, "loss": 2.3957, "step": 6406 }, { "epoch": 0.7346634560256852, "grad_norm": 0.2612370822339997, "learning_rate": 0.00017355311228073588, "loss": 2.3741, "step": 6407 }, { "epoch": 0.7347781217750258, "grad_norm": 0.27460841137430797, "learning_rate": 0.0001734124800080442, "loss": 2.4622, "step": 6408 }, { "epoch": 0.7348927875243665, "grad_norm": 0.23819597260755612, "learning_rate": 0.0001732718927818756, "loss": 2.3509, "step": 6409 }, { "epoch": 0.7350074532737071, "grad_norm": 0.26902026136348944, "learning_rate": 0.00017313135062162156, "loss": 2.332, "step": 6410 }, { "epoch": 0.7351221190230478, "grad_norm": 0.25570815986070616, "learning_rate": 0.00017299085354666684, "loss": 2.4685, "step": 6411 }, { "epoch": 0.7352367847723885, "grad_norm": 0.2483241272040528, "learning_rate": 0.00017285040157639055, "loss": 2.2687, "step": 6412 }, { "epoch": 0.7353514505217291, "grad_norm": 0.2503368519024546, "learning_rate": 0.00017270999473016524, "loss": 2.4077, "step": 6413 }, { "epoch": 0.7354661162710698, "grad_norm": 0.33417339436644733, "learning_rate": 0.0001725696330273575, "loss": 2.3328, "step": 6414 }, { "epoch": 0.7355807820204106, "grad_norm": 0.2684638318458924, "learning_rate": 0.00017242931648732746, "loss": 2.3738, "step": 6415 }, { "epoch": 0.7356954477697512, "grad_norm": 0.25751092889785415, "learning_rate": 0.00017228904512942933, "loss": 2.4867, "step": 6416 }, { "epoch": 0.7358101135190919, "grad_norm": 0.24769457030090525, "learning_rate": 0.00017214881897301053, "loss": 2.3744, "step": 6417 }, { "epoch": 0.7359247792684325, "grad_norm": 0.273370489174241, "learning_rate": 0.00017200863803741256, "loss": 2.3376, "step": 6418 }, { "epoch": 0.7360394450177732, "grad_norm": 0.24001829120908535, "learning_rate": 0.0001718685023419712, "loss": 2.3963, "step": 6419 }, { "epoch": 0.7361541107671139, "grad_norm": 0.2735408441436579, "learning_rate": 0.00017172841190601508, "loss": 2.3947, "step": 6420 }, { "epoch": 0.7362687765164545, "grad_norm": 0.25038010240051484, "learning_rate": 0.00017158836674886709, "loss": 2.512, "step": 6421 }, { "epoch": 0.7363834422657952, "grad_norm": 0.2511986466388504, "learning_rate": 0.00017144836688984393, "loss": 2.3736, "step": 6422 }, { "epoch": 0.7364981080151359, "grad_norm": 0.261641704377779, "learning_rate": 0.0001713084123482555, "loss": 2.3823, "step": 6423 }, { "epoch": 0.7366127737644765, "grad_norm": 0.24774891335312474, "learning_rate": 0.0001711685031434063, "loss": 2.3787, "step": 6424 }, { "epoch": 0.7367274395138173, "grad_norm": 0.2817974334501849, "learning_rate": 0.00017102863929459422, "loss": 2.3888, "step": 6425 }, { "epoch": 0.7368421052631579, "grad_norm": 0.25556045841548114, "learning_rate": 0.00017088882082111033, "loss": 2.3557, "step": 6426 }, { "epoch": 0.7369567710124986, "grad_norm": 0.25429917219156073, "learning_rate": 0.00017074904774224027, "loss": 2.3451, "step": 6427 }, { "epoch": 0.7370714367618393, "grad_norm": 0.24949524694374095, "learning_rate": 0.00017060932007726293, "loss": 2.4022, "step": 6428 }, { "epoch": 0.7371861025111799, "grad_norm": 0.26945785783098614, "learning_rate": 0.0001704696378454511, "loss": 2.5371, "step": 6429 }, { "epoch": 0.7373007682605206, "grad_norm": 0.24085239603047626, "learning_rate": 0.00017033000106607133, "loss": 2.4066, "step": 6430 }, { "epoch": 0.7374154340098612, "grad_norm": 0.26438975356507105, "learning_rate": 0.000170190409758384, "loss": 2.4455, "step": 6431 }, { "epoch": 0.7375300997592019, "grad_norm": 0.23933484319095555, "learning_rate": 0.00017005086394164277, "loss": 2.4235, "step": 6432 }, { "epoch": 0.7376447655085426, "grad_norm": 0.25455056990007663, "learning_rate": 0.00016991136363509523, "loss": 2.3903, "step": 6433 }, { "epoch": 0.7377594312578832, "grad_norm": 0.2879926758018694, "learning_rate": 0.00016977190885798338, "loss": 2.4464, "step": 6434 }, { "epoch": 0.737874097007224, "grad_norm": 0.2526712579522657, "learning_rate": 0.00016963249962954186, "loss": 2.2796, "step": 6435 }, { "epoch": 0.7379887627565647, "grad_norm": 0.2775822329112146, "learning_rate": 0.00016949313596899957, "loss": 2.5296, "step": 6436 }, { "epoch": 0.7381034285059053, "grad_norm": 0.2926359744267795, "learning_rate": 0.00016935381789557935, "loss": 2.379, "step": 6437 }, { "epoch": 0.738218094255246, "grad_norm": 0.25009791022147054, "learning_rate": 0.0001692145454284969, "loss": 2.4433, "step": 6438 }, { "epoch": 0.7383327600045866, "grad_norm": 0.27482010013691033, "learning_rate": 0.00016907531858696272, "loss": 2.4141, "step": 6439 }, { "epoch": 0.7384474257539273, "grad_norm": 0.24086837784438758, "learning_rate": 0.00016893613739018054, "loss": 2.3119, "step": 6440 }, { "epoch": 0.738562091503268, "grad_norm": 0.2698293221390053, "learning_rate": 0.00016879700185734736, "loss": 2.3283, "step": 6441 }, { "epoch": 0.7386767572526086, "grad_norm": 0.27000175315465824, "learning_rate": 0.00016865791200765452, "loss": 2.4391, "step": 6442 }, { "epoch": 0.7387914230019493, "grad_norm": 0.24856935527968704, "learning_rate": 0.0001685188678602867, "loss": 2.5043, "step": 6443 }, { "epoch": 0.7389060887512899, "grad_norm": 0.25099600324808935, "learning_rate": 0.0001683798694344225, "loss": 2.3761, "step": 6444 }, { "epoch": 0.7390207545006306, "grad_norm": 0.27073262329145825, "learning_rate": 0.00016824091674923404, "loss": 2.5456, "step": 6445 }, { "epoch": 0.7391354202499714, "grad_norm": 0.2700194485082787, "learning_rate": 0.00016810200982388736, "loss": 2.3781, "step": 6446 }, { "epoch": 0.739250085999312, "grad_norm": 0.24865415740635244, "learning_rate": 0.00016796314867754175, "loss": 2.3994, "step": 6447 }, { "epoch": 0.7393647517486527, "grad_norm": 0.2617456392522337, "learning_rate": 0.00016782433332935054, "loss": 2.4393, "step": 6448 }, { "epoch": 0.7394794174979934, "grad_norm": 0.2516537119649872, "learning_rate": 0.00016768556379846074, "loss": 2.2734, "step": 6449 }, { "epoch": 0.739594083247334, "grad_norm": 0.24740584302427626, "learning_rate": 0.00016754684010401288, "loss": 2.2605, "step": 6450 }, { "epoch": 0.7397087489966747, "grad_norm": 0.3000928272834957, "learning_rate": 0.00016740816226514134, "loss": 2.4193, "step": 6451 }, { "epoch": 0.7398234147460153, "grad_norm": 0.2689530172082053, "learning_rate": 0.0001672695303009742, "loss": 2.3269, "step": 6452 }, { "epoch": 0.739938080495356, "grad_norm": 0.25359276963384686, "learning_rate": 0.00016713094423063257, "loss": 2.445, "step": 6453 }, { "epoch": 0.7400527462446967, "grad_norm": 0.2655351113095584, "learning_rate": 0.00016699240407323245, "loss": 2.2968, "step": 6454 }, { "epoch": 0.7401674119940373, "grad_norm": 0.26143423498694873, "learning_rate": 0.00016685390984788235, "loss": 2.3892, "step": 6455 }, { "epoch": 0.7402820777433781, "grad_norm": 0.2685860924714883, "learning_rate": 0.00016671546157368507, "loss": 2.3429, "step": 6456 }, { "epoch": 0.7403967434927188, "grad_norm": 0.2695601604068389, "learning_rate": 0.00016657705926973682, "loss": 2.2832, "step": 6457 }, { "epoch": 0.7405114092420594, "grad_norm": 0.29279549829964374, "learning_rate": 0.00016643870295512763, "loss": 2.474, "step": 6458 }, { "epoch": 0.7406260749914001, "grad_norm": 0.25695420948760556, "learning_rate": 0.00016630039264894114, "loss": 2.5383, "step": 6459 }, { "epoch": 0.7407407407407407, "grad_norm": 0.2826840319743698, "learning_rate": 0.00016616212837025475, "loss": 2.57, "step": 6460 }, { "epoch": 0.7408554064900814, "grad_norm": 0.28331646164693103, "learning_rate": 0.00016602391013813907, "loss": 2.449, "step": 6461 }, { "epoch": 0.7409700722394221, "grad_norm": 0.2880777652658807, "learning_rate": 0.0001658857379716588, "loss": 2.4556, "step": 6462 }, { "epoch": 0.7410847379887627, "grad_norm": 0.2779212008973584, "learning_rate": 0.0001657476118898722, "loss": 2.3941, "step": 6463 }, { "epoch": 0.7411994037381034, "grad_norm": 0.24365589846876948, "learning_rate": 0.0001656095319118311, "loss": 2.4424, "step": 6464 }, { "epoch": 0.741314069487444, "grad_norm": 0.2562907513914674, "learning_rate": 0.00016547149805658102, "loss": 2.4218, "step": 6465 }, { "epoch": 0.7414287352367848, "grad_norm": 0.22003670505580827, "learning_rate": 0.00016533351034316124, "loss": 2.5837, "step": 6466 }, { "epoch": 0.7415434009861255, "grad_norm": 0.23707831648615418, "learning_rate": 0.00016519556879060422, "loss": 2.3795, "step": 6467 }, { "epoch": 0.7416580667354661, "grad_norm": 0.2265355270750885, "learning_rate": 0.00016505767341793638, "loss": 2.42, "step": 6468 }, { "epoch": 0.7417727324848068, "grad_norm": 0.25543895117182963, "learning_rate": 0.00016491982424417822, "loss": 2.4143, "step": 6469 }, { "epoch": 0.7418873982341475, "grad_norm": 0.24562601409371795, "learning_rate": 0.00016478202128834298, "loss": 2.5538, "step": 6470 }, { "epoch": 0.7420020639834881, "grad_norm": 0.24634630304277433, "learning_rate": 0.00016464426456943798, "loss": 2.4678, "step": 6471 }, { "epoch": 0.7421167297328288, "grad_norm": 0.2745901075912691, "learning_rate": 0.0001645065541064645, "loss": 2.3996, "step": 6472 }, { "epoch": 0.7422313954821694, "grad_norm": 0.24452415107738634, "learning_rate": 0.00016436888991841635, "loss": 2.4128, "step": 6473 }, { "epoch": 0.7423460612315101, "grad_norm": 0.26966349356425356, "learning_rate": 0.00016423127202428234, "loss": 2.3854, "step": 6474 }, { "epoch": 0.7424607269808509, "grad_norm": 0.23410776465117272, "learning_rate": 0.00016409370044304417, "loss": 2.2958, "step": 6475 }, { "epoch": 0.7425753927301915, "grad_norm": 0.23977639100899975, "learning_rate": 0.00016395617519367694, "loss": 2.3592, "step": 6476 }, { "epoch": 0.7426900584795322, "grad_norm": 0.24684302742019762, "learning_rate": 0.00016381869629514972, "loss": 2.3887, "step": 6477 }, { "epoch": 0.7428047242288728, "grad_norm": 0.2760276302081096, "learning_rate": 0.00016368126376642517, "loss": 2.3981, "step": 6478 }, { "epoch": 0.7429193899782135, "grad_norm": 0.24430085885689223, "learning_rate": 0.0001635438776264595, "loss": 2.4263, "step": 6479 }, { "epoch": 0.7430340557275542, "grad_norm": 0.24341053562650392, "learning_rate": 0.00016340653789420246, "loss": 2.4263, "step": 6480 }, { "epoch": 0.7431487214768948, "grad_norm": 0.2954612588048405, "learning_rate": 0.0001632692445885976, "loss": 2.5716, "step": 6481 }, { "epoch": 0.7432633872262355, "grad_norm": 0.2795041690457652, "learning_rate": 0.00016313199772858162, "loss": 2.421, "step": 6482 }, { "epoch": 0.7433780529755762, "grad_norm": 0.24879527765820658, "learning_rate": 0.0001629947973330851, "loss": 2.2663, "step": 6483 }, { "epoch": 0.7434927187249168, "grad_norm": 0.24786038043681655, "learning_rate": 0.0001628576434210327, "loss": 2.3732, "step": 6484 }, { "epoch": 0.7436073844742576, "grad_norm": 0.2578956512610779, "learning_rate": 0.0001627205360113417, "loss": 2.4985, "step": 6485 }, { "epoch": 0.7437220502235982, "grad_norm": 0.2547879296125543, "learning_rate": 0.00016258347512292364, "loss": 2.4464, "step": 6486 }, { "epoch": 0.7438367159729389, "grad_norm": 0.2426821857356462, "learning_rate": 0.0001624464607746836, "loss": 2.4569, "step": 6487 }, { "epoch": 0.7439513817222796, "grad_norm": 0.2293939025740168, "learning_rate": 0.00016230949298551967, "loss": 2.3713, "step": 6488 }, { "epoch": 0.7440660474716202, "grad_norm": 0.2597311712424021, "learning_rate": 0.00016217257177432432, "loss": 2.3969, "step": 6489 }, { "epoch": 0.7441807132209609, "grad_norm": 0.22775542067646762, "learning_rate": 0.0001620356971599833, "loss": 2.4824, "step": 6490 }, { "epoch": 0.7442953789703016, "grad_norm": 0.24618118113992368, "learning_rate": 0.00016189886916137547, "loss": 2.4064, "step": 6491 }, { "epoch": 0.7444100447196422, "grad_norm": 0.26715289128833575, "learning_rate": 0.0001617620877973739, "loss": 2.3661, "step": 6492 }, { "epoch": 0.7445247104689829, "grad_norm": 0.24838679607508837, "learning_rate": 0.0001616253530868449, "loss": 2.4047, "step": 6493 }, { "epoch": 0.7446393762183235, "grad_norm": 0.2665402729934092, "learning_rate": 0.0001614886650486484, "loss": 2.3521, "step": 6494 }, { "epoch": 0.7447540419676643, "grad_norm": 0.21901497163164738, "learning_rate": 0.00016135202370163798, "loss": 2.2581, "step": 6495 }, { "epoch": 0.744868707717005, "grad_norm": 0.27963002865866904, "learning_rate": 0.00016121542906466087, "loss": 2.4586, "step": 6496 }, { "epoch": 0.7449833734663456, "grad_norm": 0.2272167510121665, "learning_rate": 0.00016107888115655727, "loss": 2.4346, "step": 6497 }, { "epoch": 0.7450980392156863, "grad_norm": 0.23221969349128596, "learning_rate": 0.00016094237999616162, "loss": 2.3672, "step": 6498 }, { "epoch": 0.7452127049650269, "grad_norm": 0.2572882361327514, "learning_rate": 0.00016080592560230162, "loss": 2.5071, "step": 6499 }, { "epoch": 0.7453273707143676, "grad_norm": 0.23563660975172557, "learning_rate": 0.00016066951799379852, "loss": 2.364, "step": 6500 }, { "epoch": 0.7454420364637083, "grad_norm": 0.257424602609495, "learning_rate": 0.00016053315718946726, "loss": 2.5173, "step": 6501 }, { "epoch": 0.7455567022130489, "grad_norm": 0.25988742865269904, "learning_rate": 0.00016039684320811627, "loss": 2.3357, "step": 6502 }, { "epoch": 0.7456713679623896, "grad_norm": 0.259338911434718, "learning_rate": 0.000160260576068547, "loss": 2.3429, "step": 6503 }, { "epoch": 0.7457860337117304, "grad_norm": 0.26662028476921323, "learning_rate": 0.00016012435578955552, "loss": 2.3863, "step": 6504 }, { "epoch": 0.745900699461071, "grad_norm": 0.257441712900983, "learning_rate": 0.00015998818238993046, "loss": 2.4351, "step": 6505 }, { "epoch": 0.7460153652104117, "grad_norm": 0.2397458781662433, "learning_rate": 0.00015985205588845436, "loss": 2.5672, "step": 6506 }, { "epoch": 0.7461300309597523, "grad_norm": 0.2554299064534445, "learning_rate": 0.0001597159763039034, "loss": 2.3912, "step": 6507 }, { "epoch": 0.746244696709093, "grad_norm": 0.23833238551514632, "learning_rate": 0.00015957994365504709, "loss": 2.3998, "step": 6508 }, { "epoch": 0.7463593624584337, "grad_norm": 0.25926464849641523, "learning_rate": 0.00015944395796064847, "loss": 2.354, "step": 6509 }, { "epoch": 0.7464740282077743, "grad_norm": 0.2588610537965368, "learning_rate": 0.00015930801923946453, "loss": 2.3658, "step": 6510 }, { "epoch": 0.746588693957115, "grad_norm": 0.2478396020699848, "learning_rate": 0.00015917212751024497, "loss": 2.3444, "step": 6511 }, { "epoch": 0.7467033597064556, "grad_norm": 0.24263915918450826, "learning_rate": 0.0001590362827917336, "loss": 2.43, "step": 6512 }, { "epoch": 0.7468180254557963, "grad_norm": 0.25354821661470545, "learning_rate": 0.00015890048510266768, "loss": 2.2718, "step": 6513 }, { "epoch": 0.7469326912051371, "grad_norm": 0.2867770908491982, "learning_rate": 0.00015876473446177792, "loss": 2.419, "step": 6514 }, { "epoch": 0.7470473569544777, "grad_norm": 0.25085489041591963, "learning_rate": 0.0001586290308877885, "loss": 2.4816, "step": 6515 }, { "epoch": 0.7471620227038184, "grad_norm": 0.24658458569034983, "learning_rate": 0.00015849337439941731, "loss": 2.3646, "step": 6516 }, { "epoch": 0.7472766884531591, "grad_norm": 0.2562785709451517, "learning_rate": 0.00015835776501537524, "loss": 2.4274, "step": 6517 }, { "epoch": 0.7473913542024997, "grad_norm": 0.25372733288916743, "learning_rate": 0.000158222202754367, "loss": 2.4289, "step": 6518 }, { "epoch": 0.7475060199518404, "grad_norm": 0.24170121945546652, "learning_rate": 0.00015808668763509138, "loss": 2.3094, "step": 6519 }, { "epoch": 0.747620685701181, "grad_norm": 0.23866326861704354, "learning_rate": 0.00015795121967623955, "loss": 2.4337, "step": 6520 }, { "epoch": 0.7477353514505217, "grad_norm": 0.3157771506251674, "learning_rate": 0.00015781579889649695, "loss": 2.2822, "step": 6521 }, { "epoch": 0.7478500171998624, "grad_norm": 0.275701823082198, "learning_rate": 0.0001576804253145424, "loss": 2.4905, "step": 6522 }, { "epoch": 0.747964682949203, "grad_norm": 0.26507048253077253, "learning_rate": 0.00015754509894904767, "loss": 2.4144, "step": 6523 }, { "epoch": 0.7480793486985438, "grad_norm": 0.2650881384820673, "learning_rate": 0.0001574098198186789, "loss": 2.4104, "step": 6524 }, { "epoch": 0.7481940144478845, "grad_norm": 0.23345830152427435, "learning_rate": 0.00015727458794209532, "loss": 2.3961, "step": 6525 }, { "epoch": 0.7483086801972251, "grad_norm": 0.2616393227989673, "learning_rate": 0.00015713940333794924, "loss": 2.3332, "step": 6526 }, { "epoch": 0.7484233459465658, "grad_norm": 0.2636261029965682, "learning_rate": 0.00015700426602488694, "loss": 2.3359, "step": 6527 }, { "epoch": 0.7485380116959064, "grad_norm": 0.2590906361630558, "learning_rate": 0.000156869176021548, "loss": 2.3446, "step": 6528 }, { "epoch": 0.7486526774452471, "grad_norm": 0.24853030451381355, "learning_rate": 0.00015673413334656562, "loss": 2.2838, "step": 6529 }, { "epoch": 0.7487673431945878, "grad_norm": 0.2674819230945004, "learning_rate": 0.00015659913801856624, "loss": 2.3066, "step": 6530 }, { "epoch": 0.7488820089439284, "grad_norm": 0.251569792005686, "learning_rate": 0.00015646419005617014, "loss": 2.3501, "step": 6531 }, { "epoch": 0.7489966746932691, "grad_norm": 0.257095050545602, "learning_rate": 0.00015632928947799046, "loss": 2.4589, "step": 6532 }, { "epoch": 0.7491113404426097, "grad_norm": 0.2718355013828471, "learning_rate": 0.00015619443630263426, "loss": 2.4579, "step": 6533 }, { "epoch": 0.7492260061919505, "grad_norm": 0.27022586286513006, "learning_rate": 0.00015605963054870226, "loss": 2.4169, "step": 6534 }, { "epoch": 0.7493406719412912, "grad_norm": 0.2730818068698634, "learning_rate": 0.00015592487223478802, "loss": 2.2675, "step": 6535 }, { "epoch": 0.7494553376906318, "grad_norm": 0.28820148512239824, "learning_rate": 0.00015579016137947898, "loss": 2.3329, "step": 6536 }, { "epoch": 0.7495700034399725, "grad_norm": 0.2648563268377763, "learning_rate": 0.0001556554980013561, "loss": 2.3603, "step": 6537 }, { "epoch": 0.7496846691893132, "grad_norm": 0.286807142340206, "learning_rate": 0.00015552088211899311, "loss": 2.4557, "step": 6538 }, { "epoch": 0.7497993349386538, "grad_norm": 0.27078543998863563, "learning_rate": 0.00015538631375095824, "loss": 2.3435, "step": 6539 }, { "epoch": 0.7499140006879945, "grad_norm": 0.2539300677677799, "learning_rate": 0.00015525179291581265, "loss": 2.446, "step": 6540 }, { "epoch": 0.7500286664373351, "grad_norm": 0.2680512435727763, "learning_rate": 0.00015511731963211057, "loss": 2.3388, "step": 6541 }, { "epoch": 0.7501433321866758, "grad_norm": 0.23276964650397447, "learning_rate": 0.00015498289391840015, "loss": 2.3831, "step": 6542 }, { "epoch": 0.7502579979360166, "grad_norm": 0.25655775790486474, "learning_rate": 0.0001548485157932229, "loss": 2.3926, "step": 6543 }, { "epoch": 0.7503726636853572, "grad_norm": 0.28221696635866594, "learning_rate": 0.00015471418527511366, "loss": 2.4787, "step": 6544 }, { "epoch": 0.7504873294346979, "grad_norm": 0.23006601439987143, "learning_rate": 0.0001545799023826009, "loss": 2.3436, "step": 6545 }, { "epoch": 0.7506019951840385, "grad_norm": 0.23829401301324962, "learning_rate": 0.00015444566713420643, "loss": 2.3431, "step": 6546 }, { "epoch": 0.7507166609333792, "grad_norm": 0.2522764831833254, "learning_rate": 0.00015431147954844516, "loss": 2.35, "step": 6547 }, { "epoch": 0.7508313266827199, "grad_norm": 0.25266432541030887, "learning_rate": 0.00015417733964382587, "loss": 2.441, "step": 6548 }, { "epoch": 0.7509459924320605, "grad_norm": 0.24496874776163988, "learning_rate": 0.00015404324743885055, "loss": 2.4877, "step": 6549 }, { "epoch": 0.7510606581814012, "grad_norm": 0.25821055523971004, "learning_rate": 0.0001539092029520147, "loss": 2.4441, "step": 6550 }, { "epoch": 0.7511753239307419, "grad_norm": 0.2446546467967584, "learning_rate": 0.0001537752062018073, "loss": 2.2808, "step": 6551 }, { "epoch": 0.7512899896800825, "grad_norm": 0.23416276014559373, "learning_rate": 0.00015364125720671062, "loss": 2.331, "step": 6552 }, { "epoch": 0.7514046554294233, "grad_norm": 0.22276805268508754, "learning_rate": 0.00015350735598520005, "loss": 2.2845, "step": 6553 }, { "epoch": 0.7515193211787639, "grad_norm": 0.23970777248771488, "learning_rate": 0.00015337350255574528, "loss": 2.4154, "step": 6554 }, { "epoch": 0.7516339869281046, "grad_norm": 0.24398053535117697, "learning_rate": 0.0001532396969368084, "loss": 2.4158, "step": 6555 }, { "epoch": 0.7517486526774453, "grad_norm": 0.24255876198168227, "learning_rate": 0.00015310593914684545, "loss": 2.4301, "step": 6556 }, { "epoch": 0.7518633184267859, "grad_norm": 0.23422146158572033, "learning_rate": 0.00015297222920430576, "loss": 2.2782, "step": 6557 }, { "epoch": 0.7519779841761266, "grad_norm": 0.2824356410531014, "learning_rate": 0.00015283856712763216, "loss": 2.465, "step": 6558 }, { "epoch": 0.7520926499254673, "grad_norm": 0.2509184042150388, "learning_rate": 0.0001527049529352607, "loss": 2.5023, "step": 6559 }, { "epoch": 0.7522073156748079, "grad_norm": 0.2541861593116082, "learning_rate": 0.0001525713866456211, "loss": 2.5068, "step": 6560 }, { "epoch": 0.7523219814241486, "grad_norm": 0.2600394679701279, "learning_rate": 0.0001524378682771359, "loss": 2.4542, "step": 6561 }, { "epoch": 0.7524366471734892, "grad_norm": 0.26762343055476856, "learning_rate": 0.0001523043978482217, "loss": 2.5312, "step": 6562 }, { "epoch": 0.75255131292283, "grad_norm": 0.2631801285456526, "learning_rate": 0.00015217097537728813, "loss": 2.3052, "step": 6563 }, { "epoch": 0.7526659786721707, "grad_norm": 0.2522708164393584, "learning_rate": 0.00015203760088273828, "loss": 2.3605, "step": 6564 }, { "epoch": 0.7527806444215113, "grad_norm": 0.26360983448076153, "learning_rate": 0.00015190427438296862, "loss": 2.349, "step": 6565 }, { "epoch": 0.752895310170852, "grad_norm": 0.237102860279755, "learning_rate": 0.00015177099589636916, "loss": 2.1964, "step": 6566 }, { "epoch": 0.7530099759201926, "grad_norm": 0.2554446661192114, "learning_rate": 0.0001516377654413228, "loss": 2.2934, "step": 6567 }, { "epoch": 0.7531246416695333, "grad_norm": 0.24540631856412679, "learning_rate": 0.00015150458303620618, "loss": 2.3706, "step": 6568 }, { "epoch": 0.753239307418874, "grad_norm": 0.27470871946578723, "learning_rate": 0.00015137144869938968, "loss": 2.4719, "step": 6569 }, { "epoch": 0.7533539731682146, "grad_norm": 0.2636079681191475, "learning_rate": 0.00015123836244923622, "loss": 2.5388, "step": 6570 }, { "epoch": 0.7534686389175553, "grad_norm": 0.2514471183025824, "learning_rate": 0.00015110532430410268, "loss": 2.5087, "step": 6571 }, { "epoch": 0.753583304666896, "grad_norm": 0.24977845291221487, "learning_rate": 0.00015097233428233925, "loss": 2.3574, "step": 6572 }, { "epoch": 0.7536979704162367, "grad_norm": 0.26476615615970445, "learning_rate": 0.00015083939240228888, "loss": 2.4463, "step": 6573 }, { "epoch": 0.7538126361655774, "grad_norm": 0.25745675599193374, "learning_rate": 0.00015070649868228898, "loss": 2.4495, "step": 6574 }, { "epoch": 0.753927301914918, "grad_norm": 0.2970827011285602, "learning_rate": 0.00015057365314066952, "loss": 2.4109, "step": 6575 }, { "epoch": 0.7540419676642587, "grad_norm": 0.24908745403969004, "learning_rate": 0.0001504408557957538, "loss": 2.4128, "step": 6576 }, { "epoch": 0.7541566334135994, "grad_norm": 0.2663490710353967, "learning_rate": 0.0001503081066658588, "loss": 2.4695, "step": 6577 }, { "epoch": 0.75427129916294, "grad_norm": 0.25961752483260114, "learning_rate": 0.00015017540576929477, "loss": 2.4745, "step": 6578 }, { "epoch": 0.7543859649122807, "grad_norm": 0.258382235975336, "learning_rate": 0.00015004275312436528, "loss": 2.4189, "step": 6579 }, { "epoch": 0.7545006306616213, "grad_norm": 0.245218710372783, "learning_rate": 0.00014991014874936721, "loss": 2.3867, "step": 6580 }, { "epoch": 0.754615296410962, "grad_norm": 0.2505824358915988, "learning_rate": 0.00014977759266259093, "loss": 2.4547, "step": 6581 }, { "epoch": 0.7547299621603027, "grad_norm": 0.24960901032548094, "learning_rate": 0.00014964508488231971, "loss": 2.2998, "step": 6582 }, { "epoch": 0.7548446279096434, "grad_norm": 0.26770038840401794, "learning_rate": 0.00014951262542683052, "loss": 2.3962, "step": 6583 }, { "epoch": 0.7549592936589841, "grad_norm": 0.26525596711295824, "learning_rate": 0.00014938021431439407, "loss": 2.431, "step": 6584 }, { "epoch": 0.7550739594083248, "grad_norm": 0.2200162860346518, "learning_rate": 0.00014924785156327354, "loss": 2.339, "step": 6585 }, { "epoch": 0.7551886251576654, "grad_norm": 0.25439755147628335, "learning_rate": 0.00014911553719172587, "loss": 2.3767, "step": 6586 }, { "epoch": 0.7553032909070061, "grad_norm": 0.27934193198072843, "learning_rate": 0.00014898327121800153, "loss": 2.5087, "step": 6587 }, { "epoch": 0.7554179566563467, "grad_norm": 0.2627670808905417, "learning_rate": 0.0001488510536603437, "loss": 2.2752, "step": 6588 }, { "epoch": 0.7555326224056874, "grad_norm": 0.27328945197109195, "learning_rate": 0.00014871888453698957, "loss": 2.3786, "step": 6589 }, { "epoch": 0.7556472881550281, "grad_norm": 0.25542486950594584, "learning_rate": 0.00014858676386616949, "loss": 2.4662, "step": 6590 }, { "epoch": 0.7557619539043687, "grad_norm": 0.29053477814777107, "learning_rate": 0.00014845469166610664, "loss": 2.4517, "step": 6591 }, { "epoch": 0.7558766196537094, "grad_norm": 0.2401803572319788, "learning_rate": 0.00014832266795501803, "loss": 2.4132, "step": 6592 }, { "epoch": 0.7559912854030502, "grad_norm": 0.2607204767078145, "learning_rate": 0.00014819069275111375, "loss": 2.4297, "step": 6593 }, { "epoch": 0.7561059511523908, "grad_norm": 0.2465156351953693, "learning_rate": 0.00014805876607259732, "loss": 2.3357, "step": 6594 }, { "epoch": 0.7562206169017315, "grad_norm": 0.26379271350811356, "learning_rate": 0.0001479268879376655, "loss": 2.5467, "step": 6595 }, { "epoch": 0.7563352826510721, "grad_norm": 0.25282091845670956, "learning_rate": 0.00014779505836450858, "loss": 2.3736, "step": 6596 }, { "epoch": 0.7564499484004128, "grad_norm": 0.2716544263188572, "learning_rate": 0.0001476632773713095, "loss": 2.418, "step": 6597 }, { "epoch": 0.7565646141497535, "grad_norm": 0.2368122502261607, "learning_rate": 0.00014753154497624516, "loss": 2.5258, "step": 6598 }, { "epoch": 0.7566792798990941, "grad_norm": 0.26398318338078525, "learning_rate": 0.00014739986119748555, "loss": 2.3533, "step": 6599 }, { "epoch": 0.7567939456484348, "grad_norm": 0.27607560676181087, "learning_rate": 0.00014726822605319395, "loss": 2.3845, "step": 6600 }, { "epoch": 0.7569086113977754, "grad_norm": 0.2582099129002738, "learning_rate": 0.00014713663956152683, "loss": 2.4197, "step": 6601 }, { "epoch": 0.7570232771471161, "grad_norm": 0.2614857032104042, "learning_rate": 0.00014700510174063425, "loss": 2.3975, "step": 6602 }, { "epoch": 0.7571379428964569, "grad_norm": 0.24998381044406973, "learning_rate": 0.00014687361260865895, "loss": 2.294, "step": 6603 }, { "epoch": 0.7572526086457975, "grad_norm": 0.27955160821198743, "learning_rate": 0.0001467421721837378, "loss": 2.3737, "step": 6604 }, { "epoch": 0.7573672743951382, "grad_norm": 0.27421178767257853, "learning_rate": 0.00014661078048400022, "loss": 2.3944, "step": 6605 }, { "epoch": 0.7574819401444789, "grad_norm": 0.23959803315329295, "learning_rate": 0.0001464794375275692, "loss": 2.4179, "step": 6606 }, { "epoch": 0.7575966058938195, "grad_norm": 0.23597878298447664, "learning_rate": 0.00014634814333256107, "loss": 2.259, "step": 6607 }, { "epoch": 0.7577112716431602, "grad_norm": 0.284674838340585, "learning_rate": 0.00014621689791708537, "loss": 2.4184, "step": 6608 }, { "epoch": 0.7578259373925008, "grad_norm": 0.26071227690661253, "learning_rate": 0.00014608570129924492, "loss": 2.3996, "step": 6609 }, { "epoch": 0.7579406031418415, "grad_norm": 0.25387333916424853, "learning_rate": 0.00014595455349713583, "loss": 2.3992, "step": 6610 }, { "epoch": 0.7580552688911822, "grad_norm": 0.24547623314803763, "learning_rate": 0.00014582345452884726, "loss": 2.3626, "step": 6611 }, { "epoch": 0.7581699346405228, "grad_norm": 0.2540895935021022, "learning_rate": 0.0001456924044124619, "loss": 2.4054, "step": 6612 }, { "epoch": 0.7582846003898636, "grad_norm": 0.2587215324575647, "learning_rate": 0.00014556140316605566, "loss": 2.3148, "step": 6613 }, { "epoch": 0.7583992661392042, "grad_norm": 0.26255031206946017, "learning_rate": 0.00014543045080769762, "loss": 2.3216, "step": 6614 }, { "epoch": 0.7585139318885449, "grad_norm": 0.22832438793093263, "learning_rate": 0.00014529954735545016, "loss": 2.3179, "step": 6615 }, { "epoch": 0.7586285976378856, "grad_norm": 0.24235394915497896, "learning_rate": 0.00014516869282736911, "loss": 2.505, "step": 6616 }, { "epoch": 0.7587432633872262, "grad_norm": 0.2317537446371527, "learning_rate": 0.00014503788724150302, "loss": 2.3544, "step": 6617 }, { "epoch": 0.7588579291365669, "grad_norm": 0.25603494773416513, "learning_rate": 0.00014490713061589405, "loss": 2.4477, "step": 6618 }, { "epoch": 0.7589725948859076, "grad_norm": 0.23909052454498408, "learning_rate": 0.00014477642296857807, "loss": 2.3316, "step": 6619 }, { "epoch": 0.7590872606352482, "grad_norm": 0.2552529622855681, "learning_rate": 0.00014464576431758315, "loss": 2.3904, "step": 6620 }, { "epoch": 0.7592019263845889, "grad_norm": 0.2548669530931078, "learning_rate": 0.0001445151546809314, "loss": 2.4366, "step": 6621 }, { "epoch": 0.7593165921339295, "grad_norm": 0.2567057188057503, "learning_rate": 0.00014438459407663807, "loss": 2.3519, "step": 6622 }, { "epoch": 0.7594312578832703, "grad_norm": 0.2366407440583382, "learning_rate": 0.000144254082522711, "loss": 2.3935, "step": 6623 }, { "epoch": 0.759545923632611, "grad_norm": 0.27813547277917605, "learning_rate": 0.00014412362003715228, "loss": 2.391, "step": 6624 }, { "epoch": 0.7596605893819516, "grad_norm": 0.24437914698457763, "learning_rate": 0.00014399320663795672, "loss": 2.3812, "step": 6625 }, { "epoch": 0.7597752551312923, "grad_norm": 0.23773796960309754, "learning_rate": 0.00014386284234311204, "loss": 2.4151, "step": 6626 }, { "epoch": 0.759889920880633, "grad_norm": 0.28067293554375405, "learning_rate": 0.00014373252717059964, "loss": 2.4878, "step": 6627 }, { "epoch": 0.7600045866299736, "grad_norm": 0.24667583741589158, "learning_rate": 0.00014360226113839408, "loss": 2.5272, "step": 6628 }, { "epoch": 0.7601192523793143, "grad_norm": 0.21924024300205358, "learning_rate": 0.00014347204426446304, "loss": 2.3447, "step": 6629 }, { "epoch": 0.7602339181286549, "grad_norm": 0.26552192308108785, "learning_rate": 0.0001433418765667674, "loss": 2.3299, "step": 6630 }, { "epoch": 0.7603485838779956, "grad_norm": 0.2544526239602189, "learning_rate": 0.0001432117580632616, "loss": 2.3384, "step": 6631 }, { "epoch": 0.7604632496273364, "grad_norm": 0.241834312867679, "learning_rate": 0.00014308168877189253, "loss": 2.3787, "step": 6632 }, { "epoch": 0.760577915376677, "grad_norm": 0.26889820285213384, "learning_rate": 0.00014295166871060088, "loss": 2.4072, "step": 6633 }, { "epoch": 0.7606925811260177, "grad_norm": 0.24229494223821071, "learning_rate": 0.00014282169789732091, "loss": 2.3826, "step": 6634 }, { "epoch": 0.7608072468753583, "grad_norm": 0.28425367269356444, "learning_rate": 0.00014269177634997914, "loss": 2.2299, "step": 6635 }, { "epoch": 0.760921912624699, "grad_norm": 0.234340480475067, "learning_rate": 0.00014256190408649589, "loss": 2.3652, "step": 6636 }, { "epoch": 0.7610365783740397, "grad_norm": 0.2264685852149525, "learning_rate": 0.00014243208112478474, "loss": 2.4637, "step": 6637 }, { "epoch": 0.7611512441233803, "grad_norm": 0.262455095360687, "learning_rate": 0.0001423023074827518, "loss": 2.3766, "step": 6638 }, { "epoch": 0.761265909872721, "grad_norm": 0.24091388722021084, "learning_rate": 0.00014217258317829735, "loss": 2.3629, "step": 6639 }, { "epoch": 0.7613805756220617, "grad_norm": 0.2649176508161098, "learning_rate": 0.00014204290822931442, "loss": 2.4058, "step": 6640 }, { "epoch": 0.7614952413714023, "grad_norm": 0.2628773440882439, "learning_rate": 0.00014191328265368886, "loss": 2.3025, "step": 6641 }, { "epoch": 0.7616099071207431, "grad_norm": 0.2433668136557576, "learning_rate": 0.00014178370646930017, "loss": 2.3005, "step": 6642 }, { "epoch": 0.7617245728700837, "grad_norm": 0.268670919624635, "learning_rate": 0.000141654179694021, "loss": 2.4849, "step": 6643 }, { "epoch": 0.7618392386194244, "grad_norm": 0.3109078186889851, "learning_rate": 0.000141524702345717, "loss": 2.3522, "step": 6644 }, { "epoch": 0.7619539043687651, "grad_norm": 0.24743456555178936, "learning_rate": 0.0001413952744422472, "loss": 2.4379, "step": 6645 }, { "epoch": 0.7620685701181057, "grad_norm": 0.24247268933711663, "learning_rate": 0.0001412658960014638, "loss": 2.3638, "step": 6646 }, { "epoch": 0.7621832358674464, "grad_norm": 0.27971227884244154, "learning_rate": 0.0001411365670412118, "loss": 2.2063, "step": 6647 }, { "epoch": 0.762297901616787, "grad_norm": 0.24328706614973591, "learning_rate": 0.0001410072875793298, "loss": 2.4731, "step": 6648 }, { "epoch": 0.7624125673661277, "grad_norm": 0.26000443963884157, "learning_rate": 0.00014087805763364947, "loss": 2.2662, "step": 6649 }, { "epoch": 0.7625272331154684, "grad_norm": 0.2562946972523383, "learning_rate": 0.0001407488772219956, "loss": 2.3624, "step": 6650 }, { "epoch": 0.762641898864809, "grad_norm": 0.2902918259582824, "learning_rate": 0.00014061974636218617, "loss": 2.3978, "step": 6651 }, { "epoch": 0.7627565646141498, "grad_norm": 0.2785332006422166, "learning_rate": 0.00014049066507203244, "loss": 2.3816, "step": 6652 }, { "epoch": 0.7628712303634905, "grad_norm": 0.2600523856272482, "learning_rate": 0.00014036163336933832, "loss": 2.3424, "step": 6653 }, { "epoch": 0.7629858961128311, "grad_norm": 0.2591644829640088, "learning_rate": 0.00014023265127190187, "loss": 2.3293, "step": 6654 }, { "epoch": 0.7631005618621718, "grad_norm": 0.26723978082085725, "learning_rate": 0.00014010371879751322, "loss": 2.297, "step": 6655 }, { "epoch": 0.7632152276115124, "grad_norm": 0.28077701040527386, "learning_rate": 0.00013997483596395627, "loss": 2.4163, "step": 6656 }, { "epoch": 0.7633298933608531, "grad_norm": 0.2896252058771666, "learning_rate": 0.00013984600278900804, "loss": 2.4001, "step": 6657 }, { "epoch": 0.7634445591101938, "grad_norm": 0.24914817655355395, "learning_rate": 0.00013971721929043857, "loss": 2.4258, "step": 6658 }, { "epoch": 0.7635592248595344, "grad_norm": 0.2540792073362412, "learning_rate": 0.00013958848548601104, "loss": 2.2948, "step": 6659 }, { "epoch": 0.7636738906088751, "grad_norm": 0.25133489254091174, "learning_rate": 0.00013945980139348207, "loss": 2.4366, "step": 6660 }, { "epoch": 0.7637885563582159, "grad_norm": 0.2524374119126822, "learning_rate": 0.00013933116703060077, "loss": 2.4123, "step": 6661 }, { "epoch": 0.7639032221075565, "grad_norm": 0.268768194441521, "learning_rate": 0.0001392025824151101, "loss": 2.4821, "step": 6662 }, { "epoch": 0.7640178878568972, "grad_norm": 0.26056616480961897, "learning_rate": 0.00013907404756474567, "loss": 2.525, "step": 6663 }, { "epoch": 0.7641325536062378, "grad_norm": 0.3000439367305072, "learning_rate": 0.00013894556249723655, "loss": 2.5407, "step": 6664 }, { "epoch": 0.7642472193555785, "grad_norm": 0.25870897775299373, "learning_rate": 0.0001388171272303047, "loss": 2.2766, "step": 6665 }, { "epoch": 0.7643618851049192, "grad_norm": 0.23113707867982647, "learning_rate": 0.00013868874178166562, "loss": 2.2746, "step": 6666 }, { "epoch": 0.7644765508542598, "grad_norm": 0.22918475292273205, "learning_rate": 0.00013856040616902715, "loss": 2.3619, "step": 6667 }, { "epoch": 0.7645912166036005, "grad_norm": 0.25070597552018997, "learning_rate": 0.0001384321204100909, "loss": 2.4057, "step": 6668 }, { "epoch": 0.7647058823529411, "grad_norm": 0.2555856940874532, "learning_rate": 0.00013830388452255182, "loss": 2.4079, "step": 6669 }, { "epoch": 0.7648205481022818, "grad_norm": 0.25605074313532655, "learning_rate": 0.0001381756985240971, "loss": 2.4445, "step": 6670 }, { "epoch": 0.7649352138516226, "grad_norm": 0.24135209261256807, "learning_rate": 0.00013804756243240786, "loss": 2.4277, "step": 6671 }, { "epoch": 0.7650498796009632, "grad_norm": 0.2646191550267873, "learning_rate": 0.00013791947626515805, "loss": 2.4888, "step": 6672 }, { "epoch": 0.7651645453503039, "grad_norm": 0.2508601066718837, "learning_rate": 0.0001377914400400143, "loss": 2.47, "step": 6673 }, { "epoch": 0.7652792110996446, "grad_norm": 0.25034297759677093, "learning_rate": 0.00013766345377463725, "loss": 2.4777, "step": 6674 }, { "epoch": 0.7653938768489852, "grad_norm": 0.2375621951467455, "learning_rate": 0.00013753551748668007, "loss": 2.2888, "step": 6675 }, { "epoch": 0.7655085425983259, "grad_norm": 0.2725747605508751, "learning_rate": 0.00013740763119378898, "loss": 2.6211, "step": 6676 }, { "epoch": 0.7656232083476665, "grad_norm": 0.24579444362489467, "learning_rate": 0.00013727979491360347, "loss": 2.4411, "step": 6677 }, { "epoch": 0.7657378740970072, "grad_norm": 0.24996767475687234, "learning_rate": 0.00013715200866375627, "loss": 2.364, "step": 6678 }, { "epoch": 0.7658525398463479, "grad_norm": 0.24963513282280045, "learning_rate": 0.00013702427246187288, "loss": 2.3863, "step": 6679 }, { "epoch": 0.7659672055956885, "grad_norm": 0.2619643266332412, "learning_rate": 0.00013689658632557217, "loss": 2.4444, "step": 6680 }, { "epoch": 0.7660818713450293, "grad_norm": 0.2483229166621221, "learning_rate": 0.00013676895027246616, "loss": 2.4623, "step": 6681 }, { "epoch": 0.76619653709437, "grad_norm": 0.25409547386113196, "learning_rate": 0.00013664136432015944, "loss": 2.3516, "step": 6682 }, { "epoch": 0.7663112028437106, "grad_norm": 0.22683002366358138, "learning_rate": 0.00013651382848625022, "loss": 2.2265, "step": 6683 }, { "epoch": 0.7664258685930513, "grad_norm": 0.24179439370549818, "learning_rate": 0.0001363863427883299, "loss": 2.2857, "step": 6684 }, { "epoch": 0.7665405343423919, "grad_norm": 0.2550536490417984, "learning_rate": 0.00013625890724398238, "loss": 2.4005, "step": 6685 }, { "epoch": 0.7666552000917326, "grad_norm": 0.24149959111924035, "learning_rate": 0.00013613152187078503, "loss": 2.2975, "step": 6686 }, { "epoch": 0.7667698658410733, "grad_norm": 0.2723342907512863, "learning_rate": 0.00013600418668630843, "loss": 2.2921, "step": 6687 }, { "epoch": 0.7668845315904139, "grad_norm": 0.25620256232546235, "learning_rate": 0.00013587690170811562, "loss": 2.4593, "step": 6688 }, { "epoch": 0.7669991973397546, "grad_norm": 0.25299496307253233, "learning_rate": 0.00013574966695376352, "loss": 2.4248, "step": 6689 }, { "epoch": 0.7671138630890952, "grad_norm": 0.26404362104130036, "learning_rate": 0.00013562248244080178, "loss": 2.4834, "step": 6690 }, { "epoch": 0.767228528838436, "grad_norm": 0.268537337735386, "learning_rate": 0.00013549534818677278, "loss": 2.3818, "step": 6691 }, { "epoch": 0.7673431945877767, "grad_norm": 0.25040490214991357, "learning_rate": 0.0001353682642092124, "loss": 2.327, "step": 6692 }, { "epoch": 0.7674578603371173, "grad_norm": 0.2624302879955897, "learning_rate": 0.00013524123052564946, "loss": 2.4375, "step": 6693 }, { "epoch": 0.767572526086458, "grad_norm": 0.2546766462944758, "learning_rate": 0.00013511424715360583, "loss": 2.3069, "step": 6694 }, { "epoch": 0.7676871918357987, "grad_norm": 0.2591590947335354, "learning_rate": 0.00013498731411059646, "loss": 2.3715, "step": 6695 }, { "epoch": 0.7678018575851393, "grad_norm": 0.238711854993485, "learning_rate": 0.00013486043141412946, "loss": 2.382, "step": 6696 }, { "epoch": 0.76791652333448, "grad_norm": 0.26332454285062235, "learning_rate": 0.00013473359908170563, "loss": 2.3549, "step": 6697 }, { "epoch": 0.7680311890838206, "grad_norm": 0.2714399003481173, "learning_rate": 0.00013460681713081918, "loss": 2.409, "step": 6698 }, { "epoch": 0.7681458548331613, "grad_norm": 0.2535074321111449, "learning_rate": 0.0001344800855789573, "loss": 2.3398, "step": 6699 }, { "epoch": 0.768260520582502, "grad_norm": 0.2761185754544816, "learning_rate": 0.00013435340444360017, "loss": 2.4476, "step": 6700 }, { "epoch": 0.7683751863318427, "grad_norm": 0.2828665761643112, "learning_rate": 0.00013422677374222097, "loss": 2.4357, "step": 6701 }, { "epoch": 0.7684898520811834, "grad_norm": 0.2652456285604216, "learning_rate": 0.0001341001934922863, "loss": 2.442, "step": 6702 }, { "epoch": 0.768604517830524, "grad_norm": 0.2660266702564812, "learning_rate": 0.00013397366371125491, "loss": 2.4041, "step": 6703 }, { "epoch": 0.7687191835798647, "grad_norm": 0.28071514494688726, "learning_rate": 0.00013384718441657983, "loss": 2.5201, "step": 6704 }, { "epoch": 0.7688338493292054, "grad_norm": 0.24888585726959186, "learning_rate": 0.000133720755625706, "loss": 2.4086, "step": 6705 }, { "epoch": 0.768948515078546, "grad_norm": 0.2741294606970871, "learning_rate": 0.000133594377356072, "loss": 2.5103, "step": 6706 }, { "epoch": 0.7690631808278867, "grad_norm": 0.25793879299601646, "learning_rate": 0.00013346804962510934, "loss": 2.3862, "step": 6707 }, { "epoch": 0.7691778465772274, "grad_norm": 0.2669726901008002, "learning_rate": 0.00013334177245024248, "loss": 2.2828, "step": 6708 }, { "epoch": 0.769292512326568, "grad_norm": 0.2727491160952012, "learning_rate": 0.00013321554584888896, "loss": 2.3787, "step": 6709 }, { "epoch": 0.7694071780759087, "grad_norm": 0.24429506144682955, "learning_rate": 0.00013308936983845947, "loss": 2.3596, "step": 6710 }, { "epoch": 0.7695218438252494, "grad_norm": 0.2416728458294508, "learning_rate": 0.00013296324443635738, "loss": 2.3857, "step": 6711 }, { "epoch": 0.7696365095745901, "grad_norm": 0.28979540804612186, "learning_rate": 0.00013283716965997933, "loss": 2.4526, "step": 6712 }, { "epoch": 0.7697511753239308, "grad_norm": 0.2427351281685031, "learning_rate": 0.00013271114552671493, "loss": 2.4514, "step": 6713 }, { "epoch": 0.7698658410732714, "grad_norm": 0.26762829063114646, "learning_rate": 0.0001325851720539469, "loss": 2.3893, "step": 6714 }, { "epoch": 0.7699805068226121, "grad_norm": 0.2421377987805488, "learning_rate": 0.0001324592492590508, "loss": 2.3948, "step": 6715 }, { "epoch": 0.7700951725719528, "grad_norm": 0.2580493156200732, "learning_rate": 0.00013233337715939543, "loss": 2.4551, "step": 6716 }, { "epoch": 0.7702098383212934, "grad_norm": 0.28378972312913053, "learning_rate": 0.00013220755577234217, "loss": 2.5038, "step": 6717 }, { "epoch": 0.7703245040706341, "grad_norm": 0.23599179436306258, "learning_rate": 0.00013208178511524572, "loss": 2.3167, "step": 6718 }, { "epoch": 0.7704391698199747, "grad_norm": 0.2709500909450494, "learning_rate": 0.00013195606520545412, "loss": 2.4124, "step": 6719 }, { "epoch": 0.7705538355693154, "grad_norm": 0.2697822011116564, "learning_rate": 0.00013183039606030761, "loss": 2.4825, "step": 6720 }, { "epoch": 0.7706685013186562, "grad_norm": 0.24119099763353175, "learning_rate": 0.00013170477769714007, "loss": 2.4273, "step": 6721 }, { "epoch": 0.7707831670679968, "grad_norm": 0.2491072135795041, "learning_rate": 0.00013157921013327818, "loss": 2.4016, "step": 6722 }, { "epoch": 0.7708978328173375, "grad_norm": 0.26240384137404155, "learning_rate": 0.00013145369338604123, "loss": 2.3999, "step": 6723 }, { "epoch": 0.7710124985666781, "grad_norm": 0.25394163439574824, "learning_rate": 0.0001313282274727423, "loss": 2.4186, "step": 6724 }, { "epoch": 0.7711271643160188, "grad_norm": 0.29144253215777816, "learning_rate": 0.00013120281241068706, "loss": 2.3929, "step": 6725 }, { "epoch": 0.7712418300653595, "grad_norm": 0.24468945285131885, "learning_rate": 0.00013107744821717378, "loss": 2.4242, "step": 6726 }, { "epoch": 0.7713564958147001, "grad_norm": 0.23992107096356521, "learning_rate": 0.00013095213490949416, "loss": 2.4502, "step": 6727 }, { "epoch": 0.7714711615640408, "grad_norm": 0.25561032155131896, "learning_rate": 0.00013082687250493287, "loss": 2.4754, "step": 6728 }, { "epoch": 0.7715858273133815, "grad_norm": 0.24810590538965988, "learning_rate": 0.00013070166102076741, "loss": 2.3403, "step": 6729 }, { "epoch": 0.7717004930627221, "grad_norm": 0.2535647232351421, "learning_rate": 0.00013057650047426834, "loss": 2.4285, "step": 6730 }, { "epoch": 0.7718151588120629, "grad_norm": 0.2607598592409844, "learning_rate": 0.00013045139088269935, "loss": 2.4769, "step": 6731 }, { "epoch": 0.7719298245614035, "grad_norm": 0.24281646364076165, "learning_rate": 0.00013032633226331663, "loss": 2.3052, "step": 6732 }, { "epoch": 0.7720444903107442, "grad_norm": 0.27588018337218645, "learning_rate": 0.0001302013246333696, "loss": 2.35, "step": 6733 }, { "epoch": 0.7721591560600849, "grad_norm": 0.23288382665180205, "learning_rate": 0.0001300763680101011, "loss": 2.4203, "step": 6734 }, { "epoch": 0.7722738218094255, "grad_norm": 0.26362651134625326, "learning_rate": 0.00012995146241074606, "loss": 2.3343, "step": 6735 }, { "epoch": 0.7723884875587662, "grad_norm": 0.23797718744264573, "learning_rate": 0.00012982660785253308, "loss": 2.4258, "step": 6736 }, { "epoch": 0.7725031533081068, "grad_norm": 0.23939278455290353, "learning_rate": 0.00012970180435268352, "loss": 2.4245, "step": 6737 }, { "epoch": 0.7726178190574475, "grad_norm": 0.2531636203113712, "learning_rate": 0.00012957705192841125, "loss": 2.4769, "step": 6738 }, { "epoch": 0.7727324848067882, "grad_norm": 0.2426578461368766, "learning_rate": 0.0001294523505969239, "loss": 2.5486, "step": 6739 }, { "epoch": 0.7728471505561288, "grad_norm": 0.28811562532877744, "learning_rate": 0.00012932770037542163, "loss": 2.4666, "step": 6740 }, { "epoch": 0.7729618163054696, "grad_norm": 0.266875482029193, "learning_rate": 0.0001292031012810973, "loss": 2.4406, "step": 6741 }, { "epoch": 0.7730764820548103, "grad_norm": 0.23962694371038845, "learning_rate": 0.0001290785533311371, "loss": 2.3326, "step": 6742 }, { "epoch": 0.7731911478041509, "grad_norm": 0.2922606547071169, "learning_rate": 0.00012895405654272013, "loss": 2.4653, "step": 6743 }, { "epoch": 0.7733058135534916, "grad_norm": 0.2621888353280769, "learning_rate": 0.00012882961093301821, "loss": 2.4894, "step": 6744 }, { "epoch": 0.7734204793028322, "grad_norm": 0.24649893214913532, "learning_rate": 0.00012870521651919638, "loss": 2.3794, "step": 6745 }, { "epoch": 0.7735351450521729, "grad_norm": 0.24967562397585344, "learning_rate": 0.00012858087331841245, "loss": 2.3528, "step": 6746 }, { "epoch": 0.7736498108015136, "grad_norm": 0.25300181162821855, "learning_rate": 0.00012845658134781707, "loss": 2.3722, "step": 6747 }, { "epoch": 0.7737644765508542, "grad_norm": 0.2558374445529858, "learning_rate": 0.00012833234062455408, "loss": 2.2594, "step": 6748 }, { "epoch": 0.7738791423001949, "grad_norm": 0.2309763115483486, "learning_rate": 0.00012820815116576002, "loss": 2.397, "step": 6749 }, { "epoch": 0.7739938080495357, "grad_norm": 0.24819263235431924, "learning_rate": 0.00012808401298856448, "loss": 2.4286, "step": 6750 }, { "epoch": 0.7741084737988763, "grad_norm": 0.2644804030517646, "learning_rate": 0.00012795992611009, "loss": 2.3965, "step": 6751 }, { "epoch": 0.774223139548217, "grad_norm": 0.2621979907022684, "learning_rate": 0.00012783589054745216, "loss": 2.3417, "step": 6752 }, { "epoch": 0.7743378052975576, "grad_norm": 0.2547485839487469, "learning_rate": 0.0001277119063177588, "loss": 2.3489, "step": 6753 }, { "epoch": 0.7744524710468983, "grad_norm": 0.24904654280401417, "learning_rate": 0.00012758797343811185, "loss": 2.3808, "step": 6754 }, { "epoch": 0.774567136796239, "grad_norm": 0.26786175103033183, "learning_rate": 0.00012746409192560494, "loss": 2.4109, "step": 6755 }, { "epoch": 0.7746818025455796, "grad_norm": 0.2515542511824764, "learning_rate": 0.0001273402617973254, "loss": 2.4858, "step": 6756 }, { "epoch": 0.7747964682949203, "grad_norm": 0.23798496270028302, "learning_rate": 0.0001272164830703532, "loss": 2.3564, "step": 6757 }, { "epoch": 0.7749111340442609, "grad_norm": 0.23510520203873148, "learning_rate": 0.00012709275576176126, "loss": 2.4671, "step": 6758 }, { "epoch": 0.7750257997936016, "grad_norm": 0.2652416024050623, "learning_rate": 0.0001269690798886154, "loss": 2.3175, "step": 6759 }, { "epoch": 0.7751404655429424, "grad_norm": 0.2288314665896917, "learning_rate": 0.0001268454554679745, "loss": 2.4389, "step": 6760 }, { "epoch": 0.775255131292283, "grad_norm": 0.31235679679253275, "learning_rate": 0.00012672188251688988, "loss": 2.4288, "step": 6761 }, { "epoch": 0.7753697970416237, "grad_norm": 0.24916806110380782, "learning_rate": 0.0001265983610524063, "loss": 2.4176, "step": 6762 }, { "epoch": 0.7754844627909644, "grad_norm": 0.26552106376410894, "learning_rate": 0.00012647489109156106, "loss": 2.3202, "step": 6763 }, { "epoch": 0.775599128540305, "grad_norm": 0.23830204637106392, "learning_rate": 0.00012635147265138458, "loss": 2.4354, "step": 6764 }, { "epoch": 0.7757137942896457, "grad_norm": 0.2717537917902372, "learning_rate": 0.0001262281057489001, "loss": 2.3551, "step": 6765 }, { "epoch": 0.7758284600389863, "grad_norm": 0.2416107359097658, "learning_rate": 0.00012610479040112377, "loss": 2.4243, "step": 6766 }, { "epoch": 0.775943125788327, "grad_norm": 0.2363310438225181, "learning_rate": 0.0001259815266250644, "loss": 2.4865, "step": 6767 }, { "epoch": 0.7760577915376677, "grad_norm": 0.2512659079546379, "learning_rate": 0.00012585831443772384, "loss": 2.51, "step": 6768 }, { "epoch": 0.7761724572870083, "grad_norm": 0.2503441228925365, "learning_rate": 0.00012573515385609735, "loss": 2.3356, "step": 6769 }, { "epoch": 0.7762871230363491, "grad_norm": 0.2489387387398845, "learning_rate": 0.00012561204489717204, "loss": 2.4542, "step": 6770 }, { "epoch": 0.7764017887856897, "grad_norm": 0.2501216635199761, "learning_rate": 0.0001254889875779287, "loss": 2.2867, "step": 6771 }, { "epoch": 0.7765164545350304, "grad_norm": 0.23454760424364315, "learning_rate": 0.00012536598191534081, "loss": 2.3632, "step": 6772 }, { "epoch": 0.7766311202843711, "grad_norm": 0.25266289697076355, "learning_rate": 0.00012524302792637427, "loss": 2.4499, "step": 6773 }, { "epoch": 0.7767457860337117, "grad_norm": 0.2257019356786494, "learning_rate": 0.00012512012562798864, "loss": 2.3209, "step": 6774 }, { "epoch": 0.7768604517830524, "grad_norm": 0.2490724954470137, "learning_rate": 0.00012499727503713593, "loss": 2.3692, "step": 6775 }, { "epoch": 0.7769751175323931, "grad_norm": 0.25975149663652675, "learning_rate": 0.0001248744761707608, "loss": 2.401, "step": 6776 }, { "epoch": 0.7770897832817337, "grad_norm": 0.2616494721500692, "learning_rate": 0.00012475172904580117, "loss": 2.3494, "step": 6777 }, { "epoch": 0.7772044490310744, "grad_norm": 0.26254857361496403, "learning_rate": 0.00012462903367918766, "loss": 2.3636, "step": 6778 }, { "epoch": 0.777319114780415, "grad_norm": 0.25641683363116075, "learning_rate": 0.00012450639008784365, "loss": 2.2955, "step": 6779 }, { "epoch": 0.7774337805297558, "grad_norm": 0.24962073410301486, "learning_rate": 0.00012438379828868563, "loss": 2.4942, "step": 6780 }, { "epoch": 0.7775484462790965, "grad_norm": 0.28383966436989877, "learning_rate": 0.00012426125829862288, "loss": 2.3656, "step": 6781 }, { "epoch": 0.7776631120284371, "grad_norm": 0.2734082063305767, "learning_rate": 0.00012413877013455716, "loss": 2.3577, "step": 6782 }, { "epoch": 0.7777777777777778, "grad_norm": 0.28827860255289034, "learning_rate": 0.0001240163338133834, "loss": 2.457, "step": 6783 }, { "epoch": 0.7778924435271185, "grad_norm": 0.29734988787010436, "learning_rate": 0.0001238939493519897, "loss": 2.376, "step": 6784 }, { "epoch": 0.7780071092764591, "grad_norm": 0.26946110298299003, "learning_rate": 0.0001237716167672564, "loss": 2.5524, "step": 6785 }, { "epoch": 0.7781217750257998, "grad_norm": 0.248865538876543, "learning_rate": 0.000123649336076057, "loss": 2.3772, "step": 6786 }, { "epoch": 0.7782364407751404, "grad_norm": 0.271226080741363, "learning_rate": 0.0001235271072952579, "loss": 2.3481, "step": 6787 }, { "epoch": 0.7783511065244811, "grad_norm": 0.26892458397232577, "learning_rate": 0.00012340493044171787, "loss": 2.4388, "step": 6788 }, { "epoch": 0.7784657722738219, "grad_norm": 0.28585478622875443, "learning_rate": 0.0001232828055322892, "loss": 2.3622, "step": 6789 }, { "epoch": 0.7785804380231625, "grad_norm": 0.23281361216201132, "learning_rate": 0.00012316073258381683, "loss": 2.4165, "step": 6790 }, { "epoch": 0.7786951037725032, "grad_norm": 0.26412077109659277, "learning_rate": 0.00012303871161313805, "loss": 2.4098, "step": 6791 }, { "epoch": 0.7788097695218438, "grad_norm": 0.2631627870531466, "learning_rate": 0.00012291674263708346, "loss": 2.448, "step": 6792 }, { "epoch": 0.7789244352711845, "grad_norm": 0.25745554900012735, "learning_rate": 0.00012279482567247634, "loss": 2.3164, "step": 6793 }, { "epoch": 0.7790391010205252, "grad_norm": 0.25013605326549, "learning_rate": 0.00012267296073613287, "loss": 2.3426, "step": 6794 }, { "epoch": 0.7791537667698658, "grad_norm": 0.3236628537191183, "learning_rate": 0.00012255114784486194, "loss": 2.4647, "step": 6795 }, { "epoch": 0.7792684325192065, "grad_norm": 0.2839006306201851, "learning_rate": 0.00012242938701546545, "loss": 2.2653, "step": 6796 }, { "epoch": 0.7793830982685472, "grad_norm": 0.25635341730623284, "learning_rate": 0.00012230767826473777, "loss": 2.3439, "step": 6797 }, { "epoch": 0.7794977640178878, "grad_norm": 0.2627817694497659, "learning_rate": 0.0001221860216094664, "loss": 2.559, "step": 6798 }, { "epoch": 0.7796124297672286, "grad_norm": 0.2421796728964254, "learning_rate": 0.00012206441706643151, "loss": 2.3482, "step": 6799 }, { "epoch": 0.7797270955165692, "grad_norm": 0.25841308985607714, "learning_rate": 0.00012194286465240623, "loss": 2.5083, "step": 6800 }, { "epoch": 0.7798417612659099, "grad_norm": 0.24702732459447166, "learning_rate": 0.00012182136438415631, "loss": 2.3457, "step": 6801 }, { "epoch": 0.7799564270152506, "grad_norm": 0.24193101268822317, "learning_rate": 0.00012169991627844063, "loss": 2.4964, "step": 6802 }, { "epoch": 0.7800710927645912, "grad_norm": 0.26623638447971226, "learning_rate": 0.00012157852035201012, "loss": 2.3721, "step": 6803 }, { "epoch": 0.7801857585139319, "grad_norm": 0.25753972279585624, "learning_rate": 0.00012145717662160966, "loss": 2.5166, "step": 6804 }, { "epoch": 0.7803004242632725, "grad_norm": 0.24598753773961207, "learning_rate": 0.0001213358851039758, "loss": 2.4094, "step": 6805 }, { "epoch": 0.7804150900126132, "grad_norm": 0.2548999635180375, "learning_rate": 0.00012121464581583868, "loss": 2.4119, "step": 6806 }, { "epoch": 0.7805297557619539, "grad_norm": 0.22325814670259, "learning_rate": 0.0001210934587739208, "loss": 2.4082, "step": 6807 }, { "epoch": 0.7806444215112945, "grad_norm": 0.25903921462501833, "learning_rate": 0.00012097232399493768, "loss": 2.4685, "step": 6808 }, { "epoch": 0.7807590872606353, "grad_norm": 0.25636680944453333, "learning_rate": 0.00012085124149559757, "loss": 2.514, "step": 6809 }, { "epoch": 0.780873753009976, "grad_norm": 0.24377042532608056, "learning_rate": 0.00012073021129260158, "loss": 2.4624, "step": 6810 }, { "epoch": 0.7809884187593166, "grad_norm": 0.26951914093943835, "learning_rate": 0.00012060923340264329, "loss": 2.4367, "step": 6811 }, { "epoch": 0.7811030845086573, "grad_norm": 0.2667855628320824, "learning_rate": 0.00012048830784240939, "loss": 2.6164, "step": 6812 }, { "epoch": 0.7812177502579979, "grad_norm": 0.266745529842439, "learning_rate": 0.00012036743462857924, "loss": 2.4539, "step": 6813 }, { "epoch": 0.7813324160073386, "grad_norm": 0.25270594220148607, "learning_rate": 0.00012024661377782503, "loss": 2.5068, "step": 6814 }, { "epoch": 0.7814470817566793, "grad_norm": 0.23801915189603112, "learning_rate": 0.00012012584530681175, "loss": 2.2616, "step": 6815 }, { "epoch": 0.7815617475060199, "grad_norm": 0.29365007203058474, "learning_rate": 0.00012000512923219714, "loss": 2.4966, "step": 6816 }, { "epoch": 0.7816764132553606, "grad_norm": 0.222740240741158, "learning_rate": 0.00011988446557063148, "loss": 2.3433, "step": 6817 }, { "epoch": 0.7817910790047014, "grad_norm": 0.22859403909681947, "learning_rate": 0.00011976385433875797, "loss": 2.3845, "step": 6818 }, { "epoch": 0.781905744754042, "grad_norm": 0.24469848142173548, "learning_rate": 0.00011964329555321307, "loss": 2.2305, "step": 6819 }, { "epoch": 0.7820204105033827, "grad_norm": 0.2374452383230014, "learning_rate": 0.0001195227892306252, "loss": 2.422, "step": 6820 }, { "epoch": 0.7821350762527233, "grad_norm": 0.24625181140730534, "learning_rate": 0.00011940233538761597, "loss": 2.5437, "step": 6821 }, { "epoch": 0.782249742002064, "grad_norm": 0.259538065771925, "learning_rate": 0.00011928193404079967, "loss": 2.3026, "step": 6822 }, { "epoch": 0.7823644077514047, "grad_norm": 0.23823653868265982, "learning_rate": 0.0001191615852067835, "loss": 2.2802, "step": 6823 }, { "epoch": 0.7824790735007453, "grad_norm": 0.2510478757940728, "learning_rate": 0.00011904128890216715, "loss": 2.4359, "step": 6824 }, { "epoch": 0.782593739250086, "grad_norm": 0.24451770316097682, "learning_rate": 0.00011892104514354347, "loss": 2.2862, "step": 6825 }, { "epoch": 0.7827084049994266, "grad_norm": 0.2372839016313924, "learning_rate": 0.0001188008539474974, "loss": 2.3743, "step": 6826 }, { "epoch": 0.7828230707487673, "grad_norm": 0.2609022122381084, "learning_rate": 0.0001186807153306072, "loss": 2.267, "step": 6827 }, { "epoch": 0.782937736498108, "grad_norm": 0.2776608846118546, "learning_rate": 0.00011856062930944372, "loss": 2.4965, "step": 6828 }, { "epoch": 0.7830524022474487, "grad_norm": 0.24238856462568917, "learning_rate": 0.00011844059590057054, "loss": 2.4751, "step": 6829 }, { "epoch": 0.7831670679967894, "grad_norm": 0.25136234726901846, "learning_rate": 0.00011832061512054398, "loss": 2.2689, "step": 6830 }, { "epoch": 0.7832817337461301, "grad_norm": 0.2668117376700046, "learning_rate": 0.00011820068698591324, "loss": 2.2448, "step": 6831 }, { "epoch": 0.7833963994954707, "grad_norm": 0.2351329250658453, "learning_rate": 0.00011808081151321982, "loss": 2.3166, "step": 6832 }, { "epoch": 0.7835110652448114, "grad_norm": 0.2603827509706882, "learning_rate": 0.00011796098871899824, "loss": 2.2767, "step": 6833 }, { "epoch": 0.783625730994152, "grad_norm": 0.2849077807721982, "learning_rate": 0.00011784121861977625, "loss": 2.3031, "step": 6834 }, { "epoch": 0.7837403967434927, "grad_norm": 0.2521469876072976, "learning_rate": 0.00011772150123207343, "loss": 2.372, "step": 6835 }, { "epoch": 0.7838550624928334, "grad_norm": 0.280494473510368, "learning_rate": 0.0001176018365724026, "loss": 2.4214, "step": 6836 }, { "epoch": 0.783969728242174, "grad_norm": 0.263191933019923, "learning_rate": 0.00011748222465726937, "loss": 2.3821, "step": 6837 }, { "epoch": 0.7840843939915147, "grad_norm": 0.2413776709440937, "learning_rate": 0.00011736266550317154, "loss": 2.3315, "step": 6838 }, { "epoch": 0.7841990597408554, "grad_norm": 0.26708315330374316, "learning_rate": 0.00011724315912660038, "loss": 2.467, "step": 6839 }, { "epoch": 0.7843137254901961, "grad_norm": 0.24713110207981037, "learning_rate": 0.00011712370554403956, "loss": 2.4164, "step": 6840 }, { "epoch": 0.7844283912395368, "grad_norm": 0.27159626918779534, "learning_rate": 0.00011700430477196516, "loss": 2.3713, "step": 6841 }, { "epoch": 0.7845430569888774, "grad_norm": 0.2706520747692881, "learning_rate": 0.0001168849568268463, "loss": 2.4882, "step": 6842 }, { "epoch": 0.7846577227382181, "grad_norm": 0.24992521687614402, "learning_rate": 0.00011676566172514485, "loss": 2.4342, "step": 6843 }, { "epoch": 0.7847723884875588, "grad_norm": 0.24723570819689836, "learning_rate": 0.00011664641948331528, "loss": 2.4522, "step": 6844 }, { "epoch": 0.7848870542368994, "grad_norm": 0.255953626650492, "learning_rate": 0.0001165272301178047, "loss": 2.4019, "step": 6845 }, { "epoch": 0.7850017199862401, "grad_norm": 0.24775299340232967, "learning_rate": 0.00011640809364505329, "loss": 2.4685, "step": 6846 }, { "epoch": 0.7851163857355807, "grad_norm": 0.2573375269025107, "learning_rate": 0.00011628901008149323, "loss": 2.4114, "step": 6847 }, { "epoch": 0.7852310514849214, "grad_norm": 0.24830087530723977, "learning_rate": 0.00011616997944355, "loss": 2.2887, "step": 6848 }, { "epoch": 0.7853457172342622, "grad_norm": 0.25495053815830065, "learning_rate": 0.00011605100174764172, "loss": 2.4399, "step": 6849 }, { "epoch": 0.7854603829836028, "grad_norm": 0.2463684664289024, "learning_rate": 0.00011593207701017894, "loss": 2.4928, "step": 6850 }, { "epoch": 0.7855750487329435, "grad_norm": 0.2466823067834659, "learning_rate": 0.00011581320524756517, "loss": 2.4406, "step": 6851 }, { "epoch": 0.7856897144822842, "grad_norm": 0.2411050771898711, "learning_rate": 0.00011569438647619662, "loss": 2.4069, "step": 6852 }, { "epoch": 0.7858043802316248, "grad_norm": 0.254349851043376, "learning_rate": 0.00011557562071246163, "loss": 2.5172, "step": 6853 }, { "epoch": 0.7859190459809655, "grad_norm": 0.25046515278037307, "learning_rate": 0.00011545690797274228, "loss": 2.4405, "step": 6854 }, { "epoch": 0.7860337117303061, "grad_norm": 0.2544690464158616, "learning_rate": 0.00011533824827341227, "loss": 2.3124, "step": 6855 }, { "epoch": 0.7861483774796468, "grad_norm": 0.2548610387103973, "learning_rate": 0.00011521964163083853, "loss": 2.461, "step": 6856 }, { "epoch": 0.7862630432289875, "grad_norm": 0.21374969542326022, "learning_rate": 0.0001151010880613807, "loss": 2.4123, "step": 6857 }, { "epoch": 0.7863777089783281, "grad_norm": 0.24147315581330375, "learning_rate": 0.00011498258758139096, "loss": 2.3451, "step": 6858 }, { "epoch": 0.7864923747276689, "grad_norm": 0.24057022307280926, "learning_rate": 0.00011486414020721409, "loss": 2.435, "step": 6859 }, { "epoch": 0.7866070404770095, "grad_norm": 0.2495311796770366, "learning_rate": 0.00011474574595518777, "loss": 2.5052, "step": 6860 }, { "epoch": 0.7867217062263502, "grad_norm": 0.22915948494123575, "learning_rate": 0.0001146274048416423, "loss": 2.3789, "step": 6861 }, { "epoch": 0.7868363719756909, "grad_norm": 0.2760430460121054, "learning_rate": 0.00011450911688290033, "loss": 2.4967, "step": 6862 }, { "epoch": 0.7869510377250315, "grad_norm": 0.2475679763885997, "learning_rate": 0.00011439088209527754, "loss": 2.3535, "step": 6863 }, { "epoch": 0.7870657034743722, "grad_norm": 0.2711487140131415, "learning_rate": 0.00011427270049508221, "loss": 2.4046, "step": 6864 }, { "epoch": 0.7871803692237129, "grad_norm": 0.25477261163360804, "learning_rate": 0.00011415457209861524, "loss": 2.403, "step": 6865 }, { "epoch": 0.7872950349730535, "grad_norm": 0.2508496060122097, "learning_rate": 0.00011403649692217011, "loss": 2.3584, "step": 6866 }, { "epoch": 0.7874097007223942, "grad_norm": 0.25643849399689145, "learning_rate": 0.00011391847498203328, "loss": 2.4111, "step": 6867 }, { "epoch": 0.7875243664717348, "grad_norm": 0.251245652083769, "learning_rate": 0.00011380050629448313, "loss": 2.4071, "step": 6868 }, { "epoch": 0.7876390322210756, "grad_norm": 0.25048520917210765, "learning_rate": 0.0001136825908757918, "loss": 2.2692, "step": 6869 }, { "epoch": 0.7877536979704163, "grad_norm": 0.2622116550506559, "learning_rate": 0.00011356472874222307, "loss": 2.3229, "step": 6870 }, { "epoch": 0.7878683637197569, "grad_norm": 0.25364752025709036, "learning_rate": 0.00011344691991003386, "loss": 2.2759, "step": 6871 }, { "epoch": 0.7879830294690976, "grad_norm": 0.23652624271123546, "learning_rate": 0.00011332916439547363, "loss": 2.3852, "step": 6872 }, { "epoch": 0.7880976952184382, "grad_norm": 0.260825287035466, "learning_rate": 0.00011321146221478462, "loss": 2.279, "step": 6873 }, { "epoch": 0.7882123609677789, "grad_norm": 0.2535277582055584, "learning_rate": 0.0001130938133842015, "loss": 2.3158, "step": 6874 }, { "epoch": 0.7883270267171196, "grad_norm": 0.24090115930141076, "learning_rate": 0.00011297621791995188, "loss": 2.4916, "step": 6875 }, { "epoch": 0.7884416924664602, "grad_norm": 0.27281986870535163, "learning_rate": 0.00011285867583825549, "loss": 2.5464, "step": 6876 }, { "epoch": 0.788556358215801, "grad_norm": 0.2717714055125925, "learning_rate": 0.00011274118715532522, "loss": 2.3902, "step": 6877 }, { "epoch": 0.7886710239651417, "grad_norm": 0.27084529366944915, "learning_rate": 0.00011262375188736634, "loss": 2.4643, "step": 6878 }, { "epoch": 0.7887856897144823, "grad_norm": 0.2683490756462135, "learning_rate": 0.00011250637005057685, "loss": 2.4136, "step": 6879 }, { "epoch": 0.788900355463823, "grad_norm": 0.26444407424945043, "learning_rate": 0.00011238904166114727, "loss": 2.5191, "step": 6880 }, { "epoch": 0.7890150212131636, "grad_norm": 0.25597989549114103, "learning_rate": 0.00011227176673526101, "loss": 2.3583, "step": 6881 }, { "epoch": 0.7891296869625043, "grad_norm": 0.2394916937439897, "learning_rate": 0.00011215454528909358, "loss": 2.3644, "step": 6882 }, { "epoch": 0.789244352711845, "grad_norm": 0.2905449000116514, "learning_rate": 0.00011203737733881353, "loss": 2.2051, "step": 6883 }, { "epoch": 0.7893590184611856, "grad_norm": 0.29653055768440506, "learning_rate": 0.00011192026290058233, "loss": 2.367, "step": 6884 }, { "epoch": 0.7894736842105263, "grad_norm": 0.24697146859657346, "learning_rate": 0.00011180320199055322, "loss": 2.5159, "step": 6885 }, { "epoch": 0.789588349959867, "grad_norm": 0.24185975784157224, "learning_rate": 0.00011168619462487267, "loss": 2.4004, "step": 6886 }, { "epoch": 0.7897030157092076, "grad_norm": 0.27624984270824127, "learning_rate": 0.00011156924081967978, "loss": 2.5588, "step": 6887 }, { "epoch": 0.7898176814585484, "grad_norm": 0.24214968784755503, "learning_rate": 0.00011145234059110571, "loss": 2.4612, "step": 6888 }, { "epoch": 0.789932347207889, "grad_norm": 0.25095297246538084, "learning_rate": 0.00011133549395527497, "loss": 2.3112, "step": 6889 }, { "epoch": 0.7900470129572297, "grad_norm": 0.2650610791748218, "learning_rate": 0.00011121870092830439, "loss": 2.4206, "step": 6890 }, { "epoch": 0.7901616787065704, "grad_norm": 0.2524768539756423, "learning_rate": 0.00011110196152630303, "loss": 2.3046, "step": 6891 }, { "epoch": 0.790276344455911, "grad_norm": 0.26181449084924485, "learning_rate": 0.00011098527576537299, "loss": 2.3896, "step": 6892 }, { "epoch": 0.7903910102052517, "grad_norm": 0.23455171418778667, "learning_rate": 0.00011086864366160887, "loss": 2.326, "step": 6893 }, { "epoch": 0.7905056759545923, "grad_norm": 0.26729612630694116, "learning_rate": 0.00011075206523109783, "loss": 2.3269, "step": 6894 }, { "epoch": 0.790620341703933, "grad_norm": 0.24974223499420717, "learning_rate": 0.00011063554048991964, "loss": 2.399, "step": 6895 }, { "epoch": 0.7907350074532737, "grad_norm": 0.2469709455907023, "learning_rate": 0.00011051906945414686, "loss": 2.3645, "step": 6896 }, { "epoch": 0.7908496732026143, "grad_norm": 0.2667849329771901, "learning_rate": 0.00011040265213984407, "loss": 2.4399, "step": 6897 }, { "epoch": 0.7909643389519551, "grad_norm": 0.26081779772397223, "learning_rate": 0.00011028628856306905, "loss": 2.3812, "step": 6898 }, { "epoch": 0.7910790047012958, "grad_norm": 0.2549624530959558, "learning_rate": 0.00011016997873987189, "loss": 2.4652, "step": 6899 }, { "epoch": 0.7911936704506364, "grad_norm": 0.24701631797960671, "learning_rate": 0.00011005372268629538, "loss": 2.3831, "step": 6900 }, { "epoch": 0.7913083361999771, "grad_norm": 0.2333277746841834, "learning_rate": 0.00010993752041837474, "loss": 2.4187, "step": 6901 }, { "epoch": 0.7914230019493177, "grad_norm": 0.24730614753844446, "learning_rate": 0.00010982137195213799, "loss": 2.4569, "step": 6902 }, { "epoch": 0.7915376676986584, "grad_norm": 0.24357275050037164, "learning_rate": 0.00010970527730360524, "loss": 2.4461, "step": 6903 }, { "epoch": 0.7916523334479991, "grad_norm": 0.2545264542299276, "learning_rate": 0.00010958923648878993, "loss": 2.382, "step": 6904 }, { "epoch": 0.7917669991973397, "grad_norm": 0.2515001283477543, "learning_rate": 0.0001094732495236977, "loss": 2.3751, "step": 6905 }, { "epoch": 0.7918816649466804, "grad_norm": 0.25198306552429056, "learning_rate": 0.00010935731642432644, "loss": 2.4867, "step": 6906 }, { "epoch": 0.791996330696021, "grad_norm": 0.25088314249643856, "learning_rate": 0.00010924143720666708, "loss": 2.41, "step": 6907 }, { "epoch": 0.7921109964453618, "grad_norm": 0.2582129420231825, "learning_rate": 0.00010912561188670284, "loss": 2.2643, "step": 6908 }, { "epoch": 0.7922256621947025, "grad_norm": 0.24178145220770467, "learning_rate": 0.00010900984048040974, "loss": 2.4595, "step": 6909 }, { "epoch": 0.7923403279440431, "grad_norm": 0.25189715797031564, "learning_rate": 0.00010889412300375623, "loss": 2.3524, "step": 6910 }, { "epoch": 0.7924549936933838, "grad_norm": 0.2577826975392892, "learning_rate": 0.00010877845947270337, "loss": 2.3243, "step": 6911 }, { "epoch": 0.7925696594427245, "grad_norm": 0.2508361227879777, "learning_rate": 0.00010866284990320457, "loss": 2.5711, "step": 6912 }, { "epoch": 0.7926843251920651, "grad_norm": 0.29107575404734926, "learning_rate": 0.00010854729431120608, "loss": 2.5415, "step": 6913 }, { "epoch": 0.7927989909414058, "grad_norm": 0.24585978633505923, "learning_rate": 0.00010843179271264652, "loss": 2.3768, "step": 6914 }, { "epoch": 0.7929136566907464, "grad_norm": 0.27533813824283, "learning_rate": 0.00010831634512345722, "loss": 2.2871, "step": 6915 }, { "epoch": 0.7930283224400871, "grad_norm": 0.2747661895741441, "learning_rate": 0.00010820095155956194, "loss": 2.4561, "step": 6916 }, { "epoch": 0.7931429881894279, "grad_norm": 0.2696816385766819, "learning_rate": 0.00010808561203687712, "loss": 2.3377, "step": 6917 }, { "epoch": 0.7932576539387685, "grad_norm": 0.25766336227000675, "learning_rate": 0.00010797032657131135, "loss": 2.3815, "step": 6918 }, { "epoch": 0.7933723196881092, "grad_norm": 0.2606992113217092, "learning_rate": 0.00010785509517876646, "loss": 2.419, "step": 6919 }, { "epoch": 0.7934869854374499, "grad_norm": 0.2501851928546129, "learning_rate": 0.00010773991787513615, "loss": 2.314, "step": 6920 }, { "epoch": 0.7936016511867905, "grad_norm": 0.2530499049338274, "learning_rate": 0.00010762479467630698, "loss": 2.2749, "step": 6921 }, { "epoch": 0.7937163169361312, "grad_norm": 0.2616008267612015, "learning_rate": 0.00010750972559815803, "loss": 2.4719, "step": 6922 }, { "epoch": 0.7938309826854718, "grad_norm": 0.2660578440712656, "learning_rate": 0.00010739471065656092, "loss": 2.4008, "step": 6923 }, { "epoch": 0.7939456484348125, "grad_norm": 0.27026244871971705, "learning_rate": 0.00010727974986737971, "loss": 2.3796, "step": 6924 }, { "epoch": 0.7940603141841532, "grad_norm": 0.2622607520937587, "learning_rate": 0.00010716484324647119, "loss": 2.3566, "step": 6925 }, { "epoch": 0.7941749799334938, "grad_norm": 0.2659642750302182, "learning_rate": 0.00010704999080968431, "loss": 2.3265, "step": 6926 }, { "epoch": 0.7942896456828346, "grad_norm": 0.2601833551664661, "learning_rate": 0.00010693519257286094, "loss": 2.4049, "step": 6927 }, { "epoch": 0.7944043114321752, "grad_norm": 0.25758034533303387, "learning_rate": 0.0001068204485518352, "loss": 2.5314, "step": 6928 }, { "epoch": 0.7945189771815159, "grad_norm": 0.25068599214894854, "learning_rate": 0.00010670575876243388, "loss": 2.4118, "step": 6929 }, { "epoch": 0.7946336429308566, "grad_norm": 0.2806115194867441, "learning_rate": 0.00010659112322047631, "loss": 2.4053, "step": 6930 }, { "epoch": 0.7947483086801972, "grad_norm": 0.2750254357405606, "learning_rate": 0.00010647654194177436, "loss": 2.3932, "step": 6931 }, { "epoch": 0.7948629744295379, "grad_norm": 0.2470003239993299, "learning_rate": 0.0001063620149421321, "loss": 2.2538, "step": 6932 }, { "epoch": 0.7949776401788786, "grad_norm": 0.2539465837010201, "learning_rate": 0.00010624754223734628, "loss": 2.4609, "step": 6933 }, { "epoch": 0.7950923059282192, "grad_norm": 0.2656069059003555, "learning_rate": 0.00010613312384320672, "loss": 2.3554, "step": 6934 }, { "epoch": 0.7952069716775599, "grad_norm": 0.26398845005673016, "learning_rate": 0.00010601875977549474, "loss": 2.4941, "step": 6935 }, { "epoch": 0.7953216374269005, "grad_norm": 0.22946196700084354, "learning_rate": 0.00010590445004998494, "loss": 2.4425, "step": 6936 }, { "epoch": 0.7954363031762413, "grad_norm": 0.25718615111915194, "learning_rate": 0.00010579019468244421, "loss": 2.3944, "step": 6937 }, { "epoch": 0.795550968925582, "grad_norm": 0.2417548408696056, "learning_rate": 0.00010567599368863156, "loss": 2.4083, "step": 6938 }, { "epoch": 0.7956656346749226, "grad_norm": 0.28213505526766525, "learning_rate": 0.00010556184708429922, "loss": 2.4168, "step": 6939 }, { "epoch": 0.7957803004242633, "grad_norm": 0.2677138329051195, "learning_rate": 0.00010544775488519154, "loss": 2.5642, "step": 6940 }, { "epoch": 0.7958949661736039, "grad_norm": 0.23018926770589765, "learning_rate": 0.00010533371710704504, "loss": 2.5062, "step": 6941 }, { "epoch": 0.7960096319229446, "grad_norm": 0.27874228429895287, "learning_rate": 0.00010521973376558925, "loss": 2.3756, "step": 6942 }, { "epoch": 0.7961242976722853, "grad_norm": 0.2658523993443978, "learning_rate": 0.00010510580487654603, "loss": 2.2162, "step": 6943 }, { "epoch": 0.7962389634216259, "grad_norm": 0.24738403324609673, "learning_rate": 0.00010499193045562966, "loss": 2.4539, "step": 6944 }, { "epoch": 0.7963536291709666, "grad_norm": 0.27451845904506583, "learning_rate": 0.0001048781105185469, "loss": 2.4648, "step": 6945 }, { "epoch": 0.7964682949203074, "grad_norm": 0.2412453083949022, "learning_rate": 0.00010476434508099725, "loss": 2.5148, "step": 6946 }, { "epoch": 0.796582960669648, "grad_norm": 0.24290417464598424, "learning_rate": 0.00010465063415867216, "loss": 2.2668, "step": 6947 }, { "epoch": 0.7966976264189887, "grad_norm": 0.26647036110151556, "learning_rate": 0.00010453697776725596, "loss": 2.3113, "step": 6948 }, { "epoch": 0.7968122921683293, "grad_norm": 0.2656498102169688, "learning_rate": 0.00010442337592242567, "loss": 2.3041, "step": 6949 }, { "epoch": 0.79692695791767, "grad_norm": 0.25222819204654473, "learning_rate": 0.00010430982863985023, "loss": 2.3448, "step": 6950 }, { "epoch": 0.7970416236670107, "grad_norm": 0.263812063873034, "learning_rate": 0.00010419633593519134, "loss": 2.3792, "step": 6951 }, { "epoch": 0.7971562894163513, "grad_norm": 0.265180835628077, "learning_rate": 0.00010408289782410335, "loss": 2.4562, "step": 6952 }, { "epoch": 0.797270955165692, "grad_norm": 0.24865198742248873, "learning_rate": 0.00010396951432223245, "loss": 2.3708, "step": 6953 }, { "epoch": 0.7973856209150327, "grad_norm": 0.26402752258513074, "learning_rate": 0.00010385618544521819, "loss": 2.4901, "step": 6954 }, { "epoch": 0.7975002866643733, "grad_norm": 0.2517870364257527, "learning_rate": 0.00010374291120869206, "loss": 2.3083, "step": 6955 }, { "epoch": 0.797614952413714, "grad_norm": 0.2694991147854156, "learning_rate": 0.00010362969162827785, "loss": 2.2431, "step": 6956 }, { "epoch": 0.7977296181630547, "grad_norm": 0.2298091318554434, "learning_rate": 0.00010351652671959222, "loss": 2.3173, "step": 6957 }, { "epoch": 0.7978442839123954, "grad_norm": 0.26051970132182495, "learning_rate": 0.00010340341649824403, "loss": 2.3735, "step": 6958 }, { "epoch": 0.7979589496617361, "grad_norm": 0.2778167878131525, "learning_rate": 0.00010329036097983474, "loss": 2.3545, "step": 6959 }, { "epoch": 0.7980736154110767, "grad_norm": 0.2621648668343236, "learning_rate": 0.00010317736017995816, "loss": 2.266, "step": 6960 }, { "epoch": 0.7981882811604174, "grad_norm": 0.26108324101635194, "learning_rate": 0.00010306441411420075, "loss": 2.391, "step": 6961 }, { "epoch": 0.798302946909758, "grad_norm": 0.284550242432633, "learning_rate": 0.00010295152279814096, "loss": 2.4193, "step": 6962 }, { "epoch": 0.7984176126590987, "grad_norm": 0.2685279586193713, "learning_rate": 0.00010283868624735021, "loss": 2.4443, "step": 6963 }, { "epoch": 0.7985322784084394, "grad_norm": 0.2571220058511691, "learning_rate": 0.0001027259044773921, "loss": 2.2945, "step": 6964 }, { "epoch": 0.79864694415778, "grad_norm": 0.27042574686473425, "learning_rate": 0.00010261317750382276, "loss": 2.3661, "step": 6965 }, { "epoch": 0.7987616099071208, "grad_norm": 0.24170155808899835, "learning_rate": 0.00010250050534219068, "loss": 2.2534, "step": 6966 }, { "epoch": 0.7988762756564615, "grad_norm": 0.2630985580919513, "learning_rate": 0.000102387888008037, "loss": 2.2916, "step": 6967 }, { "epoch": 0.7989909414058021, "grad_norm": 0.2598798516149883, "learning_rate": 0.00010227532551689472, "loss": 2.3505, "step": 6968 }, { "epoch": 0.7991056071551428, "grad_norm": 0.2659340699640982, "learning_rate": 0.00010216281788429022, "loss": 2.4232, "step": 6969 }, { "epoch": 0.7992202729044834, "grad_norm": 0.26269622946639093, "learning_rate": 0.0001020503651257414, "loss": 2.4228, "step": 6970 }, { "epoch": 0.7993349386538241, "grad_norm": 0.25905272759009235, "learning_rate": 0.00010193796725675907, "loss": 2.5631, "step": 6971 }, { "epoch": 0.7994496044031648, "grad_norm": 0.24945300195445935, "learning_rate": 0.00010182562429284647, "loss": 2.4002, "step": 6972 }, { "epoch": 0.7995642701525054, "grad_norm": 0.2684725179355554, "learning_rate": 0.00010171333624949908, "loss": 2.3093, "step": 6973 }, { "epoch": 0.7996789359018461, "grad_norm": 0.25163134128389636, "learning_rate": 0.00010160110314220489, "loss": 2.2555, "step": 6974 }, { "epoch": 0.7997936016511867, "grad_norm": 0.23819828850716698, "learning_rate": 0.00010148892498644452, "loss": 2.4777, "step": 6975 }, { "epoch": 0.7999082674005275, "grad_norm": 0.28552381843621605, "learning_rate": 0.00010137680179769048, "loss": 2.3167, "step": 6976 }, { "epoch": 0.8000229331498682, "grad_norm": 0.27070904619950015, "learning_rate": 0.0001012647335914082, "loss": 2.3954, "step": 6977 }, { "epoch": 0.8001375988992088, "grad_norm": 0.2725708048755457, "learning_rate": 0.00010115272038305529, "loss": 2.3674, "step": 6978 }, { "epoch": 0.8002522646485495, "grad_norm": 0.24615764486365646, "learning_rate": 0.00010104076218808194, "loss": 2.5392, "step": 6979 }, { "epoch": 0.8003669303978902, "grad_norm": 0.2990770754159958, "learning_rate": 0.00010092885902193055, "loss": 2.3088, "step": 6980 }, { "epoch": 0.8004815961472308, "grad_norm": 0.2526128135764045, "learning_rate": 0.00010081701090003625, "loss": 2.2668, "step": 6981 }, { "epoch": 0.8005962618965715, "grad_norm": 0.26216620350526504, "learning_rate": 0.00010070521783782599, "loss": 2.2908, "step": 6982 }, { "epoch": 0.8007109276459121, "grad_norm": 0.24547498160989933, "learning_rate": 0.00010059347985071959, "loss": 2.3308, "step": 6983 }, { "epoch": 0.8008255933952528, "grad_norm": 0.26559221926058574, "learning_rate": 0.00010048179695412951, "loss": 2.35, "step": 6984 }, { "epoch": 0.8009402591445935, "grad_norm": 0.2539138703304889, "learning_rate": 0.00010037016916345987, "loss": 2.4227, "step": 6985 }, { "epoch": 0.8010549248939342, "grad_norm": 0.25726414016761157, "learning_rate": 0.00010025859649410778, "loss": 2.4401, "step": 6986 }, { "epoch": 0.8011695906432749, "grad_norm": 0.2557555577590632, "learning_rate": 0.0001001470789614627, "loss": 2.4826, "step": 6987 }, { "epoch": 0.8012842563926156, "grad_norm": 0.24985395161000665, "learning_rate": 0.00010003561658090588, "loss": 2.4515, "step": 6988 }, { "epoch": 0.8013989221419562, "grad_norm": 0.2467958590704012, "learning_rate": 9.992420936781188e-05, "loss": 2.3719, "step": 6989 }, { "epoch": 0.8015135878912969, "grad_norm": 0.2320999153479567, "learning_rate": 9.981285733754719e-05, "loss": 2.4159, "step": 6990 }, { "epoch": 0.8016282536406375, "grad_norm": 0.25129107169356285, "learning_rate": 9.970156050547047e-05, "loss": 2.3715, "step": 6991 }, { "epoch": 0.8017429193899782, "grad_norm": 0.28175688629122697, "learning_rate": 9.959031888693304e-05, "loss": 2.5136, "step": 6992 }, { "epoch": 0.8018575851393189, "grad_norm": 0.27126125663573897, "learning_rate": 9.947913249727864e-05, "loss": 2.422, "step": 6993 }, { "epoch": 0.8019722508886595, "grad_norm": 0.2575020097669227, "learning_rate": 9.936800135184332e-05, "loss": 2.4822, "step": 6994 }, { "epoch": 0.8020869166380002, "grad_norm": 0.25548222948312577, "learning_rate": 9.925692546595544e-05, "loss": 2.3899, "step": 6995 }, { "epoch": 0.8022015823873409, "grad_norm": 0.2720799436566901, "learning_rate": 9.914590485493596e-05, "loss": 2.3977, "step": 6996 }, { "epoch": 0.8023162481366816, "grad_norm": 0.2671158416325561, "learning_rate": 9.903493953409781e-05, "loss": 2.2552, "step": 6997 }, { "epoch": 0.8024309138860223, "grad_norm": 0.2771785017714363, "learning_rate": 9.892402951874657e-05, "loss": 2.3894, "step": 6998 }, { "epoch": 0.8025455796353629, "grad_norm": 0.2785747470819906, "learning_rate": 9.881317482418045e-05, "loss": 2.5308, "step": 6999 }, { "epoch": 0.8026602453847036, "grad_norm": 0.25888800760965536, "learning_rate": 9.87023754656895e-05, "loss": 2.4156, "step": 7000 }, { "epoch": 0.8027749111340443, "grad_norm": 0.25068459974895557, "learning_rate": 9.859163145855632e-05, "loss": 2.3743, "step": 7001 }, { "epoch": 0.8028895768833849, "grad_norm": 0.24856172773104443, "learning_rate": 9.848094281805626e-05, "loss": 2.2163, "step": 7002 }, { "epoch": 0.8030042426327256, "grad_norm": 0.2661545202434637, "learning_rate": 9.837030955945619e-05, "loss": 2.4266, "step": 7003 }, { "epoch": 0.8031189083820662, "grad_norm": 0.2619773901015697, "learning_rate": 9.825973169801627e-05, "loss": 2.3976, "step": 7004 }, { "epoch": 0.803233574131407, "grad_norm": 0.26514591740103366, "learning_rate": 9.814920924898869e-05, "loss": 2.2835, "step": 7005 }, { "epoch": 0.8033482398807477, "grad_norm": 0.2752083855936904, "learning_rate": 9.80387422276175e-05, "loss": 2.4224, "step": 7006 }, { "epoch": 0.8034629056300883, "grad_norm": 0.2522019012537233, "learning_rate": 9.792833064913981e-05, "loss": 2.3522, "step": 7007 }, { "epoch": 0.803577571379429, "grad_norm": 0.28428501299962844, "learning_rate": 9.781797452878471e-05, "loss": 2.5217, "step": 7008 }, { "epoch": 0.8036922371287696, "grad_norm": 0.26404147779891035, "learning_rate": 9.770767388177371e-05, "loss": 2.4935, "step": 7009 }, { "epoch": 0.8038069028781103, "grad_norm": 0.2581773548310228, "learning_rate": 9.759742872332073e-05, "loss": 2.5377, "step": 7010 }, { "epoch": 0.803921568627451, "grad_norm": 0.22350432203916074, "learning_rate": 9.74872390686321e-05, "loss": 2.3019, "step": 7011 }, { "epoch": 0.8040362343767916, "grad_norm": 0.24545712652346155, "learning_rate": 9.737710493290614e-05, "loss": 2.34, "step": 7012 }, { "epoch": 0.8041509001261323, "grad_norm": 0.29974318717026505, "learning_rate": 9.726702633133383e-05, "loss": 2.4811, "step": 7013 }, { "epoch": 0.804265565875473, "grad_norm": 0.26622374012307676, "learning_rate": 9.715700327909843e-05, "loss": 2.3258, "step": 7014 }, { "epoch": 0.8043802316248136, "grad_norm": 0.25365192080274335, "learning_rate": 9.704703579137558e-05, "loss": 2.2716, "step": 7015 }, { "epoch": 0.8044948973741544, "grad_norm": 0.23543868496073328, "learning_rate": 9.693712388333314e-05, "loss": 2.2966, "step": 7016 }, { "epoch": 0.804609563123495, "grad_norm": 0.30021137705531065, "learning_rate": 9.682726757013155e-05, "loss": 2.5061, "step": 7017 }, { "epoch": 0.8047242288728357, "grad_norm": 0.26473142034752084, "learning_rate": 9.671746686692296e-05, "loss": 2.3841, "step": 7018 }, { "epoch": 0.8048388946221764, "grad_norm": 0.26205358357994735, "learning_rate": 9.660772178885275e-05, "loss": 2.2289, "step": 7019 }, { "epoch": 0.804953560371517, "grad_norm": 0.28138023223175274, "learning_rate": 9.649803235105786e-05, "loss": 2.4809, "step": 7020 }, { "epoch": 0.8050682261208577, "grad_norm": 0.2908001264279954, "learning_rate": 9.638839856866799e-05, "loss": 2.3245, "step": 7021 }, { "epoch": 0.8051828918701984, "grad_norm": 0.2748606317175616, "learning_rate": 9.627882045680497e-05, "loss": 2.4546, "step": 7022 }, { "epoch": 0.805297557619539, "grad_norm": 0.27900928243017625, "learning_rate": 9.616929803058305e-05, "loss": 2.499, "step": 7023 }, { "epoch": 0.8054122233688797, "grad_norm": 0.2560660648049793, "learning_rate": 9.605983130510876e-05, "loss": 2.4843, "step": 7024 }, { "epoch": 0.8055268891182203, "grad_norm": 0.24602628591873082, "learning_rate": 9.595042029548112e-05, "loss": 2.44, "step": 7025 }, { "epoch": 0.8056415548675611, "grad_norm": 0.24566556641200737, "learning_rate": 9.584106501679091e-05, "loss": 2.3937, "step": 7026 }, { "epoch": 0.8057562206169018, "grad_norm": 0.2536659896352178, "learning_rate": 9.573176548412182e-05, "loss": 2.3585, "step": 7027 }, { "epoch": 0.8058708863662424, "grad_norm": 0.24893256907622652, "learning_rate": 9.562252171254965e-05, "loss": 2.5434, "step": 7028 }, { "epoch": 0.8059855521155831, "grad_norm": 0.2520849704166014, "learning_rate": 9.55133337171425e-05, "loss": 2.4621, "step": 7029 }, { "epoch": 0.8061002178649237, "grad_norm": 0.26405524592418683, "learning_rate": 9.540420151296069e-05, "loss": 2.5345, "step": 7030 }, { "epoch": 0.8062148836142644, "grad_norm": 0.24279935683284148, "learning_rate": 9.529512511505716e-05, "loss": 2.2723, "step": 7031 }, { "epoch": 0.8063295493636051, "grad_norm": 0.2575212809317785, "learning_rate": 9.51861045384766e-05, "loss": 2.387, "step": 7032 }, { "epoch": 0.8064442151129457, "grad_norm": 0.27138751271438716, "learning_rate": 9.507713979825628e-05, "loss": 2.4858, "step": 7033 }, { "epoch": 0.8065588808622864, "grad_norm": 0.2794879446439531, "learning_rate": 9.496823090942631e-05, "loss": 2.2195, "step": 7034 }, { "epoch": 0.8066735466116272, "grad_norm": 0.2613049952315897, "learning_rate": 9.485937788700816e-05, "loss": 2.3202, "step": 7035 }, { "epoch": 0.8067882123609678, "grad_norm": 0.2709212811018067, "learning_rate": 9.475058074601611e-05, "loss": 2.402, "step": 7036 }, { "epoch": 0.8069028781103085, "grad_norm": 0.23662644129606814, "learning_rate": 9.464183950145683e-05, "loss": 2.4382, "step": 7037 }, { "epoch": 0.8070175438596491, "grad_norm": 0.2557539109207896, "learning_rate": 9.45331541683287e-05, "loss": 2.3822, "step": 7038 }, { "epoch": 0.8071322096089898, "grad_norm": 0.27669445720539104, "learning_rate": 9.442452476162322e-05, "loss": 2.4056, "step": 7039 }, { "epoch": 0.8072468753583305, "grad_norm": 0.2643439824210586, "learning_rate": 9.431595129632364e-05, "loss": 2.4038, "step": 7040 }, { "epoch": 0.8073615411076711, "grad_norm": 0.2856784498370159, "learning_rate": 9.420743378740542e-05, "loss": 2.2515, "step": 7041 }, { "epoch": 0.8074762068570118, "grad_norm": 0.23474042333803755, "learning_rate": 9.409897224983666e-05, "loss": 2.3596, "step": 7042 }, { "epoch": 0.8075908726063524, "grad_norm": 0.24035333735611183, "learning_rate": 9.399056669857747e-05, "loss": 2.4408, "step": 7043 }, { "epoch": 0.8077055383556931, "grad_norm": 0.27243032553725854, "learning_rate": 9.38822171485803e-05, "loss": 2.2979, "step": 7044 }, { "epoch": 0.8078202041050339, "grad_norm": 0.250095781034164, "learning_rate": 9.377392361479003e-05, "loss": 2.3978, "step": 7045 }, { "epoch": 0.8079348698543745, "grad_norm": 0.2743427116142907, "learning_rate": 9.366568611214376e-05, "loss": 2.41, "step": 7046 }, { "epoch": 0.8080495356037152, "grad_norm": 0.2531163547179839, "learning_rate": 9.35575046555705e-05, "loss": 2.4264, "step": 7047 }, { "epoch": 0.8081642013530559, "grad_norm": 0.24258897702375937, "learning_rate": 9.344937925999187e-05, "loss": 2.3214, "step": 7048 }, { "epoch": 0.8082788671023965, "grad_norm": 0.26544358624606484, "learning_rate": 9.334130994032202e-05, "loss": 2.4451, "step": 7049 }, { "epoch": 0.8083935328517372, "grad_norm": 0.2256731967244761, "learning_rate": 9.323329671146674e-05, "loss": 2.4189, "step": 7050 }, { "epoch": 0.8085081986010778, "grad_norm": 0.26571066615449296, "learning_rate": 9.312533958832453e-05, "loss": 2.4761, "step": 7051 }, { "epoch": 0.8086228643504185, "grad_norm": 0.26344904774416017, "learning_rate": 9.301743858578609e-05, "loss": 2.4168, "step": 7052 }, { "epoch": 0.8087375300997592, "grad_norm": 0.27775565013374215, "learning_rate": 9.290959371873391e-05, "loss": 2.3632, "step": 7053 }, { "epoch": 0.8088521958490998, "grad_norm": 0.25176749154829475, "learning_rate": 9.28018050020436e-05, "loss": 2.4131, "step": 7054 }, { "epoch": 0.8089668615984406, "grad_norm": 0.28701635137919107, "learning_rate": 9.269407245058254e-05, "loss": 2.384, "step": 7055 }, { "epoch": 0.8090815273477813, "grad_norm": 0.22856369421702732, "learning_rate": 9.258639607921005e-05, "loss": 2.3651, "step": 7056 }, { "epoch": 0.8091961930971219, "grad_norm": 0.24426358781748547, "learning_rate": 9.24787759027782e-05, "loss": 2.3433, "step": 7057 }, { "epoch": 0.8093108588464626, "grad_norm": 0.2511764342959095, "learning_rate": 9.237121193613119e-05, "loss": 2.3026, "step": 7058 }, { "epoch": 0.8094255245958032, "grad_norm": 0.2654578168667341, "learning_rate": 9.22637041941054e-05, "loss": 2.4127, "step": 7059 }, { "epoch": 0.8095401903451439, "grad_norm": 0.27094303590526236, "learning_rate": 9.215625269152938e-05, "loss": 2.3983, "step": 7060 }, { "epoch": 0.8096548560944846, "grad_norm": 0.24355249859018951, "learning_rate": 9.204885744322428e-05, "loss": 2.3707, "step": 7061 }, { "epoch": 0.8097695218438252, "grad_norm": 0.23892316931040286, "learning_rate": 9.19415184640029e-05, "loss": 2.2796, "step": 7062 }, { "epoch": 0.8098841875931659, "grad_norm": 0.28657650179592775, "learning_rate": 9.183423576867078e-05, "loss": 2.4289, "step": 7063 }, { "epoch": 0.8099988533425065, "grad_norm": 0.25075207637843466, "learning_rate": 9.172700937202544e-05, "loss": 2.4402, "step": 7064 }, { "epoch": 0.8101135190918473, "grad_norm": 0.2673021913373499, "learning_rate": 9.161983928885676e-05, "loss": 2.4787, "step": 7065 }, { "epoch": 0.810228184841188, "grad_norm": 0.2563132248206784, "learning_rate": 9.151272553394685e-05, "loss": 2.4824, "step": 7066 }, { "epoch": 0.8103428505905286, "grad_norm": 0.2755890212139305, "learning_rate": 9.140566812207008e-05, "loss": 2.4895, "step": 7067 }, { "epoch": 0.8104575163398693, "grad_norm": 0.2517563116213636, "learning_rate": 9.129866706799262e-05, "loss": 2.4264, "step": 7068 }, { "epoch": 0.81057218208921, "grad_norm": 0.2421034258556156, "learning_rate": 9.119172238647377e-05, "loss": 2.291, "step": 7069 }, { "epoch": 0.8106868478385506, "grad_norm": 0.23326836811781465, "learning_rate": 9.108483409226403e-05, "loss": 2.4193, "step": 7070 }, { "epoch": 0.8108015135878913, "grad_norm": 0.24349759531050258, "learning_rate": 9.097800220010689e-05, "loss": 2.4006, "step": 7071 }, { "epoch": 0.8109161793372319, "grad_norm": 0.25781496298250534, "learning_rate": 9.087122672473758e-05, "loss": 2.5394, "step": 7072 }, { "epoch": 0.8110308450865726, "grad_norm": 0.25836956641708336, "learning_rate": 9.07645076808839e-05, "loss": 2.5001, "step": 7073 }, { "epoch": 0.8111455108359134, "grad_norm": 0.25521665297489526, "learning_rate": 9.065784508326564e-05, "loss": 2.451, "step": 7074 }, { "epoch": 0.811260176585254, "grad_norm": 0.24402218016355487, "learning_rate": 9.055123894659501e-05, "loss": 2.3042, "step": 7075 }, { "epoch": 0.8113748423345947, "grad_norm": 0.23950617618985487, "learning_rate": 9.044468928557603e-05, "loss": 2.3582, "step": 7076 }, { "epoch": 0.8114895080839353, "grad_norm": 0.25193677658962943, "learning_rate": 9.03381961149053e-05, "loss": 2.4313, "step": 7077 }, { "epoch": 0.811604173833276, "grad_norm": 0.2539978155655879, "learning_rate": 9.023175944927159e-05, "loss": 2.2715, "step": 7078 }, { "epoch": 0.8117188395826167, "grad_norm": 0.24300363592535032, "learning_rate": 9.012537930335574e-05, "loss": 2.3649, "step": 7079 }, { "epoch": 0.8118335053319573, "grad_norm": 0.285688224511098, "learning_rate": 9.001905569183089e-05, "loss": 2.4115, "step": 7080 }, { "epoch": 0.811948171081298, "grad_norm": 0.2931415766894181, "learning_rate": 8.991278862936253e-05, "loss": 2.4031, "step": 7081 }, { "epoch": 0.8120628368306387, "grad_norm": 0.28251524389170907, "learning_rate": 8.980657813060783e-05, "loss": 2.3514, "step": 7082 }, { "epoch": 0.8121775025799793, "grad_norm": 0.2520673457728205, "learning_rate": 8.970042421021662e-05, "loss": 2.2877, "step": 7083 }, { "epoch": 0.81229216832932, "grad_norm": 0.26190673540371573, "learning_rate": 8.959432688283103e-05, "loss": 2.3681, "step": 7084 }, { "epoch": 0.8124068340786607, "grad_norm": 0.2341330233565217, "learning_rate": 8.948828616308496e-05, "loss": 2.3811, "step": 7085 }, { "epoch": 0.8125214998280014, "grad_norm": 0.2674752073294024, "learning_rate": 8.938230206560471e-05, "loss": 2.2678, "step": 7086 }, { "epoch": 0.8126361655773421, "grad_norm": 0.2764465992327549, "learning_rate": 8.927637460500892e-05, "loss": 2.5112, "step": 7087 }, { "epoch": 0.8127508313266827, "grad_norm": 0.25870195241285737, "learning_rate": 8.91705037959079e-05, "loss": 2.4008, "step": 7088 }, { "epoch": 0.8128654970760234, "grad_norm": 0.2723688475861717, "learning_rate": 8.906468965290493e-05, "loss": 2.3863, "step": 7089 }, { "epoch": 0.8129801628253641, "grad_norm": 0.25157579990487255, "learning_rate": 8.895893219059498e-05, "loss": 2.4284, "step": 7090 }, { "epoch": 0.8130948285747047, "grad_norm": 0.2553041018828866, "learning_rate": 8.885323142356506e-05, "loss": 2.3712, "step": 7091 }, { "epoch": 0.8132094943240454, "grad_norm": 0.24759221782061655, "learning_rate": 8.874758736639465e-05, "loss": 2.3768, "step": 7092 }, { "epoch": 0.813324160073386, "grad_norm": 0.23571078160196235, "learning_rate": 8.864200003365541e-05, "loss": 2.395, "step": 7093 }, { "epoch": 0.8134388258227268, "grad_norm": 0.25477292438650545, "learning_rate": 8.853646943991106e-05, "loss": 2.445, "step": 7094 }, { "epoch": 0.8135534915720675, "grad_norm": 0.2521028083552525, "learning_rate": 8.843099559971757e-05, "loss": 2.4443, "step": 7095 }, { "epoch": 0.8136681573214081, "grad_norm": 0.2349413720162316, "learning_rate": 8.832557852762312e-05, "loss": 2.4378, "step": 7096 }, { "epoch": 0.8137828230707488, "grad_norm": 0.26959163983617296, "learning_rate": 8.822021823816778e-05, "loss": 2.5226, "step": 7097 }, { "epoch": 0.8138974888200894, "grad_norm": 0.28448664790777556, "learning_rate": 8.811491474588396e-05, "loss": 2.4321, "step": 7098 }, { "epoch": 0.8140121545694301, "grad_norm": 0.240574969584723, "learning_rate": 8.800966806529665e-05, "loss": 2.431, "step": 7099 }, { "epoch": 0.8141268203187708, "grad_norm": 0.2609193108109736, "learning_rate": 8.790447821092224e-05, "loss": 2.4555, "step": 7100 }, { "epoch": 0.8142414860681114, "grad_norm": 0.277569571970675, "learning_rate": 8.779934519726985e-05, "loss": 2.3929, "step": 7101 }, { "epoch": 0.8143561518174521, "grad_norm": 0.2571115232081349, "learning_rate": 8.76942690388406e-05, "loss": 2.2698, "step": 7102 }, { "epoch": 0.8144708175667928, "grad_norm": 0.2506312212981786, "learning_rate": 8.758924975012744e-05, "loss": 2.4831, "step": 7103 }, { "epoch": 0.8145854833161335, "grad_norm": 0.25634071246485335, "learning_rate": 8.748428734561614e-05, "loss": 2.3682, "step": 7104 }, { "epoch": 0.8147001490654742, "grad_norm": 0.28357040304383274, "learning_rate": 8.737938183978428e-05, "loss": 2.4487, "step": 7105 }, { "epoch": 0.8148148148148148, "grad_norm": 0.2552235608366805, "learning_rate": 8.727453324710133e-05, "loss": 2.2901, "step": 7106 }, { "epoch": 0.8149294805641555, "grad_norm": 0.25067560930010707, "learning_rate": 8.716974158202923e-05, "loss": 2.3063, "step": 7107 }, { "epoch": 0.8150441463134962, "grad_norm": 0.2594420757705762, "learning_rate": 8.706500685902197e-05, "loss": 2.3089, "step": 7108 }, { "epoch": 0.8151588120628368, "grad_norm": 0.23952720973807762, "learning_rate": 8.696032909252582e-05, "loss": 2.2999, "step": 7109 }, { "epoch": 0.8152734778121775, "grad_norm": 0.2906314094419773, "learning_rate": 8.685570829697898e-05, "loss": 2.3311, "step": 7110 }, { "epoch": 0.8153881435615181, "grad_norm": 0.25751071352592, "learning_rate": 8.675114448681209e-05, "loss": 2.362, "step": 7111 }, { "epoch": 0.8155028093108588, "grad_norm": 0.2744821924998915, "learning_rate": 8.664663767644742e-05, "loss": 2.4209, "step": 7112 }, { "epoch": 0.8156174750601995, "grad_norm": 0.25013760260730145, "learning_rate": 8.654218788029989e-05, "loss": 2.476, "step": 7113 }, { "epoch": 0.8157321408095402, "grad_norm": 0.2787722648642111, "learning_rate": 8.643779511277628e-05, "loss": 2.3429, "step": 7114 }, { "epoch": 0.8158468065588809, "grad_norm": 0.26471771747976514, "learning_rate": 8.633345938827563e-05, "loss": 2.3694, "step": 7115 }, { "epoch": 0.8159614723082216, "grad_norm": 0.2246152208849204, "learning_rate": 8.622918072118908e-05, "loss": 2.3453, "step": 7116 }, { "epoch": 0.8160761380575622, "grad_norm": 0.26320255971681095, "learning_rate": 8.612495912589996e-05, "loss": 2.5282, "step": 7117 }, { "epoch": 0.8161908038069029, "grad_norm": 0.2498666067842061, "learning_rate": 8.602079461678331e-05, "loss": 2.2792, "step": 7118 }, { "epoch": 0.8163054695562435, "grad_norm": 0.2728040852870363, "learning_rate": 8.591668720820716e-05, "loss": 2.446, "step": 7119 }, { "epoch": 0.8164201353055842, "grad_norm": 0.2864231242241152, "learning_rate": 8.581263691453073e-05, "loss": 2.4344, "step": 7120 }, { "epoch": 0.8165348010549249, "grad_norm": 0.26482610475196167, "learning_rate": 8.57086437501059e-05, "loss": 2.4839, "step": 7121 }, { "epoch": 0.8166494668042655, "grad_norm": 0.28031591614485957, "learning_rate": 8.560470772927653e-05, "loss": 2.4847, "step": 7122 }, { "epoch": 0.8167641325536062, "grad_norm": 0.23398675095287777, "learning_rate": 8.550082886637861e-05, "loss": 2.413, "step": 7123 }, { "epoch": 0.816878798302947, "grad_norm": 0.2740168981089584, "learning_rate": 8.539700717574034e-05, "loss": 2.4516, "step": 7124 }, { "epoch": 0.8169934640522876, "grad_norm": 0.243484692303628, "learning_rate": 8.529324267168198e-05, "loss": 2.3453, "step": 7125 }, { "epoch": 0.8171081298016283, "grad_norm": 0.25010723336214113, "learning_rate": 8.518953536851565e-05, "loss": 2.3364, "step": 7126 }, { "epoch": 0.8172227955509689, "grad_norm": 0.26525581037187146, "learning_rate": 8.50858852805459e-05, "loss": 2.5474, "step": 7127 }, { "epoch": 0.8173374613003096, "grad_norm": 0.26629206328340216, "learning_rate": 8.498229242206929e-05, "loss": 2.3893, "step": 7128 }, { "epoch": 0.8174521270496503, "grad_norm": 0.2896460817829774, "learning_rate": 8.487875680737444e-05, "loss": 2.3058, "step": 7129 }, { "epoch": 0.8175667927989909, "grad_norm": 0.2486763486324493, "learning_rate": 8.477527845074223e-05, "loss": 2.2317, "step": 7130 }, { "epoch": 0.8176814585483316, "grad_norm": 0.2838162039631978, "learning_rate": 8.467185736644556e-05, "loss": 2.3573, "step": 7131 }, { "epoch": 0.8177961242976722, "grad_norm": 0.26982437009572635, "learning_rate": 8.456849356874912e-05, "loss": 2.3196, "step": 7132 }, { "epoch": 0.817910790047013, "grad_norm": 0.22435517710224578, "learning_rate": 8.446518707191009e-05, "loss": 2.3347, "step": 7133 }, { "epoch": 0.8180254557963537, "grad_norm": 0.2672491983132503, "learning_rate": 8.436193789017788e-05, "loss": 2.3318, "step": 7134 }, { "epoch": 0.8181401215456943, "grad_norm": 0.2667778275181249, "learning_rate": 8.425874603779349e-05, "loss": 2.4536, "step": 7135 }, { "epoch": 0.818254787295035, "grad_norm": 0.23423843421644505, "learning_rate": 8.415561152899026e-05, "loss": 2.4576, "step": 7136 }, { "epoch": 0.8183694530443757, "grad_norm": 0.24676450697557595, "learning_rate": 8.405253437799387e-05, "loss": 2.3365, "step": 7137 }, { "epoch": 0.8184841187937163, "grad_norm": 0.239433093917491, "learning_rate": 8.394951459902145e-05, "loss": 2.4747, "step": 7138 }, { "epoch": 0.818598784543057, "grad_norm": 0.25456402768893965, "learning_rate": 8.384655220628296e-05, "loss": 2.481, "step": 7139 }, { "epoch": 0.8187134502923976, "grad_norm": 0.25384640230990885, "learning_rate": 8.374364721398015e-05, "loss": 2.3056, "step": 7140 }, { "epoch": 0.8188281160417383, "grad_norm": 0.29496457373132967, "learning_rate": 8.364079963630644e-05, "loss": 2.3623, "step": 7141 }, { "epoch": 0.818942781791079, "grad_norm": 0.25549648465115943, "learning_rate": 8.353800948744794e-05, "loss": 2.327, "step": 7142 }, { "epoch": 0.8190574475404196, "grad_norm": 0.2665849365789555, "learning_rate": 8.343527678158252e-05, "loss": 2.4726, "step": 7143 }, { "epoch": 0.8191721132897604, "grad_norm": 0.2401832604274856, "learning_rate": 8.333260153288025e-05, "loss": 2.4543, "step": 7144 }, { "epoch": 0.819286779039101, "grad_norm": 0.2505407988253365, "learning_rate": 8.322998375550316e-05, "loss": 2.3091, "step": 7145 }, { "epoch": 0.8194014447884417, "grad_norm": 0.24556674004304613, "learning_rate": 8.312742346360558e-05, "loss": 2.383, "step": 7146 }, { "epoch": 0.8195161105377824, "grad_norm": 0.2654076651511823, "learning_rate": 8.302492067133349e-05, "loss": 2.5243, "step": 7147 }, { "epoch": 0.819630776287123, "grad_norm": 0.2586343692827218, "learning_rate": 8.292247539282522e-05, "loss": 2.5485, "step": 7148 }, { "epoch": 0.8197454420364637, "grad_norm": 0.2492835164469896, "learning_rate": 8.282008764221149e-05, "loss": 2.4998, "step": 7149 }, { "epoch": 0.8198601077858044, "grad_norm": 0.26311181205565626, "learning_rate": 8.271775743361443e-05, "loss": 2.3439, "step": 7150 }, { "epoch": 0.819974773535145, "grad_norm": 0.27589743830621305, "learning_rate": 8.261548478114855e-05, "loss": 2.4721, "step": 7151 }, { "epoch": 0.8200894392844857, "grad_norm": 0.233550982589699, "learning_rate": 8.251326969892059e-05, "loss": 2.2327, "step": 7152 }, { "epoch": 0.8202041050338263, "grad_norm": 0.24329465127089503, "learning_rate": 8.241111220102882e-05, "loss": 2.4646, "step": 7153 }, { "epoch": 0.8203187707831671, "grad_norm": 0.26338360481784595, "learning_rate": 8.23090123015643e-05, "loss": 2.3021, "step": 7154 }, { "epoch": 0.8204334365325078, "grad_norm": 0.2566252070336107, "learning_rate": 8.22069700146097e-05, "loss": 2.3193, "step": 7155 }, { "epoch": 0.8205481022818484, "grad_norm": 0.270564754020459, "learning_rate": 8.210498535423961e-05, "loss": 2.4127, "step": 7156 }, { "epoch": 0.8206627680311891, "grad_norm": 0.25799502038068656, "learning_rate": 8.200305833452099e-05, "loss": 2.5109, "step": 7157 }, { "epoch": 0.8207774337805298, "grad_norm": 0.27454837905225354, "learning_rate": 8.190118896951271e-05, "loss": 2.3174, "step": 7158 }, { "epoch": 0.8208920995298704, "grad_norm": 0.25435873418816374, "learning_rate": 8.179937727326575e-05, "loss": 2.4281, "step": 7159 }, { "epoch": 0.8210067652792111, "grad_norm": 0.2646981899494223, "learning_rate": 8.16976232598231e-05, "loss": 2.4499, "step": 7160 }, { "epoch": 0.8211214310285517, "grad_norm": 0.2539208233098313, "learning_rate": 8.159592694321982e-05, "loss": 2.2882, "step": 7161 }, { "epoch": 0.8212360967778924, "grad_norm": 0.2690461692585868, "learning_rate": 8.149428833748279e-05, "loss": 2.5136, "step": 7162 }, { "epoch": 0.8213507625272332, "grad_norm": 0.22786345418460338, "learning_rate": 8.13927074566313e-05, "loss": 2.4348, "step": 7163 }, { "epoch": 0.8214654282765738, "grad_norm": 0.22456851914412992, "learning_rate": 8.129118431467636e-05, "loss": 2.436, "step": 7164 }, { "epoch": 0.8215800940259145, "grad_norm": 0.26636807528037715, "learning_rate": 8.118971892562128e-05, "loss": 2.332, "step": 7165 }, { "epoch": 0.8216947597752551, "grad_norm": 0.24944320363236863, "learning_rate": 8.108831130346117e-05, "loss": 2.3177, "step": 7166 }, { "epoch": 0.8218094255245958, "grad_norm": 0.2542661002335648, "learning_rate": 8.098696146218354e-05, "loss": 2.4709, "step": 7167 }, { "epoch": 0.8219240912739365, "grad_norm": 0.2625456967084732, "learning_rate": 8.088566941576714e-05, "loss": 2.3651, "step": 7168 }, { "epoch": 0.8220387570232771, "grad_norm": 0.2561120172775171, "learning_rate": 8.078443517818384e-05, "loss": 2.407, "step": 7169 }, { "epoch": 0.8221534227726178, "grad_norm": 0.25807589879116727, "learning_rate": 8.068325876339666e-05, "loss": 2.4868, "step": 7170 }, { "epoch": 0.8222680885219585, "grad_norm": 0.24191543879822863, "learning_rate": 8.058214018536104e-05, "loss": 2.4519, "step": 7171 }, { "epoch": 0.8223827542712991, "grad_norm": 0.23565348805849098, "learning_rate": 8.04810794580243e-05, "loss": 2.27, "step": 7172 }, { "epoch": 0.8224974200206399, "grad_norm": 0.23200899431052055, "learning_rate": 8.038007659532587e-05, "loss": 2.5146, "step": 7173 }, { "epoch": 0.8226120857699805, "grad_norm": 0.2501445489851966, "learning_rate": 8.027913161119726e-05, "loss": 2.566, "step": 7174 }, { "epoch": 0.8227267515193212, "grad_norm": 0.2535352682465857, "learning_rate": 8.017824451956191e-05, "loss": 2.3483, "step": 7175 }, { "epoch": 0.8228414172686619, "grad_norm": 0.2399969547449492, "learning_rate": 8.00774153343351e-05, "loss": 2.403, "step": 7176 }, { "epoch": 0.8229560830180025, "grad_norm": 0.2718698146732127, "learning_rate": 7.99766440694244e-05, "loss": 2.2132, "step": 7177 }, { "epoch": 0.8230707487673432, "grad_norm": 0.24650301176187286, "learning_rate": 7.987593073872923e-05, "loss": 2.4001, "step": 7178 }, { "epoch": 0.8231854145166838, "grad_norm": 0.2683963185710929, "learning_rate": 7.977527535614116e-05, "loss": 2.2794, "step": 7179 }, { "epoch": 0.8233000802660245, "grad_norm": 0.2708995664577427, "learning_rate": 7.967467793554361e-05, "loss": 2.3415, "step": 7180 }, { "epoch": 0.8234147460153652, "grad_norm": 0.2515790197110842, "learning_rate": 7.95741384908123e-05, "loss": 2.498, "step": 7181 }, { "epoch": 0.8235294117647058, "grad_norm": 0.24202869051115636, "learning_rate": 7.947365703581432e-05, "loss": 2.4365, "step": 7182 }, { "epoch": 0.8236440775140466, "grad_norm": 0.24264061029127898, "learning_rate": 7.937323358440934e-05, "loss": 2.3535, "step": 7183 }, { "epoch": 0.8237587432633873, "grad_norm": 0.25760007293484516, "learning_rate": 7.927286815044915e-05, "loss": 2.24, "step": 7184 }, { "epoch": 0.8238734090127279, "grad_norm": 0.2594058159413161, "learning_rate": 7.91725607477769e-05, "loss": 2.4142, "step": 7185 }, { "epoch": 0.8239880747620686, "grad_norm": 0.24630730261397657, "learning_rate": 7.907231139022814e-05, "loss": 2.4586, "step": 7186 }, { "epoch": 0.8241027405114092, "grad_norm": 0.2760166702430448, "learning_rate": 7.897212009163057e-05, "loss": 2.4808, "step": 7187 }, { "epoch": 0.8242174062607499, "grad_norm": 0.2465036927951378, "learning_rate": 7.887198686580327e-05, "loss": 2.4958, "step": 7188 }, { "epoch": 0.8243320720100906, "grad_norm": 0.2658216266709044, "learning_rate": 7.877191172655807e-05, "loss": 2.4915, "step": 7189 }, { "epoch": 0.8244467377594312, "grad_norm": 0.2433147344105137, "learning_rate": 7.867189468769842e-05, "loss": 2.3368, "step": 7190 }, { "epoch": 0.8245614035087719, "grad_norm": 0.2652262281953106, "learning_rate": 7.857193576301953e-05, "loss": 2.5328, "step": 7191 }, { "epoch": 0.8246760692581127, "grad_norm": 0.2362629446715802, "learning_rate": 7.847203496630895e-05, "loss": 2.4088, "step": 7192 }, { "epoch": 0.8247907350074533, "grad_norm": 0.2540998828739804, "learning_rate": 7.837219231134613e-05, "loss": 2.2888, "step": 7193 }, { "epoch": 0.824905400756794, "grad_norm": 0.2723361365359184, "learning_rate": 7.827240781190237e-05, "loss": 2.5134, "step": 7194 }, { "epoch": 0.8250200665061346, "grad_norm": 0.28267377765310747, "learning_rate": 7.817268148174111e-05, "loss": 2.5182, "step": 7195 }, { "epoch": 0.8251347322554753, "grad_norm": 0.24393442745004826, "learning_rate": 7.80730133346178e-05, "loss": 2.2889, "step": 7196 }, { "epoch": 0.825249398004816, "grad_norm": 0.2347990545299825, "learning_rate": 7.797340338427955e-05, "loss": 2.339, "step": 7197 }, { "epoch": 0.8253640637541566, "grad_norm": 0.2760651031431398, "learning_rate": 7.787385164446559e-05, "loss": 2.3882, "step": 7198 }, { "epoch": 0.8254787295034973, "grad_norm": 0.24786390848628995, "learning_rate": 7.77743581289076e-05, "loss": 2.2813, "step": 7199 }, { "epoch": 0.8255933952528379, "grad_norm": 0.25148135423263224, "learning_rate": 7.767492285132844e-05, "loss": 2.4449, "step": 7200 }, { "epoch": 0.8257080610021786, "grad_norm": 0.24763676702980145, "learning_rate": 7.757554582544341e-05, "loss": 2.3728, "step": 7201 }, { "epoch": 0.8258227267515194, "grad_norm": 0.24976633216874178, "learning_rate": 7.747622706495983e-05, "loss": 2.4171, "step": 7202 }, { "epoch": 0.82593739250086, "grad_norm": 0.2392046371267394, "learning_rate": 7.73769665835764e-05, "loss": 2.3823, "step": 7203 }, { "epoch": 0.8260520582502007, "grad_norm": 0.28685577705728543, "learning_rate": 7.727776439498463e-05, "loss": 2.38, "step": 7204 }, { "epoch": 0.8261667239995414, "grad_norm": 0.2513136031169034, "learning_rate": 7.717862051286761e-05, "loss": 2.3509, "step": 7205 }, { "epoch": 0.826281389748882, "grad_norm": 0.26066376689976095, "learning_rate": 7.707953495090004e-05, "loss": 2.4258, "step": 7206 }, { "epoch": 0.8263960554982227, "grad_norm": 0.2436961269898676, "learning_rate": 7.6980507722749e-05, "loss": 2.4257, "step": 7207 }, { "epoch": 0.8265107212475633, "grad_norm": 0.24391059271418528, "learning_rate": 7.68815388420735e-05, "loss": 2.3175, "step": 7208 }, { "epoch": 0.826625386996904, "grad_norm": 0.2712509630415017, "learning_rate": 7.678262832252431e-05, "loss": 2.3714, "step": 7209 }, { "epoch": 0.8267400527462447, "grad_norm": 0.2780572645754265, "learning_rate": 7.668377617774424e-05, "loss": 2.3674, "step": 7210 }, { "epoch": 0.8268547184955853, "grad_norm": 0.3202583144289909, "learning_rate": 7.658498242136824e-05, "loss": 2.3247, "step": 7211 }, { "epoch": 0.826969384244926, "grad_norm": 0.26791707902707, "learning_rate": 7.648624706702285e-05, "loss": 2.3877, "step": 7212 }, { "epoch": 0.8270840499942668, "grad_norm": 0.28624905373613635, "learning_rate": 7.63875701283267e-05, "loss": 2.4451, "step": 7213 }, { "epoch": 0.8271987157436074, "grad_norm": 0.2656631069633248, "learning_rate": 7.628895161889048e-05, "loss": 2.4496, "step": 7214 }, { "epoch": 0.8273133814929481, "grad_norm": 0.2500916687785805, "learning_rate": 7.619039155231672e-05, "loss": 2.4095, "step": 7215 }, { "epoch": 0.8274280472422887, "grad_norm": 0.2588078405510094, "learning_rate": 7.60918899421999e-05, "loss": 2.4207, "step": 7216 }, { "epoch": 0.8275427129916294, "grad_norm": 0.2829804167681859, "learning_rate": 7.599344680212655e-05, "loss": 2.3761, "step": 7217 }, { "epoch": 0.8276573787409701, "grad_norm": 0.2714230677983044, "learning_rate": 7.589506214567465e-05, "loss": 2.4679, "step": 7218 }, { "epoch": 0.8277720444903107, "grad_norm": 0.2716022097493529, "learning_rate": 7.5796735986415e-05, "loss": 2.3279, "step": 7219 }, { "epoch": 0.8278867102396514, "grad_norm": 0.2772807912380061, "learning_rate": 7.56984683379095e-05, "loss": 2.4198, "step": 7220 }, { "epoch": 0.828001375988992, "grad_norm": 0.24843591433734002, "learning_rate": 7.560025921371232e-05, "loss": 2.4135, "step": 7221 }, { "epoch": 0.8281160417383328, "grad_norm": 0.2513581266651729, "learning_rate": 7.550210862736961e-05, "loss": 2.3976, "step": 7222 }, { "epoch": 0.8282307074876735, "grad_norm": 0.24577162491321267, "learning_rate": 7.540401659241936e-05, "loss": 2.4007, "step": 7223 }, { "epoch": 0.8283453732370141, "grad_norm": 0.27997173488984356, "learning_rate": 7.530598312239145e-05, "loss": 2.383, "step": 7224 }, { "epoch": 0.8284600389863548, "grad_norm": 0.24539548333258915, "learning_rate": 7.520800823080792e-05, "loss": 2.488, "step": 7225 }, { "epoch": 0.8285747047356955, "grad_norm": 0.26838255800805433, "learning_rate": 7.511009193118223e-05, "loss": 2.4194, "step": 7226 }, { "epoch": 0.8286893704850361, "grad_norm": 0.2634384952414978, "learning_rate": 7.501223423702025e-05, "loss": 2.4499, "step": 7227 }, { "epoch": 0.8288040362343768, "grad_norm": 0.2396692024773577, "learning_rate": 7.491443516181951e-05, "loss": 2.3293, "step": 7228 }, { "epoch": 0.8289187019837174, "grad_norm": 0.2662685080998943, "learning_rate": 7.48166947190696e-05, "loss": 2.47, "step": 7229 }, { "epoch": 0.8290333677330581, "grad_norm": 0.2449644219170579, "learning_rate": 7.471901292225197e-05, "loss": 2.4226, "step": 7230 }, { "epoch": 0.8291480334823988, "grad_norm": 0.26497573512014916, "learning_rate": 7.462138978483996e-05, "loss": 2.4894, "step": 7231 }, { "epoch": 0.8292626992317395, "grad_norm": 0.26858564074668034, "learning_rate": 7.452382532029867e-05, "loss": 2.2864, "step": 7232 }, { "epoch": 0.8293773649810802, "grad_norm": 0.278777023164981, "learning_rate": 7.442631954208529e-05, "loss": 2.48, "step": 7233 }, { "epoch": 0.8294920307304208, "grad_norm": 0.28443270161861384, "learning_rate": 7.432887246364911e-05, "loss": 2.3328, "step": 7234 }, { "epoch": 0.8296066964797615, "grad_norm": 0.28633282641630514, "learning_rate": 7.423148409843089e-05, "loss": 2.3754, "step": 7235 }, { "epoch": 0.8297213622291022, "grad_norm": 0.2502414400694604, "learning_rate": 7.413415445986355e-05, "loss": 2.3553, "step": 7236 }, { "epoch": 0.8298360279784428, "grad_norm": 0.2743945045142924, "learning_rate": 7.403688356137194e-05, "loss": 2.4973, "step": 7237 }, { "epoch": 0.8299506937277835, "grad_norm": 0.26592234998722886, "learning_rate": 7.393967141637248e-05, "loss": 2.4752, "step": 7238 }, { "epoch": 0.8300653594771242, "grad_norm": 0.2564211960408765, "learning_rate": 7.384251803827397e-05, "loss": 2.5098, "step": 7239 }, { "epoch": 0.8301800252264648, "grad_norm": 0.26588368484504593, "learning_rate": 7.374542344047692e-05, "loss": 2.3298, "step": 7240 }, { "epoch": 0.8302946909758055, "grad_norm": 0.2650345696005376, "learning_rate": 7.364838763637344e-05, "loss": 2.2663, "step": 7241 }, { "epoch": 0.8304093567251462, "grad_norm": 0.24417553992010202, "learning_rate": 7.35514106393479e-05, "loss": 2.3542, "step": 7242 }, { "epoch": 0.8305240224744869, "grad_norm": 0.23503875689848353, "learning_rate": 7.345449246277636e-05, "loss": 2.4518, "step": 7243 }, { "epoch": 0.8306386882238276, "grad_norm": 0.2696371879271095, "learning_rate": 7.335763312002691e-05, "loss": 2.3158, "step": 7244 }, { "epoch": 0.8307533539731682, "grad_norm": 0.2508278778191043, "learning_rate": 7.326083262445942e-05, "loss": 2.4071, "step": 7245 }, { "epoch": 0.8308680197225089, "grad_norm": 0.25406213633540037, "learning_rate": 7.316409098942584e-05, "loss": 2.3312, "step": 7246 }, { "epoch": 0.8309826854718496, "grad_norm": 0.24449508957366375, "learning_rate": 7.306740822826957e-05, "loss": 2.291, "step": 7247 }, { "epoch": 0.8310973512211902, "grad_norm": 0.27581977431894283, "learning_rate": 7.297078435432608e-05, "loss": 2.5688, "step": 7248 }, { "epoch": 0.8312120169705309, "grad_norm": 0.26075882817435575, "learning_rate": 7.287421938092325e-05, "loss": 2.428, "step": 7249 }, { "epoch": 0.8313266827198715, "grad_norm": 0.24713561039219137, "learning_rate": 7.277771332137995e-05, "loss": 2.3161, "step": 7250 }, { "epoch": 0.8314413484692122, "grad_norm": 0.28519544977886985, "learning_rate": 7.26812661890075e-05, "loss": 2.4297, "step": 7251 }, { "epoch": 0.831556014218553, "grad_norm": 0.2616099370264878, "learning_rate": 7.258487799710911e-05, "loss": 2.4194, "step": 7252 }, { "epoch": 0.8316706799678936, "grad_norm": 0.29114726733632806, "learning_rate": 7.248854875897926e-05, "loss": 2.4057, "step": 7253 }, { "epoch": 0.8317853457172343, "grad_norm": 0.22480701367658262, "learning_rate": 7.239227848790508e-05, "loss": 2.4236, "step": 7254 }, { "epoch": 0.8319000114665749, "grad_norm": 0.23093442136998926, "learning_rate": 7.22960671971653e-05, "loss": 2.3343, "step": 7255 }, { "epoch": 0.8320146772159156, "grad_norm": 0.2844865147191501, "learning_rate": 7.219991490003014e-05, "loss": 2.3658, "step": 7256 }, { "epoch": 0.8321293429652563, "grad_norm": 0.23316307431744435, "learning_rate": 7.210382160976209e-05, "loss": 2.4953, "step": 7257 }, { "epoch": 0.8322440087145969, "grad_norm": 0.27233437079066286, "learning_rate": 7.200778733961544e-05, "loss": 2.46, "step": 7258 }, { "epoch": 0.8323586744639376, "grad_norm": 0.24225435306764317, "learning_rate": 7.191181210283626e-05, "loss": 2.367, "step": 7259 }, { "epoch": 0.8324733402132783, "grad_norm": 0.26344696541372714, "learning_rate": 7.181589591266246e-05, "loss": 2.4097, "step": 7260 }, { "epoch": 0.832588005962619, "grad_norm": 0.26866560268991174, "learning_rate": 7.172003878232403e-05, "loss": 2.4727, "step": 7261 }, { "epoch": 0.8327026717119597, "grad_norm": 0.2531057048044504, "learning_rate": 7.162424072504236e-05, "loss": 2.3087, "step": 7262 }, { "epoch": 0.8328173374613003, "grad_norm": 0.2906871180689149, "learning_rate": 7.152850175403109e-05, "loss": 2.4627, "step": 7263 }, { "epoch": 0.832932003210641, "grad_norm": 0.25555493878835456, "learning_rate": 7.143282188249562e-05, "loss": 2.3759, "step": 7264 }, { "epoch": 0.8330466689599817, "grad_norm": 0.2660521706000028, "learning_rate": 7.133720112363313e-05, "loss": 2.4182, "step": 7265 }, { "epoch": 0.8331613347093223, "grad_norm": 0.28004182864190275, "learning_rate": 7.124163949063267e-05, "loss": 2.4137, "step": 7266 }, { "epoch": 0.833276000458663, "grad_norm": 0.24903384072780274, "learning_rate": 7.114613699667527e-05, "loss": 2.3767, "step": 7267 }, { "epoch": 0.8333906662080036, "grad_norm": 0.23752177820548975, "learning_rate": 7.105069365493338e-05, "loss": 2.3715, "step": 7268 }, { "epoch": 0.8335053319573443, "grad_norm": 0.24603997173608957, "learning_rate": 7.095530947857199e-05, "loss": 2.2875, "step": 7269 }, { "epoch": 0.833619997706685, "grad_norm": 0.27297910269369585, "learning_rate": 7.08599844807472e-05, "loss": 2.472, "step": 7270 }, { "epoch": 0.8337346634560256, "grad_norm": 0.26005650102857897, "learning_rate": 7.076471867460743e-05, "loss": 2.4577, "step": 7271 }, { "epoch": 0.8338493292053664, "grad_norm": 0.25524278375618115, "learning_rate": 7.066951207329276e-05, "loss": 2.4469, "step": 7272 }, { "epoch": 0.8339639949547071, "grad_norm": 0.2782497377626116, "learning_rate": 7.057436468993505e-05, "loss": 2.4549, "step": 7273 }, { "epoch": 0.8340786607040477, "grad_norm": 0.26019365486685053, "learning_rate": 7.047927653765817e-05, "loss": 2.2951, "step": 7274 }, { "epoch": 0.8341933264533884, "grad_norm": 0.24025601853475834, "learning_rate": 7.03842476295778e-05, "loss": 2.3485, "step": 7275 }, { "epoch": 0.834307992202729, "grad_norm": 0.24123276404205093, "learning_rate": 7.028927797880114e-05, "loss": 2.3945, "step": 7276 }, { "epoch": 0.8344226579520697, "grad_norm": 0.2508800247432839, "learning_rate": 7.019436759842757e-05, "loss": 2.3946, "step": 7277 }, { "epoch": 0.8345373237014104, "grad_norm": 0.24428499995314762, "learning_rate": 7.009951650154811e-05, "loss": 2.3261, "step": 7278 }, { "epoch": 0.834651989450751, "grad_norm": 0.2718217213821309, "learning_rate": 7.00047247012457e-05, "loss": 2.5009, "step": 7279 }, { "epoch": 0.8347666552000917, "grad_norm": 0.26977900984501996, "learning_rate": 6.990999221059507e-05, "loss": 2.3587, "step": 7280 }, { "epoch": 0.8348813209494325, "grad_norm": 0.24865884789957596, "learning_rate": 6.981531904266286e-05, "loss": 2.36, "step": 7281 }, { "epoch": 0.8349959866987731, "grad_norm": 0.27179563907116217, "learning_rate": 6.972070521050722e-05, "loss": 2.4209, "step": 7282 }, { "epoch": 0.8351106524481138, "grad_norm": 0.2950936695707222, "learning_rate": 6.962615072717831e-05, "loss": 2.3474, "step": 7283 }, { "epoch": 0.8352253181974544, "grad_norm": 0.2404068639289652, "learning_rate": 6.95316556057185e-05, "loss": 2.4528, "step": 7284 }, { "epoch": 0.8353399839467951, "grad_norm": 0.2551203600407414, "learning_rate": 6.943721985916113e-05, "loss": 2.4323, "step": 7285 }, { "epoch": 0.8354546496961358, "grad_norm": 0.26486004366700383, "learning_rate": 6.93428435005321e-05, "loss": 2.3465, "step": 7286 }, { "epoch": 0.8355693154454764, "grad_norm": 0.24235792262936767, "learning_rate": 6.92485265428488e-05, "loss": 2.3695, "step": 7287 }, { "epoch": 0.8356839811948171, "grad_norm": 0.27733897004089325, "learning_rate": 6.915426899912013e-05, "loss": 2.4402, "step": 7288 }, { "epoch": 0.8357986469441577, "grad_norm": 0.24963678669127454, "learning_rate": 6.906007088234756e-05, "loss": 2.4752, "step": 7289 }, { "epoch": 0.8359133126934984, "grad_norm": 0.24232553263118228, "learning_rate": 6.896593220552383e-05, "loss": 2.3814, "step": 7290 }, { "epoch": 0.8360279784428392, "grad_norm": 0.2491660624907511, "learning_rate": 6.88718529816334e-05, "loss": 2.3512, "step": 7291 }, { "epoch": 0.8361426441921798, "grad_norm": 0.25502406516151066, "learning_rate": 6.877783322365283e-05, "loss": 2.3183, "step": 7292 }, { "epoch": 0.8362573099415205, "grad_norm": 0.24969230662069572, "learning_rate": 6.868387294455026e-05, "loss": 2.358, "step": 7293 }, { "epoch": 0.8363719756908612, "grad_norm": 0.25759095080743494, "learning_rate": 6.85899721572858e-05, "loss": 2.5223, "step": 7294 }, { "epoch": 0.8364866414402018, "grad_norm": 0.2699824723129227, "learning_rate": 6.849613087481126e-05, "loss": 2.36, "step": 7295 }, { "epoch": 0.8366013071895425, "grad_norm": 0.24788418939363163, "learning_rate": 6.840234911007043e-05, "loss": 2.5134, "step": 7296 }, { "epoch": 0.8367159729388831, "grad_norm": 0.2513732101988989, "learning_rate": 6.830862687599837e-05, "loss": 2.2801, "step": 7297 }, { "epoch": 0.8368306386882238, "grad_norm": 0.23198748868272304, "learning_rate": 6.821496418552231e-05, "loss": 2.4725, "step": 7298 }, { "epoch": 0.8369453044375645, "grad_norm": 0.263116211005387, "learning_rate": 6.812136105156163e-05, "loss": 2.4326, "step": 7299 }, { "epoch": 0.8370599701869051, "grad_norm": 0.23793405248878913, "learning_rate": 6.802781748702674e-05, "loss": 2.4031, "step": 7300 }, { "epoch": 0.8371746359362459, "grad_norm": 0.273780723537298, "learning_rate": 6.793433350482025e-05, "loss": 2.3825, "step": 7301 }, { "epoch": 0.8372893016855865, "grad_norm": 0.2308942464101055, "learning_rate": 6.784090911783664e-05, "loss": 2.3472, "step": 7302 }, { "epoch": 0.8374039674349272, "grad_norm": 0.2531369834290967, "learning_rate": 6.774754433896174e-05, "loss": 2.3137, "step": 7303 }, { "epoch": 0.8375186331842679, "grad_norm": 0.27257079247329097, "learning_rate": 6.765423918107372e-05, "loss": 2.3903, "step": 7304 }, { "epoch": 0.8376332989336085, "grad_norm": 0.24456377464439655, "learning_rate": 6.756099365704221e-05, "loss": 2.4615, "step": 7305 }, { "epoch": 0.8377479646829492, "grad_norm": 0.27576612640762316, "learning_rate": 6.746780777972849e-05, "loss": 2.3409, "step": 7306 }, { "epoch": 0.8378626304322899, "grad_norm": 0.24949247309246808, "learning_rate": 6.73746815619859e-05, "loss": 2.4692, "step": 7307 }, { "epoch": 0.8379772961816305, "grad_norm": 0.26647716159428586, "learning_rate": 6.728161501665942e-05, "loss": 2.482, "step": 7308 }, { "epoch": 0.8380919619309712, "grad_norm": 0.2637306942825546, "learning_rate": 6.718860815658573e-05, "loss": 2.4026, "step": 7309 }, { "epoch": 0.8382066276803118, "grad_norm": 0.24756735286244275, "learning_rate": 6.70956609945934e-05, "loss": 2.337, "step": 7310 }, { "epoch": 0.8383212934296526, "grad_norm": 0.24668977270785789, "learning_rate": 6.700277354350287e-05, "loss": 2.5056, "step": 7311 }, { "epoch": 0.8384359591789933, "grad_norm": 0.2388337656994437, "learning_rate": 6.690994581612597e-05, "loss": 2.2893, "step": 7312 }, { "epoch": 0.8385506249283339, "grad_norm": 0.3004076562737932, "learning_rate": 6.681717782526659e-05, "loss": 2.5131, "step": 7313 }, { "epoch": 0.8386652906776746, "grad_norm": 0.25350005487696986, "learning_rate": 6.672446958372025e-05, "loss": 2.2998, "step": 7314 }, { "epoch": 0.8387799564270153, "grad_norm": 0.28073604632860066, "learning_rate": 6.663182110427441e-05, "loss": 2.5, "step": 7315 }, { "epoch": 0.8388946221763559, "grad_norm": 0.24501884135959578, "learning_rate": 6.65392323997081e-05, "loss": 2.278, "step": 7316 }, { "epoch": 0.8390092879256966, "grad_norm": 0.256193076309501, "learning_rate": 6.644670348279225e-05, "loss": 2.3912, "step": 7317 }, { "epoch": 0.8391239536750372, "grad_norm": 0.2573664545228904, "learning_rate": 6.635423436628917e-05, "loss": 2.3905, "step": 7318 }, { "epoch": 0.8392386194243779, "grad_norm": 0.2570706676683893, "learning_rate": 6.626182506295363e-05, "loss": 2.3977, "step": 7319 }, { "epoch": 0.8393532851737187, "grad_norm": 0.2890941402018045, "learning_rate": 6.616947558553137e-05, "loss": 2.4346, "step": 7320 }, { "epoch": 0.8394679509230593, "grad_norm": 0.25485391141520813, "learning_rate": 6.607718594676048e-05, "loss": 2.2933, "step": 7321 }, { "epoch": 0.8395826166724, "grad_norm": 0.2577630225856674, "learning_rate": 6.598495615937039e-05, "loss": 2.3925, "step": 7322 }, { "epoch": 0.8396972824217406, "grad_norm": 0.2505373404336989, "learning_rate": 6.589278623608259e-05, "loss": 2.3808, "step": 7323 }, { "epoch": 0.8398119481710813, "grad_norm": 0.25458866578444955, "learning_rate": 6.580067618961006e-05, "loss": 2.3797, "step": 7324 }, { "epoch": 0.839926613920422, "grad_norm": 0.26753147989259257, "learning_rate": 6.570862603265771e-05, "loss": 2.458, "step": 7325 }, { "epoch": 0.8400412796697626, "grad_norm": 0.24254139339112835, "learning_rate": 6.561663577792198e-05, "loss": 2.3326, "step": 7326 }, { "epoch": 0.8401559454191033, "grad_norm": 0.2859343549058705, "learning_rate": 6.55247054380913e-05, "loss": 2.4524, "step": 7327 }, { "epoch": 0.840270611168444, "grad_norm": 0.24524962378953882, "learning_rate": 6.543283502584557e-05, "loss": 2.2975, "step": 7328 }, { "epoch": 0.8403852769177846, "grad_norm": 0.28286575223812377, "learning_rate": 6.53410245538567e-05, "loss": 2.4589, "step": 7329 }, { "epoch": 0.8404999426671254, "grad_norm": 0.27158284475979777, "learning_rate": 6.524927403478815e-05, "loss": 2.4286, "step": 7330 }, { "epoch": 0.840614608416466, "grad_norm": 0.2727525703568084, "learning_rate": 6.51575834812952e-05, "loss": 2.4674, "step": 7331 }, { "epoch": 0.8407292741658067, "grad_norm": 0.29112258870111296, "learning_rate": 6.506595290602468e-05, "loss": 2.5155, "step": 7332 }, { "epoch": 0.8408439399151474, "grad_norm": 0.25921260413305397, "learning_rate": 6.497438232161524e-05, "loss": 2.4807, "step": 7333 }, { "epoch": 0.840958605664488, "grad_norm": 0.24785690549810055, "learning_rate": 6.488287174069762e-05, "loss": 2.3841, "step": 7334 }, { "epoch": 0.8410732714138287, "grad_norm": 0.25456652507733446, "learning_rate": 6.47914211758937e-05, "loss": 2.4636, "step": 7335 }, { "epoch": 0.8411879371631693, "grad_norm": 0.25809020802231264, "learning_rate": 6.470003063981733e-05, "loss": 2.4407, "step": 7336 }, { "epoch": 0.84130260291251, "grad_norm": 0.24323508425158163, "learning_rate": 6.460870014507431e-05, "loss": 2.2582, "step": 7337 }, { "epoch": 0.8414172686618507, "grad_norm": 0.2588758204058675, "learning_rate": 6.451742970426161e-05, "loss": 2.4089, "step": 7338 }, { "epoch": 0.8415319344111913, "grad_norm": 0.27148679795716, "learning_rate": 6.44262193299685e-05, "loss": 2.4399, "step": 7339 }, { "epoch": 0.841646600160532, "grad_norm": 0.2657109929302001, "learning_rate": 6.433506903477571e-05, "loss": 2.4002, "step": 7340 }, { "epoch": 0.8417612659098728, "grad_norm": 0.2577284612194604, "learning_rate": 6.424397883125554e-05, "loss": 2.3419, "step": 7341 }, { "epoch": 0.8418759316592134, "grad_norm": 0.2550326540740129, "learning_rate": 6.415294873197225e-05, "loss": 2.2878, "step": 7342 }, { "epoch": 0.8419905974085541, "grad_norm": 0.23077940450244505, "learning_rate": 6.406197874948166e-05, "loss": 2.3087, "step": 7343 }, { "epoch": 0.8421052631578947, "grad_norm": 0.23822317447418903, "learning_rate": 6.397106889633136e-05, "loss": 2.334, "step": 7344 }, { "epoch": 0.8422199289072354, "grad_norm": 0.2625904096570945, "learning_rate": 6.388021918506065e-05, "loss": 2.4367, "step": 7345 }, { "epoch": 0.8423345946565761, "grad_norm": 0.2907368284328103, "learning_rate": 6.378942962820062e-05, "loss": 2.4346, "step": 7346 }, { "epoch": 0.8424492604059167, "grad_norm": 0.26491362284568687, "learning_rate": 6.369870023827374e-05, "loss": 2.4932, "step": 7347 }, { "epoch": 0.8425639261552574, "grad_norm": 0.25500391978179204, "learning_rate": 6.360803102779439e-05, "loss": 2.4485, "step": 7348 }, { "epoch": 0.8426785919045982, "grad_norm": 0.2614024775172327, "learning_rate": 6.351742200926897e-05, "loss": 2.582, "step": 7349 }, { "epoch": 0.8427932576539388, "grad_norm": 0.2474000174458802, "learning_rate": 6.342687319519497e-05, "loss": 2.3858, "step": 7350 }, { "epoch": 0.8429079234032795, "grad_norm": 0.2982661846619476, "learning_rate": 6.333638459806195e-05, "loss": 2.3497, "step": 7351 }, { "epoch": 0.8430225891526201, "grad_norm": 0.2715945361134651, "learning_rate": 6.324595623035123e-05, "loss": 2.4733, "step": 7352 }, { "epoch": 0.8431372549019608, "grad_norm": 0.2651693783731241, "learning_rate": 6.315558810453537e-05, "loss": 2.5103, "step": 7353 }, { "epoch": 0.8432519206513015, "grad_norm": 0.26166227820021287, "learning_rate": 6.306528023307922e-05, "loss": 2.296, "step": 7354 }, { "epoch": 0.8433665864006421, "grad_norm": 0.25578684843953703, "learning_rate": 6.297503262843901e-05, "loss": 2.4718, "step": 7355 }, { "epoch": 0.8434812521499828, "grad_norm": 0.26213480820403906, "learning_rate": 6.28848453030625e-05, "loss": 2.463, "step": 7356 }, { "epoch": 0.8435959178993234, "grad_norm": 0.24959556893097148, "learning_rate": 6.279471826938942e-05, "loss": 2.2073, "step": 7357 }, { "epoch": 0.8437105836486641, "grad_norm": 0.24258328351939681, "learning_rate": 6.270465153985106e-05, "loss": 2.2358, "step": 7358 }, { "epoch": 0.8438252493980049, "grad_norm": 0.24700953373465703, "learning_rate": 6.261464512687038e-05, "loss": 2.3821, "step": 7359 }, { "epoch": 0.8439399151473455, "grad_norm": 0.26305591558985625, "learning_rate": 6.252469904286212e-05, "loss": 2.2233, "step": 7360 }, { "epoch": 0.8440545808966862, "grad_norm": 0.2652830551624764, "learning_rate": 6.243481330023265e-05, "loss": 2.4158, "step": 7361 }, { "epoch": 0.8441692466460269, "grad_norm": 0.24539442068718118, "learning_rate": 6.234498791137988e-05, "loss": 2.4527, "step": 7362 }, { "epoch": 0.8442839123953675, "grad_norm": 0.2732018574619513, "learning_rate": 6.225522288869356e-05, "loss": 2.4278, "step": 7363 }, { "epoch": 0.8443985781447082, "grad_norm": 0.22877451667360732, "learning_rate": 6.216551824455502e-05, "loss": 2.5291, "step": 7364 }, { "epoch": 0.8445132438940488, "grad_norm": 0.2655325772150972, "learning_rate": 6.207587399133741e-05, "loss": 2.5398, "step": 7365 }, { "epoch": 0.8446279096433895, "grad_norm": 0.25428362933839443, "learning_rate": 6.19862901414054e-05, "loss": 2.3839, "step": 7366 }, { "epoch": 0.8447425753927302, "grad_norm": 0.26474752531660295, "learning_rate": 6.189676670711542e-05, "loss": 2.4153, "step": 7367 }, { "epoch": 0.8448572411420708, "grad_norm": 0.27551712502897546, "learning_rate": 6.180730370081521e-05, "loss": 2.5247, "step": 7368 }, { "epoch": 0.8449719068914116, "grad_norm": 0.24579649068025336, "learning_rate": 6.171790113484504e-05, "loss": 2.3655, "step": 7369 }, { "epoch": 0.8450865726407522, "grad_norm": 0.252975984238005, "learning_rate": 6.162855902153586e-05, "loss": 2.5797, "step": 7370 }, { "epoch": 0.8452012383900929, "grad_norm": 0.24513496386807695, "learning_rate": 6.153927737321086e-05, "loss": 2.3605, "step": 7371 }, { "epoch": 0.8453159041394336, "grad_norm": 0.2526966998419088, "learning_rate": 6.145005620218469e-05, "loss": 2.3886, "step": 7372 }, { "epoch": 0.8454305698887742, "grad_norm": 0.2577473517761556, "learning_rate": 6.13608955207638e-05, "loss": 2.4651, "step": 7373 }, { "epoch": 0.8455452356381149, "grad_norm": 0.21669635081659958, "learning_rate": 6.127179534124605e-05, "loss": 2.3285, "step": 7374 }, { "epoch": 0.8456599013874556, "grad_norm": 0.268442581182433, "learning_rate": 6.118275567592141e-05, "loss": 2.5963, "step": 7375 }, { "epoch": 0.8457745671367962, "grad_norm": 0.28499603876589635, "learning_rate": 6.109377653707087e-05, "loss": 2.2982, "step": 7376 }, { "epoch": 0.8458892328861369, "grad_norm": 0.27998811886631714, "learning_rate": 6.100485793696753e-05, "loss": 2.5727, "step": 7377 }, { "epoch": 0.8460038986354775, "grad_norm": 0.25688106333283234, "learning_rate": 6.091599988787605e-05, "loss": 2.3853, "step": 7378 }, { "epoch": 0.8461185643848183, "grad_norm": 0.30853933039733067, "learning_rate": 6.08272024020527e-05, "loss": 2.4761, "step": 7379 }, { "epoch": 0.846233230134159, "grad_norm": 0.2638574023641479, "learning_rate": 6.073846549174533e-05, "loss": 2.4848, "step": 7380 }, { "epoch": 0.8463478958834996, "grad_norm": 0.2365175073026522, "learning_rate": 6.0649789169193727e-05, "loss": 2.5048, "step": 7381 }, { "epoch": 0.8464625616328403, "grad_norm": 0.2424481589956357, "learning_rate": 6.056117344662876e-05, "loss": 2.3042, "step": 7382 }, { "epoch": 0.846577227382181, "grad_norm": 0.23348956570811055, "learning_rate": 6.047261833627338e-05, "loss": 2.4641, "step": 7383 }, { "epoch": 0.8466918931315216, "grad_norm": 0.2517947819576375, "learning_rate": 6.0384123850342366e-05, "loss": 2.3262, "step": 7384 }, { "epoch": 0.8468065588808623, "grad_norm": 0.2395713403821714, "learning_rate": 6.029569000104152e-05, "loss": 2.3195, "step": 7385 }, { "epoch": 0.8469212246302029, "grad_norm": 0.24399739858681516, "learning_rate": 6.0207316800568676e-05, "loss": 2.4086, "step": 7386 }, { "epoch": 0.8470358903795436, "grad_norm": 0.26589052433398574, "learning_rate": 6.0119004261113386e-05, "loss": 2.3383, "step": 7387 }, { "epoch": 0.8471505561288843, "grad_norm": 0.24639137050366963, "learning_rate": 6.003075239485639e-05, "loss": 2.3629, "step": 7388 }, { "epoch": 0.847265221878225, "grad_norm": 0.2560674614993338, "learning_rate": 5.994256121397057e-05, "loss": 2.4028, "step": 7389 }, { "epoch": 0.8473798876275657, "grad_norm": 0.2379773377532734, "learning_rate": 5.985443073062036e-05, "loss": 2.3552, "step": 7390 }, { "epoch": 0.8474945533769063, "grad_norm": 0.2802583155125992, "learning_rate": 5.9766360956961305e-05, "loss": 2.4047, "step": 7391 }, { "epoch": 0.847609219126247, "grad_norm": 0.2583178224968202, "learning_rate": 5.9678351905141204e-05, "loss": 2.3878, "step": 7392 }, { "epoch": 0.8477238848755877, "grad_norm": 0.2640310657779709, "learning_rate": 5.9590403587299116e-05, "loss": 2.3564, "step": 7393 }, { "epoch": 0.8478385506249283, "grad_norm": 0.30346044463945665, "learning_rate": 5.950251601556589e-05, "loss": 2.3779, "step": 7394 }, { "epoch": 0.847953216374269, "grad_norm": 0.28333153589238047, "learning_rate": 5.941468920206394e-05, "loss": 2.346, "step": 7395 }, { "epoch": 0.8480678821236097, "grad_norm": 0.26647318999969893, "learning_rate": 5.932692315890742e-05, "loss": 2.3383, "step": 7396 }, { "epoch": 0.8481825478729503, "grad_norm": 0.258954386770001, "learning_rate": 5.923921789820175e-05, "loss": 2.4828, "step": 7397 }, { "epoch": 0.848297213622291, "grad_norm": 0.2582813128197879, "learning_rate": 5.9151573432044195e-05, "loss": 2.3856, "step": 7398 }, { "epoch": 0.8484118793716316, "grad_norm": 0.28484231174216457, "learning_rate": 5.9063989772523917e-05, "loss": 2.4715, "step": 7399 }, { "epoch": 0.8485265451209724, "grad_norm": 0.24275835553791417, "learning_rate": 5.8976466931721206e-05, "loss": 2.5656, "step": 7400 }, { "epoch": 0.8486412108703131, "grad_norm": 0.3037462476854509, "learning_rate": 5.888900492170818e-05, "loss": 2.4641, "step": 7401 }, { "epoch": 0.8487558766196537, "grad_norm": 0.2940176942252436, "learning_rate": 5.8801603754548684e-05, "loss": 2.4044, "step": 7402 }, { "epoch": 0.8488705423689944, "grad_norm": 0.2685151020416385, "learning_rate": 5.871426344229769e-05, "loss": 2.2659, "step": 7403 }, { "epoch": 0.848985208118335, "grad_norm": 0.2550121924686993, "learning_rate": 5.8626983997002556e-05, "loss": 2.3505, "step": 7404 }, { "epoch": 0.8490998738676757, "grad_norm": 0.2521927640985913, "learning_rate": 5.853976543070172e-05, "loss": 2.4366, "step": 7405 }, { "epoch": 0.8492145396170164, "grad_norm": 0.2624617311168068, "learning_rate": 5.845260775542516e-05, "loss": 2.3559, "step": 7406 }, { "epoch": 0.849329205366357, "grad_norm": 0.2650708839251532, "learning_rate": 5.83655109831947e-05, "loss": 2.5392, "step": 7407 }, { "epoch": 0.8494438711156977, "grad_norm": 0.24888837987351464, "learning_rate": 5.8278475126023635e-05, "loss": 2.4695, "step": 7408 }, { "epoch": 0.8495585368650385, "grad_norm": 0.2361990338659879, "learning_rate": 5.819150019591701e-05, "loss": 2.3647, "step": 7409 }, { "epoch": 0.8496732026143791, "grad_norm": 0.24055793119594715, "learning_rate": 5.810458620487124e-05, "loss": 2.4647, "step": 7410 }, { "epoch": 0.8497878683637198, "grad_norm": 0.25066155961743153, "learning_rate": 5.801773316487463e-05, "loss": 2.3476, "step": 7411 }, { "epoch": 0.8499025341130604, "grad_norm": 0.2827528739259388, "learning_rate": 5.7930941087906695e-05, "loss": 2.3077, "step": 7412 }, { "epoch": 0.8500171998624011, "grad_norm": 0.24786308722520733, "learning_rate": 5.7844209985938865e-05, "loss": 2.3916, "step": 7413 }, { "epoch": 0.8501318656117418, "grad_norm": 0.2578890418696975, "learning_rate": 5.7757539870933964e-05, "loss": 2.4608, "step": 7414 }, { "epoch": 0.8502465313610824, "grad_norm": 0.27209706260619676, "learning_rate": 5.767093075484653e-05, "loss": 2.4006, "step": 7415 }, { "epoch": 0.8503611971104231, "grad_norm": 0.24406850873997418, "learning_rate": 5.758438264962268e-05, "loss": 2.2542, "step": 7416 }, { "epoch": 0.8504758628597638, "grad_norm": 0.26564558132033356, "learning_rate": 5.7497895567200085e-05, "loss": 2.2802, "step": 7417 }, { "epoch": 0.8505905286091044, "grad_norm": 0.24160028508585424, "learning_rate": 5.741146951950776e-05, "loss": 2.3906, "step": 7418 }, { "epoch": 0.8507051943584452, "grad_norm": 0.27690186611941386, "learning_rate": 5.73251045184669e-05, "loss": 2.3152, "step": 7419 }, { "epoch": 0.8508198601077858, "grad_norm": 0.24059730814558286, "learning_rate": 5.7238800575989626e-05, "loss": 2.3433, "step": 7420 }, { "epoch": 0.8509345258571265, "grad_norm": 0.2258590769848453, "learning_rate": 5.7152557703980034e-05, "loss": 2.3204, "step": 7421 }, { "epoch": 0.8510491916064672, "grad_norm": 0.31143347917557757, "learning_rate": 5.706637591433367e-05, "loss": 2.5194, "step": 7422 }, { "epoch": 0.8511638573558078, "grad_norm": 0.253613217850807, "learning_rate": 5.6980255218937625e-05, "loss": 2.4162, "step": 7423 }, { "epoch": 0.8512785231051485, "grad_norm": 0.24455910535803058, "learning_rate": 5.689419562967069e-05, "loss": 2.5173, "step": 7424 }, { "epoch": 0.8513931888544891, "grad_norm": 0.26079317055129914, "learning_rate": 5.6808197158403185e-05, "loss": 2.4076, "step": 7425 }, { "epoch": 0.8515078546038298, "grad_norm": 0.22975612616919897, "learning_rate": 5.6722259816996744e-05, "loss": 2.4616, "step": 7426 }, { "epoch": 0.8516225203531705, "grad_norm": 0.2630507239123343, "learning_rate": 5.6636383617305e-05, "loss": 2.3969, "step": 7427 }, { "epoch": 0.8517371861025111, "grad_norm": 0.2885157925201874, "learning_rate": 5.65505685711728e-05, "loss": 2.485, "step": 7428 }, { "epoch": 0.8518518518518519, "grad_norm": 0.247694619357967, "learning_rate": 5.646481469043674e-05, "loss": 2.3786, "step": 7429 }, { "epoch": 0.8519665176011926, "grad_norm": 0.2733639534439636, "learning_rate": 5.6379121986924966e-05, "loss": 2.374, "step": 7430 }, { "epoch": 0.8520811833505332, "grad_norm": 0.25048987916610066, "learning_rate": 5.629349047245724e-05, "loss": 2.4347, "step": 7431 }, { "epoch": 0.8521958490998739, "grad_norm": 0.246810369404724, "learning_rate": 5.620792015884457e-05, "loss": 2.3322, "step": 7432 }, { "epoch": 0.8523105148492145, "grad_norm": 0.22519279725078284, "learning_rate": 5.612241105788979e-05, "loss": 2.345, "step": 7433 }, { "epoch": 0.8524251805985552, "grad_norm": 0.2628826146842859, "learning_rate": 5.6036963181387516e-05, "loss": 2.4617, "step": 7434 }, { "epoch": 0.8525398463478959, "grad_norm": 0.25420169764311584, "learning_rate": 5.5951576541123315e-05, "loss": 2.2493, "step": 7435 }, { "epoch": 0.8526545120972365, "grad_norm": 0.2628101032101516, "learning_rate": 5.5866251148874894e-05, "loss": 2.4613, "step": 7436 }, { "epoch": 0.8527691778465772, "grad_norm": 0.2680625969096601, "learning_rate": 5.5780987016411214e-05, "loss": 2.392, "step": 7437 }, { "epoch": 0.8528838435959178, "grad_norm": 0.2609814039963656, "learning_rate": 5.569578415549259e-05, "loss": 2.3488, "step": 7438 }, { "epoch": 0.8529985093452586, "grad_norm": 0.28461357598423603, "learning_rate": 5.5610642577871404e-05, "loss": 2.3195, "step": 7439 }, { "epoch": 0.8531131750945993, "grad_norm": 0.24201554296547595, "learning_rate": 5.552556229529138e-05, "loss": 2.2454, "step": 7440 }, { "epoch": 0.8532278408439399, "grad_norm": 0.2537792738599434, "learning_rate": 5.54405433194875e-05, "loss": 2.3136, "step": 7441 }, { "epoch": 0.8533425065932806, "grad_norm": 0.2874682166144907, "learning_rate": 5.535558566218657e-05, "loss": 2.3575, "step": 7442 }, { "epoch": 0.8534571723426213, "grad_norm": 0.24938415360585353, "learning_rate": 5.527068933510687e-05, "loss": 2.3451, "step": 7443 }, { "epoch": 0.8535718380919619, "grad_norm": 0.24437905448870373, "learning_rate": 5.518585434995832e-05, "loss": 2.3803, "step": 7444 }, { "epoch": 0.8536865038413026, "grad_norm": 0.2887218110764142, "learning_rate": 5.510108071844222e-05, "loss": 2.3729, "step": 7445 }, { "epoch": 0.8538011695906432, "grad_norm": 0.2794849551525388, "learning_rate": 5.501636845225161e-05, "loss": 2.4006, "step": 7446 }, { "epoch": 0.8539158353399839, "grad_norm": 0.23054717174951853, "learning_rate": 5.4931717563070635e-05, "loss": 2.3783, "step": 7447 }, { "epoch": 0.8540305010893247, "grad_norm": 0.2543642470420606, "learning_rate": 5.4847128062575415e-05, "loss": 2.3824, "step": 7448 }, { "epoch": 0.8541451668386653, "grad_norm": 0.28237933690372635, "learning_rate": 5.476259996243371e-05, "loss": 2.3931, "step": 7449 }, { "epoch": 0.854259832588006, "grad_norm": 0.26227786293634653, "learning_rate": 5.467813327430421e-05, "loss": 2.3537, "step": 7450 }, { "epoch": 0.8543744983373467, "grad_norm": 0.2505258421378121, "learning_rate": 5.4593728009837585e-05, "loss": 2.2931, "step": 7451 }, { "epoch": 0.8544891640866873, "grad_norm": 0.2679298200199342, "learning_rate": 5.450938418067614e-05, "loss": 2.5281, "step": 7452 }, { "epoch": 0.854603829836028, "grad_norm": 0.25762434127657835, "learning_rate": 5.442510179845306e-05, "loss": 2.391, "step": 7453 }, { "epoch": 0.8547184955853686, "grad_norm": 0.2688359171292793, "learning_rate": 5.434088087479389e-05, "loss": 2.4066, "step": 7454 }, { "epoch": 0.8548331613347093, "grad_norm": 0.2768619120395305, "learning_rate": 5.425672142131527e-05, "loss": 2.3442, "step": 7455 }, { "epoch": 0.85494782708405, "grad_norm": 0.27156218318543174, "learning_rate": 5.4172623449625146e-05, "loss": 2.2954, "step": 7456 }, { "epoch": 0.8550624928333906, "grad_norm": 0.28099427245419717, "learning_rate": 5.408858697132341e-05, "loss": 2.5027, "step": 7457 }, { "epoch": 0.8551771585827314, "grad_norm": 0.23226589385877483, "learning_rate": 5.4004611998001274e-05, "loss": 2.3016, "step": 7458 }, { "epoch": 0.855291824332072, "grad_norm": 0.2490192815080716, "learning_rate": 5.392069854124143e-05, "loss": 2.322, "step": 7459 }, { "epoch": 0.8554064900814127, "grad_norm": 0.2788048067416016, "learning_rate": 5.383684661261823e-05, "loss": 2.4655, "step": 7460 }, { "epoch": 0.8555211558307534, "grad_norm": 0.24090559279598975, "learning_rate": 5.3753056223697415e-05, "loss": 2.4112, "step": 7461 }, { "epoch": 0.855635821580094, "grad_norm": 0.25805492020046117, "learning_rate": 5.366932738603619e-05, "loss": 2.376, "step": 7462 }, { "epoch": 0.8557504873294347, "grad_norm": 0.2858213764314826, "learning_rate": 5.358566011118343e-05, "loss": 2.2827, "step": 7463 }, { "epoch": 0.8558651530787754, "grad_norm": 0.2636881805331364, "learning_rate": 5.350205441067946e-05, "loss": 2.4616, "step": 7464 }, { "epoch": 0.855979818828116, "grad_norm": 0.2906053505063247, "learning_rate": 5.341851029605599e-05, "loss": 2.3821, "step": 7465 }, { "epoch": 0.8560944845774567, "grad_norm": 0.25978054088468305, "learning_rate": 5.333502777883642e-05, "loss": 2.3743, "step": 7466 }, { "epoch": 0.8562091503267973, "grad_norm": 0.267879057807956, "learning_rate": 5.3251606870535706e-05, "loss": 2.5273, "step": 7467 }, { "epoch": 0.8563238160761381, "grad_norm": 0.2503332371198279, "learning_rate": 5.3168247582659814e-05, "loss": 2.3541, "step": 7468 }, { "epoch": 0.8564384818254788, "grad_norm": 0.2823829438082458, "learning_rate": 5.3084949926706924e-05, "loss": 2.4142, "step": 7469 }, { "epoch": 0.8565531475748194, "grad_norm": 0.26504482018333186, "learning_rate": 5.3001713914166136e-05, "loss": 2.2558, "step": 7470 }, { "epoch": 0.8566678133241601, "grad_norm": 0.27265628064410286, "learning_rate": 5.291853955651837e-05, "loss": 2.3745, "step": 7471 }, { "epoch": 0.8567824790735007, "grad_norm": 0.2713372114241245, "learning_rate": 5.283542686523596e-05, "loss": 2.2901, "step": 7472 }, { "epoch": 0.8568971448228414, "grad_norm": 0.2374886649620647, "learning_rate": 5.2752375851782605e-05, "loss": 2.4342, "step": 7473 }, { "epoch": 0.8570118105721821, "grad_norm": 0.25916418214610326, "learning_rate": 5.266938652761377e-05, "loss": 2.2924, "step": 7474 }, { "epoch": 0.8571264763215227, "grad_norm": 0.2599817813794228, "learning_rate": 5.258645890417618e-05, "loss": 2.2489, "step": 7475 }, { "epoch": 0.8572411420708634, "grad_norm": 0.26527406015378374, "learning_rate": 5.2503592992908075e-05, "loss": 2.5143, "step": 7476 }, { "epoch": 0.8573558078202042, "grad_norm": 0.2290020556249911, "learning_rate": 5.242078880523921e-05, "loss": 2.4696, "step": 7477 }, { "epoch": 0.8574704735695448, "grad_norm": 0.2440494446577652, "learning_rate": 5.233804635259093e-05, "loss": 2.4292, "step": 7478 }, { "epoch": 0.8575851393188855, "grad_norm": 0.23967853343296489, "learning_rate": 5.2255365646375896e-05, "loss": 2.3563, "step": 7479 }, { "epoch": 0.8576998050682261, "grad_norm": 0.29740054177905095, "learning_rate": 5.217274669799843e-05, "loss": 2.3164, "step": 7480 }, { "epoch": 0.8578144708175668, "grad_norm": 0.2604555153857124, "learning_rate": 5.20901895188543e-05, "loss": 2.2748, "step": 7481 }, { "epoch": 0.8579291365669075, "grad_norm": 0.24850499433693082, "learning_rate": 5.200769412033046e-05, "loss": 2.3135, "step": 7482 }, { "epoch": 0.8580438023162481, "grad_norm": 0.26441925455814375, "learning_rate": 5.1925260513805584e-05, "loss": 2.308, "step": 7483 }, { "epoch": 0.8581584680655888, "grad_norm": 0.23873635506646962, "learning_rate": 5.1842888710650185e-05, "loss": 2.2353, "step": 7484 }, { "epoch": 0.8582731338149295, "grad_norm": 0.2396979253790572, "learning_rate": 5.17605787222255e-05, "loss": 2.2657, "step": 7485 }, { "epoch": 0.8583877995642701, "grad_norm": 0.25701420085020904, "learning_rate": 5.16783305598848e-05, "loss": 2.4428, "step": 7486 }, { "epoch": 0.8585024653136109, "grad_norm": 0.26423914598622267, "learning_rate": 5.1596144234972665e-05, "loss": 2.5353, "step": 7487 }, { "epoch": 0.8586171310629515, "grad_norm": 0.25885837728136946, "learning_rate": 5.1514019758824915e-05, "loss": 2.5074, "step": 7488 }, { "epoch": 0.8587317968122922, "grad_norm": 0.2686750134878246, "learning_rate": 5.143195714276927e-05, "loss": 2.3648, "step": 7489 }, { "epoch": 0.8588464625616329, "grad_norm": 0.26311816594345444, "learning_rate": 5.1349956398124785e-05, "loss": 2.3043, "step": 7490 }, { "epoch": 0.8589611283109735, "grad_norm": 0.24465547724357115, "learning_rate": 5.1268017536201696e-05, "loss": 2.4699, "step": 7491 }, { "epoch": 0.8590757940603142, "grad_norm": 0.27444670752145744, "learning_rate": 5.1186140568301954e-05, "loss": 2.4068, "step": 7492 }, { "epoch": 0.8591904598096548, "grad_norm": 0.27747680186814133, "learning_rate": 5.110432550571892e-05, "loss": 2.4593, "step": 7493 }, { "epoch": 0.8593051255589955, "grad_norm": 0.24249241700822985, "learning_rate": 5.102257235973751e-05, "loss": 2.3549, "step": 7494 }, { "epoch": 0.8594197913083362, "grad_norm": 0.2739970833093966, "learning_rate": 5.0940881141633924e-05, "loss": 2.3568, "step": 7495 }, { "epoch": 0.8595344570576768, "grad_norm": 0.249760384609764, "learning_rate": 5.0859251862676035e-05, "loss": 2.4051, "step": 7496 }, { "epoch": 0.8596491228070176, "grad_norm": 0.28462301526596834, "learning_rate": 5.0777684534122845e-05, "loss": 2.443, "step": 7497 }, { "epoch": 0.8597637885563583, "grad_norm": 0.27298531294680267, "learning_rate": 5.069617916722502e-05, "loss": 2.3805, "step": 7498 }, { "epoch": 0.8598784543056989, "grad_norm": 0.26235266054910095, "learning_rate": 5.0614735773224953e-05, "loss": 2.3039, "step": 7499 }, { "epoch": 0.8599931200550396, "grad_norm": 0.25426787830790465, "learning_rate": 5.053335436335593e-05, "loss": 2.3945, "step": 7500 }, { "epoch": 0.8601077858043802, "grad_norm": 0.25500449845568723, "learning_rate": 5.045203494884309e-05, "loss": 2.2948, "step": 7501 }, { "epoch": 0.8602224515537209, "grad_norm": 0.2582028649308595, "learning_rate": 5.037077754090297e-05, "loss": 2.3852, "step": 7502 }, { "epoch": 0.8603371173030616, "grad_norm": 0.26029502411623073, "learning_rate": 5.028958215074314e-05, "loss": 2.4357, "step": 7503 }, { "epoch": 0.8604517830524022, "grad_norm": 0.2566413785869189, "learning_rate": 5.0208448789563324e-05, "loss": 2.329, "step": 7504 }, { "epoch": 0.8605664488017429, "grad_norm": 0.39235457308815774, "learning_rate": 5.012737746855428e-05, "loss": 2.3778, "step": 7505 }, { "epoch": 0.8606811145510835, "grad_norm": 0.24477798601771983, "learning_rate": 5.0046368198898015e-05, "loss": 2.4195, "step": 7506 }, { "epoch": 0.8607957803004243, "grad_norm": 0.2643168138484971, "learning_rate": 4.996542099176843e-05, "loss": 2.3121, "step": 7507 }, { "epoch": 0.860910446049765, "grad_norm": 0.27102985402351976, "learning_rate": 4.9884535858330526e-05, "loss": 2.3153, "step": 7508 }, { "epoch": 0.8610251117991056, "grad_norm": 0.24416388518642204, "learning_rate": 4.9803712809740945e-05, "loss": 2.3029, "step": 7509 }, { "epoch": 0.8611397775484463, "grad_norm": 0.26104062757910423, "learning_rate": 4.972295185714765e-05, "loss": 2.3787, "step": 7510 }, { "epoch": 0.861254443297787, "grad_norm": 0.25481360665056685, "learning_rate": 4.964225301169023e-05, "loss": 2.3383, "step": 7511 }, { "epoch": 0.8613691090471276, "grad_norm": 0.23963738105396487, "learning_rate": 4.956161628449923e-05, "loss": 2.4837, "step": 7512 }, { "epoch": 0.8614837747964683, "grad_norm": 0.2419191317610368, "learning_rate": 4.94810416866972e-05, "loss": 2.4131, "step": 7513 }, { "epoch": 0.8615984405458089, "grad_norm": 0.21720650100970568, "learning_rate": 4.940052922939775e-05, "loss": 2.3352, "step": 7514 }, { "epoch": 0.8617131062951496, "grad_norm": 0.24116176726440833, "learning_rate": 4.93200789237061e-05, "loss": 2.358, "step": 7515 }, { "epoch": 0.8618277720444903, "grad_norm": 0.2309417338364269, "learning_rate": 4.923969078071883e-05, "loss": 2.2111, "step": 7516 }, { "epoch": 0.861942437793831, "grad_norm": 0.26427672734598406, "learning_rate": 4.9159364811524e-05, "loss": 2.3385, "step": 7517 }, { "epoch": 0.8620571035431717, "grad_norm": 0.24350230507605078, "learning_rate": 4.907910102720087e-05, "loss": 2.3563, "step": 7518 }, { "epoch": 0.8621717692925124, "grad_norm": 0.2862664891226002, "learning_rate": 4.899889943882052e-05, "loss": 2.5323, "step": 7519 }, { "epoch": 0.862286435041853, "grad_norm": 0.2829828106986698, "learning_rate": 4.891876005744511e-05, "loss": 2.471, "step": 7520 }, { "epoch": 0.8624011007911937, "grad_norm": 0.2798733317098286, "learning_rate": 4.883868289412835e-05, "loss": 2.4314, "step": 7521 }, { "epoch": 0.8625157665405343, "grad_norm": 0.25685300260632393, "learning_rate": 4.875866795991535e-05, "loss": 2.319, "step": 7522 }, { "epoch": 0.862630432289875, "grad_norm": 0.28097789743909446, "learning_rate": 4.867871526584261e-05, "loss": 2.423, "step": 7523 }, { "epoch": 0.8627450980392157, "grad_norm": 0.28397917406149864, "learning_rate": 4.8598824822938206e-05, "loss": 2.4456, "step": 7524 }, { "epoch": 0.8628597637885563, "grad_norm": 0.26120545778488774, "learning_rate": 4.8518996642221536e-05, "loss": 2.3712, "step": 7525 }, { "epoch": 0.862974429537897, "grad_norm": 0.26548425278540194, "learning_rate": 4.8439230734703135e-05, "loss": 2.3202, "step": 7526 }, { "epoch": 0.8630890952872377, "grad_norm": 0.2541897315716967, "learning_rate": 4.8359527111385304e-05, "loss": 2.2117, "step": 7527 }, { "epoch": 0.8632037610365784, "grad_norm": 0.265459699646245, "learning_rate": 4.8279885783261704e-05, "loss": 2.3264, "step": 7528 }, { "epoch": 0.8633184267859191, "grad_norm": 0.28437568398474766, "learning_rate": 4.820030676131726e-05, "loss": 2.5454, "step": 7529 }, { "epoch": 0.8634330925352597, "grad_norm": 0.2898740677015077, "learning_rate": 4.812079005652842e-05, "loss": 2.5458, "step": 7530 }, { "epoch": 0.8635477582846004, "grad_norm": 0.2610459806874104, "learning_rate": 4.804133567986307e-05, "loss": 2.2771, "step": 7531 }, { "epoch": 0.8636624240339411, "grad_norm": 0.2604464216622164, "learning_rate": 4.796194364228029e-05, "loss": 2.3779, "step": 7532 }, { "epoch": 0.8637770897832817, "grad_norm": 0.2730962506091434, "learning_rate": 4.788261395473065e-05, "loss": 2.4007, "step": 7533 }, { "epoch": 0.8638917555326224, "grad_norm": 0.2664853094252474, "learning_rate": 4.780334662815644e-05, "loss": 2.5028, "step": 7534 }, { "epoch": 0.864006421281963, "grad_norm": 0.27936988057249607, "learning_rate": 4.7724141673490816e-05, "loss": 2.3651, "step": 7535 }, { "epoch": 0.8641210870313037, "grad_norm": 0.2454247628582261, "learning_rate": 4.7644999101658705e-05, "loss": 2.3353, "step": 7536 }, { "epoch": 0.8642357527806445, "grad_norm": 0.2631340836347278, "learning_rate": 4.756591892357637e-05, "loss": 2.4752, "step": 7537 }, { "epoch": 0.8643504185299851, "grad_norm": 0.25685027363527335, "learning_rate": 4.7486901150151197e-05, "loss": 2.443, "step": 7538 }, { "epoch": 0.8644650842793258, "grad_norm": 0.2683025680694736, "learning_rate": 4.740794579228236e-05, "loss": 2.4955, "step": 7539 }, { "epoch": 0.8645797500286664, "grad_norm": 0.27734681180055326, "learning_rate": 4.732905286086037e-05, "loss": 2.5874, "step": 7540 }, { "epoch": 0.8646944157780071, "grad_norm": 0.30948720090687226, "learning_rate": 4.725022236676668e-05, "loss": 2.4534, "step": 7541 }, { "epoch": 0.8648090815273478, "grad_norm": 0.26625111623394787, "learning_rate": 4.717145432087466e-05, "loss": 2.4762, "step": 7542 }, { "epoch": 0.8649237472766884, "grad_norm": 0.2795490410795455, "learning_rate": 4.709274873404884e-05, "loss": 2.477, "step": 7543 }, { "epoch": 0.8650384130260291, "grad_norm": 0.24748686151921762, "learning_rate": 4.701410561714509e-05, "loss": 2.2869, "step": 7544 }, { "epoch": 0.8651530787753698, "grad_norm": 0.25385967593851466, "learning_rate": 4.693552498101078e-05, "loss": 2.3031, "step": 7545 }, { "epoch": 0.8652677445247104, "grad_norm": 0.2788516388643916, "learning_rate": 4.6857006836484695e-05, "loss": 2.4774, "step": 7546 }, { "epoch": 0.8653824102740512, "grad_norm": 0.2658955969815931, "learning_rate": 4.677855119439678e-05, "loss": 2.5336, "step": 7547 }, { "epoch": 0.8654970760233918, "grad_norm": 0.28068508282585364, "learning_rate": 4.670015806556843e-05, "loss": 2.2472, "step": 7548 }, { "epoch": 0.8656117417727325, "grad_norm": 0.28079822547632677, "learning_rate": 4.6621827460812725e-05, "loss": 2.4176, "step": 7549 }, { "epoch": 0.8657264075220732, "grad_norm": 0.2767932947049551, "learning_rate": 4.6543559390933685e-05, "loss": 2.4025, "step": 7550 }, { "epoch": 0.8658410732714138, "grad_norm": 0.2789421233295059, "learning_rate": 4.6465353866727014e-05, "loss": 2.3948, "step": 7551 }, { "epoch": 0.8659557390207545, "grad_norm": 0.23794909996909555, "learning_rate": 4.638721089897968e-05, "loss": 2.2494, "step": 7552 }, { "epoch": 0.8660704047700952, "grad_norm": 0.25203129599706925, "learning_rate": 4.6309130498469744e-05, "loss": 2.4435, "step": 7553 }, { "epoch": 0.8661850705194358, "grad_norm": 0.2658433541751157, "learning_rate": 4.623111267596725e-05, "loss": 2.3067, "step": 7554 }, { "epoch": 0.8662997362687765, "grad_norm": 0.2509143147990918, "learning_rate": 4.61531574422332e-05, "loss": 2.4901, "step": 7555 }, { "epoch": 0.8664144020181171, "grad_norm": 0.2627905123146582, "learning_rate": 4.607526480801988e-05, "loss": 2.3802, "step": 7556 }, { "epoch": 0.8665290677674579, "grad_norm": 0.2541623611844552, "learning_rate": 4.59974347840712e-05, "loss": 2.3149, "step": 7557 }, { "epoch": 0.8666437335167986, "grad_norm": 0.2514468185470806, "learning_rate": 4.59196673811223e-05, "loss": 2.3542, "step": 7558 }, { "epoch": 0.8667583992661392, "grad_norm": 0.2518103404012861, "learning_rate": 4.584196260989975e-05, "loss": 2.3851, "step": 7559 }, { "epoch": 0.8668730650154799, "grad_norm": 0.2748275447138567, "learning_rate": 4.576432048112145e-05, "loss": 2.3721, "step": 7560 }, { "epoch": 0.8669877307648205, "grad_norm": 0.2532233203036863, "learning_rate": 4.568674100549664e-05, "loss": 2.4745, "step": 7561 }, { "epoch": 0.8671023965141612, "grad_norm": 0.2589750193162282, "learning_rate": 4.560922419372587e-05, "loss": 2.447, "step": 7562 }, { "epoch": 0.8672170622635019, "grad_norm": 0.2896788461721633, "learning_rate": 4.553177005650111e-05, "loss": 2.4129, "step": 7563 }, { "epoch": 0.8673317280128425, "grad_norm": 0.24787864313204927, "learning_rate": 4.545437860450574e-05, "loss": 2.404, "step": 7564 }, { "epoch": 0.8674463937621832, "grad_norm": 0.24490035891030096, "learning_rate": 4.53770498484144e-05, "loss": 2.3503, "step": 7565 }, { "epoch": 0.867561059511524, "grad_norm": 0.29008269954674465, "learning_rate": 4.5299783798893154e-05, "loss": 2.5506, "step": 7566 }, { "epoch": 0.8676757252608646, "grad_norm": 0.25932507536681576, "learning_rate": 4.522258046659944e-05, "loss": 2.3907, "step": 7567 }, { "epoch": 0.8677903910102053, "grad_norm": 0.24701258550245359, "learning_rate": 4.514543986218173e-05, "loss": 2.3926, "step": 7568 }, { "epoch": 0.8679050567595459, "grad_norm": 0.2631782799640303, "learning_rate": 4.5068361996280393e-05, "loss": 2.5567, "step": 7569 }, { "epoch": 0.8680197225088866, "grad_norm": 0.2538545676333995, "learning_rate": 4.499134687952672e-05, "loss": 2.5068, "step": 7570 }, { "epoch": 0.8681343882582273, "grad_norm": 0.26549123328737745, "learning_rate": 4.491439452254348e-05, "loss": 2.4734, "step": 7571 }, { "epoch": 0.8682490540075679, "grad_norm": 0.2510687472469412, "learning_rate": 4.483750493594474e-05, "loss": 2.3964, "step": 7572 }, { "epoch": 0.8683637197569086, "grad_norm": 0.2469279738097297, "learning_rate": 4.4760678130336076e-05, "loss": 2.3824, "step": 7573 }, { "epoch": 0.8684783855062492, "grad_norm": 0.2357818188278889, "learning_rate": 4.468391411631417e-05, "loss": 2.3232, "step": 7574 }, { "epoch": 0.8685930512555899, "grad_norm": 0.2655283428412872, "learning_rate": 4.4607212904467285e-05, "loss": 2.3735, "step": 7575 }, { "epoch": 0.8687077170049307, "grad_norm": 0.2737125971745604, "learning_rate": 4.453057450537473e-05, "loss": 2.377, "step": 7576 }, { "epoch": 0.8688223827542713, "grad_norm": 0.25866634348935846, "learning_rate": 4.445399892960738e-05, "loss": 2.3985, "step": 7577 }, { "epoch": 0.868937048503612, "grad_norm": 0.24887854726973824, "learning_rate": 4.437748618772741e-05, "loss": 2.3914, "step": 7578 }, { "epoch": 0.8690517142529527, "grad_norm": 0.24958462902763495, "learning_rate": 4.4301036290288254e-05, "loss": 2.3689, "step": 7579 }, { "epoch": 0.8691663800022933, "grad_norm": 0.2633820114493486, "learning_rate": 4.42246492478347e-05, "loss": 2.2858, "step": 7580 }, { "epoch": 0.869281045751634, "grad_norm": 0.25135418461105513, "learning_rate": 4.4148325070902994e-05, "loss": 2.3758, "step": 7581 }, { "epoch": 0.8693957115009746, "grad_norm": 0.29293457967361597, "learning_rate": 4.407206377002049e-05, "loss": 2.4101, "step": 7582 }, { "epoch": 0.8695103772503153, "grad_norm": 0.25136549978553563, "learning_rate": 4.3995865355705876e-05, "loss": 2.3329, "step": 7583 }, { "epoch": 0.869625042999656, "grad_norm": 0.258351193263078, "learning_rate": 4.391972983846959e-05, "loss": 2.3308, "step": 7584 }, { "epoch": 0.8697397087489966, "grad_norm": 0.22160632167761943, "learning_rate": 4.3843657228812836e-05, "loss": 2.4114, "step": 7585 }, { "epoch": 0.8698543744983374, "grad_norm": 0.2670705851476512, "learning_rate": 4.3767647537228396e-05, "loss": 2.3808, "step": 7586 }, { "epoch": 0.8699690402476781, "grad_norm": 0.24635500806979568, "learning_rate": 4.369170077420048e-05, "loss": 2.3992, "step": 7587 }, { "epoch": 0.8700837059970187, "grad_norm": 0.2613072259723636, "learning_rate": 4.361581695020428e-05, "loss": 2.3956, "step": 7588 }, { "epoch": 0.8701983717463594, "grad_norm": 0.2603857962075291, "learning_rate": 4.353999607570674e-05, "loss": 2.3957, "step": 7589 }, { "epoch": 0.8703130374957, "grad_norm": 0.26819169753217453, "learning_rate": 4.34642381611659e-05, "loss": 2.4115, "step": 7590 }, { "epoch": 0.8704277032450407, "grad_norm": 0.2871064807701723, "learning_rate": 4.338854321703095e-05, "loss": 2.4871, "step": 7591 }, { "epoch": 0.8705423689943814, "grad_norm": 0.24936790403539127, "learning_rate": 4.331291125374265e-05, "loss": 2.4834, "step": 7592 }, { "epoch": 0.870657034743722, "grad_norm": 0.265413715570518, "learning_rate": 4.3237342281732986e-05, "loss": 2.3899, "step": 7593 }, { "epoch": 0.8707717004930627, "grad_norm": 0.2754066242465423, "learning_rate": 4.3161836311425286e-05, "loss": 2.369, "step": 7594 }, { "epoch": 0.8708863662424033, "grad_norm": 0.25463282441226226, "learning_rate": 4.308639335323417e-05, "loss": 2.5293, "step": 7595 }, { "epoch": 0.8710010319917441, "grad_norm": 0.24094454554683684, "learning_rate": 4.301101341756558e-05, "loss": 2.4005, "step": 7596 }, { "epoch": 0.8711156977410848, "grad_norm": 0.27748375862915653, "learning_rate": 4.29356965148166e-05, "loss": 2.42, "step": 7597 }, { "epoch": 0.8712303634904254, "grad_norm": 0.24321474036360463, "learning_rate": 4.286044265537575e-05, "loss": 2.3251, "step": 7598 }, { "epoch": 0.8713450292397661, "grad_norm": 0.2305483631280844, "learning_rate": 4.278525184962312e-05, "loss": 2.3091, "step": 7599 }, { "epoch": 0.8714596949891068, "grad_norm": 0.2577075625066181, "learning_rate": 4.2710124107929624e-05, "loss": 2.4276, "step": 7600 }, { "epoch": 0.8715743607384474, "grad_norm": 0.26959321892250643, "learning_rate": 4.2635059440657767e-05, "loss": 2.4533, "step": 7601 }, { "epoch": 0.8716890264877881, "grad_norm": 0.25885123183305475, "learning_rate": 4.256005785816142e-05, "loss": 2.3208, "step": 7602 }, { "epoch": 0.8718036922371287, "grad_norm": 0.2654863752665307, "learning_rate": 4.2485119370785254e-05, "loss": 2.3138, "step": 7603 }, { "epoch": 0.8719183579864694, "grad_norm": 0.26906136080506504, "learning_rate": 4.2410243988866006e-05, "loss": 2.341, "step": 7604 }, { "epoch": 0.8720330237358102, "grad_norm": 0.2394984111622575, "learning_rate": 4.233543172273119e-05, "loss": 2.4601, "step": 7605 }, { "epoch": 0.8721476894851508, "grad_norm": 0.2736493805587362, "learning_rate": 4.226068258269955e-05, "loss": 2.4671, "step": 7606 }, { "epoch": 0.8722623552344915, "grad_norm": 0.2499539050953273, "learning_rate": 4.218599657908151e-05, "loss": 2.5294, "step": 7607 }, { "epoch": 0.8723770209838321, "grad_norm": 0.25737835074394616, "learning_rate": 4.211137372217849e-05, "loss": 2.3614, "step": 7608 }, { "epoch": 0.8724916867331728, "grad_norm": 0.2649955642745985, "learning_rate": 4.203681402228332e-05, "loss": 2.4215, "step": 7609 }, { "epoch": 0.8726063524825135, "grad_norm": 0.2555902386064952, "learning_rate": 4.1962317489680104e-05, "loss": 2.4315, "step": 7610 }, { "epoch": 0.8727210182318541, "grad_norm": 0.2583100806221419, "learning_rate": 4.188788413464423e-05, "loss": 2.3842, "step": 7611 }, { "epoch": 0.8728356839811948, "grad_norm": 0.28998045843855647, "learning_rate": 4.181351396744226e-05, "loss": 2.4582, "step": 7612 }, { "epoch": 0.8729503497305355, "grad_norm": 0.28796453701880076, "learning_rate": 4.173920699833223e-05, "loss": 2.3616, "step": 7613 }, { "epoch": 0.8730650154798761, "grad_norm": 0.26824842564028595, "learning_rate": 4.166496323756336e-05, "loss": 2.3443, "step": 7614 }, { "epoch": 0.8731796812292169, "grad_norm": 0.2662512954186086, "learning_rate": 4.159078269537614e-05, "loss": 2.2964, "step": 7615 }, { "epoch": 0.8732943469785575, "grad_norm": 0.25509339386945734, "learning_rate": 4.1516665382002385e-05, "loss": 2.4042, "step": 7616 }, { "epoch": 0.8734090127278982, "grad_norm": 0.24454177918960515, "learning_rate": 4.144261130766519e-05, "loss": 2.3534, "step": 7617 }, { "epoch": 0.8735236784772389, "grad_norm": 0.2374474651301502, "learning_rate": 4.136862048257872e-05, "loss": 2.4336, "step": 7618 }, { "epoch": 0.8736383442265795, "grad_norm": 0.2313996348113904, "learning_rate": 4.129469291694893e-05, "loss": 2.3994, "step": 7619 }, { "epoch": 0.8737530099759202, "grad_norm": 0.2616459861301337, "learning_rate": 4.122082862097243e-05, "loss": 2.5434, "step": 7620 }, { "epoch": 0.8738676757252609, "grad_norm": 0.2664246624796791, "learning_rate": 4.1147027604837515e-05, "loss": 2.4715, "step": 7621 }, { "epoch": 0.8739823414746015, "grad_norm": 0.28760612650943945, "learning_rate": 4.107328987872361e-05, "loss": 2.5415, "step": 7622 }, { "epoch": 0.8740970072239422, "grad_norm": 0.27702168168647046, "learning_rate": 4.09996154528014e-05, "loss": 2.6015, "step": 7623 }, { "epoch": 0.8742116729732828, "grad_norm": 0.2472953021577611, "learning_rate": 4.0926004337232926e-05, "loss": 2.3707, "step": 7624 }, { "epoch": 0.8743263387226236, "grad_norm": 0.22990184722081428, "learning_rate": 4.085245654217146e-05, "loss": 2.4507, "step": 7625 }, { "epoch": 0.8744410044719643, "grad_norm": 0.24144417664890302, "learning_rate": 4.077897207776138e-05, "loss": 2.4182, "step": 7626 }, { "epoch": 0.8745556702213049, "grad_norm": 0.2525417628631351, "learning_rate": 4.0705550954138585e-05, "loss": 2.3073, "step": 7627 }, { "epoch": 0.8746703359706456, "grad_norm": 0.26678836328118244, "learning_rate": 4.0632193181430074e-05, "loss": 2.3723, "step": 7628 }, { "epoch": 0.8747850017199862, "grad_norm": 0.26899167050816297, "learning_rate": 4.055889876975416e-05, "loss": 2.5314, "step": 7629 }, { "epoch": 0.8748996674693269, "grad_norm": 0.2441483534613465, "learning_rate": 4.048566772922041e-05, "loss": 2.2773, "step": 7630 }, { "epoch": 0.8750143332186676, "grad_norm": 0.24453055855428704, "learning_rate": 4.0412500069929746e-05, "loss": 2.4655, "step": 7631 }, { "epoch": 0.8751289989680082, "grad_norm": 0.2587396752038846, "learning_rate": 4.0339395801974096e-05, "loss": 2.4048, "step": 7632 }, { "epoch": 0.8752436647173489, "grad_norm": 0.26359620643091675, "learning_rate": 4.0266354935436785e-05, "loss": 2.46, "step": 7633 }, { "epoch": 0.8753583304666896, "grad_norm": 0.24242810725388683, "learning_rate": 4.0193377480392646e-05, "loss": 2.3804, "step": 7634 }, { "epoch": 0.8754729962160303, "grad_norm": 0.26072350752919854, "learning_rate": 4.0120463446907284e-05, "loss": 2.4393, "step": 7635 }, { "epoch": 0.875587661965371, "grad_norm": 0.2575579224680871, "learning_rate": 4.0047612845037885e-05, "loss": 2.4855, "step": 7636 }, { "epoch": 0.8757023277147116, "grad_norm": 0.2646997709456313, "learning_rate": 3.9974825684832905e-05, "loss": 2.357, "step": 7637 }, { "epoch": 0.8758169934640523, "grad_norm": 0.26019690866772366, "learning_rate": 3.9902101976331654e-05, "loss": 2.4213, "step": 7638 }, { "epoch": 0.875931659213393, "grad_norm": 0.24264201480781492, "learning_rate": 3.982944172956526e-05, "loss": 2.3159, "step": 7639 }, { "epoch": 0.8760463249627336, "grad_norm": 0.24727180741188337, "learning_rate": 3.975684495455584e-05, "loss": 2.3334, "step": 7640 }, { "epoch": 0.8761609907120743, "grad_norm": 0.2616552500011797, "learning_rate": 3.968431166131647e-05, "loss": 2.3125, "step": 7641 }, { "epoch": 0.8762756564614149, "grad_norm": 0.2653104251259976, "learning_rate": 3.961184185985195e-05, "loss": 2.2448, "step": 7642 }, { "epoch": 0.8763903222107556, "grad_norm": 0.2709995102838466, "learning_rate": 3.9539435560158045e-05, "loss": 2.3838, "step": 7643 }, { "epoch": 0.8765049879600963, "grad_norm": 0.25529329876921053, "learning_rate": 3.9467092772221823e-05, "loss": 2.3685, "step": 7644 }, { "epoch": 0.876619653709437, "grad_norm": 0.26286207784063564, "learning_rate": 3.939481350602159e-05, "loss": 2.4253, "step": 7645 }, { "epoch": 0.8767343194587777, "grad_norm": 0.2652555961270944, "learning_rate": 3.932259777152702e-05, "loss": 2.3661, "step": 7646 }, { "epoch": 0.8768489852081184, "grad_norm": 0.2559564037677125, "learning_rate": 3.9250445578698665e-05, "loss": 2.4806, "step": 7647 }, { "epoch": 0.876963650957459, "grad_norm": 0.2564624994804444, "learning_rate": 3.9178356937488555e-05, "loss": 2.3919, "step": 7648 }, { "epoch": 0.8770783167067997, "grad_norm": 0.25174082041608176, "learning_rate": 3.910633185784024e-05, "loss": 2.4126, "step": 7649 }, { "epoch": 0.8771929824561403, "grad_norm": 0.2616942726240522, "learning_rate": 3.903437034968793e-05, "loss": 2.3121, "step": 7650 }, { "epoch": 0.877307648205481, "grad_norm": 0.22940061510322224, "learning_rate": 3.896247242295742e-05, "loss": 2.5577, "step": 7651 }, { "epoch": 0.8774223139548217, "grad_norm": 0.2607146298396803, "learning_rate": 3.8890638087565765e-05, "loss": 2.5542, "step": 7652 }, { "epoch": 0.8775369797041623, "grad_norm": 0.2821742461765685, "learning_rate": 3.881886735342083e-05, "loss": 2.4249, "step": 7653 }, { "epoch": 0.877651645453503, "grad_norm": 0.26117832784199285, "learning_rate": 3.874716023042235e-05, "loss": 2.4518, "step": 7654 }, { "epoch": 0.8777663112028438, "grad_norm": 0.2595817848545602, "learning_rate": 3.867551672846092e-05, "loss": 2.3867, "step": 7655 }, { "epoch": 0.8778809769521844, "grad_norm": 0.27533690013425666, "learning_rate": 3.86039368574182e-05, "loss": 2.4348, "step": 7656 }, { "epoch": 0.8779956427015251, "grad_norm": 0.23623155375699328, "learning_rate": 3.8532420627167384e-05, "loss": 2.419, "step": 7657 }, { "epoch": 0.8781103084508657, "grad_norm": 0.24334702142614192, "learning_rate": 3.846096804757282e-05, "loss": 2.4511, "step": 7658 }, { "epoch": 0.8782249742002064, "grad_norm": 0.2831851053581169, "learning_rate": 3.838957912848995e-05, "loss": 2.3661, "step": 7659 }, { "epoch": 0.8783396399495471, "grad_norm": 0.25501863558789634, "learning_rate": 3.8318253879765506e-05, "loss": 2.3724, "step": 7660 }, { "epoch": 0.8784543056988877, "grad_norm": 0.27956953106040827, "learning_rate": 3.824699231123763e-05, "loss": 2.3913, "step": 7661 }, { "epoch": 0.8785689714482284, "grad_norm": 0.22748027738332083, "learning_rate": 3.817579443273522e-05, "loss": 2.2276, "step": 7662 }, { "epoch": 0.878683637197569, "grad_norm": 0.2593140656839005, "learning_rate": 3.810466025407883e-05, "loss": 2.5078, "step": 7663 }, { "epoch": 0.8787983029469097, "grad_norm": 0.24681092612583647, "learning_rate": 3.8033589785080034e-05, "loss": 2.5683, "step": 7664 }, { "epoch": 0.8789129686962505, "grad_norm": 0.2416036463231976, "learning_rate": 3.796258303554162e-05, "loss": 2.543, "step": 7665 }, { "epoch": 0.8790276344455911, "grad_norm": 0.2766024816283266, "learning_rate": 3.789164001525769e-05, "loss": 2.5153, "step": 7666 }, { "epoch": 0.8791423001949318, "grad_norm": 0.2563242960549551, "learning_rate": 3.782076073401347e-05, "loss": 2.516, "step": 7667 }, { "epoch": 0.8792569659442725, "grad_norm": 0.23862940113332057, "learning_rate": 3.774994520158531e-05, "loss": 2.415, "step": 7668 }, { "epoch": 0.8793716316936131, "grad_norm": 0.2522519479222634, "learning_rate": 3.7679193427740895e-05, "loss": 2.2903, "step": 7669 }, { "epoch": 0.8794862974429538, "grad_norm": 0.2591567267096782, "learning_rate": 3.760850542223909e-05, "loss": 2.2433, "step": 7670 }, { "epoch": 0.8796009631922944, "grad_norm": 0.2508280644116827, "learning_rate": 3.753788119483004e-05, "loss": 2.3279, "step": 7671 }, { "epoch": 0.8797156289416351, "grad_norm": 0.2716913282594248, "learning_rate": 3.746732075525494e-05, "loss": 2.4559, "step": 7672 }, { "epoch": 0.8798302946909758, "grad_norm": 0.28585096843898883, "learning_rate": 3.73968241132463e-05, "loss": 2.3828, "step": 7673 }, { "epoch": 0.8799449604403164, "grad_norm": 0.29546035398047377, "learning_rate": 3.7326391278527605e-05, "loss": 2.4535, "step": 7674 }, { "epoch": 0.8800596261896572, "grad_norm": 0.2546297539491112, "learning_rate": 3.725602226081409e-05, "loss": 2.3608, "step": 7675 }, { "epoch": 0.8801742919389978, "grad_norm": 0.25840799060391817, "learning_rate": 3.71857170698115e-05, "loss": 2.4109, "step": 7676 }, { "epoch": 0.8802889576883385, "grad_norm": 0.27521806217112205, "learning_rate": 3.711547571521717e-05, "loss": 2.2516, "step": 7677 }, { "epoch": 0.8804036234376792, "grad_norm": 0.25297973264287055, "learning_rate": 3.7045298206719594e-05, "loss": 2.3747, "step": 7678 }, { "epoch": 0.8805182891870198, "grad_norm": 0.2571466502587079, "learning_rate": 3.69751845539984e-05, "loss": 2.3711, "step": 7679 }, { "epoch": 0.8806329549363605, "grad_norm": 0.27663279116394934, "learning_rate": 3.6905134766724466e-05, "loss": 2.3519, "step": 7680 }, { "epoch": 0.8807476206857012, "grad_norm": 0.24293568497480866, "learning_rate": 3.683514885455985e-05, "loss": 2.5819, "step": 7681 }, { "epoch": 0.8808622864350418, "grad_norm": 0.26510507872151523, "learning_rate": 3.676522682715766e-05, "loss": 2.3801, "step": 7682 }, { "epoch": 0.8809769521843825, "grad_norm": 0.2517953498303615, "learning_rate": 3.6695368694162345e-05, "loss": 2.2978, "step": 7683 }, { "epoch": 0.8810916179337231, "grad_norm": 0.29215431056835484, "learning_rate": 3.662557446520959e-05, "loss": 2.3113, "step": 7684 }, { "epoch": 0.8812062836830639, "grad_norm": 0.26625909025735733, "learning_rate": 3.655584414992608e-05, "loss": 2.3097, "step": 7685 }, { "epoch": 0.8813209494324046, "grad_norm": 0.23211484920922443, "learning_rate": 3.6486177757929784e-05, "loss": 2.3062, "step": 7686 }, { "epoch": 0.8814356151817452, "grad_norm": 0.27727865200689944, "learning_rate": 3.641657529883002e-05, "loss": 2.5506, "step": 7687 }, { "epoch": 0.8815502809310859, "grad_norm": 0.2512554364969393, "learning_rate": 3.634703678222689e-05, "loss": 2.4909, "step": 7688 }, { "epoch": 0.8816649466804266, "grad_norm": 0.25459045606606606, "learning_rate": 3.6277562217711946e-05, "loss": 2.4853, "step": 7689 }, { "epoch": 0.8817796124297672, "grad_norm": 0.2564899100539294, "learning_rate": 3.620815161486807e-05, "loss": 2.3574, "step": 7690 }, { "epoch": 0.8818942781791079, "grad_norm": 0.25398050635245323, "learning_rate": 3.613880498326894e-05, "loss": 2.3883, "step": 7691 }, { "epoch": 0.8820089439284485, "grad_norm": 0.2535465928738247, "learning_rate": 3.6069522332479674e-05, "loss": 2.3935, "step": 7692 }, { "epoch": 0.8821236096777892, "grad_norm": 0.2491559046101294, "learning_rate": 3.6000303672056537e-05, "loss": 2.4813, "step": 7693 }, { "epoch": 0.88223827542713, "grad_norm": 0.2799190692029175, "learning_rate": 3.593114901154676e-05, "loss": 2.2892, "step": 7694 }, { "epoch": 0.8823529411764706, "grad_norm": 0.27621329226019237, "learning_rate": 3.586205836048906e-05, "loss": 2.4511, "step": 7695 }, { "epoch": 0.8824676069258113, "grad_norm": 0.24100600796440189, "learning_rate": 3.579303172841319e-05, "loss": 2.4576, "step": 7696 }, { "epoch": 0.8825822726751519, "grad_norm": 0.2584762015053148, "learning_rate": 3.5724069124839945e-05, "loss": 2.4851, "step": 7697 }, { "epoch": 0.8826969384244926, "grad_norm": 0.28550518874219943, "learning_rate": 3.565517055928147e-05, "loss": 2.4149, "step": 7698 }, { "epoch": 0.8828116041738333, "grad_norm": 0.26534674304662104, "learning_rate": 3.5586336041241004e-05, "loss": 2.4513, "step": 7699 }, { "epoch": 0.8829262699231739, "grad_norm": 0.29498718440544464, "learning_rate": 3.5517565580212966e-05, "loss": 2.5358, "step": 7700 }, { "epoch": 0.8830409356725146, "grad_norm": 0.2630769266983824, "learning_rate": 3.5448859185682856e-05, "loss": 2.3146, "step": 7701 }, { "epoch": 0.8831556014218553, "grad_norm": 0.2693022580221657, "learning_rate": 3.538021686712761e-05, "loss": 2.3084, "step": 7702 }, { "epoch": 0.8832702671711959, "grad_norm": 0.2629705608509755, "learning_rate": 3.5311638634014874e-05, "loss": 2.3208, "step": 7703 }, { "epoch": 0.8833849329205367, "grad_norm": 0.25981481205938256, "learning_rate": 3.524312449580375e-05, "loss": 2.5015, "step": 7704 }, { "epoch": 0.8834995986698773, "grad_norm": 0.24200271527823503, "learning_rate": 3.517467446194472e-05, "loss": 2.2255, "step": 7705 }, { "epoch": 0.883614264419218, "grad_norm": 0.2608698729408616, "learning_rate": 3.510628854187886e-05, "loss": 2.342, "step": 7706 }, { "epoch": 0.8837289301685587, "grad_norm": 0.2434117234689837, "learning_rate": 3.503796674503884e-05, "loss": 2.3327, "step": 7707 }, { "epoch": 0.8838435959178993, "grad_norm": 0.2414002881663324, "learning_rate": 3.496970908084834e-05, "loss": 2.4323, "step": 7708 }, { "epoch": 0.88395826166724, "grad_norm": 0.2519533507392297, "learning_rate": 3.490151555872212e-05, "loss": 2.5395, "step": 7709 }, { "epoch": 0.8840729274165807, "grad_norm": 0.2791458549507502, "learning_rate": 3.483338618806625e-05, "loss": 2.3837, "step": 7710 }, { "epoch": 0.8841875931659213, "grad_norm": 0.2289687750593109, "learning_rate": 3.4765320978278004e-05, "loss": 2.2176, "step": 7711 }, { "epoch": 0.884302258915262, "grad_norm": 0.2740979751747013, "learning_rate": 3.469731993874542e-05, "loss": 2.3464, "step": 7712 }, { "epoch": 0.8844169246646026, "grad_norm": 0.2678973478862052, "learning_rate": 3.4629383078848106e-05, "loss": 2.4294, "step": 7713 }, { "epoch": 0.8845315904139434, "grad_norm": 0.2843393999561222, "learning_rate": 3.456151040795663e-05, "loss": 2.5201, "step": 7714 }, { "epoch": 0.8846462561632841, "grad_norm": 0.2602932119888259, "learning_rate": 3.449370193543272e-05, "loss": 2.3309, "step": 7715 }, { "epoch": 0.8847609219126247, "grad_norm": 0.24984559252827407, "learning_rate": 3.4425957670629284e-05, "loss": 2.3974, "step": 7716 }, { "epoch": 0.8848755876619654, "grad_norm": 0.25928177983440165, "learning_rate": 3.43582776228904e-05, "loss": 2.5503, "step": 7717 }, { "epoch": 0.884990253411306, "grad_norm": 0.24475822792902074, "learning_rate": 3.429066180155111e-05, "loss": 2.3711, "step": 7718 }, { "epoch": 0.8851049191606467, "grad_norm": 0.2289359717670924, "learning_rate": 3.422311021593777e-05, "loss": 2.4663, "step": 7719 }, { "epoch": 0.8852195849099874, "grad_norm": 0.2818462664822109, "learning_rate": 3.415562287536789e-05, "loss": 2.5567, "step": 7720 }, { "epoch": 0.885334250659328, "grad_norm": 0.2563939199579085, "learning_rate": 3.408819978915001e-05, "loss": 2.5003, "step": 7721 }, { "epoch": 0.8854489164086687, "grad_norm": 0.25629023485693486, "learning_rate": 3.4020840966583924e-05, "loss": 2.2314, "step": 7722 }, { "epoch": 0.8855635821580095, "grad_norm": 0.27371491588418795, "learning_rate": 3.3953546416960525e-05, "loss": 2.3868, "step": 7723 }, { "epoch": 0.8856782479073501, "grad_norm": 0.2580296951907124, "learning_rate": 3.388631614956156e-05, "loss": 2.4225, "step": 7724 }, { "epoch": 0.8857929136566908, "grad_norm": 0.2525494786826452, "learning_rate": 3.38191501736605e-05, "loss": 2.446, "step": 7725 }, { "epoch": 0.8859075794060314, "grad_norm": 0.25758612792633184, "learning_rate": 3.375204849852137e-05, "loss": 2.373, "step": 7726 }, { "epoch": 0.8860222451553721, "grad_norm": 0.2518726559205612, "learning_rate": 3.3685011133399724e-05, "loss": 2.4361, "step": 7727 }, { "epoch": 0.8861369109047128, "grad_norm": 0.2539401257950697, "learning_rate": 3.361803808754194e-05, "loss": 2.2387, "step": 7728 }, { "epoch": 0.8862515766540534, "grad_norm": 0.2710292371866573, "learning_rate": 3.3551129370185786e-05, "loss": 2.451, "step": 7729 }, { "epoch": 0.8863662424033941, "grad_norm": 0.26679334313908754, "learning_rate": 3.348428499056e-05, "loss": 2.3997, "step": 7730 }, { "epoch": 0.8864809081527347, "grad_norm": 0.2655288831546453, "learning_rate": 3.341750495788459e-05, "loss": 2.4122, "step": 7731 }, { "epoch": 0.8865955739020754, "grad_norm": 0.2704158250167759, "learning_rate": 3.3350789281370477e-05, "loss": 2.3696, "step": 7732 }, { "epoch": 0.8867102396514162, "grad_norm": 0.27163187571066705, "learning_rate": 3.328413797021973e-05, "loss": 2.3585, "step": 7733 }, { "epoch": 0.8868249054007568, "grad_norm": 0.24313872140552956, "learning_rate": 3.321755103362584e-05, "loss": 2.3793, "step": 7734 }, { "epoch": 0.8869395711500975, "grad_norm": 0.2608363617738911, "learning_rate": 3.315102848077306e-05, "loss": 2.4685, "step": 7735 }, { "epoch": 0.8870542368994382, "grad_norm": 0.24224073494857384, "learning_rate": 3.308457032083689e-05, "loss": 2.2408, "step": 7736 }, { "epoch": 0.8871689026487788, "grad_norm": 0.23182832364218836, "learning_rate": 3.301817656298417e-05, "loss": 2.2865, "step": 7737 }, { "epoch": 0.8872835683981195, "grad_norm": 0.2420227910539694, "learning_rate": 3.295184721637245e-05, "loss": 2.363, "step": 7738 }, { "epoch": 0.8873982341474601, "grad_norm": 0.24758686548660996, "learning_rate": 3.288558229015054e-05, "loss": 2.3525, "step": 7739 }, { "epoch": 0.8875128998968008, "grad_norm": 0.2739520645162603, "learning_rate": 3.2819381793458724e-05, "loss": 2.2985, "step": 7740 }, { "epoch": 0.8876275656461415, "grad_norm": 0.2723640966767095, "learning_rate": 3.275324573542782e-05, "loss": 2.4885, "step": 7741 }, { "epoch": 0.8877422313954821, "grad_norm": 0.22564194151408037, "learning_rate": 3.268717412518013e-05, "loss": 2.2813, "step": 7742 }, { "epoch": 0.8878568971448229, "grad_norm": 0.23749219986013811, "learning_rate": 3.262116697182899e-05, "loss": 2.4247, "step": 7743 }, { "epoch": 0.8879715628941636, "grad_norm": 0.23177614459197315, "learning_rate": 3.255522428447877e-05, "loss": 2.2875, "step": 7744 }, { "epoch": 0.8880862286435042, "grad_norm": 0.2399891268449904, "learning_rate": 3.248934607222509e-05, "loss": 2.424, "step": 7745 }, { "epoch": 0.8882008943928449, "grad_norm": 0.26651423750304626, "learning_rate": 3.2423532344154684e-05, "loss": 2.4469, "step": 7746 }, { "epoch": 0.8883155601421855, "grad_norm": 0.25781809204881734, "learning_rate": 3.2357783109345005e-05, "loss": 2.4504, "step": 7747 }, { "epoch": 0.8884302258915262, "grad_norm": 0.23994898840168424, "learning_rate": 3.229209837686514e-05, "loss": 2.3198, "step": 7748 }, { "epoch": 0.8885448916408669, "grad_norm": 0.2483453972214529, "learning_rate": 3.222647815577495e-05, "loss": 2.3544, "step": 7749 }, { "epoch": 0.8886595573902075, "grad_norm": 0.24268611470012, "learning_rate": 3.2160922455125527e-05, "loss": 2.4357, "step": 7750 }, { "epoch": 0.8887742231395482, "grad_norm": 0.25492869194138024, "learning_rate": 3.209543128395903e-05, "loss": 2.4691, "step": 7751 }, { "epoch": 0.8888888888888888, "grad_norm": 0.25429335907543055, "learning_rate": 3.203000465130879e-05, "loss": 2.3445, "step": 7752 }, { "epoch": 0.8890035546382296, "grad_norm": 0.26002230390629805, "learning_rate": 3.196464256619902e-05, "loss": 2.3685, "step": 7753 }, { "epoch": 0.8891182203875703, "grad_norm": 0.25829146396279, "learning_rate": 3.189934503764513e-05, "loss": 2.4632, "step": 7754 }, { "epoch": 0.8892328861369109, "grad_norm": 0.24382908594886604, "learning_rate": 3.1834112074653974e-05, "loss": 2.3197, "step": 7755 }, { "epoch": 0.8893475518862516, "grad_norm": 0.31918875811060793, "learning_rate": 3.1768943686222906e-05, "loss": 2.5497, "step": 7756 }, { "epoch": 0.8894622176355923, "grad_norm": 0.2760466868618441, "learning_rate": 3.170383988134079e-05, "loss": 2.4055, "step": 7757 }, { "epoch": 0.8895768833849329, "grad_norm": 0.2620877757542277, "learning_rate": 3.16388006689875e-05, "loss": 2.34, "step": 7758 }, { "epoch": 0.8896915491342736, "grad_norm": 0.25451936772442024, "learning_rate": 3.157382605813369e-05, "loss": 2.335, "step": 7759 }, { "epoch": 0.8898062148836142, "grad_norm": 0.253775132044127, "learning_rate": 3.150891605774164e-05, "loss": 2.3736, "step": 7760 }, { "epoch": 0.8899208806329549, "grad_norm": 0.24521292453800597, "learning_rate": 3.144407067676447e-05, "loss": 2.3017, "step": 7761 }, { "epoch": 0.8900355463822957, "grad_norm": 0.24252191368121348, "learning_rate": 3.137928992414618e-05, "loss": 2.3949, "step": 7762 }, { "epoch": 0.8901502121316363, "grad_norm": 0.22779664868167165, "learning_rate": 3.131457380882213e-05, "loss": 2.25, "step": 7763 }, { "epoch": 0.890264877880977, "grad_norm": 0.2852451473490904, "learning_rate": 3.1249922339718685e-05, "loss": 2.3292, "step": 7764 }, { "epoch": 0.8903795436303176, "grad_norm": 0.25278597837861333, "learning_rate": 3.11853355257532e-05, "loss": 2.4541, "step": 7765 }, { "epoch": 0.8904942093796583, "grad_norm": 0.2548503547050677, "learning_rate": 3.1120813375834276e-05, "loss": 2.4103, "step": 7766 }, { "epoch": 0.890608875128999, "grad_norm": 0.25096820071754866, "learning_rate": 3.105635589886163e-05, "loss": 2.3396, "step": 7767 }, { "epoch": 0.8907235408783396, "grad_norm": 0.2568472044355167, "learning_rate": 3.099196310372565e-05, "loss": 2.2981, "step": 7768 }, { "epoch": 0.8908382066276803, "grad_norm": 0.2651767520122004, "learning_rate": 3.092763499930834e-05, "loss": 2.398, "step": 7769 }, { "epoch": 0.890952872377021, "grad_norm": 0.2476188786851755, "learning_rate": 3.0863371594482384e-05, "loss": 2.3607, "step": 7770 }, { "epoch": 0.8910675381263616, "grad_norm": 0.2447304870360554, "learning_rate": 3.07991728981118e-05, "loss": 2.4151, "step": 7771 }, { "epoch": 0.8911822038757024, "grad_norm": 0.25211444122968185, "learning_rate": 3.07350389190515e-05, "loss": 2.3941, "step": 7772 }, { "epoch": 0.891296869625043, "grad_norm": 0.24209280085054075, "learning_rate": 3.067096966614763e-05, "loss": 2.295, "step": 7773 }, { "epoch": 0.8914115353743837, "grad_norm": 0.2734923942913027, "learning_rate": 3.060696514823719e-05, "loss": 2.3574, "step": 7774 }, { "epoch": 0.8915262011237244, "grad_norm": 0.24551510547654865, "learning_rate": 3.0543025374148545e-05, "loss": 2.376, "step": 7775 }, { "epoch": 0.891640866873065, "grad_norm": 0.25336843376775836, "learning_rate": 3.0479150352700878e-05, "loss": 2.4449, "step": 7776 }, { "epoch": 0.8917555326224057, "grad_norm": 0.24539614191350503, "learning_rate": 3.041534009270447e-05, "loss": 2.4143, "step": 7777 }, { "epoch": 0.8918701983717464, "grad_norm": 0.2554545274162413, "learning_rate": 3.035159460296083e-05, "loss": 2.4106, "step": 7778 }, { "epoch": 0.891984864121087, "grad_norm": 0.2642047935547276, "learning_rate": 3.028791389226243e-05, "loss": 2.349, "step": 7779 }, { "epoch": 0.8920995298704277, "grad_norm": 0.26632321175877466, "learning_rate": 3.0224297969392745e-05, "loss": 2.3195, "step": 7780 }, { "epoch": 0.8922141956197683, "grad_norm": 0.24374035582879336, "learning_rate": 3.0160746843126418e-05, "loss": 2.4381, "step": 7781 }, { "epoch": 0.892328861369109, "grad_norm": 0.24872889523238803, "learning_rate": 3.009726052222922e-05, "loss": 2.4771, "step": 7782 }, { "epoch": 0.8924435271184498, "grad_norm": 0.2618677127474087, "learning_rate": 3.0033839015457644e-05, "loss": 2.3349, "step": 7783 }, { "epoch": 0.8925581928677904, "grad_norm": 0.2692763630918073, "learning_rate": 2.9970482331559634e-05, "loss": 2.3675, "step": 7784 }, { "epoch": 0.8926728586171311, "grad_norm": 0.25206787187291285, "learning_rate": 2.9907190479274037e-05, "loss": 2.2911, "step": 7785 }, { "epoch": 0.8927875243664717, "grad_norm": 0.23469985213610883, "learning_rate": 2.9843963467330703e-05, "loss": 2.303, "step": 7786 }, { "epoch": 0.8929021901158124, "grad_norm": 0.25640932846071607, "learning_rate": 2.97808013044506e-05, "loss": 2.3133, "step": 7787 }, { "epoch": 0.8930168558651531, "grad_norm": 0.295640791986467, "learning_rate": 2.971770399934587e-05, "loss": 2.3341, "step": 7788 }, { "epoch": 0.8931315216144937, "grad_norm": 0.2551169985318455, "learning_rate": 2.965467156071927e-05, "loss": 2.3173, "step": 7789 }, { "epoch": 0.8932461873638344, "grad_norm": 0.2494333370925394, "learning_rate": 2.959170399726535e-05, "loss": 2.4005, "step": 7790 }, { "epoch": 0.8933608531131751, "grad_norm": 0.264343855895208, "learning_rate": 2.952880131766894e-05, "loss": 2.43, "step": 7791 }, { "epoch": 0.8934755188625157, "grad_norm": 0.23538877237660957, "learning_rate": 2.9465963530606433e-05, "loss": 2.2349, "step": 7792 }, { "epoch": 0.8935901846118565, "grad_norm": 0.26909144087688414, "learning_rate": 2.940319064474506e-05, "loss": 2.333, "step": 7793 }, { "epoch": 0.8937048503611971, "grad_norm": 0.27293954154322986, "learning_rate": 2.934048266874312e-05, "loss": 2.3535, "step": 7794 }, { "epoch": 0.8938195161105378, "grad_norm": 0.23756380641305277, "learning_rate": 2.9277839611250023e-05, "loss": 2.3367, "step": 7795 }, { "epoch": 0.8939341818598785, "grad_norm": 0.2756024912949463, "learning_rate": 2.9215261480906254e-05, "loss": 2.4312, "step": 7796 }, { "epoch": 0.8940488476092191, "grad_norm": 0.2456284058755781, "learning_rate": 2.9152748286343067e-05, "loss": 2.5261, "step": 7797 }, { "epoch": 0.8941635133585598, "grad_norm": 0.2854948485110286, "learning_rate": 2.9090300036183125e-05, "loss": 2.313, "step": 7798 }, { "epoch": 0.8942781791079004, "grad_norm": 0.2632978746553978, "learning_rate": 2.9027916739039927e-05, "loss": 2.3334, "step": 7799 }, { "epoch": 0.8943928448572411, "grad_norm": 0.25831788742131717, "learning_rate": 2.896559840351809e-05, "loss": 2.3981, "step": 7800 }, { "epoch": 0.8945075106065818, "grad_norm": 0.2675474456749712, "learning_rate": 2.8903345038213235e-05, "loss": 2.4568, "step": 7801 }, { "epoch": 0.8946221763559224, "grad_norm": 0.2531222299845237, "learning_rate": 2.8841156651712053e-05, "loss": 2.3829, "step": 7802 }, { "epoch": 0.8947368421052632, "grad_norm": 0.2628036133565752, "learning_rate": 2.8779033252592123e-05, "loss": 2.3268, "step": 7803 }, { "epoch": 0.8948515078546039, "grad_norm": 0.2349018779280865, "learning_rate": 2.8716974849422263e-05, "loss": 2.6534, "step": 7804 }, { "epoch": 0.8949661736039445, "grad_norm": 0.27115289899851386, "learning_rate": 2.865498145076234e-05, "loss": 2.3735, "step": 7805 }, { "epoch": 0.8950808393532852, "grad_norm": 0.24908753717443133, "learning_rate": 2.8593053065162967e-05, "loss": 2.3863, "step": 7806 }, { "epoch": 0.8951955051026258, "grad_norm": 0.2698628326205673, "learning_rate": 2.8531189701166137e-05, "loss": 2.4338, "step": 7807 }, { "epoch": 0.8953101708519665, "grad_norm": 0.2725051495545839, "learning_rate": 2.8469391367304753e-05, "loss": 2.4051, "step": 7808 }, { "epoch": 0.8954248366013072, "grad_norm": 0.24550115934780878, "learning_rate": 2.8407658072102495e-05, "loss": 2.3766, "step": 7809 }, { "epoch": 0.8955395023506478, "grad_norm": 0.25255966614504294, "learning_rate": 2.834598982407449e-05, "loss": 2.322, "step": 7810 }, { "epoch": 0.8956541680999885, "grad_norm": 0.2744654425684799, "learning_rate": 2.828438663172672e-05, "loss": 2.3363, "step": 7811 }, { "epoch": 0.8957688338493293, "grad_norm": 0.24750221878780485, "learning_rate": 2.8222848503556054e-05, "loss": 2.2631, "step": 7812 }, { "epoch": 0.8958834995986699, "grad_norm": 0.2527257287176193, "learning_rate": 2.8161375448050474e-05, "loss": 2.6212, "step": 7813 }, { "epoch": 0.8959981653480106, "grad_norm": 0.2565443796704807, "learning_rate": 2.8099967473689147e-05, "loss": 2.4679, "step": 7814 }, { "epoch": 0.8961128310973512, "grad_norm": 0.2773641159016645, "learning_rate": 2.8038624588942074e-05, "loss": 2.3576, "step": 7815 }, { "epoch": 0.8962274968466919, "grad_norm": 0.2537068143969078, "learning_rate": 2.7977346802270322e-05, "loss": 2.3317, "step": 7816 }, { "epoch": 0.8963421625960326, "grad_norm": 0.2636175611266541, "learning_rate": 2.791613412212607e-05, "loss": 2.3346, "step": 7817 }, { "epoch": 0.8964568283453732, "grad_norm": 0.294148898208544, "learning_rate": 2.7854986556952344e-05, "loss": 2.308, "step": 7818 }, { "epoch": 0.8965714940947139, "grad_norm": 0.25534715728180185, "learning_rate": 2.7793904115183287e-05, "loss": 2.4505, "step": 7819 }, { "epoch": 0.8966861598440545, "grad_norm": 0.222332000673108, "learning_rate": 2.7732886805244108e-05, "loss": 2.3406, "step": 7820 }, { "epoch": 0.8968008255933952, "grad_norm": 0.2596438436912729, "learning_rate": 2.7671934635550956e-05, "loss": 2.4462, "step": 7821 }, { "epoch": 0.896915491342736, "grad_norm": 0.2464858570761382, "learning_rate": 2.7611047614511052e-05, "loss": 2.4006, "step": 7822 }, { "epoch": 0.8970301570920766, "grad_norm": 0.23735821048363162, "learning_rate": 2.7550225750522627e-05, "loss": 2.3803, "step": 7823 }, { "epoch": 0.8971448228414173, "grad_norm": 0.25752105411774967, "learning_rate": 2.748946905197469e-05, "loss": 2.3885, "step": 7824 }, { "epoch": 0.897259488590758, "grad_norm": 0.271135448834253, "learning_rate": 2.7428777527247707e-05, "loss": 2.3636, "step": 7825 }, { "epoch": 0.8973741543400986, "grad_norm": 0.28538045693773134, "learning_rate": 2.7368151184712918e-05, "loss": 2.3036, "step": 7826 }, { "epoch": 0.8974888200894393, "grad_norm": 0.2793228120527593, "learning_rate": 2.730759003273242e-05, "loss": 2.3208, "step": 7827 }, { "epoch": 0.8976034858387799, "grad_norm": 0.2406438885695902, "learning_rate": 2.7247094079659528e-05, "loss": 2.4537, "step": 7828 }, { "epoch": 0.8977181515881206, "grad_norm": 0.25972341250979636, "learning_rate": 2.7186663333838514e-05, "loss": 2.4673, "step": 7829 }, { "epoch": 0.8978328173374613, "grad_norm": 0.2959067621531113, "learning_rate": 2.712629780360465e-05, "loss": 2.3499, "step": 7830 }, { "epoch": 0.897947483086802, "grad_norm": 0.24667301600462077, "learning_rate": 2.7065997497284224e-05, "loss": 2.5194, "step": 7831 }, { "epoch": 0.8980621488361427, "grad_norm": 0.24290724284194581, "learning_rate": 2.700576242319458e-05, "loss": 2.4992, "step": 7832 }, { "epoch": 0.8981768145854833, "grad_norm": 0.2514735373448667, "learning_rate": 2.6945592589643853e-05, "loss": 2.4853, "step": 7833 }, { "epoch": 0.898291480334824, "grad_norm": 0.2858408348601675, "learning_rate": 2.6885488004931347e-05, "loss": 2.3865, "step": 7834 }, { "epoch": 0.8984061460841647, "grad_norm": 0.26390622936852715, "learning_rate": 2.682544867734743e-05, "loss": 2.3246, "step": 7835 }, { "epoch": 0.8985208118335053, "grad_norm": 0.26803992807521726, "learning_rate": 2.6765474615173313e-05, "loss": 2.4616, "step": 7836 }, { "epoch": 0.898635477582846, "grad_norm": 0.2752464542503413, "learning_rate": 2.6705565826681322e-05, "loss": 2.3671, "step": 7837 }, { "epoch": 0.8987501433321867, "grad_norm": 0.2659857670056342, "learning_rate": 2.664572232013479e-05, "loss": 2.4742, "step": 7838 }, { "epoch": 0.8988648090815273, "grad_norm": 0.27514635555858746, "learning_rate": 2.658594410378773e-05, "loss": 2.2781, "step": 7839 }, { "epoch": 0.898979474830868, "grad_norm": 0.25802708356998977, "learning_rate": 2.6526231185885763e-05, "loss": 2.5159, "step": 7840 }, { "epoch": 0.8990941405802086, "grad_norm": 0.2721796391535052, "learning_rate": 2.6466583574664914e-05, "loss": 2.352, "step": 7841 }, { "epoch": 0.8992088063295494, "grad_norm": 0.26137860304294547, "learning_rate": 2.6407001278352495e-05, "loss": 2.4054, "step": 7842 }, { "epoch": 0.8993234720788901, "grad_norm": 0.2586948705123628, "learning_rate": 2.6347484305166704e-05, "loss": 2.4599, "step": 7843 }, { "epoch": 0.8994381378282307, "grad_norm": 0.24508466218311736, "learning_rate": 2.628803266331686e-05, "loss": 2.3333, "step": 7844 }, { "epoch": 0.8995528035775714, "grad_norm": 0.266850580034854, "learning_rate": 2.6228646361003085e-05, "loss": 2.4701, "step": 7845 }, { "epoch": 0.8996674693269121, "grad_norm": 0.2864173278943525, "learning_rate": 2.6169325406416754e-05, "loss": 2.3649, "step": 7846 }, { "epoch": 0.8997821350762527, "grad_norm": 0.25437292815266915, "learning_rate": 2.6110069807739832e-05, "loss": 2.2711, "step": 7847 }, { "epoch": 0.8998968008255934, "grad_norm": 0.2541625261546507, "learning_rate": 2.6050879573145668e-05, "loss": 2.5051, "step": 7848 }, { "epoch": 0.900011466574934, "grad_norm": 0.2858908934118265, "learning_rate": 2.5991754710798345e-05, "loss": 2.3956, "step": 7849 }, { "epoch": 0.9001261323242747, "grad_norm": 0.25169167331504866, "learning_rate": 2.5932695228853055e-05, "loss": 2.4216, "step": 7850 }, { "epoch": 0.9002407980736155, "grad_norm": 0.2472825923975517, "learning_rate": 2.5873701135455952e-05, "loss": 2.4799, "step": 7851 }, { "epoch": 0.9003554638229561, "grad_norm": 0.2633651096181669, "learning_rate": 2.581477243874414e-05, "loss": 2.3638, "step": 7852 }, { "epoch": 0.9004701295722968, "grad_norm": 0.26599684658992595, "learning_rate": 2.575590914684556e-05, "loss": 2.3738, "step": 7853 }, { "epoch": 0.9005847953216374, "grad_norm": 0.2851127565467227, "learning_rate": 2.569711126787938e-05, "loss": 2.4166, "step": 7854 }, { "epoch": 0.9006994610709781, "grad_norm": 0.2485678713731354, "learning_rate": 2.5638378809955786e-05, "loss": 2.4474, "step": 7855 }, { "epoch": 0.9008141268203188, "grad_norm": 0.25608213589692336, "learning_rate": 2.557971178117563e-05, "loss": 2.3716, "step": 7856 }, { "epoch": 0.9009287925696594, "grad_norm": 0.23168367018671582, "learning_rate": 2.5521110189630937e-05, "loss": 2.3818, "step": 7857 }, { "epoch": 0.9010434583190001, "grad_norm": 0.265882243984455, "learning_rate": 2.5462574043404795e-05, "loss": 2.4257, "step": 7858 }, { "epoch": 0.9011581240683408, "grad_norm": 0.2636971340212292, "learning_rate": 2.540410335057092e-05, "loss": 2.4915, "step": 7859 }, { "epoch": 0.9012727898176814, "grad_norm": 0.24036791651609013, "learning_rate": 2.5345698119194403e-05, "loss": 2.2715, "step": 7860 }, { "epoch": 0.9013874555670222, "grad_norm": 0.25682477392528935, "learning_rate": 2.5287358357331203e-05, "loss": 2.4662, "step": 7861 }, { "epoch": 0.9015021213163628, "grad_norm": 0.24387437086591005, "learning_rate": 2.5229084073027987e-05, "loss": 2.4367, "step": 7862 }, { "epoch": 0.9016167870657035, "grad_norm": 0.2512676590586209, "learning_rate": 2.5170875274322657e-05, "loss": 2.3182, "step": 7863 }, { "epoch": 0.9017314528150442, "grad_norm": 0.2556134895644676, "learning_rate": 2.5112731969244073e-05, "loss": 2.3356, "step": 7864 }, { "epoch": 0.9018461185643848, "grad_norm": 0.2695815921155733, "learning_rate": 2.5054654165811873e-05, "loss": 2.3602, "step": 7865 }, { "epoch": 0.9019607843137255, "grad_norm": 0.28399502911945773, "learning_rate": 2.4996641872036875e-05, "loss": 2.3925, "step": 7866 }, { "epoch": 0.9020754500630661, "grad_norm": 0.23414169920940725, "learning_rate": 2.493869509592078e-05, "loss": 2.2545, "step": 7867 }, { "epoch": 0.9021901158124068, "grad_norm": 0.24146780653946495, "learning_rate": 2.488081384545621e-05, "loss": 2.3091, "step": 7868 }, { "epoch": 0.9023047815617475, "grad_norm": 0.2538515271390083, "learning_rate": 2.482299812862665e-05, "loss": 2.4604, "step": 7869 }, { "epoch": 0.9024194473110881, "grad_norm": 0.24208003527611488, "learning_rate": 2.47652479534069e-05, "loss": 2.389, "step": 7870 }, { "epoch": 0.9025341130604289, "grad_norm": 0.25496550477094687, "learning_rate": 2.4707563327762417e-05, "loss": 2.2991, "step": 7871 }, { "epoch": 0.9026487788097696, "grad_norm": 0.2672172439689978, "learning_rate": 2.4649944259649615e-05, "loss": 2.5429, "step": 7872 }, { "epoch": 0.9027634445591102, "grad_norm": 0.2652537669432156, "learning_rate": 2.459239075701608e-05, "loss": 2.4249, "step": 7873 }, { "epoch": 0.9028781103084509, "grad_norm": 0.25933928278776125, "learning_rate": 2.453490282780002e-05, "loss": 2.3148, "step": 7874 }, { "epoch": 0.9029927760577915, "grad_norm": 0.26039896820845754, "learning_rate": 2.447748047993098e-05, "loss": 2.2465, "step": 7875 }, { "epoch": 0.9031074418071322, "grad_norm": 0.24749361630700203, "learning_rate": 2.4420123721329344e-05, "loss": 2.3158, "step": 7876 }, { "epoch": 0.9032221075564729, "grad_norm": 0.2637399071512931, "learning_rate": 2.4362832559906123e-05, "loss": 2.4934, "step": 7877 }, { "epoch": 0.9033367733058135, "grad_norm": 0.2515447449055186, "learning_rate": 2.4305607003563714e-05, "loss": 2.4473, "step": 7878 }, { "epoch": 0.9034514390551542, "grad_norm": 0.291058701300032, "learning_rate": 2.42484470601953e-05, "loss": 2.2977, "step": 7879 }, { "epoch": 0.903566104804495, "grad_norm": 0.25700278369804813, "learning_rate": 2.4191352737684913e-05, "loss": 2.2886, "step": 7880 }, { "epoch": 0.9036807705538356, "grad_norm": 0.2469321386600971, "learning_rate": 2.4134324043907697e-05, "loss": 2.4029, "step": 7881 }, { "epoch": 0.9037954363031763, "grad_norm": 0.2531502721088116, "learning_rate": 2.407736098672969e-05, "loss": 2.4438, "step": 7882 }, { "epoch": 0.9039101020525169, "grad_norm": 0.2456453380400903, "learning_rate": 2.4020463574007777e-05, "loss": 2.393, "step": 7883 }, { "epoch": 0.9040247678018576, "grad_norm": 0.23817744241130204, "learning_rate": 2.396363181358996e-05, "loss": 2.2695, "step": 7884 }, { "epoch": 0.9041394335511983, "grad_norm": 0.2853802066100375, "learning_rate": 2.3906865713315018e-05, "loss": 2.3366, "step": 7885 }, { "epoch": 0.9042540993005389, "grad_norm": 0.25068855741011037, "learning_rate": 2.3850165281012747e-05, "loss": 2.4639, "step": 7886 }, { "epoch": 0.9043687650498796, "grad_norm": 0.25346109375597387, "learning_rate": 2.3793530524504004e-05, "loss": 2.4021, "step": 7887 }, { "epoch": 0.9044834307992202, "grad_norm": 0.2414374657536262, "learning_rate": 2.3736961451600423e-05, "loss": 2.1887, "step": 7888 }, { "epoch": 0.9045980965485609, "grad_norm": 0.25446171707434806, "learning_rate": 2.3680458070104492e-05, "loss": 2.3651, "step": 7889 }, { "epoch": 0.9047127622979017, "grad_norm": 0.23584363518947468, "learning_rate": 2.362402038781003e-05, "loss": 2.2678, "step": 7890 }, { "epoch": 0.9048274280472423, "grad_norm": 0.24127462723579762, "learning_rate": 2.3567648412501365e-05, "loss": 2.3628, "step": 7891 }, { "epoch": 0.904942093796583, "grad_norm": 0.24126346027555615, "learning_rate": 2.351134215195394e-05, "loss": 2.4828, "step": 7892 }, { "epoch": 0.9050567595459237, "grad_norm": 0.27217730333184964, "learning_rate": 2.345510161393416e-05, "loss": 2.4645, "step": 7893 }, { "epoch": 0.9051714252952643, "grad_norm": 0.2723157247316288, "learning_rate": 2.339892680619937e-05, "loss": 2.4504, "step": 7894 }, { "epoch": 0.905286091044605, "grad_norm": 0.2525789596314939, "learning_rate": 2.334281773649777e-05, "loss": 2.4053, "step": 7895 }, { "epoch": 0.9054007567939456, "grad_norm": 0.2532463916943479, "learning_rate": 2.32867744125686e-05, "loss": 2.4811, "step": 7896 }, { "epoch": 0.9055154225432863, "grad_norm": 0.26091843189723296, "learning_rate": 2.323079684214191e-05, "loss": 2.4983, "step": 7897 }, { "epoch": 0.905630088292627, "grad_norm": 0.24144753077020223, "learning_rate": 2.3174885032938685e-05, "loss": 2.4569, "step": 7898 }, { "epoch": 0.9057447540419676, "grad_norm": 0.23577034498701382, "learning_rate": 2.3119038992670983e-05, "loss": 2.3982, "step": 7899 }, { "epoch": 0.9058594197913084, "grad_norm": 0.2458641648577192, "learning_rate": 2.3063258729041635e-05, "loss": 2.3224, "step": 7900 }, { "epoch": 0.905974085540649, "grad_norm": 0.2550707171865382, "learning_rate": 2.300754424974455e-05, "loss": 2.3976, "step": 7901 }, { "epoch": 0.9060887512899897, "grad_norm": 0.27940292289825736, "learning_rate": 2.295189556246452e-05, "loss": 2.5029, "step": 7902 }, { "epoch": 0.9062034170393304, "grad_norm": 0.2758627350006131, "learning_rate": 2.2896312674877017e-05, "loss": 2.3768, "step": 7903 }, { "epoch": 0.906318082788671, "grad_norm": 0.2501087405194425, "learning_rate": 2.284079559464869e-05, "loss": 2.3264, "step": 7904 }, { "epoch": 0.9064327485380117, "grad_norm": 0.2526346671421727, "learning_rate": 2.2785344329437297e-05, "loss": 2.5465, "step": 7905 }, { "epoch": 0.9065474142873524, "grad_norm": 0.24413443907852095, "learning_rate": 2.2729958886891e-05, "loss": 2.3026, "step": 7906 }, { "epoch": 0.906662080036693, "grad_norm": 0.25158936353853034, "learning_rate": 2.2674639274649357e-05, "loss": 2.4795, "step": 7907 }, { "epoch": 0.9067767457860337, "grad_norm": 0.24059036981887977, "learning_rate": 2.2619385500342594e-05, "loss": 2.4297, "step": 7908 }, { "epoch": 0.9068914115353743, "grad_norm": 0.2635798576820344, "learning_rate": 2.2564197571591726e-05, "loss": 2.4817, "step": 7909 }, { "epoch": 0.907006077284715, "grad_norm": 0.2633408375649657, "learning_rate": 2.2509075496009168e-05, "loss": 2.3852, "step": 7910 }, { "epoch": 0.9071207430340558, "grad_norm": 0.2675663086237945, "learning_rate": 2.245401928119789e-05, "loss": 2.3659, "step": 7911 }, { "epoch": 0.9072354087833964, "grad_norm": 0.26898841794311357, "learning_rate": 2.2399028934751764e-05, "loss": 2.3604, "step": 7912 }, { "epoch": 0.9073500745327371, "grad_norm": 0.24553393301062404, "learning_rate": 2.2344104464255664e-05, "loss": 2.391, "step": 7913 }, { "epoch": 0.9074647402820778, "grad_norm": 0.2690125686843234, "learning_rate": 2.228924587728537e-05, "loss": 2.1671, "step": 7914 }, { "epoch": 0.9075794060314184, "grad_norm": 0.25391168996228936, "learning_rate": 2.2234453181407654e-05, "loss": 2.3694, "step": 7915 }, { "epoch": 0.9076940717807591, "grad_norm": 0.2654551987482526, "learning_rate": 2.217972638418009e-05, "loss": 2.1761, "step": 7916 }, { "epoch": 0.9078087375300997, "grad_norm": 0.27135612053122643, "learning_rate": 2.2125065493151242e-05, "loss": 2.2833, "step": 7917 }, { "epoch": 0.9079234032794404, "grad_norm": 0.24079487125715343, "learning_rate": 2.207047051586042e-05, "loss": 2.3862, "step": 7918 }, { "epoch": 0.9080380690287811, "grad_norm": 0.24838401386026915, "learning_rate": 2.201594145983793e-05, "loss": 2.4133, "step": 7919 }, { "epoch": 0.9081527347781218, "grad_norm": 0.2527087426914141, "learning_rate": 2.196147833260531e-05, "loss": 2.2708, "step": 7920 }, { "epoch": 0.9082674005274625, "grad_norm": 0.25189706427685826, "learning_rate": 2.190708114167439e-05, "loss": 2.2798, "step": 7921 }, { "epoch": 0.9083820662768031, "grad_norm": 0.23870274237895803, "learning_rate": 2.185274989454833e-05, "loss": 2.3957, "step": 7922 }, { "epoch": 0.9084967320261438, "grad_norm": 0.26146361575832205, "learning_rate": 2.1798484598721245e-05, "loss": 2.4097, "step": 7923 }, { "epoch": 0.9086113977754845, "grad_norm": 0.2577300202943111, "learning_rate": 2.1744285261677653e-05, "loss": 2.3238, "step": 7924 }, { "epoch": 0.9087260635248251, "grad_norm": 0.23911959575200273, "learning_rate": 2.1690151890893628e-05, "loss": 2.3646, "step": 7925 }, { "epoch": 0.9088407292741658, "grad_norm": 0.2484690091750989, "learning_rate": 2.1636084493835804e-05, "loss": 2.5228, "step": 7926 }, { "epoch": 0.9089553950235065, "grad_norm": 0.27500206040896635, "learning_rate": 2.1582083077961612e-05, "loss": 2.3183, "step": 7927 }, { "epoch": 0.9090700607728471, "grad_norm": 0.2608313947945333, "learning_rate": 2.1528147650719533e-05, "loss": 2.3748, "step": 7928 }, { "epoch": 0.9091847265221878, "grad_norm": 0.2568595556171314, "learning_rate": 2.1474278219549058e-05, "loss": 2.3894, "step": 7929 }, { "epoch": 0.9092993922715285, "grad_norm": 0.24138136489043635, "learning_rate": 2.1420474791880306e-05, "loss": 2.3941, "step": 7930 }, { "epoch": 0.9094140580208692, "grad_norm": 0.2398758345396527, "learning_rate": 2.136673737513456e-05, "loss": 2.3627, "step": 7931 }, { "epoch": 0.9095287237702099, "grad_norm": 0.25842059196958517, "learning_rate": 2.1313065976723834e-05, "loss": 2.531, "step": 7932 }, { "epoch": 0.9096433895195505, "grad_norm": 0.2545696850414797, "learning_rate": 2.125946060405104e-05, "loss": 2.2547, "step": 7933 }, { "epoch": 0.9097580552688912, "grad_norm": 0.2664535756038087, "learning_rate": 2.1205921264509986e-05, "loss": 2.32, "step": 7934 }, { "epoch": 0.9098727210182318, "grad_norm": 0.24092546936507106, "learning_rate": 2.1152447965485432e-05, "loss": 2.3564, "step": 7935 }, { "epoch": 0.9099873867675725, "grad_norm": 0.26941517866280684, "learning_rate": 2.109904071435309e-05, "loss": 2.3969, "step": 7936 }, { "epoch": 0.9101020525169132, "grad_norm": 0.2906635670090949, "learning_rate": 2.1045699518479345e-05, "loss": 2.3913, "step": 7937 }, { "epoch": 0.9102167182662538, "grad_norm": 0.2648506540808758, "learning_rate": 2.09924243852217e-05, "loss": 2.4178, "step": 7938 }, { "epoch": 0.9103313840155945, "grad_norm": 0.25568113775986445, "learning_rate": 2.0939215321928328e-05, "loss": 2.4078, "step": 7939 }, { "epoch": 0.9104460497649353, "grad_norm": 0.2360733093806897, "learning_rate": 2.0886072335938532e-05, "loss": 2.3216, "step": 7940 }, { "epoch": 0.9105607155142759, "grad_norm": 0.23834070098452093, "learning_rate": 2.083299543458228e-05, "loss": 2.4079, "step": 7941 }, { "epoch": 0.9106753812636166, "grad_norm": 0.25228416042551843, "learning_rate": 2.0779984625180548e-05, "loss": 2.2367, "step": 7942 }, { "epoch": 0.9107900470129572, "grad_norm": 0.2521735271398086, "learning_rate": 2.072703991504521e-05, "loss": 2.525, "step": 7943 }, { "epoch": 0.9109047127622979, "grad_norm": 0.2534719502817023, "learning_rate": 2.0674161311478924e-05, "loss": 2.416, "step": 7944 }, { "epoch": 0.9110193785116386, "grad_norm": 0.28122559249600215, "learning_rate": 2.06213488217753e-05, "loss": 2.4074, "step": 7945 }, { "epoch": 0.9111340442609792, "grad_norm": 0.25621858258613533, "learning_rate": 2.0568602453218843e-05, "loss": 2.347, "step": 7946 }, { "epoch": 0.9112487100103199, "grad_norm": 0.26564576016287866, "learning_rate": 2.0515922213084848e-05, "loss": 2.3198, "step": 7947 }, { "epoch": 0.9113633757596606, "grad_norm": 0.2728719353488285, "learning_rate": 2.0463308108639612e-05, "loss": 2.4589, "step": 7948 }, { "epoch": 0.9114780415090012, "grad_norm": 0.2481026541444845, "learning_rate": 2.0410760147140162e-05, "loss": 2.4044, "step": 7949 }, { "epoch": 0.911592707258342, "grad_norm": 0.25040294688921416, "learning_rate": 2.035827833583459e-05, "loss": 2.5385, "step": 7950 }, { "epoch": 0.9117073730076826, "grad_norm": 0.27304643825929703, "learning_rate": 2.0305862681961717e-05, "loss": 2.3287, "step": 7951 }, { "epoch": 0.9118220387570233, "grad_norm": 0.27729707874279247, "learning_rate": 2.025351319275137e-05, "loss": 2.3632, "step": 7952 }, { "epoch": 0.911936704506364, "grad_norm": 0.2487220078848279, "learning_rate": 2.0201229875424e-05, "loss": 2.4274, "step": 7953 }, { "epoch": 0.9120513702557046, "grad_norm": 0.269903611618632, "learning_rate": 2.0149012737191052e-05, "loss": 2.4517, "step": 7954 }, { "epoch": 0.9121660360050453, "grad_norm": 0.2544392297092218, "learning_rate": 2.0096861785255217e-05, "loss": 2.3716, "step": 7955 }, { "epoch": 0.9122807017543859, "grad_norm": 0.2756260067261441, "learning_rate": 2.00447770268094e-05, "loss": 2.4198, "step": 7956 }, { "epoch": 0.9123953675037266, "grad_norm": 0.27686351547989557, "learning_rate": 1.9992758469037808e-05, "loss": 2.4203, "step": 7957 }, { "epoch": 0.9125100332530673, "grad_norm": 0.23229343546216494, "learning_rate": 1.994080611911553e-05, "loss": 2.1795, "step": 7958 }, { "epoch": 0.912624699002408, "grad_norm": 0.2793079021045908, "learning_rate": 1.988891998420811e-05, "loss": 2.288, "step": 7959 }, { "epoch": 0.9127393647517487, "grad_norm": 0.2477835520672039, "learning_rate": 1.983710007147249e-05, "loss": 2.4896, "step": 7960 }, { "epoch": 0.9128540305010894, "grad_norm": 0.2761137058296533, "learning_rate": 1.9785346388056236e-05, "loss": 2.5479, "step": 7961 }, { "epoch": 0.91296869625043, "grad_norm": 0.28408936240485505, "learning_rate": 1.9733658941097686e-05, "loss": 2.4743, "step": 7962 }, { "epoch": 0.9130833619997707, "grad_norm": 0.243227350656218, "learning_rate": 1.96820377377262e-05, "loss": 2.3633, "step": 7963 }, { "epoch": 0.9131980277491113, "grad_norm": 0.24971888746080473, "learning_rate": 1.9630482785061854e-05, "loss": 2.4409, "step": 7964 }, { "epoch": 0.913312693498452, "grad_norm": 0.27381885216961716, "learning_rate": 1.9578994090215796e-05, "loss": 2.3039, "step": 7965 }, { "epoch": 0.9134273592477927, "grad_norm": 0.2893100878612547, "learning_rate": 1.952757166028979e-05, "loss": 2.4132, "step": 7966 }, { "epoch": 0.9135420249971333, "grad_norm": 0.26426162992338725, "learning_rate": 1.9476215502376714e-05, "loss": 2.3879, "step": 7967 }, { "epoch": 0.913656690746474, "grad_norm": 0.2750488912030977, "learning_rate": 1.9424925623560075e-05, "loss": 2.3436, "step": 7968 }, { "epoch": 0.9137713564958146, "grad_norm": 0.26550105223081966, "learning_rate": 1.9373702030914208e-05, "loss": 2.3745, "step": 7969 }, { "epoch": 0.9138860222451554, "grad_norm": 0.2544003262522658, "learning_rate": 1.932254473150474e-05, "loss": 2.3693, "step": 7970 }, { "epoch": 0.9140006879944961, "grad_norm": 0.22774973269781684, "learning_rate": 1.9271453732387646e-05, "loss": 2.4314, "step": 7971 }, { "epoch": 0.9141153537438367, "grad_norm": 0.2433306057065735, "learning_rate": 1.9220429040609943e-05, "loss": 2.4567, "step": 7972 }, { "epoch": 0.9142300194931774, "grad_norm": 0.2591787343448949, "learning_rate": 1.9169470663209675e-05, "loss": 2.5235, "step": 7973 }, { "epoch": 0.9143446852425181, "grad_norm": 0.25497552405908674, "learning_rate": 1.911857860721533e-05, "loss": 2.4218, "step": 7974 }, { "epoch": 0.9144593509918587, "grad_norm": 0.26170914019516583, "learning_rate": 1.9067752879646683e-05, "loss": 2.3525, "step": 7975 }, { "epoch": 0.9145740167411994, "grad_norm": 0.2710807091619628, "learning_rate": 1.901699348751418e-05, "loss": 2.3217, "step": 7976 }, { "epoch": 0.91468868249054, "grad_norm": 0.26184255260961053, "learning_rate": 1.896630043781894e-05, "loss": 2.5225, "step": 7977 }, { "epoch": 0.9148033482398807, "grad_norm": 0.27548298724178183, "learning_rate": 1.8915673737553317e-05, "loss": 2.524, "step": 7978 }, { "epoch": 0.9149180139892215, "grad_norm": 0.26156981036334437, "learning_rate": 1.8865113393700117e-05, "loss": 2.3514, "step": 7979 }, { "epoch": 0.9150326797385621, "grad_norm": 0.2686840361718828, "learning_rate": 1.8814619413233314e-05, "loss": 2.3751, "step": 7980 }, { "epoch": 0.9151473454879028, "grad_norm": 0.2515356834960261, "learning_rate": 1.8764191803117502e-05, "loss": 2.4604, "step": 7981 }, { "epoch": 0.9152620112372435, "grad_norm": 0.2866490588818978, "learning_rate": 1.8713830570308343e-05, "loss": 2.2217, "step": 7982 }, { "epoch": 0.9153766769865841, "grad_norm": 0.25815223016280925, "learning_rate": 1.8663535721752e-05, "loss": 2.4529, "step": 7983 }, { "epoch": 0.9154913427359248, "grad_norm": 0.2613118482300695, "learning_rate": 1.8613307264385816e-05, "loss": 2.4427, "step": 7984 }, { "epoch": 0.9156060084852654, "grad_norm": 0.25563490272277123, "learning_rate": 1.8563145205137856e-05, "loss": 2.3248, "step": 7985 }, { "epoch": 0.9157206742346061, "grad_norm": 0.26308704049976694, "learning_rate": 1.8513049550926974e-05, "loss": 2.3331, "step": 7986 }, { "epoch": 0.9158353399839468, "grad_norm": 0.2691209456354479, "learning_rate": 1.8463020308662927e-05, "loss": 2.4257, "step": 7987 }, { "epoch": 0.9159500057332874, "grad_norm": 0.24971958931054827, "learning_rate": 1.8413057485246355e-05, "loss": 2.2825, "step": 7988 }, { "epoch": 0.9160646714826282, "grad_norm": 0.2347494049609805, "learning_rate": 1.836316108756847e-05, "loss": 2.4649, "step": 7989 }, { "epoch": 0.9161793372319688, "grad_norm": 0.2379239030923568, "learning_rate": 1.831333112251188e-05, "loss": 2.3399, "step": 7990 }, { "epoch": 0.9162940029813095, "grad_norm": 0.278357519210992, "learning_rate": 1.826356759694936e-05, "loss": 2.3701, "step": 7991 }, { "epoch": 0.9164086687306502, "grad_norm": 0.2561537001232615, "learning_rate": 1.821387051774498e-05, "loss": 2.3156, "step": 7992 }, { "epoch": 0.9165233344799908, "grad_norm": 0.26891840820248547, "learning_rate": 1.8164239891753476e-05, "loss": 2.3687, "step": 7993 }, { "epoch": 0.9166380002293315, "grad_norm": 0.27703946127002466, "learning_rate": 1.8114675725820428e-05, "loss": 2.4344, "step": 7994 }, { "epoch": 0.9167526659786722, "grad_norm": 0.268571678749942, "learning_rate": 1.8065178026782315e-05, "loss": 2.4033, "step": 7995 }, { "epoch": 0.9168673317280128, "grad_norm": 0.24724163625560708, "learning_rate": 1.8015746801466447e-05, "loss": 2.2614, "step": 7996 }, { "epoch": 0.9169819974773535, "grad_norm": 0.27260184253505326, "learning_rate": 1.7966382056690765e-05, "loss": 2.391, "step": 7997 }, { "epoch": 0.9170966632266941, "grad_norm": 0.2591481752110663, "learning_rate": 1.7917083799264323e-05, "loss": 2.3386, "step": 7998 }, { "epoch": 0.9172113289760349, "grad_norm": 0.2550676766374494, "learning_rate": 1.786785203598679e-05, "loss": 2.4072, "step": 7999 }, { "epoch": 0.9173259947253756, "grad_norm": 0.26146311090283386, "learning_rate": 1.781868677364884e-05, "loss": 2.4198, "step": 8000 }, { "epoch": 0.9174406604747162, "grad_norm": 0.2356798541763734, "learning_rate": 1.7769588019031835e-05, "loss": 2.4053, "step": 8001 }, { "epoch": 0.9175553262240569, "grad_norm": 0.2529702641889254, "learning_rate": 1.772055577890802e-05, "loss": 2.2814, "step": 8002 }, { "epoch": 0.9176699919733975, "grad_norm": 0.2943835985004157, "learning_rate": 1.767159006004043e-05, "loss": 2.4049, "step": 8003 }, { "epoch": 0.9177846577227382, "grad_norm": 0.2807948584623163, "learning_rate": 1.7622690869182946e-05, "loss": 2.3376, "step": 8004 }, { "epoch": 0.9178993234720789, "grad_norm": 0.25007874522344503, "learning_rate": 1.7573858213080384e-05, "loss": 2.4103, "step": 8005 }, { "epoch": 0.9180139892214195, "grad_norm": 0.2658146043768412, "learning_rate": 1.7525092098468143e-05, "loss": 2.1952, "step": 8006 }, { "epoch": 0.9181286549707602, "grad_norm": 0.27960997861621845, "learning_rate": 1.7476392532072673e-05, "loss": 2.2935, "step": 8007 }, { "epoch": 0.918243320720101, "grad_norm": 0.2641727380124091, "learning_rate": 1.7427759520611153e-05, "loss": 2.3138, "step": 8008 }, { "epoch": 0.9183579864694416, "grad_norm": 0.28608360586076703, "learning_rate": 1.7379193070791387e-05, "loss": 2.4391, "step": 8009 }, { "epoch": 0.9184726522187823, "grad_norm": 0.2641146051205449, "learning_rate": 1.7330693189312464e-05, "loss": 2.4031, "step": 8010 }, { "epoch": 0.9185873179681229, "grad_norm": 0.2477446034912744, "learning_rate": 1.7282259882863972e-05, "loss": 2.3376, "step": 8011 }, { "epoch": 0.9187019837174636, "grad_norm": 0.24554748678152213, "learning_rate": 1.723389315812618e-05, "loss": 2.3335, "step": 8012 }, { "epoch": 0.9188166494668043, "grad_norm": 0.24960720814015303, "learning_rate": 1.7185593021770472e-05, "loss": 2.335, "step": 8013 }, { "epoch": 0.9189313152161449, "grad_norm": 0.2515645579988811, "learning_rate": 1.713735948045897e-05, "loss": 2.4738, "step": 8014 }, { "epoch": 0.9190459809654856, "grad_norm": 0.2641872583191583, "learning_rate": 1.708919254084451e-05, "loss": 2.4574, "step": 8015 }, { "epoch": 0.9191606467148263, "grad_norm": 0.2584275813051626, "learning_rate": 1.7041092209570784e-05, "loss": 2.4039, "step": 8016 }, { "epoch": 0.9192753124641669, "grad_norm": 0.24781138472333708, "learning_rate": 1.6993058493272483e-05, "loss": 2.3033, "step": 8017 }, { "epoch": 0.9193899782135077, "grad_norm": 0.24658448928599494, "learning_rate": 1.6945091398574698e-05, "loss": 2.2932, "step": 8018 }, { "epoch": 0.9195046439628483, "grad_norm": 0.2739745712532286, "learning_rate": 1.689719093209363e-05, "loss": 2.4372, "step": 8019 }, { "epoch": 0.919619309712189, "grad_norm": 0.26670163818051185, "learning_rate": 1.6849357100436448e-05, "loss": 2.3562, "step": 8020 }, { "epoch": 0.9197339754615297, "grad_norm": 0.2704356247511721, "learning_rate": 1.68015899102007e-05, "loss": 2.2534, "step": 8021 }, { "epoch": 0.9198486412108703, "grad_norm": 0.2996818891262219, "learning_rate": 1.6753889367975008e-05, "loss": 2.4931, "step": 8022 }, { "epoch": 0.919963306960211, "grad_norm": 0.2456550937484026, "learning_rate": 1.670625548033883e-05, "loss": 2.4219, "step": 8023 }, { "epoch": 0.9200779727095516, "grad_norm": 0.26025304614520073, "learning_rate": 1.6658688253862185e-05, "loss": 2.2894, "step": 8024 }, { "epoch": 0.9201926384588923, "grad_norm": 0.2578033726658523, "learning_rate": 1.6611187695106223e-05, "loss": 2.2727, "step": 8025 }, { "epoch": 0.920307304208233, "grad_norm": 0.26216816800449183, "learning_rate": 1.6563753810622752e-05, "loss": 2.3408, "step": 8026 }, { "epoch": 0.9204219699575736, "grad_norm": 0.261256331046384, "learning_rate": 1.6516386606954203e-05, "loss": 2.3193, "step": 8027 }, { "epoch": 0.9205366357069144, "grad_norm": 0.24402673385751353, "learning_rate": 1.6469086090634134e-05, "loss": 2.4218, "step": 8028 }, { "epoch": 0.9206513014562551, "grad_norm": 0.2746084357662859, "learning_rate": 1.642185226818671e-05, "loss": 2.3899, "step": 8029 }, { "epoch": 0.9207659672055957, "grad_norm": 0.24236924930280326, "learning_rate": 1.6374685146126943e-05, "loss": 2.436, "step": 8030 }, { "epoch": 0.9208806329549364, "grad_norm": 0.22700968993118648, "learning_rate": 1.6327584730960576e-05, "loss": 2.3948, "step": 8031 }, { "epoch": 0.920995298704277, "grad_norm": 0.2535278933427497, "learning_rate": 1.6280551029184355e-05, "loss": 2.4042, "step": 8032 }, { "epoch": 0.9211099644536177, "grad_norm": 0.24834352454977696, "learning_rate": 1.6233584047285478e-05, "loss": 2.2668, "step": 8033 }, { "epoch": 0.9212246302029584, "grad_norm": 0.2611479636369127, "learning_rate": 1.6186683791742318e-05, "loss": 2.3545, "step": 8034 }, { "epoch": 0.921339295952299, "grad_norm": 0.24651986676882395, "learning_rate": 1.613985026902376e-05, "loss": 2.2661, "step": 8035 }, { "epoch": 0.9214539617016397, "grad_norm": 0.25988313718205536, "learning_rate": 1.6093083485589633e-05, "loss": 2.4275, "step": 8036 }, { "epoch": 0.9215686274509803, "grad_norm": 0.2875927731094603, "learning_rate": 1.6046383447890612e-05, "loss": 2.3422, "step": 8037 }, { "epoch": 0.921683293200321, "grad_norm": 0.2608070110660877, "learning_rate": 1.599975016236799e-05, "loss": 2.5118, "step": 8038 }, { "epoch": 0.9217979589496618, "grad_norm": 0.2589293421098052, "learning_rate": 1.595318363545384e-05, "loss": 2.3081, "step": 8039 }, { "epoch": 0.9219126246990024, "grad_norm": 0.2722987015486067, "learning_rate": 1.590668387357136e-05, "loss": 2.4379, "step": 8040 }, { "epoch": 0.9220272904483431, "grad_norm": 0.25924875070456926, "learning_rate": 1.5860250883134143e-05, "loss": 2.3375, "step": 8041 }, { "epoch": 0.9221419561976838, "grad_norm": 0.26974540043077905, "learning_rate": 1.581388467054673e-05, "loss": 2.4762, "step": 8042 }, { "epoch": 0.9222566219470244, "grad_norm": 0.2799503301423009, "learning_rate": 1.5767585242204562e-05, "loss": 2.3385, "step": 8043 }, { "epoch": 0.9223712876963651, "grad_norm": 0.26064917524069303, "learning_rate": 1.5721352604493645e-05, "loss": 2.4257, "step": 8044 }, { "epoch": 0.9224859534457057, "grad_norm": 0.31501954858120906, "learning_rate": 1.567518676379098e-05, "loss": 2.2656, "step": 8045 }, { "epoch": 0.9226006191950464, "grad_norm": 0.28667108818283005, "learning_rate": 1.5629087726464265e-05, "loss": 2.3694, "step": 8046 }, { "epoch": 0.9227152849443871, "grad_norm": 0.2293863517201724, "learning_rate": 1.55830554988719e-05, "loss": 2.2784, "step": 8047 }, { "epoch": 0.9228299506937278, "grad_norm": 0.25280557352421623, "learning_rate": 1.5537090087363205e-05, "loss": 2.2526, "step": 8048 }, { "epoch": 0.9229446164430685, "grad_norm": 0.25257668366634006, "learning_rate": 1.549119149827821e-05, "loss": 2.3565, "step": 8049 }, { "epoch": 0.9230592821924092, "grad_norm": 0.24461801050401896, "learning_rate": 1.5445359737947806e-05, "loss": 2.3586, "step": 8050 }, { "epoch": 0.9231739479417498, "grad_norm": 0.2637585277435613, "learning_rate": 1.539959481269354e-05, "loss": 2.5222, "step": 8051 }, { "epoch": 0.9232886136910905, "grad_norm": 0.27081062356170865, "learning_rate": 1.5353896728827922e-05, "loss": 2.4402, "step": 8052 }, { "epoch": 0.9234032794404311, "grad_norm": 0.24721872818071702, "learning_rate": 1.5308265492654018e-05, "loss": 2.5042, "step": 8053 }, { "epoch": 0.9235179451897718, "grad_norm": 0.29667772138042775, "learning_rate": 1.526270111046574e-05, "loss": 2.3161, "step": 8054 }, { "epoch": 0.9236326109391125, "grad_norm": 0.23851441070567184, "learning_rate": 1.5217203588548001e-05, "loss": 2.2463, "step": 8055 }, { "epoch": 0.9237472766884531, "grad_norm": 0.2597259226025099, "learning_rate": 1.5171772933176175e-05, "loss": 2.3163, "step": 8056 }, { "epoch": 0.9238619424377938, "grad_norm": 0.2621332819611845, "learning_rate": 1.5126409150616638e-05, "loss": 2.3143, "step": 8057 }, { "epoch": 0.9239766081871345, "grad_norm": 0.2253327387878889, "learning_rate": 1.5081112247126494e-05, "loss": 2.3978, "step": 8058 }, { "epoch": 0.9240912739364752, "grad_norm": 0.24468009057057297, "learning_rate": 1.5035882228953357e-05, "loss": 2.3048, "step": 8059 }, { "epoch": 0.9242059396858159, "grad_norm": 0.2616487800639435, "learning_rate": 1.4990719102336126e-05, "loss": 2.3876, "step": 8060 }, { "epoch": 0.9243206054351565, "grad_norm": 0.25626595317777, "learning_rate": 1.4945622873504094e-05, "loss": 2.4645, "step": 8061 }, { "epoch": 0.9244352711844972, "grad_norm": 0.25829895455656565, "learning_rate": 1.4900593548677343e-05, "loss": 2.3807, "step": 8062 }, { "epoch": 0.9245499369338379, "grad_norm": 0.2801724726914918, "learning_rate": 1.4855631134066905e-05, "loss": 2.3013, "step": 8063 }, { "epoch": 0.9246646026831785, "grad_norm": 0.2341175315881007, "learning_rate": 1.4810735635874428e-05, "loss": 2.3921, "step": 8064 }, { "epoch": 0.9247792684325192, "grad_norm": 0.2574068992612916, "learning_rate": 1.476590706029246e-05, "loss": 2.374, "step": 8065 }, { "epoch": 0.9248939341818598, "grad_norm": 0.2625702326673317, "learning_rate": 1.4721145413504167e-05, "loss": 2.5189, "step": 8066 }, { "epoch": 0.9250085999312005, "grad_norm": 0.25630476012187603, "learning_rate": 1.4676450701683719e-05, "loss": 2.3504, "step": 8067 }, { "epoch": 0.9251232656805413, "grad_norm": 0.2825537565181648, "learning_rate": 1.4631822930995686e-05, "loss": 2.3681, "step": 8068 }, { "epoch": 0.9252379314298819, "grad_norm": 0.25883033966670527, "learning_rate": 1.45872621075957e-05, "loss": 2.4504, "step": 8069 }, { "epoch": 0.9253525971792226, "grad_norm": 0.2772779369534294, "learning_rate": 1.4542768237630177e-05, "loss": 2.4007, "step": 8070 }, { "epoch": 0.9254672629285632, "grad_norm": 0.262993882211127, "learning_rate": 1.4498341327236041e-05, "loss": 2.3537, "step": 8071 }, { "epoch": 0.9255819286779039, "grad_norm": 0.2574064289083826, "learning_rate": 1.445398138254128e-05, "loss": 2.6096, "step": 8072 }, { "epoch": 0.9256965944272446, "grad_norm": 0.2851183605526974, "learning_rate": 1.4409688409664445e-05, "loss": 2.393, "step": 8073 }, { "epoch": 0.9258112601765852, "grad_norm": 0.2700998206778859, "learning_rate": 1.4365462414714815e-05, "loss": 2.4588, "step": 8074 }, { "epoch": 0.9259259259259259, "grad_norm": 0.24209341760037892, "learning_rate": 1.4321303403792674e-05, "loss": 2.4768, "step": 8075 }, { "epoch": 0.9260405916752666, "grad_norm": 0.2615796457137312, "learning_rate": 1.4277211382988876e-05, "loss": 2.4355, "step": 8076 }, { "epoch": 0.9261552574246072, "grad_norm": 0.2481692971947895, "learning_rate": 1.4233186358384998e-05, "loss": 2.3523, "step": 8077 }, { "epoch": 0.926269923173948, "grad_norm": 0.26187215463348046, "learning_rate": 1.4189228336053516e-05, "loss": 2.4091, "step": 8078 }, { "epoch": 0.9263845889232886, "grad_norm": 0.282674474270493, "learning_rate": 1.4145337322057583e-05, "loss": 2.4269, "step": 8079 }, { "epoch": 0.9264992546726293, "grad_norm": 0.23963390516482738, "learning_rate": 1.4101513322451131e-05, "loss": 2.4968, "step": 8080 }, { "epoch": 0.92661392042197, "grad_norm": 0.2648079043992534, "learning_rate": 1.4057756343278827e-05, "loss": 2.4128, "step": 8081 }, { "epoch": 0.9267285861713106, "grad_norm": 0.25472455996478843, "learning_rate": 1.401406639057623e-05, "loss": 2.2936, "step": 8082 }, { "epoch": 0.9268432519206513, "grad_norm": 0.26990040563559786, "learning_rate": 1.3970443470369354e-05, "loss": 2.4937, "step": 8083 }, { "epoch": 0.926957917669992, "grad_norm": 0.25026388096420865, "learning_rate": 1.3926887588675274e-05, "loss": 2.3775, "step": 8084 }, { "epoch": 0.9270725834193326, "grad_norm": 0.2750035741354005, "learning_rate": 1.388339875150163e-05, "loss": 2.3809, "step": 8085 }, { "epoch": 0.9271872491686733, "grad_norm": 0.2617939472972245, "learning_rate": 1.3839976964846901e-05, "loss": 2.4285, "step": 8086 }, { "epoch": 0.927301914918014, "grad_norm": 0.24190652887271005, "learning_rate": 1.3796622234700295e-05, "loss": 2.3817, "step": 8087 }, { "epoch": 0.9274165806673547, "grad_norm": 0.2446660276298129, "learning_rate": 1.375333456704181e-05, "loss": 2.3651, "step": 8088 }, { "epoch": 0.9275312464166954, "grad_norm": 0.2701734654306042, "learning_rate": 1.3710113967842053e-05, "loss": 2.3684, "step": 8089 }, { "epoch": 0.927645912166036, "grad_norm": 0.2663681660267147, "learning_rate": 1.3666960443062649e-05, "loss": 2.3223, "step": 8090 }, { "epoch": 0.9277605779153767, "grad_norm": 0.24776890733817822, "learning_rate": 1.3623873998655612e-05, "loss": 2.3772, "step": 8091 }, { "epoch": 0.9278752436647173, "grad_norm": 0.23582531936025827, "learning_rate": 1.3580854640564023e-05, "loss": 2.3119, "step": 8092 }, { "epoch": 0.927989909414058, "grad_norm": 0.2609344013850428, "learning_rate": 1.3537902374721523e-05, "loss": 2.3441, "step": 8093 }, { "epoch": 0.9281045751633987, "grad_norm": 0.24913621976369907, "learning_rate": 1.349501720705254e-05, "loss": 2.3979, "step": 8094 }, { "epoch": 0.9282192409127393, "grad_norm": 0.27003163874076047, "learning_rate": 1.3452199143472343e-05, "loss": 2.351, "step": 8095 }, { "epoch": 0.92833390666208, "grad_norm": 0.2652766675384967, "learning_rate": 1.3409448189886874e-05, "loss": 2.3332, "step": 8096 }, { "epoch": 0.9284485724114208, "grad_norm": 0.2418609052748786, "learning_rate": 1.336676435219275e-05, "loss": 2.4123, "step": 8097 }, { "epoch": 0.9285632381607614, "grad_norm": 0.2668356668007308, "learning_rate": 1.3324147636277372e-05, "loss": 2.3291, "step": 8098 }, { "epoch": 0.9286779039101021, "grad_norm": 0.2770723424643746, "learning_rate": 1.3281598048018928e-05, "loss": 2.4089, "step": 8099 }, { "epoch": 0.9287925696594427, "grad_norm": 0.30168713655626106, "learning_rate": 1.3239115593286389e-05, "loss": 2.3588, "step": 8100 }, { "epoch": 0.9289072354087834, "grad_norm": 0.2535836061655585, "learning_rate": 1.3196700277939344e-05, "loss": 2.2975, "step": 8101 }, { "epoch": 0.9290219011581241, "grad_norm": 0.24718738857233885, "learning_rate": 1.3154352107828171e-05, "loss": 2.5053, "step": 8102 }, { "epoch": 0.9291365669074647, "grad_norm": 0.231351121321423, "learning_rate": 1.3112071088793976e-05, "loss": 2.2341, "step": 8103 }, { "epoch": 0.9292512326568054, "grad_norm": 0.23475932132271454, "learning_rate": 1.3069857226668647e-05, "loss": 2.3904, "step": 8104 }, { "epoch": 0.929365898406146, "grad_norm": 0.23460476116341777, "learning_rate": 1.3027710527274806e-05, "loss": 2.3637, "step": 8105 }, { "epoch": 0.9294805641554867, "grad_norm": 0.2576488117986609, "learning_rate": 1.2985630996425746e-05, "loss": 2.3566, "step": 8106 }, { "epoch": 0.9295952299048275, "grad_norm": 0.24929724574098233, "learning_rate": 1.2943618639925547e-05, "loss": 2.3319, "step": 8107 }, { "epoch": 0.9297098956541681, "grad_norm": 0.23539520977208472, "learning_rate": 1.2901673463569075e-05, "loss": 2.3742, "step": 8108 }, { "epoch": 0.9298245614035088, "grad_norm": 0.25739202726007077, "learning_rate": 1.28597954731417e-05, "loss": 2.449, "step": 8109 }, { "epoch": 0.9299392271528495, "grad_norm": 0.2877615685642805, "learning_rate": 1.2817984674419914e-05, "loss": 2.4556, "step": 8110 }, { "epoch": 0.9300538929021901, "grad_norm": 0.26353407010656826, "learning_rate": 1.2776241073170603e-05, "loss": 2.4423, "step": 8111 }, { "epoch": 0.9301685586515308, "grad_norm": 0.2560319015406194, "learning_rate": 1.273456467515144e-05, "loss": 2.1551, "step": 8112 }, { "epoch": 0.9302832244008714, "grad_norm": 0.24250028838108162, "learning_rate": 1.2692955486110991e-05, "loss": 2.4481, "step": 8113 }, { "epoch": 0.9303978901502121, "grad_norm": 0.2682825746063407, "learning_rate": 1.265141351178839e-05, "loss": 2.3304, "step": 8114 }, { "epoch": 0.9305125558995528, "grad_norm": 0.2331300577866949, "learning_rate": 1.2609938757913553e-05, "loss": 2.2855, "step": 8115 }, { "epoch": 0.9306272216488934, "grad_norm": 0.2588524294465511, "learning_rate": 1.2568531230207236e-05, "loss": 2.2791, "step": 8116 }, { "epoch": 0.9307418873982342, "grad_norm": 0.2512275611350516, "learning_rate": 1.252719093438076e-05, "loss": 2.3478, "step": 8117 }, { "epoch": 0.9308565531475749, "grad_norm": 0.27675649325902624, "learning_rate": 1.2485917876136178e-05, "loss": 2.3105, "step": 8118 }, { "epoch": 0.9309712188969155, "grad_norm": 0.25420024848552236, "learning_rate": 1.2444712061166318e-05, "loss": 2.2799, "step": 8119 }, { "epoch": 0.9310858846462562, "grad_norm": 0.31861198933237, "learning_rate": 1.2403573495154863e-05, "loss": 2.4298, "step": 8120 }, { "epoch": 0.9312005503955968, "grad_norm": 0.2561857908232313, "learning_rate": 1.2362502183775937e-05, "loss": 2.38, "step": 8121 }, { "epoch": 0.9313152161449375, "grad_norm": 0.255324395430613, "learning_rate": 1.2321498132694676e-05, "loss": 2.3723, "step": 8122 }, { "epoch": 0.9314298818942782, "grad_norm": 0.2642985124976724, "learning_rate": 1.2280561347566777e-05, "loss": 2.4441, "step": 8123 }, { "epoch": 0.9315445476436188, "grad_norm": 0.28474685572396197, "learning_rate": 1.2239691834038558e-05, "loss": 2.4006, "step": 8124 }, { "epoch": 0.9316592133929595, "grad_norm": 0.27220102181672123, "learning_rate": 1.2198889597747342e-05, "loss": 2.3648, "step": 8125 }, { "epoch": 0.9317738791423001, "grad_norm": 0.2921522234509613, "learning_rate": 1.2158154644321018e-05, "loss": 2.3943, "step": 8126 }, { "epoch": 0.9318885448916409, "grad_norm": 0.2793095130191193, "learning_rate": 1.211748697937809e-05, "loss": 2.3729, "step": 8127 }, { "epoch": 0.9320032106409816, "grad_norm": 0.24376219647054306, "learning_rate": 1.2076886608527959e-05, "loss": 2.3583, "step": 8128 }, { "epoch": 0.9321178763903222, "grad_norm": 0.26408061711823383, "learning_rate": 1.2036353537370648e-05, "loss": 2.5253, "step": 8129 }, { "epoch": 0.9322325421396629, "grad_norm": 0.24676651918416226, "learning_rate": 1.199588777149696e-05, "loss": 2.4686, "step": 8130 }, { "epoch": 0.9323472078890036, "grad_norm": 0.25976025126099656, "learning_rate": 1.1955489316488322e-05, "loss": 2.432, "step": 8131 }, { "epoch": 0.9324618736383442, "grad_norm": 0.24315919247582696, "learning_rate": 1.1915158177916996e-05, "loss": 2.3466, "step": 8132 }, { "epoch": 0.9325765393876849, "grad_norm": 0.2435881397005569, "learning_rate": 1.1874894361345867e-05, "loss": 2.2413, "step": 8133 }, { "epoch": 0.9326912051370255, "grad_norm": 0.2784322742443304, "learning_rate": 1.1834697872328492e-05, "loss": 2.3823, "step": 8134 }, { "epoch": 0.9328058708863662, "grad_norm": 0.27380728021859974, "learning_rate": 1.179456871640927e-05, "loss": 2.3836, "step": 8135 }, { "epoch": 0.932920536635707, "grad_norm": 0.2696855930390986, "learning_rate": 1.1754506899123273e-05, "loss": 2.4284, "step": 8136 }, { "epoch": 0.9330352023850476, "grad_norm": 0.2632257106020182, "learning_rate": 1.171451242599625e-05, "loss": 2.382, "step": 8137 }, { "epoch": 0.9331498681343883, "grad_norm": 0.27040681980860104, "learning_rate": 1.1674585302544726e-05, "loss": 2.493, "step": 8138 }, { "epoch": 0.9332645338837289, "grad_norm": 0.2511660045678568, "learning_rate": 1.1634725534275692e-05, "loss": 2.3608, "step": 8139 }, { "epoch": 0.9333791996330696, "grad_norm": 0.25653616936615176, "learning_rate": 1.1594933126687301e-05, "loss": 2.2532, "step": 8140 }, { "epoch": 0.9334938653824103, "grad_norm": 0.26385315639963225, "learning_rate": 1.1555208085267999e-05, "loss": 2.4286, "step": 8141 }, { "epoch": 0.9336085311317509, "grad_norm": 0.24634496044355686, "learning_rate": 1.1515550415497122e-05, "loss": 2.3009, "step": 8142 }, { "epoch": 0.9337231968810916, "grad_norm": 0.25905183880565524, "learning_rate": 1.1475960122844742e-05, "loss": 2.3532, "step": 8143 }, { "epoch": 0.9338378626304323, "grad_norm": 0.262936580628309, "learning_rate": 1.1436437212771489e-05, "loss": 2.2969, "step": 8144 }, { "epoch": 0.9339525283797729, "grad_norm": 0.24296228702044412, "learning_rate": 1.1396981690728891e-05, "loss": 2.3006, "step": 8145 }, { "epoch": 0.9340671941291137, "grad_norm": 0.2564210334966024, "learning_rate": 1.1357593562159096e-05, "loss": 2.5314, "step": 8146 }, { "epoch": 0.9341818598784543, "grad_norm": 0.2522925381206959, "learning_rate": 1.1318272832494814e-05, "loss": 2.3658, "step": 8147 }, { "epoch": 0.934296525627795, "grad_norm": 0.25305273931603023, "learning_rate": 1.1279019507159705e-05, "loss": 2.4349, "step": 8148 }, { "epoch": 0.9344111913771357, "grad_norm": 0.23915144230310795, "learning_rate": 1.1239833591567994e-05, "loss": 2.3122, "step": 8149 }, { "epoch": 0.9345258571264763, "grad_norm": 0.24068217190610214, "learning_rate": 1.1200715091124637e-05, "loss": 2.4044, "step": 8150 }, { "epoch": 0.934640522875817, "grad_norm": 0.2559142024861348, "learning_rate": 1.1161664011225203e-05, "loss": 2.3234, "step": 8151 }, { "epoch": 0.9347551886251577, "grad_norm": 0.24937071839083125, "learning_rate": 1.1122680357256222e-05, "loss": 2.3436, "step": 8152 }, { "epoch": 0.9348698543744983, "grad_norm": 0.2494229983249669, "learning_rate": 1.1083764134594554e-05, "loss": 2.3909, "step": 8153 }, { "epoch": 0.934984520123839, "grad_norm": 0.24045518556775694, "learning_rate": 1.104491534860802e-05, "loss": 2.4098, "step": 8154 }, { "epoch": 0.9350991858731796, "grad_norm": 0.2612825271897257, "learning_rate": 1.1006134004655111e-05, "loss": 2.503, "step": 8155 }, { "epoch": 0.9352138516225204, "grad_norm": 0.26178766323469627, "learning_rate": 1.0967420108084991e-05, "loss": 2.2943, "step": 8156 }, { "epoch": 0.9353285173718611, "grad_norm": 0.25675477711198846, "learning_rate": 1.092877366423739e-05, "loss": 2.3567, "step": 8157 }, { "epoch": 0.9354431831212017, "grad_norm": 0.2691510228172048, "learning_rate": 1.0890194678442989e-05, "loss": 2.4989, "step": 8158 }, { "epoch": 0.9355578488705424, "grad_norm": 0.2533649609496583, "learning_rate": 1.0851683156022862e-05, "loss": 2.2879, "step": 8159 }, { "epoch": 0.935672514619883, "grad_norm": 0.2614503229547817, "learning_rate": 1.081323910228904e-05, "loss": 2.3818, "step": 8160 }, { "epoch": 0.9357871803692237, "grad_norm": 0.27044941928066546, "learning_rate": 1.0774862522544227e-05, "loss": 2.4398, "step": 8161 }, { "epoch": 0.9359018461185644, "grad_norm": 0.25535286117410844, "learning_rate": 1.0736553422081574e-05, "loss": 2.2717, "step": 8162 }, { "epoch": 0.936016511867905, "grad_norm": 0.26589397287993904, "learning_rate": 1.0698311806185135e-05, "loss": 2.3391, "step": 8163 }, { "epoch": 0.9361311776172457, "grad_norm": 0.26660613382708637, "learning_rate": 1.0660137680129688e-05, "loss": 2.4047, "step": 8164 }, { "epoch": 0.9362458433665864, "grad_norm": 0.24489497052574707, "learning_rate": 1.062203104918058e-05, "loss": 2.392, "step": 8165 }, { "epoch": 0.936360509115927, "grad_norm": 0.2646791392910024, "learning_rate": 1.0583991918593883e-05, "loss": 2.3431, "step": 8166 }, { "epoch": 0.9364751748652678, "grad_norm": 0.2740548601999037, "learning_rate": 1.0546020293616455e-05, "loss": 2.3384, "step": 8167 }, { "epoch": 0.9365898406146084, "grad_norm": 0.25543228522917905, "learning_rate": 1.0508116179485605e-05, "loss": 2.3592, "step": 8168 }, { "epoch": 0.9367045063639491, "grad_norm": 0.25087072636070334, "learning_rate": 1.0470279581429542e-05, "loss": 2.3475, "step": 8169 }, { "epoch": 0.9368191721132898, "grad_norm": 0.25474509236004017, "learning_rate": 1.0432510504667203e-05, "loss": 2.4221, "step": 8170 }, { "epoch": 0.9369338378626304, "grad_norm": 0.23843440945386765, "learning_rate": 1.039480895440803e-05, "loss": 2.1601, "step": 8171 }, { "epoch": 0.9370485036119711, "grad_norm": 0.26492667132581915, "learning_rate": 1.0357174935852199e-05, "loss": 2.3854, "step": 8172 }, { "epoch": 0.9371631693613117, "grad_norm": 0.24130163619148037, "learning_rate": 1.031960845419072e-05, "loss": 2.4521, "step": 8173 }, { "epoch": 0.9372778351106524, "grad_norm": 0.2520288278931533, "learning_rate": 1.0282109514604954e-05, "loss": 2.3969, "step": 8174 }, { "epoch": 0.9373925008599931, "grad_norm": 0.2886621048163839, "learning_rate": 1.024467812226737e-05, "loss": 2.3921, "step": 8175 }, { "epoch": 0.9375071666093338, "grad_norm": 0.2902248853621348, "learning_rate": 1.0207314282340952e-05, "loss": 2.451, "step": 8176 }, { "epoch": 0.9376218323586745, "grad_norm": 0.25050174508757433, "learning_rate": 1.0170017999979131e-05, "loss": 2.4845, "step": 8177 }, { "epoch": 0.9377364981080152, "grad_norm": 0.2586959952570016, "learning_rate": 1.0132789280326293e-05, "loss": 2.3119, "step": 8178 }, { "epoch": 0.9378511638573558, "grad_norm": 0.24259731500228857, "learning_rate": 1.0095628128517497e-05, "loss": 2.2834, "step": 8179 }, { "epoch": 0.9379658296066965, "grad_norm": 0.2624829536896241, "learning_rate": 1.0058534549678367e-05, "loss": 2.5068, "step": 8180 }, { "epoch": 0.9380804953560371, "grad_norm": 0.2456246342332185, "learning_rate": 1.0021508548925306e-05, "loss": 2.351, "step": 8181 }, { "epoch": 0.9381951611053778, "grad_norm": 0.2833106641353864, "learning_rate": 9.984550131365289e-06, "loss": 2.5897, "step": 8182 }, { "epoch": 0.9383098268547185, "grad_norm": 0.2763389271792517, "learning_rate": 9.947659302096012e-06, "loss": 2.3534, "step": 8183 }, { "epoch": 0.9384244926040591, "grad_norm": 0.25918347093172817, "learning_rate": 9.910836066205852e-06, "loss": 2.2401, "step": 8184 }, { "epoch": 0.9385391583533998, "grad_norm": 0.27506661410040334, "learning_rate": 9.874080428773968e-06, "loss": 2.4616, "step": 8185 }, { "epoch": 0.9386538241027406, "grad_norm": 0.2791890698718216, "learning_rate": 9.837392394870026e-06, "loss": 2.3201, "step": 8186 }, { "epoch": 0.9387684898520812, "grad_norm": 0.2563764371024099, "learning_rate": 9.800771969554423e-06, "loss": 2.402, "step": 8187 }, { "epoch": 0.9388831556014219, "grad_norm": 0.23461644597124304, "learning_rate": 9.76421915787834e-06, "loss": 2.38, "step": 8188 }, { "epoch": 0.9389978213507625, "grad_norm": 0.2783591103087557, "learning_rate": 9.727733964883355e-06, "loss": 2.359, "step": 8189 }, { "epoch": 0.9391124871001032, "grad_norm": 0.280376622582292, "learning_rate": 9.691316395602168e-06, "loss": 2.3404, "step": 8190 }, { "epoch": 0.9392271528494439, "grad_norm": 0.261009485782321, "learning_rate": 9.6549664550577e-06, "loss": 2.207, "step": 8191 }, { "epoch": 0.9393418185987845, "grad_norm": 0.2382090117687303, "learning_rate": 9.61868414826378e-06, "loss": 2.3753, "step": 8192 }, { "epoch": 0.9394564843481252, "grad_norm": 0.29608566487414617, "learning_rate": 9.582469480224842e-06, "loss": 2.4569, "step": 8193 }, { "epoch": 0.9395711500974658, "grad_norm": 0.23546322155848892, "learning_rate": 9.546322455936062e-06, "loss": 2.3255, "step": 8194 }, { "epoch": 0.9396858158468065, "grad_norm": 0.2682526249035245, "learning_rate": 9.510243080383174e-06, "loss": 2.4585, "step": 8195 }, { "epoch": 0.9398004815961473, "grad_norm": 0.258771175203155, "learning_rate": 9.474231358542751e-06, "loss": 2.5324, "step": 8196 }, { "epoch": 0.9399151473454879, "grad_norm": 0.26280991110474716, "learning_rate": 9.438287295381765e-06, "loss": 2.3957, "step": 8197 }, { "epoch": 0.9400298130948286, "grad_norm": 0.25583777145935155, "learning_rate": 9.402410895858082e-06, "loss": 2.2061, "step": 8198 }, { "epoch": 0.9401444788441693, "grad_norm": 0.2683364000943464, "learning_rate": 9.366602164920191e-06, "loss": 2.4681, "step": 8199 }, { "epoch": 0.9402591445935099, "grad_norm": 0.25470355285088, "learning_rate": 9.330861107507194e-06, "loss": 2.3237, "step": 8200 }, { "epoch": 0.9403738103428506, "grad_norm": 0.2611328858482173, "learning_rate": 9.295187728548926e-06, "loss": 2.3808, "step": 8201 }, { "epoch": 0.9404884760921912, "grad_norm": 0.23912676310221773, "learning_rate": 9.25958203296584e-06, "loss": 2.2712, "step": 8202 }, { "epoch": 0.9406031418415319, "grad_norm": 0.2495063308244627, "learning_rate": 9.224044025669009e-06, "loss": 2.3571, "step": 8203 }, { "epoch": 0.9407178075908726, "grad_norm": 0.2750014813890792, "learning_rate": 9.188573711560232e-06, "loss": 2.4177, "step": 8204 }, { "epoch": 0.9408324733402132, "grad_norm": 0.2758533583967891, "learning_rate": 9.153171095532097e-06, "loss": 2.4742, "step": 8205 }, { "epoch": 0.940947139089554, "grad_norm": 0.263950315915875, "learning_rate": 9.117836182467532e-06, "loss": 2.4068, "step": 8206 }, { "epoch": 0.9410618048388946, "grad_norm": 0.23929323323736013, "learning_rate": 9.082568977240469e-06, "loss": 2.4301, "step": 8207 }, { "epoch": 0.9411764705882353, "grad_norm": 0.26393297498459356, "learning_rate": 9.04736948471524e-06, "loss": 2.4352, "step": 8208 }, { "epoch": 0.941291136337576, "grad_norm": 0.23995622895639435, "learning_rate": 9.012237709746961e-06, "loss": 2.3966, "step": 8209 }, { "epoch": 0.9414058020869166, "grad_norm": 0.2907944245391007, "learning_rate": 8.977173657181426e-06, "loss": 2.3662, "step": 8210 }, { "epoch": 0.9415204678362573, "grad_norm": 0.25209705749344846, "learning_rate": 8.942177331855096e-06, "loss": 2.3436, "step": 8211 }, { "epoch": 0.941635133585598, "grad_norm": 0.261047032096895, "learning_rate": 8.907248738594942e-06, "loss": 2.3725, "step": 8212 }, { "epoch": 0.9417497993349386, "grad_norm": 0.2461029250719317, "learning_rate": 8.872387882218724e-06, "loss": 2.4841, "step": 8213 }, { "epoch": 0.9418644650842793, "grad_norm": 0.2571065825096209, "learning_rate": 8.837594767534873e-06, "loss": 2.2285, "step": 8214 }, { "epoch": 0.94197913083362, "grad_norm": 0.24301011444603032, "learning_rate": 8.802869399342384e-06, "loss": 2.4527, "step": 8215 }, { "epoch": 0.9420937965829607, "grad_norm": 0.2544954513278097, "learning_rate": 8.768211782431036e-06, "loss": 2.4334, "step": 8216 }, { "epoch": 0.9422084623323014, "grad_norm": 0.2631521290016174, "learning_rate": 8.73362192158117e-06, "loss": 2.443, "step": 8217 }, { "epoch": 0.942323128081642, "grad_norm": 0.24055154540659598, "learning_rate": 8.699099821563749e-06, "loss": 2.3142, "step": 8218 }, { "epoch": 0.9424377938309827, "grad_norm": 0.26221366755403697, "learning_rate": 8.66464548714041e-06, "loss": 2.4519, "step": 8219 }, { "epoch": 0.9425524595803234, "grad_norm": 0.24950036721508184, "learning_rate": 8.630258923063573e-06, "loss": 2.3231, "step": 8220 }, { "epoch": 0.942667125329664, "grad_norm": 0.2587646515070118, "learning_rate": 8.595940134076108e-06, "loss": 2.474, "step": 8221 }, { "epoch": 0.9427817910790047, "grad_norm": 0.2383356711770594, "learning_rate": 8.561689124911731e-06, "loss": 2.3602, "step": 8222 }, { "epoch": 0.9428964568283453, "grad_norm": 0.26418347944946585, "learning_rate": 8.527505900294719e-06, "loss": 2.3306, "step": 8223 }, { "epoch": 0.943011122577686, "grad_norm": 0.30481677695958276, "learning_rate": 8.493390464939854e-06, "loss": 2.3382, "step": 8224 }, { "epoch": 0.9431257883270268, "grad_norm": 0.2556177220339646, "learning_rate": 8.459342823552818e-06, "loss": 2.4669, "step": 8225 }, { "epoch": 0.9432404540763674, "grad_norm": 0.25263975231650243, "learning_rate": 8.425362980829909e-06, "loss": 2.3443, "step": 8226 }, { "epoch": 0.9433551198257081, "grad_norm": 0.23674415081125696, "learning_rate": 8.391450941457823e-06, "loss": 2.39, "step": 8227 }, { "epoch": 0.9434697855750487, "grad_norm": 0.275384626348196, "learning_rate": 8.357606710114207e-06, "loss": 2.4353, "step": 8228 }, { "epoch": 0.9435844513243894, "grad_norm": 0.2596109302555167, "learning_rate": 8.323830291467161e-06, "loss": 2.4022, "step": 8229 }, { "epoch": 0.9436991170737301, "grad_norm": 0.23552399591032394, "learning_rate": 8.290121690175567e-06, "loss": 2.2812, "step": 8230 }, { "epoch": 0.9438137828230707, "grad_norm": 0.2832792483730591, "learning_rate": 8.25648091088882e-06, "loss": 2.4406, "step": 8231 }, { "epoch": 0.9439284485724114, "grad_norm": 0.24407402094145375, "learning_rate": 8.222907958247094e-06, "loss": 2.4201, "step": 8232 }, { "epoch": 0.9440431143217521, "grad_norm": 0.2359417764000179, "learning_rate": 8.18940283688102e-06, "loss": 2.2518, "step": 8233 }, { "epoch": 0.9441577800710927, "grad_norm": 0.2953689762173765, "learning_rate": 8.15596555141207e-06, "loss": 2.4648, "step": 8234 }, { "epoch": 0.9442724458204335, "grad_norm": 0.23583814425399952, "learning_rate": 8.122596106452273e-06, "loss": 2.4145, "step": 8235 }, { "epoch": 0.9443871115697741, "grad_norm": 0.24081713067548055, "learning_rate": 8.089294506604338e-06, "loss": 2.4625, "step": 8236 }, { "epoch": 0.9445017773191148, "grad_norm": 0.25618574964819085, "learning_rate": 8.056060756461537e-06, "loss": 2.3716, "step": 8237 }, { "epoch": 0.9446164430684555, "grad_norm": 0.2931553241910916, "learning_rate": 8.022894860607866e-06, "loss": 2.3177, "step": 8238 }, { "epoch": 0.9447311088177961, "grad_norm": 0.25201195399629506, "learning_rate": 7.98979682361789e-06, "loss": 2.387, "step": 8239 }, { "epoch": 0.9448457745671368, "grad_norm": 0.23157594235541062, "learning_rate": 7.956766650056901e-06, "loss": 2.4126, "step": 8240 }, { "epoch": 0.9449604403164775, "grad_norm": 0.25052233889127434, "learning_rate": 7.923804344480756e-06, "loss": 2.3845, "step": 8241 }, { "epoch": 0.9450751060658181, "grad_norm": 0.2599024985461345, "learning_rate": 7.890909911435929e-06, "loss": 2.4538, "step": 8242 }, { "epoch": 0.9451897718151588, "grad_norm": 0.2461863638989664, "learning_rate": 7.858083355459677e-06, "loss": 2.3649, "step": 8243 }, { "epoch": 0.9453044375644994, "grad_norm": 0.25570765614632873, "learning_rate": 7.825324681079715e-06, "loss": 2.344, "step": 8244 }, { "epoch": 0.9454191033138402, "grad_norm": 0.27073657468203466, "learning_rate": 7.792633892814483e-06, "loss": 2.2348, "step": 8245 }, { "epoch": 0.9455337690631809, "grad_norm": 0.2875278758270435, "learning_rate": 7.760010995173151e-06, "loss": 2.2376, "step": 8246 }, { "epoch": 0.9456484348125215, "grad_norm": 0.27750889442392596, "learning_rate": 7.727455992655342e-06, "loss": 2.5322, "step": 8247 }, { "epoch": 0.9457631005618622, "grad_norm": 0.2559563778209828, "learning_rate": 7.694968889751408e-06, "loss": 2.3339, "step": 8248 }, { "epoch": 0.9458777663112028, "grad_norm": 0.24754544966331177, "learning_rate": 7.662549690942267e-06, "loss": 2.3161, "step": 8249 }, { "epoch": 0.9459924320605435, "grad_norm": 0.24799802622102513, "learning_rate": 7.630198400699672e-06, "loss": 2.3639, "step": 8250 }, { "epoch": 0.9461070978098842, "grad_norm": 0.26823317484198467, "learning_rate": 7.597915023485724e-06, "loss": 2.4358, "step": 8251 }, { "epoch": 0.9462217635592248, "grad_norm": 0.24518256672697789, "learning_rate": 7.565699563753414e-06, "loss": 2.4118, "step": 8252 }, { "epoch": 0.9463364293085655, "grad_norm": 0.2583556497728397, "learning_rate": 7.533552025946188e-06, "loss": 2.3429, "step": 8253 }, { "epoch": 0.9464510950579063, "grad_norm": 0.24802980403519487, "learning_rate": 7.5014724144981115e-06, "loss": 2.2754, "step": 8254 }, { "epoch": 0.9465657608072469, "grad_norm": 0.251840908988671, "learning_rate": 7.469460733834144e-06, "loss": 2.3421, "step": 8255 }, { "epoch": 0.9466804265565876, "grad_norm": 0.2718626846263573, "learning_rate": 7.4375169883695306e-06, "loss": 2.3137, "step": 8256 }, { "epoch": 0.9467950923059282, "grad_norm": 0.2517344320976422, "learning_rate": 7.405641182510359e-06, "loss": 2.339, "step": 8257 }, { "epoch": 0.9469097580552689, "grad_norm": 0.27945542632915055, "learning_rate": 7.373833320653334e-06, "loss": 2.4839, "step": 8258 }, { "epoch": 0.9470244238046096, "grad_norm": 0.24301312496700453, "learning_rate": 7.342093407185613e-06, "loss": 2.2752, "step": 8259 }, { "epoch": 0.9471390895539502, "grad_norm": 0.254627648646453, "learning_rate": 7.310421446485194e-06, "loss": 2.3177, "step": 8260 }, { "epoch": 0.9472537553032909, "grad_norm": 0.2663780075296378, "learning_rate": 7.2788174429206935e-06, "loss": 2.3615, "step": 8261 }, { "epoch": 0.9473684210526315, "grad_norm": 0.2669470205161946, "learning_rate": 7.24728140085118e-06, "loss": 2.3978, "step": 8262 }, { "epoch": 0.9474830868019722, "grad_norm": 0.25541325954841004, "learning_rate": 7.215813324626452e-06, "loss": 2.4179, "step": 8263 }, { "epoch": 0.947597752551313, "grad_norm": 0.26121280345126874, "learning_rate": 7.184413218586927e-06, "loss": 2.396, "step": 8264 }, { "epoch": 0.9477124183006536, "grad_norm": 0.2819296299012922, "learning_rate": 7.153081087063751e-06, "loss": 2.4594, "step": 8265 }, { "epoch": 0.9478270840499943, "grad_norm": 0.2311050058761652, "learning_rate": 7.121816934378467e-06, "loss": 2.3666, "step": 8266 }, { "epoch": 0.947941749799335, "grad_norm": 0.24868183497068921, "learning_rate": 7.090620764843458e-06, "loss": 2.3062, "step": 8267 }, { "epoch": 0.9480564155486756, "grad_norm": 0.2538853647633459, "learning_rate": 7.059492582761618e-06, "loss": 2.2446, "step": 8268 }, { "epoch": 0.9481710812980163, "grad_norm": 0.25978775168953633, "learning_rate": 7.028432392426398e-06, "loss": 2.4882, "step": 8269 }, { "epoch": 0.9482857470473569, "grad_norm": 0.22256521243699512, "learning_rate": 6.997440198122151e-06, "loss": 2.303, "step": 8270 }, { "epoch": 0.9484004127966976, "grad_norm": 0.24493201282792895, "learning_rate": 6.966516004123458e-06, "loss": 2.4145, "step": 8271 }, { "epoch": 0.9485150785460383, "grad_norm": 0.2867705130830074, "learning_rate": 6.935659814695849e-06, "loss": 2.3361, "step": 8272 }, { "epoch": 0.9486297442953789, "grad_norm": 0.24303993238198116, "learning_rate": 6.904871634095311e-06, "loss": 2.1991, "step": 8273 }, { "epoch": 0.9487444100447197, "grad_norm": 0.2561614797401005, "learning_rate": 6.8741514665683905e-06, "loss": 2.3874, "step": 8274 }, { "epoch": 0.9488590757940604, "grad_norm": 0.25889911175158453, "learning_rate": 6.84349931635253e-06, "loss": 2.3403, "step": 8275 }, { "epoch": 0.948973741543401, "grad_norm": 0.2696669586795356, "learning_rate": 6.812915187675517e-06, "loss": 2.4291, "step": 8276 }, { "epoch": 0.9490884072927417, "grad_norm": 0.2656433627560766, "learning_rate": 6.7823990847558635e-06, "loss": 2.3351, "step": 8277 }, { "epoch": 0.9492030730420823, "grad_norm": 0.2572834947221208, "learning_rate": 6.751951011802649e-06, "loss": 2.363, "step": 8278 }, { "epoch": 0.949317738791423, "grad_norm": 0.2517027906554384, "learning_rate": 6.7215709730156225e-06, "loss": 2.2274, "step": 8279 }, { "epoch": 0.9494324045407637, "grad_norm": 0.2744701809436497, "learning_rate": 6.691258972585157e-06, "loss": 2.4672, "step": 8280 }, { "epoch": 0.9495470702901043, "grad_norm": 0.2598967443229175, "learning_rate": 6.661015014692184e-06, "loss": 2.5424, "step": 8281 }, { "epoch": 0.949661736039445, "grad_norm": 0.2685728928687207, "learning_rate": 6.630839103508312e-06, "loss": 2.4848, "step": 8282 }, { "epoch": 0.9497764017887856, "grad_norm": 0.28309410998051354, "learning_rate": 6.600731243195712e-06, "loss": 2.4671, "step": 8283 }, { "epoch": 0.9498910675381264, "grad_norm": 0.2703105840260175, "learning_rate": 6.570691437907172e-06, "loss": 2.5553, "step": 8284 }, { "epoch": 0.9500057332874671, "grad_norm": 0.26168414854028377, "learning_rate": 6.540719691786101e-06, "loss": 2.4644, "step": 8285 }, { "epoch": 0.9501203990368077, "grad_norm": 0.2385760686453105, "learning_rate": 6.51081600896658e-06, "loss": 2.3778, "step": 8286 }, { "epoch": 0.9502350647861484, "grad_norm": 0.26687836018085953, "learning_rate": 6.480980393573255e-06, "loss": 2.4915, "step": 8287 }, { "epoch": 0.9503497305354891, "grad_norm": 0.2590077700080527, "learning_rate": 6.451212849721389e-06, "loss": 2.3588, "step": 8288 }, { "epoch": 0.9504643962848297, "grad_norm": 0.22761665414136276, "learning_rate": 6.421513381516752e-06, "loss": 2.5281, "step": 8289 }, { "epoch": 0.9505790620341704, "grad_norm": 0.27245624288842796, "learning_rate": 6.391881993055903e-06, "loss": 2.4664, "step": 8290 }, { "epoch": 0.950693727783511, "grad_norm": 0.3011637604420565, "learning_rate": 6.362318688425905e-06, "loss": 2.337, "step": 8291 }, { "epoch": 0.9508083935328517, "grad_norm": 0.25090579843869215, "learning_rate": 6.332823471704441e-06, "loss": 2.4416, "step": 8292 }, { "epoch": 0.9509230592821925, "grad_norm": 0.26981447542141124, "learning_rate": 6.303396346959867e-06, "loss": 2.4451, "step": 8293 }, { "epoch": 0.951037725031533, "grad_norm": 0.24675037790856236, "learning_rate": 6.2740373182509916e-06, "loss": 2.3511, "step": 8294 }, { "epoch": 0.9511523907808738, "grad_norm": 0.25520009926014153, "learning_rate": 6.244746389627409e-06, "loss": 2.3927, "step": 8295 }, { "epoch": 0.9512670565302144, "grad_norm": 0.26536958202966315, "learning_rate": 6.215523565129277e-06, "loss": 2.4324, "step": 8296 }, { "epoch": 0.9513817222795551, "grad_norm": 0.23027959919447816, "learning_rate": 6.186368848787261e-06, "loss": 2.3214, "step": 8297 }, { "epoch": 0.9514963880288958, "grad_norm": 0.25824101648853826, "learning_rate": 6.157282244622697e-06, "loss": 2.3741, "step": 8298 }, { "epoch": 0.9516110537782364, "grad_norm": 0.2574870582555831, "learning_rate": 6.1282637566476e-06, "loss": 2.3825, "step": 8299 }, { "epoch": 0.9517257195275771, "grad_norm": 0.22674492358316906, "learning_rate": 6.09931338886438e-06, "loss": 2.3879, "step": 8300 }, { "epoch": 0.9518403852769178, "grad_norm": 0.2442943026143882, "learning_rate": 6.070431145266342e-06, "loss": 2.4195, "step": 8301 }, { "epoch": 0.9519550510262584, "grad_norm": 0.2710190308868985, "learning_rate": 6.041617029837188e-06, "loss": 2.3508, "step": 8302 }, { "epoch": 0.9520697167755992, "grad_norm": 0.2487835860780046, "learning_rate": 6.012871046551238e-06, "loss": 2.3725, "step": 8303 }, { "epoch": 0.9521843825249398, "grad_norm": 0.24631300714824145, "learning_rate": 5.984193199373433e-06, "loss": 2.3199, "step": 8304 }, { "epoch": 0.9522990482742805, "grad_norm": 0.2828104915810898, "learning_rate": 5.955583492259442e-06, "loss": 2.3485, "step": 8305 }, { "epoch": 0.9524137140236212, "grad_norm": 0.27032574254147634, "learning_rate": 5.92704192915533e-06, "loss": 2.3933, "step": 8306 }, { "epoch": 0.9525283797729618, "grad_norm": 0.23853866638329677, "learning_rate": 5.898568513997893e-06, "loss": 2.4829, "step": 8307 }, { "epoch": 0.9526430455223025, "grad_norm": 0.2700118492653518, "learning_rate": 5.870163250714544e-06, "loss": 2.27, "step": 8308 }, { "epoch": 0.9527577112716432, "grad_norm": 0.2529457667048586, "learning_rate": 5.841826143223094e-06, "loss": 2.3639, "step": 8309 }, { "epoch": 0.9528723770209838, "grad_norm": 0.2534768616603229, "learning_rate": 5.8135571954323065e-06, "loss": 2.5443, "step": 8310 }, { "epoch": 0.9529870427703245, "grad_norm": 0.250687673797539, "learning_rate": 5.7853564112412274e-06, "loss": 2.4916, "step": 8311 }, { "epoch": 0.9531017085196651, "grad_norm": 0.2544087508919549, "learning_rate": 5.75722379453969e-06, "loss": 2.3374, "step": 8312 }, { "epoch": 0.9532163742690059, "grad_norm": 0.26331795354540927, "learning_rate": 5.7291593492079245e-06, "loss": 2.3509, "step": 8313 }, { "epoch": 0.9533310400183466, "grad_norm": 0.2535627933490605, "learning_rate": 5.701163079117e-06, "loss": 2.3188, "step": 8314 }, { "epoch": 0.9534457057676872, "grad_norm": 0.24786966133909533, "learning_rate": 5.6732349881284394e-06, "loss": 2.3342, "step": 8315 }, { "epoch": 0.9535603715170279, "grad_norm": 0.2543877198308559, "learning_rate": 5.6453750800944395e-06, "loss": 2.3627, "step": 8316 }, { "epoch": 0.9536750372663685, "grad_norm": 0.26908684213864814, "learning_rate": 5.617583358857647e-06, "loss": 2.3906, "step": 8317 }, { "epoch": 0.9537897030157092, "grad_norm": 0.2571294618781444, "learning_rate": 5.589859828251443e-06, "loss": 2.376, "step": 8318 }, { "epoch": 0.9539043687650499, "grad_norm": 0.2288080190888788, "learning_rate": 5.56220449209971e-06, "loss": 2.2848, "step": 8319 }, { "epoch": 0.9540190345143905, "grad_norm": 0.2675439354218467, "learning_rate": 5.5346173542171195e-06, "loss": 2.3461, "step": 8320 }, { "epoch": 0.9541337002637312, "grad_norm": 0.2545697958252429, "learning_rate": 5.507098418408629e-06, "loss": 2.2829, "step": 8321 }, { "epoch": 0.954248366013072, "grad_norm": 0.2640037128881111, "learning_rate": 5.479647688470035e-06, "loss": 2.4964, "step": 8322 }, { "epoch": 0.9543630317624126, "grad_norm": 0.24680418294124895, "learning_rate": 5.4522651681876976e-06, "loss": 2.4496, "step": 8323 }, { "epoch": 0.9544776975117533, "grad_norm": 0.27937992197459405, "learning_rate": 5.424950861338318e-06, "loss": 2.5054, "step": 8324 }, { "epoch": 0.9545923632610939, "grad_norm": 0.26686293410864853, "learning_rate": 5.397704771689549e-06, "loss": 2.3905, "step": 8325 }, { "epoch": 0.9547070290104346, "grad_norm": 0.27647543565482224, "learning_rate": 5.3705269029994955e-06, "loss": 2.3988, "step": 8326 }, { "epoch": 0.9548216947597753, "grad_norm": 0.24235521436265892, "learning_rate": 5.343417259016714e-06, "loss": 2.3787, "step": 8327 }, { "epoch": 0.9549363605091159, "grad_norm": 0.2579202116584383, "learning_rate": 5.316375843480492e-06, "loss": 2.4601, "step": 8328 }, { "epoch": 0.9550510262584566, "grad_norm": 0.24670098455650555, "learning_rate": 5.289402660120735e-06, "loss": 2.4395, "step": 8329 }, { "epoch": 0.9551656920077972, "grad_norm": 0.29804283541135873, "learning_rate": 5.262497712657799e-06, "loss": 2.4581, "step": 8330 }, { "epoch": 0.9552803577571379, "grad_norm": 0.24559413791377968, "learning_rate": 5.235661004802772e-06, "loss": 2.3473, "step": 8331 }, { "epoch": 0.9553950235064786, "grad_norm": 0.2676532781782227, "learning_rate": 5.208892540257249e-06, "loss": 2.571, "step": 8332 }, { "epoch": 0.9555096892558193, "grad_norm": 0.2723955925299646, "learning_rate": 5.1821923227134415e-06, "loss": 2.5049, "step": 8333 }, { "epoch": 0.95562435500516, "grad_norm": 0.2730711463800587, "learning_rate": 5.155560355854072e-06, "loss": 2.4023, "step": 8334 }, { "epoch": 0.9557390207545007, "grad_norm": 0.24420291135179673, "learning_rate": 5.12899664335259e-06, "loss": 2.4451, "step": 8335 }, { "epoch": 0.9558536865038413, "grad_norm": 0.2357257651263144, "learning_rate": 5.102501188872899e-06, "loss": 2.4001, "step": 8336 }, { "epoch": 0.955968352253182, "grad_norm": 0.2523126556735753, "learning_rate": 5.076073996069575e-06, "loss": 2.2557, "step": 8337 }, { "epoch": 0.9560830180025226, "grad_norm": 0.2791939052779133, "learning_rate": 5.049715068587757e-06, "loss": 2.3186, "step": 8338 }, { "epoch": 0.9561976837518633, "grad_norm": 0.27711418022798756, "learning_rate": 5.023424410063037e-06, "loss": 2.5132, "step": 8339 }, { "epoch": 0.956312349501204, "grad_norm": 0.23558857933079597, "learning_rate": 4.997202024121905e-06, "loss": 2.4132, "step": 8340 }, { "epoch": 0.9564270152505446, "grad_norm": 0.253260530868003, "learning_rate": 4.971047914381133e-06, "loss": 2.4074, "step": 8341 }, { "epoch": 0.9565416809998853, "grad_norm": 0.25437723525719685, "learning_rate": 4.944962084448168e-06, "loss": 2.4498, "step": 8342 }, { "epoch": 0.9566563467492261, "grad_norm": 0.2610423914681025, "learning_rate": 4.918944537921078e-06, "loss": 2.4428, "step": 8343 }, { "epoch": 0.9567710124985667, "grad_norm": 0.25845056010930106, "learning_rate": 4.89299527838849e-06, "loss": 2.4038, "step": 8344 }, { "epoch": 0.9568856782479074, "grad_norm": 0.24714501010664763, "learning_rate": 4.867114309429599e-06, "loss": 2.1805, "step": 8345 }, { "epoch": 0.957000343997248, "grad_norm": 0.2745990649825864, "learning_rate": 4.84130163461427e-06, "loss": 2.3015, "step": 8346 }, { "epoch": 0.9571150097465887, "grad_norm": 0.2632636359137389, "learning_rate": 4.815557257502712e-06, "loss": 2.5008, "step": 8347 }, { "epoch": 0.9572296754959294, "grad_norm": 0.2519043709936815, "learning_rate": 4.789881181645972e-06, "loss": 2.2929, "step": 8348 }, { "epoch": 0.95734434124527, "grad_norm": 0.29761126367304686, "learning_rate": 4.7642734105855515e-06, "loss": 2.4881, "step": 8349 }, { "epoch": 0.9574590069946107, "grad_norm": 0.24442059022814722, "learning_rate": 4.738733947853568e-06, "loss": 2.3661, "step": 8350 }, { "epoch": 0.9575736727439513, "grad_norm": 0.24876567862219537, "learning_rate": 4.713262796972706e-06, "loss": 2.3632, "step": 8351 }, { "epoch": 0.957688338493292, "grad_norm": 0.25481764587786854, "learning_rate": 4.687859961456265e-06, "loss": 2.2325, "step": 8352 }, { "epoch": 0.9578030042426328, "grad_norm": 0.2571578782698582, "learning_rate": 4.662525444807941e-06, "loss": 2.2839, "step": 8353 }, { "epoch": 0.9579176699919734, "grad_norm": 0.2650279118932301, "learning_rate": 4.637259250522274e-06, "loss": 2.3512, "step": 8354 }, { "epoch": 0.9580323357413141, "grad_norm": 0.25210339343781624, "learning_rate": 4.612061382084254e-06, "loss": 2.2983, "step": 8355 }, { "epoch": 0.9581470014906548, "grad_norm": 0.2611284550921159, "learning_rate": 4.586931842969378e-06, "loss": 2.4196, "step": 8356 }, { "epoch": 0.9582616672399954, "grad_norm": 0.26891327701704676, "learning_rate": 4.561870636643817e-06, "loss": 2.402, "step": 8357 }, { "epoch": 0.9583763329893361, "grad_norm": 0.2557116036715242, "learning_rate": 4.5368777665643625e-06, "loss": 2.3076, "step": 8358 }, { "epoch": 0.9584909987386767, "grad_norm": 0.2412888538341008, "learning_rate": 4.511953236178146e-06, "loss": 2.3084, "step": 8359 }, { "epoch": 0.9586056644880174, "grad_norm": 0.27363336282448064, "learning_rate": 4.487097048923139e-06, "loss": 2.5273, "step": 8360 }, { "epoch": 0.9587203302373581, "grad_norm": 0.2774842908544762, "learning_rate": 4.462309208227821e-06, "loss": 2.4187, "step": 8361 }, { "epoch": 0.9588349959866987, "grad_norm": 0.25774516848941514, "learning_rate": 4.43758971751107e-06, "loss": 2.3091, "step": 8362 }, { "epoch": 0.9589496617360395, "grad_norm": 0.2622898867122452, "learning_rate": 4.412938580182546e-06, "loss": 2.4931, "step": 8363 }, { "epoch": 0.9590643274853801, "grad_norm": 0.2562935073562918, "learning_rate": 4.388355799642418e-06, "loss": 2.4813, "step": 8364 }, { "epoch": 0.9591789932347208, "grad_norm": 0.24449369236245136, "learning_rate": 4.363841379281364e-06, "loss": 2.3105, "step": 8365 }, { "epoch": 0.9592936589840615, "grad_norm": 0.2571371389296851, "learning_rate": 4.339395322480733e-06, "loss": 2.4525, "step": 8366 }, { "epoch": 0.9594083247334021, "grad_norm": 0.2449777465007151, "learning_rate": 4.315017632612383e-06, "loss": 2.4308, "step": 8367 }, { "epoch": 0.9595229904827428, "grad_norm": 0.24055070955795596, "learning_rate": 4.2907083130387385e-06, "loss": 2.4023, "step": 8368 }, { "epoch": 0.9596376562320835, "grad_norm": 0.2501889870635449, "learning_rate": 4.266467367112725e-06, "loss": 2.3454, "step": 8369 }, { "epoch": 0.9597523219814241, "grad_norm": 0.26470128591142694, "learning_rate": 4.242294798178059e-06, "loss": 2.2379, "step": 8370 }, { "epoch": 0.9598669877307648, "grad_norm": 0.31153605708143683, "learning_rate": 4.21819060956885e-06, "loss": 2.4934, "step": 8371 }, { "epoch": 0.9599816534801054, "grad_norm": 0.2562742180643882, "learning_rate": 4.1941548046097176e-06, "loss": 2.3686, "step": 8372 }, { "epoch": 0.9600963192294462, "grad_norm": 0.28757836786342056, "learning_rate": 4.170187386616064e-06, "loss": 2.2426, "step": 8373 }, { "epoch": 0.9602109849787869, "grad_norm": 0.2636900410544457, "learning_rate": 4.146288358893635e-06, "loss": 2.5788, "step": 8374 }, { "epoch": 0.9603256507281275, "grad_norm": 0.27547721186056146, "learning_rate": 4.1224577247389036e-06, "loss": 2.3483, "step": 8375 }, { "epoch": 0.9604403164774682, "grad_norm": 0.23583431981462877, "learning_rate": 4.098695487438908e-06, "loss": 2.3554, "step": 8376 }, { "epoch": 0.9605549822268089, "grad_norm": 0.27678831304033363, "learning_rate": 4.075001650271082e-06, "loss": 2.5244, "step": 8377 }, { "epoch": 0.9606696479761495, "grad_norm": 0.2554363846591312, "learning_rate": 4.051376216503588e-06, "loss": 2.3167, "step": 8378 }, { "epoch": 0.9607843137254902, "grad_norm": 0.2580344106752261, "learning_rate": 4.027819189395099e-06, "loss": 2.4036, "step": 8379 }, { "epoch": 0.9608989794748308, "grad_norm": 0.2816616575444161, "learning_rate": 4.004330572194903e-06, "loss": 2.4606, "step": 8380 }, { "epoch": 0.9610136452241715, "grad_norm": 0.28968082659291905, "learning_rate": 3.9809103681426876e-06, "loss": 2.4585, "step": 8381 }, { "epoch": 0.9611283109735123, "grad_norm": 0.25259906443195596, "learning_rate": 3.9575585804689785e-06, "loss": 2.3501, "step": 8382 }, { "epoch": 0.9612429767228529, "grad_norm": 0.25662404265332256, "learning_rate": 3.9342752123946446e-06, "loss": 2.4186, "step": 8383 }, { "epoch": 0.9613576424721936, "grad_norm": 0.28381785279693855, "learning_rate": 3.911060267131117e-06, "loss": 2.5626, "step": 8384 }, { "epoch": 0.9614723082215342, "grad_norm": 0.2736075773189394, "learning_rate": 3.8879137478805006e-06, "loss": 2.3913, "step": 8385 }, { "epoch": 0.9615869739708749, "grad_norm": 0.2639082016217544, "learning_rate": 3.864835657835464e-06, "loss": 2.2596, "step": 8386 }, { "epoch": 0.9617016397202156, "grad_norm": 0.25312072830550825, "learning_rate": 3.841826000179127e-06, "loss": 2.3277, "step": 8387 }, { "epoch": 0.9618163054695562, "grad_norm": 0.2728875003965544, "learning_rate": 3.818884778085286e-06, "loss": 2.3821, "step": 8388 }, { "epoch": 0.9619309712188969, "grad_norm": 0.24836051430557018, "learning_rate": 3.7960119947181847e-06, "loss": 2.4183, "step": 8389 }, { "epoch": 0.9620456369682376, "grad_norm": 0.2481167959315104, "learning_rate": 3.7732076532327463e-06, "loss": 2.4998, "step": 8390 }, { "epoch": 0.9621603027175782, "grad_norm": 0.2621211159878882, "learning_rate": 3.7504717567743986e-06, "loss": 2.4555, "step": 8391 }, { "epoch": 0.962274968466919, "grad_norm": 0.2405782746911224, "learning_rate": 3.7278043084790214e-06, "loss": 2.3392, "step": 8392 }, { "epoch": 0.9623896342162596, "grad_norm": 0.2600099823094173, "learning_rate": 3.705205311473281e-06, "loss": 2.2495, "step": 8393 }, { "epoch": 0.9625042999656003, "grad_norm": 0.26291773328794926, "learning_rate": 3.682674768874239e-06, "loss": 2.2289, "step": 8394 }, { "epoch": 0.962618965714941, "grad_norm": 0.2884000301364202, "learning_rate": 3.660212683789521e-06, "loss": 2.4623, "step": 8395 }, { "epoch": 0.9627336314642816, "grad_norm": 0.2615649223766214, "learning_rate": 3.637819059317371e-06, "loss": 2.3936, "step": 8396 }, { "epoch": 0.9628482972136223, "grad_norm": 0.2772111623220838, "learning_rate": 3.61549389854654e-06, "loss": 2.3991, "step": 8397 }, { "epoch": 0.9629629629629629, "grad_norm": 0.24320704776268254, "learning_rate": 3.5932372045563986e-06, "loss": 2.416, "step": 8398 }, { "epoch": 0.9630776287123036, "grad_norm": 0.23006586809253868, "learning_rate": 3.571048980416769e-06, "loss": 2.3647, "step": 8399 }, { "epoch": 0.9631922944616443, "grad_norm": 0.27537648184512703, "learning_rate": 3.5489292291882023e-06, "loss": 2.3363, "step": 8400 }, { "epoch": 0.9633069602109849, "grad_norm": 0.26573239909756463, "learning_rate": 3.526877953921592e-06, "loss": 2.4941, "step": 8401 }, { "epoch": 0.9634216259603257, "grad_norm": 0.23504372292061523, "learning_rate": 3.5048951576585607e-06, "loss": 2.438, "step": 8402 }, { "epoch": 0.9635362917096664, "grad_norm": 0.23039198012059517, "learning_rate": 3.482980843431127e-06, "loss": 2.3353, "step": 8403 }, { "epoch": 0.963650957459007, "grad_norm": 0.2807335610428062, "learning_rate": 3.46113501426204e-06, "loss": 2.5978, "step": 8404 }, { "epoch": 0.9637656232083477, "grad_norm": 0.24931737692248507, "learning_rate": 3.4393576731645003e-06, "loss": 2.4239, "step": 8405 }, { "epoch": 0.9638802889576883, "grad_norm": 0.2638901800050044, "learning_rate": 3.4176488231422163e-06, "loss": 2.4502, "step": 8406 }, { "epoch": 0.963994954707029, "grad_norm": 0.24767178698275336, "learning_rate": 3.396008467189626e-06, "loss": 2.2911, "step": 8407 }, { "epoch": 0.9641096204563697, "grad_norm": 0.23863421504479457, "learning_rate": 3.374436608291509e-06, "loss": 2.1898, "step": 8408 }, { "epoch": 0.9642242862057103, "grad_norm": 0.27956160235255784, "learning_rate": 3.3529332494232623e-06, "loss": 2.5438, "step": 8409 }, { "epoch": 0.964338951955051, "grad_norm": 0.316384202323507, "learning_rate": 3.331498393550958e-06, "loss": 2.3726, "step": 8410 }, { "epoch": 0.9644536177043918, "grad_norm": 0.24870588118664255, "learning_rate": 3.31013204363112e-06, "loss": 2.329, "step": 8411 }, { "epoch": 0.9645682834537324, "grad_norm": 0.29291804670613175, "learning_rate": 3.2888342026107797e-06, "loss": 2.4762, "step": 8412 }, { "epoch": 0.9646829492030731, "grad_norm": 0.25438408331469714, "learning_rate": 3.267604873427532e-06, "loss": 2.3931, "step": 8413 }, { "epoch": 0.9647976149524137, "grad_norm": 0.25729325979079226, "learning_rate": 3.2464440590097013e-06, "loss": 2.4832, "step": 8414 }, { "epoch": 0.9649122807017544, "grad_norm": 0.24890578951332115, "learning_rate": 3.225351762275841e-06, "loss": 2.3521, "step": 8415 }, { "epoch": 0.9650269464510951, "grad_norm": 0.2654343198648445, "learning_rate": 3.204327986135347e-06, "loss": 2.291, "step": 8416 }, { "epoch": 0.9651416122004357, "grad_norm": 0.26795019871067954, "learning_rate": 3.183372733488066e-06, "loss": 2.4191, "step": 8417 }, { "epoch": 0.9652562779497764, "grad_norm": 0.2428742026047186, "learning_rate": 3.162486007224297e-06, "loss": 2.3942, "step": 8418 }, { "epoch": 0.965370943699117, "grad_norm": 0.2929667099073874, "learning_rate": 3.1416678102249574e-06, "loss": 2.3929, "step": 8419 }, { "epoch": 0.9654856094484577, "grad_norm": 0.24590684530648857, "learning_rate": 3.1209181453616396e-06, "loss": 2.3613, "step": 8420 }, { "epoch": 0.9656002751977985, "grad_norm": 0.2833691750923599, "learning_rate": 3.1002370154962212e-06, "loss": 2.5156, "step": 8421 }, { "epoch": 0.9657149409471391, "grad_norm": 0.2587312929452015, "learning_rate": 3.079624423481364e-06, "loss": 2.3396, "step": 8422 }, { "epoch": 0.9658296066964798, "grad_norm": 0.2704084886452787, "learning_rate": 3.0590803721601833e-06, "loss": 2.2815, "step": 8423 }, { "epoch": 0.9659442724458205, "grad_norm": 0.2905788352811762, "learning_rate": 3.0386048643662455e-06, "loss": 2.4134, "step": 8424 }, { "epoch": 0.9660589381951611, "grad_norm": 0.24361707702662044, "learning_rate": 3.018197902923847e-06, "loss": 2.2445, "step": 8425 }, { "epoch": 0.9661736039445018, "grad_norm": 0.2550232728292785, "learning_rate": 2.997859490647736e-06, "loss": 2.4258, "step": 8426 }, { "epoch": 0.9662882696938424, "grad_norm": 0.24151542870021409, "learning_rate": 2.9775896303431694e-06, "loss": 2.2899, "step": 8427 }, { "epoch": 0.9664029354431831, "grad_norm": 0.2727833915889519, "learning_rate": 2.9573883248060207e-06, "loss": 2.3635, "step": 8428 }, { "epoch": 0.9665176011925238, "grad_norm": 0.24532817830146272, "learning_rate": 2.9372555768226172e-06, "loss": 2.2529, "step": 8429 }, { "epoch": 0.9666322669418644, "grad_norm": 0.28083526271437764, "learning_rate": 2.91719138916996e-06, "loss": 2.4243, "step": 8430 }, { "epoch": 0.9667469326912052, "grad_norm": 0.25164075668168007, "learning_rate": 2.897195764615501e-06, "loss": 2.5141, "step": 8431 }, { "epoch": 0.9668615984405458, "grad_norm": 0.2513564476712266, "learning_rate": 2.8772687059172577e-06, "loss": 2.4376, "step": 8432 }, { "epoch": 0.9669762641898865, "grad_norm": 0.2842053189882223, "learning_rate": 2.8574102158237525e-06, "loss": 2.4194, "step": 8433 }, { "epoch": 0.9670909299392272, "grad_norm": 0.2611075009076694, "learning_rate": 2.8376202970740726e-06, "loss": 2.2427, "step": 8434 }, { "epoch": 0.9672055956885678, "grad_norm": 0.28626553183420866, "learning_rate": 2.8178989523979235e-06, "loss": 2.437, "step": 8435 }, { "epoch": 0.9673202614379085, "grad_norm": 0.2623496442953994, "learning_rate": 2.7982461845154627e-06, "loss": 2.4218, "step": 8436 }, { "epoch": 0.9674349271872492, "grad_norm": 0.24122108299802497, "learning_rate": 2.7786619961374103e-06, "loss": 2.3628, "step": 8437 }, { "epoch": 0.9675495929365898, "grad_norm": 0.2401193007514644, "learning_rate": 2.7591463899650505e-06, "loss": 2.3108, "step": 8438 }, { "epoch": 0.9676642586859305, "grad_norm": 0.2859965051341742, "learning_rate": 2.7396993686901184e-06, "loss": 2.4414, "step": 8439 }, { "epoch": 0.9677789244352711, "grad_norm": 0.251153094870571, "learning_rate": 2.72032093499508e-06, "loss": 2.3124, "step": 8440 }, { "epoch": 0.9678935901846119, "grad_norm": 0.2726179968976424, "learning_rate": 2.701011091552741e-06, "loss": 2.354, "step": 8441 }, { "epoch": 0.9680082559339526, "grad_norm": 0.24020894522782626, "learning_rate": 2.6817698410265266e-06, "loss": 2.3449, "step": 8442 }, { "epoch": 0.9681229216832932, "grad_norm": 0.2628606635548168, "learning_rate": 2.6625971860704256e-06, "loss": 2.3972, "step": 8443 }, { "epoch": 0.9682375874326339, "grad_norm": 0.24459212683548268, "learning_rate": 2.643493129328933e-06, "loss": 2.4123, "step": 8444 }, { "epoch": 0.9683522531819746, "grad_norm": 0.25799431051324895, "learning_rate": 2.624457673437053e-06, "loss": 2.3618, "step": 8445 }, { "epoch": 0.9684669189313152, "grad_norm": 0.29717022944311133, "learning_rate": 2.605490821020462e-06, "loss": 2.4505, "step": 8446 }, { "epoch": 0.9685815846806559, "grad_norm": 0.25993638806101477, "learning_rate": 2.5865925746951234e-06, "loss": 2.3699, "step": 8447 }, { "epoch": 0.9686962504299965, "grad_norm": 0.2834277057767693, "learning_rate": 2.5677629370678413e-06, "loss": 2.2227, "step": 8448 }, { "epoch": 0.9688109161793372, "grad_norm": 0.2437292527163565, "learning_rate": 2.5490019107357045e-06, "loss": 2.2666, "step": 8449 }, { "epoch": 0.968925581928678, "grad_norm": 0.28649100931769983, "learning_rate": 2.530309498286476e-06, "loss": 2.4059, "step": 8450 }, { "epoch": 0.9690402476780186, "grad_norm": 0.2982880823695621, "learning_rate": 2.511685702298483e-06, "loss": 2.3061, "step": 8451 }, { "epoch": 0.9691549134273593, "grad_norm": 0.25532804029621126, "learning_rate": 2.4931305253403925e-06, "loss": 2.248, "step": 8452 }, { "epoch": 0.9692695791766999, "grad_norm": 0.23928389861445995, "learning_rate": 2.4746439699716016e-06, "loss": 2.2957, "step": 8453 }, { "epoch": 0.9693842449260406, "grad_norm": 0.24814515847985688, "learning_rate": 2.4562260387420154e-06, "loss": 2.2715, "step": 8454 }, { "epoch": 0.9694989106753813, "grad_norm": 0.2808779510379786, "learning_rate": 2.4378767341919907e-06, "loss": 2.3207, "step": 8455 }, { "epoch": 0.9696135764247219, "grad_norm": 0.259420852389618, "learning_rate": 2.4195960588524467e-06, "loss": 2.3086, "step": 8456 }, { "epoch": 0.9697282421740626, "grad_norm": 0.25913114440745566, "learning_rate": 2.401384015244867e-06, "loss": 2.3251, "step": 8457 }, { "epoch": 0.9698429079234033, "grad_norm": 0.27636273488207475, "learning_rate": 2.383240605881354e-06, "loss": 2.3595, "step": 8458 }, { "epoch": 0.9699575736727439, "grad_norm": 0.25222454998088556, "learning_rate": 2.365165833264293e-06, "loss": 2.3091, "step": 8459 }, { "epoch": 0.9700722394220846, "grad_norm": 0.26521689905308565, "learning_rate": 2.3471596998868026e-06, "loss": 2.356, "step": 8460 }, { "epoch": 0.9701869051714253, "grad_norm": 0.26201129724181055, "learning_rate": 2.329222208232562e-06, "loss": 2.2731, "step": 8461 }, { "epoch": 0.970301570920766, "grad_norm": 0.24982548082938946, "learning_rate": 2.3113533607756478e-06, "loss": 2.4669, "step": 8462 }, { "epoch": 0.9704162366701067, "grad_norm": 0.2463293940012505, "learning_rate": 2.293553159980699e-06, "loss": 2.2925, "step": 8463 }, { "epoch": 0.9705309024194473, "grad_norm": 0.23591816998375842, "learning_rate": 2.2758216083029192e-06, "loss": 2.3594, "step": 8464 }, { "epoch": 0.970645568168788, "grad_norm": 0.2557743380233621, "learning_rate": 2.2581587081881294e-06, "loss": 2.367, "step": 8465 }, { "epoch": 0.9707602339181286, "grad_norm": 0.2646908234834943, "learning_rate": 2.240564462072492e-06, "loss": 2.2461, "step": 8466 }, { "epoch": 0.9708748996674693, "grad_norm": 0.26900631526322105, "learning_rate": 2.2230388723828433e-06, "loss": 2.4389, "step": 8467 }, { "epoch": 0.97098956541681, "grad_norm": 0.2801739466040622, "learning_rate": 2.205581941536472e-06, "loss": 2.2958, "step": 8468 }, { "epoch": 0.9711042311661506, "grad_norm": 0.2605246338911792, "learning_rate": 2.1881936719411744e-06, "loss": 2.3529, "step": 8469 }, { "epoch": 0.9712188969154913, "grad_norm": 0.25239985832291245, "learning_rate": 2.170874065995532e-06, "loss": 2.3785, "step": 8470 }, { "epoch": 0.9713335626648321, "grad_norm": 0.2696513861947799, "learning_rate": 2.1536231260882446e-06, "loss": 2.3186, "step": 8471 }, { "epoch": 0.9714482284141727, "grad_norm": 0.29162139459134045, "learning_rate": 2.1364408545988533e-06, "loss": 2.3864, "step": 8472 }, { "epoch": 0.9715628941635134, "grad_norm": 0.2663284470093609, "learning_rate": 2.1193272538973516e-06, "loss": 2.5046, "step": 8473 }, { "epoch": 0.971677559912854, "grad_norm": 0.27015464372200615, "learning_rate": 2.1022823263441294e-06, "loss": 2.4469, "step": 8474 }, { "epoch": 0.9717922256621947, "grad_norm": 0.2556707523662889, "learning_rate": 2.08530607429025e-06, "loss": 2.1617, "step": 8475 }, { "epoch": 0.9719068914115354, "grad_norm": 0.26491338644190016, "learning_rate": 2.0683985000773396e-06, "loss": 2.3224, "step": 8476 }, { "epoch": 0.972021557160876, "grad_norm": 0.24145245870429918, "learning_rate": 2.0515596060373675e-06, "loss": 2.3224, "step": 8477 }, { "epoch": 0.9721362229102167, "grad_norm": 0.23731236863928826, "learning_rate": 2.0347893944930306e-06, "loss": 2.5342, "step": 8478 }, { "epoch": 0.9722508886595574, "grad_norm": 0.31113528877749735, "learning_rate": 2.0180878677573677e-06, "loss": 2.3587, "step": 8479 }, { "epoch": 0.972365554408898, "grad_norm": 0.25339015801507186, "learning_rate": 2.0014550281340914e-06, "loss": 2.2876, "step": 8480 }, { "epoch": 0.9724802201582388, "grad_norm": 0.2419201820307682, "learning_rate": 1.9848908779174223e-06, "loss": 2.3492, "step": 8481 }, { "epoch": 0.9725948859075794, "grad_norm": 0.2434474478898852, "learning_rate": 1.9683954193920328e-06, "loss": 2.3274, "step": 8482 }, { "epoch": 0.9727095516569201, "grad_norm": 0.2704571497396297, "learning_rate": 1.9519686548331028e-06, "loss": 2.3012, "step": 8483 }, { "epoch": 0.9728242174062608, "grad_norm": 0.2305453641312586, "learning_rate": 1.935610586506431e-06, "loss": 2.3197, "step": 8484 }, { "epoch": 0.9729388831556014, "grad_norm": 0.2578872919862767, "learning_rate": 1.9193212166683237e-06, "loss": 2.3573, "step": 8485 }, { "epoch": 0.9730535489049421, "grad_norm": 0.2675929036086628, "learning_rate": 1.9031005475655948e-06, "loss": 2.2994, "step": 8486 }, { "epoch": 0.9731682146542827, "grad_norm": 0.27769510078029847, "learning_rate": 1.8869485814355103e-06, "loss": 2.2844, "step": 8487 }, { "epoch": 0.9732828804036234, "grad_norm": 0.23492978133841613, "learning_rate": 1.87086532050601e-06, "loss": 2.3786, "step": 8488 }, { "epoch": 0.9733975461529641, "grad_norm": 0.32549103613331437, "learning_rate": 1.8548507669954307e-06, "loss": 2.4331, "step": 8489 }, { "epoch": 0.9735122119023047, "grad_norm": 0.2457448063532635, "learning_rate": 1.838904923112672e-06, "loss": 2.3706, "step": 8490 }, { "epoch": 0.9736268776516455, "grad_norm": 0.23738500721549768, "learning_rate": 1.8230277910571413e-06, "loss": 2.4697, "step": 8491 }, { "epoch": 0.9737415434009862, "grad_norm": 0.24351712546075663, "learning_rate": 1.8072193730188091e-06, "loss": 2.3576, "step": 8492 }, { "epoch": 0.9738562091503268, "grad_norm": 0.25803284402512366, "learning_rate": 1.791479671178209e-06, "loss": 2.4386, "step": 8493 }, { "epoch": 0.9739708748996675, "grad_norm": 0.2560372398616809, "learning_rate": 1.7758086877062153e-06, "loss": 2.3201, "step": 8494 }, { "epoch": 0.9740855406490081, "grad_norm": 0.25040048963431133, "learning_rate": 1.7602064247643768e-06, "loss": 2.4845, "step": 8495 }, { "epoch": 0.9742002063983488, "grad_norm": 0.24204034497013002, "learning_rate": 1.7446728845048055e-06, "loss": 2.2566, "step": 8496 }, { "epoch": 0.9743148721476895, "grad_norm": 0.27725711187821334, "learning_rate": 1.7292080690699542e-06, "loss": 2.2911, "step": 8497 }, { "epoch": 0.9744295378970301, "grad_norm": 0.2605717025827012, "learning_rate": 1.7138119805929498e-06, "loss": 2.3371, "step": 8498 }, { "epoch": 0.9745442036463708, "grad_norm": 0.2584119966825838, "learning_rate": 1.698484621197427e-06, "loss": 2.4076, "step": 8499 }, { "epoch": 0.9746588693957114, "grad_norm": 0.25005681982530353, "learning_rate": 1.683225992997417e-06, "loss": 2.302, "step": 8500 }, { "epoch": 0.9747735351450522, "grad_norm": 0.2615039172756164, "learning_rate": 1.668036098097625e-06, "loss": 2.3953, "step": 8501 }, { "epoch": 0.9748882008943929, "grad_norm": 0.2550620184315978, "learning_rate": 1.652914938593153e-06, "loss": 2.3409, "step": 8502 }, { "epoch": 0.9750028666437335, "grad_norm": 0.24508727119384985, "learning_rate": 1.6378625165697213e-06, "loss": 2.3968, "step": 8503 }, { "epoch": 0.9751175323930742, "grad_norm": 0.27767149087417653, "learning_rate": 1.6228788341035028e-06, "loss": 2.4108, "step": 8504 }, { "epoch": 0.9752321981424149, "grad_norm": 0.2786241869901198, "learning_rate": 1.607963893261233e-06, "loss": 2.4078, "step": 8505 }, { "epoch": 0.9753468638917555, "grad_norm": 0.24591812819938894, "learning_rate": 1.5931176961000993e-06, "loss": 2.3426, "step": 8506 }, { "epoch": 0.9754615296410962, "grad_norm": 0.25936059468835, "learning_rate": 1.5783402446679084e-06, "loss": 2.4557, "step": 8507 }, { "epoch": 0.9755761953904368, "grad_norm": 0.26959900676022736, "learning_rate": 1.563631541002919e-06, "loss": 2.3734, "step": 8508 }, { "epoch": 0.9756908611397775, "grad_norm": 0.23271576336009359, "learning_rate": 1.5489915871338411e-06, "loss": 2.3424, "step": 8509 }, { "epoch": 0.9758055268891183, "grad_norm": 0.3047475012515261, "learning_rate": 1.5344203850800597e-06, "loss": 2.489, "step": 8510 }, { "epoch": 0.9759201926384589, "grad_norm": 0.27608972436248186, "learning_rate": 1.519917936851356e-06, "loss": 2.4136, "step": 8511 }, { "epoch": 0.9760348583877996, "grad_norm": 0.2775226381175172, "learning_rate": 1.5054842444480743e-06, "loss": 2.3587, "step": 8512 }, { "epoch": 0.9761495241371403, "grad_norm": 0.25709021435459783, "learning_rate": 1.491119309861122e-06, "loss": 2.4863, "step": 8513 }, { "epoch": 0.9762641898864809, "grad_norm": 0.27395764061458866, "learning_rate": 1.4768231350717477e-06, "loss": 2.2367, "step": 8514 }, { "epoch": 0.9763788556358216, "grad_norm": 0.2608829989376526, "learning_rate": 1.4625957220519292e-06, "loss": 2.351, "step": 8515 }, { "epoch": 0.9764935213851622, "grad_norm": 0.2611176925670891, "learning_rate": 1.4484370727640417e-06, "loss": 2.437, "step": 8516 }, { "epoch": 0.9766081871345029, "grad_norm": 0.255556131065512, "learning_rate": 1.4343471891610783e-06, "loss": 2.4218, "step": 8517 }, { "epoch": 0.9767228528838436, "grad_norm": 0.26108419748104433, "learning_rate": 1.4203260731863176e-06, "loss": 2.3108, "step": 8518 }, { "epoch": 0.9768375186331842, "grad_norm": 0.2550087628600148, "learning_rate": 1.4063737267737686e-06, "loss": 2.2847, "step": 8519 }, { "epoch": 0.976952184382525, "grad_norm": 0.2733763933884475, "learning_rate": 1.392490151847947e-06, "loss": 2.232, "step": 8520 }, { "epoch": 0.9770668501318656, "grad_norm": 0.26434913676493516, "learning_rate": 1.3786753503237658e-06, "loss": 2.3077, "step": 8521 }, { "epoch": 0.9771815158812063, "grad_norm": 0.25819237027229835, "learning_rate": 1.3649293241067007e-06, "loss": 2.4131, "step": 8522 }, { "epoch": 0.977296181630547, "grad_norm": 0.27401061392284126, "learning_rate": 1.3512520750928458e-06, "loss": 2.3191, "step": 8523 }, { "epoch": 0.9774108473798876, "grad_norm": 0.24198730650660902, "learning_rate": 1.3376436051686369e-06, "loss": 2.2846, "step": 8524 }, { "epoch": 0.9775255131292283, "grad_norm": 0.2585859894798289, "learning_rate": 1.324103916211128e-06, "loss": 2.3691, "step": 8525 }, { "epoch": 0.977640178878569, "grad_norm": 0.2638394998218401, "learning_rate": 1.310633010087825e-06, "loss": 2.4373, "step": 8526 }, { "epoch": 0.9777548446279096, "grad_norm": 0.2552706076084606, "learning_rate": 1.297230888656853e-06, "loss": 2.4431, "step": 8527 }, { "epoch": 0.9778695103772503, "grad_norm": 0.2752598378347172, "learning_rate": 1.283897553766733e-06, "loss": 2.4102, "step": 8528 }, { "epoch": 0.9779841761265909, "grad_norm": 0.25793116206924965, "learning_rate": 1.2706330072564942e-06, "loss": 2.3146, "step": 8529 }, { "epoch": 0.9780988418759317, "grad_norm": 0.2784917858625757, "learning_rate": 1.2574372509558397e-06, "loss": 2.3299, "step": 8530 }, { "epoch": 0.9782135076252724, "grad_norm": 0.2774151749871697, "learning_rate": 1.2443102866848134e-06, "loss": 2.4968, "step": 8531 }, { "epoch": 0.978328173374613, "grad_norm": 0.28230478265078923, "learning_rate": 1.231252116254078e-06, "loss": 2.3479, "step": 8532 }, { "epoch": 0.9784428391239537, "grad_norm": 0.25304453276351124, "learning_rate": 1.2182627414646375e-06, "loss": 2.4481, "step": 8533 }, { "epoch": 0.9785575048732943, "grad_norm": 0.2767444064552373, "learning_rate": 1.2053421641082806e-06, "loss": 2.2822, "step": 8534 }, { "epoch": 0.978672170622635, "grad_norm": 0.2565010664441952, "learning_rate": 1.1924903859670266e-06, "loss": 2.4065, "step": 8535 }, { "epoch": 0.9787868363719757, "grad_norm": 0.23713810154018222, "learning_rate": 1.1797074088135685e-06, "loss": 2.4407, "step": 8536 }, { "epoch": 0.9789015021213163, "grad_norm": 0.2584627467071504, "learning_rate": 1.1669932344111623e-06, "loss": 2.4213, "step": 8537 }, { "epoch": 0.979016167870657, "grad_norm": 0.2637919677709154, "learning_rate": 1.1543478645134054e-06, "loss": 2.5363, "step": 8538 }, { "epoch": 0.9791308336199978, "grad_norm": 0.2865891791062618, "learning_rate": 1.1417713008644582e-06, "loss": 2.4531, "step": 8539 }, { "epoch": 0.9792454993693384, "grad_norm": 0.2609159710137132, "learning_rate": 1.129263545199044e-06, "loss": 2.3607, "step": 8540 }, { "epoch": 0.9793601651186791, "grad_norm": 0.28009965269635984, "learning_rate": 1.1168245992424497e-06, "loss": 2.393, "step": 8541 }, { "epoch": 0.9794748308680197, "grad_norm": 0.2370978025934183, "learning_rate": 1.1044544647102473e-06, "loss": 2.3487, "step": 8542 }, { "epoch": 0.9795894966173604, "grad_norm": 0.23859973817093474, "learning_rate": 1.092153143308794e-06, "loss": 2.3618, "step": 8543 }, { "epoch": 0.9797041623667011, "grad_norm": 0.2641864262541659, "learning_rate": 1.0799206367347326e-06, "loss": 2.5617, "step": 8544 }, { "epoch": 0.9798188281160417, "grad_norm": 0.26185422861275354, "learning_rate": 1.06775694667538e-06, "loss": 2.4492, "step": 8545 }, { "epoch": 0.9799334938653824, "grad_norm": 0.26620400934088717, "learning_rate": 1.0556620748083945e-06, "loss": 2.4071, "step": 8546 }, { "epoch": 0.9800481596147231, "grad_norm": 0.23265135526070768, "learning_rate": 1.043636022802108e-06, "loss": 2.3724, "step": 8547 }, { "epoch": 0.9801628253640637, "grad_norm": 0.27895118415445375, "learning_rate": 1.0316787923152493e-06, "loss": 2.4107, "step": 8548 }, { "epoch": 0.9802774911134045, "grad_norm": 0.27337607152649684, "learning_rate": 1.0197903849971103e-06, "loss": 2.2803, "step": 8549 }, { "epoch": 0.9803921568627451, "grad_norm": 0.27569940593712866, "learning_rate": 1.0079708024874345e-06, "loss": 2.3929, "step": 8550 }, { "epoch": 0.9805068226120858, "grad_norm": 0.24101819384287163, "learning_rate": 9.96220046416585e-07, "loss": 2.281, "step": 8551 }, { "epoch": 0.9806214883614265, "grad_norm": 0.27169162898514193, "learning_rate": 9.845381184052649e-07, "loss": 2.4003, "step": 8552 }, { "epoch": 0.9807361541107671, "grad_norm": 0.2969175020424089, "learning_rate": 9.729250200647965e-07, "loss": 2.4271, "step": 8553 }, { "epoch": 0.9808508198601078, "grad_norm": 0.2341802626887597, "learning_rate": 9.613807529970097e-07, "loss": 2.3637, "step": 8554 }, { "epoch": 0.9809654856094484, "grad_norm": 0.25508104807854093, "learning_rate": 9.49905318794242e-07, "loss": 2.3479, "step": 8555 }, { "epoch": 0.9810801513587891, "grad_norm": 0.2554437380150877, "learning_rate": 9.38498719039227e-07, "loss": 2.4653, "step": 8556 }, { "epoch": 0.9811948171081298, "grad_norm": 0.26662389628647565, "learning_rate": 9.271609553053728e-07, "loss": 2.5141, "step": 8557 }, { "epoch": 0.9813094828574704, "grad_norm": 0.24288649240674007, "learning_rate": 9.158920291564842e-07, "loss": 2.3692, "step": 8558 }, { "epoch": 0.9814241486068112, "grad_norm": 0.26412325947355103, "learning_rate": 9.04691942146818e-07, "loss": 2.2659, "step": 8559 }, { "epoch": 0.9815388143561519, "grad_norm": 0.28103900686852595, "learning_rate": 8.935606958213049e-07, "loss": 2.3534, "step": 8560 }, { "epoch": 0.9816534801054925, "grad_norm": 0.25661453042230875, "learning_rate": 8.824982917152724e-07, "loss": 2.5129, "step": 8561 }, { "epoch": 0.9817681458548332, "grad_norm": 0.26762345224717815, "learning_rate": 8.715047313545554e-07, "loss": 2.3802, "step": 8562 }, { "epoch": 0.9818828116041738, "grad_norm": 0.256755232078889, "learning_rate": 8.605800162554967e-07, "loss": 2.2949, "step": 8563 }, { "epoch": 0.9819974773535145, "grad_norm": 0.26337890713236367, "learning_rate": 8.497241479249462e-07, "loss": 2.5167, "step": 8564 }, { "epoch": 0.9821121431028552, "grad_norm": 0.23160137054670188, "learning_rate": 8.389371278603175e-07, "loss": 2.2524, "step": 8565 }, { "epoch": 0.9822268088521958, "grad_norm": 0.26657365958268897, "learning_rate": 8.282189575494203e-07, "loss": 2.4371, "step": 8566 }, { "epoch": 0.9823414746015365, "grad_norm": 0.2952456788827243, "learning_rate": 8.175696384706277e-07, "loss": 2.4809, "step": 8567 }, { "epoch": 0.9824561403508771, "grad_norm": 0.26076524901979437, "learning_rate": 8.069891720928202e-07, "loss": 2.328, "step": 8568 }, { "epoch": 0.9825708061002179, "grad_norm": 0.24788892534550078, "learning_rate": 7.964775598753859e-07, "loss": 2.4206, "step": 8569 }, { "epoch": 0.9826854718495586, "grad_norm": 0.2653858524494065, "learning_rate": 7.860348032682207e-07, "loss": 2.2839, "step": 8570 }, { "epoch": 0.9828001375988992, "grad_norm": 0.2609458188004867, "learning_rate": 7.756609037116169e-07, "loss": 2.4553, "step": 8571 }, { "epoch": 0.9829148033482399, "grad_norm": 0.2715620262028699, "learning_rate": 7.653558626365409e-07, "loss": 2.3281, "step": 8572 }, { "epoch": 0.9830294690975806, "grad_norm": 0.2744616849169987, "learning_rate": 7.551196814643002e-07, "loss": 2.3304, "step": 8573 }, { "epoch": 0.9831441348469212, "grad_norm": 0.27664841180895827, "learning_rate": 7.449523616068765e-07, "loss": 2.5822, "step": 8574 }, { "epoch": 0.9832588005962619, "grad_norm": 0.2510220552396542, "learning_rate": 7.348539044665925e-07, "loss": 2.3962, "step": 8575 }, { "epoch": 0.9833734663456025, "grad_norm": 0.25770550998226555, "learning_rate": 7.24824311436334e-07, "loss": 2.5001, "step": 8576 }, { "epoch": 0.9834881320949432, "grad_norm": 0.23522144334916062, "learning_rate": 7.148635838994943e-07, "loss": 2.4103, "step": 8577 }, { "epoch": 0.983602797844284, "grad_norm": 0.26492834233872287, "learning_rate": 7.049717232300302e-07, "loss": 2.3792, "step": 8578 }, { "epoch": 0.9837174635936246, "grad_norm": 0.2841040357611516, "learning_rate": 6.951487307922944e-07, "loss": 2.39, "step": 8579 }, { "epoch": 0.9838321293429653, "grad_norm": 0.24951431511582048, "learning_rate": 6.853946079411477e-07, "loss": 2.2993, "step": 8580 }, { "epoch": 0.983946795092306, "grad_norm": 0.2440385410536707, "learning_rate": 6.75709356022014e-07, "loss": 2.4304, "step": 8581 }, { "epoch": 0.9840614608416466, "grad_norm": 0.3032387375584678, "learning_rate": 6.660929763707691e-07, "loss": 2.4766, "step": 8582 }, { "epoch": 0.9841761265909873, "grad_norm": 0.26000275894989894, "learning_rate": 6.565454703138518e-07, "loss": 2.3079, "step": 8583 }, { "epoch": 0.9842907923403279, "grad_norm": 0.23190025457840022, "learning_rate": 6.470668391681533e-07, "loss": 2.5004, "step": 8584 }, { "epoch": 0.9844054580896686, "grad_norm": 0.24162034169815647, "learning_rate": 6.376570842410168e-07, "loss": 2.4408, "step": 8585 }, { "epoch": 0.9845201238390093, "grad_norm": 0.2627554606261515, "learning_rate": 6.283162068304038e-07, "loss": 2.4052, "step": 8586 }, { "epoch": 0.9846347895883499, "grad_norm": 0.253002779773242, "learning_rate": 6.190442082246172e-07, "loss": 2.495, "step": 8587 }, { "epoch": 0.9847494553376906, "grad_norm": 0.2829690156804403, "learning_rate": 6.098410897026896e-07, "loss": 2.4628, "step": 8588 }, { "epoch": 0.9848641210870313, "grad_norm": 0.24585862143049741, "learning_rate": 6.007068525338833e-07, "loss": 2.3591, "step": 8589 }, { "epoch": 0.984978786836372, "grad_norm": 0.2636374727683315, "learning_rate": 5.916414979781903e-07, "loss": 2.6421, "step": 8590 }, { "epoch": 0.9850934525857127, "grad_norm": 0.2696762905044979, "learning_rate": 5.82645027285944e-07, "loss": 2.4102, "step": 8591 }, { "epoch": 0.9852081183350533, "grad_norm": 0.25301280819631894, "learning_rate": 5.737174416980961e-07, "loss": 2.455, "step": 8592 }, { "epoch": 0.985322784084394, "grad_norm": 0.2762356668214205, "learning_rate": 5.648587424459395e-07, "loss": 2.4909, "step": 8593 }, { "epoch": 0.9854374498337347, "grad_norm": 0.26095966828504896, "learning_rate": 5.560689307514411e-07, "loss": 2.418, "step": 8594 }, { "epoch": 0.9855521155830753, "grad_norm": 0.2528275987167809, "learning_rate": 5.473480078269644e-07, "loss": 2.226, "step": 8595 }, { "epoch": 0.985666781332416, "grad_norm": 0.2520518829423772, "learning_rate": 5.386959748754361e-07, "loss": 2.3396, "step": 8596 }, { "epoch": 0.9857814470817566, "grad_norm": 0.22798840958610034, "learning_rate": 5.30112833090124e-07, "loss": 2.2825, "step": 8597 }, { "epoch": 0.9858961128310973, "grad_norm": 0.2474641745540536, "learning_rate": 5.215985836550252e-07, "loss": 2.3857, "step": 8598 }, { "epoch": 0.9860107785804381, "grad_norm": 0.2507477113255739, "learning_rate": 5.131532277444784e-07, "loss": 2.3427, "step": 8599 }, { "epoch": 0.9861254443297787, "grad_norm": 0.24766441039804943, "learning_rate": 5.047767665233849e-07, "loss": 2.4202, "step": 8600 }, { "epoch": 0.9862401100791194, "grad_norm": 0.24855984984404453, "learning_rate": 4.964692011470428e-07, "loss": 2.3779, "step": 8601 }, { "epoch": 0.98635477582846, "grad_norm": 0.2644803728861775, "learning_rate": 4.882305327614244e-07, "loss": 2.3043, "step": 8602 }, { "epoch": 0.9864694415778007, "grad_norm": 0.23700830234761502, "learning_rate": 4.800607625027876e-07, "loss": 2.2955, "step": 8603 }, { "epoch": 0.9865841073271414, "grad_norm": 0.26165516163496383, "learning_rate": 4.719598914980638e-07, "loss": 2.3221, "step": 8604 }, { "epoch": 0.986698773076482, "grad_norm": 0.2676096701582563, "learning_rate": 4.6392792086463744e-07, "loss": 2.5827, "step": 8605 }, { "epoch": 0.9868134388258227, "grad_norm": 0.25229729564013986, "learning_rate": 4.559648517102888e-07, "loss": 2.4649, "step": 8606 }, { "epoch": 0.9869281045751634, "grad_norm": 0.262937464184219, "learning_rate": 4.4807068513341707e-07, "loss": 2.2522, "step": 8607 }, { "epoch": 0.987042770324504, "grad_norm": 0.2588535218203286, "learning_rate": 4.4024542222287354e-07, "loss": 2.367, "step": 8608 }, { "epoch": 0.9871574360738448, "grad_norm": 0.26780235224776666, "learning_rate": 4.32489064058017e-07, "loss": 2.34, "step": 8609 }, { "epoch": 0.9872721018231854, "grad_norm": 0.25159497229173994, "learning_rate": 4.248016117086584e-07, "loss": 2.3852, "step": 8610 }, { "epoch": 0.9873867675725261, "grad_norm": 0.24894319767426784, "learning_rate": 4.1718306623517164e-07, "loss": 2.3663, "step": 8611 }, { "epoch": 0.9875014333218668, "grad_norm": 0.2704067145258275, "learning_rate": 4.0963342868832744e-07, "loss": 2.4953, "step": 8612 }, { "epoch": 0.9876160990712074, "grad_norm": 0.26337325958042057, "learning_rate": 4.021527001095149e-07, "loss": 2.347, "step": 8613 }, { "epoch": 0.9877307648205481, "grad_norm": 0.24638098946302542, "learning_rate": 3.947408815305198e-07, "loss": 2.3986, "step": 8614 }, { "epoch": 0.9878454305698888, "grad_norm": 0.2403370226109531, "learning_rate": 3.8739797397369105e-07, "loss": 2.2409, "step": 8615 }, { "epoch": 0.9879600963192294, "grad_norm": 0.25069772670311363, "learning_rate": 3.801239784518296e-07, "loss": 2.2775, "step": 8616 }, { "epoch": 0.9880747620685701, "grad_norm": 0.2385637216993145, "learning_rate": 3.729188959682439e-07, "loss": 2.3719, "step": 8617 }, { "epoch": 0.9881894278179107, "grad_norm": 0.2559934146276328, "learning_rate": 3.657827275167502e-07, "loss": 2.4245, "step": 8618 }, { "epoch": 0.9883040935672515, "grad_norm": 0.26140054239578825, "learning_rate": 3.587154740816168e-07, "loss": 2.2906, "step": 8619 }, { "epoch": 0.9884187593165922, "grad_norm": 0.2443434791368734, "learning_rate": 3.517171366376748e-07, "loss": 2.4153, "step": 8620 }, { "epoch": 0.9885334250659328, "grad_norm": 0.25900973421790924, "learning_rate": 3.4478771615015224e-07, "loss": 2.4326, "step": 8621 }, { "epoch": 0.9886480908152735, "grad_norm": 0.2794240255798061, "learning_rate": 3.379272135748956e-07, "loss": 2.3971, "step": 8622 }, { "epoch": 0.9887627565646141, "grad_norm": 0.25236031326502967, "learning_rate": 3.3113562985814804e-07, "loss": 2.3574, "step": 8623 }, { "epoch": 0.9888774223139548, "grad_norm": 0.25063342782528103, "learning_rate": 3.2441296593666014e-07, "loss": 2.4527, "step": 8624 }, { "epoch": 0.9889920880632955, "grad_norm": 0.261683573217417, "learning_rate": 3.1775922273774573e-07, "loss": 2.385, "step": 8625 }, { "epoch": 0.9891067538126361, "grad_norm": 0.27361532978457964, "learning_rate": 3.1117440117917063e-07, "loss": 2.2607, "step": 8626 }, { "epoch": 0.9892214195619768, "grad_norm": 0.2527023328580243, "learning_rate": 3.046585021690973e-07, "loss": 2.3632, "step": 8627 }, { "epoch": 0.9893360853113176, "grad_norm": 0.2492932072753548, "learning_rate": 2.982115266063623e-07, "loss": 2.2511, "step": 8628 }, { "epoch": 0.9894507510606582, "grad_norm": 0.2802536918308958, "learning_rate": 2.9183347538014324e-07, "loss": 2.4247, "step": 8629 }, { "epoch": 0.9895654168099989, "grad_norm": 0.2907507553788078, "learning_rate": 2.855243493701809e-07, "loss": 2.2605, "step": 8630 }, { "epoch": 0.9896800825593395, "grad_norm": 0.25848255535047177, "learning_rate": 2.7928414944672355e-07, "loss": 2.2358, "step": 8631 }, { "epoch": 0.9897947483086802, "grad_norm": 0.24152951604750336, "learning_rate": 2.731128764704716e-07, "loss": 2.3455, "step": 8632 }, { "epoch": 0.9899094140580209, "grad_norm": 0.25856205415955, "learning_rate": 2.670105312926885e-07, "loss": 2.3936, "step": 8633 }, { "epoch": 0.9900240798073615, "grad_norm": 0.26757653497520606, "learning_rate": 2.6097711475497885e-07, "loss": 2.2827, "step": 8634 }, { "epoch": 0.9901387455567022, "grad_norm": 0.25914451400940713, "learning_rate": 2.5501262768956593e-07, "loss": 2.5759, "step": 8635 }, { "epoch": 0.9902534113060428, "grad_norm": 0.24312766035895478, "learning_rate": 2.4911707091918036e-07, "loss": 2.4949, "step": 8636 }, { "epoch": 0.9903680770553835, "grad_norm": 0.26370911699327604, "learning_rate": 2.4329044525694954e-07, "loss": 2.3438, "step": 8637 }, { "epoch": 0.9904827428047243, "grad_norm": 0.25296513702543827, "learning_rate": 2.3753275150661947e-07, "loss": 2.4272, "step": 8638 }, { "epoch": 0.9905974085540649, "grad_norm": 0.24066438685155278, "learning_rate": 2.318439904622771e-07, "loss": 2.3957, "step": 8639 }, { "epoch": 0.9907120743034056, "grad_norm": 0.2884418538132336, "learning_rate": 2.2622416290857262e-07, "loss": 2.3981, "step": 8640 }, { "epoch": 0.9908267400527463, "grad_norm": 0.2627610370136801, "learning_rate": 2.2067326962071922e-07, "loss": 2.4432, "step": 8641 }, { "epoch": 0.9909414058020869, "grad_norm": 0.28302352401365827, "learning_rate": 2.151913113643822e-07, "loss": 2.4919, "step": 8642 }, { "epoch": 0.9910560715514276, "grad_norm": 0.27097779205217254, "learning_rate": 2.0977828889556794e-07, "loss": 2.4745, "step": 8643 }, { "epoch": 0.9911707373007682, "grad_norm": 0.26667546134556286, "learning_rate": 2.0443420296101246e-07, "loss": 2.4686, "step": 8644 }, { "epoch": 0.9912854030501089, "grad_norm": 0.2913833970875544, "learning_rate": 1.991590542977373e-07, "loss": 2.3921, "step": 8645 }, { "epoch": 0.9914000687994496, "grad_norm": 0.24231146255736993, "learning_rate": 1.9395284363343813e-07, "loss": 2.4104, "step": 8646 }, { "epoch": 0.9915147345487902, "grad_norm": 0.2438636295979367, "learning_rate": 1.8881557168620722e-07, "loss": 2.4818, "step": 8647 }, { "epoch": 0.991629400298131, "grad_norm": 0.25132533516851535, "learning_rate": 1.837472391645334e-07, "loss": 2.269, "step": 8648 }, { "epoch": 0.9917440660474717, "grad_norm": 0.24548136728404096, "learning_rate": 1.787478467675796e-07, "loss": 2.2914, "step": 8649 }, { "epoch": 0.9918587317968123, "grad_norm": 0.30056787591766043, "learning_rate": 1.7381739518496087e-07, "loss": 2.3555, "step": 8650 }, { "epoch": 0.991973397546153, "grad_norm": 0.2996134119455222, "learning_rate": 1.6895588509663328e-07, "loss": 2.6143, "step": 8651 }, { "epoch": 0.9920880632954936, "grad_norm": 0.26386854756364286, "learning_rate": 1.6416331717317157e-07, "loss": 2.333, "step": 8652 }, { "epoch": 0.9922027290448343, "grad_norm": 0.28182992367003884, "learning_rate": 1.5943969207565801e-07, "loss": 2.5012, "step": 8653 }, { "epoch": 0.992317394794175, "grad_norm": 0.2614427511089186, "learning_rate": 1.54785010455627e-07, "loss": 2.4487, "step": 8654 }, { "epoch": 0.9924320605435156, "grad_norm": 0.2871849118856723, "learning_rate": 1.5019927295506497e-07, "loss": 2.408, "step": 8655 }, { "epoch": 0.9925467262928563, "grad_norm": 0.28388353540419736, "learning_rate": 1.4568248020652152e-07, "loss": 2.3525, "step": 8656 }, { "epoch": 0.9926613920421969, "grad_norm": 0.2640457456346779, "learning_rate": 1.4123463283299832e-07, "loss": 2.4838, "step": 8657 }, { "epoch": 0.9927760577915377, "grad_norm": 0.29740165737984015, "learning_rate": 1.3685573144800456e-07, "loss": 2.4034, "step": 8658 }, { "epoch": 0.9928907235408784, "grad_norm": 0.27163374215204966, "learning_rate": 1.325457766554461e-07, "loss": 2.4704, "step": 8659 }, { "epoch": 0.993005389290219, "grad_norm": 0.26471152740242737, "learning_rate": 1.2830476904990286e-07, "loss": 2.3582, "step": 8660 }, { "epoch": 0.9931200550395597, "grad_norm": 0.2503953927919408, "learning_rate": 1.241327092162403e-07, "loss": 2.3669, "step": 8661 }, { "epoch": 0.9932347207889004, "grad_norm": 0.26780079575386767, "learning_rate": 1.200295977300536e-07, "loss": 2.4478, "step": 8662 }, { "epoch": 0.993349386538241, "grad_norm": 0.26285216971670683, "learning_rate": 1.1599543515711241e-07, "loss": 2.3395, "step": 8663 }, { "epoch": 0.9934640522875817, "grad_norm": 0.2588272966672831, "learning_rate": 1.1203022205402702e-07, "loss": 2.2397, "step": 8664 }, { "epoch": 0.9935787180369223, "grad_norm": 0.2564340223156835, "learning_rate": 1.0813395896758227e-07, "loss": 2.5017, "step": 8665 }, { "epoch": 0.993693383786263, "grad_norm": 0.23745272228770195, "learning_rate": 1.0430664643529264e-07, "loss": 2.325, "step": 8666 }, { "epoch": 0.9938080495356038, "grad_norm": 0.27010028185794954, "learning_rate": 1.0054828498501367e-07, "loss": 2.4829, "step": 8667 }, { "epoch": 0.9939227152849444, "grad_norm": 0.2824664547110627, "learning_rate": 9.68588751351085e-08, "loss": 2.4335, "step": 8668 }, { "epoch": 0.9940373810342851, "grad_norm": 0.2551464862481459, "learning_rate": 9.323841739455885e-08, "loss": 2.5204, "step": 8669 }, { "epoch": 0.9941520467836257, "grad_norm": 0.29775161777572506, "learning_rate": 8.968691226263204e-08, "loss": 2.4096, "step": 8670 }, { "epoch": 0.9942667125329664, "grad_norm": 0.2651379129855896, "learning_rate": 8.620436022926947e-08, "loss": 2.4061, "step": 8671 }, { "epoch": 0.9943813782823071, "grad_norm": 0.23904300107657003, "learning_rate": 8.279076177475364e-08, "loss": 2.374, "step": 8672 }, { "epoch": 0.9944960440316477, "grad_norm": 0.24277519274369436, "learning_rate": 7.944611736998564e-08, "loss": 2.351, "step": 8673 }, { "epoch": 0.9946107097809884, "grad_norm": 0.2468743565022368, "learning_rate": 7.617042747631864e-08, "loss": 2.3021, "step": 8674 }, { "epoch": 0.9947253755303291, "grad_norm": 0.2607820769266603, "learning_rate": 7.296369254544688e-08, "loss": 2.1844, "step": 8675 }, { "epoch": 0.9948400412796697, "grad_norm": 0.26964384342108455, "learning_rate": 6.982591301979424e-08, "loss": 2.4855, "step": 8676 }, { "epoch": 0.9949547070290105, "grad_norm": 0.2703086387292498, "learning_rate": 6.675708933212565e-08, "loss": 2.2926, "step": 8677 }, { "epoch": 0.9950693727783511, "grad_norm": 0.27776889607863414, "learning_rate": 6.375722190571364e-08, "loss": 2.3183, "step": 8678 }, { "epoch": 0.9951840385276918, "grad_norm": 0.2560109574942953, "learning_rate": 6.082631115439385e-08, "loss": 2.4115, "step": 8679 }, { "epoch": 0.9952987042770325, "grad_norm": 0.2674455608940206, "learning_rate": 5.796435748228746e-08, "loss": 2.2582, "step": 8680 }, { "epoch": 0.9954133700263731, "grad_norm": 0.24423833523924252, "learning_rate": 5.517136128430078e-08, "loss": 2.3668, "step": 8681 }, { "epoch": 0.9955280357757138, "grad_norm": 0.2628472406338573, "learning_rate": 5.24473229455702e-08, "loss": 2.4378, "step": 8682 }, { "epoch": 0.9956427015250545, "grad_norm": 0.26159456291626043, "learning_rate": 4.97922428419062e-08, "loss": 2.4248, "step": 8683 }, { "epoch": 0.9957573672743951, "grad_norm": 0.2576237397475355, "learning_rate": 4.720612133946034e-08, "loss": 2.4214, "step": 8684 }, { "epoch": 0.9958720330237358, "grad_norm": 0.22990891398550256, "learning_rate": 4.4688958794947274e-08, "loss": 2.2679, "step": 8685 }, { "epoch": 0.9959866987730764, "grad_norm": 0.2439124544729441, "learning_rate": 4.224075555558926e-08, "loss": 2.3323, "step": 8686 }, { "epoch": 0.9961013645224172, "grad_norm": 0.2450153319712526, "learning_rate": 3.986151195906063e-08, "loss": 2.3078, "step": 8687 }, { "epoch": 0.9962160302717579, "grad_norm": 0.25413039722979747, "learning_rate": 3.7551228333543294e-08, "loss": 2.3556, "step": 8688 }, { "epoch": 0.9963306960210985, "grad_norm": 0.22087335675688888, "learning_rate": 3.530990499761577e-08, "loss": 2.2377, "step": 8689 }, { "epoch": 0.9964453617704392, "grad_norm": 0.24757528557754177, "learning_rate": 3.313754226058619e-08, "loss": 2.343, "step": 8690 }, { "epoch": 0.9965600275197798, "grad_norm": 0.2831624394783042, "learning_rate": 3.10341404219372e-08, "loss": 2.3523, "step": 8691 }, { "epoch": 0.9966746932691205, "grad_norm": 0.25768590833675686, "learning_rate": 2.8999699771881105e-08, "loss": 2.3252, "step": 8692 }, { "epoch": 0.9967893590184612, "grad_norm": 0.2653641590796179, "learning_rate": 2.703422059097127e-08, "loss": 2.3874, "step": 8693 }, { "epoch": 0.9969040247678018, "grad_norm": 0.23592517689817794, "learning_rate": 2.5137703150379666e-08, "loss": 2.317, "step": 8694 }, { "epoch": 0.9970186905171425, "grad_norm": 0.2614730575100257, "learning_rate": 2.3310147711619324e-08, "loss": 2.3447, "step": 8695 }, { "epoch": 0.9971333562664833, "grad_norm": 0.26783196452850944, "learning_rate": 2.1551554526821892e-08, "loss": 2.3634, "step": 8696 }, { "epoch": 0.9972480220158239, "grad_norm": 0.2526019031264884, "learning_rate": 1.9861923838515596e-08, "loss": 2.3153, "step": 8697 }, { "epoch": 0.9973626877651646, "grad_norm": 0.25562213729370675, "learning_rate": 1.824125587979175e-08, "loss": 2.4103, "step": 8698 }, { "epoch": 0.9974773535145052, "grad_norm": 0.25315660282809743, "learning_rate": 1.668955087413826e-08, "loss": 2.5363, "step": 8699 }, { "epoch": 0.9975920192638459, "grad_norm": 0.2565312874958023, "learning_rate": 1.5206809035606116e-08, "loss": 2.5074, "step": 8700 }, { "epoch": 0.9977066850131866, "grad_norm": 0.29156274121267944, "learning_rate": 1.3793030568753917e-08, "loss": 2.3541, "step": 8701 }, { "epoch": 0.9978213507625272, "grad_norm": 0.2529971326244546, "learning_rate": 1.2448215668536822e-08, "loss": 2.4232, "step": 8702 }, { "epoch": 0.9979360165118679, "grad_norm": 0.2562649865938568, "learning_rate": 1.11723645204731e-08, "loss": 2.3617, "step": 8703 }, { "epoch": 0.9980506822612085, "grad_norm": 0.25383915701070436, "learning_rate": 9.965477300477588e-09, "loss": 2.4644, "step": 8704 }, { "epoch": 0.9981653480105492, "grad_norm": 0.2468425391742744, "learning_rate": 8.827554175139252e-09, "loss": 2.2745, "step": 8705 }, { "epoch": 0.99828001375989, "grad_norm": 0.26347020622133427, "learning_rate": 7.758595301277094e-09, "loss": 2.4839, "step": 8706 }, { "epoch": 0.9983946795092306, "grad_norm": 0.268117646474416, "learning_rate": 6.7586008264397534e-09, "loss": 2.6396, "step": 8707 }, { "epoch": 0.9985093452585713, "grad_norm": 0.23972507325541234, "learning_rate": 5.82757088846142e-09, "loss": 2.3252, "step": 8708 }, { "epoch": 0.998624011007912, "grad_norm": 0.25997406905727055, "learning_rate": 4.965505615850408e-09, "loss": 2.3622, "step": 8709 }, { "epoch": 0.9987386767572526, "grad_norm": 0.22394255195605028, "learning_rate": 4.172405127511603e-09, "loss": 2.2471, "step": 8710 }, { "epoch": 0.9988533425065933, "grad_norm": 0.2808201473396869, "learning_rate": 3.448269532746462e-09, "loss": 2.2729, "step": 8711 }, { "epoch": 0.9989680082559339, "grad_norm": 0.2795613612146032, "learning_rate": 2.793098931530569e-09, "loss": 2.4667, "step": 8712 }, { "epoch": 0.9990826740052746, "grad_norm": 0.25612538088162873, "learning_rate": 2.2068934141805664e-09, "loss": 2.295, "step": 8713 }, { "epoch": 0.9991973397546153, "grad_norm": 0.2704012435212936, "learning_rate": 1.6896530615206907e-09, "loss": 2.3392, "step": 8714 }, { "epoch": 0.9993120055039559, "grad_norm": 0.24133529993686634, "learning_rate": 1.2413779449937934e-09, "loss": 2.3006, "step": 8715 }, { "epoch": 0.9994266712532967, "grad_norm": 0.26350406306133656, "learning_rate": 8.620681263282748e-10, "loss": 2.4566, "step": 8716 }, { "epoch": 0.9995413370026374, "grad_norm": 0.23644930682027115, "learning_rate": 5.517236579266616e-10, "loss": 2.5879, "step": 8717 }, { "epoch": 0.999656002751978, "grad_norm": 0.2874345114734641, "learning_rate": 3.1034458258805134e-10, "loss": 2.4806, "step": 8718 }, { "epoch": 0.9997706685013187, "grad_norm": 0.2356292832141907, "learning_rate": 1.3793093350811247e-10, "loss": 2.2896, "step": 8719 }, { "epoch": 0.9998853342506593, "grad_norm": 0.25027326326702304, "learning_rate": 3.4482734556640086e-11, "loss": 2.5145, "step": 8720 }, { "epoch": 1.0, "grad_norm": 0.25850277707761193, "learning_rate": 0.0, "loss": 2.4187, "step": 8721 }, { "epoch": 1.0, "step": 8721, "total_flos": 291840174110720.0, "train_loss": 2.5324699844792957, "train_runtime": 37426.6704, "train_samples_per_second": 14.913, "train_steps_per_second": 0.233 } ], "logging_steps": 1.0, "max_steps": 8721, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 291840174110720.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }