|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015873015873015872, |
|
"grad_norm": 2.3559653063882835, |
|
"learning_rate": 0.0, |
|
"loss": 1.0469, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.031746031746031744, |
|
"grad_norm": 2.29659253609106, |
|
"learning_rate": 3.125e-07, |
|
"loss": 0.9929, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.047619047619047616, |
|
"grad_norm": 2.8899785663629123, |
|
"learning_rate": 6.25e-07, |
|
"loss": 1.0189, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.06349206349206349, |
|
"grad_norm": 2.2527917765154153, |
|
"learning_rate": 9.375000000000001e-07, |
|
"loss": 0.9098, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07936507936507936, |
|
"grad_norm": 2.2029116064708907, |
|
"learning_rate": 1.25e-06, |
|
"loss": 1.0462, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09523809523809523, |
|
"grad_norm": 2.122312207060731, |
|
"learning_rate": 1.5625e-06, |
|
"loss": 0.9986, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 2.045608321522422, |
|
"learning_rate": 1.8750000000000003e-06, |
|
"loss": 0.9554, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.12698412698412698, |
|
"grad_norm": 1.8619152723479657, |
|
"learning_rate": 2.1875000000000002e-06, |
|
"loss": 0.9522, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 1.8663709940802706, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.9994, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.15873015873015872, |
|
"grad_norm": 1.756651050051264, |
|
"learning_rate": 2.8125e-06, |
|
"loss": 0.9373, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1746031746031746, |
|
"grad_norm": 1.6604489437599113, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.8839, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.19047619047619047, |
|
"grad_norm": 1.292095856227553, |
|
"learning_rate": 3.4375e-06, |
|
"loss": 0.9907, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.20634920634920634, |
|
"grad_norm": 1.2693344466908103, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.9322, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 1.6344663181288221, |
|
"learning_rate": 4.0625000000000005e-06, |
|
"loss": 1.0934, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 1.1511973531225708, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.9771, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.25396825396825395, |
|
"grad_norm": 1.1464903643465947, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"loss": 1.0442, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2698412698412698, |
|
"grad_norm": 1.1549964376534243, |
|
"learning_rate": 5e-06, |
|
"loss": 0.949, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 1.2909225755543452, |
|
"learning_rate": 5.3125e-06, |
|
"loss": 1.0098, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.30158730158730157, |
|
"grad_norm": 1.0413894105842352, |
|
"learning_rate": 5.625e-06, |
|
"loss": 0.7468, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.31746031746031744, |
|
"grad_norm": 1.2863204827157997, |
|
"learning_rate": 5.9375e-06, |
|
"loss": 0.9232, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.901486908411037, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.9885, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.3492063492063492, |
|
"grad_norm": 1.0229681930848715, |
|
"learning_rate": 6.5625e-06, |
|
"loss": 1.0508, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.36507936507936506, |
|
"grad_norm": 1.0772270927236638, |
|
"learning_rate": 6.875e-06, |
|
"loss": 0.8728, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.38095238095238093, |
|
"grad_norm": 0.9393952588410857, |
|
"learning_rate": 7.1875e-06, |
|
"loss": 0.8349, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.3968253968253968, |
|
"grad_norm": 1.0822345499912303, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.0283, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.4126984126984127, |
|
"grad_norm": 0.8124841375138875, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 0.8612, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.7868328056966778, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 0.7232, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.8283002452965974, |
|
"learning_rate": 8.4375e-06, |
|
"loss": 0.9752, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4603174603174603, |
|
"grad_norm": 0.8117454313345658, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.9053, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 0.8594519052279771, |
|
"learning_rate": 9.0625e-06, |
|
"loss": 0.9479, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49206349206349204, |
|
"grad_norm": 0.9958158956912483, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 0.9188, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5079365079365079, |
|
"grad_norm": 0.8918575253813723, |
|
"learning_rate": 9.6875e-06, |
|
"loss": 0.8847, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.5238095238095238, |
|
"grad_norm": 0.8125329449215294, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0379, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.5396825396825397, |
|
"grad_norm": 0.7108806956906407, |
|
"learning_rate": 9.999691920767945e-06, |
|
"loss": 0.8376, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 0.641926463787557, |
|
"learning_rate": 9.998767721036901e-06, |
|
"loss": 0.8241, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.7048636529194373, |
|
"learning_rate": 9.997227514697568e-06, |
|
"loss": 0.9693, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.5873015873015873, |
|
"grad_norm": 0.6041864409794199, |
|
"learning_rate": 9.99507149155218e-06, |
|
"loss": 0.9839, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.6031746031746031, |
|
"grad_norm": 0.6529381186048961, |
|
"learning_rate": 9.992299917291118e-06, |
|
"loss": 0.8479, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.6190476190476191, |
|
"grad_norm": 0.7457758141141355, |
|
"learning_rate": 9.98891313346017e-06, |
|
"loss": 0.9095, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.6349206349206349, |
|
"grad_norm": 0.6700791615416641, |
|
"learning_rate": 9.984911557418444e-06, |
|
"loss": 0.7685, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6507936507936508, |
|
"grad_norm": 0.6202447937301818, |
|
"learning_rate": 9.980295682286924e-06, |
|
"loss": 0.8387, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.6888680420644837, |
|
"learning_rate": 9.97506607688772e-06, |
|
"loss": 0.9107, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6825396825396826, |
|
"grad_norm": 0.5229452850388104, |
|
"learning_rate": 9.969223385673958e-06, |
|
"loss": 0.8308, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.6984126984126984, |
|
"grad_norm": 0.5679326043532053, |
|
"learning_rate": 9.962768328650367e-06, |
|
"loss": 0.7516, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.5234412349262514, |
|
"learning_rate": 9.95570170128455e-06, |
|
"loss": 0.8443, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.7301587301587301, |
|
"grad_norm": 0.5148736685750067, |
|
"learning_rate": 9.94802437440896e-06, |
|
"loss": 0.7959, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.746031746031746, |
|
"grad_norm": 0.6223703419413371, |
|
"learning_rate": 9.939737294113585e-06, |
|
"loss": 0.8964, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 0.4712938980573866, |
|
"learning_rate": 9.930841481629358e-06, |
|
"loss": 0.8884, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 0.6385581101993485, |
|
"learning_rate": 9.92133803320231e-06, |
|
"loss": 0.7817, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"grad_norm": 0.47528095545287, |
|
"learning_rate": 9.91122811995848e-06, |
|
"loss": 0.819, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8095238095238095, |
|
"grad_norm": 0.5522186664203698, |
|
"learning_rate": 9.90051298775959e-06, |
|
"loss": 0.8691, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.8253968253968254, |
|
"grad_norm": 0.3924890188917555, |
|
"learning_rate": 9.88919395704952e-06, |
|
"loss": 0.8259, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.8412698412698413, |
|
"grad_norm": 0.5584015479821739, |
|
"learning_rate": 9.877272422691583e-06, |
|
"loss": 0.9318, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.5472693893320031, |
|
"learning_rate": 9.864749853796642e-06, |
|
"loss": 0.7983, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.873015873015873, |
|
"grad_norm": 0.5011856989250408, |
|
"learning_rate": 9.85162779354206e-06, |
|
"loss": 0.7289, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.48176520075987733, |
|
"learning_rate": 9.837907858981536e-06, |
|
"loss": 0.8795, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.9047619047619048, |
|
"grad_norm": 0.4693619944653085, |
|
"learning_rate": 9.823591740845831e-06, |
|
"loss": 0.8625, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.9206349206349206, |
|
"grad_norm": 0.5158078748351012, |
|
"learning_rate": 9.808681203334416e-06, |
|
"loss": 0.7975, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.9365079365079365, |
|
"grad_norm": 0.467299048377056, |
|
"learning_rate": 9.793178083898073e-06, |
|
"loss": 0.878, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 0.4360100853426926, |
|
"learning_rate": 9.777084293012448e-06, |
|
"loss": 0.842, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9682539682539683, |
|
"grad_norm": 0.4999196363033725, |
|
"learning_rate": 9.760401813942641e-06, |
|
"loss": 0.7661, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.9841269841269841, |
|
"grad_norm": 0.49451715958225617, |
|
"learning_rate": 9.743132702498785e-06, |
|
"loss": 0.8685, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.51449429417728, |
|
"learning_rate": 9.725279086782719e-06, |
|
"loss": 0.7676, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.0158730158730158, |
|
"grad_norm": 0.5392465569053122, |
|
"learning_rate": 9.706843166925733e-06, |
|
"loss": 0.7978, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.0317460317460316, |
|
"grad_norm": 0.49426185655546884, |
|
"learning_rate": 9.687827214817433e-06, |
|
"loss": 0.8264, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.0476190476190477, |
|
"grad_norm": 0.5050909892528982, |
|
"learning_rate": 9.668233573825794e-06, |
|
"loss": 0.8898, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.0634920634920635, |
|
"grad_norm": 0.45134127922296613, |
|
"learning_rate": 9.64806465850836e-06, |
|
"loss": 0.7317, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0793650793650793, |
|
"grad_norm": 0.5413266326970981, |
|
"learning_rate": 9.62732295431471e-06, |
|
"loss": 0.7307, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.0952380952380953, |
|
"grad_norm": 0.4781316290575908, |
|
"learning_rate": 9.606011017280166e-06, |
|
"loss": 0.8977, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.5064140744904799, |
|
"learning_rate": 9.5841314737108e-06, |
|
"loss": 0.7648, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.126984126984127, |
|
"grad_norm": 0.5543523877170532, |
|
"learning_rate": 9.56168701985981e-06, |
|
"loss": 0.7995, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.4891764300467825, |
|
"learning_rate": 9.538680421595236e-06, |
|
"loss": 0.8072, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.1587301587301586, |
|
"grad_norm": 0.48203192054287314, |
|
"learning_rate": 9.515114514059127e-06, |
|
"loss": 0.8128, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.1746031746031746, |
|
"grad_norm": 0.499915788005329, |
|
"learning_rate": 9.490992201318165e-06, |
|
"loss": 0.7876, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 0.43129451868532453, |
|
"learning_rate": 9.466316456005783e-06, |
|
"loss": 0.7755, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.2063492063492063, |
|
"grad_norm": 0.49436944947590167, |
|
"learning_rate": 9.441090318955843e-06, |
|
"loss": 0.7015, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 0.5018080177691097, |
|
"learning_rate": 9.415316898827923e-06, |
|
"loss": 0.7346, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.2380952380952381, |
|
"grad_norm": 0.42117192002428844, |
|
"learning_rate": 9.388999371724212e-06, |
|
"loss": 0.8242, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.253968253968254, |
|
"grad_norm": 0.47397540901194374, |
|
"learning_rate": 9.362140980798127e-06, |
|
"loss": 0.8928, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.2698412698412698, |
|
"grad_norm": 0.48823131897505534, |
|
"learning_rate": 9.334745035854646e-06, |
|
"loss": 0.7581, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 0.5170460810325518, |
|
"learning_rate": 9.306814912942445e-06, |
|
"loss": 0.8361, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.3015873015873016, |
|
"grad_norm": 0.41118521047488926, |
|
"learning_rate": 9.278354053937848e-06, |
|
"loss": 0.7794, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.3174603174603174, |
|
"grad_norm": 0.4827654705693697, |
|
"learning_rate": 9.249365966120692e-06, |
|
"loss": 0.8542, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.45176873751511454, |
|
"learning_rate": 9.219854221742106e-06, |
|
"loss": 0.8101, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.3492063492063493, |
|
"grad_norm": 0.44526540495239475, |
|
"learning_rate": 9.189822457584311e-06, |
|
"loss": 0.7419, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.3650793650793651, |
|
"grad_norm": 0.41133066066087726, |
|
"learning_rate": 9.159274374512444e-06, |
|
"loss": 0.6576, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.380952380952381, |
|
"grad_norm": 0.4500027229237173, |
|
"learning_rate": 9.128213737018493e-06, |
|
"loss": 0.8058, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.3968253968253967, |
|
"grad_norm": 0.40834920107678924, |
|
"learning_rate": 9.096644372757393e-06, |
|
"loss": 0.8849, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.4126984126984126, |
|
"grad_norm": 0.5843795042717066, |
|
"learning_rate": 9.064570172075349e-06, |
|
"loss": 0.7969, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.5139681695756663, |
|
"learning_rate": 9.031995087530403e-06, |
|
"loss": 0.7983, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 0.47799160571848326, |
|
"learning_rate": 8.99892313340537e-06, |
|
"loss": 0.6612, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.4603174603174602, |
|
"grad_norm": 0.48090290795792257, |
|
"learning_rate": 8.96535838521314e-06, |
|
"loss": 0.8026, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.4761904761904763, |
|
"grad_norm": 0.48955363216016506, |
|
"learning_rate": 8.931304979194452e-06, |
|
"loss": 0.8051, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.492063492063492, |
|
"grad_norm": 0.47949685756309185, |
|
"learning_rate": 8.896767111808177e-06, |
|
"loss": 0.7354, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.507936507936508, |
|
"grad_norm": 0.5732670061875946, |
|
"learning_rate": 8.861749039214177e-06, |
|
"loss": 0.9129, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.5238095238095237, |
|
"grad_norm": 0.48050508555262206, |
|
"learning_rate": 8.826255076748823e-06, |
|
"loss": 0.8445, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.5396825396825395, |
|
"grad_norm": 0.4329532952395629, |
|
"learning_rate": 8.790289598393186e-06, |
|
"loss": 0.7212, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 0.522751486773223, |
|
"learning_rate": 8.753857036234055e-06, |
|
"loss": 0.8149, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 0.4570961856172299, |
|
"learning_rate": 8.716961879917734e-06, |
|
"loss": 0.7365, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"grad_norm": 0.4363179134183329, |
|
"learning_rate": 8.679608676096793e-06, |
|
"loss": 0.8131, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6031746031746033, |
|
"grad_norm": 0.4655541415571893, |
|
"learning_rate": 8.641802027869774e-06, |
|
"loss": 0.7946, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.619047619047619, |
|
"grad_norm": 0.5743139418639736, |
|
"learning_rate": 8.603546594213935e-06, |
|
"loss": 0.8574, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.6349206349206349, |
|
"grad_norm": 0.5267570867681096, |
|
"learning_rate": 8.564847089411128e-06, |
|
"loss": 0.8286, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.6507936507936507, |
|
"grad_norm": 0.40799736834923667, |
|
"learning_rate": 8.525708282466839e-06, |
|
"loss": 0.8412, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.4236881481332967, |
|
"learning_rate": 8.486134996522502e-06, |
|
"loss": 0.8172, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.6825396825396826, |
|
"grad_norm": 0.5593679767726464, |
|
"learning_rate": 8.446132108261136e-06, |
|
"loss": 0.8058, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.6984126984126984, |
|
"grad_norm": 0.5031166228419733, |
|
"learning_rate": 8.405704547306379e-06, |
|
"loss": 0.8031, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.45322610730579044, |
|
"learning_rate": 8.364857295615006e-06, |
|
"loss": 0.8903, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.7301587301587302, |
|
"grad_norm": 0.5335556769284883, |
|
"learning_rate": 8.323595386862985e-06, |
|
"loss": 0.7925, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.746031746031746, |
|
"grad_norm": 0.4699718024263939, |
|
"learning_rate": 8.281923905825188e-06, |
|
"loss": 0.7664, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.7619047619047619, |
|
"grad_norm": 0.47207237316096745, |
|
"learning_rate": 8.23984798774876e-06, |
|
"loss": 0.7347, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 0.4532509556288616, |
|
"learning_rate": 8.197372817720314e-06, |
|
"loss": 0.7369, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.7936507936507935, |
|
"grad_norm": 0.5443221798521994, |
|
"learning_rate": 8.154503630026955e-06, |
|
"loss": 0.7261, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.8095238095238095, |
|
"grad_norm": 0.4456098920838456, |
|
"learning_rate": 8.111245707511253e-06, |
|
"loss": 0.7194, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.8253968253968254, |
|
"grad_norm": 0.4159654938486175, |
|
"learning_rate": 8.067604380920228e-06, |
|
"loss": 0.7945, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.8412698412698414, |
|
"grad_norm": 0.4706342532274064, |
|
"learning_rate": 8.023585028248435e-06, |
|
"loss": 0.8487, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 0.5701232470412769, |
|
"learning_rate": 7.979193074075216e-06, |
|
"loss": 0.8887, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.873015873015873, |
|
"grad_norm": 0.530430629054239, |
|
"learning_rate": 7.934433988896233e-06, |
|
"loss": 0.6534, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 0.48414840419963984, |
|
"learning_rate": 7.889313288449323e-06, |
|
"loss": 0.8214, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 0.4200926363513126, |
|
"learning_rate": 7.843836533034784e-06, |
|
"loss": 0.7614, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.9206349206349205, |
|
"grad_norm": 0.4941849127950555, |
|
"learning_rate": 7.798009326830167e-06, |
|
"loss": 0.7996, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.9365079365079365, |
|
"grad_norm": 0.41647477043231534, |
|
"learning_rate": 7.751837317199673e-06, |
|
"loss": 0.867, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.9523809523809523, |
|
"grad_norm": 0.4462896414872465, |
|
"learning_rate": 7.705326193998207e-06, |
|
"loss": 0.7547, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.9682539682539684, |
|
"grad_norm": 0.46366747032871125, |
|
"learning_rate": 7.658481688870218e-06, |
|
"loss": 0.7582, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.9841269841269842, |
|
"grad_norm": 0.4714130206121814, |
|
"learning_rate": 7.611309574543373e-06, |
|
"loss": 0.7606, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.46690869317456135, |
|
"learning_rate": 7.563815664117173e-06, |
|
"loss": 0.9121, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.015873015873016, |
|
"grad_norm": 0.8060769356732992, |
|
"learning_rate": 7.5160058103465985e-06, |
|
"loss": 0.7122, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.0317460317460316, |
|
"grad_norm": 0.5953210710991091, |
|
"learning_rate": 7.467885904920864e-06, |
|
"loss": 0.7567, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.0476190476190474, |
|
"grad_norm": 0.7560871980312371, |
|
"learning_rate": 7.419461877737373e-06, |
|
"loss": 0.8318, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.0634920634920633, |
|
"grad_norm": 2.1011598702400667, |
|
"learning_rate": 7.370739696170971e-06, |
|
"loss": 0.7428, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0793650793650795, |
|
"grad_norm": 0.940827314326734, |
|
"learning_rate": 7.321725364338566e-06, |
|
"loss": 0.6161, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.0952380952380953, |
|
"grad_norm": 0.5381465457966281, |
|
"learning_rate": 7.272424922359246e-06, |
|
"loss": 0.6432, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.111111111111111, |
|
"grad_norm": 0.7212158226191104, |
|
"learning_rate": 7.222844445609931e-06, |
|
"loss": 0.7817, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.126984126984127, |
|
"grad_norm": 0.6031927565028607, |
|
"learning_rate": 7.172990043976703e-06, |
|
"loss": 0.7291, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 0.5554913039306149, |
|
"learning_rate": 7.122867861101868e-06, |
|
"loss": 0.7928, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.1587301587301586, |
|
"grad_norm": 0.4440614086169425, |
|
"learning_rate": 7.072484073626872e-06, |
|
"loss": 0.6864, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.1746031746031744, |
|
"grad_norm": 2.875286251781212, |
|
"learning_rate": 7.021844890431136e-06, |
|
"loss": 0.7627, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 2.1904761904761907, |
|
"grad_norm": 0.7248206082063566, |
|
"learning_rate": 6.970956551866925e-06, |
|
"loss": 0.728, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.2063492063492065, |
|
"grad_norm": 0.5344769870855947, |
|
"learning_rate": 6.9198253289903515e-06, |
|
"loss": 0.6621, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 0.5334409779130068, |
|
"learning_rate": 6.868457522788561e-06, |
|
"loss": 0.7351, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.238095238095238, |
|
"grad_norm": 0.4791675678917909, |
|
"learning_rate": 6.816859463403271e-06, |
|
"loss": 0.6568, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.253968253968254, |
|
"grad_norm": 0.5667962259074942, |
|
"learning_rate": 6.765037509350685e-06, |
|
"loss": 0.758, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.2698412698412698, |
|
"grad_norm": 0.523154654898243, |
|
"learning_rate": 6.7129980467379265e-06, |
|
"loss": 0.6657, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 0.45239550513741295, |
|
"learning_rate": 6.660747488476066e-06, |
|
"loss": 0.6615, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.3015873015873014, |
|
"grad_norm": 0.6580127713752147, |
|
"learning_rate": 6.608292273489851e-06, |
|
"loss": 0.6112, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.317460317460317, |
|
"grad_norm": 0.6033248382665617, |
|
"learning_rate": 6.555638865924221e-06, |
|
"loss": 0.7033, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.5578461067365529, |
|
"learning_rate": 6.502793754347721e-06, |
|
"loss": 0.7578, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.3492063492063493, |
|
"grad_norm": 0.5451762654132818, |
|
"learning_rate": 6.449763450952912e-06, |
|
"loss": 0.6863, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.365079365079365, |
|
"grad_norm": 0.4667833185680937, |
|
"learning_rate": 6.396554490753848e-06, |
|
"loss": 0.6825, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 0.4209933154088852, |
|
"learning_rate": 6.343173430780769e-06, |
|
"loss": 0.836, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3968253968253967, |
|
"grad_norm": 0.46876037251704294, |
|
"learning_rate": 6.289626849272062e-06, |
|
"loss": 0.7981, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.4126984126984126, |
|
"grad_norm": 0.47367833829704725, |
|
"learning_rate": 6.2359213448636104e-06, |
|
"loss": 0.751, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 0.43459439089398605, |
|
"learning_rate": 6.182063535775634e-06, |
|
"loss": 0.7654, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 0.39767398947957067, |
|
"learning_rate": 6.1280600589971225e-06, |
|
"loss": 0.7896, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.4603174603174605, |
|
"grad_norm": 0.4231324131775063, |
|
"learning_rate": 6.073917569467934e-06, |
|
"loss": 0.8051, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.4761904761904763, |
|
"grad_norm": 0.3983830637612639, |
|
"learning_rate": 6.0196427392587085e-06, |
|
"loss": 0.7038, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.492063492063492, |
|
"grad_norm": 0.4585701856768339, |
|
"learning_rate": 5.96524225674865e-06, |
|
"loss": 0.7422, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.507936507936508, |
|
"grad_norm": 0.4299692751487169, |
|
"learning_rate": 5.9107228258013085e-06, |
|
"loss": 0.7053, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.5238095238095237, |
|
"grad_norm": 0.42827944956580943, |
|
"learning_rate": 5.856091164938451e-06, |
|
"loss": 0.6523, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.5396825396825395, |
|
"grad_norm": 0.4098750307712162, |
|
"learning_rate": 5.801354006512127e-06, |
|
"loss": 0.6895, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.5555555555555554, |
|
"grad_norm": 0.45624383692077836, |
|
"learning_rate": 5.746518095875033e-06, |
|
"loss": 0.6973, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.40961695420487504, |
|
"learning_rate": 5.6915901905492586e-06, |
|
"loss": 0.6285, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.5873015873015874, |
|
"grad_norm": 0.5946218628280344, |
|
"learning_rate": 5.6365770593935665e-06, |
|
"loss": 0.5907, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.6031746031746033, |
|
"grad_norm": 0.5401440035651196, |
|
"learning_rate": 5.581485481769231e-06, |
|
"loss": 0.7181, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.619047619047619, |
|
"grad_norm": 0.4378876946579892, |
|
"learning_rate": 5.526322246704628e-06, |
|
"loss": 0.7978, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.634920634920635, |
|
"grad_norm": 0.40853074862176036, |
|
"learning_rate": 5.471094152058592e-06, |
|
"loss": 0.681, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.6507936507936507, |
|
"grad_norm": 0.46389651051528763, |
|
"learning_rate": 5.415808003682717e-06, |
|
"loss": 0.7308, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.42992484133372394, |
|
"learning_rate": 5.360470614582661e-06, |
|
"loss": 0.7136, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.682539682539683, |
|
"grad_norm": 0.557870344379466, |
|
"learning_rate": 5.305088804078559e-06, |
|
"loss": 0.7333, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.6984126984126986, |
|
"grad_norm": 0.45453618737081114, |
|
"learning_rate": 5.249669396964665e-06, |
|
"loss": 0.6349, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.7142857142857144, |
|
"grad_norm": 0.4511080452383348, |
|
"learning_rate": 5.1942192226683385e-06, |
|
"loss": 0.776, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.7301587301587302, |
|
"grad_norm": 0.4900595891663431, |
|
"learning_rate": 5.138745114408427e-06, |
|
"loss": 0.5998, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.746031746031746, |
|
"grad_norm": 0.46419645481002475, |
|
"learning_rate": 5.083253908353193e-06, |
|
"loss": 0.6676, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.761904761904762, |
|
"grad_norm": 0.4905121964865482, |
|
"learning_rate": 5.0277524427778986e-06, |
|
"loss": 0.7831, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 0.4525848803424086, |
|
"learning_rate": 4.972247557222102e-06, |
|
"loss": 0.7164, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.7936507936507935, |
|
"grad_norm": 0.5368330661361714, |
|
"learning_rate": 4.916746091646808e-06, |
|
"loss": 0.6805, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.8095238095238093, |
|
"grad_norm": 0.4224136348005534, |
|
"learning_rate": 4.8612548855915755e-06, |
|
"loss": 0.724, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.825396825396825, |
|
"grad_norm": 0.4869146817578471, |
|
"learning_rate": 4.805780777331662e-06, |
|
"loss": 0.7446, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 2.8412698412698414, |
|
"grad_norm": 0.4511553539717499, |
|
"learning_rate": 4.750330603035336e-06, |
|
"loss": 0.7124, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.45286461188582156, |
|
"learning_rate": 4.694911195921443e-06, |
|
"loss": 0.7252, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.873015873015873, |
|
"grad_norm": 0.4293856541441545, |
|
"learning_rate": 4.6395293854173395e-06, |
|
"loss": 0.6053, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 0.5836445711298119, |
|
"learning_rate": 4.584191996317285e-06, |
|
"loss": 0.6828, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.9047619047619047, |
|
"grad_norm": 0.4710990644177235, |
|
"learning_rate": 4.528905847941411e-06, |
|
"loss": 0.8414, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.9206349206349205, |
|
"grad_norm": 0.4770727006845428, |
|
"learning_rate": 4.473677753295375e-06, |
|
"loss": 0.6592, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.9365079365079367, |
|
"grad_norm": 0.4087196329651188, |
|
"learning_rate": 4.418514518230769e-06, |
|
"loss": 0.7122, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.9523809523809526, |
|
"grad_norm": 0.48530953017287554, |
|
"learning_rate": 4.363422940606435e-06, |
|
"loss": 0.7454, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.9682539682539684, |
|
"grad_norm": 0.4900655757859956, |
|
"learning_rate": 4.308409809450742e-06, |
|
"loss": 0.7621, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 2.984126984126984, |
|
"grad_norm": 0.49992358594135816, |
|
"learning_rate": 4.253481904124968e-06, |
|
"loss": 0.7331, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.4385555753140365, |
|
"learning_rate": 4.198645993487872e-06, |
|
"loss": 0.604, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.015873015873016, |
|
"grad_norm": 0.8387072939566776, |
|
"learning_rate": 4.143908835061551e-06, |
|
"loss": 0.6844, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0317460317460316, |
|
"grad_norm": 0.5067553850596358, |
|
"learning_rate": 4.089277174198694e-06, |
|
"loss": 0.7, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.0476190476190474, |
|
"grad_norm": 0.8011190701762613, |
|
"learning_rate": 4.0347577432513515e-06, |
|
"loss": 0.743, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0634920634920633, |
|
"grad_norm": 0.5036174850503073, |
|
"learning_rate": 3.980357260741293e-06, |
|
"loss": 0.6811, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 3.0793650793650795, |
|
"grad_norm": 0.5463958175244662, |
|
"learning_rate": 3.926082430532067e-06, |
|
"loss": 0.6461, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 3.0952380952380953, |
|
"grad_norm": 0.6797072746043096, |
|
"learning_rate": 3.87193994100288e-06, |
|
"loss": 0.6076, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 0.6595821505262829, |
|
"learning_rate": 3.817936464224367e-06, |
|
"loss": 0.6729, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 3.126984126984127, |
|
"grad_norm": 0.6303031378119635, |
|
"learning_rate": 3.764078655136391e-06, |
|
"loss": 0.7462, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 0.5867535234820467, |
|
"learning_rate": 3.7103731507279383e-06, |
|
"loss": 0.6994, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 3.1587301587301586, |
|
"grad_norm": 0.744206344239702, |
|
"learning_rate": 3.656826569219233e-06, |
|
"loss": 0.6698, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"grad_norm": 0.48008260540189834, |
|
"learning_rate": 3.603445509246154e-06, |
|
"loss": 0.641, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.1904761904761907, |
|
"grad_norm": 0.4479357669097382, |
|
"learning_rate": 3.55023654904709e-06, |
|
"loss": 0.7151, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.2063492063492065, |
|
"grad_norm": 0.4992070753022617, |
|
"learning_rate": 3.49720624565228e-06, |
|
"loss": 0.5512, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 3.2222222222222223, |
|
"grad_norm": 0.48763610007436026, |
|
"learning_rate": 3.44436113407578e-06, |
|
"loss": 0.6851, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 3.238095238095238, |
|
"grad_norm": 0.5828989750562972, |
|
"learning_rate": 3.3917077265101505e-06, |
|
"loss": 0.7484, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 3.253968253968254, |
|
"grad_norm": 0.5181196242222144, |
|
"learning_rate": 3.3392525115239353e-06, |
|
"loss": 0.5734, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.2698412698412698, |
|
"grad_norm": 0.47471736105285633, |
|
"learning_rate": 3.2870019532620744e-06, |
|
"loss": 0.7111, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 3.2857142857142856, |
|
"grad_norm": 0.44233225614290655, |
|
"learning_rate": 3.2349624906493164e-06, |
|
"loss": 0.6547, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.3015873015873014, |
|
"grad_norm": 0.549412981718262, |
|
"learning_rate": 3.1831405365967315e-06, |
|
"loss": 0.5268, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 3.317460317460317, |
|
"grad_norm": 0.4610400286542981, |
|
"learning_rate": 3.1315424772114404e-06, |
|
"loss": 0.7138, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.5912978101838896, |
|
"learning_rate": 3.0801746710096497e-06, |
|
"loss": 0.5523, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.3492063492063493, |
|
"grad_norm": 0.5365898687480075, |
|
"learning_rate": 3.0290434481330746e-06, |
|
"loss": 0.586, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 3.365079365079365, |
|
"grad_norm": 0.46259460742378206, |
|
"learning_rate": 2.978155109568864e-06, |
|
"loss": 0.6185, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.380952380952381, |
|
"grad_norm": 0.5035201915907002, |
|
"learning_rate": 2.927515926373129e-06, |
|
"loss": 0.6636, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.3968253968253967, |
|
"grad_norm": 0.45266495724596634, |
|
"learning_rate": 2.8771321388981334e-06, |
|
"loss": 0.6636, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 3.4126984126984126, |
|
"grad_norm": 0.9391085088935888, |
|
"learning_rate": 2.8270099560232992e-06, |
|
"loss": 0.7087, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 0.4858344733348455, |
|
"learning_rate": 2.77715555439007e-06, |
|
"loss": 0.7505, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.4444444444444446, |
|
"grad_norm": 0.43967486893271346, |
|
"learning_rate": 2.7275750776407568e-06, |
|
"loss": 0.6028, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 3.4603174603174605, |
|
"grad_norm": 0.4717225483417324, |
|
"learning_rate": 2.6782746356614364e-06, |
|
"loss": 0.5843, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.4761904761904763, |
|
"grad_norm": 0.5642208112077767, |
|
"learning_rate": 2.6292603038290306e-06, |
|
"loss": 0.7137, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.492063492063492, |
|
"grad_norm": 0.49853539661605933, |
|
"learning_rate": 2.580538122262627e-06, |
|
"loss": 0.6527, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.507936507936508, |
|
"grad_norm": 0.5784669157284802, |
|
"learning_rate": 2.532114095079137e-06, |
|
"loss": 0.5754, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 3.5238095238095237, |
|
"grad_norm": 0.451433112279235, |
|
"learning_rate": 2.4839941896534027e-06, |
|
"loss": 0.6267, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.5396825396825395, |
|
"grad_norm": 0.43076481074733924, |
|
"learning_rate": 2.4361843358828287e-06, |
|
"loss": 0.5873, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 0.5555243876307536, |
|
"learning_rate": 2.388690425456629e-06, |
|
"loss": 0.7168, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.5140731956812183, |
|
"learning_rate": 2.341518311129781e-06, |
|
"loss": 0.5671, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.5873015873015874, |
|
"grad_norm": 0.4829910245643812, |
|
"learning_rate": 2.2946738060017947e-06, |
|
"loss": 0.708, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 3.6031746031746033, |
|
"grad_norm": 0.4822066553569954, |
|
"learning_rate": 2.24816268280033e-06, |
|
"loss": 0.653, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 3.619047619047619, |
|
"grad_norm": 0.47281479364796025, |
|
"learning_rate": 2.2019906731698337e-06, |
|
"loss": 0.6494, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.634920634920635, |
|
"grad_norm": 0.40568931340881786, |
|
"learning_rate": 2.156163466965218e-06, |
|
"loss": 0.6293, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 3.6507936507936507, |
|
"grad_norm": 0.8300833772536724, |
|
"learning_rate": 2.110686711550678e-06, |
|
"loss": 0.7077, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.6666666666666665, |
|
"grad_norm": 0.4061549756977283, |
|
"learning_rate": 2.0655660111037685e-06, |
|
"loss": 0.6339, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.682539682539683, |
|
"grad_norm": 0.4255055033708699, |
|
"learning_rate": 2.0208069259247866e-06, |
|
"loss": 0.6577, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.6984126984126986, |
|
"grad_norm": 0.43647553702799224, |
|
"learning_rate": 1.976414971751568e-06, |
|
"loss": 0.6029, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 0.4631401287763674, |
|
"learning_rate": 1.932395619079771e-06, |
|
"loss": 0.6777, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.7301587301587302, |
|
"grad_norm": 0.4306497798982157, |
|
"learning_rate": 1.8887542924887486e-06, |
|
"loss": 0.6996, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.746031746031746, |
|
"grad_norm": 0.42608290640248714, |
|
"learning_rate": 1.8454963699730471e-06, |
|
"loss": 0.6727, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.761904761904762, |
|
"grad_norm": 0.43950627489492866, |
|
"learning_rate": 1.802627182279687e-06, |
|
"loss": 0.5927, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 0.8150329270991283, |
|
"learning_rate": 1.760152012251241e-06, |
|
"loss": 0.5039, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 3.7936507936507935, |
|
"grad_norm": 0.5428442791120296, |
|
"learning_rate": 1.7180760941748132e-06, |
|
"loss": 0.6682, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 3.8095238095238093, |
|
"grad_norm": 0.7967588807434276, |
|
"learning_rate": 1.6764046131370142e-06, |
|
"loss": 0.7302, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.825396825396825, |
|
"grad_norm": 0.4533126401638171, |
|
"learning_rate": 1.6351427043849955e-06, |
|
"loss": 0.6953, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 3.8412698412698414, |
|
"grad_norm": 0.5989253712710739, |
|
"learning_rate": 1.5942954526936217e-06, |
|
"loss": 0.654, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 3.857142857142857, |
|
"grad_norm": 0.5693019253502395, |
|
"learning_rate": 1.5538678917388638e-06, |
|
"loss": 0.6618, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.873015873015873, |
|
"grad_norm": 0.47402524421174175, |
|
"learning_rate": 1.5138650034775004e-06, |
|
"loss": 0.5709, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 0.4074162350507339, |
|
"learning_rate": 1.4742917175331644e-06, |
|
"loss": 0.7154, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.9047619047619047, |
|
"grad_norm": 0.4421839212167129, |
|
"learning_rate": 1.4351529105888735e-06, |
|
"loss": 0.7655, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 3.9206349206349205, |
|
"grad_norm": 0.45643272842877347, |
|
"learning_rate": 1.3964534057860652e-06, |
|
"loss": 0.5952, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 3.9365079365079367, |
|
"grad_norm": 0.4769259453601474, |
|
"learning_rate": 1.3581979721302286e-06, |
|
"loss": 0.6536, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 3.9523809523809526, |
|
"grad_norm": 0.5748113648603245, |
|
"learning_rate": 1.3203913239032074e-06, |
|
"loss": 0.6675, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 3.9682539682539684, |
|
"grad_norm": 0.5055584872741437, |
|
"learning_rate": 1.283038120082268e-06, |
|
"loss": 0.6196, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.984126984126984, |
|
"grad_norm": 0.4492562647446275, |
|
"learning_rate": 1.2461429637659466e-06, |
|
"loss": 0.6194, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.5069425974799285, |
|
"learning_rate": 1.2097104016068146e-06, |
|
"loss": 0.6338, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.015873015873016, |
|
"grad_norm": 0.7325130340591377, |
|
"learning_rate": 1.1737449232511799e-06, |
|
"loss": 0.6351, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 4.031746031746032, |
|
"grad_norm": 0.47508153140792514, |
|
"learning_rate": 1.1382509607858233e-06, |
|
"loss": 0.6771, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 4.0476190476190474, |
|
"grad_norm": 0.42235140407913196, |
|
"learning_rate": 1.1032328881918237e-06, |
|
"loss": 0.6626, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.063492063492063, |
|
"grad_norm": 0.5366768605947194, |
|
"learning_rate": 1.0686950208055486e-06, |
|
"loss": 0.6944, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 4.079365079365079, |
|
"grad_norm": 0.48961048311306354, |
|
"learning_rate": 1.034641614786862e-06, |
|
"loss": 0.6248, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 4.095238095238095, |
|
"grad_norm": 0.6474817812230599, |
|
"learning_rate": 1.0010768665946309e-06, |
|
"loss": 0.6044, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 4.111111111111111, |
|
"grad_norm": 0.5770682040417152, |
|
"learning_rate": 9.680049124695973e-07, |
|
"loss": 0.6343, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 4.1269841269841265, |
|
"grad_norm": 0.5366437759340096, |
|
"learning_rate": 9.35429827924652e-07, |
|
"loss": 0.6539, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.142857142857143, |
|
"grad_norm": 0.6413478244014154, |
|
"learning_rate": 9.033556272426075e-07, |
|
"loss": 0.5768, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 4.158730158730159, |
|
"grad_norm": 0.6609629042151937, |
|
"learning_rate": 8.717862629815099e-07, |
|
"loss": 0.66, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 4.174603174603175, |
|
"grad_norm": 0.4842145086290818, |
|
"learning_rate": 8.407256254875573e-07, |
|
"loss": 0.5534, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 4.190476190476191, |
|
"grad_norm": 0.42750956471439316, |
|
"learning_rate": 8.101775424156888e-07, |
|
"loss": 0.7396, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 4.2063492063492065, |
|
"grad_norm": 0.43019081247965996, |
|
"learning_rate": 7.801457782578947e-07, |
|
"loss": 0.574, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 4.222222222222222, |
|
"grad_norm": 0.44008885788118995, |
|
"learning_rate": 7.506340338793111e-07, |
|
"loss": 0.6995, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 4.238095238095238, |
|
"grad_norm": 0.46207221181078584, |
|
"learning_rate": 7.216459460621528e-07, |
|
"loss": 0.5681, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 4.253968253968254, |
|
"grad_norm": 0.5615985950866916, |
|
"learning_rate": 6.931850870575563e-07, |
|
"loss": 0.6041, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 4.26984126984127, |
|
"grad_norm": 0.7306441290684199, |
|
"learning_rate": 6.652549641453543e-07, |
|
"loss": 0.652, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.6570180830204069, |
|
"learning_rate": 6.378590192018752e-07, |
|
"loss": 0.5264, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.301587301587301, |
|
"grad_norm": 0.5038100342096954, |
|
"learning_rate": 6.110006282757897e-07, |
|
"loss": 0.6315, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 4.317460317460317, |
|
"grad_norm": 0.49329707190738137, |
|
"learning_rate": 5.846831011720789e-07, |
|
"loss": 0.5652, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 4.333333333333333, |
|
"grad_norm": 0.47592088200670746, |
|
"learning_rate": 5.589096810441574e-07, |
|
"loss": 0.5355, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 4.349206349206349, |
|
"grad_norm": 0.41595101638818205, |
|
"learning_rate": 5.3368354399422e-07, |
|
"loss": 0.6575, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 4.365079365079365, |
|
"grad_norm": 0.4900815198733562, |
|
"learning_rate": 5.090077986818365e-07, |
|
"loss": 0.5854, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.380952380952381, |
|
"grad_norm": 0.5644693062570447, |
|
"learning_rate": 4.848854859408731e-07, |
|
"loss": 0.6625, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 4.396825396825397, |
|
"grad_norm": 0.47377982529710666, |
|
"learning_rate": 4.613195784047653e-07, |
|
"loss": 0.596, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 4.412698412698413, |
|
"grad_norm": 0.4584447745975199, |
|
"learning_rate": 4.3831298014019144e-07, |
|
"loss": 0.6383, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 4.428571428571429, |
|
"grad_norm": 0.4565322356771251, |
|
"learning_rate": 4.1586852628920095e-07, |
|
"loss": 0.5529, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 0.47404711132247973, |
|
"learning_rate": 3.939889827198362e-07, |
|
"loss": 0.4942, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.4603174603174605, |
|
"grad_norm": 0.45271106934439176, |
|
"learning_rate": 3.7267704568529015e-07, |
|
"loss": 0.5257, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 4.476190476190476, |
|
"grad_norm": 0.6723472247553434, |
|
"learning_rate": 3.519353414916404e-07, |
|
"loss": 0.6823, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.492063492063492, |
|
"grad_norm": 0.4048181921028701, |
|
"learning_rate": 3.3176642617420817e-07, |
|
"loss": 0.6414, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 4.507936507936508, |
|
"grad_norm": 0.505253004014245, |
|
"learning_rate": 3.1217278518256844e-07, |
|
"loss": 0.6771, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.523809523809524, |
|
"grad_norm": 0.43581438187229343, |
|
"learning_rate": 2.93156833074269e-07, |
|
"loss": 0.7079, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.5396825396825395, |
|
"grad_norm": 0.4401525579247126, |
|
"learning_rate": 2.7472091321728067e-07, |
|
"loss": 0.5173, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 4.555555555555555, |
|
"grad_norm": 0.5337113220659214, |
|
"learning_rate": 2.568672975012154e-07, |
|
"loss": 0.5528, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.49778493285427783, |
|
"learning_rate": 2.3959818605736095e-07, |
|
"loss": 0.5663, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.587301587301587, |
|
"grad_norm": 0.40499062553885884, |
|
"learning_rate": 2.229157069875537e-07, |
|
"loss": 0.6321, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 4.603174603174603, |
|
"grad_norm": 0.4127656173064128, |
|
"learning_rate": 2.068219161019297e-07, |
|
"loss": 0.4887, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.619047619047619, |
|
"grad_norm": 0.5639846795868625, |
|
"learning_rate": 1.9131879666558385e-07, |
|
"loss": 0.6286, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.634920634920634, |
|
"grad_norm": 0.5278765172669099, |
|
"learning_rate": 1.7640825915416994e-07, |
|
"loss": 0.5375, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 4.650793650793651, |
|
"grad_norm": 0.39500499979498543, |
|
"learning_rate": 1.6209214101846394e-07, |
|
"loss": 0.5712, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 0.4763017000664341, |
|
"learning_rate": 1.4837220645793905e-07, |
|
"loss": 0.6848, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 4.682539682539683, |
|
"grad_norm": 0.44371008674048357, |
|
"learning_rate": 1.3525014620335786e-07, |
|
"loss": 0.671, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 4.698412698412699, |
|
"grad_norm": 0.48437863773460277, |
|
"learning_rate": 1.2272757730841744e-07, |
|
"loss": 0.7189, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 4.714285714285714, |
|
"grad_norm": 0.466188982528453, |
|
"learning_rate": 1.1080604295048203e-07, |
|
"loss": 0.5655, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 4.73015873015873, |
|
"grad_norm": 0.5243644807437886, |
|
"learning_rate": 9.948701224041124e-08, |
|
"loss": 0.678, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 4.746031746031746, |
|
"grad_norm": 0.41383827022733516, |
|
"learning_rate": 8.877188004152104e-08, |
|
"loss": 0.677, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 4.761904761904762, |
|
"grad_norm": 0.4845407723856424, |
|
"learning_rate": 7.866196679768956e-08, |
|
"loss": 0.5844, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.777777777777778, |
|
"grad_norm": 0.4125199836030617, |
|
"learning_rate": 6.91585183706428e-08, |
|
"loss": 0.5936, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 4.7936507936507935, |
|
"grad_norm": 0.42044450878999223, |
|
"learning_rate": 6.02627058864158e-08, |
|
"loss": 0.6322, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 4.809523809523809, |
|
"grad_norm": 0.43796427028494905, |
|
"learning_rate": 5.19756255910403e-08, |
|
"loss": 0.6218, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 4.825396825396825, |
|
"grad_norm": 0.4943567439631721, |
|
"learning_rate": 4.429829871545055e-08, |
|
"loss": 0.6154, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 4.841269841269841, |
|
"grad_norm": 0.4041231594209634, |
|
"learning_rate": 3.7231671349634015e-08, |
|
"loss": 0.5363, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"grad_norm": 0.4244498098386566, |
|
"learning_rate": 3.077661432604184e-08, |
|
"loss": 0.5705, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 4.8730158730158735, |
|
"grad_norm": 0.4028466383216641, |
|
"learning_rate": 2.4933923112279712e-08, |
|
"loss": 0.6748, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 0.4337309649014186, |
|
"learning_rate": 1.9704317713076236e-08, |
|
"loss": 0.7078, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 4.904761904761905, |
|
"grad_norm": 0.4603602645522074, |
|
"learning_rate": 1.508844258155728e-08, |
|
"loss": 0.6406, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 4.920634920634921, |
|
"grad_norm": 0.408641845898798, |
|
"learning_rate": 1.1086866539830044e-08, |
|
"loss": 0.6818, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.936507936507937, |
|
"grad_norm": 0.4380910149550681, |
|
"learning_rate": 7.700082708883006e-09, |
|
"loss": 0.6386, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 4.9523809523809526, |
|
"grad_norm": 0.4375524382908019, |
|
"learning_rate": 4.928508447821223e-09, |
|
"loss": 0.6198, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 4.968253968253968, |
|
"grad_norm": 0.4660304819447141, |
|
"learning_rate": 2.7724853024324594e-09, |
|
"loss": 0.5728, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 4.984126984126984, |
|
"grad_norm": 0.5605867904986561, |
|
"learning_rate": 1.2322789630997422e-09, |
|
"loss": 0.6566, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.47483560805428604, |
|
"learning_rate": 3.080792320564463e-10, |
|
"loss": 0.5612, |
|
"step": 315 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 78480301031424.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|