{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4994054696789536,
  "eval_steps": 105,
  "global_step": 210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0023781212841854932,
      "grad_norm": 32.382706174573094,
      "learning_rate": 5.000000000000001e-07,
      "loss": 2.9481,
      "step": 1
    },
    {
      "epoch": 0.0023781212841854932,
      "eval_loss": 3.3739373683929443,
      "eval_runtime": 152.3599,
      "eval_samples_per_second": 4.194,
      "eval_steps_per_second": 0.525,
      "step": 1
    },
    {
      "epoch": 0.0047562425683709865,
      "grad_norm": 36.523706488132554,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 2.8267,
      "step": 2
    },
    {
      "epoch": 0.007134363852556481,
      "grad_norm": 30.823955989385283,
      "learning_rate": 1.5e-06,
      "loss": 2.9936,
      "step": 3
    },
    {
      "epoch": 0.009512485136741973,
      "grad_norm": 27.113313789497223,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.8945,
      "step": 4
    },
    {
      "epoch": 0.011890606420927468,
      "grad_norm": 13.505949871248614,
      "learning_rate": 2.5e-06,
      "loss": 2.7689,
      "step": 5
    },
    {
      "epoch": 0.014268727705112961,
      "grad_norm": 15.0338641814596,
      "learning_rate": 3e-06,
      "loss": 2.6611,
      "step": 6
    },
    {
      "epoch": 0.016646848989298454,
      "grad_norm": 14.319329036066911,
      "learning_rate": 3.5e-06,
      "loss": 2.8985,
      "step": 7
    },
    {
      "epoch": 0.019024970273483946,
      "grad_norm": 14.23176344511718,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.834,
      "step": 8
    },
    {
      "epoch": 0.02140309155766944,
      "grad_norm": 13.977227656063016,
      "learning_rate": 4.5e-06,
      "loss": 2.7566,
      "step": 9
    },
    {
      "epoch": 0.023781212841854936,
      "grad_norm": 11.952136262918579,
      "learning_rate": 5e-06,
      "loss": 2.5198,
      "step": 10
    },
    {
      "epoch": 0.026159334126040427,
      "grad_norm": 8.093250140015975,
      "learning_rate": 5.500000000000001e-06,
      "loss": 2.6486,
      "step": 11
    },
    {
      "epoch": 0.028537455410225922,
      "grad_norm": 4.914080620863233,
      "learning_rate": 6e-06,
      "loss": 2.4014,
      "step": 12
    },
    {
      "epoch": 0.030915576694411414,
      "grad_norm": 4.175963213321,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 2.3925,
      "step": 13
    },
    {
      "epoch": 0.03329369797859691,
      "grad_norm": 4.098569808536842,
      "learning_rate": 7e-06,
      "loss": 2.5433,
      "step": 14
    },
    {
      "epoch": 0.0356718192627824,
      "grad_norm": 5.46644902034287,
      "learning_rate": 7.500000000000001e-06,
      "loss": 2.5083,
      "step": 15
    },
    {
      "epoch": 0.03804994054696789,
      "grad_norm": 5.787961659158823,
      "learning_rate": 8.000000000000001e-06,
      "loss": 2.6687,
      "step": 16
    },
    {
      "epoch": 0.04042806183115339,
      "grad_norm": 3.034638828158752,
      "learning_rate": 8.5e-06,
      "loss": 2.4193,
      "step": 17
    },
    {
      "epoch": 0.04280618311533888,
      "grad_norm": 3.6371355055713352,
      "learning_rate": 9e-06,
      "loss": 2.3729,
      "step": 18
    },
    {
      "epoch": 0.04518430439952437,
      "grad_norm": 2.1215238627086714,
      "learning_rate": 9.5e-06,
      "loss": 2.4799,
      "step": 19
    },
    {
      "epoch": 0.04756242568370987,
      "grad_norm": 3.987132196757572,
      "learning_rate": 1e-05,
      "loss": 2.3981,
      "step": 20
    },
    {
      "epoch": 0.04994054696789536,
      "grad_norm": 1.8643899752237216,
      "learning_rate": 1.0500000000000001e-05,
      "loss": 2.6625,
      "step": 21
    },
    {
      "epoch": 0.052318668252080855,
      "grad_norm": 1.9762840454371524,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 2.4424,
      "step": 22
    },
    {
      "epoch": 0.054696789536266346,
      "grad_norm": 1.6430481169991258,
      "learning_rate": 1.15e-05,
      "loss": 2.3021,
      "step": 23
    },
    {
      "epoch": 0.057074910820451845,
      "grad_norm": 2.539281280765958,
      "learning_rate": 1.2e-05,
      "loss": 2.5471,
      "step": 24
    },
    {
      "epoch": 0.059453032104637336,
      "grad_norm": 1.457528590578011,
      "learning_rate": 1.25e-05,
      "loss": 2.2526,
      "step": 25
    },
    {
      "epoch": 0.06183115338882283,
      "grad_norm": 2.1155531281068884,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 2.8667,
      "step": 26
    },
    {
      "epoch": 0.06420927467300833,
      "grad_norm": 1.73342938568033,
      "learning_rate": 1.3500000000000001e-05,
      "loss": 2.4915,
      "step": 27
    },
    {
      "epoch": 0.06658739595719382,
      "grad_norm": 1.7317610327219517,
      "learning_rate": 1.4e-05,
      "loss": 2.498,
      "step": 28
    },
    {
      "epoch": 0.06896551724137931,
      "grad_norm": 1.8363122567034704,
      "learning_rate": 1.45e-05,
      "loss": 2.4243,
      "step": 29
    },
    {
      "epoch": 0.0713436385255648,
      "grad_norm": 1.6309580804999364,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 2.1101,
      "step": 30
    },
    {
      "epoch": 0.07372175980975029,
      "grad_norm": 1.334654998985815,
      "learning_rate": 1.55e-05,
      "loss": 2.1046,
      "step": 31
    },
    {
      "epoch": 0.07609988109393578,
      "grad_norm": 1.2376006409087055,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 2.4625,
      "step": 32
    },
    {
      "epoch": 0.07847800237812129,
      "grad_norm": 1.8515124675338408,
      "learning_rate": 1.65e-05,
      "loss": 2.3255,
      "step": 33
    },
    {
      "epoch": 0.08085612366230678,
      "grad_norm": 2.4630792631286575,
      "learning_rate": 1.7e-05,
      "loss": 2.3863,
      "step": 34
    },
    {
      "epoch": 0.08323424494649227,
      "grad_norm": 1.4437945302508726,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 2.6266,
      "step": 35
    },
    {
      "epoch": 0.08561236623067776,
      "grad_norm": 1.6987676732458694,
      "learning_rate": 1.8e-05,
      "loss": 2.2871,
      "step": 36
    },
    {
      "epoch": 0.08799048751486326,
      "grad_norm": 1.8155955386416776,
      "learning_rate": 1.8500000000000002e-05,
      "loss": 2.2534,
      "step": 37
    },
    {
      "epoch": 0.09036860879904875,
      "grad_norm": 1.1727394458786922,
      "learning_rate": 1.9e-05,
      "loss": 2.3458,
      "step": 38
    },
    {
      "epoch": 0.09274673008323424,
      "grad_norm": 1.4758138327709573,
      "learning_rate": 1.95e-05,
      "loss": 2.4411,
      "step": 39
    },
    {
      "epoch": 0.09512485136741974,
      "grad_norm": 2.683368502920404,
      "learning_rate": 2e-05,
      "loss": 2.5046,
      "step": 40
    },
    {
      "epoch": 0.09750297265160524,
      "grad_norm": 1.6044285552596205,
      "learning_rate": 1.9999981652287733e-05,
      "loss": 2.413,
      "step": 41
    },
    {
      "epoch": 0.09988109393579073,
      "grad_norm": 1.2802177698200177,
      "learning_rate": 1.999992660921826e-05,
      "loss": 2.2159,
      "step": 42
    },
    {
      "epoch": 0.10225921521997622,
      "grad_norm": 1.8164033444979393,
      "learning_rate": 1.999983487099356e-05,
      "loss": 2.5287,
      "step": 43
    },
    {
      "epoch": 0.10463733650416171,
      "grad_norm": 1.8420609096797744,
      "learning_rate": 1.999970643795027e-05,
      "loss": 2.3484,
      "step": 44
    },
    {
      "epoch": 0.1070154577883472,
      "grad_norm": 1.4183887604989038,
      "learning_rate": 1.9999541310559686e-05,
      "loss": 2.4876,
      "step": 45
    },
    {
      "epoch": 0.10939357907253269,
      "grad_norm": 1.419110897512839,
      "learning_rate": 1.9999339489427746e-05,
      "loss": 2.3251,
      "step": 46
    },
    {
      "epoch": 0.1117717003567182,
      "grad_norm": 1.6548987738436802,
      "learning_rate": 1.9999100975295046e-05,
      "loss": 2.358,
      "step": 47
    },
    {
      "epoch": 0.11414982164090369,
      "grad_norm": 1.7822053646902,
      "learning_rate": 1.999882576903682e-05,
      "loss": 2.5853,
      "step": 48
    },
    {
      "epoch": 0.11652794292508918,
      "grad_norm": 1.1885208188962406,
      "learning_rate": 1.9998513871662945e-05,
      "loss": 2.3794,
      "step": 49
    },
    {
      "epoch": 0.11890606420927467,
      "grad_norm": 1.7227839639402496,
      "learning_rate": 1.9998165284317944e-05,
      "loss": 2.1933,
      "step": 50
    },
    {
      "epoch": 0.12128418549346016,
      "grad_norm": 1.3654158039316031,
      "learning_rate": 1.999778000828098e-05,
      "loss": 2.0518,
      "step": 51
    },
    {
      "epoch": 0.12366230677764566,
      "grad_norm": 1.3281085763249942,
      "learning_rate": 1.9997358044965833e-05,
      "loss": 2.4352,
      "step": 52
    },
    {
      "epoch": 0.12604042806183116,
      "grad_norm": 1.1185472935459022,
      "learning_rate": 1.9996899395920915e-05,
      "loss": 2.4415,
      "step": 53
    },
    {
      "epoch": 0.12841854934601665,
      "grad_norm": 1.343325378951958,
      "learning_rate": 1.999640406282926e-05,
      "loss": 2.2661,
      "step": 54
    },
    {
      "epoch": 0.13079667063020214,
      "grad_norm": 1.371349840462257,
      "learning_rate": 1.9995872047508516e-05,
      "loss": 2.2813,
      "step": 55
    },
    {
      "epoch": 0.13317479191438764,
      "grad_norm": 1.2560078627168356,
      "learning_rate": 1.9995303351910934e-05,
      "loss": 2.494,
      "step": 56
    },
    {
      "epoch": 0.13555291319857313,
      "grad_norm": 1.3801141775649166,
      "learning_rate": 1.9994697978123363e-05,
      "loss": 2.1525,
      "step": 57
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 2.8895107871445167,
      "learning_rate": 1.9994055928367256e-05,
      "loss": 2.6727,
      "step": 58
    },
    {
      "epoch": 0.1403091557669441,
      "grad_norm": 1.231028014153545,
      "learning_rate": 1.999337720499863e-05,
      "loss": 2.3854,
      "step": 59
    },
    {
      "epoch": 0.1426872770511296,
      "grad_norm": 1.5588938179669447,
      "learning_rate": 1.99926618105081e-05,
      "loss": 2.033,
      "step": 60
    },
    {
      "epoch": 0.1450653983353151,
      "grad_norm": 1.1690094119529322,
      "learning_rate": 1.9991909747520835e-05,
      "loss": 2.1,
      "step": 61
    },
    {
      "epoch": 0.14744351961950058,
      "grad_norm": 1.5407871708756533,
      "learning_rate": 1.999112101879656e-05,
      "loss": 2.3949,
      "step": 62
    },
    {
      "epoch": 0.14982164090368608,
      "grad_norm": 1.0249789290696483,
      "learning_rate": 1.9990295627229544e-05,
      "loss": 2.463,
      "step": 63
    },
    {
      "epoch": 0.15219976218787157,
      "grad_norm": 1.5221618005694364,
      "learning_rate": 1.99894335758486e-05,
      "loss": 2.0575,
      "step": 64
    },
    {
      "epoch": 0.1545778834720571,
      "grad_norm": 1.0758079202744963,
      "learning_rate": 1.9988534867817065e-05,
      "loss": 2.0227,
      "step": 65
    },
    {
      "epoch": 0.15695600475624258,
      "grad_norm": 1.3430324490071632,
      "learning_rate": 1.9987599506432785e-05,
      "loss": 2.467,
      "step": 66
    },
    {
      "epoch": 0.15933412604042807,
      "grad_norm": 1.0714364507553935,
      "learning_rate": 1.9986627495128105e-05,
      "loss": 2.3812,
      "step": 67
    },
    {
      "epoch": 0.16171224732461356,
      "grad_norm": 1.296279438065536,
      "learning_rate": 1.9985618837469864e-05,
      "loss": 2.4045,
      "step": 68
    },
    {
      "epoch": 0.16409036860879905,
      "grad_norm": 1.1423020273292732,
      "learning_rate": 1.998457353715938e-05,
      "loss": 2.386,
      "step": 69
    },
    {
      "epoch": 0.16646848989298454,
      "grad_norm": 1.5165435155836928,
      "learning_rate": 1.998349159803241e-05,
      "loss": 2.3063,
      "step": 70
    },
    {
      "epoch": 0.16884661117717004,
      "grad_norm": 2.758568140385349,
      "learning_rate": 1.9982373024059195e-05,
      "loss": 2.4007,
      "step": 71
    },
    {
      "epoch": 0.17122473246135553,
      "grad_norm": 1.268753870102164,
      "learning_rate": 1.998121781934438e-05,
      "loss": 2.1938,
      "step": 72
    },
    {
      "epoch": 0.17360285374554102,
      "grad_norm": 1.1116353498200209,
      "learning_rate": 1.9980025988127037e-05,
      "loss": 2.1202,
      "step": 73
    },
    {
      "epoch": 0.1759809750297265,
      "grad_norm": 1.266229091733924,
      "learning_rate": 1.9978797534780646e-05,
      "loss": 2.4397,
      "step": 74
    },
    {
      "epoch": 0.178359096313912,
      "grad_norm": 1.2442419764935988,
      "learning_rate": 1.9977532463813064e-05,
      "loss": 2.4345,
      "step": 75
    },
    {
      "epoch": 0.1807372175980975,
      "grad_norm": 1.0991664177678122,
      "learning_rate": 1.9976230779866527e-05,
      "loss": 2.3604,
      "step": 76
    },
    {
      "epoch": 0.18311533888228299,
      "grad_norm": 1.4648399555465317,
      "learning_rate": 1.9974892487717613e-05,
      "loss": 2.4796,
      "step": 77
    },
    {
      "epoch": 0.18549346016646848,
      "grad_norm": 1.4075214014031838,
      "learning_rate": 1.997351759227725e-05,
      "loss": 2.4604,
      "step": 78
    },
    {
      "epoch": 0.187871581450654,
      "grad_norm": 1.1658962187610111,
      "learning_rate": 1.9972106098590665e-05,
      "loss": 2.3639,
      "step": 79
    },
    {
      "epoch": 0.1902497027348395,
      "grad_norm": 1.0920203834125308,
      "learning_rate": 1.9970658011837404e-05,
      "loss": 2.1962,
      "step": 80
    },
    {
      "epoch": 0.19262782401902498,
      "grad_norm": 1.0547465850317588,
      "learning_rate": 1.9969173337331283e-05,
      "loss": 2.2381,
      "step": 81
    },
    {
      "epoch": 0.19500594530321047,
      "grad_norm": 0.9700896578179515,
      "learning_rate": 1.996765208052037e-05,
      "loss": 1.9818,
      "step": 82
    },
    {
      "epoch": 0.19738406658739596,
      "grad_norm": 1.3721631541122385,
      "learning_rate": 1.9966094246986983e-05,
      "loss": 2.1842,
      "step": 83
    },
    {
      "epoch": 0.19976218787158145,
      "grad_norm": 1.1909292893353944,
      "learning_rate": 1.9964499842447665e-05,
      "loss": 2.5704,
      "step": 84
    },
    {
      "epoch": 0.20214030915576695,
      "grad_norm": 1.0845849173658786,
      "learning_rate": 1.9962868872753144e-05,
      "loss": 2.1158,
      "step": 85
    },
    {
      "epoch": 0.20451843043995244,
      "grad_norm": 1.1512213319968665,
      "learning_rate": 1.996120134388834e-05,
      "loss": 2.3564,
      "step": 86
    },
    {
      "epoch": 0.20689655172413793,
      "grad_norm": 1.1653706380489515,
      "learning_rate": 1.995949726197231e-05,
      "loss": 2.4664,
      "step": 87
    },
    {
      "epoch": 0.20927467300832342,
      "grad_norm": 1.2375083589347724,
      "learning_rate": 1.9957756633258264e-05,
      "loss": 2.2763,
      "step": 88
    },
    {
      "epoch": 0.2116527942925089,
      "grad_norm": 1.180995352120129,
      "learning_rate": 1.9955979464133515e-05,
      "loss": 2.28,
      "step": 89
    },
    {
      "epoch": 0.2140309155766944,
      "grad_norm": 1.0060015116786936,
      "learning_rate": 1.995416576111945e-05,
      "loss": 2.0939,
      "step": 90
    },
    {
      "epoch": 0.2164090368608799,
      "grad_norm": 1.123230557994905,
      "learning_rate": 1.9952315530871537e-05,
      "loss": 2.2575,
      "step": 91
    },
    {
      "epoch": 0.21878715814506539,
      "grad_norm": 1.033250223812201,
      "learning_rate": 1.9950428780179274e-05,
      "loss": 2.2192,
      "step": 92
    },
    {
      "epoch": 0.2211652794292509,
      "grad_norm": 1.01496288741104,
      "learning_rate": 1.994850551596617e-05,
      "loss": 2.3692,
      "step": 93
    },
    {
      "epoch": 0.2235434007134364,
      "grad_norm": 1.2509512883238079,
      "learning_rate": 1.9946545745289727e-05,
      "loss": 2.5349,
      "step": 94
    },
    {
      "epoch": 0.2259215219976219,
      "grad_norm": 1.1631889518067213,
      "learning_rate": 1.9944549475341404e-05,
      "loss": 2.2335,
      "step": 95
    },
    {
      "epoch": 0.22829964328180738,
      "grad_norm": 1.0176487393302203,
      "learning_rate": 1.99425167134466e-05,
      "loss": 2.325,
      "step": 96
    },
    {
      "epoch": 0.23067776456599287,
      "grad_norm": 1.0766159235170416,
      "learning_rate": 1.9940447467064624e-05,
      "loss": 2.4656,
      "step": 97
    },
    {
      "epoch": 0.23305588585017836,
      "grad_norm": 1.1250639831138038,
      "learning_rate": 1.9938341743788658e-05,
      "loss": 2.1741,
      "step": 98
    },
    {
      "epoch": 0.23543400713436385,
      "grad_norm": 1.1024674310720775,
      "learning_rate": 1.9936199551345744e-05,
      "loss": 2.1336,
      "step": 99
    },
    {
      "epoch": 0.23781212841854935,
      "grad_norm": 1.2527734635640946,
      "learning_rate": 1.9934020897596752e-05,
      "loss": 2.2741,
      "step": 100
    },
    {
      "epoch": 0.24019024970273484,
      "grad_norm": 0.9699919156060421,
      "learning_rate": 1.9931805790536342e-05,
      "loss": 2.2369,
      "step": 101
    },
    {
      "epoch": 0.24256837098692033,
      "grad_norm": 1.1834325873202396,
      "learning_rate": 1.9929554238292944e-05,
      "loss": 2.0419,
      "step": 102
    },
    {
      "epoch": 0.24494649227110582,
      "grad_norm": 1.1078024399344104,
      "learning_rate": 1.992726624912872e-05,
      "loss": 2.4991,
      "step": 103
    },
    {
      "epoch": 0.2473246135552913,
      "grad_norm": 1.1465140647519878,
      "learning_rate": 1.992494183143955e-05,
      "loss": 2.6218,
      "step": 104
    },
    {
      "epoch": 0.2497027348394768,
      "grad_norm": 1.143333452765713,
      "learning_rate": 1.9922580993754985e-05,
      "loss": 2.3428,
      "step": 105
    },
    {
      "epoch": 0.2497027348394768,
      "eval_loss": 2.499577760696411,
      "eval_runtime": 151.1827,
      "eval_samples_per_second": 4.227,
      "eval_steps_per_second": 0.529,
      "step": 105
    },
    {
      "epoch": 0.2520808561236623,
      "grad_norm": 1.0672145334944887,
      "learning_rate": 1.9920183744738208e-05,
      "loss": 2.3485,
      "step": 106
    },
    {
      "epoch": 0.2544589774078478,
      "grad_norm": 1.0044582600402383,
      "learning_rate": 1.9917750093186036e-05,
      "loss": 2.1678,
      "step": 107
    },
    {
      "epoch": 0.2568370986920333,
      "grad_norm": 1.1029447003396373,
      "learning_rate": 1.9915280048028853e-05,
      "loss": 2.3967,
      "step": 108
    },
    {
      "epoch": 0.25921521997621877,
      "grad_norm": 1.260079077740416,
      "learning_rate": 1.9912773618330595e-05,
      "loss": 2.4385,
      "step": 109
    },
    {
      "epoch": 0.2615933412604043,
      "grad_norm": 1.0846663572847435,
      "learning_rate": 1.9910230813288713e-05,
      "loss": 2.1431,
      "step": 110
    },
    {
      "epoch": 0.26397146254458975,
      "grad_norm": 0.991674367348856,
      "learning_rate": 1.9907651642234138e-05,
      "loss": 1.9523,
      "step": 111
    },
    {
      "epoch": 0.26634958382877527,
      "grad_norm": 1.105394178981242,
      "learning_rate": 1.9905036114631247e-05,
      "loss": 2.3063,
      "step": 112
    },
    {
      "epoch": 0.26872770511296074,
      "grad_norm": 0.9979450972746343,
      "learning_rate": 1.990238424007783e-05,
      "loss": 2.2218,
      "step": 113
    },
    {
      "epoch": 0.27110582639714625,
      "grad_norm": 1.2796796522419402,
      "learning_rate": 1.989969602830505e-05,
      "loss": 2.2271,
      "step": 114
    },
    {
      "epoch": 0.2734839476813318,
      "grad_norm": 1.1720433923322253,
      "learning_rate": 1.9896971489177417e-05,
      "loss": 2.2736,
      "step": 115
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 1.0744949490065667,
      "learning_rate": 1.9894210632692745e-05,
      "loss": 2.1864,
      "step": 116
    },
    {
      "epoch": 0.27824019024970276,
      "grad_norm": 1.9078078540875143,
      "learning_rate": 1.9891413468982112e-05,
      "loss": 2.2678,
      "step": 117
    },
    {
      "epoch": 0.2806183115338882,
      "grad_norm": 1.0427118571864202,
      "learning_rate": 1.988858000830983e-05,
      "loss": 2.1757,
      "step": 118
    },
    {
      "epoch": 0.28299643281807374,
      "grad_norm": 1.8597307152991636,
      "learning_rate": 1.9885710261073402e-05,
      "loss": 2.2913,
      "step": 119
    },
    {
      "epoch": 0.2853745541022592,
      "grad_norm": 1.0401590654407316,
      "learning_rate": 1.9882804237803487e-05,
      "loss": 2.0629,
      "step": 120
    },
    {
      "epoch": 0.2877526753864447,
      "grad_norm": 1.5079024059776849,
      "learning_rate": 1.9879861949163863e-05,
      "loss": 2.1827,
      "step": 121
    },
    {
      "epoch": 0.2901307966706302,
      "grad_norm": 1.1499157717530648,
      "learning_rate": 1.9876883405951378e-05,
      "loss": 2.2959,
      "step": 122
    },
    {
      "epoch": 0.2925089179548157,
      "grad_norm": 1.030399166377001,
      "learning_rate": 1.987386861909593e-05,
      "loss": 2.279,
      "step": 123
    },
    {
      "epoch": 0.29488703923900117,
      "grad_norm": 1.062349469745721,
      "learning_rate": 1.98708175996604e-05,
      "loss": 2.2865,
      "step": 124
    },
    {
      "epoch": 0.2972651605231867,
      "grad_norm": 1.0853089306877393,
      "learning_rate": 1.986773035884064e-05,
      "loss": 2.332,
      "step": 125
    },
    {
      "epoch": 0.29964328180737215,
      "grad_norm": 1.0066223498076698,
      "learning_rate": 1.9864606907965407e-05,
      "loss": 2.3971,
      "step": 126
    },
    {
      "epoch": 0.30202140309155767,
      "grad_norm": 1.101756427229776,
      "learning_rate": 1.986144725849634e-05,
      "loss": 2.2862,
      "step": 127
    },
    {
      "epoch": 0.30439952437574314,
      "grad_norm": 1.1264625682995106,
      "learning_rate": 1.9858251422027903e-05,
      "loss": 2.0974,
      "step": 128
    },
    {
      "epoch": 0.30677764565992865,
      "grad_norm": 1.0012622487091893,
      "learning_rate": 1.9855019410287355e-05,
      "loss": 2.2648,
      "step": 129
    },
    {
      "epoch": 0.3091557669441142,
      "grad_norm": 1.0030908712085922,
      "learning_rate": 1.98517512351347e-05,
      "loss": 2.2626,
      "step": 130
    },
    {
      "epoch": 0.31153388822829964,
      "grad_norm": 1.1376149636856583,
      "learning_rate": 1.9848446908562647e-05,
      "loss": 2.2315,
      "step": 131
    },
    {
      "epoch": 0.31391200951248516,
      "grad_norm": 0.9616294646184662,
      "learning_rate": 1.9845106442696563e-05,
      "loss": 2.4033,
      "step": 132
    },
    {
      "epoch": 0.3162901307966706,
      "grad_norm": 1.3402719458865533,
      "learning_rate": 1.9841729849794427e-05,
      "loss": 2.4429,
      "step": 133
    },
    {
      "epoch": 0.31866825208085614,
      "grad_norm": 1.3575140959134089,
      "learning_rate": 1.983831714224679e-05,
      "loss": 2.291,
      "step": 134
    },
    {
      "epoch": 0.3210463733650416,
      "grad_norm": 0.9521047622766085,
      "learning_rate": 1.9834868332576727e-05,
      "loss": 2.2754,
      "step": 135
    },
    {
      "epoch": 0.3234244946492271,
      "grad_norm": 1.0875381930222732,
      "learning_rate": 1.9831383433439798e-05,
      "loss": 2.1469,
      "step": 136
    },
    {
      "epoch": 0.3258026159334126,
      "grad_norm": 1.0257090212605473,
      "learning_rate": 1.982786245762398e-05,
      "loss": 2.0848,
      "step": 137
    },
    {
      "epoch": 0.3281807372175981,
      "grad_norm": 1.0372156134974286,
      "learning_rate": 1.9824305418049645e-05,
      "loss": 2.4043,
      "step": 138
    },
    {
      "epoch": 0.33055885850178357,
      "grad_norm": 0.9832029689246473,
      "learning_rate": 1.9820712327769503e-05,
      "loss": 2.177,
      "step": 139
    },
    {
      "epoch": 0.3329369797859691,
      "grad_norm": 1.0472107045649877,
      "learning_rate": 1.9817083199968552e-05,
      "loss": 2.3309,
      "step": 140
    },
    {
      "epoch": 0.33531510107015455,
      "grad_norm": 1.0277879008926316,
      "learning_rate": 1.9813418047964025e-05,
      "loss": 2.1389,
      "step": 141
    },
    {
      "epoch": 0.3376932223543401,
      "grad_norm": 1.15382327979194,
      "learning_rate": 1.9809716885205363e-05,
      "loss": 2.3254,
      "step": 142
    },
    {
      "epoch": 0.3400713436385256,
      "grad_norm": 0.9933827586398313,
      "learning_rate": 1.980597972527413e-05,
      "loss": 2.2454,
      "step": 143
    },
    {
      "epoch": 0.34244946492271106,
      "grad_norm": 0.9637942573486198,
      "learning_rate": 1.9802206581883992e-05,
      "loss": 2.1945,
      "step": 144
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.918837452187297,
      "learning_rate": 1.979839746888067e-05,
      "loss": 2.1599,
      "step": 145
    },
    {
      "epoch": 0.34720570749108204,
      "grad_norm": 0.962272148586432,
      "learning_rate": 1.979455240024186e-05,
      "loss": 2.1504,
      "step": 146
    },
    {
      "epoch": 0.34958382877526756,
      "grad_norm": 1.0902104459187203,
      "learning_rate": 1.97906713900772e-05,
      "loss": 2.1671,
      "step": 147
    },
    {
      "epoch": 0.351961950059453,
      "grad_norm": 0.9528336584077055,
      "learning_rate": 1.9786754452628226e-05,
      "loss": 2.113,
      "step": 148
    },
    {
      "epoch": 0.35434007134363854,
      "grad_norm": 0.9216300894356183,
      "learning_rate": 1.9782801602268306e-05,
      "loss": 1.9271,
      "step": 149
    },
    {
      "epoch": 0.356718192627824,
      "grad_norm": 0.9148117236258577,
      "learning_rate": 1.9778812853502592e-05,
      "loss": 2.0221,
      "step": 150
    },
    {
      "epoch": 0.3590963139120095,
      "grad_norm": 0.9737793407143137,
      "learning_rate": 1.9774788220967968e-05,
      "loss": 2.2007,
      "step": 151
    },
    {
      "epoch": 0.361474435196195,
      "grad_norm": 0.8979472123958888,
      "learning_rate": 1.9770727719432994e-05,
      "loss": 2.1295,
      "step": 152
    },
    {
      "epoch": 0.3638525564803805,
      "grad_norm": 1.0391719811646754,
      "learning_rate": 1.9766631363797852e-05,
      "loss": 2.2383,
      "step": 153
    },
    {
      "epoch": 0.36623067776456597,
      "grad_norm": 1.0898893562871452,
      "learning_rate": 1.9762499169094288e-05,
      "loss": 2.085,
      "step": 154
    },
    {
      "epoch": 0.3686087990487515,
      "grad_norm": 2.9046586074281686,
      "learning_rate": 1.9758331150485576e-05,
      "loss": 2.2903,
      "step": 155
    },
    {
      "epoch": 0.37098692033293695,
      "grad_norm": 1.0438135264691892,
      "learning_rate": 1.9754127323266426e-05,
      "loss": 2.349,
      "step": 156
    },
    {
      "epoch": 0.3733650416171225,
      "grad_norm": 1.2082790526216014,
      "learning_rate": 1.9749887702862972e-05,
      "loss": 2.2182,
      "step": 157
    },
    {
      "epoch": 0.375743162901308,
      "grad_norm": 1.2415740026323197,
      "learning_rate": 1.9745612304832672e-05,
      "loss": 2.4834,
      "step": 158
    },
    {
      "epoch": 0.37812128418549346,
      "grad_norm": 0.9901876124346225,
      "learning_rate": 1.9741301144864284e-05,
      "loss": 2.2873,
      "step": 159
    },
    {
      "epoch": 0.380499405469679,
      "grad_norm": 1.1185971951047096,
      "learning_rate": 1.9736954238777793e-05,
      "loss": 2.2114,
      "step": 160
    },
    {
      "epoch": 0.38287752675386444,
      "grad_norm": 1.0186645068648283,
      "learning_rate": 1.9732571602524353e-05,
      "loss": 2.3323,
      "step": 161
    },
    {
      "epoch": 0.38525564803804996,
      "grad_norm": 0.9856339888297305,
      "learning_rate": 1.972815325218624e-05,
      "loss": 2.2638,
      "step": 162
    },
    {
      "epoch": 0.3876337693222354,
      "grad_norm": 1.287711819624049,
      "learning_rate": 1.9723699203976768e-05,
      "loss": 2.3897,
      "step": 163
    },
    {
      "epoch": 0.39001189060642094,
      "grad_norm": 0.9474533284935532,
      "learning_rate": 1.9719209474240263e-05,
      "loss": 1.8287,
      "step": 164
    },
    {
      "epoch": 0.3923900118906064,
      "grad_norm": 1.0505224096035144,
      "learning_rate": 1.971468407945198e-05,
      "loss": 2.3906,
      "step": 165
    },
    {
      "epoch": 0.3947681331747919,
      "grad_norm": 0.9322039774829307,
      "learning_rate": 1.9710123036218044e-05,
      "loss": 2.0246,
      "step": 166
    },
    {
      "epoch": 0.3971462544589774,
      "grad_norm": 1.1428006052468438,
      "learning_rate": 1.97055263612754e-05,
      "loss": 2.0085,
      "step": 167
    },
    {
      "epoch": 0.3995243757431629,
      "grad_norm": 0.9233456322532203,
      "learning_rate": 1.9700894071491736e-05,
      "loss": 2.0657,
      "step": 168
    },
    {
      "epoch": 0.40190249702734837,
      "grad_norm": 1.1387607148614496,
      "learning_rate": 1.9696226183865436e-05,
      "loss": 2.2507,
      "step": 169
    },
    {
      "epoch": 0.4042806183115339,
      "grad_norm": 1.0240739510681864,
      "learning_rate": 1.969152271552552e-05,
      "loss": 2.1685,
      "step": 170
    },
    {
      "epoch": 0.40665873959571935,
      "grad_norm": 1.2665975670284688,
      "learning_rate": 1.9686783683731557e-05,
      "loss": 2.3869,
      "step": 171
    },
    {
      "epoch": 0.4090368608799049,
      "grad_norm": 1.0148421037850517,
      "learning_rate": 1.9682009105873633e-05,
      "loss": 2.1379,
      "step": 172
    },
    {
      "epoch": 0.4114149821640904,
      "grad_norm": 1.0117482642225601,
      "learning_rate": 1.9677198999472257e-05,
      "loss": 2.1104,
      "step": 173
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 0.9720066057353862,
      "learning_rate": 1.967235338217832e-05,
      "loss": 2.2884,
      "step": 174
    },
    {
      "epoch": 0.4161712247324614,
      "grad_norm": 1.2836956527083296,
      "learning_rate": 1.9667472271773026e-05,
      "loss": 2.281,
      "step": 175
    },
    {
      "epoch": 0.41854934601664684,
      "grad_norm": 0.9738075739171279,
      "learning_rate": 1.9662555686167808e-05,
      "loss": 2.2039,
      "step": 176
    },
    {
      "epoch": 0.42092746730083236,
      "grad_norm": 1.0382703606377657,
      "learning_rate": 1.965760364340429e-05,
      "loss": 2.1142,
      "step": 177
    },
    {
      "epoch": 0.4233055885850178,
      "grad_norm": 0.9318167386351257,
      "learning_rate": 1.9652616161654204e-05,
      "loss": 2.2409,
      "step": 178
    },
    {
      "epoch": 0.42568370986920334,
      "grad_norm": 1.3023888510009893,
      "learning_rate": 1.9647593259219328e-05,
      "loss": 2.1972,
      "step": 179
    },
    {
      "epoch": 0.4280618311533888,
      "grad_norm": 2.3590946551757583,
      "learning_rate": 1.964253495453141e-05,
      "loss": 2.1922,
      "step": 180
    },
    {
      "epoch": 0.4304399524375743,
      "grad_norm": 1.277353180787431,
      "learning_rate": 1.963744126615212e-05,
      "loss": 2.384,
      "step": 181
    },
    {
      "epoch": 0.4328180737217598,
      "grad_norm": 0.9912373962254282,
      "learning_rate": 1.9632312212772956e-05,
      "loss": 2.3005,
      "step": 182
    },
    {
      "epoch": 0.4351961950059453,
      "grad_norm": 1.3088391002508755,
      "learning_rate": 1.9627147813215207e-05,
      "loss": 2.2924,
      "step": 183
    },
    {
      "epoch": 0.43757431629013077,
      "grad_norm": 1.0523417119399674,
      "learning_rate": 1.9621948086429847e-05,
      "loss": 2.2255,
      "step": 184
    },
    {
      "epoch": 0.4399524375743163,
      "grad_norm": 1.386504728655603,
      "learning_rate": 1.9616713051497496e-05,
      "loss": 2.1963,
      "step": 185
    },
    {
      "epoch": 0.4423305588585018,
      "grad_norm": 0.9623064139518317,
      "learning_rate": 1.9611442727628344e-05,
      "loss": 2.3175,
      "step": 186
    },
    {
      "epoch": 0.4447086801426873,
      "grad_norm": 1.7130236244329065,
      "learning_rate": 1.960613713416206e-05,
      "loss": 2.2246,
      "step": 187
    },
    {
      "epoch": 0.4470868014268728,
      "grad_norm": 1.2183030146426241,
      "learning_rate": 1.9600796290567747e-05,
      "loss": 2.2345,
      "step": 188
    },
    {
      "epoch": 0.44946492271105826,
      "grad_norm": 2.1564007093614497,
      "learning_rate": 1.9595420216443864e-05,
      "loss": 2.269,
      "step": 189
    },
    {
      "epoch": 0.4518430439952438,
      "grad_norm": 1.1685306329495788,
      "learning_rate": 1.9590008931518133e-05,
      "loss": 2.4817,
      "step": 190
    },
    {
      "epoch": 0.45422116527942924,
      "grad_norm": 1.9314760188762214,
      "learning_rate": 1.9584562455647494e-05,
      "loss": 2.2482,
      "step": 191
    },
    {
      "epoch": 0.45659928656361476,
      "grad_norm": 1.2361103482091276,
      "learning_rate": 1.9579080808818035e-05,
      "loss": 2.2229,
      "step": 192
    },
    {
      "epoch": 0.4589774078478002,
      "grad_norm": 1.7230964883799738,
      "learning_rate": 1.9573564011144873e-05,
      "loss": 2.142,
      "step": 193
    },
    {
      "epoch": 0.46135552913198574,
      "grad_norm": 1.2344882256405865,
      "learning_rate": 1.9568012082872148e-05,
      "loss": 2.0984,
      "step": 194
    },
    {
      "epoch": 0.4637336504161712,
      "grad_norm": 1.563594408096457,
      "learning_rate": 1.9562425044372884e-05,
      "loss": 1.9245,
      "step": 195
    },
    {
      "epoch": 0.4661117717003567,
      "grad_norm": 1.942680386259649,
      "learning_rate": 1.9556802916148963e-05,
      "loss": 2.2633,
      "step": 196
    },
    {
      "epoch": 0.4684898929845422,
      "grad_norm": 1.3339191105911707,
      "learning_rate": 1.955114571883102e-05,
      "loss": 2.1356,
      "step": 197
    },
    {
      "epoch": 0.4708680142687277,
      "grad_norm": 2.204421390949867,
      "learning_rate": 1.9545453473178384e-05,
      "loss": 2.2506,
      "step": 198
    },
    {
      "epoch": 0.47324613555291317,
      "grad_norm": 1.106826809772669,
      "learning_rate": 1.9539726200078987e-05,
      "loss": 2.0526,
      "step": 199
    },
    {
      "epoch": 0.4756242568370987,
      "grad_norm": 2.7022504927488136,
      "learning_rate": 1.9533963920549307e-05,
      "loss": 2.3654,
      "step": 200
    },
    {
      "epoch": 0.4780023781212842,
      "grad_norm": 2.20401718688023,
      "learning_rate": 1.9528166655734267e-05,
      "loss": 2.3523,
      "step": 201
    },
    {
      "epoch": 0.4803804994054697,
      "grad_norm": 1.7098249314197853,
      "learning_rate": 1.9522334426907185e-05,
      "loss": 2.0789,
      "step": 202
    },
    {
      "epoch": 0.4827586206896552,
      "grad_norm": 2.893610442346491,
      "learning_rate": 1.951646725546966e-05,
      "loss": 2.2456,
      "step": 203
    },
    {
      "epoch": 0.48513674197384066,
      "grad_norm": 2.2821472751414937,
      "learning_rate": 1.9510565162951538e-05,
      "loss": 2.3351,
      "step": 204
    },
    {
      "epoch": 0.4875148632580262,
      "grad_norm": 1.300863641854685,
      "learning_rate": 1.950462817101079e-05,
      "loss": 2.3617,
      "step": 205
    },
    {
      "epoch": 0.48989298454221164,
      "grad_norm": 1.3899023575579752,
      "learning_rate": 1.9498656301433466e-05,
      "loss": 2.0628,
      "step": 206
    },
    {
      "epoch": 0.49227110582639716,
      "grad_norm": 1.018395556873715,
      "learning_rate": 1.9492649576133594e-05,
      "loss": 2.2362,
      "step": 207
    },
    {
      "epoch": 0.4946492271105826,
      "grad_norm": 1.4600451736607643,
      "learning_rate": 1.94866080171531e-05,
      "loss": 2.2194,
      "step": 208
    },
    {
      "epoch": 0.49702734839476814,
      "grad_norm": 4.244045726741938,
      "learning_rate": 1.9480531646661753e-05,
      "loss": 2.434,
      "step": 209
    },
    {
      "epoch": 0.4994054696789536,
      "grad_norm": 1.2125371460079588,
      "learning_rate": 1.9474420486957045e-05,
      "loss": 2.228,
      "step": 210
    },
    {
      "epoch": 0.4994054696789536,
      "eval_loss": 2.4567697048187256,
      "eval_runtime": 151.5129,
      "eval_samples_per_second": 4.217,
      "eval_steps_per_second": 0.528,
      "step": 210
    }
  ],
  "logging_steps": 1,
  "max_steps": 1680,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 210,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3027053762772992e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}