{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.18066847335140018,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018066847335140017,
      "grad_norm": 2.8401210755443786,
      "learning_rate": 0.0,
      "loss": 0.8251,
      "step": 1
    },
    {
      "epoch": 0.0036133694670280035,
      "grad_norm": 2.884596771811998,
      "learning_rate": 1.7857142857142857e-06,
      "loss": 0.8284,
      "step": 2
    },
    {
      "epoch": 0.005420054200542005,
      "grad_norm": 2.885426730353446,
      "learning_rate": 3.5714285714285714e-06,
      "loss": 0.8426,
      "step": 3
    },
    {
      "epoch": 0.007226738934056007,
      "grad_norm": 2.651910724864975,
      "learning_rate": 5.357142857142857e-06,
      "loss": 0.8329,
      "step": 4
    },
    {
      "epoch": 0.009033423667570008,
      "grad_norm": 2.138099539200911,
      "learning_rate": 7.142857142857143e-06,
      "loss": 0.8087,
      "step": 5
    },
    {
      "epoch": 0.01084010840108401,
      "grad_norm": 1.5516804515831568,
      "learning_rate": 8.92857142857143e-06,
      "loss": 0.7878,
      "step": 6
    },
    {
      "epoch": 0.012646793134598013,
      "grad_norm": 1.3537216088647457,
      "learning_rate": 1.0714285714285714e-05,
      "loss": 0.76,
      "step": 7
    },
    {
      "epoch": 0.014453477868112014,
      "grad_norm": 2.2798033166818095,
      "learning_rate": 1.25e-05,
      "loss": 0.7502,
      "step": 8
    },
    {
      "epoch": 0.016260162601626018,
      "grad_norm": 2.4280919600136315,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.7449,
      "step": 9
    },
    {
      "epoch": 0.018066847335140017,
      "grad_norm": 1.931081683969551,
      "learning_rate": 1.6071428571428572e-05,
      "loss": 0.7271,
      "step": 10
    },
    {
      "epoch": 0.01987353206865402,
      "grad_norm": 2.033957326273334,
      "learning_rate": 1.785714285714286e-05,
      "loss": 0.7224,
      "step": 11
    },
    {
      "epoch": 0.02168021680216802,
      "grad_norm": 1.7440303482576578,
      "learning_rate": 1.9642857142857145e-05,
      "loss": 0.7373,
      "step": 12
    },
    {
      "epoch": 0.023486901535682024,
      "grad_norm": 1.1423427090231095,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 0.7033,
      "step": 13
    },
    {
      "epoch": 0.025293586269196026,
      "grad_norm": 0.9350758811837806,
      "learning_rate": 2.3214285714285715e-05,
      "loss": 0.6886,
      "step": 14
    },
    {
      "epoch": 0.02710027100271003,
      "grad_norm": 0.9041529205527444,
      "learning_rate": 2.5e-05,
      "loss": 0.6858,
      "step": 15
    },
    {
      "epoch": 0.028906955736224028,
      "grad_norm": 0.7774159470583337,
      "learning_rate": 2.6785714285714288e-05,
      "loss": 0.676,
      "step": 16
    },
    {
      "epoch": 0.03071364046973803,
      "grad_norm": 0.6642849851627606,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.6816,
      "step": 17
    },
    {
      "epoch": 0.032520325203252036,
      "grad_norm": 0.6696862181877565,
      "learning_rate": 3.0357142857142857e-05,
      "loss": 0.6727,
      "step": 18
    },
    {
      "epoch": 0.03432700993676603,
      "grad_norm": 0.6199596133483118,
      "learning_rate": 3.2142857142857144e-05,
      "loss": 0.6678,
      "step": 19
    },
    {
      "epoch": 0.036133694670280034,
      "grad_norm": 0.5724104094717255,
      "learning_rate": 3.392857142857143e-05,
      "loss": 0.6448,
      "step": 20
    },
    {
      "epoch": 0.037940379403794036,
      "grad_norm": 0.5762214978597714,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.6506,
      "step": 21
    },
    {
      "epoch": 0.03974706413730804,
      "grad_norm": 0.5444644294356963,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.6413,
      "step": 22
    },
    {
      "epoch": 0.04155374887082204,
      "grad_norm": 0.4801008888914334,
      "learning_rate": 3.928571428571429e-05,
      "loss": 0.6425,
      "step": 23
    },
    {
      "epoch": 0.04336043360433604,
      "grad_norm": 0.4780671650041637,
      "learning_rate": 4.107142857142857e-05,
      "loss": 0.6489,
      "step": 24
    },
    {
      "epoch": 0.045167118337850046,
      "grad_norm": 0.5145358853730851,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.6306,
      "step": 25
    },
    {
      "epoch": 0.04697380307136405,
      "grad_norm": 0.4229563893889767,
      "learning_rate": 4.464285714285715e-05,
      "loss": 0.6453,
      "step": 26
    },
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 0.4021332182222708,
      "learning_rate": 4.642857142857143e-05,
      "loss": 0.6251,
      "step": 27
    },
    {
      "epoch": 0.05058717253839205,
      "grad_norm": 0.48615223659558016,
      "learning_rate": 4.8214285714285716e-05,
      "loss": 0.6272,
      "step": 28
    },
    {
      "epoch": 0.052393857271906055,
      "grad_norm": 0.46304419453924084,
      "learning_rate": 5e-05,
      "loss": 0.6385,
      "step": 29
    },
    {
      "epoch": 0.05420054200542006,
      "grad_norm": 0.4055914485047887,
      "learning_rate": 4.999955240022902e-05,
      "loss": 0.6406,
      "step": 30
    },
    {
      "epoch": 0.05600722673893405,
      "grad_norm": 0.4320919468210144,
      "learning_rate": 4.999820961694372e-05,
      "loss": 0.6303,
      "step": 31
    },
    {
      "epoch": 0.057813911472448055,
      "grad_norm": 0.4042983593828653,
      "learning_rate": 4.999597169822646e-05,
      "loss": 0.6186,
      "step": 32
    },
    {
      "epoch": 0.05962059620596206,
      "grad_norm": 0.4310835292518631,
      "learning_rate": 4.9992838724212585e-05,
      "loss": 0.6287,
      "step": 33
    },
    {
      "epoch": 0.06142728093947606,
      "grad_norm": 0.412271814827728,
      "learning_rate": 4.9988810807087584e-05,
      "loss": 0.6165,
      "step": 34
    },
    {
      "epoch": 0.06323396567299007,
      "grad_norm": 0.40598837987994935,
      "learning_rate": 4.998388809108303e-05,
      "loss": 0.622,
      "step": 35
    },
    {
      "epoch": 0.06504065040650407,
      "grad_norm": 0.4223908220324365,
      "learning_rate": 4.997807075247146e-05,
      "loss": 0.6189,
      "step": 36
    },
    {
      "epoch": 0.06684733514001806,
      "grad_norm": 0.38197899028265064,
      "learning_rate": 4.997135899956001e-05,
      "loss": 0.623,
      "step": 37
    },
    {
      "epoch": 0.06865401987353206,
      "grad_norm": 0.3885330150920031,
      "learning_rate": 4.9963753072683025e-05,
      "loss": 0.6164,
      "step": 38
    },
    {
      "epoch": 0.07046070460704607,
      "grad_norm": 0.41115019726029983,
      "learning_rate": 4.9955253244193375e-05,
      "loss": 0.6182,
      "step": 39
    },
    {
      "epoch": 0.07226738934056007,
      "grad_norm": 0.3495037257115665,
      "learning_rate": 4.994585981845278e-05,
      "loss": 0.6072,
      "step": 40
    },
    {
      "epoch": 0.07407407407407407,
      "grad_norm": 0.3973831198418077,
      "learning_rate": 4.9935573131820854e-05,
      "loss": 0.6137,
      "step": 41
    },
    {
      "epoch": 0.07588075880758807,
      "grad_norm": 0.3339390206679997,
      "learning_rate": 4.9924393552643075e-05,
      "loss": 0.6064,
      "step": 42
    },
    {
      "epoch": 0.07768744354110207,
      "grad_norm": 0.3384678373992373,
      "learning_rate": 4.991232148123761e-05,
      "loss": 0.606,
      "step": 43
    },
    {
      "epoch": 0.07949412827461608,
      "grad_norm": 0.3764492630812678,
      "learning_rate": 4.989935734988098e-05,
      "loss": 0.601,
      "step": 44
    },
    {
      "epoch": 0.08130081300813008,
      "grad_norm": 0.35707353533847597,
      "learning_rate": 4.988550162279255e-05,
      "loss": 0.6097,
      "step": 45
    },
    {
      "epoch": 0.08310749774164408,
      "grad_norm": 0.34040533355863656,
      "learning_rate": 4.987075479611796e-05,
      "loss": 0.6094,
      "step": 46
    },
    {
      "epoch": 0.08491418247515808,
      "grad_norm": 0.34496537693497575,
      "learning_rate": 4.985511739791129e-05,
      "loss": 0.6129,
      "step": 47
    },
    {
      "epoch": 0.08672086720867209,
      "grad_norm": 0.3655410266089117,
      "learning_rate": 4.983858998811622e-05,
      "loss": 0.5994,
      "step": 48
    },
    {
      "epoch": 0.08852755194218609,
      "grad_norm": 0.32151319632227454,
      "learning_rate": 4.9821173158545936e-05,
      "loss": 0.607,
      "step": 49
    },
    {
      "epoch": 0.09033423667570009,
      "grad_norm": 0.3573869377855791,
      "learning_rate": 4.980286753286195e-05,
      "loss": 0.6067,
      "step": 50
    },
    {
      "epoch": 0.0921409214092141,
      "grad_norm": 0.3601371463623409,
      "learning_rate": 4.978367376655177e-05,
      "loss": 0.5965,
      "step": 51
    },
    {
      "epoch": 0.0939476061427281,
      "grad_norm": 0.3288839599265164,
      "learning_rate": 4.976359254690543e-05,
      "loss": 0.6128,
      "step": 52
    },
    {
      "epoch": 0.0957542908762421,
      "grad_norm": 0.3899144366212297,
      "learning_rate": 4.974262459299087e-05,
      "loss": 0.5992,
      "step": 53
    },
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 0.31373036051997816,
      "learning_rate": 4.972077065562821e-05,
      "loss": 0.5894,
      "step": 54
    },
    {
      "epoch": 0.0993676603432701,
      "grad_norm": 0.3317064945312652,
      "learning_rate": 4.969803151736284e-05,
      "loss": 0.6046,
      "step": 55
    },
    {
      "epoch": 0.1011743450767841,
      "grad_norm": 0.4008096818415782,
      "learning_rate": 4.9674407992437394e-05,
      "loss": 0.6091,
      "step": 56
    },
    {
      "epoch": 0.10298102981029811,
      "grad_norm": 0.32205121061079506,
      "learning_rate": 4.964990092676263e-05,
      "loss": 0.5936,
      "step": 57
    },
    {
      "epoch": 0.10478771454381211,
      "grad_norm": 0.3834592472356572,
      "learning_rate": 4.962451119788709e-05,
      "loss": 0.6035,
      "step": 58
    },
    {
      "epoch": 0.10659439927732611,
      "grad_norm": 0.4364607834229955,
      "learning_rate": 4.959823971496574e-05,
      "loss": 0.5895,
      "step": 59
    },
    {
      "epoch": 0.10840108401084012,
      "grad_norm": 0.39798614932188364,
      "learning_rate": 4.957108741872736e-05,
      "loss": 0.5903,
      "step": 60
    },
    {
      "epoch": 0.1102077687443541,
      "grad_norm": 0.4078983073259993,
      "learning_rate": 4.954305528144085e-05,
      "loss": 0.594,
      "step": 61
    },
    {
      "epoch": 0.1120144534778681,
      "grad_norm": 0.5064139886264023,
      "learning_rate": 4.9514144306880506e-05,
      "loss": 0.5989,
      "step": 62
    },
    {
      "epoch": 0.11382113821138211,
      "grad_norm": 0.3049367414530056,
      "learning_rate": 4.9484355530289944e-05,
      "loss": 0.5982,
      "step": 63
    },
    {
      "epoch": 0.11562782294489611,
      "grad_norm": 0.45932591541254997,
      "learning_rate": 4.9453690018345144e-05,
      "loss": 0.598,
      "step": 64
    },
    {
      "epoch": 0.11743450767841011,
      "grad_norm": 0.34088884376991047,
      "learning_rate": 4.9422148869116194e-05,
      "loss": 0.5914,
      "step": 65
    },
    {
      "epoch": 0.11924119241192412,
      "grad_norm": 0.4408196994719187,
      "learning_rate": 4.938973321202799e-05,
      "loss": 0.5943,
      "step": 66
    },
    {
      "epoch": 0.12104787714543812,
      "grad_norm": 0.4034840944061305,
      "learning_rate": 4.935644420781978e-05,
      "loss": 0.5852,
      "step": 67
    },
    {
      "epoch": 0.12285456187895212,
      "grad_norm": 0.3532258693927161,
      "learning_rate": 4.932228304850363e-05,
      "loss": 0.6003,
      "step": 68
    },
    {
      "epoch": 0.12466124661246612,
      "grad_norm": 0.4041417285254443,
      "learning_rate": 4.928725095732169e-05,
      "loss": 0.6019,
      "step": 69
    },
    {
      "epoch": 0.12646793134598014,
      "grad_norm": 0.338119605860217,
      "learning_rate": 4.925134918870245e-05,
      "loss": 0.6056,
      "step": 70
    },
    {
      "epoch": 0.12827461607949414,
      "grad_norm": 0.2924641177682486,
      "learning_rate": 4.9214579028215776e-05,
      "loss": 0.5784,
      "step": 71
    },
    {
      "epoch": 0.13008130081300814,
      "grad_norm": 0.37576892630911196,
      "learning_rate": 4.917694179252692e-05,
      "loss": 0.5966,
      "step": 72
    },
    {
      "epoch": 0.13188798554652212,
      "grad_norm": 0.3066908816324021,
      "learning_rate": 4.91384388293493e-05,
      "loss": 0.5944,
      "step": 73
    },
    {
      "epoch": 0.13369467028003612,
      "grad_norm": 0.3296017371410444,
      "learning_rate": 4.909907151739633e-05,
      "loss": 0.5863,
      "step": 74
    },
    {
      "epoch": 0.13550135501355012,
      "grad_norm": 0.3510646842800697,
      "learning_rate": 4.9058841266332e-05,
      "loss": 0.5854,
      "step": 75
    },
    {
      "epoch": 0.13730803974706413,
      "grad_norm": 0.33050013383675975,
      "learning_rate": 4.90177495167204e-05,
      "loss": 0.5816,
      "step": 76
    },
    {
      "epoch": 0.13911472448057813,
      "grad_norm": 0.336545072709173,
      "learning_rate": 4.897579773997415e-05,
      "loss": 0.5768,
      "step": 77
    },
    {
      "epoch": 0.14092140921409213,
      "grad_norm": 0.3401542906091868,
      "learning_rate": 4.893298743830168e-05,
      "loss": 0.5877,
      "step": 78
    },
    {
      "epoch": 0.14272809394760613,
      "grad_norm": 0.30833272476392615,
      "learning_rate": 4.888932014465352e-05,
      "loss": 0.5949,
      "step": 79
    },
    {
      "epoch": 0.14453477868112014,
      "grad_norm": 0.40829913126640544,
      "learning_rate": 4.88447974226673e-05,
      "loss": 0.6045,
      "step": 80
    },
    {
      "epoch": 0.14634146341463414,
      "grad_norm": 0.3247095550760803,
      "learning_rate": 4.879942086661184e-05,
      "loss": 0.5901,
      "step": 81
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 0.40484730371305205,
      "learning_rate": 4.875319210133004e-05,
      "loss": 0.5825,
      "step": 82
    },
    {
      "epoch": 0.14995483288166214,
      "grad_norm": 0.37019522474324174,
      "learning_rate": 4.870611278218066e-05,
      "loss": 0.5918,
      "step": 83
    },
    {
      "epoch": 0.15176151761517614,
      "grad_norm": 0.3654744041300334,
      "learning_rate": 4.865818459497911e-05,
      "loss": 0.5865,
      "step": 84
    },
    {
      "epoch": 0.15356820234869015,
      "grad_norm": 0.3001653084612634,
      "learning_rate": 4.860940925593703e-05,
      "loss": 0.5889,
      "step": 85
    },
    {
      "epoch": 0.15537488708220415,
      "grad_norm": 0.3463142769052332,
      "learning_rate": 4.8559788511600876e-05,
      "loss": 0.5881,
      "step": 86
    },
    {
      "epoch": 0.15718157181571815,
      "grad_norm": 0.3221292284344934,
      "learning_rate": 4.850932413878934e-05,
      "loss": 0.5901,
      "step": 87
    },
    {
      "epoch": 0.15898825654923215,
      "grad_norm": 0.33253415770699135,
      "learning_rate": 4.8458017944529776e-05,
      "loss": 0.5952,
      "step": 88
    },
    {
      "epoch": 0.16079494128274616,
      "grad_norm": 0.37021022779245716,
      "learning_rate": 4.8405871765993433e-05,
      "loss": 0.5928,
      "step": 89
    },
    {
      "epoch": 0.16260162601626016,
      "grad_norm": 0.3197338147243217,
      "learning_rate": 4.8352887470429726e-05,
      "loss": 0.5837,
      "step": 90
    },
    {
      "epoch": 0.16440831074977416,
      "grad_norm": 0.35706780201968874,
      "learning_rate": 4.8299066955099335e-05,
      "loss": 0.5811,
      "step": 91
    },
    {
      "epoch": 0.16621499548328816,
      "grad_norm": 0.3730971582648582,
      "learning_rate": 4.8244412147206284e-05,
      "loss": 0.586,
      "step": 92
    },
    {
      "epoch": 0.16802168021680217,
      "grad_norm": 0.33467520904104064,
      "learning_rate": 4.8188925003828945e-05,
      "loss": 0.5919,
      "step": 93
    },
    {
      "epoch": 0.16982836495031617,
      "grad_norm": 0.41181901183464464,
      "learning_rate": 4.813260751184992e-05,
      "loss": 0.5922,
      "step": 94
    },
    {
      "epoch": 0.17163504968383017,
      "grad_norm": 0.31057237794043846,
      "learning_rate": 4.807546168788494e-05,
      "loss": 0.5835,
      "step": 95
    },
    {
      "epoch": 0.17344173441734417,
      "grad_norm": 0.4045185112342142,
      "learning_rate": 4.8017489578210604e-05,
      "loss": 0.5839,
      "step": 96
    },
    {
      "epoch": 0.17524841915085818,
      "grad_norm": 0.342091450071029,
      "learning_rate": 4.7958693258691167e-05,
      "loss": 0.5891,
      "step": 97
    },
    {
      "epoch": 0.17705510388437218,
      "grad_norm": 0.32193383766669476,
      "learning_rate": 4.7899074834704165e-05,
      "loss": 0.5814,
      "step": 98
    },
    {
      "epoch": 0.17886178861788618,
      "grad_norm": 0.33881983844597735,
      "learning_rate": 4.783863644106502e-05,
      "loss": 0.5951,
      "step": 99
    },
    {
      "epoch": 0.18066847335140018,
      "grad_norm": 0.3141049036238513,
      "learning_rate": 4.7777380241950645e-05,
      "loss": 0.5672,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 553,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 115440686923776.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}