{
  "best_metric": 0.8490656018257141,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.18912529550827423,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009456264775413711,
      "grad_norm": 0.09641855955123901,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.1657,
      "step": 1
    },
    {
      "epoch": 0.0009456264775413711,
      "eval_loss": 3.487557888031006,
      "eval_runtime": 54.6728,
      "eval_samples_per_second": 65.17,
      "eval_steps_per_second": 16.297,
      "step": 1
    },
    {
      "epoch": 0.0018912529550827422,
      "grad_norm": 0.1267959028482437,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.2459,
      "step": 2
    },
    {
      "epoch": 0.0028368794326241137,
      "grad_norm": 0.13855446875095367,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.2912,
      "step": 3
    },
    {
      "epoch": 0.0037825059101654845,
      "grad_norm": 0.15804192423820496,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.3923,
      "step": 4
    },
    {
      "epoch": 0.004728132387706856,
      "grad_norm": 0.18095271289348602,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.4793,
      "step": 5
    },
    {
      "epoch": 0.005673758865248227,
      "grad_norm": 0.18571291863918304,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.585,
      "step": 6
    },
    {
      "epoch": 0.006619385342789598,
      "grad_norm": 0.2568407356739044,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 0.8612,
      "step": 7
    },
    {
      "epoch": 0.007565011820330969,
      "grad_norm": 0.3106652498245239,
      "learning_rate": 2.666666666666667e-06,
      "loss": 1.2384,
      "step": 8
    },
    {
      "epoch": 0.00851063829787234,
      "grad_norm": 0.2571044862270355,
      "learning_rate": 3e-06,
      "loss": 0.8217,
      "step": 9
    },
    {
      "epoch": 0.009456264775413711,
      "grad_norm": 0.29234185814857483,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.7603,
      "step": 10
    },
    {
      "epoch": 0.010401891252955082,
      "grad_norm": 0.24926097691059113,
      "learning_rate": 3.666666666666667e-06,
      "loss": 0.6962,
      "step": 11
    },
    {
      "epoch": 0.011347517730496455,
      "grad_norm": 0.2906411588191986,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.9644,
      "step": 12
    },
    {
      "epoch": 0.012293144208037825,
      "grad_norm": 0.49177998304367065,
      "learning_rate": 4.333333333333334e-06,
      "loss": 1.7599,
      "step": 13
    },
    {
      "epoch": 0.013238770685579196,
      "grad_norm": 0.4265400767326355,
      "learning_rate": 4.666666666666667e-06,
      "loss": 1.493,
      "step": 14
    },
    {
      "epoch": 0.014184397163120567,
      "grad_norm": 0.5240830779075623,
      "learning_rate": 5e-06,
      "loss": 1.9888,
      "step": 15
    },
    {
      "epoch": 0.015130023640661938,
      "grad_norm": 0.5526121854782104,
      "learning_rate": 5.333333333333334e-06,
      "loss": 2.2851,
      "step": 16
    },
    {
      "epoch": 0.01607565011820331,
      "grad_norm": 0.5441344976425171,
      "learning_rate": 5.666666666666667e-06,
      "loss": 1.9884,
      "step": 17
    },
    {
      "epoch": 0.01702127659574468,
      "grad_norm": 0.6593867540359497,
      "learning_rate": 6e-06,
      "loss": 2.5504,
      "step": 18
    },
    {
      "epoch": 0.017966903073286054,
      "grad_norm": 0.6868957877159119,
      "learning_rate": 6.333333333333334e-06,
      "loss": 3.0568,
      "step": 19
    },
    {
      "epoch": 0.018912529550827423,
      "grad_norm": 0.43732380867004395,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.956,
      "step": 20
    },
    {
      "epoch": 0.019858156028368795,
      "grad_norm": 0.8580771088600159,
      "learning_rate": 7.000000000000001e-06,
      "loss": 2.8262,
      "step": 21
    },
    {
      "epoch": 0.020803782505910164,
      "grad_norm": 0.6907251477241516,
      "learning_rate": 7.333333333333334e-06,
      "loss": 3.0729,
      "step": 22
    },
    {
      "epoch": 0.021749408983451537,
      "grad_norm": 0.6712820529937744,
      "learning_rate": 7.666666666666667e-06,
      "loss": 2.7102,
      "step": 23
    },
    {
      "epoch": 0.02269503546099291,
      "grad_norm": 0.6856934428215027,
      "learning_rate": 8.000000000000001e-06,
      "loss": 2.7061,
      "step": 24
    },
    {
      "epoch": 0.02364066193853428,
      "grad_norm": 0.7126139402389526,
      "learning_rate": 8.333333333333334e-06,
      "loss": 2.6611,
      "step": 25
    },
    {
      "epoch": 0.02458628841607565,
      "grad_norm": 0.7721053957939148,
      "learning_rate": 8.666666666666668e-06,
      "loss": 2.8423,
      "step": 26
    },
    {
      "epoch": 0.02553191489361702,
      "grad_norm": 0.883097231388092,
      "learning_rate": 9e-06,
      "loss": 3.5565,
      "step": 27
    },
    {
      "epoch": 0.026477541371158392,
      "grad_norm": 1.0787785053253174,
      "learning_rate": 9.333333333333334e-06,
      "loss": 3.8743,
      "step": 28
    },
    {
      "epoch": 0.027423167848699765,
      "grad_norm": 0.881747305393219,
      "learning_rate": 9.666666666666667e-06,
      "loss": 3.628,
      "step": 29
    },
    {
      "epoch": 0.028368794326241134,
      "grad_norm": 1.07874596118927,
      "learning_rate": 1e-05,
      "loss": 4.0762,
      "step": 30
    },
    {
      "epoch": 0.029314420803782507,
      "grad_norm": 0.8328316807746887,
      "learning_rate": 1.0333333333333333e-05,
      "loss": 3.512,
      "step": 31
    },
    {
      "epoch": 0.030260047281323876,
      "grad_norm": 0.8973889946937561,
      "learning_rate": 1.0666666666666667e-05,
      "loss": 3.9156,
      "step": 32
    },
    {
      "epoch": 0.031205673758865248,
      "grad_norm": 0.7893106937408447,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 3.3497,
      "step": 33
    },
    {
      "epoch": 0.03215130023640662,
      "grad_norm": 0.8067853450775146,
      "learning_rate": 1.1333333333333334e-05,
      "loss": 3.3522,
      "step": 34
    },
    {
      "epoch": 0.03309692671394799,
      "grad_norm": 1.0948940515518188,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 4.0731,
      "step": 35
    },
    {
      "epoch": 0.03404255319148936,
      "grad_norm": 0.8288351893424988,
      "learning_rate": 1.2e-05,
      "loss": 3.0711,
      "step": 36
    },
    {
      "epoch": 0.034988179669030735,
      "grad_norm": 0.7726929187774658,
      "learning_rate": 1.2333333333333334e-05,
      "loss": 3.0219,
      "step": 37
    },
    {
      "epoch": 0.03593380614657211,
      "grad_norm": 0.9294595122337341,
      "learning_rate": 1.2666666666666668e-05,
      "loss": 4.2024,
      "step": 38
    },
    {
      "epoch": 0.03687943262411347,
      "grad_norm": 0.9361292123794556,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 3.7119,
      "step": 39
    },
    {
      "epoch": 0.037825059101654845,
      "grad_norm": 0.9386151432991028,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 3.7765,
      "step": 40
    },
    {
      "epoch": 0.03877068557919622,
      "grad_norm": 0.9665974974632263,
      "learning_rate": 1.3666666666666666e-05,
      "loss": 3.6401,
      "step": 41
    },
    {
      "epoch": 0.03971631205673759,
      "grad_norm": 0.9367566108703613,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 3.7931,
      "step": 42
    },
    {
      "epoch": 0.04066193853427896,
      "grad_norm": 0.763043999671936,
      "learning_rate": 1.4333333333333334e-05,
      "loss": 3.1378,
      "step": 43
    },
    {
      "epoch": 0.04160756501182033,
      "grad_norm": 1.134705901145935,
      "learning_rate": 1.4666666666666668e-05,
      "loss": 4.1756,
      "step": 44
    },
    {
      "epoch": 0.0425531914893617,
      "grad_norm": 1.111829161643982,
      "learning_rate": 1.5e-05,
      "loss": 4.4766,
      "step": 45
    },
    {
      "epoch": 0.043498817966903074,
      "grad_norm": 0.8709962368011475,
      "learning_rate": 1.5333333333333334e-05,
      "loss": 2.983,
      "step": 46
    },
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 1.0141003131866455,
      "learning_rate": 1.5666666666666667e-05,
      "loss": 4.134,
      "step": 47
    },
    {
      "epoch": 0.04539007092198582,
      "grad_norm": 0.9469693303108215,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 3.1916,
      "step": 48
    },
    {
      "epoch": 0.046335697399527184,
      "grad_norm": 0.9570004940032959,
      "learning_rate": 1.6333333333333335e-05,
      "loss": 3.7659,
      "step": 49
    },
    {
      "epoch": 0.04728132387706856,
      "grad_norm": 1.1584417819976807,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 4.3539,
      "step": 50
    },
    {
      "epoch": 0.04728132387706856,
      "eval_loss": 3.263434410095215,
      "eval_runtime": 54.1739,
      "eval_samples_per_second": 65.77,
      "eval_steps_per_second": 16.447,
      "step": 50
    },
    {
      "epoch": 0.04822695035460993,
      "grad_norm": 0.1129605770111084,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 0.175,
      "step": 51
    },
    {
      "epoch": 0.0491725768321513,
      "grad_norm": 0.12607906758785248,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 0.208,
      "step": 52
    },
    {
      "epoch": 0.050118203309692674,
      "grad_norm": 0.16476179659366608,
      "learning_rate": 1.7666666666666668e-05,
      "loss": 0.3471,
      "step": 53
    },
    {
      "epoch": 0.05106382978723404,
      "grad_norm": 0.1916539967060089,
      "learning_rate": 1.8e-05,
      "loss": 0.5088,
      "step": 54
    },
    {
      "epoch": 0.05200945626477541,
      "grad_norm": 0.19213494658470154,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.4579,
      "step": 55
    },
    {
      "epoch": 0.052955082742316785,
      "grad_norm": 0.1868194341659546,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.4327,
      "step": 56
    },
    {
      "epoch": 0.05390070921985816,
      "grad_norm": 0.2641420066356659,
      "learning_rate": 1.9e-05,
      "loss": 0.6682,
      "step": 57
    },
    {
      "epoch": 0.05484633569739953,
      "grad_norm": 0.2992357611656189,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 0.8189,
      "step": 58
    },
    {
      "epoch": 0.055791962174940896,
      "grad_norm": 0.2843168377876282,
      "learning_rate": 1.9666666666666666e-05,
      "loss": 0.7047,
      "step": 59
    },
    {
      "epoch": 0.05673758865248227,
      "grad_norm": 0.3119712769985199,
      "learning_rate": 2e-05,
      "loss": 0.6548,
      "step": 60
    },
    {
      "epoch": 0.05768321513002364,
      "grad_norm": 0.4131757616996765,
      "learning_rate": 2.0333333333333334e-05,
      "loss": 1.0262,
      "step": 61
    },
    {
      "epoch": 0.05862884160756501,
      "grad_norm": 0.399532675743103,
      "learning_rate": 2.0666666666666666e-05,
      "loss": 0.893,
      "step": 62
    },
    {
      "epoch": 0.059574468085106386,
      "grad_norm": 0.4315282702445984,
      "learning_rate": 2.1e-05,
      "loss": 1.1419,
      "step": 63
    },
    {
      "epoch": 0.06052009456264775,
      "grad_norm": 0.5299156904220581,
      "learning_rate": 2.1333333333333335e-05,
      "loss": 1.3849,
      "step": 64
    },
    {
      "epoch": 0.061465721040189124,
      "grad_norm": 0.5484279990196228,
      "learning_rate": 2.1666666666666667e-05,
      "loss": 1.5874,
      "step": 65
    },
    {
      "epoch": 0.062411347517730496,
      "grad_norm": 0.4971844255924225,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 1.3445,
      "step": 66
    },
    {
      "epoch": 0.06335697399527186,
      "grad_norm": 0.6058803796768188,
      "learning_rate": 2.2333333333333335e-05,
      "loss": 1.7419,
      "step": 67
    },
    {
      "epoch": 0.06430260047281323,
      "grad_norm": 0.9219263195991516,
      "learning_rate": 2.2666666666666668e-05,
      "loss": 2.4339,
      "step": 68
    },
    {
      "epoch": 0.06524822695035461,
      "grad_norm": 0.8259028196334839,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 2.2387,
      "step": 69
    },
    {
      "epoch": 0.06619385342789598,
      "grad_norm": 0.8110438585281372,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 2.7578,
      "step": 70
    },
    {
      "epoch": 0.06713947990543735,
      "grad_norm": 0.8333178758621216,
      "learning_rate": 2.3666666666666668e-05,
      "loss": 2.7401,
      "step": 71
    },
    {
      "epoch": 0.06808510638297872,
      "grad_norm": 0.6991426944732666,
      "learning_rate": 2.4e-05,
      "loss": 2.0404,
      "step": 72
    },
    {
      "epoch": 0.0690307328605201,
      "grad_norm": 0.8322498798370361,
      "learning_rate": 2.4333333333333336e-05,
      "loss": 2.5024,
      "step": 73
    },
    {
      "epoch": 0.06997635933806147,
      "grad_norm": 0.6573300957679749,
      "learning_rate": 2.466666666666667e-05,
      "loss": 2.2451,
      "step": 74
    },
    {
      "epoch": 0.07092198581560284,
      "grad_norm": 0.9917909502983093,
      "learning_rate": 2.5e-05,
      "loss": 2.7126,
      "step": 75
    },
    {
      "epoch": 0.07186761229314421,
      "grad_norm": 0.9231103658676147,
      "learning_rate": 2.5333333333333337e-05,
      "loss": 2.4502,
      "step": 76
    },
    {
      "epoch": 0.07281323877068557,
      "grad_norm": 0.8092001676559448,
      "learning_rate": 2.5666666666666666e-05,
      "loss": 2.3372,
      "step": 77
    },
    {
      "epoch": 0.07375886524822695,
      "grad_norm": 0.9932003617286682,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 2.9451,
      "step": 78
    },
    {
      "epoch": 0.07470449172576832,
      "grad_norm": 1.0481059551239014,
      "learning_rate": 2.633333333333333e-05,
      "loss": 3.2932,
      "step": 79
    },
    {
      "epoch": 0.07565011820330969,
      "grad_norm": 1.146222472190857,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 3.2256,
      "step": 80
    },
    {
      "epoch": 0.07659574468085106,
      "grad_norm": 1.144538164138794,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 2.9511,
      "step": 81
    },
    {
      "epoch": 0.07754137115839244,
      "grad_norm": 0.8968273997306824,
      "learning_rate": 2.733333333333333e-05,
      "loss": 2.6522,
      "step": 82
    },
    {
      "epoch": 0.07848699763593381,
      "grad_norm": 1.026343822479248,
      "learning_rate": 2.7666666666666667e-05,
      "loss": 2.9253,
      "step": 83
    },
    {
      "epoch": 0.07943262411347518,
      "grad_norm": 0.8586713671684265,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 2.4545,
      "step": 84
    },
    {
      "epoch": 0.08037825059101655,
      "grad_norm": 1.0284498929977417,
      "learning_rate": 2.8333333333333335e-05,
      "loss": 2.8148,
      "step": 85
    },
    {
      "epoch": 0.08132387706855793,
      "grad_norm": 0.8303140997886658,
      "learning_rate": 2.8666666666666668e-05,
      "loss": 2.3988,
      "step": 86
    },
    {
      "epoch": 0.08226950354609928,
      "grad_norm": 0.7819446921348572,
      "learning_rate": 2.9e-05,
      "loss": 1.9212,
      "step": 87
    },
    {
      "epoch": 0.08321513002364066,
      "grad_norm": 1.0593886375427246,
      "learning_rate": 2.9333333333333336e-05,
      "loss": 2.6381,
      "step": 88
    },
    {
      "epoch": 0.08416075650118203,
      "grad_norm": 1.3569374084472656,
      "learning_rate": 2.9666666666666672e-05,
      "loss": 3.4768,
      "step": 89
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 0.878240168094635,
      "learning_rate": 3e-05,
      "loss": 2.403,
      "step": 90
    },
    {
      "epoch": 0.08605200945626477,
      "grad_norm": 1.1406848430633545,
      "learning_rate": 3.0333333333333337e-05,
      "loss": 3.1817,
      "step": 91
    },
    {
      "epoch": 0.08699763593380615,
      "grad_norm": 0.9278729557991028,
      "learning_rate": 3.066666666666667e-05,
      "loss": 2.433,
      "step": 92
    },
    {
      "epoch": 0.08794326241134752,
      "grad_norm": 0.9704206585884094,
      "learning_rate": 3.1e-05,
      "loss": 2.3984,
      "step": 93
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 1.2946637868881226,
      "learning_rate": 3.1333333333333334e-05,
      "loss": 3.0731,
      "step": 94
    },
    {
      "epoch": 0.08983451536643026,
      "grad_norm": 0.9340803623199463,
      "learning_rate": 3.1666666666666666e-05,
      "loss": 2.2826,
      "step": 95
    },
    {
      "epoch": 0.09078014184397164,
      "grad_norm": 1.044756531715393,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 2.456,
      "step": 96
    },
    {
      "epoch": 0.091725768321513,
      "grad_norm": 1.097231388092041,
      "learning_rate": 3.233333333333333e-05,
      "loss": 2.4481,
      "step": 97
    },
    {
      "epoch": 0.09267139479905437,
      "grad_norm": 1.1005371809005737,
      "learning_rate": 3.266666666666667e-05,
      "loss": 2.3557,
      "step": 98
    },
    {
      "epoch": 0.09361702127659574,
      "grad_norm": 0.9912729859352112,
      "learning_rate": 3.3e-05,
      "loss": 2.1763,
      "step": 99
    },
    {
      "epoch": 0.09456264775413711,
      "grad_norm": 1.5450869798660278,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 3.3969,
      "step": 100
    },
    {
      "epoch": 0.09456264775413711,
      "eval_loss": 1.9755831956863403,
      "eval_runtime": 54.202,
      "eval_samples_per_second": 65.736,
      "eval_steps_per_second": 16.439,
      "step": 100
    },
    {
      "epoch": 0.09550827423167849,
      "grad_norm": 0.1785513013601303,
      "learning_rate": 3.366666666666667e-05,
      "loss": 0.1393,
      "step": 101
    },
    {
      "epoch": 0.09645390070921986,
      "grad_norm": 0.21543779969215393,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.1675,
      "step": 102
    },
    {
      "epoch": 0.09739952718676123,
      "grad_norm": 0.27484190464019775,
      "learning_rate": 3.433333333333333e-05,
      "loss": 0.2331,
      "step": 103
    },
    {
      "epoch": 0.0983451536643026,
      "grad_norm": 0.22659574449062347,
      "learning_rate": 3.466666666666667e-05,
      "loss": 0.2908,
      "step": 104
    },
    {
      "epoch": 0.09929078014184398,
      "grad_norm": 0.28378260135650635,
      "learning_rate": 3.5e-05,
      "loss": 0.2924,
      "step": 105
    },
    {
      "epoch": 0.10023640661938535,
      "grad_norm": 0.2579360902309418,
      "learning_rate": 3.5333333333333336e-05,
      "loss": 0.3687,
      "step": 106
    },
    {
      "epoch": 0.10118203309692671,
      "grad_norm": 0.2652228772640228,
      "learning_rate": 3.566666666666667e-05,
      "loss": 0.5374,
      "step": 107
    },
    {
      "epoch": 0.10212765957446808,
      "grad_norm": 0.32100433111190796,
      "learning_rate": 3.6e-05,
      "loss": 0.6874,
      "step": 108
    },
    {
      "epoch": 0.10307328605200945,
      "grad_norm": 0.30462920665740967,
      "learning_rate": 3.633333333333333e-05,
      "loss": 0.4254,
      "step": 109
    },
    {
      "epoch": 0.10401891252955082,
      "grad_norm": 0.2611568570137024,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 0.6701,
      "step": 110
    },
    {
      "epoch": 0.1049645390070922,
      "grad_norm": 0.29873016476631165,
      "learning_rate": 3.7e-05,
      "loss": 0.6449,
      "step": 111
    },
    {
      "epoch": 0.10591016548463357,
      "grad_norm": 0.2605455219745636,
      "learning_rate": 3.733333333333334e-05,
      "loss": 0.6369,
      "step": 112
    },
    {
      "epoch": 0.10685579196217494,
      "grad_norm": 0.5200917720794678,
      "learning_rate": 3.766666666666667e-05,
      "loss": 1.1225,
      "step": 113
    },
    {
      "epoch": 0.10780141843971631,
      "grad_norm": 0.31477710604667664,
      "learning_rate": 3.8e-05,
      "loss": 0.772,
      "step": 114
    },
    {
      "epoch": 0.10874704491725769,
      "grad_norm": 0.42271727323532104,
      "learning_rate": 3.8333333333333334e-05,
      "loss": 1.0659,
      "step": 115
    },
    {
      "epoch": 0.10969267139479906,
      "grad_norm": 0.37258803844451904,
      "learning_rate": 3.866666666666667e-05,
      "loss": 0.9266,
      "step": 116
    },
    {
      "epoch": 0.11063829787234042,
      "grad_norm": 0.3200523257255554,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.782,
      "step": 117
    },
    {
      "epoch": 0.11158392434988179,
      "grad_norm": 0.37096261978149414,
      "learning_rate": 3.933333333333333e-05,
      "loss": 0.8424,
      "step": 118
    },
    {
      "epoch": 0.11252955082742316,
      "grad_norm": 0.3387626111507416,
      "learning_rate": 3.966666666666667e-05,
      "loss": 0.7941,
      "step": 119
    },
    {
      "epoch": 0.11347517730496454,
      "grad_norm": 0.5522480607032776,
      "learning_rate": 4e-05,
      "loss": 1.3268,
      "step": 120
    },
    {
      "epoch": 0.11442080378250591,
      "grad_norm": 0.6289123892784119,
      "learning_rate": 4.0333333333333336e-05,
      "loss": 1.4588,
      "step": 121
    },
    {
      "epoch": 0.11536643026004728,
      "grad_norm": 0.4001694321632385,
      "learning_rate": 4.066666666666667e-05,
      "loss": 1.056,
      "step": 122
    },
    {
      "epoch": 0.11631205673758865,
      "grad_norm": 0.4287935793399811,
      "learning_rate": 4.1e-05,
      "loss": 1.1171,
      "step": 123
    },
    {
      "epoch": 0.11725768321513003,
      "grad_norm": 0.5254130363464355,
      "learning_rate": 4.133333333333333e-05,
      "loss": 1.2804,
      "step": 124
    },
    {
      "epoch": 0.1182033096926714,
      "grad_norm": 0.5890396237373352,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.5757,
      "step": 125
    },
    {
      "epoch": 0.11914893617021277,
      "grad_norm": 0.5535526275634766,
      "learning_rate": 4.2e-05,
      "loss": 1.3113,
      "step": 126
    },
    {
      "epoch": 0.12009456264775414,
      "grad_norm": 0.5088675618171692,
      "learning_rate": 4.233333333333334e-05,
      "loss": 1.3381,
      "step": 127
    },
    {
      "epoch": 0.1210401891252955,
      "grad_norm": 0.6092506051063538,
      "learning_rate": 4.266666666666667e-05,
      "loss": 1.5369,
      "step": 128
    },
    {
      "epoch": 0.12198581560283688,
      "grad_norm": 0.5836873650550842,
      "learning_rate": 4.3e-05,
      "loss": 1.3943,
      "step": 129
    },
    {
      "epoch": 0.12293144208037825,
      "grad_norm": 0.6445267200469971,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 1.3146,
      "step": 130
    },
    {
      "epoch": 0.12387706855791962,
      "grad_norm": 0.4378418028354645,
      "learning_rate": 4.3666666666666666e-05,
      "loss": 1.0086,
      "step": 131
    },
    {
      "epoch": 0.12482269503546099,
      "grad_norm": 0.6770736575126648,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.5146,
      "step": 132
    },
    {
      "epoch": 0.12576832151300235,
      "grad_norm": 0.6039463877677917,
      "learning_rate": 4.433333333333334e-05,
      "loss": 1.3621,
      "step": 133
    },
    {
      "epoch": 0.12671394799054372,
      "grad_norm": 0.6413301229476929,
      "learning_rate": 4.466666666666667e-05,
      "loss": 1.4514,
      "step": 134
    },
    {
      "epoch": 0.1276595744680851,
      "grad_norm": 0.6788724660873413,
      "learning_rate": 4.5e-05,
      "loss": 1.3301,
      "step": 135
    },
    {
      "epoch": 0.12860520094562647,
      "grad_norm": 0.5223095417022705,
      "learning_rate": 4.5333333333333335e-05,
      "loss": 1.2304,
      "step": 136
    },
    {
      "epoch": 0.12955082742316784,
      "grad_norm": 0.6495351195335388,
      "learning_rate": 4.566666666666667e-05,
      "loss": 1.4054,
      "step": 137
    },
    {
      "epoch": 0.13049645390070921,
      "grad_norm": 0.6079133749008179,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.3832,
      "step": 138
    },
    {
      "epoch": 0.1314420803782506,
      "grad_norm": 0.5553860664367676,
      "learning_rate": 4.633333333333333e-05,
      "loss": 1.0288,
      "step": 139
    },
    {
      "epoch": 0.13238770685579196,
      "grad_norm": 0.550632894039154,
      "learning_rate": 4.666666666666667e-05,
      "loss": 1.2453,
      "step": 140
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 0.9021189212799072,
      "learning_rate": 4.7e-05,
      "loss": 1.6022,
      "step": 141
    },
    {
      "epoch": 0.1342789598108747,
      "grad_norm": 0.6802974939346313,
      "learning_rate": 4.7333333333333336e-05,
      "loss": 1.5432,
      "step": 142
    },
    {
      "epoch": 0.13522458628841608,
      "grad_norm": 0.5788073539733887,
      "learning_rate": 4.766666666666667e-05,
      "loss": 1.2525,
      "step": 143
    },
    {
      "epoch": 0.13617021276595745,
      "grad_norm": 0.6477014422416687,
      "learning_rate": 4.8e-05,
      "loss": 1.0959,
      "step": 144
    },
    {
      "epoch": 0.13711583924349882,
      "grad_norm": 0.4955628216266632,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 1.1016,
      "step": 145
    },
    {
      "epoch": 0.1380614657210402,
      "grad_norm": 0.4967166483402252,
      "learning_rate": 4.866666666666667e-05,
      "loss": 1.0116,
      "step": 146
    },
    {
      "epoch": 0.13900709219858157,
      "grad_norm": 0.5145598649978638,
      "learning_rate": 4.9e-05,
      "loss": 1.0286,
      "step": 147
    },
    {
      "epoch": 0.13995271867612294,
      "grad_norm": 0.7154728174209595,
      "learning_rate": 4.933333333333334e-05,
      "loss": 1.2846,
      "step": 148
    },
    {
      "epoch": 0.1408983451536643,
      "grad_norm": 0.6433387994766235,
      "learning_rate": 4.966666666666667e-05,
      "loss": 1.0688,
      "step": 149
    },
    {
      "epoch": 0.14184397163120568,
      "grad_norm": 0.8238207697868347,
      "learning_rate": 5e-05,
      "loss": 1.694,
      "step": 150
    },
    {
      "epoch": 0.14184397163120568,
      "eval_loss": 1.0205990076065063,
      "eval_runtime": 54.4047,
      "eval_samples_per_second": 65.491,
      "eval_steps_per_second": 16.377,
      "step": 150
    },
    {
      "epoch": 0.14278959810874706,
      "grad_norm": 0.45450514554977417,
      "learning_rate": 4.995066821070679e-05,
      "loss": 0.1451,
      "step": 151
    },
    {
      "epoch": 0.14373522458628843,
      "grad_norm": 0.5641042590141296,
      "learning_rate": 4.980286753286195e-05,
      "loss": 0.19,
      "step": 152
    },
    {
      "epoch": 0.14468085106382977,
      "grad_norm": 0.6517515182495117,
      "learning_rate": 4.9557181268217227e-05,
      "loss": 0.2196,
      "step": 153
    },
    {
      "epoch": 0.14562647754137115,
      "grad_norm": 0.5124554634094238,
      "learning_rate": 4.9214579028215776e-05,
      "loss": 0.2285,
      "step": 154
    },
    {
      "epoch": 0.14657210401891252,
      "grad_norm": 0.4333844482898712,
      "learning_rate": 4.877641290737884e-05,
      "loss": 0.2272,
      "step": 155
    },
    {
      "epoch": 0.1475177304964539,
      "grad_norm": 0.3877612352371216,
      "learning_rate": 4.8244412147206284e-05,
      "loss": 0.3144,
      "step": 156
    },
    {
      "epoch": 0.14846335697399526,
      "grad_norm": 0.3211458921432495,
      "learning_rate": 4.762067631165049e-05,
      "loss": 0.3578,
      "step": 157
    },
    {
      "epoch": 0.14940898345153664,
      "grad_norm": 0.24879109859466553,
      "learning_rate": 4.690766700109659e-05,
      "loss": 0.2756,
      "step": 158
    },
    {
      "epoch": 0.150354609929078,
      "grad_norm": 0.22662314772605896,
      "learning_rate": 4.610819813755038e-05,
      "loss": 0.2839,
      "step": 159
    },
    {
      "epoch": 0.15130023640661938,
      "grad_norm": 0.23902210593223572,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.3703,
      "step": 160
    },
    {
      "epoch": 0.15224586288416075,
      "grad_norm": 0.23963478207588196,
      "learning_rate": 4.426283106939474e-05,
      "loss": 0.3972,
      "step": 161
    },
    {
      "epoch": 0.15319148936170213,
      "grad_norm": 0.23576639592647552,
      "learning_rate": 4.3224215685535294e-05,
      "loss": 0.3027,
      "step": 162
    },
    {
      "epoch": 0.1541371158392435,
      "grad_norm": 0.33861732482910156,
      "learning_rate": 4.211367764821722e-05,
      "loss": 0.481,
      "step": 163
    },
    {
      "epoch": 0.15508274231678487,
      "grad_norm": 0.34786704182624817,
      "learning_rate": 4.093559974371725e-05,
      "loss": 0.441,
      "step": 164
    },
    {
      "epoch": 0.15602836879432624,
      "grad_norm": 0.5184177160263062,
      "learning_rate": 3.969463130731183e-05,
      "loss": 0.5629,
      "step": 165
    },
    {
      "epoch": 0.15697399527186762,
      "grad_norm": 0.38847318291664124,
      "learning_rate": 3.8395669874474915e-05,
      "loss": 0.5856,
      "step": 166
    },
    {
      "epoch": 0.157919621749409,
      "grad_norm": 0.4933064877986908,
      "learning_rate": 3.704384185254288e-05,
      "loss": 0.6682,
      "step": 167
    },
    {
      "epoch": 0.15886524822695036,
      "grad_norm": 0.4688006043434143,
      "learning_rate": 3.564448228912682e-05,
      "loss": 0.6222,
      "step": 168
    },
    {
      "epoch": 0.15981087470449173,
      "grad_norm": 0.7410577535629272,
      "learning_rate": 3.4203113817116957e-05,
      "loss": 0.8418,
      "step": 169
    },
    {
      "epoch": 0.1607565011820331,
      "grad_norm": 0.7604397535324097,
      "learning_rate": 3.272542485937369e-05,
      "loss": 1.1219,
      "step": 170
    },
    {
      "epoch": 0.16170212765957448,
      "grad_norm": 0.6462581753730774,
      "learning_rate": 3.121724717912138e-05,
      "loss": 0.9533,
      "step": 171
    },
    {
      "epoch": 0.16264775413711585,
      "grad_norm": 0.2718241810798645,
      "learning_rate": 2.9684532864643122e-05,
      "loss": 0.5001,
      "step": 172
    },
    {
      "epoch": 0.1635933806146572,
      "grad_norm": 0.35057616233825684,
      "learning_rate": 2.8133330839107608e-05,
      "loss": 0.5601,
      "step": 173
    },
    {
      "epoch": 0.16453900709219857,
      "grad_norm": 0.5394273996353149,
      "learning_rate": 2.656976298823284e-05,
      "loss": 0.8314,
      "step": 174
    },
    {
      "epoch": 0.16548463356973994,
      "grad_norm": 0.3971342444419861,
      "learning_rate": 2.5e-05,
      "loss": 0.7177,
      "step": 175
    },
    {
      "epoch": 0.16643026004728131,
      "grad_norm": 0.3941769003868103,
      "learning_rate": 2.3430237011767167e-05,
      "loss": 0.6324,
      "step": 176
    },
    {
      "epoch": 0.1673758865248227,
      "grad_norm": 0.45238277316093445,
      "learning_rate": 2.186666916089239e-05,
      "loss": 0.7953,
      "step": 177
    },
    {
      "epoch": 0.16832151300236406,
      "grad_norm": 0.5700445771217346,
      "learning_rate": 2.031546713535688e-05,
      "loss": 0.859,
      "step": 178
    },
    {
      "epoch": 0.16926713947990543,
      "grad_norm": 0.47867706418037415,
      "learning_rate": 1.8782752820878634e-05,
      "loss": 0.8502,
      "step": 179
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.42379331588745117,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 0.8364,
      "step": 180
    },
    {
      "epoch": 0.17115839243498818,
      "grad_norm": 0.4100590944290161,
      "learning_rate": 1.5796886182883053e-05,
      "loss": 0.7299,
      "step": 181
    },
    {
      "epoch": 0.17210401891252955,
      "grad_norm": 0.5437663197517395,
      "learning_rate": 1.4355517710873184e-05,
      "loss": 0.7789,
      "step": 182
    },
    {
      "epoch": 0.17304964539007092,
      "grad_norm": 0.47485315799713135,
      "learning_rate": 1.2956158147457115e-05,
      "loss": 0.8491,
      "step": 183
    },
    {
      "epoch": 0.1739952718676123,
      "grad_norm": 0.48820245265960693,
      "learning_rate": 1.1604330125525079e-05,
      "loss": 0.926,
      "step": 184
    },
    {
      "epoch": 0.17494089834515367,
      "grad_norm": 0.32581862807273865,
      "learning_rate": 1.0305368692688174e-05,
      "loss": 0.5398,
      "step": 185
    },
    {
      "epoch": 0.17588652482269504,
      "grad_norm": 0.4086638391017914,
      "learning_rate": 9.064400256282757e-06,
      "loss": 0.8114,
      "step": 186
    },
    {
      "epoch": 0.1768321513002364,
      "grad_norm": 0.4679095447063446,
      "learning_rate": 7.886322351782783e-06,
      "loss": 0.8871,
      "step": 187
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 0.4896736145019531,
      "learning_rate": 6.775784314464717e-06,
      "loss": 0.81,
      "step": 188
    },
    {
      "epoch": 0.17872340425531916,
      "grad_norm": 0.4382385015487671,
      "learning_rate": 5.737168930605272e-06,
      "loss": 0.8284,
      "step": 189
    },
    {
      "epoch": 0.17966903073286053,
      "grad_norm": 0.5600558519363403,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 1.0058,
      "step": 190
    },
    {
      "epoch": 0.1806146572104019,
      "grad_norm": 0.5055614113807678,
      "learning_rate": 3.891801862449629e-06,
      "loss": 0.8065,
      "step": 191
    },
    {
      "epoch": 0.18156028368794327,
      "grad_norm": 0.6402313113212585,
      "learning_rate": 3.092332998903416e-06,
      "loss": 1.0421,
      "step": 192
    },
    {
      "epoch": 0.18250591016548465,
      "grad_norm": 0.5336678624153137,
      "learning_rate": 2.379323688349516e-06,
      "loss": 0.8897,
      "step": 193
    },
    {
      "epoch": 0.183451536643026,
      "grad_norm": 0.5516868829727173,
      "learning_rate": 1.7555878527937164e-06,
      "loss": 0.8386,
      "step": 194
    },
    {
      "epoch": 0.18439716312056736,
      "grad_norm": 0.6657341718673706,
      "learning_rate": 1.2235870926211619e-06,
      "loss": 1.1191,
      "step": 195
    },
    {
      "epoch": 0.18534278959810874,
      "grad_norm": 0.5750419497489929,
      "learning_rate": 7.854209717842231e-07,
      "loss": 1.0913,
      "step": 196
    },
    {
      "epoch": 0.1862884160756501,
      "grad_norm": 0.4884861409664154,
      "learning_rate": 4.4281873178278475e-07,
      "loss": 0.976,
      "step": 197
    },
    {
      "epoch": 0.18723404255319148,
      "grad_norm": 0.5465104579925537,
      "learning_rate": 1.9713246713805588e-07,
      "loss": 0.8359,
      "step": 198
    },
    {
      "epoch": 0.18817966903073285,
      "grad_norm": 0.6062586903572083,
      "learning_rate": 4.9331789293211026e-08,
      "loss": 0.9467,
      "step": 199
    },
    {
      "epoch": 0.18912529550827423,
      "grad_norm": 0.7774025201797485,
      "learning_rate": 0.0,
      "loss": 1.1784,
      "step": 200
    },
    {
      "epoch": 0.18912529550827423,
      "eval_loss": 0.8490656018257141,
      "eval_runtime": 54.2258,
      "eval_samples_per_second": 65.707,
      "eval_steps_per_second": 16.431,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 4,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.614235570896896e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}