{
  "best_metric": 18.4285,
  "best_model_checkpoint": "./runtime/tFINE-base-300m-samsum/checkpoint-345",
  "epoch": 3.995656894679696,
  "eval_steps": 500,
  "global_step": 460,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04343105320304017,
      "grad_norm": 6.462469577789307,
      "learning_rate": 2.173913043478261e-05,
      "loss": 5.2862,
      "step": 5
    },
    {
      "epoch": 0.08686210640608034,
      "grad_norm": 4.075676918029785,
      "learning_rate": 4.347826086956522e-05,
      "loss": 4.0973,
      "step": 10
    },
    {
      "epoch": 0.13029315960912052,
      "grad_norm": 2.891948938369751,
      "learning_rate": 6.521739130434783e-05,
      "loss": 3.2568,
      "step": 15
    },
    {
      "epoch": 0.1737242128121607,
      "grad_norm": 1.456381916999817,
      "learning_rate": 8.695652173913044e-05,
      "loss": 2.6852,
      "step": 20
    },
    {
      "epoch": 0.21715526601520088,
      "grad_norm": 1.6086758375167847,
      "learning_rate": 9.999483191807244e-05,
      "loss": 2.4395,
      "step": 25
    },
    {
      "epoch": 0.26058631921824105,
      "grad_norm": 1.1808384656906128,
      "learning_rate": 9.993670326516924e-05,
      "loss": 2.3317,
      "step": 30
    },
    {
      "epoch": 0.30401737242128124,
      "grad_norm": 1.2403559684753418,
      "learning_rate": 9.981406120397172e-05,
      "loss": 2.2786,
      "step": 35
    },
    {
      "epoch": 0.3474484256243214,
      "grad_norm": 1.1080374717712402,
      "learning_rate": 9.962706417620413e-05,
      "loss": 2.2042,
      "step": 40
    },
    {
      "epoch": 0.39087947882736157,
      "grad_norm": 1.1210006475448608,
      "learning_rate": 9.93759537640057e-05,
      "loss": 2.1659,
      "step": 45
    },
    {
      "epoch": 0.43431053203040176,
      "grad_norm": 1.0776363611221313,
      "learning_rate": 9.90610543778299e-05,
      "loss": 2.1975,
      "step": 50
    },
    {
      "epoch": 0.4777415852334419,
      "grad_norm": 1.2220784425735474,
      "learning_rate": 9.868277283733726e-05,
      "loss": 2.1266,
      "step": 55
    },
    {
      "epoch": 0.5211726384364821,
      "grad_norm": 1.5034546852111816,
      "learning_rate": 9.824159784582368e-05,
      "loss": 2.1108,
      "step": 60
    },
    {
      "epoch": 0.5646036916395223,
      "grad_norm": 1.4478706121444702,
      "learning_rate": 9.773809935886287e-05,
      "loss": 2.0641,
      "step": 65
    },
    {
      "epoch": 0.6080347448425625,
      "grad_norm": 1.1443687677383423,
      "learning_rate": 9.717292784797854e-05,
      "loss": 2.0728,
      "step": 70
    },
    {
      "epoch": 0.6514657980456026,
      "grad_norm": 1.1472234725952148,
      "learning_rate": 9.654681346029808e-05,
      "loss": 2.0482,
      "step": 75
    },
    {
      "epoch": 0.6948968512486428,
      "grad_norm": 1.109851360321045,
      "learning_rate": 9.586056507527266e-05,
      "loss": 2.0456,
      "step": 80
    },
    {
      "epoch": 0.738327904451683,
      "grad_norm": 1.109726905822754,
      "learning_rate": 9.5115069259683e-05,
      "loss": 2.0477,
      "step": 85
    },
    {
      "epoch": 0.7817589576547231,
      "grad_norm": 1.1185649633407593,
      "learning_rate": 9.43112891222806e-05,
      "loss": 2.0252,
      "step": 90
    },
    {
      "epoch": 0.8251900108577633,
      "grad_norm": 1.1537866592407227,
      "learning_rate": 9.345026306954386e-05,
      "loss": 2.0184,
      "step": 95
    },
    {
      "epoch": 0.8686210640608035,
      "grad_norm": 1.1786285638809204,
      "learning_rate": 9.253310346415714e-05,
      "loss": 1.9877,
      "step": 100
    },
    {
      "epoch": 0.9120521172638436,
      "grad_norm": 1.202744722366333,
      "learning_rate": 9.156099518794534e-05,
      "loss": 1.9814,
      "step": 105
    },
    {
      "epoch": 0.9554831704668838,
      "grad_norm": 1.3231650590896606,
      "learning_rate": 9.053519411112075e-05,
      "loss": 1.9585,
      "step": 110
    },
    {
      "epoch": 0.998914223669924,
      "grad_norm": 1.330356240272522,
      "learning_rate": 8.945702546981969e-05,
      "loss": 1.9528,
      "step": 115
    },
    {
      "epoch": 0.998914223669924,
      "eval_gen_len": 29.333333333333332,
      "eval_loss": 1.9189409017562866,
      "eval_rouge1": 40.093,
      "eval_rouge2": 18.2018,
      "eval_rougeL": 33.9749,
      "eval_rougeLsum": 36.9071,
      "eval_runtime": 64.3388,
      "eval_samples_per_second": 4.663,
      "eval_steps_per_second": 0.295,
      "step": 115
    },
    {
      "epoch": 1.0423452768729642,
      "grad_norm": 1.36496901512146,
      "learning_rate": 8.832788215402527e-05,
      "loss": 1.6338,
      "step": 120
    },
    {
      "epoch": 1.0857763300760044,
      "grad_norm": 1.3041751384735107,
      "learning_rate": 8.714922290808766e-05,
      "loss": 1.6039,
      "step": 125
    },
    {
      "epoch": 1.1292073832790446,
      "grad_norm": 1.2985814809799194,
      "learning_rate": 8.592257044616702e-05,
      "loss": 1.6221,
      "step": 130
    },
    {
      "epoch": 1.1726384364820848,
      "grad_norm": 1.2213962078094482,
      "learning_rate": 8.464950948503349e-05,
      "loss": 1.5671,
      "step": 135
    },
    {
      "epoch": 1.216069489685125,
      "grad_norm": 1.110490322113037,
      "learning_rate": 8.333168469676595e-05,
      "loss": 1.6163,
      "step": 140
    },
    {
      "epoch": 1.2595005428881652,
      "grad_norm": 1.22842538356781,
      "learning_rate": 8.197079858399403e-05,
      "loss": 1.6156,
      "step": 145
    },
    {
      "epoch": 1.3029315960912053,
      "grad_norm": 1.2326569557189941,
      "learning_rate": 8.05686092804289e-05,
      "loss": 1.6263,
      "step": 150
    },
    {
      "epoch": 1.3463626492942453,
      "grad_norm": 1.2563903331756592,
      "learning_rate": 7.912692827952394e-05,
      "loss": 1.5923,
      "step": 155
    },
    {
      "epoch": 1.3897937024972855,
      "grad_norm": 1.1862818002700806,
      "learning_rate": 7.76476180941997e-05,
      "loss": 1.6199,
      "step": 160
    },
    {
      "epoch": 1.4332247557003257,
      "grad_norm": 1.186631202697754,
      "learning_rate": 7.613258985065672e-05,
      "loss": 1.6409,
      "step": 165
    },
    {
      "epoch": 1.476655808903366,
      "grad_norm": 1.1485611200332642,
      "learning_rate": 7.45838008193847e-05,
      "loss": 1.6194,
      "step": 170
    },
    {
      "epoch": 1.520086862106406,
      "grad_norm": 1.158892273902893,
      "learning_rate": 7.300325188655761e-05,
      "loss": 1.561,
      "step": 175
    },
    {
      "epoch": 1.5635179153094463,
      "grad_norm": 1.1595680713653564,
      "learning_rate": 7.139298496908154e-05,
      "loss": 1.5814,
      "step": 180
    },
    {
      "epoch": 1.6069489685124865,
      "grad_norm": 1.2705223560333252,
      "learning_rate": 6.97550803766349e-05,
      "loss": 1.5873,
      "step": 185
    },
    {
      "epoch": 1.6503800217155264,
      "grad_norm": 1.5138100385665894,
      "learning_rate": 6.809165412410876e-05,
      "loss": 1.6218,
      "step": 190
    },
    {
      "epoch": 1.6938110749185666,
      "grad_norm": 1.7399356365203857,
      "learning_rate": 6.640485519791953e-05,
      "loss": 1.5717,
      "step": 195
    },
    {
      "epoch": 1.7372421281216068,
      "grad_norm": 1.1342989206314087,
      "learning_rate": 6.469686277972556e-05,
      "loss": 1.5773,
      "step": 200
    },
    {
      "epoch": 1.780673181324647,
      "grad_norm": 1.1253174543380737,
      "learning_rate": 6.296988343113452e-05,
      "loss": 1.584,
      "step": 205
    },
    {
      "epoch": 1.8241042345276872,
      "grad_norm": 1.1998904943466187,
      "learning_rate": 6.122614824303845e-05,
      "loss": 1.6189,
      "step": 210
    },
    {
      "epoch": 1.8675352877307274,
      "grad_norm": 1.1804780960083008,
      "learning_rate": 5.946790995325924e-05,
      "loss": 1.5844,
      "step": 215
    },
    {
      "epoch": 1.9109663409337676,
      "grad_norm": 1.3260307312011719,
      "learning_rate": 5.769744003622851e-05,
      "loss": 1.5731,
      "step": 220
    },
    {
      "epoch": 1.9543973941368078,
      "grad_norm": 1.1990879774093628,
      "learning_rate": 5.59170257684616e-05,
      "loss": 1.6082,
      "step": 225
    },
    {
      "epoch": 1.997828447339848,
      "grad_norm": 1.164106011390686,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 1.5346,
      "step": 230
    },
    {
      "epoch": 1.997828447339848,
      "eval_gen_len": 27.663333333333334,
      "eval_loss": 1.8827488422393799,
      "eval_rouge1": 41.4676,
      "eval_rouge2": 18.3467,
      "eval_rougeL": 34.1909,
      "eval_rougeLsum": 38.2131,
      "eval_runtime": 41.5739,
      "eval_samples_per_second": 7.216,
      "eval_steps_per_second": 0.457,
      "step": 230
    },
    {
      "epoch": 2.041259500542888,
      "grad_norm": 1.166826844215393,
      "learning_rate": 5.2335574550956446e-05,
      "loss": 1.268,
      "step": 235
    },
    {
      "epoch": 2.0846905537459284,
      "grad_norm": 1.2919505834579468,
      "learning_rate": 5.053916449105219e-05,
      "loss": 1.2186,
      "step": 240
    },
    {
      "epoch": 2.1281216069489686,
      "grad_norm": 1.4431166648864746,
      "learning_rate": 4.874205788258397e-05,
      "loss": 1.1827,
      "step": 245
    },
    {
      "epoch": 2.1715526601520088,
      "grad_norm": 1.4269115924835205,
      "learning_rate": 4.694657641410549e-05,
      "loss": 1.1784,
      "step": 250
    },
    {
      "epoch": 2.214983713355049,
      "grad_norm": 1.4717003107070923,
      "learning_rate": 4.515503967464618e-05,
      "loss": 1.1932,
      "step": 255
    },
    {
      "epoch": 2.258414766558089,
      "grad_norm": 1.2849444150924683,
      "learning_rate": 4.336976215702574e-05,
      "loss": 1.1578,
      "step": 260
    },
    {
      "epoch": 2.3018458197611293,
      "grad_norm": 1.2847343683242798,
      "learning_rate": 4.1593050267752485e-05,
      "loss": 1.1344,
      "step": 265
    },
    {
      "epoch": 2.3452768729641695,
      "grad_norm": 1.281315565109253,
      "learning_rate": 3.982719934736832e-05,
      "loss": 1.1719,
      "step": 270
    },
    {
      "epoch": 2.3887079261672097,
      "grad_norm": 1.2924513816833496,
      "learning_rate": 3.807449070508998e-05,
      "loss": 1.187,
      "step": 275
    },
    {
      "epoch": 2.43213897937025,
      "grad_norm": 1.2814276218414307,
      "learning_rate": 3.633718867157746e-05,
      "loss": 1.2178,
      "step": 280
    },
    {
      "epoch": 2.47557003257329,
      "grad_norm": 1.450994849205017,
      "learning_rate": 3.4617537673636866e-05,
      "loss": 1.1659,
      "step": 285
    },
    {
      "epoch": 2.5190010857763303,
      "grad_norm": 1.2326401472091675,
      "learning_rate": 3.2917759334637374e-05,
      "loss": 1.1816,
      "step": 290
    },
    {
      "epoch": 2.5624321389793705,
      "grad_norm": 1.2742615938186646,
      "learning_rate": 3.124004960438796e-05,
      "loss": 1.1543,
      "step": 295
    },
    {
      "epoch": 2.6058631921824107,
      "grad_norm": 1.4592117071151733,
      "learning_rate": 2.9586575922181724e-05,
      "loss": 1.214,
      "step": 300
    },
    {
      "epoch": 2.6492942453854504,
      "grad_norm": 1.3233025074005127,
      "learning_rate": 2.7959474416673336e-05,
      "loss": 1.1767,
      "step": 305
    },
    {
      "epoch": 2.6927252985884906,
      "grad_norm": 1.187286376953125,
      "learning_rate": 2.6360847146206623e-05,
      "loss": 1.1769,
      "step": 310
    },
    {
      "epoch": 2.736156351791531,
      "grad_norm": 1.3445571660995483,
      "learning_rate": 2.4792759383157748e-05,
      "loss": 1.2048,
      "step": 315
    },
    {
      "epoch": 2.779587404994571,
      "grad_norm": 1.399775505065918,
      "learning_rate": 2.325723694580229e-05,
      "loss": 1.1756,
      "step": 320
    },
    {
      "epoch": 2.823018458197611,
      "grad_norm": 1.3379682302474976,
      "learning_rate": 2.1756263581153424e-05,
      "loss": 1.1694,
      "step": 325
    },
    {
      "epoch": 2.8664495114006514,
      "grad_norm": 1.4017045497894287,
      "learning_rate": 2.0291778402151685e-05,
      "loss": 1.1876,
      "step": 330
    },
    {
      "epoch": 2.9098805646036916,
      "grad_norm": 1.3478162288665771,
      "learning_rate": 1.8865673382518145e-05,
      "loss": 1.1993,
      "step": 335
    },
    {
      "epoch": 2.953311617806732,
      "grad_norm": 1.2747628688812256,
      "learning_rate": 1.7479790912506626e-05,
      "loss": 1.1913,
      "step": 340
    },
    {
      "epoch": 2.996742671009772,
      "grad_norm": 1.3648200035095215,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 1.1696,
      "step": 345
    },
    {
      "epoch": 2.996742671009772,
      "eval_gen_len": 27.803333333333335,
      "eval_loss": 1.9820051193237305,
      "eval_rouge1": 42.3629,
      "eval_rouge2": 18.4285,
      "eval_rougeL": 34.6339,
      "eval_rougeLsum": 38.7792,
      "eval_runtime": 38.5794,
      "eval_samples_per_second": 7.776,
      "eval_steps_per_second": 0.492,
      "step": 345
    },
    {
      "epoch": 3.040173724212812,
      "grad_norm": 1.135827660560608,
      "learning_rate": 1.4835801051016463e-05,
      "loss": 0.9797,
      "step": 350
    },
    {
      "epoch": 3.0836047774158524,
      "grad_norm": 1.2965835332870483,
      "learning_rate": 1.3581109439641588e-05,
      "loss": 0.9405,
      "step": 355
    },
    {
      "epoch": 3.1270358306188926,
      "grad_norm": 1.2207958698272705,
      "learning_rate": 1.237346752523752e-05,
      "loss": 0.9258,
      "step": 360
    },
    {
      "epoch": 3.1704668838219328,
      "grad_norm": 1.3027771711349487,
      "learning_rate": 1.1214435464779006e-05,
      "loss": 0.8973,
      "step": 365
    },
    {
      "epoch": 3.213897937024973,
      "grad_norm": 1.2429888248443604,
      "learning_rate": 1.0105510615994051e-05,
      "loss": 0.8882,
      "step": 370
    },
    {
      "epoch": 3.257328990228013,
      "grad_norm": 1.2552655935287476,
      "learning_rate": 9.048125602921842e-06,
      "loss": 0.9351,
      "step": 375
    },
    {
      "epoch": 3.3007600434310533,
      "grad_norm": 1.290592074394226,
      "learning_rate": 8.043646465100697e-06,
      "loss": 0.9189,
      "step": 380
    },
    {
      "epoch": 3.3441910966340935,
      "grad_norm": 1.3471736907958984,
      "learning_rate": 7.093370892776558e-06,
      "loss": 0.8981,
      "step": 385
    },
    {
      "epoch": 3.3876221498371337,
      "grad_norm": 1.4423019886016846,
      "learning_rate": 6.1985265504122314e-06,
      "loss": 0.914,
      "step": 390
    },
    {
      "epoch": 3.431053203040174,
      "grad_norm": 1.3276619911193848,
      "learning_rate": 5.360269490663278e-06,
      "loss": 0.8998,
      "step": 395
    },
    {
      "epoch": 3.4744842562432137,
      "grad_norm": 1.3065807819366455,
      "learning_rate": 4.5796826608693274e-06,
      "loss": 0.9208,
      "step": 400
    },
    {
      "epoch": 3.517915309446254,
      "grad_norm": 1.4401154518127441,
      "learning_rate": 3.857774503990514e-06,
      "loss": 0.9586,
      "step": 405
    },
    {
      "epoch": 3.561346362649294,
      "grad_norm": 1.4484052658081055,
      "learning_rate": 3.1954776557963085e-06,
      "loss": 0.9459,
      "step": 410
    },
    {
      "epoch": 3.6047774158523342,
      "grad_norm": 1.2900787591934204,
      "learning_rate": 2.593647739990068e-06,
      "loss": 0.9139,
      "step": 415
    },
    {
      "epoch": 3.6482084690553744,
      "grad_norm": 1.4173898696899414,
      "learning_rate": 2.0530622628255615e-06,
      "loss": 0.9515,
      "step": 420
    },
    {
      "epoch": 3.6916395222584146,
      "grad_norm": 1.3273446559906006,
      "learning_rate": 1.574419608643879e-06,
      "loss": 0.9536,
      "step": 425
    },
    {
      "epoch": 3.735070575461455,
      "grad_norm": 1.28830087184906,
      "learning_rate": 1.1583381376281731e-06,
      "loss": 0.9209,
      "step": 430
    },
    {
      "epoch": 3.778501628664495,
      "grad_norm": 1.2910932302474976,
      "learning_rate": 8.053553869418418e-07,
      "loss": 0.9536,
      "step": 435
    },
    {
      "epoch": 3.821932681867535,
      "grad_norm": 1.351585030555725,
      "learning_rate": 5.159273762823657e-07,
      "loss": 0.908,
      "step": 440
    },
    {
      "epoch": 3.8653637350705754,
      "grad_norm": 1.3552790880203247,
      "learning_rate": 2.9042801874777927e-07,
      "loss": 0.8984,
      "step": 445
    },
    {
      "epoch": 3.9087947882736156,
      "grad_norm": 1.3222675323486328,
      "learning_rate": 1.2914863777698792e-07,
      "loss": 0.9384,
      "step": 450
    },
    {
      "epoch": 3.952225841476656,
      "grad_norm": 1.4266947507858276,
      "learning_rate": 3.229759078795524e-08,
      "loss": 0.9457,
      "step": 455
    },
    {
      "epoch": 3.995656894679696,
      "grad_norm": 1.3441340923309326,
      "learning_rate": 0.0,
      "loss": 0.9359,
      "step": 460
    },
    {
      "epoch": 3.995656894679696,
      "eval_gen_len": 30.18,
      "eval_loss": 2.1588149070739746,
      "eval_rouge1": 41.2237,
      "eval_rouge2": 17.8161,
      "eval_rougeL": 33.7101,
      "eval_rougeLsum": 37.9569,
      "eval_runtime": 43.7657,
      "eval_samples_per_second": 6.855,
      "eval_steps_per_second": 0.434,
      "step": 460
    },
    {
      "epoch": 3.995656894679696,
      "step": 460,
      "total_flos": 3.414853029293568e+16,
      "train_loss": 1.5300355652104254,
      "train_runtime": 1725.057,
      "train_samples_per_second": 34.158,
      "train_steps_per_second": 0.267
    }
  ],
  "logging_steps": 5,
  "max_steps": 460,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.414853029293568e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}