|
{ |
|
"best_global_step": 5822, |
|
"best_metric": 1.5156257152557373, |
|
"best_model_checkpoint": "./output/bert-base-medmentions/checkpoint-5822", |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 43665, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1717622810030917, |
|
"grad_norm": 1.9404675960540771, |
|
"learning_rate": 4.9427459063323026e-05, |
|
"loss": 1.7239, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3435245620061834, |
|
"grad_norm": 1.6719824075698853, |
|
"learning_rate": 4.885491812664606e-05, |
|
"loss": 1.6163, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5152868430092752, |
|
"grad_norm": 1.837388515472412, |
|
"learning_rate": 4.828237718996909e-05, |
|
"loss": 1.5924, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6870491240123668, |
|
"grad_norm": 2.1180434226989746, |
|
"learning_rate": 4.770983625329211e-05, |
|
"loss": 1.5717, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8588114050154586, |
|
"grad_norm": 1.800995945930481, |
|
"learning_rate": 4.7137295316615135e-05, |
|
"loss": 1.5686, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8549719237162791, |
|
"eval_f1": 0.5650458863307036, |
|
"eval_loss": 1.5439889430999756, |
|
"eval_precision": 0.5245889387144993, |
|
"eval_recall": 0.6122644801116539, |
|
"eval_runtime": 4.1137, |
|
"eval_samples_per_second": 707.39, |
|
"eval_steps_per_second": 88.485, |
|
"step": 2911 |
|
}, |
|
{ |
|
"epoch": 1.0305736860185504, |
|
"grad_norm": 1.0917384624481201, |
|
"learning_rate": 4.6564754379938166e-05, |
|
"loss": 1.5484, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.202335967021642, |
|
"grad_norm": 1.0721220970153809, |
|
"learning_rate": 4.59922134432612e-05, |
|
"loss": 1.4824, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.3740982480247337, |
|
"grad_norm": 1.5045437812805176, |
|
"learning_rate": 4.541967250658422e-05, |
|
"loss": 1.4894, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5458605290278253, |
|
"grad_norm": 3.3368799686431885, |
|
"learning_rate": 4.484713156990725e-05, |
|
"loss": 1.4863, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.7176228100309172, |
|
"grad_norm": 2.191357374191284, |
|
"learning_rate": 4.427459063323028e-05, |
|
"loss": 1.4806, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.889385091034009, |
|
"grad_norm": 2.575375556945801, |
|
"learning_rate": 4.3702049696553306e-05, |
|
"loss": 1.4792, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8688595400463357, |
|
"eval_f1": 0.6071204975165909, |
|
"eval_loss": 1.5156257152557373, |
|
"eval_precision": 0.5820728291316527, |
|
"eval_recall": 0.6344207955338451, |
|
"eval_runtime": 4.3513, |
|
"eval_samples_per_second": 668.77, |
|
"eval_steps_per_second": 83.654, |
|
"step": 5822 |
|
}, |
|
{ |
|
"epoch": 2.0611473720371007, |
|
"grad_norm": 4.3115081787109375, |
|
"learning_rate": 4.3129508759876336e-05, |
|
"loss": 1.4566, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.2329096530401924, |
|
"grad_norm": 1.5031124353408813, |
|
"learning_rate": 4.255696782319936e-05, |
|
"loss": 1.4053, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.404671934043284, |
|
"grad_norm": 2.4502875804901123, |
|
"learning_rate": 4.198442688652239e-05, |
|
"loss": 1.4111, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.5764342150463757, |
|
"grad_norm": 1.5572278499603271, |
|
"learning_rate": 4.1411885949845415e-05, |
|
"loss": 1.4096, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.7481964960494674, |
|
"grad_norm": 1.3663930892944336, |
|
"learning_rate": 4.0839345013168445e-05, |
|
"loss": 1.4097, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.9199587770525595, |
|
"grad_norm": 3.3840882778167725, |
|
"learning_rate": 4.026680407649147e-05, |
|
"loss": 1.4111, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8713595727692771, |
|
"eval_f1": 0.6163334575106585, |
|
"eval_loss": 1.519059419631958, |
|
"eval_precision": 0.5864513588026782, |
|
"eval_recall": 0.6494242847173761, |
|
"eval_runtime": 4.33, |
|
"eval_samples_per_second": 672.06, |
|
"eval_steps_per_second": 84.065, |
|
"step": 8733 |
|
}, |
|
{ |
|
"epoch": 3.091721058055651, |
|
"grad_norm": 1.4997501373291016, |
|
"learning_rate": 3.96942631398145e-05, |
|
"loss": 1.3798, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.2634833390587428, |
|
"grad_norm": 1.4717656373977661, |
|
"learning_rate": 3.912172220313753e-05, |
|
"loss": 1.3517, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.4352456200618344, |
|
"grad_norm": 2.0151214599609375, |
|
"learning_rate": 3.8549181266460554e-05, |
|
"loss": 1.3532, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.607007901064926, |
|
"grad_norm": 2.7060940265655518, |
|
"learning_rate": 3.797664032978358e-05, |
|
"loss": 1.3546, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.7787701820680177, |
|
"grad_norm": 3.2001101970672607, |
|
"learning_rate": 3.740409939310661e-05, |
|
"loss": 1.355, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.9505324630711094, |
|
"grad_norm": 2.374141216278076, |
|
"learning_rate": 3.683155845642964e-05, |
|
"loss": 1.356, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8777209125773897, |
|
"eval_f1": 0.6318054658919734, |
|
"eval_loss": 1.5293220281600952, |
|
"eval_precision": 0.6235663919802905, |
|
"eval_recall": 0.6402651779483601, |
|
"eval_runtime": 4.3262, |
|
"eval_samples_per_second": 672.648, |
|
"eval_steps_per_second": 84.139, |
|
"step": 11644 |
|
}, |
|
{ |
|
"epoch": 4.1222947440742015, |
|
"grad_norm": 2.5122597217559814, |
|
"learning_rate": 3.6259017519752663e-05, |
|
"loss": 1.3298, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.294057025077293, |
|
"grad_norm": 1.870731234550476, |
|
"learning_rate": 3.568647658307569e-05, |
|
"loss": 1.3148, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.465819306080385, |
|
"grad_norm": 2.1130025386810303, |
|
"learning_rate": 3.511393564639872e-05, |
|
"loss": 1.3147, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.637581587083476, |
|
"grad_norm": 1.9891668558120728, |
|
"learning_rate": 3.454139470972175e-05, |
|
"loss": 1.3176, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.809343868086568, |
|
"grad_norm": 2.144550085067749, |
|
"learning_rate": 3.396885377304477e-05, |
|
"loss": 1.3207, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.98110614908966, |
|
"grad_norm": 1.166013240814209, |
|
"learning_rate": 3.33963128363678e-05, |
|
"loss": 1.3182, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8789120276443408, |
|
"eval_f1": 0.6353600689952565, |
|
"eval_loss": 1.543265461921692, |
|
"eval_precision": 0.6282619819205185, |
|
"eval_recall": 0.6426203768318214, |
|
"eval_runtime": 4.3312, |
|
"eval_samples_per_second": 671.862, |
|
"eval_steps_per_second": 84.041, |
|
"step": 14555 |
|
}, |
|
{ |
|
"epoch": 5.152868430092751, |
|
"grad_norm": 1.4789066314697266, |
|
"learning_rate": 3.2823771899690834e-05, |
|
"loss": 1.2897, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.3246307110958435, |
|
"grad_norm": 2.067422866821289, |
|
"learning_rate": 3.225123096301386e-05, |
|
"loss": 1.2908, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.496392992098935, |
|
"grad_norm": 1.4258556365966797, |
|
"learning_rate": 3.167869002633688e-05, |
|
"loss": 1.2946, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.668155273102027, |
|
"grad_norm": 1.9385099411010742, |
|
"learning_rate": 3.110614908965991e-05, |
|
"loss": 1.2914, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.839917554105119, |
|
"grad_norm": 2.5622832775115967, |
|
"learning_rate": 3.053360815298294e-05, |
|
"loss": 1.2919, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8793701488239375, |
|
"eval_f1": 0.6428903837204383, |
|
"eval_loss": 1.5671014785766602, |
|
"eval_precision": 0.6241682411895177, |
|
"eval_recall": 0.6627704117236567, |
|
"eval_runtime": 4.523, |
|
"eval_samples_per_second": 643.376, |
|
"eval_steps_per_second": 80.477, |
|
"step": 17466 |
|
}, |
|
{ |
|
"epoch": 6.01167983510821, |
|
"grad_norm": 1.9045183658599854, |
|
"learning_rate": 2.9961067216305967e-05, |
|
"loss": 1.2899, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.183442116111302, |
|
"grad_norm": 5.356103420257568, |
|
"learning_rate": 2.9388526279628997e-05, |
|
"loss": 1.2716, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.3552043971143934, |
|
"grad_norm": 1.4505314826965332, |
|
"learning_rate": 2.8815985342952025e-05, |
|
"loss": 1.2736, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.5269666781174855, |
|
"grad_norm": 2.0673441886901855, |
|
"learning_rate": 2.824344440627505e-05, |
|
"loss": 1.2755, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.698728959120577, |
|
"grad_norm": 7.07130765914917, |
|
"learning_rate": 2.7670903469598076e-05, |
|
"loss": 1.2739, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.870491240123669, |
|
"grad_norm": 2.692305326461792, |
|
"learning_rate": 2.7098362532921106e-05, |
|
"loss": 1.2743, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8809277608345659, |
|
"eval_f1": 0.6462844646455984, |
|
"eval_loss": 1.5696512460708618, |
|
"eval_precision": 0.6355739225773804, |
|
"eval_recall": 0.6573621772505234, |
|
"eval_runtime": 4.3283, |
|
"eval_samples_per_second": 672.312, |
|
"eval_steps_per_second": 84.097, |
|
"step": 20377 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"grad_norm": 0.9224966764450073, |
|
"learning_rate": 2.6525821596244134e-05, |
|
"loss": 1.2716, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.214015802129852, |
|
"grad_norm": 2.72609543800354, |
|
"learning_rate": 2.5953280659567158e-05, |
|
"loss": 1.2578, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.385778083132944, |
|
"grad_norm": 1.7019892930984497, |
|
"learning_rate": 2.538073972289019e-05, |
|
"loss": 1.2612, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 7.5575403641360355, |
|
"grad_norm": 4.017130374908447, |
|
"learning_rate": 2.4808198786213216e-05, |
|
"loss": 1.2622, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 7.729302645139128, |
|
"grad_norm": 6.522401332855225, |
|
"learning_rate": 2.4235657849536243e-05, |
|
"loss": 1.2611, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 7.901064926142219, |
|
"grad_norm": 3.355700731277466, |
|
"learning_rate": 2.366311691285927e-05, |
|
"loss": 1.2633, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8812811686016833, |
|
"eval_f1": 0.6527559389741191, |
|
"eval_loss": 1.5806214809417725, |
|
"eval_precision": 0.636446507002569, |
|
"eval_recall": 0.6699232379623168, |
|
"eval_runtime": 4.329, |
|
"eval_samples_per_second": 672.206, |
|
"eval_steps_per_second": 84.084, |
|
"step": 23288 |
|
}, |
|
{ |
|
"epoch": 8.07282720714531, |
|
"grad_norm": 0.5228517651557922, |
|
"learning_rate": 2.30905759761823e-05, |
|
"loss": 1.2579, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.244589488148403, |
|
"grad_norm": 0.2985314726829529, |
|
"learning_rate": 2.2518035039505325e-05, |
|
"loss": 1.2527, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.416351769151495, |
|
"grad_norm": 1.963086724281311, |
|
"learning_rate": 2.1945494102828355e-05, |
|
"loss": 1.2542, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 8.588114050154585, |
|
"grad_norm": 0.8812742233276367, |
|
"learning_rate": 2.137295316615138e-05, |
|
"loss": 1.251, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.759876331157677, |
|
"grad_norm": 0.7020455002784729, |
|
"learning_rate": 2.080041222947441e-05, |
|
"loss": 1.2516, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 8.93163861216077, |
|
"grad_norm": 2.0791664123535156, |
|
"learning_rate": 2.0227871292797437e-05, |
|
"loss": 1.2542, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.880783779892407, |
|
"eval_f1": 0.6498043011657758, |
|
"eval_loss": 1.594204068183899, |
|
"eval_precision": 0.6277953972513621, |
|
"eval_recall": 0.6734124214933705, |
|
"eval_runtime": 4.3509, |
|
"eval_samples_per_second": 668.824, |
|
"eval_steps_per_second": 83.66, |
|
"step": 26199 |
|
}, |
|
{ |
|
"epoch": 9.103400893163862, |
|
"grad_norm": 2.5551717281341553, |
|
"learning_rate": 1.9655330356120464e-05, |
|
"loss": 1.2496, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 9.275163174166954, |
|
"grad_norm": 3.963749647140503, |
|
"learning_rate": 1.908278941944349e-05, |
|
"loss": 1.2453, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 9.446925455170044, |
|
"grad_norm": 0.5859785676002502, |
|
"learning_rate": 1.851024848276652e-05, |
|
"loss": 1.2463, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 9.618687736173136, |
|
"grad_norm": 0.3447531759738922, |
|
"learning_rate": 1.7937707546089546e-05, |
|
"loss": 1.2477, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 9.790450017176228, |
|
"grad_norm": 0.3794388175010681, |
|
"learning_rate": 1.7365166609412573e-05, |
|
"loss": 1.2468, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 9.96221229817932, |
|
"grad_norm": 1.6076109409332275, |
|
"learning_rate": 1.67926256727356e-05, |
|
"loss": 1.2457, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8814251495438422, |
|
"eval_f1": 0.6500277789649131, |
|
"eval_loss": 1.607577919960022, |
|
"eval_precision": 0.6372015081692501, |
|
"eval_recall": 0.6633810188415911, |
|
"eval_runtime": 4.3517, |
|
"eval_samples_per_second": 668.699, |
|
"eval_steps_per_second": 83.645, |
|
"step": 29110 |
|
}, |
|
{ |
|
"epoch": 10.13397457918241, |
|
"grad_norm": 1.843865990638733, |
|
"learning_rate": 1.622008473605863e-05, |
|
"loss": 1.2411, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 10.305736860185503, |
|
"grad_norm": 2.924731731414795, |
|
"learning_rate": 1.5647543799381655e-05, |
|
"loss": 1.2402, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 10.477499141188595, |
|
"grad_norm": 0.40096113085746765, |
|
"learning_rate": 1.5075002862704684e-05, |
|
"loss": 1.2416, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 10.649261422191687, |
|
"grad_norm": 1.3067082166671753, |
|
"learning_rate": 1.4502461926027711e-05, |
|
"loss": 1.2422, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 10.82102370319478, |
|
"grad_norm": 1.1540168523788452, |
|
"learning_rate": 1.3929920989350739e-05, |
|
"loss": 1.2406, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 10.99278598419787, |
|
"grad_norm": 2.354355812072754, |
|
"learning_rate": 1.3357380052673768e-05, |
|
"loss": 1.2398, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8834932394403068, |
|
"eval_f1": 0.6551724137931034, |
|
"eval_loss": 1.6077239513397217, |
|
"eval_precision": 0.6413770053475936, |
|
"eval_recall": 0.6695743196092114, |
|
"eval_runtime": 4.5309, |
|
"eval_samples_per_second": 642.254, |
|
"eval_steps_per_second": 80.337, |
|
"step": 32021 |
|
}, |
|
{ |
|
"epoch": 11.164548265200962, |
|
"grad_norm": 2.941948413848877, |
|
"learning_rate": 1.2784839115996793e-05, |
|
"loss": 1.2384, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 11.336310546204054, |
|
"grad_norm": 0.06978488713502884, |
|
"learning_rate": 1.2212298179319822e-05, |
|
"loss": 1.238, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 11.508072827207146, |
|
"grad_norm": 1.1202852725982666, |
|
"learning_rate": 1.163975724264285e-05, |
|
"loss": 1.2375, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 11.679835108210238, |
|
"grad_norm": 3.7290749549865723, |
|
"learning_rate": 1.1067216305965877e-05, |
|
"loss": 1.2373, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 11.851597389213328, |
|
"grad_norm": 0.5790780782699585, |
|
"learning_rate": 1.0494675369288904e-05, |
|
"loss": 1.2377, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8846974436838179, |
|
"eval_f1": 0.6615437158469945, |
|
"eval_loss": 1.6134886741638184, |
|
"eval_precision": 0.6478260869565218, |
|
"eval_recall": 0.6758548499651081, |
|
"eval_runtime": 4.3276, |
|
"eval_samples_per_second": 672.43, |
|
"eval_steps_per_second": 84.112, |
|
"step": 34932 |
|
}, |
|
{ |
|
"epoch": 12.02335967021642, |
|
"grad_norm": 0.04744827747344971, |
|
"learning_rate": 9.922134432611933e-06, |
|
"loss": 1.2362, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 12.195121951219512, |
|
"grad_norm": 0.07846707850694656, |
|
"learning_rate": 9.34959349593496e-06, |
|
"loss": 1.235, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 12.366884232222604, |
|
"grad_norm": 3.9505062103271484, |
|
"learning_rate": 8.777052559257987e-06, |
|
"loss": 1.2345, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 12.538646513225697, |
|
"grad_norm": 0.13419800996780396, |
|
"learning_rate": 8.204511622581015e-06, |
|
"loss": 1.2362, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 12.710408794228787, |
|
"grad_norm": 0.205936998128891, |
|
"learning_rate": 7.631970685904042e-06, |
|
"loss": 1.2341, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 12.882171075231879, |
|
"grad_norm": 1.917006254196167, |
|
"learning_rate": 7.05942974922707e-06, |
|
"loss": 1.2349, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.883872825560544, |
|
"eval_f1": 0.6590367597004765, |
|
"eval_loss": 1.619519829750061, |
|
"eval_precision": 0.6432724252491694, |
|
"eval_recall": 0.6755931612002791, |
|
"eval_runtime": 4.3511, |
|
"eval_samples_per_second": 668.801, |
|
"eval_steps_per_second": 83.658, |
|
"step": 37843 |
|
}, |
|
{ |
|
"epoch": 13.053933356234971, |
|
"grad_norm": 0.5815674662590027, |
|
"learning_rate": 6.486888812550097e-06, |
|
"loss": 1.2342, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 13.225695637238063, |
|
"grad_norm": 0.48151713609695435, |
|
"learning_rate": 5.914347875873125e-06, |
|
"loss": 1.2335, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 13.397457918241154, |
|
"grad_norm": 4.141974925994873, |
|
"learning_rate": 5.341806939196153e-06, |
|
"loss": 1.2335, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 13.569220199244246, |
|
"grad_norm": 0.24046790599822998, |
|
"learning_rate": 4.76926600251918e-06, |
|
"loss": 1.2331, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 13.740982480247338, |
|
"grad_norm": 0.08363146334886551, |
|
"learning_rate": 4.196725065842208e-06, |
|
"loss": 1.233, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 13.91274476125043, |
|
"grad_norm": 0.5658828616142273, |
|
"learning_rate": 3.6241841291652353e-06, |
|
"loss": 1.2328, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8845272843885391, |
|
"eval_f1": 0.6591725081210464, |
|
"eval_loss": 1.6228290796279907, |
|
"eval_precision": 0.6462453905464298, |
|
"eval_recall": 0.6726273551988835, |
|
"eval_runtime": 4.3411, |
|
"eval_samples_per_second": 670.343, |
|
"eval_steps_per_second": 83.85, |
|
"step": 40754 |
|
}, |
|
{ |
|
"epoch": 14.084507042253522, |
|
"grad_norm": 1.4916341304779053, |
|
"learning_rate": 3.051643192488263e-06, |
|
"loss": 1.2318, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 14.256269323256612, |
|
"grad_norm": 0.1434667557477951, |
|
"learning_rate": 2.4791022558112906e-06, |
|
"loss": 1.2305, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 14.428031604259704, |
|
"grad_norm": 0.10652283579111099, |
|
"learning_rate": 1.906561319134318e-06, |
|
"loss": 1.232, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 14.599793885262796, |
|
"grad_norm": 0.8040905594825745, |
|
"learning_rate": 1.3340203824573458e-06, |
|
"loss": 1.2321, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 14.771556166265889, |
|
"grad_norm": 0.040788378566503525, |
|
"learning_rate": 7.614794457803733e-07, |
|
"loss": 1.2319, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 14.943318447268979, |
|
"grad_norm": 0.5179036259651184, |
|
"learning_rate": 1.889385091034009e-07, |
|
"loss": 1.231, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8847236220369377, |
|
"eval_f1": 0.660140218878249, |
|
"eval_loss": 1.6247130632400513, |
|
"eval_precision": 0.6473004694835681, |
|
"eval_recall": 0.6734996510816469, |
|
"eval_runtime": 4.3445, |
|
"eval_samples_per_second": 669.806, |
|
"eval_steps_per_second": 83.783, |
|
"step": 43665 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 43665, |
|
"total_flos": 1.1901430945516224e+16, |
|
"train_loss": 1.311661433704009, |
|
"train_runtime": 2299.911, |
|
"train_samples_per_second": 151.865, |
|
"train_steps_per_second": 18.986 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 43665, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1901430945516224e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|