|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.527180783817952, |
|
"eval_steps": 500, |
|
"global_step": 6745, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012642225031605562, |
|
"grad_norm": 0.06671903282403946, |
|
"learning_rate": 9e-06, |
|
"loss": 0.6431, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.025284450063211124, |
|
"grad_norm": 0.08679291605949402, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.6499, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03792667509481669, |
|
"grad_norm": 0.07710310071706772, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.625, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05056890012642225, |
|
"grad_norm": 0.08816391229629517, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.6109, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0632111251580278, |
|
"grad_norm": 0.1400187462568283, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6043, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07585335018963338, |
|
"grad_norm": 0.08173350989818573, |
|
"learning_rate": 5.9e-05, |
|
"loss": 0.6308, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08849557522123894, |
|
"grad_norm": 0.09538205713033676, |
|
"learning_rate": 6.9e-05, |
|
"loss": 0.6078, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1011378002528445, |
|
"grad_norm": 0.10508744418621063, |
|
"learning_rate": 7.900000000000001e-05, |
|
"loss": 0.6266, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11378002528445007, |
|
"grad_norm": 0.13323046267032623, |
|
"learning_rate": 8.900000000000001e-05, |
|
"loss": 0.6393, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1264222503160556, |
|
"grad_norm": 0.13296917080879211, |
|
"learning_rate": 9.900000000000001e-05, |
|
"loss": 0.6361, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1390644753476612, |
|
"grad_norm": 0.14028862118721008, |
|
"learning_rate": 0.000109, |
|
"loss": 0.6576, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15170670037926676, |
|
"grad_norm": 0.17391778528690338, |
|
"learning_rate": 0.000119, |
|
"loss": 0.6084, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16434892541087232, |
|
"grad_norm": 0.1644161194562912, |
|
"learning_rate": 0.00012900000000000002, |
|
"loss": 0.658, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17699115044247787, |
|
"grad_norm": 0.2195376306772232, |
|
"learning_rate": 0.000139, |
|
"loss": 0.6274, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18963337547408343, |
|
"grad_norm": 0.2098621428012848, |
|
"learning_rate": 0.00014900000000000002, |
|
"loss": 0.5906, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.202275600505689, |
|
"grad_norm": 0.24007147550582886, |
|
"learning_rate": 0.00015900000000000002, |
|
"loss": 0.6373, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21491782553729458, |
|
"grad_norm": 0.2894239127635956, |
|
"learning_rate": 0.00016900000000000002, |
|
"loss": 0.6371, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22756005056890014, |
|
"grad_norm": 0.23595209419727325, |
|
"learning_rate": 0.00017900000000000001, |
|
"loss": 0.6768, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2402022756005057, |
|
"grad_norm": 0.26761606335639954, |
|
"learning_rate": 0.00018899999999999999, |
|
"loss": 0.6431, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2528445006321112, |
|
"grad_norm": 0.2602802813053131, |
|
"learning_rate": 0.000199, |
|
"loss": 0.6443, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26548672566371684, |
|
"grad_norm": 0.4167614281177521, |
|
"learning_rate": 0.00019999934682007068, |
|
"loss": 0.6589, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2781289506953224, |
|
"grad_norm": 0.3008961081504822, |
|
"learning_rate": 0.00019999708892979201, |
|
"loss": 0.6986, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29077117572692796, |
|
"grad_norm": 0.5571665167808533, |
|
"learning_rate": 0.00019999321831722333, |
|
"loss": 0.6971, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3034134007585335, |
|
"grad_norm": 0.3433665335178375, |
|
"learning_rate": 0.000199987735071543, |
|
"loss": 0.6764, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.31605562579013907, |
|
"grad_norm": 0.36731716990470886, |
|
"learning_rate": 0.00019998063931908407, |
|
"loss": 0.6622, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.32869785082174463, |
|
"grad_norm": 0.4063098132610321, |
|
"learning_rate": 0.0001999719312233317, |
|
"loss": 0.6727, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3413400758533502, |
|
"grad_norm": 0.4796123504638672, |
|
"learning_rate": 0.0001999616109849191, |
|
"loss": 0.6683, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 0.35787850618362427, |
|
"learning_rate": 0.00019994967884162285, |
|
"loss": 0.6856, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3666245259165613, |
|
"grad_norm": 2.5480282306671143, |
|
"learning_rate": 0.00019993613506835787, |
|
"loss": 0.7337, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.37926675094816686, |
|
"grad_norm": 132.1597137451172, |
|
"learning_rate": 0.00019992097997717054, |
|
"loss": 1.748, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3919089759797724, |
|
"grad_norm": 138.1280975341797, |
|
"learning_rate": 0.00019990421391723193, |
|
"loss": 2.0475, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.404551201011378, |
|
"grad_norm": 11.301629066467285, |
|
"learning_rate": 0.00019988583727482948, |
|
"loss": 1.949, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.41719342604298354, |
|
"grad_norm": 249.65650939941406, |
|
"learning_rate": 0.0001998658504733583, |
|
"loss": 1.3181, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.42983565107458915, |
|
"grad_norm": 9.794897079467773, |
|
"learning_rate": 0.0001998442539733111, |
|
"loss": 1.588, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4424778761061947, |
|
"grad_norm": 1.245341181755066, |
|
"learning_rate": 0.00019982104827226808, |
|
"loss": 0.8035, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.45512010113780027, |
|
"grad_norm": 1.8496769666671753, |
|
"learning_rate": 0.00019979623390488507, |
|
"loss": 0.7647, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.46776232616940583, |
|
"grad_norm": 16.008167266845703, |
|
"learning_rate": 0.0001997698114428813, |
|
"loss": 0.7563, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4804045512010114, |
|
"grad_norm": 6.476025104522705, |
|
"learning_rate": 0.00019974178149502624, |
|
"loss": 0.8981, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.49304677623261695, |
|
"grad_norm": 1.4814890623092651, |
|
"learning_rate": 0.0001997121447071257, |
|
"loss": 0.7543, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5056890012642224, |
|
"grad_norm": 0.695743978023529, |
|
"learning_rate": 0.0001996809017620067, |
|
"loss": 0.7414, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5183312262958281, |
|
"grad_norm": 9.911474227905273, |
|
"learning_rate": 0.000199648053379502, |
|
"loss": 0.7957, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5309734513274337, |
|
"grad_norm": 0.726256787776947, |
|
"learning_rate": 0.00019961360031643332, |
|
"loss": 0.7185, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5436156763590392, |
|
"grad_norm": 165.10116577148438, |
|
"learning_rate": 0.00019957754336659392, |
|
"loss": 0.901, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5562579013906448, |
|
"grad_norm": 41.61799621582031, |
|
"learning_rate": 0.0001995398833607306, |
|
"loss": 1.6791, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5689001264222503, |
|
"grad_norm": 26.158023834228516, |
|
"learning_rate": 0.0001995006211665241, |
|
"loss": 1.6933, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5815423514538559, |
|
"grad_norm": 1.6820884943008423, |
|
"learning_rate": 0.00019945975768856936, |
|
"loss": 0.8144, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5941845764854614, |
|
"grad_norm": 0.5475680828094482, |
|
"learning_rate": 0.00019941729386835472, |
|
"loss": 0.7117, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.606826801517067, |
|
"grad_norm": 0.5968815684318542, |
|
"learning_rate": 0.0001993732306842402, |
|
"loss": 0.7452, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6194690265486725, |
|
"grad_norm": 3.038395404815674, |
|
"learning_rate": 0.00019932756915143481, |
|
"loss": 0.7365, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6321112515802781, |
|
"grad_norm": 1.1817647218704224, |
|
"learning_rate": 0.0001992803103219733, |
|
"loss": 0.7883, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6447534766118836, |
|
"grad_norm": 0.8520786166191101, |
|
"learning_rate": 0.00019923145528469202, |
|
"loss": 0.7309, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.6573957016434893, |
|
"grad_norm": 0.9001318216323853, |
|
"learning_rate": 0.00019918100516520354, |
|
"loss": 0.8017, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6700379266750948, |
|
"grad_norm": 1.4401612281799316, |
|
"learning_rate": 0.00019912896112587092, |
|
"loss": 0.7814, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6826801517067004, |
|
"grad_norm": 34.98484420776367, |
|
"learning_rate": 0.00019907532436578098, |
|
"loss": 0.8461, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.695322376738306, |
|
"grad_norm": 1.0898733139038086, |
|
"learning_rate": 0.00019902009612071645, |
|
"loss": 0.9027, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 38.014892578125, |
|
"learning_rate": 0.00019896327766312773, |
|
"loss": 0.8073, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.7206068268015171, |
|
"grad_norm": 2.3141884803771973, |
|
"learning_rate": 0.0001989048703021035, |
|
"loss": 0.7773, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7332490518331226, |
|
"grad_norm": 0.8214466571807861, |
|
"learning_rate": 0.00019884487538334038, |
|
"loss": 0.8214, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.7458912768647282, |
|
"grad_norm": 1.5186419486999512, |
|
"learning_rate": 0.00019878329428911227, |
|
"loss": 0.7852, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.7585335018963337, |
|
"grad_norm": 1.7092262506484985, |
|
"learning_rate": 0.00019872012843823815, |
|
"loss": 0.8048, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7711757269279393, |
|
"grad_norm": 1.04222571849823, |
|
"learning_rate": 0.00019865537928604967, |
|
"loss": 0.7578, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7838179519595448, |
|
"grad_norm": 6.094375133514404, |
|
"learning_rate": 0.00019858904832435745, |
|
"loss": 0.8016, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7964601769911505, |
|
"grad_norm": 35.207767486572266, |
|
"learning_rate": 0.00019852113708141675, |
|
"loss": 0.8785, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.809102402022756, |
|
"grad_norm": 2.344693422317505, |
|
"learning_rate": 0.00019845164712189233, |
|
"loss": 0.8803, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.8217446270543616, |
|
"grad_norm": 3.754660129547119, |
|
"learning_rate": 0.00019838058004682224, |
|
"loss": 0.7746, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8343868520859671, |
|
"grad_norm": 0.9116389155387878, |
|
"learning_rate": 0.0001983079374935811, |
|
"loss": 0.756, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.8470290771175727, |
|
"grad_norm": 2.610806703567505, |
|
"learning_rate": 0.0001982337211358423, |
|
"loss": 0.7773, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.8596713021491783, |
|
"grad_norm": 10.215978622436523, |
|
"learning_rate": 0.00019815793268353944, |
|
"loss": 0.7995, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.8723135271807838, |
|
"grad_norm": 1.8970898389816284, |
|
"learning_rate": 0.000198080573882827, |
|
"loss": 0.7576, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 4.906523704528809, |
|
"learning_rate": 0.00019800164651603987, |
|
"loss": 0.8217, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8975979772439949, |
|
"grad_norm": 44.493133544921875, |
|
"learning_rate": 0.0001979211524016527, |
|
"loss": 0.8068, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.9102402022756005, |
|
"grad_norm": 348.29559326171875, |
|
"learning_rate": 0.00019783909339423758, |
|
"loss": 2.0656, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.922882427307206, |
|
"grad_norm": 8.599038124084473, |
|
"learning_rate": 0.00019775547138442157, |
|
"loss": 1.7869, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.9355246523388117, |
|
"grad_norm": 0.8781918287277222, |
|
"learning_rate": 0.00019767028829884313, |
|
"loss": 0.7973, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.9481668773704172, |
|
"grad_norm": 1.5494831800460815, |
|
"learning_rate": 0.00019758354610010753, |
|
"loss": 0.7974, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.9608091024020228, |
|
"grad_norm": 1.064113736152649, |
|
"learning_rate": 0.00019749524678674193, |
|
"loss": 0.8408, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.9734513274336283, |
|
"grad_norm": 1.0886831283569336, |
|
"learning_rate": 0.00019740539239314898, |
|
"loss": 0.7968, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.9860935524652339, |
|
"grad_norm": 1.2727103233337402, |
|
"learning_rate": 0.00019731398498956036, |
|
"loss": 0.7508, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.9987357774968394, |
|
"grad_norm": 5.564798831939697, |
|
"learning_rate": 0.00019722102668198868, |
|
"loss": 0.7573, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.011378002528445, |
|
"grad_norm": 0.7210967540740967, |
|
"learning_rate": 0.0001971265196121792, |
|
"loss": 0.6808, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0240202275600505, |
|
"grad_norm": 0.7716237306594849, |
|
"learning_rate": 0.00019703046595756054, |
|
"loss": 0.6197, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.0366624525916561, |
|
"grad_norm": 0.8090146780014038, |
|
"learning_rate": 0.00019693286793119423, |
|
"loss": 0.6632, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.0493046776232617, |
|
"grad_norm": 18.184663772583008, |
|
"learning_rate": 0.000196833727781724, |
|
"loss": 0.6638, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.0619469026548674, |
|
"grad_norm": 0.7779182195663452, |
|
"learning_rate": 0.0001967330477933238, |
|
"loss": 0.6878, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.0745891276864727, |
|
"grad_norm": 0.71025151014328, |
|
"learning_rate": 0.00019663083028564527, |
|
"loss": 0.6778, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.0872313527180784, |
|
"grad_norm": 2.1243929862976074, |
|
"learning_rate": 0.0001965270776137642, |
|
"loss": 0.7326, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.099873577749684, |
|
"grad_norm": 0.8968414068222046, |
|
"learning_rate": 0.0001964217921681265, |
|
"loss": 0.6792, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.1125158027812896, |
|
"grad_norm": 5.795286655426025, |
|
"learning_rate": 0.00019631497637449274, |
|
"loss": 0.7356, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.125158027812895, |
|
"grad_norm": 1.2587428092956543, |
|
"learning_rate": 0.0001962066326938826, |
|
"loss": 0.7505, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.1378002528445006, |
|
"grad_norm": 1.1835522651672363, |
|
"learning_rate": 0.000196096763622518, |
|
"loss": 0.7025, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1504424778761062, |
|
"grad_norm": 36.68544387817383, |
|
"learning_rate": 0.00019598537169176564, |
|
"loss": 0.6972, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.1630847029077118, |
|
"grad_norm": 877.5889892578125, |
|
"learning_rate": 0.0001958724594680787, |
|
"loss": 1.3335, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.1757269279393174, |
|
"grad_norm": 536.0718383789062, |
|
"learning_rate": 0.00019575802955293763, |
|
"loss": 8.5679, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.1883691529709228, |
|
"grad_norm": 22.20748519897461, |
|
"learning_rate": 0.00019564208458279034, |
|
"loss": 10.4269, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.2010113780025284, |
|
"grad_norm": 58.90277862548828, |
|
"learning_rate": 0.00019552462722899122, |
|
"loss": 7.7899, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.213653603034134, |
|
"grad_norm": 38.07368469238281, |
|
"learning_rate": 0.00019540566019773996, |
|
"loss": 8.0968, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.2262958280657394, |
|
"grad_norm": 3.39542555809021, |
|
"learning_rate": 0.00019528518623001878, |
|
"loss": 7.6929, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.238938053097345, |
|
"grad_norm": 8.227216720581055, |
|
"learning_rate": 0.0001951632081015296, |
|
"loss": 7.5381, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.2515802781289507, |
|
"grad_norm": 15.710060119628906, |
|
"learning_rate": 0.00019503972862263002, |
|
"loss": 7.471, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.2642225031605563, |
|
"grad_norm": 14.865936279296875, |
|
"learning_rate": 0.00019491475063826842, |
|
"loss": 7.1013, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.276864728192162, |
|
"grad_norm": 11.446512222290039, |
|
"learning_rate": 0.00019478827702791858, |
|
"loss": 7.1631, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.2895069532237673, |
|
"grad_norm": 4.988636016845703, |
|
"learning_rate": 0.00019466031070551325, |
|
"loss": 6.9726, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.302149178255373, |
|
"grad_norm": 6.260726451873779, |
|
"learning_rate": 0.00019453085461937705, |
|
"loss": 6.8037, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.3147914032869785, |
|
"grad_norm": 13.696749687194824, |
|
"learning_rate": 0.00019439991175215857, |
|
"loss": 6.823, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.3274336283185841, |
|
"grad_norm": 4.492152690887451, |
|
"learning_rate": 0.0001942674851207615, |
|
"loss": 6.694, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.3400758533501897, |
|
"grad_norm": 14.445012092590332, |
|
"learning_rate": 0.00019413357777627534, |
|
"loss": 6.5831, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.3527180783817951, |
|
"grad_norm": 187.8795623779297, |
|
"learning_rate": 0.00019399819280390492, |
|
"loss": 6.4136, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.3653603034134008, |
|
"grad_norm": 40.901546478271484, |
|
"learning_rate": 0.00019386133332289948, |
|
"loss": 5.5392, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.3780025284450064, |
|
"grad_norm": 7.47464656829834, |
|
"learning_rate": 0.00019372300248648064, |
|
"loss": 2.8936, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.3906447534766118, |
|
"grad_norm": 2.4701592922210693, |
|
"learning_rate": 0.00019358320348176978, |
|
"loss": 0.7719, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.4032869785082174, |
|
"grad_norm": 1.2766318321228027, |
|
"learning_rate": 0.00019344193952971486, |
|
"loss": 0.7532, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.415929203539823, |
|
"grad_norm": 1.149214744567871, |
|
"learning_rate": 0.00019329921388501573, |
|
"loss": 0.7712, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 3.015934944152832, |
|
"learning_rate": 0.0001931550298360496, |
|
"loss": 0.7567, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.4412136536030342, |
|
"grad_norm": 0.9380026459693909, |
|
"learning_rate": 0.00019300939070479508, |
|
"loss": 0.7604, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.4538558786346396, |
|
"grad_norm": 1.0415725708007812, |
|
"learning_rate": 0.00019286229984675558, |
|
"loss": 0.7313, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.4664981036662452, |
|
"grad_norm": 1.5267181396484375, |
|
"learning_rate": 0.0001927137606508821, |
|
"loss": 0.7071, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.4791403286978508, |
|
"grad_norm": 1.7632757425308228, |
|
"learning_rate": 0.00019256377653949515, |
|
"loss": 0.898, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.4917825537294565, |
|
"grad_norm": 2.4399545192718506, |
|
"learning_rate": 0.00019241235096820587, |
|
"loss": 0.7592, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.504424778761062, |
|
"grad_norm": 3.498751163482666, |
|
"learning_rate": 0.00019225948742583642, |
|
"loss": 0.8975, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.5170670037926675, |
|
"grad_norm": 5.950336456298828, |
|
"learning_rate": 0.00019210518943433953, |
|
"loss": 0.7509, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.529709228824273, |
|
"grad_norm": 13.960210800170898, |
|
"learning_rate": 0.00019194946054871753, |
|
"loss": 0.7932, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.5423514538558787, |
|
"grad_norm": 2.3052141666412354, |
|
"learning_rate": 0.0001917923043569403, |
|
"loss": 0.9414, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.554993678887484, |
|
"grad_norm": 1.8680328130722046, |
|
"learning_rate": 0.0001916337244798625, |
|
"loss": 0.7507, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.56763590391909, |
|
"grad_norm": 1.9041931629180908, |
|
"learning_rate": 0.00019147372457114045, |
|
"loss": 0.7368, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.5802781289506953, |
|
"grad_norm": 1.2977467775344849, |
|
"learning_rate": 0.00019131230831714776, |
|
"loss": 0.8548, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.592920353982301, |
|
"grad_norm": 1.5000057220458984, |
|
"learning_rate": 0.00019114947943689036, |
|
"loss": 0.7704, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.6055625790139065, |
|
"grad_norm": 3.4347245693206787, |
|
"learning_rate": 0.00019098524168192094, |
|
"loss": 0.7786, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.618204804045512, |
|
"grad_norm": 2.482739210128784, |
|
"learning_rate": 0.00019081959883625235, |
|
"loss": 0.7569, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.6308470290771175, |
|
"grad_norm": 1.322037696838379, |
|
"learning_rate": 0.00019065255471627062, |
|
"loss": 0.7714, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.6434892541087232, |
|
"grad_norm": 1.073613166809082, |
|
"learning_rate": 0.00019048411317064683, |
|
"loss": 0.742, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.6561314791403285, |
|
"grad_norm": 0.8305187225341797, |
|
"learning_rate": 0.00019031427808024866, |
|
"loss": 0.7216, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.6687737041719344, |
|
"grad_norm": 1.1198879480361938, |
|
"learning_rate": 0.0001901430533580508, |
|
"loss": 0.7477, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.6814159292035398, |
|
"grad_norm": 0.9730642437934875, |
|
"learning_rate": 0.0001899704429490447, |
|
"loss": 0.765, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.6940581542351454, |
|
"grad_norm": 1.542136311531067, |
|
"learning_rate": 0.00018979645083014809, |
|
"loss": 0.7338, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.706700379266751, |
|
"grad_norm": 1.3562628030776978, |
|
"learning_rate": 0.00018962108101011285, |
|
"loss": 0.7786, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.7193426042983564, |
|
"grad_norm": 1.8742653131484985, |
|
"learning_rate": 0.000189444337529433, |
|
"loss": 0.7812, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.7319848293299622, |
|
"grad_norm": 3.535946846008301, |
|
"learning_rate": 0.0001892662244602515, |
|
"loss": 0.7653, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.7446270543615676, |
|
"grad_norm": 0.9589079022407532, |
|
"learning_rate": 0.00018908674590626637, |
|
"loss": 0.8217, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.7572692793931732, |
|
"grad_norm": 2.254733085632324, |
|
"learning_rate": 0.00018890590600263618, |
|
"loss": 0.7535, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.7699115044247788, |
|
"grad_norm": 0.8984112739562988, |
|
"learning_rate": 0.00018872370891588491, |
|
"loss": 0.7839, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.7825537294563842, |
|
"grad_norm": 1.0168917179107666, |
|
"learning_rate": 0.00018854015884380568, |
|
"loss": 0.7443, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.7951959544879899, |
|
"grad_norm": 0.9075338840484619, |
|
"learning_rate": 0.00018835526001536424, |
|
"loss": 0.7515, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.8078381795195955, |
|
"grad_norm": 0.9690259695053101, |
|
"learning_rate": 0.00018816901669060156, |
|
"loss": 0.8179, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.8204804045512009, |
|
"grad_norm": 1.043910026550293, |
|
"learning_rate": 0.0001879814331605355, |
|
"loss": 0.8088, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.8331226295828067, |
|
"grad_norm": 0.9964724779129028, |
|
"learning_rate": 0.00018779251374706206, |
|
"loss": 0.7603, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.845764854614412, |
|
"grad_norm": 0.896278440952301, |
|
"learning_rate": 0.00018760226280285585, |
|
"loss": 0.7666, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.8584070796460177, |
|
"grad_norm": 1.0416340827941895, |
|
"learning_rate": 0.00018741068471126967, |
|
"loss": 0.7295, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.8710493046776233, |
|
"grad_norm": 1.1354191303253174, |
|
"learning_rate": 0.00018721778388623367, |
|
"loss": 0.7552, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.8836915297092287, |
|
"grad_norm": 1.0638015270233154, |
|
"learning_rate": 0.00018702356477215352, |
|
"loss": 0.7663, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.8963337547408345, |
|
"grad_norm": 0.9380121231079102, |
|
"learning_rate": 0.00018682803184380807, |
|
"loss": 0.7436, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.90897597977244, |
|
"grad_norm": 0.9272292256355286, |
|
"learning_rate": 0.0001866311896062463, |
|
"loss": 0.8219, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.9216182048040455, |
|
"grad_norm": 0.9718897938728333, |
|
"learning_rate": 0.00018643304259468346, |
|
"loss": 0.7357, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.9342604298356512, |
|
"grad_norm": 0.8963416218757629, |
|
"learning_rate": 0.00018623359537439654, |
|
"loss": 0.7421, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.9469026548672566, |
|
"grad_norm": 0.8436943888664246, |
|
"learning_rate": 0.0001860328525406192, |
|
"loss": 0.8123, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.9595448798988622, |
|
"grad_norm": 0.9509057998657227, |
|
"learning_rate": 0.00018583081871843585, |
|
"loss": 0.785, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.9721871049304678, |
|
"grad_norm": 1.5439331531524658, |
|
"learning_rate": 0.00018562749856267495, |
|
"loss": 0.7564, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.9848293299620732, |
|
"grad_norm": 1.1488640308380127, |
|
"learning_rate": 0.00018542289675780208, |
|
"loss": 0.7905, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.997471554993679, |
|
"grad_norm": 0.8320059776306152, |
|
"learning_rate": 0.00018521701801781172, |
|
"loss": 0.7636, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.0101137800252844, |
|
"grad_norm": 24.31561851501465, |
|
"learning_rate": 0.00018500986708611868, |
|
"loss": 0.7004, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.02275600505689, |
|
"grad_norm": 0.9889429211616516, |
|
"learning_rate": 0.00018480144873544898, |
|
"loss": 0.6018, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.0353982300884956, |
|
"grad_norm": 0.852366030216217, |
|
"learning_rate": 0.0001845917677677298, |
|
"loss": 0.6404, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.048040455120101, |
|
"grad_norm": 0.8665163516998291, |
|
"learning_rate": 0.00018438082901397866, |
|
"loss": 0.6277, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.060682680151707, |
|
"grad_norm": 0.959322452545166, |
|
"learning_rate": 0.00018416863733419246, |
|
"loss": 0.6274, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.0733249051833123, |
|
"grad_norm": 0.7421912550926208, |
|
"learning_rate": 0.0001839551976172352, |
|
"loss": 0.6483, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.0859671302149176, |
|
"grad_norm": 1.5782485008239746, |
|
"learning_rate": 0.0001837405147807256, |
|
"loss": 0.6964, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.0986093552465235, |
|
"grad_norm": 0.793574869632721, |
|
"learning_rate": 0.00018352459377092347, |
|
"loss": 0.6323, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.111251580278129, |
|
"grad_norm": 0.7756363153457642, |
|
"learning_rate": 0.00018330743956261616, |
|
"loss": 0.6988, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.1238938053097347, |
|
"grad_norm": 0.8382811546325684, |
|
"learning_rate": 0.0001830890571590036, |
|
"loss": 0.6159, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.13653603034134, |
|
"grad_norm": 1.7289704084396362, |
|
"learning_rate": 0.0001828694515915831, |
|
"loss": 0.655, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.1491782553729455, |
|
"grad_norm": 0.8287073373794556, |
|
"learning_rate": 0.00018264862792003367, |
|
"loss": 0.6869, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.1618204804045513, |
|
"grad_norm": 0.8839928507804871, |
|
"learning_rate": 0.00018242659123209905, |
|
"loss": 0.6807, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.1744627054361567, |
|
"grad_norm": 0.9569761753082275, |
|
"learning_rate": 0.0001822033466434708, |
|
"loss": 0.6826, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.187104930467762, |
|
"grad_norm": 1.1782281398773193, |
|
"learning_rate": 0.00018197889929767036, |
|
"loss": 0.6532, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.199747155499368, |
|
"grad_norm": 4.368149280548096, |
|
"learning_rate": 0.00018175325436593044, |
|
"loss": 0.6681, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.2123893805309733, |
|
"grad_norm": 0.9262805581092834, |
|
"learning_rate": 0.00018152641704707593, |
|
"loss": 0.6776, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.225031605562579, |
|
"grad_norm": 4.026210784912109, |
|
"learning_rate": 0.0001812983925674042, |
|
"loss": 0.6965, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.2376738305941846, |
|
"grad_norm": 0.9288873076438904, |
|
"learning_rate": 0.00018106918618056463, |
|
"loss": 0.7156, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.25031605562579, |
|
"grad_norm": 0.9781466126441956, |
|
"learning_rate": 0.00018083880316743757, |
|
"loss": 0.6843, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.262958280657396, |
|
"grad_norm": 0.8335726857185364, |
|
"learning_rate": 0.00018060724883601248, |
|
"loss": 0.6722, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.275600505689001, |
|
"grad_norm": 0.8793342709541321, |
|
"learning_rate": 0.00018037452852126613, |
|
"loss": 0.649, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.288242730720607, |
|
"grad_norm": 0.868864893913269, |
|
"learning_rate": 0.00018014064758503908, |
|
"loss": 0.6749, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.3008849557522124, |
|
"grad_norm": 0.8861690759658813, |
|
"learning_rate": 0.00017990561141591264, |
|
"loss": 0.6893, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.313527180783818, |
|
"grad_norm": 0.8054774403572083, |
|
"learning_rate": 0.00017966942542908435, |
|
"loss": 0.7254, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.3261694058154236, |
|
"grad_norm": 0.9192434549331665, |
|
"learning_rate": 0.0001794320950662435, |
|
"loss": 0.7071, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.338811630847029, |
|
"grad_norm": 1.0894279479980469, |
|
"learning_rate": 0.0001791936257954456, |
|
"loss": 0.6882, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.351453855878635, |
|
"grad_norm": 0.976393461227417, |
|
"learning_rate": 0.0001789540231109863, |
|
"loss": 0.6996, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.3640960809102403, |
|
"grad_norm": 1.0295621156692505, |
|
"learning_rate": 0.0001787132925332751, |
|
"loss": 0.7212, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.3767383059418457, |
|
"grad_norm": 0.9011755585670471, |
|
"learning_rate": 0.00017847143960870792, |
|
"loss": 0.6803, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.3893805309734515, |
|
"grad_norm": 0.9422768354415894, |
|
"learning_rate": 0.00017822846990953942, |
|
"loss": 0.7172, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.402022756005057, |
|
"grad_norm": 0.976975679397583, |
|
"learning_rate": 0.00017798438903375452, |
|
"loss": 0.6627, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.4146649810366623, |
|
"grad_norm": 0.8325662016868591, |
|
"learning_rate": 0.00017773920260493942, |
|
"loss": 0.6819, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.427307206068268, |
|
"grad_norm": 0.9316614866256714, |
|
"learning_rate": 0.00017749291627215224, |
|
"loss": 0.6842, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.4399494310998735, |
|
"grad_norm": 0.8595056533813477, |
|
"learning_rate": 0.0001772455357097927, |
|
"loss": 0.7084, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.452591656131479, |
|
"grad_norm": 0.834000825881958, |
|
"learning_rate": 0.00017699706661747125, |
|
"loss": 0.6951, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.4652338811630847, |
|
"grad_norm": 0.7746726274490356, |
|
"learning_rate": 0.0001767475147198781, |
|
"loss": 0.7076, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.47787610619469, |
|
"grad_norm": 5.648841857910156, |
|
"learning_rate": 0.00017649688576665094, |
|
"loss": 0.6874, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.490518331226296, |
|
"grad_norm": 0.8709747195243835, |
|
"learning_rate": 0.00017624518553224295, |
|
"loss": 0.7033, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.5031605562579013, |
|
"grad_norm": 1.2027637958526611, |
|
"learning_rate": 0.00017599241981578904, |
|
"loss": 0.6945, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.5158027812895067, |
|
"grad_norm": 0.866089403629303, |
|
"learning_rate": 0.00017573859444097308, |
|
"loss": 0.6611, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.5284450063211126, |
|
"grad_norm": 2.6877481937408447, |
|
"learning_rate": 0.00017548371525589302, |
|
"loss": 0.6922, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.541087231352718, |
|
"grad_norm": 1.8271033763885498, |
|
"learning_rate": 0.0001752277881329266, |
|
"loss": 0.7011, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.553729456384324, |
|
"grad_norm": 3.121169328689575, |
|
"learning_rate": 0.0001749708189685958, |
|
"loss": 0.7012, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.566371681415929, |
|
"grad_norm": 1.5094399452209473, |
|
"learning_rate": 0.00017471281368343114, |
|
"loss": 0.6682, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.5790139064475346, |
|
"grad_norm": 1.1823444366455078, |
|
"learning_rate": 0.00017445377822183518, |
|
"loss": 0.6828, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.5916561314791404, |
|
"grad_norm": 2.187333106994629, |
|
"learning_rate": 0.00017419371855194551, |
|
"loss": 0.65, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.604298356510746, |
|
"grad_norm": 1.0692399740219116, |
|
"learning_rate": 0.00017393264066549753, |
|
"loss": 0.6652, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.6169405815423517, |
|
"grad_norm": 0.8324422240257263, |
|
"learning_rate": 0.00017367055057768588, |
|
"loss": 0.6999, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.629582806573957, |
|
"grad_norm": 0.9880168437957764, |
|
"learning_rate": 0.00017340745432702654, |
|
"loss": 0.6859, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.6422250316055624, |
|
"grad_norm": 2.551191568374634, |
|
"learning_rate": 0.00017314335797521705, |
|
"loss": 0.6948, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.6548672566371683, |
|
"grad_norm": 0.9405047297477722, |
|
"learning_rate": 0.0001728782676069972, |
|
"loss": 0.6906, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.6675094816687737, |
|
"grad_norm": 5.015996932983398, |
|
"learning_rate": 0.00017261218933000878, |
|
"loss": 0.6867, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.6801517067003795, |
|
"grad_norm": 0.932569682598114, |
|
"learning_rate": 0.00017234512927465488, |
|
"loss": 0.7304, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.692793931731985, |
|
"grad_norm": 1.071932315826416, |
|
"learning_rate": 0.0001720770935939586, |
|
"loss": 0.7261, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.7054361567635903, |
|
"grad_norm": 0.8238343596458435, |
|
"learning_rate": 0.00017180808846342118, |
|
"loss": 0.7313, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.718078381795196, |
|
"grad_norm": 1.3495972156524658, |
|
"learning_rate": 0.0001715381200808801, |
|
"loss": 0.7418, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.7307206068268015, |
|
"grad_norm": 0.8959026336669922, |
|
"learning_rate": 0.00017126719466636572, |
|
"loss": 0.6729, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.7433628318584073, |
|
"grad_norm": 0.8978679180145264, |
|
"learning_rate": 0.0001709953184619585, |
|
"loss": 0.7, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.7560050568900127, |
|
"grad_norm": 1.033858060836792, |
|
"learning_rate": 0.00017072249773164485, |
|
"loss": 0.7142, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.768647281921618, |
|
"grad_norm": 0.9381289482116699, |
|
"learning_rate": 0.0001704487387611729, |
|
"loss": 0.7362, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.7812895069532235, |
|
"grad_norm": 1.0184166431427002, |
|
"learning_rate": 0.00017017404785790773, |
|
"loss": 0.7133, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.7939317319848294, |
|
"grad_norm": 0.9085473418235779, |
|
"learning_rate": 0.00016989843135068605, |
|
"loss": 0.6982, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.8065739570164348, |
|
"grad_norm": 0.8378614783287048, |
|
"learning_rate": 0.00016962189558967022, |
|
"loss": 0.6794, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.8192161820480406, |
|
"grad_norm": 0.9050717949867249, |
|
"learning_rate": 0.00016934444694620217, |
|
"loss": 0.6967, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.831858407079646, |
|
"grad_norm": 0.8742629289627075, |
|
"learning_rate": 0.00016906609181265654, |
|
"loss": 0.679, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.8445006321112514, |
|
"grad_norm": 1.250222086906433, |
|
"learning_rate": 0.0001687868366022932, |
|
"loss": 0.6866, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.7830986380577087, |
|
"learning_rate": 0.0001685066877491098, |
|
"loss": 0.7064, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.8697850821744626, |
|
"grad_norm": 0.837334394454956, |
|
"learning_rate": 0.0001682256517076933, |
|
"loss": 0.676, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.8824273072060684, |
|
"grad_norm": 1.7227693796157837, |
|
"learning_rate": 0.00016794373495307148, |
|
"loss": 0.6901, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.895069532237674, |
|
"grad_norm": 0.7620822191238403, |
|
"learning_rate": 0.00016766094398056337, |
|
"loss": 0.687, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.907711757269279, |
|
"grad_norm": 0.8214982748031616, |
|
"learning_rate": 0.00016737728530563013, |
|
"loss": 0.7061, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.920353982300885, |
|
"grad_norm": 0.9066684246063232, |
|
"learning_rate": 0.00016709276546372448, |
|
"loss": 0.7271, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.9329962073324904, |
|
"grad_norm": 0.9356798529624939, |
|
"learning_rate": 0.00016680739101014024, |
|
"loss": 0.6965, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.9456384323640963, |
|
"grad_norm": 0.8414567112922668, |
|
"learning_rate": 0.0001665211685198616, |
|
"loss": 0.6829, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.9582806573957017, |
|
"grad_norm": 0.9581737518310547, |
|
"learning_rate": 0.0001662341045874111, |
|
"loss": 0.6781, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.970922882427307, |
|
"grad_norm": 0.7672229409217834, |
|
"learning_rate": 0.0001659462058266982, |
|
"loss": 0.7107, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.983565107458913, |
|
"grad_norm": 0.8876848816871643, |
|
"learning_rate": 0.0001656574788708665, |
|
"loss": 0.682, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.9962073324905183, |
|
"grad_norm": 0.7291796207427979, |
|
"learning_rate": 0.00016536793037214134, |
|
"loss": 0.7012, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.0088495575221237, |
|
"grad_norm": 1.178667426109314, |
|
"learning_rate": 0.00016507756700167588, |
|
"loss": 0.5861, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.0214917825537295, |
|
"grad_norm": 1.044280767440796, |
|
"learning_rate": 0.00016478639544939826, |
|
"loss": 0.5248, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.034134007585335, |
|
"grad_norm": 0.8499409556388855, |
|
"learning_rate": 0.00016449442242385672, |
|
"loss": 0.5314, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.0467762326169407, |
|
"grad_norm": 0.8145996332168579, |
|
"learning_rate": 0.00016420165465206535, |
|
"loss": 0.5681, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.059418457648546, |
|
"grad_norm": 0.8090763688087463, |
|
"learning_rate": 0.00016390809887934914, |
|
"loss": 0.4982, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.0720606826801515, |
|
"grad_norm": 0.7884716391563416, |
|
"learning_rate": 0.00016361376186918846, |
|
"loss": 0.5338, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.0847029077117574, |
|
"grad_norm": 1.035247564315796, |
|
"learning_rate": 0.00016331865040306335, |
|
"loss": 0.521, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.0973451327433628, |
|
"grad_norm": 1.029201865196228, |
|
"learning_rate": 0.00016302277128029706, |
|
"loss": 0.5391, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.1099873577749686, |
|
"grad_norm": 0.8100953102111816, |
|
"learning_rate": 0.00016272613131789964, |
|
"loss": 0.5141, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.122629582806574, |
|
"grad_norm": 1.0345860719680786, |
|
"learning_rate": 0.0001624287373504107, |
|
"loss": 0.5576, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 3.1352718078381794, |
|
"grad_norm": 0.9381860494613647, |
|
"learning_rate": 0.00016213059622974214, |
|
"loss": 0.5373, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 3.147914032869785, |
|
"grad_norm": 0.8504341244697571, |
|
"learning_rate": 0.00016183171482502003, |
|
"loss": 0.5312, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 3.1605562579013906, |
|
"grad_norm": 1.0047380924224854, |
|
"learning_rate": 0.00016153210002242644, |
|
"loss": 0.5515, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.173198482932996, |
|
"grad_norm": 0.8505437970161438, |
|
"learning_rate": 0.00016123175872504098, |
|
"loss": 0.5257, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.185840707964602, |
|
"grad_norm": 1.0271879434585571, |
|
"learning_rate": 0.00016093069785268137, |
|
"loss": 0.5785, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.1984829329962072, |
|
"grad_norm": 1.0047165155410767, |
|
"learning_rate": 0.00016062892434174443, |
|
"loss": 0.5373, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 3.211125158027813, |
|
"grad_norm": 0.9564666152000427, |
|
"learning_rate": 0.00016032644514504604, |
|
"loss": 0.5285, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 3.2237673830594185, |
|
"grad_norm": 0.959581732749939, |
|
"learning_rate": 0.00016002326723166084, |
|
"loss": 0.5813, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.236409608091024, |
|
"grad_norm": 1.3242567777633667, |
|
"learning_rate": 0.00015971939758676186, |
|
"loss": 0.5669, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 3.2490518331226297, |
|
"grad_norm": 0.9959767460823059, |
|
"learning_rate": 0.00015941484321145953, |
|
"loss": 0.5766, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 3.261694058154235, |
|
"grad_norm": 0.8573315739631653, |
|
"learning_rate": 0.0001591096111226405, |
|
"loss": 0.5421, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 3.274336283185841, |
|
"grad_norm": 0.8555790781974792, |
|
"learning_rate": 0.00015880370835280553, |
|
"loss": 0.5606, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 3.2869785082174463, |
|
"grad_norm": 1.0024107694625854, |
|
"learning_rate": 0.00015849714194990803, |
|
"loss": 0.5406, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.2996207332490517, |
|
"grad_norm": 0.867758572101593, |
|
"learning_rate": 0.00015818991897719134, |
|
"loss": 0.5825, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 3.3122629582806575, |
|
"grad_norm": 0.828178346157074, |
|
"learning_rate": 0.00015788204651302602, |
|
"loss": 0.5528, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 3.324905183312263, |
|
"grad_norm": 0.9778569936752319, |
|
"learning_rate": 0.00015757353165074685, |
|
"loss": 0.5857, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 3.3375474083438688, |
|
"grad_norm": 0.9606329798698425, |
|
"learning_rate": 0.0001572643814984894, |
|
"loss": 0.6056, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.350189633375474, |
|
"grad_norm": 0.8577843308448792, |
|
"learning_rate": 0.00015695460317902615, |
|
"loss": 0.6096, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.3628318584070795, |
|
"grad_norm": 0.8798738718032837, |
|
"learning_rate": 0.00015664420382960256, |
|
"loss": 0.5979, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 3.3754740834386854, |
|
"grad_norm": 0.887492835521698, |
|
"learning_rate": 0.00015633319060177233, |
|
"loss": 0.5962, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 3.3881163084702908, |
|
"grad_norm": 0.8709145784378052, |
|
"learning_rate": 0.00015602157066123311, |
|
"loss": 0.5647, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.400758533501896, |
|
"grad_norm": 0.8413789868354797, |
|
"learning_rate": 0.00015570935118766087, |
|
"loss": 0.5846, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 3.413400758533502, |
|
"grad_norm": 0.9737523198127747, |
|
"learning_rate": 0.00015539653937454487, |
|
"loss": 0.5963, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.4260429835651074, |
|
"grad_norm": 1.0053389072418213, |
|
"learning_rate": 0.00015508314242902173, |
|
"loss": 0.599, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.438685208596713, |
|
"grad_norm": 0.9921556115150452, |
|
"learning_rate": 0.00015476916757170943, |
|
"loss": 0.5698, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.4513274336283186, |
|
"grad_norm": 0.9468759298324585, |
|
"learning_rate": 0.00015445462203654098, |
|
"loss": 0.5886, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.463969658659924, |
|
"grad_norm": 0.9463483095169067, |
|
"learning_rate": 0.0001541395130705977, |
|
"loss": 0.5829, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.47661188369153, |
|
"grad_norm": 0.9554671049118042, |
|
"learning_rate": 0.00015382384793394223, |
|
"loss": 0.6186, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.4892541087231352, |
|
"grad_norm": 0.7925019860267639, |
|
"learning_rate": 0.0001535076338994514, |
|
"loss": 0.5796, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.5018963337547406, |
|
"grad_norm": 0.92326819896698, |
|
"learning_rate": 0.00015319087825264846, |
|
"loss": 0.5647, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.5145385587863465, |
|
"grad_norm": 0.9871057868003845, |
|
"learning_rate": 0.0001528735882915354, |
|
"loss": 0.5622, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.527180783817952, |
|
"grad_norm": 0.9997586607933044, |
|
"learning_rate": 0.00015255577132642468, |
|
"loss": 0.629, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.5398230088495577, |
|
"grad_norm": 0.8749852180480957, |
|
"learning_rate": 0.00015223743467977088, |
|
"loss": 0.5883, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.552465233881163, |
|
"grad_norm": 0.8085633516311646, |
|
"learning_rate": 0.00015191858568600194, |
|
"loss": 0.5713, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.5651074589127685, |
|
"grad_norm": 0.951021134853363, |
|
"learning_rate": 0.00015159923169135025, |
|
"loss": 0.5965, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.5777496839443743, |
|
"grad_norm": 0.9590179324150085, |
|
"learning_rate": 0.00015127938005368323, |
|
"loss": 0.5678, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.5903919089759797, |
|
"grad_norm": 0.9921982884407043, |
|
"learning_rate": 0.0001509590381423341, |
|
"loss": 0.6115, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.6030341340075855, |
|
"grad_norm": 0.8661071062088013, |
|
"learning_rate": 0.00015063821333793172, |
|
"loss": 0.6495, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.615676359039191, |
|
"grad_norm": 0.8504185080528259, |
|
"learning_rate": 0.00015031691303223088, |
|
"loss": 0.5922, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.6283185840707963, |
|
"grad_norm": 0.8301743865013123, |
|
"learning_rate": 0.00014999514462794175, |
|
"loss": 0.6227, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.640960809102402, |
|
"grad_norm": 0.8586485385894775, |
|
"learning_rate": 0.0001496729155385595, |
|
"loss": 0.5801, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.6536030341340076, |
|
"grad_norm": 0.8772161602973938, |
|
"learning_rate": 0.00014935023318819334, |
|
"loss": 0.5712, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.6662452591656134, |
|
"grad_norm": 0.8610823750495911, |
|
"learning_rate": 0.00014902710501139556, |
|
"loss": 0.6007, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.678887484197219, |
|
"grad_norm": 0.8283450603485107, |
|
"learning_rate": 0.0001487035384529903, |
|
"loss": 0.5757, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.691529709228824, |
|
"grad_norm": 0.9658201336860657, |
|
"learning_rate": 0.00014837954096790182, |
|
"loss": 0.5899, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.7041719342604296, |
|
"grad_norm": 0.8245115280151367, |
|
"learning_rate": 0.000148055120020983, |
|
"loss": 0.6165, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.7168141592920354, |
|
"grad_norm": 0.9648094177246094, |
|
"learning_rate": 0.00014773028308684308, |
|
"loss": 0.6212, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.7294563843236412, |
|
"grad_norm": 0.8854801654815674, |
|
"learning_rate": 0.00014740503764967572, |
|
"loss": 0.5777, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.7420986093552466, |
|
"grad_norm": 0.8945504426956177, |
|
"learning_rate": 0.0001470793912030863, |
|
"loss": 0.6091, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.754740834386852, |
|
"grad_norm": 0.8189816474914551, |
|
"learning_rate": 0.00014675335124991946, |
|
"loss": 0.6035, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 3.7673830594184574, |
|
"grad_norm": 0.990737795829773, |
|
"learning_rate": 0.0001464269253020862, |
|
"loss": 0.5983, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 3.7800252844500632, |
|
"grad_norm": 0.8247061371803284, |
|
"learning_rate": 0.00014610012088039077, |
|
"loss": 0.6056, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 3.7926675094816686, |
|
"grad_norm": 0.8422549962997437, |
|
"learning_rate": 0.00014577294551435728, |
|
"loss": 0.6077, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.8053097345132745, |
|
"grad_norm": 0.9468559622764587, |
|
"learning_rate": 0.00014544540674205647, |
|
"loss": 0.592, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 3.81795195954488, |
|
"grad_norm": 0.8015314340591431, |
|
"learning_rate": 0.0001451175121099319, |
|
"loss": 0.5701, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 3.8305941845764853, |
|
"grad_norm": 0.896016001701355, |
|
"learning_rate": 0.00014478926917262607, |
|
"loss": 0.5985, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 3.843236409608091, |
|
"grad_norm": 0.965329110622406, |
|
"learning_rate": 0.00014446068549280633, |
|
"loss": 0.5693, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 3.8558786346396965, |
|
"grad_norm": 1.032674789428711, |
|
"learning_rate": 0.0001441317686409907, |
|
"loss": 0.6207, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 3.8685208596713023, |
|
"grad_norm": 1.4689821004867554, |
|
"learning_rate": 0.00014380252619537355, |
|
"loss": 0.6192, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 3.8811630847029077, |
|
"grad_norm": 0.9344895482063293, |
|
"learning_rate": 0.00014347296574165067, |
|
"loss": 0.5951, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 3.893805309734513, |
|
"grad_norm": 0.9095802903175354, |
|
"learning_rate": 0.00014314309487284486, |
|
"loss": 0.609, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 3.906447534766119, |
|
"grad_norm": 0.9843701720237732, |
|
"learning_rate": 0.00014281292118913084, |
|
"loss": 0.6107, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 3.9190897597977243, |
|
"grad_norm": 0.8768340349197388, |
|
"learning_rate": 0.00014248245229766005, |
|
"loss": 0.6268, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.93173198482933, |
|
"grad_norm": 0.9411798715591431, |
|
"learning_rate": 0.00014215169581238558, |
|
"loss": 0.6191, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 3.9443742098609356, |
|
"grad_norm": 0.8732224106788635, |
|
"learning_rate": 0.0001418206593538865, |
|
"loss": 0.614, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 3.957016434892541, |
|
"grad_norm": 0.8646383285522461, |
|
"learning_rate": 0.00014148935054919258, |
|
"loss": 0.6135, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 3.969658659924147, |
|
"grad_norm": 0.8871061205863953, |
|
"learning_rate": 0.00014115777703160824, |
|
"loss": 0.5987, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 3.982300884955752, |
|
"grad_norm": 0.7898637652397156, |
|
"learning_rate": 0.00014082594644053702, |
|
"loss": 0.6069, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 3.994943109987358, |
|
"grad_norm": 0.8474721908569336, |
|
"learning_rate": 0.00014049386642130522, |
|
"loss": 0.5762, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 4.007585335018963, |
|
"grad_norm": 1.1347519159317017, |
|
"learning_rate": 0.0001401615446249861, |
|
"loss": 0.4878, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 4.020227560050569, |
|
"grad_norm": 1.0003758668899536, |
|
"learning_rate": 0.00013982898870822322, |
|
"loss": 0.4266, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 4.032869785082174, |
|
"grad_norm": 1.7353389263153076, |
|
"learning_rate": 0.00013949620633305445, |
|
"loss": 0.4278, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 4.04551201011378, |
|
"grad_norm": 0.8438617587089539, |
|
"learning_rate": 0.00013916320516673512, |
|
"loss": 0.4255, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.058154235145386, |
|
"grad_norm": 0.9081391096115112, |
|
"learning_rate": 0.00013882999288156145, |
|
"loss": 0.4332, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 4.070796460176991, |
|
"grad_norm": 0.8712509274482727, |
|
"learning_rate": 0.00013849657715469385, |
|
"loss": 0.4263, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.083438685208597, |
|
"grad_norm": 0.8926701545715332, |
|
"learning_rate": 0.00013816296566798006, |
|
"loss": 0.4265, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 4.096080910240202, |
|
"grad_norm": 1.0100903511047363, |
|
"learning_rate": 0.00013782916610777793, |
|
"loss": 0.4601, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 4.108723135271807, |
|
"grad_norm": 0.9108811616897583, |
|
"learning_rate": 0.00013749518616477867, |
|
"loss": 0.4426, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.121365360303414, |
|
"grad_norm": 1.0556674003601074, |
|
"learning_rate": 0.00013716103353382937, |
|
"loss": 0.4641, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 4.134007585335019, |
|
"grad_norm": 0.8797064423561096, |
|
"learning_rate": 0.0001368267159137559, |
|
"loss": 0.4522, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 4.1466498103666245, |
|
"grad_norm": 0.9286285042762756, |
|
"learning_rate": 0.0001364922410071853, |
|
"loss": 0.4684, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.15929203539823, |
|
"grad_norm": 0.9558693170547485, |
|
"learning_rate": 0.00013615761652036872, |
|
"loss": 0.4597, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 4.171934260429835, |
|
"grad_norm": 0.8957265615463257, |
|
"learning_rate": 0.00013582285016300338, |
|
"loss": 0.5033, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.184576485461442, |
|
"grad_norm": 0.8720874786376953, |
|
"learning_rate": 0.00013548794964805531, |
|
"loss": 0.4636, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 4.197218710493047, |
|
"grad_norm": 0.9207468628883362, |
|
"learning_rate": 0.0001351529226915815, |
|
"loss": 0.4555, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 4.209860935524652, |
|
"grad_norm": 0.8886120319366455, |
|
"learning_rate": 0.000134817777012552, |
|
"loss": 0.4391, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 4.222503160556258, |
|
"grad_norm": 0.9986599087715149, |
|
"learning_rate": 0.00013448252033267246, |
|
"loss": 0.4848, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.235145385587863, |
|
"grad_norm": 3.081392288208008, |
|
"learning_rate": 0.0001341471603762057, |
|
"loss": 0.5096, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.247787610619469, |
|
"grad_norm": 1.0110422372817993, |
|
"learning_rate": 0.00013381170486979427, |
|
"loss": 0.4758, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 4.260429835651075, |
|
"grad_norm": 0.9332578182220459, |
|
"learning_rate": 0.00013347616154228193, |
|
"loss": 0.4607, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 4.27307206068268, |
|
"grad_norm": 1.1386651992797852, |
|
"learning_rate": 0.00013314053812453605, |
|
"loss": 0.4882, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.8812234401702881, |
|
"learning_rate": 0.0001328048423492691, |
|
"loss": 0.454, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 4.298356510745891, |
|
"grad_norm": 0.9429104328155518, |
|
"learning_rate": 0.00013246908195086072, |
|
"loss": 0.4724, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.310998735777497, |
|
"grad_norm": 0.9410486817359924, |
|
"learning_rate": 0.0001321332646651795, |
|
"loss": 0.4516, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 4.323640960809103, |
|
"grad_norm": 0.9896162748336792, |
|
"learning_rate": 0.00013179739822940454, |
|
"loss": 0.4949, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 4.336283185840708, |
|
"grad_norm": 0.9165130853652954, |
|
"learning_rate": 0.00013146149038184768, |
|
"loss": 0.487, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 4.348925410872313, |
|
"grad_norm": 2.110687494277954, |
|
"learning_rate": 0.00013112554886177447, |
|
"loss": 0.5062, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 4.361567635903919, |
|
"grad_norm": 0.8859379887580872, |
|
"learning_rate": 0.0001307895814092266, |
|
"loss": 0.4587, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.374209860935524, |
|
"grad_norm": 1.0231775045394897, |
|
"learning_rate": 0.00013045359576484305, |
|
"loss": 0.5083, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 4.3868520859671305, |
|
"grad_norm": 1.0273702144622803, |
|
"learning_rate": 0.00013011759966968204, |
|
"loss": 0.4849, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 4.399494310998736, |
|
"grad_norm": 0.9449805617332458, |
|
"learning_rate": 0.0001297816008650425, |
|
"loss": 0.493, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 4.412136536030341, |
|
"grad_norm": 0.8178017735481262, |
|
"learning_rate": 0.00012944560709228587, |
|
"loss": 0.464, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 4.424778761061947, |
|
"grad_norm": 1.0193867683410645, |
|
"learning_rate": 0.00012910962609265754, |
|
"loss": 0.4721, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.437420986093552, |
|
"grad_norm": 1.1380479335784912, |
|
"learning_rate": 0.00012877366560710868, |
|
"loss": 0.4589, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 4.450063211125158, |
|
"grad_norm": 0.8772681951522827, |
|
"learning_rate": 0.00012843773337611788, |
|
"loss": 0.4642, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 4.462705436156764, |
|
"grad_norm": 0.9058607220649719, |
|
"learning_rate": 0.00012810183713951264, |
|
"loss": 0.5033, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 4.475347661188369, |
|
"grad_norm": 0.938266932964325, |
|
"learning_rate": 0.00012776598463629118, |
|
"loss": 0.5098, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 4.4879898862199745, |
|
"grad_norm": 1.0325732231140137, |
|
"learning_rate": 0.00012743018360444422, |
|
"loss": 0.4833, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 4.50063211125158, |
|
"grad_norm": 0.8300301432609558, |
|
"learning_rate": 0.0001270944417807763, |
|
"loss": 0.4815, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 4.513274336283186, |
|
"grad_norm": 0.941461443901062, |
|
"learning_rate": 0.00012675876690072823, |
|
"loss": 0.4942, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 4.525916561314792, |
|
"grad_norm": 0.8629696369171143, |
|
"learning_rate": 0.00012642316669819812, |
|
"loss": 0.5091, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 4.538558786346397, |
|
"grad_norm": 0.9793810844421387, |
|
"learning_rate": 0.0001260876489053636, |
|
"loss": 0.52, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 4.551201011378002, |
|
"grad_norm": 0.9196791052818298, |
|
"learning_rate": 0.00012575222125250365, |
|
"loss": 0.4884, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.563843236409608, |
|
"grad_norm": 1.0433666706085205, |
|
"learning_rate": 0.00012541689146782048, |
|
"loss": 0.5041, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 4.576485461441214, |
|
"grad_norm": 1.0952868461608887, |
|
"learning_rate": 0.00012508166727726128, |
|
"loss": 0.5117, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 4.589127686472819, |
|
"grad_norm": 1.039157748222351, |
|
"learning_rate": 0.00012474655640434042, |
|
"loss": 0.5028, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 4.601769911504425, |
|
"grad_norm": 1.044838786125183, |
|
"learning_rate": 0.00012441156656996155, |
|
"loss": 0.4941, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 4.61441213653603, |
|
"grad_norm": 1.0558874607086182, |
|
"learning_rate": 0.00012407670549223953, |
|
"loss": 0.516, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 4.627054361567636, |
|
"grad_norm": 0.9311762452125549, |
|
"learning_rate": 0.0001237419808863227, |
|
"loss": 0.4933, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 4.639696586599241, |
|
"grad_norm": 1.0576010942459106, |
|
"learning_rate": 0.00012340740046421506, |
|
"loss": 0.5119, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 4.652338811630847, |
|
"grad_norm": 0.9502875208854675, |
|
"learning_rate": 0.0001230729719345987, |
|
"loss": 0.4875, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 4.664981036662453, |
|
"grad_norm": 0.9513876438140869, |
|
"learning_rate": 0.00012273870300265612, |
|
"loss": 0.4836, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 4.677623261694058, |
|
"grad_norm": 1.0516324043273926, |
|
"learning_rate": 0.00012240460136989274, |
|
"loss": 0.5168, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.6902654867256635, |
|
"grad_norm": 0.9066925644874573, |
|
"learning_rate": 0.00012207067473395935, |
|
"loss": 0.4947, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 4.70290771175727, |
|
"grad_norm": 0.9543781876564026, |
|
"learning_rate": 0.00012173693078847487, |
|
"loss": 0.5155, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 4.715549936788875, |
|
"grad_norm": 0.9955562949180603, |
|
"learning_rate": 0.00012140337722284914, |
|
"loss": 0.5302, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 4.7281921618204805, |
|
"grad_norm": 4.362971305847168, |
|
"learning_rate": 0.00012107002172210559, |
|
"loss": 0.5438, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 4.740834386852086, |
|
"grad_norm": 1.0576658248901367, |
|
"learning_rate": 0.00012073687196670429, |
|
"loss": 0.536, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 4.753476611883691, |
|
"grad_norm": 0.946419894695282, |
|
"learning_rate": 0.00012040393563236494, |
|
"loss": 0.5253, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 4.766118836915297, |
|
"grad_norm": 0.9340927004814148, |
|
"learning_rate": 0.00012007122038989012, |
|
"loss": 0.5117, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 4.778761061946903, |
|
"grad_norm": 0.9391945600509644, |
|
"learning_rate": 0.00011973873390498841, |
|
"loss": 0.5132, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 4.791403286978508, |
|
"grad_norm": 0.9951459169387817, |
|
"learning_rate": 0.00011940648383809794, |
|
"loss": 0.5356, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 4.804045512010114, |
|
"grad_norm": 1.0087045431137085, |
|
"learning_rate": 0.00011907447784420974, |
|
"loss": 0.4949, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.816687737041719, |
|
"grad_norm": 1.0418733358383179, |
|
"learning_rate": 0.00011874272357269138, |
|
"loss": 0.5044, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 4.8293299620733245, |
|
"grad_norm": 0.9647939801216125, |
|
"learning_rate": 0.0001184112286671109, |
|
"loss": 0.519, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 4.841972187104931, |
|
"grad_norm": 0.9896367788314819, |
|
"learning_rate": 0.00011808000076506056, |
|
"loss": 0.5376, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 4.854614412136536, |
|
"grad_norm": 1.1160699129104614, |
|
"learning_rate": 0.00011774904749798086, |
|
"loss": 0.4941, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 4.867256637168142, |
|
"grad_norm": 0.9226526021957397, |
|
"learning_rate": 0.00011741837649098477, |
|
"loss": 0.5044, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 4.879898862199747, |
|
"grad_norm": 0.959432065486908, |
|
"learning_rate": 0.00011708799536268202, |
|
"loss": 0.5051, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 4.892541087231352, |
|
"grad_norm": 0.8908069729804993, |
|
"learning_rate": 0.0001167579117250036, |
|
"loss": 0.5226, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 4.905183312262958, |
|
"grad_norm": 0.8914538025856018, |
|
"learning_rate": 0.00011642813318302639, |
|
"loss": 0.4971, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 4.917825537294564, |
|
"grad_norm": 0.940838098526001, |
|
"learning_rate": 0.00011609866733479784, |
|
"loss": 0.5349, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 4.9304677623261695, |
|
"grad_norm": 0.9459583759307861, |
|
"learning_rate": 0.00011576952177116095, |
|
"loss": 0.5137, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 4.943109987357775, |
|
"grad_norm": 0.988993227481842, |
|
"learning_rate": 0.00011544070407557961, |
|
"loss": 0.5061, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 4.95575221238938, |
|
"grad_norm": 0.8528466820716858, |
|
"learning_rate": 0.00011511222182396349, |
|
"loss": 0.4997, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 4.9683944374209865, |
|
"grad_norm": 0.9346151351928711, |
|
"learning_rate": 0.00011478408258449373, |
|
"loss": 0.5347, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 4.981036662452592, |
|
"grad_norm": 0.9937970638275146, |
|
"learning_rate": 0.00011445629391744854, |
|
"loss": 0.5138, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 4.993678887484197, |
|
"grad_norm": 1.021466612815857, |
|
"learning_rate": 0.00011412886337502894, |
|
"loss": 0.4953, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 5.006321112515803, |
|
"grad_norm": 0.8485009074211121, |
|
"learning_rate": 0.00011380179850118495, |
|
"loss": 0.4504, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 5.018963337547408, |
|
"grad_norm": 0.9451215267181396, |
|
"learning_rate": 0.00011347510683144151, |
|
"loss": 0.3505, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 5.0316055625790135, |
|
"grad_norm": 0.9910890460014343, |
|
"learning_rate": 0.00011314879589272505, |
|
"loss": 0.3889, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 5.04424778761062, |
|
"grad_norm": 1.070092797279358, |
|
"learning_rate": 0.00011282287320318996, |
|
"loss": 0.3514, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 5.056890012642225, |
|
"grad_norm": 0.9985383749008179, |
|
"learning_rate": 0.0001124973462720455, |
|
"loss": 0.3563, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0695322376738305, |
|
"grad_norm": 0.8897594213485718, |
|
"learning_rate": 0.00011217222259938272, |
|
"loss": 0.3402, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 5.082174462705436, |
|
"grad_norm": 0.981590211391449, |
|
"learning_rate": 0.00011184750967600157, |
|
"loss": 0.4163, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 5.094816687737041, |
|
"grad_norm": 0.8742545247077942, |
|
"learning_rate": 0.00011152321498323846, |
|
"loss": 0.3477, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 5.107458912768648, |
|
"grad_norm": 0.9774489402770996, |
|
"learning_rate": 0.0001111993459927938, |
|
"loss": 0.3722, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 5.120101137800253, |
|
"grad_norm": 0.9024301171302795, |
|
"learning_rate": 0.00011087591016656001, |
|
"loss": 0.3531, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 5.132743362831858, |
|
"grad_norm": 0.9952253103256226, |
|
"learning_rate": 0.00011055291495644926, |
|
"loss": 0.3762, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 5.145385587863464, |
|
"grad_norm": 0.9904897809028625, |
|
"learning_rate": 0.00011023036780422212, |
|
"loss": 0.4032, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 5.158027812895069, |
|
"grad_norm": 0.9370035529136658, |
|
"learning_rate": 0.00010990827614131594, |
|
"loss": 0.3717, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 5.1706700379266755, |
|
"grad_norm": 1.055816650390625, |
|
"learning_rate": 0.00010958664738867372, |
|
"loss": 0.3958, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 5.183312262958281, |
|
"grad_norm": 1.0066580772399902, |
|
"learning_rate": 0.00010926548895657303, |
|
"loss": 0.3793, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.195954487989886, |
|
"grad_norm": 1.0231560468673706, |
|
"learning_rate": 0.00010894480824445532, |
|
"loss": 0.3813, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 5.208596713021492, |
|
"grad_norm": 0.9747928977012634, |
|
"learning_rate": 0.00010862461264075542, |
|
"loss": 0.3594, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 5.221238938053097, |
|
"grad_norm": 1.0806195735931396, |
|
"learning_rate": 0.00010830490952273145, |
|
"loss": 0.3956, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 5.233881163084703, |
|
"grad_norm": 1.0321904420852661, |
|
"learning_rate": 0.00010798570625629461, |
|
"loss": 0.3585, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 5.246523388116309, |
|
"grad_norm": 1.2540595531463623, |
|
"learning_rate": 0.00010766701019583967, |
|
"loss": 0.391, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 5.259165613147914, |
|
"grad_norm": 1.035423994064331, |
|
"learning_rate": 0.00010734882868407537, |
|
"loss": 0.4028, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 5.2718078381795195, |
|
"grad_norm": 1.2022385597229004, |
|
"learning_rate": 0.00010703116905185541, |
|
"loss": 0.3841, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 5.284450063211125, |
|
"grad_norm": 1.045843482017517, |
|
"learning_rate": 0.00010671403861800946, |
|
"loss": 0.3939, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 5.29709228824273, |
|
"grad_norm": 0.9559326767921448, |
|
"learning_rate": 0.00010639744468917447, |
|
"loss": 0.3801, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 5.3097345132743365, |
|
"grad_norm": 1.033033847808838, |
|
"learning_rate": 0.0001060813945596265, |
|
"loss": 0.3846, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.322376738305942, |
|
"grad_norm": 0.9737249612808228, |
|
"learning_rate": 0.00010576589551111242, |
|
"loss": 0.39, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 5.335018963337547, |
|
"grad_norm": 1.0500105619430542, |
|
"learning_rate": 0.00010545095481268241, |
|
"loss": 0.3713, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 5.347661188369153, |
|
"grad_norm": 1.1261670589447021, |
|
"learning_rate": 0.00010513657972052228, |
|
"loss": 0.4112, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 5.360303413400759, |
|
"grad_norm": 0.9046671390533447, |
|
"learning_rate": 0.0001048227774777864, |
|
"loss": 0.3963, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 5.372945638432364, |
|
"grad_norm": 1.0187987089157104, |
|
"learning_rate": 0.00010450955531443067, |
|
"loss": 0.3954, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 5.38558786346397, |
|
"grad_norm": 0.9995326399803162, |
|
"learning_rate": 0.00010419692044704624, |
|
"loss": 0.3996, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 5.398230088495575, |
|
"grad_norm": 0.9701279997825623, |
|
"learning_rate": 0.00010388488007869282, |
|
"loss": 0.3805, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 5.410872313527181, |
|
"grad_norm": 0.9126356840133667, |
|
"learning_rate": 0.00010357344139873315, |
|
"loss": 0.3862, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 5.423514538558786, |
|
"grad_norm": 0.9048483371734619, |
|
"learning_rate": 0.00010326261158266701, |
|
"loss": 0.3767, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 5.436156763590392, |
|
"grad_norm": 0.9570040702819824, |
|
"learning_rate": 0.0001029523977919662, |
|
"loss": 0.3875, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.448798988621998, |
|
"grad_norm": 1.0698267221450806, |
|
"learning_rate": 0.00010264280717390927, |
|
"loss": 0.4159, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 5.461441213653603, |
|
"grad_norm": 1.03220796585083, |
|
"learning_rate": 0.00010233384686141701, |
|
"loss": 0.4062, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 5.474083438685208, |
|
"grad_norm": 0.9866275787353516, |
|
"learning_rate": 0.00010202552397288805, |
|
"loss": 0.4064, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 5.486725663716814, |
|
"grad_norm": 0.9090940356254578, |
|
"learning_rate": 0.00010171784561203485, |
|
"loss": 0.4178, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 5.49936788874842, |
|
"grad_norm": 1.0094218254089355, |
|
"learning_rate": 0.00010141081886772013, |
|
"loss": 0.4046, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 5.5120101137800255, |
|
"grad_norm": 0.9741319417953491, |
|
"learning_rate": 0.00010110445081379343, |
|
"loss": 0.3957, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 5.524652338811631, |
|
"grad_norm": 1.186471700668335, |
|
"learning_rate": 0.00010079874850892808, |
|
"loss": 0.4112, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 5.537294563843236, |
|
"grad_norm": 1.0046883821487427, |
|
"learning_rate": 0.00010049371899645874, |
|
"loss": 0.3976, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 5.549936788874842, |
|
"grad_norm": 2.301224946975708, |
|
"learning_rate": 0.00010018936930421907, |
|
"loss": 0.4381, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 5.562579013906447, |
|
"grad_norm": 1.1555812358856201, |
|
"learning_rate": 9.988570644437969e-05, |
|
"loss": 0.4139, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.575221238938053, |
|
"grad_norm": 1.0925663709640503, |
|
"learning_rate": 9.958273741328672e-05, |
|
"loss": 0.376, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 5.587863463969659, |
|
"grad_norm": 1.0395334959030151, |
|
"learning_rate": 9.928046919130056e-05, |
|
"loss": 0.4696, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 5.600505689001264, |
|
"grad_norm": 1.0506666898727417, |
|
"learning_rate": 9.897890874263518e-05, |
|
"loss": 0.4165, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 5.6131479140328695, |
|
"grad_norm": 0.9786500930786133, |
|
"learning_rate": 9.867806301519742e-05, |
|
"loss": 0.3949, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 5.625790139064476, |
|
"grad_norm": 1.0455806255340576, |
|
"learning_rate": 9.837793894042716e-05, |
|
"loss": 0.3976, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 5.638432364096081, |
|
"grad_norm": 0.9991239905357361, |
|
"learning_rate": 9.807854343313739e-05, |
|
"loss": 0.3862, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 5.651074589127687, |
|
"grad_norm": 1.0253965854644775, |
|
"learning_rate": 9.777988339135517e-05, |
|
"loss": 0.3859, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 5.663716814159292, |
|
"grad_norm": 0.9867163300514221, |
|
"learning_rate": 9.748196569616245e-05, |
|
"loss": 0.401, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 5.676359039190897, |
|
"grad_norm": 0.9973002672195435, |
|
"learning_rate": 9.718479721153764e-05, |
|
"loss": 0.4055, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 5.689001264222503, |
|
"grad_norm": 1.03886079788208, |
|
"learning_rate": 9.688838478419746e-05, |
|
"loss": 0.4031, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.701643489254109, |
|
"grad_norm": 1.1662676334381104, |
|
"learning_rate": 9.659273524343917e-05, |
|
"loss": 0.3998, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 0.9785062670707703, |
|
"learning_rate": 9.629785540098329e-05, |
|
"loss": 0.3925, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 5.72692793931732, |
|
"grad_norm": 1.0249117612838745, |
|
"learning_rate": 9.600375205081654e-05, |
|
"loss": 0.4195, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 5.739570164348925, |
|
"grad_norm": 1.0373821258544922, |
|
"learning_rate": 9.571043196903541e-05, |
|
"loss": 0.4197, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 5.752212389380531, |
|
"grad_norm": 0.9370099306106567, |
|
"learning_rate": 9.541790191368998e-05, |
|
"loss": 0.39, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 5.764854614412137, |
|
"grad_norm": 1.0252115726470947, |
|
"learning_rate": 9.512616862462831e-05, |
|
"loss": 0.408, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 5.777496839443742, |
|
"grad_norm": 1.033614158630371, |
|
"learning_rate": 9.483523882334102e-05, |
|
"loss": 0.4194, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 5.790139064475348, |
|
"grad_norm": 1.1127879619598389, |
|
"learning_rate": 9.454511921280651e-05, |
|
"loss": 0.4098, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 5.802781289506953, |
|
"grad_norm": 0.9151955246925354, |
|
"learning_rate": 9.425581647733652e-05, |
|
"loss": 0.4202, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 5.815423514538558, |
|
"grad_norm": 1.0775083303451538, |
|
"learning_rate": 9.396733728242207e-05, |
|
"loss": 0.4181, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 5.828065739570165, |
|
"grad_norm": 0.9415781497955322, |
|
"learning_rate": 9.367968827458003e-05, |
|
"loss": 0.4538, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 5.84070796460177, |
|
"grad_norm": 0.9953785538673401, |
|
"learning_rate": 9.339287608119976e-05, |
|
"loss": 0.4121, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 5.8533501896333755, |
|
"grad_norm": 1.0544629096984863, |
|
"learning_rate": 9.310690731039065e-05, |
|
"loss": 0.4025, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 5.865992414664981, |
|
"grad_norm": 0.9968181848526001, |
|
"learning_rate": 9.282178855082963e-05, |
|
"loss": 0.4179, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 5.878634639696586, |
|
"grad_norm": 0.9884583353996277, |
|
"learning_rate": 9.253752637160965e-05, |
|
"loss": 0.4345, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 5.891276864728193, |
|
"grad_norm": 1.0549771785736084, |
|
"learning_rate": 9.225412732208815e-05, |
|
"loss": 0.4171, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 5.903919089759798, |
|
"grad_norm": 1.058349847793579, |
|
"learning_rate": 9.19715979317361e-05, |
|
"loss": 0.3954, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 5.916561314791403, |
|
"grad_norm": 0.959523618221283, |
|
"learning_rate": 9.168994470998771e-05, |
|
"loss": 0.4078, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 5.929203539823009, |
|
"grad_norm": 1.0451573133468628, |
|
"learning_rate": 9.140917414609043e-05, |
|
"loss": 0.4477, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 5.941845764854614, |
|
"grad_norm": 1.0435268878936768, |
|
"learning_rate": 9.112929270895536e-05, |
|
"loss": 0.3955, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 5.9544879898862195, |
|
"grad_norm": 1.001197338104248, |
|
"learning_rate": 9.085030684700828e-05, |
|
"loss": 0.4086, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 5.967130214917826, |
|
"grad_norm": 1.0496070384979248, |
|
"learning_rate": 9.057222298804104e-05, |
|
"loss": 0.4342, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 5.979772439949431, |
|
"grad_norm": 0.955414891242981, |
|
"learning_rate": 9.029504753906348e-05, |
|
"loss": 0.4041, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 5.992414664981037, |
|
"grad_norm": 3.551063060760498, |
|
"learning_rate": 9.001878688615582e-05, |
|
"loss": 0.4304, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 6.005056890012642, |
|
"grad_norm": 0.8560709953308105, |
|
"learning_rate": 8.974344739432153e-05, |
|
"loss": 0.3485, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 6.017699115044247, |
|
"grad_norm": 1.048302412033081, |
|
"learning_rate": 8.946903540734064e-05, |
|
"loss": 0.2697, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 6.030341340075854, |
|
"grad_norm": 1.1160005331039429, |
|
"learning_rate": 8.919555724762359e-05, |
|
"loss": 0.2732, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 6.042983565107459, |
|
"grad_norm": 0.9840885400772095, |
|
"learning_rate": 8.892301921606567e-05, |
|
"loss": 0.2855, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 6.055625790139064, |
|
"grad_norm": 0.9168655872344971, |
|
"learning_rate": 8.865142759190168e-05, |
|
"loss": 0.2657, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 6.06826801517067, |
|
"grad_norm": 0.9473972916603088, |
|
"learning_rate": 8.838078863256136e-05, |
|
"loss": 0.2808, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.080910240202275, |
|
"grad_norm": 1.079185962677002, |
|
"learning_rate": 8.811110857352518e-05, |
|
"loss": 0.2815, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 6.0935524652338815, |
|
"grad_norm": 1.0252193212509155, |
|
"learning_rate": 8.784239362818074e-05, |
|
"loss": 0.2981, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 6.106194690265487, |
|
"grad_norm": 0.9863188862800598, |
|
"learning_rate": 8.757464998767951e-05, |
|
"loss": 0.2817, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 6.118836915297092, |
|
"grad_norm": 0.9947652220726013, |
|
"learning_rate": 8.730788382079432e-05, |
|
"loss": 0.2946, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 6.131479140328698, |
|
"grad_norm": 1.0014346837997437, |
|
"learning_rate": 8.704210127377708e-05, |
|
"loss": 0.2902, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 6.144121365360303, |
|
"grad_norm": 1.0144473314285278, |
|
"learning_rate": 8.677730847021724e-05, |
|
"loss": 0.2828, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 6.156763590391909, |
|
"grad_norm": 1.0776128768920898, |
|
"learning_rate": 8.651351151090082e-05, |
|
"loss": 0.306, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 6.169405815423515, |
|
"grad_norm": 1.13133704662323, |
|
"learning_rate": 8.625071647366963e-05, |
|
"loss": 0.2842, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 6.18204804045512, |
|
"grad_norm": 1.0843030214309692, |
|
"learning_rate": 8.598892941328137e-05, |
|
"loss": 0.2938, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 6.1946902654867255, |
|
"grad_norm": 1.0806282758712769, |
|
"learning_rate": 8.572815636127013e-05, |
|
"loss": 0.3009, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 6.207332490518331, |
|
"grad_norm": 1.2078369855880737, |
|
"learning_rate": 8.54684033258074e-05, |
|
"loss": 0.3298, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 6.219974715549937, |
|
"grad_norm": 1.0101124048233032, |
|
"learning_rate": 8.520967629156365e-05, |
|
"loss": 0.2938, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 6.232616940581543, |
|
"grad_norm": 1.0761367082595825, |
|
"learning_rate": 8.495198121957043e-05, |
|
"loss": 0.3062, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 6.245259165613148, |
|
"grad_norm": 1.1186556816101074, |
|
"learning_rate": 8.469532404708298e-05, |
|
"loss": 0.3024, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 6.257901390644753, |
|
"grad_norm": 1.0951234102249146, |
|
"learning_rate": 8.443971068744362e-05, |
|
"loss": 0.2902, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 6.270543615676359, |
|
"grad_norm": 0.9902530908584595, |
|
"learning_rate": 8.418514702994525e-05, |
|
"loss": 0.296, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 6.283185840707965, |
|
"grad_norm": 1.1143983602523804, |
|
"learning_rate": 8.393163893969586e-05, |
|
"loss": 0.3114, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 6.29582806573957, |
|
"grad_norm": 1.0336135625839233, |
|
"learning_rate": 8.367919225748333e-05, |
|
"loss": 0.3308, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 6.308470290771176, |
|
"grad_norm": 0.9870953559875488, |
|
"learning_rate": 8.34278127996408e-05, |
|
"loss": 0.2956, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 6.321112515802781, |
|
"grad_norm": 1.0120779275894165, |
|
"learning_rate": 8.317750635791284e-05, |
|
"loss": 0.313, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.333754740834387, |
|
"grad_norm": 0.9608586430549622, |
|
"learning_rate": 8.292827869932179e-05, |
|
"loss": 0.3005, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 6.346396965865992, |
|
"grad_norm": 1.1399952173233032, |
|
"learning_rate": 8.268013556603504e-05, |
|
"loss": 0.302, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 6.359039190897598, |
|
"grad_norm": 1.1939678192138672, |
|
"learning_rate": 8.243308267523261e-05, |
|
"loss": 0.3214, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 6.371681415929204, |
|
"grad_norm": 1.0933220386505127, |
|
"learning_rate": 8.218712571897564e-05, |
|
"loss": 0.3145, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 6.384323640960809, |
|
"grad_norm": 1.6601200103759766, |
|
"learning_rate": 8.194227036407498e-05, |
|
"loss": 0.3069, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 6.3969658659924145, |
|
"grad_norm": 1.105997920036316, |
|
"learning_rate": 8.169852225196077e-05, |
|
"loss": 0.2998, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 6.40960809102402, |
|
"grad_norm": 1.0879909992218018, |
|
"learning_rate": 8.145588699855247e-05, |
|
"loss": 0.3087, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 6.422250316055626, |
|
"grad_norm": 1.011335015296936, |
|
"learning_rate": 8.121437019412947e-05, |
|
"loss": 0.2982, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 6.4348925410872315, |
|
"grad_norm": 1.2018229961395264, |
|
"learning_rate": 8.09739774032022e-05, |
|
"loss": 0.3272, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 6.447534766118837, |
|
"grad_norm": 1.0991839170455933, |
|
"learning_rate": 8.073471416438405e-05, |
|
"loss": 0.3434, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.460176991150442, |
|
"grad_norm": 1.228576898574829, |
|
"learning_rate": 8.049658599026369e-05, |
|
"loss": 0.3113, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 6.472819216182048, |
|
"grad_norm": 1.0694067478179932, |
|
"learning_rate": 8.0259598367278e-05, |
|
"loss": 0.3114, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 6.485461441213654, |
|
"grad_norm": 1.0272830724716187, |
|
"learning_rate": 8.002375675558586e-05, |
|
"loss": 0.3103, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 6.498103666245259, |
|
"grad_norm": 0.974769115447998, |
|
"learning_rate": 7.978906658894213e-05, |
|
"loss": 0.3093, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 6.510745891276865, |
|
"grad_norm": 1.1441291570663452, |
|
"learning_rate": 7.955553327457256e-05, |
|
"loss": 0.3317, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 6.52338811630847, |
|
"grad_norm": 1.0339381694793701, |
|
"learning_rate": 7.932316219304925e-05, |
|
"loss": 0.2997, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 6.5360303413400755, |
|
"grad_norm": 1.0404632091522217, |
|
"learning_rate": 7.90919586981666e-05, |
|
"loss": 0.3095, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 6.548672566371682, |
|
"grad_norm": 1.1902042627334595, |
|
"learning_rate": 7.886192811681793e-05, |
|
"loss": 0.2978, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 6.561314791403287, |
|
"grad_norm": 1.089690089225769, |
|
"learning_rate": 7.863307574887296e-05, |
|
"loss": 0.3103, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 6.573957016434893, |
|
"grad_norm": 1.1589289903640747, |
|
"learning_rate": 7.840540686705539e-05, |
|
"loss": 0.3425, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 6.586599241466498, |
|
"grad_norm": 1.0016796588897705, |
|
"learning_rate": 7.817892671682173e-05, |
|
"loss": 0.3004, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 6.599241466498103, |
|
"grad_norm": 1.1263011693954468, |
|
"learning_rate": 7.795364051624015e-05, |
|
"loss": 0.3124, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 6.611883691529709, |
|
"grad_norm": 1.1125059127807617, |
|
"learning_rate": 7.77295534558705e-05, |
|
"loss": 0.331, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 6.624525916561315, |
|
"grad_norm": 1.1294969320297241, |
|
"learning_rate": 7.750667069864458e-05, |
|
"loss": 0.308, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 6.6371681415929205, |
|
"grad_norm": 1.0179051160812378, |
|
"learning_rate": 7.728499737974723e-05, |
|
"loss": 0.3057, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 6.649810366624526, |
|
"grad_norm": 1.0046980381011963, |
|
"learning_rate": 7.706453860649807e-05, |
|
"loss": 0.2959, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 6.662452591656131, |
|
"grad_norm": 1.110780954360962, |
|
"learning_rate": 7.684529945823368e-05, |
|
"loss": 0.3461, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 6.6750948166877375, |
|
"grad_norm": 1.0861669778823853, |
|
"learning_rate": 7.662728498619076e-05, |
|
"loss": 0.2993, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 6.687737041719343, |
|
"grad_norm": 1.0867419242858887, |
|
"learning_rate": 7.641050021338954e-05, |
|
"loss": 0.3354, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 6.700379266750948, |
|
"grad_norm": 1.1115156412124634, |
|
"learning_rate": 7.619495013451831e-05, |
|
"loss": 0.3177, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 6.713021491782554, |
|
"grad_norm": 1.0660215616226196, |
|
"learning_rate": 7.59806397158181e-05, |
|
"loss": 0.3141, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 6.725663716814159, |
|
"grad_norm": 0.9811689257621765, |
|
"learning_rate": 7.576757389496838e-05, |
|
"loss": 0.3354, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 6.7383059418457645, |
|
"grad_norm": 1.0768461227416992, |
|
"learning_rate": 7.555575758097325e-05, |
|
"loss": 0.3108, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 6.750948166877371, |
|
"grad_norm": 1.1170628070831299, |
|
"learning_rate": 7.534519565404843e-05, |
|
"loss": 0.3206, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 6.763590391908976, |
|
"grad_norm": 0.9863327145576477, |
|
"learning_rate": 7.51358929655087e-05, |
|
"loss": 0.2973, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 6.7762326169405815, |
|
"grad_norm": 1.1545705795288086, |
|
"learning_rate": 7.492785433765617e-05, |
|
"loss": 0.3393, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 6.788874841972187, |
|
"grad_norm": 1.0578138828277588, |
|
"learning_rate": 7.472108456366925e-05, |
|
"loss": 0.323, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 6.801517067003792, |
|
"grad_norm": 1.0187878608703613, |
|
"learning_rate": 7.451558840749207e-05, |
|
"loss": 0.3386, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 6.814159292035399, |
|
"grad_norm": 1.0566604137420654, |
|
"learning_rate": 7.431137060372486e-05, |
|
"loss": 0.3161, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 6.826801517067004, |
|
"grad_norm": 0.9965440034866333, |
|
"learning_rate": 7.410843585751477e-05, |
|
"loss": 0.322, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.839443742098609, |
|
"grad_norm": 1.1252332925796509, |
|
"learning_rate": 7.390678884444751e-05, |
|
"loss": 0.3421, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 6.852085967130215, |
|
"grad_norm": 1.5158134698867798, |
|
"learning_rate": 7.370643421043957e-05, |
|
"loss": 0.3375, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 6.86472819216182, |
|
"grad_norm": 1.0333036184310913, |
|
"learning_rate": 7.350737657163133e-05, |
|
"loss": 0.3173, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 6.877370417193426, |
|
"grad_norm": 1.0372684001922607, |
|
"learning_rate": 7.33096205142805e-05, |
|
"loss": 0.3362, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 6.890012642225032, |
|
"grad_norm": 0.9757832288742065, |
|
"learning_rate": 7.311317059465658e-05, |
|
"loss": 0.3255, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 6.902654867256637, |
|
"grad_norm": 1.0241106748580933, |
|
"learning_rate": 7.291803133893588e-05, |
|
"loss": 0.3146, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 6.915297092288243, |
|
"grad_norm": 1.095625638961792, |
|
"learning_rate": 7.272420724309719e-05, |
|
"loss": 0.3185, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 6.927939317319848, |
|
"grad_norm": 1.1619679927825928, |
|
"learning_rate": 7.25317027728182e-05, |
|
"loss": 0.3149, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 6.940581542351454, |
|
"grad_norm": 1.090199589729309, |
|
"learning_rate": 7.234052236337267e-05, |
|
"loss": 0.3194, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 6.95322376738306, |
|
"grad_norm": 1.0253422260284424, |
|
"learning_rate": 7.215067041952817e-05, |
|
"loss": 0.3748, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.965865992414665, |
|
"grad_norm": 0.9982818365097046, |
|
"learning_rate": 7.196215131544458e-05, |
|
"loss": 0.3315, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 6.9785082174462705, |
|
"grad_norm": 1.0616456270217896, |
|
"learning_rate": 7.177496939457349e-05, |
|
"loss": 0.3197, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 6.991150442477876, |
|
"grad_norm": 1.0430032014846802, |
|
"learning_rate": 7.158912896955785e-05, |
|
"loss": 0.332, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 7.003792667509481, |
|
"grad_norm": 0.9125473499298096, |
|
"learning_rate": 7.140463432213281e-05, |
|
"loss": 0.2938, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 7.0164348925410875, |
|
"grad_norm": 1.0734045505523682, |
|
"learning_rate": 7.122148970302702e-05, |
|
"loss": 0.2281, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 7.029077117572693, |
|
"grad_norm": 1.0642218589782715, |
|
"learning_rate": 7.103969933186467e-05, |
|
"loss": 0.2096, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 7.041719342604298, |
|
"grad_norm": 1.080702304840088, |
|
"learning_rate": 7.085926739706828e-05, |
|
"loss": 0.2014, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 7.054361567635904, |
|
"grad_norm": 1.0507287979125977, |
|
"learning_rate": 7.06801980557622e-05, |
|
"loss": 0.2107, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 7.067003792667509, |
|
"grad_norm": 1.0190140008926392, |
|
"learning_rate": 7.050249543367683e-05, |
|
"loss": 0.2106, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 7.079646017699115, |
|
"grad_norm": 1.1010105609893799, |
|
"learning_rate": 7.032616362505359e-05, |
|
"loss": 0.2142, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 7.092288242730721, |
|
"grad_norm": 0.9539241194725037, |
|
"learning_rate": 7.015120669255053e-05, |
|
"loss": 0.2138, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 7.104930467762326, |
|
"grad_norm": 1.2524183988571167, |
|
"learning_rate": 6.99776286671488e-05, |
|
"loss": 0.2166, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 7.117572692793932, |
|
"grad_norm": 1.0015676021575928, |
|
"learning_rate": 6.980543354805969e-05, |
|
"loss": 0.2075, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 7.130214917825537, |
|
"grad_norm": 1.0855770111083984, |
|
"learning_rate": 6.963462530263261e-05, |
|
"loss": 0.2322, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 1.1854267120361328, |
|
"learning_rate": 6.946520786626358e-05, |
|
"loss": 0.2192, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 7.155499367888749, |
|
"grad_norm": 1.1590447425842285, |
|
"learning_rate": 6.929718514230455e-05, |
|
"loss": 0.2286, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 7.168141592920354, |
|
"grad_norm": 1.0713489055633545, |
|
"learning_rate": 6.913056100197355e-05, |
|
"loss": 0.2101, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 7.180783817951959, |
|
"grad_norm": 1.0067224502563477, |
|
"learning_rate": 6.896533928426545e-05, |
|
"loss": 0.2191, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 7.193426042983565, |
|
"grad_norm": 1.0778611898422241, |
|
"learning_rate": 6.880152379586353e-05, |
|
"loss": 0.2242, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 7.206068268015171, |
|
"grad_norm": 1.1107529401779175, |
|
"learning_rate": 6.863911831105174e-05, |
|
"loss": 0.236, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 7.2187104930467765, |
|
"grad_norm": 1.1352819204330444, |
|
"learning_rate": 6.847812657162774e-05, |
|
"loss": 0.2306, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 7.231352718078382, |
|
"grad_norm": 1.1808239221572876, |
|
"learning_rate": 6.831855228681676e-05, |
|
"loss": 0.2313, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 7.243994943109987, |
|
"grad_norm": 1.161959171295166, |
|
"learning_rate": 6.816039913318605e-05, |
|
"loss": 0.2365, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 7.256637168141593, |
|
"grad_norm": 1.102596402168274, |
|
"learning_rate": 6.800367075456027e-05, |
|
"loss": 0.2247, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 7.269279393173198, |
|
"grad_norm": 0.9597683548927307, |
|
"learning_rate": 6.78483707619374e-05, |
|
"loss": 0.216, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 7.281921618204804, |
|
"grad_norm": 1.1682482957839966, |
|
"learning_rate": 6.769450273340572e-05, |
|
"loss": 0.2481, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 7.29456384323641, |
|
"grad_norm": 1.043906807899475, |
|
"learning_rate": 6.754207021406114e-05, |
|
"loss": 0.2284, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 7.307206068268015, |
|
"grad_norm": 1.110894799232483, |
|
"learning_rate": 6.73910767159258e-05, |
|
"loss": 0.2605, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 7.3198482932996205, |
|
"grad_norm": 1.06911039352417, |
|
"learning_rate": 6.724152571786693e-05, |
|
"loss": 0.2263, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 7.332490518331226, |
|
"grad_norm": 1.144773006439209, |
|
"learning_rate": 6.709342066551677e-05, |
|
"loss": 0.2363, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 7.345132743362832, |
|
"grad_norm": 3.6639792919158936, |
|
"learning_rate": 6.694676497119325e-05, |
|
"loss": 0.249, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 7.357774968394438, |
|
"grad_norm": 0.9481773376464844, |
|
"learning_rate": 6.680156201382128e-05, |
|
"loss": 0.2531, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 7.370417193426043, |
|
"grad_norm": 1.118088960647583, |
|
"learning_rate": 6.66578151388549e-05, |
|
"loss": 0.2158, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 7.383059418457648, |
|
"grad_norm": 1.0164135694503784, |
|
"learning_rate": 6.651552765820028e-05, |
|
"loss": 0.256, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 7.395701643489254, |
|
"grad_norm": 1.046364188194275, |
|
"learning_rate": 6.637470285013933e-05, |
|
"loss": 0.2344, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 7.40834386852086, |
|
"grad_norm": 1.0682607889175415, |
|
"learning_rate": 6.623534395925426e-05, |
|
"loss": 0.2189, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 7.420986093552465, |
|
"grad_norm": 1.1149200201034546, |
|
"learning_rate": 6.609745419635272e-05, |
|
"loss": 0.2313, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 7.433628318584071, |
|
"grad_norm": 1.2037601470947266, |
|
"learning_rate": 6.596103673839385e-05, |
|
"loss": 0.239, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 7.446270543615676, |
|
"grad_norm": 1.2147172689437866, |
|
"learning_rate": 6.582609472841519e-05, |
|
"loss": 0.253, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 7.458912768647282, |
|
"grad_norm": 1.061748743057251, |
|
"learning_rate": 6.569263127546012e-05, |
|
"loss": 0.2491, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 7.471554993678888, |
|
"grad_norm": 1.1806966066360474, |
|
"learning_rate": 6.556064945450633e-05, |
|
"loss": 0.2307, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 7.484197218710493, |
|
"grad_norm": 1.0720311403274536, |
|
"learning_rate": 6.54301523063949e-05, |
|
"loss": 0.2567, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 7.496839443742099, |
|
"grad_norm": 1.1361720561981201, |
|
"learning_rate": 6.530114283776029e-05, |
|
"loss": 0.221, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 7.509481668773704, |
|
"grad_norm": 1.7318781614303589, |
|
"learning_rate": 6.517362402096104e-05, |
|
"loss": 0.2343, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 7.522123893805309, |
|
"grad_norm": 1.2448699474334717, |
|
"learning_rate": 6.504759879401134e-05, |
|
"loss": 0.2487, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 7.534766118836915, |
|
"grad_norm": 1.144116997718811, |
|
"learning_rate": 6.492307006051322e-05, |
|
"loss": 0.2246, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 7.547408343868521, |
|
"grad_norm": 1.121053695678711, |
|
"learning_rate": 6.480004068958982e-05, |
|
"loss": 0.2345, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 7.5600505689001265, |
|
"grad_norm": 0.9634986519813538, |
|
"learning_rate": 6.46785135158191e-05, |
|
"loss": 0.2206, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 7.572692793931732, |
|
"grad_norm": 1.0400941371917725, |
|
"learning_rate": 6.455849133916868e-05, |
|
"loss": 0.2259, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 7.585335018963337, |
|
"grad_norm": 1.1151084899902344, |
|
"learning_rate": 6.44399769249313e-05, |
|
"loss": 0.2412, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.597977243994944, |
|
"grad_norm": 1.2084640264511108, |
|
"learning_rate": 6.432297300366104e-05, |
|
"loss": 0.2469, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 7.610619469026549, |
|
"grad_norm": 1.1408836841583252, |
|
"learning_rate": 6.420748227111045e-05, |
|
"loss": 0.2276, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 7.623261694058154, |
|
"grad_norm": 1.132438063621521, |
|
"learning_rate": 6.409350738816844e-05, |
|
"loss": 0.2476, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 7.63590391908976, |
|
"grad_norm": 1.0751878023147583, |
|
"learning_rate": 6.398105098079903e-05, |
|
"loss": 0.2527, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 7.648546144121365, |
|
"grad_norm": 1.1522191762924194, |
|
"learning_rate": 6.387011563998073e-05, |
|
"loss": 0.2596, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 7.6611883691529705, |
|
"grad_norm": 1.0497066974639893, |
|
"learning_rate": 6.376070392164694e-05, |
|
"loss": 0.2534, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 7.673830594184577, |
|
"grad_norm": 2.2555932998657227, |
|
"learning_rate": 6.3652818346627e-05, |
|
"loss": 0.2413, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 7.686472819216182, |
|
"grad_norm": 0.9901424646377563, |
|
"learning_rate": 6.354646140058816e-05, |
|
"loss": 0.2442, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 7.699115044247788, |
|
"grad_norm": 1.066794753074646, |
|
"learning_rate": 6.344163553397834e-05, |
|
"loss": 0.2428, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 7.711757269279393, |
|
"grad_norm": 1.0979070663452148, |
|
"learning_rate": 6.333834316196953e-05, |
|
"loss": 0.2457, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 7.724399494310998, |
|
"grad_norm": 1.1070395708084106, |
|
"learning_rate": 6.323658666440228e-05, |
|
"loss": 0.25, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 7.737041719342605, |
|
"grad_norm": 1.0736275911331177, |
|
"learning_rate": 6.313636838573086e-05, |
|
"loss": 0.2524, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 7.74968394437421, |
|
"grad_norm": 1.217236042022705, |
|
"learning_rate": 6.303769063496915e-05, |
|
"loss": 0.2707, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 7.762326169405815, |
|
"grad_norm": 1.180005669593811, |
|
"learning_rate": 6.294055568563754e-05, |
|
"loss": 0.2405, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 7.774968394437421, |
|
"grad_norm": 1.116621971130371, |
|
"learning_rate": 6.28449657757105e-05, |
|
"loss": 0.2469, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 7.787610619469026, |
|
"grad_norm": 1.0715476274490356, |
|
"learning_rate": 6.2750923107565e-05, |
|
"loss": 0.2482, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 7.8002528445006325, |
|
"grad_norm": 1.0930267572402954, |
|
"learning_rate": 6.265842984792986e-05, |
|
"loss": 0.2872, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 7.812895069532238, |
|
"grad_norm": 1.232857346534729, |
|
"learning_rate": 6.25674881278357e-05, |
|
"loss": 0.2536, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 7.825537294563843, |
|
"grad_norm": 1.1025636196136475, |
|
"learning_rate": 6.247810004256595e-05, |
|
"loss": 0.2513, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 7.838179519595449, |
|
"grad_norm": 2.9798877239227295, |
|
"learning_rate": 6.23902676516085e-05, |
|
"loss": 0.2668, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 7.850821744627054, |
|
"grad_norm": 1.3299516439437866, |
|
"learning_rate": 6.230399297860826e-05, |
|
"loss": 0.2637, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 7.86346396965866, |
|
"grad_norm": 1.1211531162261963, |
|
"learning_rate": 6.221927801132061e-05, |
|
"loss": 0.2385, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 7.876106194690266, |
|
"grad_norm": 1.2004640102386475, |
|
"learning_rate": 6.213612470156552e-05, |
|
"loss": 0.2594, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 7.888748419721871, |
|
"grad_norm": 1.0749276876449585, |
|
"learning_rate": 6.205453496518261e-05, |
|
"loss": 0.2551, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 7.9013906447534765, |
|
"grad_norm": 1.2336843013763428, |
|
"learning_rate": 6.197451068198699e-05, |
|
"loss": 0.284, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 7.914032869785082, |
|
"grad_norm": 1.194594383239746, |
|
"learning_rate": 6.189605369572598e-05, |
|
"loss": 0.2442, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 7.926675094816687, |
|
"grad_norm": 1.0185377597808838, |
|
"learning_rate": 6.181916581403667e-05, |
|
"loss": 0.2523, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 7.939317319848294, |
|
"grad_norm": 1.0479494333267212, |
|
"learning_rate": 6.174384880840409e-05, |
|
"loss": 0.2545, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 7.951959544879899, |
|
"grad_norm": 1.0949984788894653, |
|
"learning_rate": 6.167010441412064e-05, |
|
"loss": 0.2513, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 7.964601769911504, |
|
"grad_norm": 1.1074668169021606, |
|
"learning_rate": 6.159793433024597e-05, |
|
"loss": 0.2601, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 7.97724399494311, |
|
"grad_norm": 1.2110705375671387, |
|
"learning_rate": 6.152734021956782e-05, |
|
"loss": 0.2685, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 7.989886219974716, |
|
"grad_norm": 1.0655533075332642, |
|
"learning_rate": 6.145832370856379e-05, |
|
"loss": 0.2444, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 8.002528445006321, |
|
"grad_norm": 0.8317849040031433, |
|
"learning_rate": 6.139088638736378e-05, |
|
"loss": 0.2416, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 8.015170670037927, |
|
"grad_norm": 1.3935742378234863, |
|
"learning_rate": 6.132502980971345e-05, |
|
"loss": 0.1735, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 8.027812895069532, |
|
"grad_norm": 1.0203521251678467, |
|
"learning_rate": 6.12607554929383e-05, |
|
"loss": 0.1674, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 8.040455120101138, |
|
"grad_norm": 1.0844451189041138, |
|
"learning_rate": 6.119806491790886e-05, |
|
"loss": 0.1563, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 8.053097345132743, |
|
"grad_norm": 1.0661518573760986, |
|
"learning_rate": 6.113695952900643e-05, |
|
"loss": 0.1579, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 8.065739570164348, |
|
"grad_norm": 0.9967635869979858, |
|
"learning_rate": 6.107744073408987e-05, |
|
"loss": 0.1601, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 8.078381795195954, |
|
"grad_norm": 1.1493229866027832, |
|
"learning_rate": 6.10195099044632e-05, |
|
"loss": 0.1586, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 8.09102402022756, |
|
"grad_norm": 3.224154233932495, |
|
"learning_rate": 6.096316837484391e-05, |
|
"loss": 0.188, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 8.103666245259166, |
|
"grad_norm": 1.0153775215148926, |
|
"learning_rate": 6.090841744333229e-05, |
|
"loss": 0.1821, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 8.116308470290772, |
|
"grad_norm": 1.5251129865646362, |
|
"learning_rate": 6.0855258371381465e-05, |
|
"loss": 0.195, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 8.128950695322377, |
|
"grad_norm": 1.1285451650619507, |
|
"learning_rate": 6.0803692383768375e-05, |
|
"loss": 0.1559, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 8.141592920353983, |
|
"grad_norm": 1.0399773120880127, |
|
"learning_rate": 6.075372066856554e-05, |
|
"loss": 0.1609, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 8.154235145385588, |
|
"grad_norm": 1.1441960334777832, |
|
"learning_rate": 6.07053443771137e-05, |
|
"loss": 0.1731, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 8.166877370417193, |
|
"grad_norm": 1.0369312763214111, |
|
"learning_rate": 6.065856462399524e-05, |
|
"loss": 0.1661, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 8.179519595448799, |
|
"grad_norm": 1.1654633283615112, |
|
"learning_rate": 6.061338248700856e-05, |
|
"loss": 0.2005, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 8.192161820480404, |
|
"grad_norm": 1.0257656574249268, |
|
"learning_rate": 6.0569799007143233e-05, |
|
"loss": 0.1688, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 8.20480404551201, |
|
"grad_norm": 1.05653977394104, |
|
"learning_rate": 6.052781518855601e-05, |
|
"loss": 0.1732, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 8.217446270543615, |
|
"grad_norm": 0.9420139193534851, |
|
"learning_rate": 6.0487431998547705e-05, |
|
"loss": 0.1704, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.230088495575222, |
|
"grad_norm": 1.0948173999786377, |
|
"learning_rate": 6.044865036754086e-05, |
|
"loss": 0.178, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 8.242730720606827, |
|
"grad_norm": 1.1382850408554077, |
|
"learning_rate": 6.0411471189058353e-05, |
|
"loss": 0.1945, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 8.255372945638433, |
|
"grad_norm": 1.1092077493667603, |
|
"learning_rate": 6.037589531970283e-05, |
|
"loss": 0.1628, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 8.268015170670038, |
|
"grad_norm": 1.0578278303146362, |
|
"learning_rate": 6.0341923579136886e-05, |
|
"loss": 0.1815, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 8.280657395701644, |
|
"grad_norm": 1.170258641242981, |
|
"learning_rate": 6.030955675006428e-05, |
|
"loss": 0.1633, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 8.293299620733249, |
|
"grad_norm": 1.1795989274978638, |
|
"learning_rate": 6.027879557821183e-05, |
|
"loss": 0.1987, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 8.305941845764854, |
|
"grad_norm": 1.1237478256225586, |
|
"learning_rate": 6.0249640772312264e-05, |
|
"loss": 0.1878, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 8.31858407079646, |
|
"grad_norm": 1.2054628133773804, |
|
"learning_rate": 6.022209300408786e-05, |
|
"loss": 0.1765, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 8.331226295828065, |
|
"grad_norm": 1.16087806224823, |
|
"learning_rate": 6.019615290823503e-05, |
|
"loss": 0.1779, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 8.34386852085967, |
|
"grad_norm": 1.0747262239456177, |
|
"learning_rate": 6.017182108240963e-05, |
|
"loss": 0.1741, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.356510745891278, |
|
"grad_norm": 1.171136498451233, |
|
"learning_rate": 6.014909808721324e-05, |
|
"loss": 0.1928, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 8.369152970922883, |
|
"grad_norm": 1.09550940990448, |
|
"learning_rate": 6.0127984446180196e-05, |
|
"loss": 0.1745, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 8.381795195954489, |
|
"grad_norm": 1.184849739074707, |
|
"learning_rate": 6.010848064576561e-05, |
|
"loss": 0.1889, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 8.394437420986094, |
|
"grad_norm": 1.1877614259719849, |
|
"learning_rate": 6.009058713533404e-05, |
|
"loss": 0.1859, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 8.4070796460177, |
|
"grad_norm": 1.2223458290100098, |
|
"learning_rate": 6.007430432714928e-05, |
|
"loss": 0.1901, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 8.419721871049305, |
|
"grad_norm": 1.1974024772644043, |
|
"learning_rate": 6.005963259636473e-05, |
|
"loss": 0.2126, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 8.43236409608091, |
|
"grad_norm": 1.180246353149414, |
|
"learning_rate": 6.0046572281014854e-05, |
|
"loss": 0.1883, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 8.445006321112515, |
|
"grad_norm": 1.1506062746047974, |
|
"learning_rate": 6.003512368200732e-05, |
|
"loss": 0.186, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 8.45764854614412, |
|
"grad_norm": 1.1646850109100342, |
|
"learning_rate": 6.002528706311613e-05, |
|
"loss": 0.1949, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 8.470290771175726, |
|
"grad_norm": 1.0622496604919434, |
|
"learning_rate": 6.001706265097548e-05, |
|
"loss": 0.1958, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 8.482932996207332, |
|
"grad_norm": 1.1623327732086182, |
|
"learning_rate": 6.0010450635074554e-05, |
|
"loss": 0.1857, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 8.495575221238939, |
|
"grad_norm": 1.1403242349624634, |
|
"learning_rate": 6.000545116775322e-05, |
|
"loss": 0.1894, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 8.508217446270544, |
|
"grad_norm": 1.1317553520202637, |
|
"learning_rate": 6.000206436419843e-05, |
|
"loss": 0.1847, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 8.52085967130215, |
|
"grad_norm": 1.1617845296859741, |
|
"learning_rate": 6.000029030244164e-05, |
|
"loss": 0.1802, |
|
"memory/device_mem_reserved(gib)": 34.32, |
|
"memory/max_mem_active(gib)": 33.22, |
|
"memory/max_mem_allocated(gib)": 33.22, |
|
"step": 6740 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6745, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.882959280114762e+18, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|