apriasmoro's picture
Upload task output 5759f3ce-75c1-4433-a890-115eb2bf35bc
1654007 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.527180783817952,
"eval_steps": 500,
"global_step": 6745,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012642225031605562,
"grad_norm": 0.06671903282403946,
"learning_rate": 9e-06,
"loss": 0.6431,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 10
},
{
"epoch": 0.025284450063211124,
"grad_norm": 0.08679291605949402,
"learning_rate": 1.9e-05,
"loss": 0.6499,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 20
},
{
"epoch": 0.03792667509481669,
"grad_norm": 0.07710310071706772,
"learning_rate": 2.9e-05,
"loss": 0.625,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 30
},
{
"epoch": 0.05056890012642225,
"grad_norm": 0.08816391229629517,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.6109,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 40
},
{
"epoch": 0.0632111251580278,
"grad_norm": 0.1400187462568283,
"learning_rate": 4.9e-05,
"loss": 0.6043,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 50
},
{
"epoch": 0.07585335018963338,
"grad_norm": 0.08173350989818573,
"learning_rate": 5.9e-05,
"loss": 0.6308,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 60
},
{
"epoch": 0.08849557522123894,
"grad_norm": 0.09538205713033676,
"learning_rate": 6.9e-05,
"loss": 0.6078,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 70
},
{
"epoch": 0.1011378002528445,
"grad_norm": 0.10508744418621063,
"learning_rate": 7.900000000000001e-05,
"loss": 0.6266,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 80
},
{
"epoch": 0.11378002528445007,
"grad_norm": 0.13323046267032623,
"learning_rate": 8.900000000000001e-05,
"loss": 0.6393,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 90
},
{
"epoch": 0.1264222503160556,
"grad_norm": 0.13296917080879211,
"learning_rate": 9.900000000000001e-05,
"loss": 0.6361,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 100
},
{
"epoch": 0.1390644753476612,
"grad_norm": 0.14028862118721008,
"learning_rate": 0.000109,
"loss": 0.6576,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 110
},
{
"epoch": 0.15170670037926676,
"grad_norm": 0.17391778528690338,
"learning_rate": 0.000119,
"loss": 0.6084,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 120
},
{
"epoch": 0.16434892541087232,
"grad_norm": 0.1644161194562912,
"learning_rate": 0.00012900000000000002,
"loss": 0.658,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 130
},
{
"epoch": 0.17699115044247787,
"grad_norm": 0.2195376306772232,
"learning_rate": 0.000139,
"loss": 0.6274,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 140
},
{
"epoch": 0.18963337547408343,
"grad_norm": 0.2098621428012848,
"learning_rate": 0.00014900000000000002,
"loss": 0.5906,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 150
},
{
"epoch": 0.202275600505689,
"grad_norm": 0.24007147550582886,
"learning_rate": 0.00015900000000000002,
"loss": 0.6373,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 160
},
{
"epoch": 0.21491782553729458,
"grad_norm": 0.2894239127635956,
"learning_rate": 0.00016900000000000002,
"loss": 0.6371,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 170
},
{
"epoch": 0.22756005056890014,
"grad_norm": 0.23595209419727325,
"learning_rate": 0.00017900000000000001,
"loss": 0.6768,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 180
},
{
"epoch": 0.2402022756005057,
"grad_norm": 0.26761606335639954,
"learning_rate": 0.00018899999999999999,
"loss": 0.6431,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 190
},
{
"epoch": 0.2528445006321112,
"grad_norm": 0.2602802813053131,
"learning_rate": 0.000199,
"loss": 0.6443,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 200
},
{
"epoch": 0.26548672566371684,
"grad_norm": 0.4167614281177521,
"learning_rate": 0.00019999934682007068,
"loss": 0.6589,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 210
},
{
"epoch": 0.2781289506953224,
"grad_norm": 0.3008961081504822,
"learning_rate": 0.00019999708892979201,
"loss": 0.6986,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 220
},
{
"epoch": 0.29077117572692796,
"grad_norm": 0.5571665167808533,
"learning_rate": 0.00019999321831722333,
"loss": 0.6971,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 230
},
{
"epoch": 0.3034134007585335,
"grad_norm": 0.3433665335178375,
"learning_rate": 0.000199987735071543,
"loss": 0.6764,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 240
},
{
"epoch": 0.31605562579013907,
"grad_norm": 0.36731716990470886,
"learning_rate": 0.00019998063931908407,
"loss": 0.6622,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 250
},
{
"epoch": 0.32869785082174463,
"grad_norm": 0.4063098132610321,
"learning_rate": 0.0001999719312233317,
"loss": 0.6727,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 260
},
{
"epoch": 0.3413400758533502,
"grad_norm": 0.4796123504638672,
"learning_rate": 0.0001999616109849191,
"loss": 0.6683,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 270
},
{
"epoch": 0.35398230088495575,
"grad_norm": 0.35787850618362427,
"learning_rate": 0.00019994967884162285,
"loss": 0.6856,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 280
},
{
"epoch": 0.3666245259165613,
"grad_norm": 2.5480282306671143,
"learning_rate": 0.00019993613506835787,
"loss": 0.7337,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 290
},
{
"epoch": 0.37926675094816686,
"grad_norm": 132.1597137451172,
"learning_rate": 0.00019992097997717054,
"loss": 1.748,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 300
},
{
"epoch": 0.3919089759797724,
"grad_norm": 138.1280975341797,
"learning_rate": 0.00019990421391723193,
"loss": 2.0475,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 310
},
{
"epoch": 0.404551201011378,
"grad_norm": 11.301629066467285,
"learning_rate": 0.00019988583727482948,
"loss": 1.949,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 320
},
{
"epoch": 0.41719342604298354,
"grad_norm": 249.65650939941406,
"learning_rate": 0.0001998658504733583,
"loss": 1.3181,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 330
},
{
"epoch": 0.42983565107458915,
"grad_norm": 9.794897079467773,
"learning_rate": 0.0001998442539733111,
"loss": 1.588,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 340
},
{
"epoch": 0.4424778761061947,
"grad_norm": 1.245341181755066,
"learning_rate": 0.00019982104827226808,
"loss": 0.8035,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 350
},
{
"epoch": 0.45512010113780027,
"grad_norm": 1.8496769666671753,
"learning_rate": 0.00019979623390488507,
"loss": 0.7647,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 360
},
{
"epoch": 0.46776232616940583,
"grad_norm": 16.008167266845703,
"learning_rate": 0.0001997698114428813,
"loss": 0.7563,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 370
},
{
"epoch": 0.4804045512010114,
"grad_norm": 6.476025104522705,
"learning_rate": 0.00019974178149502624,
"loss": 0.8981,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 380
},
{
"epoch": 0.49304677623261695,
"grad_norm": 1.4814890623092651,
"learning_rate": 0.0001997121447071257,
"loss": 0.7543,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 390
},
{
"epoch": 0.5056890012642224,
"grad_norm": 0.695743978023529,
"learning_rate": 0.0001996809017620067,
"loss": 0.7414,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 400
},
{
"epoch": 0.5183312262958281,
"grad_norm": 9.911474227905273,
"learning_rate": 0.000199648053379502,
"loss": 0.7957,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 410
},
{
"epoch": 0.5309734513274337,
"grad_norm": 0.726256787776947,
"learning_rate": 0.00019961360031643332,
"loss": 0.7185,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 420
},
{
"epoch": 0.5436156763590392,
"grad_norm": 165.10116577148438,
"learning_rate": 0.00019957754336659392,
"loss": 0.901,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 430
},
{
"epoch": 0.5562579013906448,
"grad_norm": 41.61799621582031,
"learning_rate": 0.0001995398833607306,
"loss": 1.6791,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 440
},
{
"epoch": 0.5689001264222503,
"grad_norm": 26.158023834228516,
"learning_rate": 0.0001995006211665241,
"loss": 1.6933,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 450
},
{
"epoch": 0.5815423514538559,
"grad_norm": 1.6820884943008423,
"learning_rate": 0.00019945975768856936,
"loss": 0.8144,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 460
},
{
"epoch": 0.5941845764854614,
"grad_norm": 0.5475680828094482,
"learning_rate": 0.00019941729386835472,
"loss": 0.7117,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 470
},
{
"epoch": 0.606826801517067,
"grad_norm": 0.5968815684318542,
"learning_rate": 0.0001993732306842402,
"loss": 0.7452,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 480
},
{
"epoch": 0.6194690265486725,
"grad_norm": 3.038395404815674,
"learning_rate": 0.00019932756915143481,
"loss": 0.7365,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 490
},
{
"epoch": 0.6321112515802781,
"grad_norm": 1.1817647218704224,
"learning_rate": 0.0001992803103219733,
"loss": 0.7883,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 500
},
{
"epoch": 0.6447534766118836,
"grad_norm": 0.8520786166191101,
"learning_rate": 0.00019923145528469202,
"loss": 0.7309,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 510
},
{
"epoch": 0.6573957016434893,
"grad_norm": 0.9001318216323853,
"learning_rate": 0.00019918100516520354,
"loss": 0.8017,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 520
},
{
"epoch": 0.6700379266750948,
"grad_norm": 1.4401612281799316,
"learning_rate": 0.00019912896112587092,
"loss": 0.7814,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 530
},
{
"epoch": 0.6826801517067004,
"grad_norm": 34.98484420776367,
"learning_rate": 0.00019907532436578098,
"loss": 0.8461,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 540
},
{
"epoch": 0.695322376738306,
"grad_norm": 1.0898733139038086,
"learning_rate": 0.00019902009612071645,
"loss": 0.9027,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 550
},
{
"epoch": 0.7079646017699115,
"grad_norm": 38.014892578125,
"learning_rate": 0.00019896327766312773,
"loss": 0.8073,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 560
},
{
"epoch": 0.7206068268015171,
"grad_norm": 2.3141884803771973,
"learning_rate": 0.0001989048703021035,
"loss": 0.7773,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 570
},
{
"epoch": 0.7332490518331226,
"grad_norm": 0.8214466571807861,
"learning_rate": 0.00019884487538334038,
"loss": 0.8214,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 580
},
{
"epoch": 0.7458912768647282,
"grad_norm": 1.5186419486999512,
"learning_rate": 0.00019878329428911227,
"loss": 0.7852,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 590
},
{
"epoch": 0.7585335018963337,
"grad_norm": 1.7092262506484985,
"learning_rate": 0.00019872012843823815,
"loss": 0.8048,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 600
},
{
"epoch": 0.7711757269279393,
"grad_norm": 1.04222571849823,
"learning_rate": 0.00019865537928604967,
"loss": 0.7578,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 610
},
{
"epoch": 0.7838179519595448,
"grad_norm": 6.094375133514404,
"learning_rate": 0.00019858904832435745,
"loss": 0.8016,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 620
},
{
"epoch": 0.7964601769911505,
"grad_norm": 35.207767486572266,
"learning_rate": 0.00019852113708141675,
"loss": 0.8785,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 630
},
{
"epoch": 0.809102402022756,
"grad_norm": 2.344693422317505,
"learning_rate": 0.00019845164712189233,
"loss": 0.8803,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 640
},
{
"epoch": 0.8217446270543616,
"grad_norm": 3.754660129547119,
"learning_rate": 0.00019838058004682224,
"loss": 0.7746,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 650
},
{
"epoch": 0.8343868520859671,
"grad_norm": 0.9116389155387878,
"learning_rate": 0.0001983079374935811,
"loss": 0.756,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 660
},
{
"epoch": 0.8470290771175727,
"grad_norm": 2.610806703567505,
"learning_rate": 0.0001982337211358423,
"loss": 0.7773,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 670
},
{
"epoch": 0.8596713021491783,
"grad_norm": 10.215978622436523,
"learning_rate": 0.00019815793268353944,
"loss": 0.7995,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 680
},
{
"epoch": 0.8723135271807838,
"grad_norm": 1.8970898389816284,
"learning_rate": 0.000198080573882827,
"loss": 0.7576,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 690
},
{
"epoch": 0.8849557522123894,
"grad_norm": 4.906523704528809,
"learning_rate": 0.00019800164651603987,
"loss": 0.8217,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 700
},
{
"epoch": 0.8975979772439949,
"grad_norm": 44.493133544921875,
"learning_rate": 0.0001979211524016527,
"loss": 0.8068,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 710
},
{
"epoch": 0.9102402022756005,
"grad_norm": 348.29559326171875,
"learning_rate": 0.00019783909339423758,
"loss": 2.0656,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 720
},
{
"epoch": 0.922882427307206,
"grad_norm": 8.599038124084473,
"learning_rate": 0.00019775547138442157,
"loss": 1.7869,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 730
},
{
"epoch": 0.9355246523388117,
"grad_norm": 0.8781918287277222,
"learning_rate": 0.00019767028829884313,
"loss": 0.7973,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 740
},
{
"epoch": 0.9481668773704172,
"grad_norm": 1.5494831800460815,
"learning_rate": 0.00019758354610010753,
"loss": 0.7974,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 750
},
{
"epoch": 0.9608091024020228,
"grad_norm": 1.064113736152649,
"learning_rate": 0.00019749524678674193,
"loss": 0.8408,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 760
},
{
"epoch": 0.9734513274336283,
"grad_norm": 1.0886831283569336,
"learning_rate": 0.00019740539239314898,
"loss": 0.7968,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 770
},
{
"epoch": 0.9860935524652339,
"grad_norm": 1.2727103233337402,
"learning_rate": 0.00019731398498956036,
"loss": 0.7508,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 780
},
{
"epoch": 0.9987357774968394,
"grad_norm": 5.564798831939697,
"learning_rate": 0.00019722102668198868,
"loss": 0.7573,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 790
},
{
"epoch": 1.011378002528445,
"grad_norm": 0.7210967540740967,
"learning_rate": 0.0001971265196121792,
"loss": 0.6808,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 800
},
{
"epoch": 1.0240202275600505,
"grad_norm": 0.7716237306594849,
"learning_rate": 0.00019703046595756054,
"loss": 0.6197,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 810
},
{
"epoch": 1.0366624525916561,
"grad_norm": 0.8090146780014038,
"learning_rate": 0.00019693286793119423,
"loss": 0.6632,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 820
},
{
"epoch": 1.0493046776232617,
"grad_norm": 18.184663772583008,
"learning_rate": 0.000196833727781724,
"loss": 0.6638,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 830
},
{
"epoch": 1.0619469026548674,
"grad_norm": 0.7779182195663452,
"learning_rate": 0.0001967330477933238,
"loss": 0.6878,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 840
},
{
"epoch": 1.0745891276864727,
"grad_norm": 0.71025151014328,
"learning_rate": 0.00019663083028564527,
"loss": 0.6778,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 850
},
{
"epoch": 1.0872313527180784,
"grad_norm": 2.1243929862976074,
"learning_rate": 0.0001965270776137642,
"loss": 0.7326,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 860
},
{
"epoch": 1.099873577749684,
"grad_norm": 0.8968414068222046,
"learning_rate": 0.0001964217921681265,
"loss": 0.6792,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 870
},
{
"epoch": 1.1125158027812896,
"grad_norm": 5.795286655426025,
"learning_rate": 0.00019631497637449274,
"loss": 0.7356,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 880
},
{
"epoch": 1.125158027812895,
"grad_norm": 1.2587428092956543,
"learning_rate": 0.0001962066326938826,
"loss": 0.7505,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 890
},
{
"epoch": 1.1378002528445006,
"grad_norm": 1.1835522651672363,
"learning_rate": 0.000196096763622518,
"loss": 0.7025,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 900
},
{
"epoch": 1.1504424778761062,
"grad_norm": 36.68544387817383,
"learning_rate": 0.00019598537169176564,
"loss": 0.6972,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 910
},
{
"epoch": 1.1630847029077118,
"grad_norm": 877.5889892578125,
"learning_rate": 0.0001958724594680787,
"loss": 1.3335,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 920
},
{
"epoch": 1.1757269279393174,
"grad_norm": 536.0718383789062,
"learning_rate": 0.00019575802955293763,
"loss": 8.5679,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 930
},
{
"epoch": 1.1883691529709228,
"grad_norm": 22.20748519897461,
"learning_rate": 0.00019564208458279034,
"loss": 10.4269,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 940
},
{
"epoch": 1.2010113780025284,
"grad_norm": 58.90277862548828,
"learning_rate": 0.00019552462722899122,
"loss": 7.7899,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 950
},
{
"epoch": 1.213653603034134,
"grad_norm": 38.07368469238281,
"learning_rate": 0.00019540566019773996,
"loss": 8.0968,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 960
},
{
"epoch": 1.2262958280657394,
"grad_norm": 3.39542555809021,
"learning_rate": 0.00019528518623001878,
"loss": 7.6929,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 970
},
{
"epoch": 1.238938053097345,
"grad_norm": 8.227216720581055,
"learning_rate": 0.0001951632081015296,
"loss": 7.5381,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 980
},
{
"epoch": 1.2515802781289507,
"grad_norm": 15.710060119628906,
"learning_rate": 0.00019503972862263002,
"loss": 7.471,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 990
},
{
"epoch": 1.2642225031605563,
"grad_norm": 14.865936279296875,
"learning_rate": 0.00019491475063826842,
"loss": 7.1013,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1000
},
{
"epoch": 1.276864728192162,
"grad_norm": 11.446512222290039,
"learning_rate": 0.00019478827702791858,
"loss": 7.1631,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1010
},
{
"epoch": 1.2895069532237673,
"grad_norm": 4.988636016845703,
"learning_rate": 0.00019466031070551325,
"loss": 6.9726,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1020
},
{
"epoch": 1.302149178255373,
"grad_norm": 6.260726451873779,
"learning_rate": 0.00019453085461937705,
"loss": 6.8037,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1030
},
{
"epoch": 1.3147914032869785,
"grad_norm": 13.696749687194824,
"learning_rate": 0.00019439991175215857,
"loss": 6.823,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1040
},
{
"epoch": 1.3274336283185841,
"grad_norm": 4.492152690887451,
"learning_rate": 0.0001942674851207615,
"loss": 6.694,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1050
},
{
"epoch": 1.3400758533501897,
"grad_norm": 14.445012092590332,
"learning_rate": 0.00019413357777627534,
"loss": 6.5831,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1060
},
{
"epoch": 1.3527180783817951,
"grad_norm": 187.8795623779297,
"learning_rate": 0.00019399819280390492,
"loss": 6.4136,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1070
},
{
"epoch": 1.3653603034134008,
"grad_norm": 40.901546478271484,
"learning_rate": 0.00019386133332289948,
"loss": 5.5392,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1080
},
{
"epoch": 1.3780025284450064,
"grad_norm": 7.47464656829834,
"learning_rate": 0.00019372300248648064,
"loss": 2.8936,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1090
},
{
"epoch": 1.3906447534766118,
"grad_norm": 2.4701592922210693,
"learning_rate": 0.00019358320348176978,
"loss": 0.7719,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1100
},
{
"epoch": 1.4032869785082174,
"grad_norm": 1.2766318321228027,
"learning_rate": 0.00019344193952971486,
"loss": 0.7532,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1110
},
{
"epoch": 1.415929203539823,
"grad_norm": 1.149214744567871,
"learning_rate": 0.00019329921388501573,
"loss": 0.7712,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1120
},
{
"epoch": 1.4285714285714286,
"grad_norm": 3.015934944152832,
"learning_rate": 0.0001931550298360496,
"loss": 0.7567,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1130
},
{
"epoch": 1.4412136536030342,
"grad_norm": 0.9380026459693909,
"learning_rate": 0.00019300939070479508,
"loss": 0.7604,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1140
},
{
"epoch": 1.4538558786346396,
"grad_norm": 1.0415725708007812,
"learning_rate": 0.00019286229984675558,
"loss": 0.7313,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1150
},
{
"epoch": 1.4664981036662452,
"grad_norm": 1.5267181396484375,
"learning_rate": 0.0001927137606508821,
"loss": 0.7071,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1160
},
{
"epoch": 1.4791403286978508,
"grad_norm": 1.7632757425308228,
"learning_rate": 0.00019256377653949515,
"loss": 0.898,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1170
},
{
"epoch": 1.4917825537294565,
"grad_norm": 2.4399545192718506,
"learning_rate": 0.00019241235096820587,
"loss": 0.7592,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1180
},
{
"epoch": 1.504424778761062,
"grad_norm": 3.498751163482666,
"learning_rate": 0.00019225948742583642,
"loss": 0.8975,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1190
},
{
"epoch": 1.5170670037926675,
"grad_norm": 5.950336456298828,
"learning_rate": 0.00019210518943433953,
"loss": 0.7509,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1200
},
{
"epoch": 1.529709228824273,
"grad_norm": 13.960210800170898,
"learning_rate": 0.00019194946054871753,
"loss": 0.7932,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1210
},
{
"epoch": 1.5423514538558787,
"grad_norm": 2.3052141666412354,
"learning_rate": 0.0001917923043569403,
"loss": 0.9414,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1220
},
{
"epoch": 1.554993678887484,
"grad_norm": 1.8680328130722046,
"learning_rate": 0.0001916337244798625,
"loss": 0.7507,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1230
},
{
"epoch": 1.56763590391909,
"grad_norm": 1.9041931629180908,
"learning_rate": 0.00019147372457114045,
"loss": 0.7368,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1240
},
{
"epoch": 1.5802781289506953,
"grad_norm": 1.2977467775344849,
"learning_rate": 0.00019131230831714776,
"loss": 0.8548,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1250
},
{
"epoch": 1.592920353982301,
"grad_norm": 1.5000057220458984,
"learning_rate": 0.00019114947943689036,
"loss": 0.7704,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1260
},
{
"epoch": 1.6055625790139065,
"grad_norm": 3.4347245693206787,
"learning_rate": 0.00019098524168192094,
"loss": 0.7786,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1270
},
{
"epoch": 1.618204804045512,
"grad_norm": 2.482739210128784,
"learning_rate": 0.00019081959883625235,
"loss": 0.7569,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1280
},
{
"epoch": 1.6308470290771175,
"grad_norm": 1.322037696838379,
"learning_rate": 0.00019065255471627062,
"loss": 0.7714,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1290
},
{
"epoch": 1.6434892541087232,
"grad_norm": 1.073613166809082,
"learning_rate": 0.00019048411317064683,
"loss": 0.742,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1300
},
{
"epoch": 1.6561314791403285,
"grad_norm": 0.8305187225341797,
"learning_rate": 0.00019031427808024866,
"loss": 0.7216,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1310
},
{
"epoch": 1.6687737041719344,
"grad_norm": 1.1198879480361938,
"learning_rate": 0.0001901430533580508,
"loss": 0.7477,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1320
},
{
"epoch": 1.6814159292035398,
"grad_norm": 0.9730642437934875,
"learning_rate": 0.0001899704429490447,
"loss": 0.765,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1330
},
{
"epoch": 1.6940581542351454,
"grad_norm": 1.542136311531067,
"learning_rate": 0.00018979645083014809,
"loss": 0.7338,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1340
},
{
"epoch": 1.706700379266751,
"grad_norm": 1.3562628030776978,
"learning_rate": 0.00018962108101011285,
"loss": 0.7786,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1350
},
{
"epoch": 1.7193426042983564,
"grad_norm": 1.8742653131484985,
"learning_rate": 0.000189444337529433,
"loss": 0.7812,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1360
},
{
"epoch": 1.7319848293299622,
"grad_norm": 3.535946846008301,
"learning_rate": 0.0001892662244602515,
"loss": 0.7653,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1370
},
{
"epoch": 1.7446270543615676,
"grad_norm": 0.9589079022407532,
"learning_rate": 0.00018908674590626637,
"loss": 0.8217,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1380
},
{
"epoch": 1.7572692793931732,
"grad_norm": 2.254733085632324,
"learning_rate": 0.00018890590600263618,
"loss": 0.7535,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1390
},
{
"epoch": 1.7699115044247788,
"grad_norm": 0.8984112739562988,
"learning_rate": 0.00018872370891588491,
"loss": 0.7839,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1400
},
{
"epoch": 1.7825537294563842,
"grad_norm": 1.0168917179107666,
"learning_rate": 0.00018854015884380568,
"loss": 0.7443,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1410
},
{
"epoch": 1.7951959544879899,
"grad_norm": 0.9075338840484619,
"learning_rate": 0.00018835526001536424,
"loss": 0.7515,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1420
},
{
"epoch": 1.8078381795195955,
"grad_norm": 0.9690259695053101,
"learning_rate": 0.00018816901669060156,
"loss": 0.8179,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1430
},
{
"epoch": 1.8204804045512009,
"grad_norm": 1.043910026550293,
"learning_rate": 0.0001879814331605355,
"loss": 0.8088,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1440
},
{
"epoch": 1.8331226295828067,
"grad_norm": 0.9964724779129028,
"learning_rate": 0.00018779251374706206,
"loss": 0.7603,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1450
},
{
"epoch": 1.845764854614412,
"grad_norm": 0.896278440952301,
"learning_rate": 0.00018760226280285585,
"loss": 0.7666,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1460
},
{
"epoch": 1.8584070796460177,
"grad_norm": 1.0416340827941895,
"learning_rate": 0.00018741068471126967,
"loss": 0.7295,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1470
},
{
"epoch": 1.8710493046776233,
"grad_norm": 1.1354191303253174,
"learning_rate": 0.00018721778388623367,
"loss": 0.7552,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1480
},
{
"epoch": 1.8836915297092287,
"grad_norm": 1.0638015270233154,
"learning_rate": 0.00018702356477215352,
"loss": 0.7663,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1490
},
{
"epoch": 1.8963337547408345,
"grad_norm": 0.9380121231079102,
"learning_rate": 0.00018682803184380807,
"loss": 0.7436,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1500
},
{
"epoch": 1.90897597977244,
"grad_norm": 0.9272292256355286,
"learning_rate": 0.0001866311896062463,
"loss": 0.8219,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1510
},
{
"epoch": 1.9216182048040455,
"grad_norm": 0.9718897938728333,
"learning_rate": 0.00018643304259468346,
"loss": 0.7357,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1520
},
{
"epoch": 1.9342604298356512,
"grad_norm": 0.8963416218757629,
"learning_rate": 0.00018623359537439654,
"loss": 0.7421,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1530
},
{
"epoch": 1.9469026548672566,
"grad_norm": 0.8436943888664246,
"learning_rate": 0.0001860328525406192,
"loss": 0.8123,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1540
},
{
"epoch": 1.9595448798988622,
"grad_norm": 0.9509057998657227,
"learning_rate": 0.00018583081871843585,
"loss": 0.785,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1550
},
{
"epoch": 1.9721871049304678,
"grad_norm": 1.5439331531524658,
"learning_rate": 0.00018562749856267495,
"loss": 0.7564,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1560
},
{
"epoch": 1.9848293299620732,
"grad_norm": 1.1488640308380127,
"learning_rate": 0.00018542289675780208,
"loss": 0.7905,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1570
},
{
"epoch": 1.997471554993679,
"grad_norm": 0.8320059776306152,
"learning_rate": 0.00018521701801781172,
"loss": 0.7636,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1580
},
{
"epoch": 2.0101137800252844,
"grad_norm": 24.31561851501465,
"learning_rate": 0.00018500986708611868,
"loss": 0.7004,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1590
},
{
"epoch": 2.02275600505689,
"grad_norm": 0.9889429211616516,
"learning_rate": 0.00018480144873544898,
"loss": 0.6018,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1600
},
{
"epoch": 2.0353982300884956,
"grad_norm": 0.852366030216217,
"learning_rate": 0.0001845917677677298,
"loss": 0.6404,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1610
},
{
"epoch": 2.048040455120101,
"grad_norm": 0.8665163516998291,
"learning_rate": 0.00018438082901397866,
"loss": 0.6277,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1620
},
{
"epoch": 2.060682680151707,
"grad_norm": 0.959322452545166,
"learning_rate": 0.00018416863733419246,
"loss": 0.6274,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1630
},
{
"epoch": 2.0733249051833123,
"grad_norm": 0.7421912550926208,
"learning_rate": 0.0001839551976172352,
"loss": 0.6483,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1640
},
{
"epoch": 2.0859671302149176,
"grad_norm": 1.5782485008239746,
"learning_rate": 0.0001837405147807256,
"loss": 0.6964,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1650
},
{
"epoch": 2.0986093552465235,
"grad_norm": 0.793574869632721,
"learning_rate": 0.00018352459377092347,
"loss": 0.6323,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1660
},
{
"epoch": 2.111251580278129,
"grad_norm": 0.7756363153457642,
"learning_rate": 0.00018330743956261616,
"loss": 0.6988,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1670
},
{
"epoch": 2.1238938053097347,
"grad_norm": 0.8382811546325684,
"learning_rate": 0.0001830890571590036,
"loss": 0.6159,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1680
},
{
"epoch": 2.13653603034134,
"grad_norm": 1.7289704084396362,
"learning_rate": 0.0001828694515915831,
"loss": 0.655,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1690
},
{
"epoch": 2.1491782553729455,
"grad_norm": 0.8287073373794556,
"learning_rate": 0.00018264862792003367,
"loss": 0.6869,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1700
},
{
"epoch": 2.1618204804045513,
"grad_norm": 0.8839928507804871,
"learning_rate": 0.00018242659123209905,
"loss": 0.6807,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1710
},
{
"epoch": 2.1744627054361567,
"grad_norm": 0.9569761753082275,
"learning_rate": 0.0001822033466434708,
"loss": 0.6826,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1720
},
{
"epoch": 2.187104930467762,
"grad_norm": 1.1782281398773193,
"learning_rate": 0.00018197889929767036,
"loss": 0.6532,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1730
},
{
"epoch": 2.199747155499368,
"grad_norm": 4.368149280548096,
"learning_rate": 0.00018175325436593044,
"loss": 0.6681,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1740
},
{
"epoch": 2.2123893805309733,
"grad_norm": 0.9262805581092834,
"learning_rate": 0.00018152641704707593,
"loss": 0.6776,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1750
},
{
"epoch": 2.225031605562579,
"grad_norm": 4.026210784912109,
"learning_rate": 0.0001812983925674042,
"loss": 0.6965,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1760
},
{
"epoch": 2.2376738305941846,
"grad_norm": 0.9288873076438904,
"learning_rate": 0.00018106918618056463,
"loss": 0.7156,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1770
},
{
"epoch": 2.25031605562579,
"grad_norm": 0.9781466126441956,
"learning_rate": 0.00018083880316743757,
"loss": 0.6843,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1780
},
{
"epoch": 2.262958280657396,
"grad_norm": 0.8335726857185364,
"learning_rate": 0.00018060724883601248,
"loss": 0.6722,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1790
},
{
"epoch": 2.275600505689001,
"grad_norm": 0.8793342709541321,
"learning_rate": 0.00018037452852126613,
"loss": 0.649,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1800
},
{
"epoch": 2.288242730720607,
"grad_norm": 0.868864893913269,
"learning_rate": 0.00018014064758503908,
"loss": 0.6749,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1810
},
{
"epoch": 2.3008849557522124,
"grad_norm": 0.8861690759658813,
"learning_rate": 0.00017990561141591264,
"loss": 0.6893,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1820
},
{
"epoch": 2.313527180783818,
"grad_norm": 0.8054774403572083,
"learning_rate": 0.00017966942542908435,
"loss": 0.7254,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1830
},
{
"epoch": 2.3261694058154236,
"grad_norm": 0.9192434549331665,
"learning_rate": 0.0001794320950662435,
"loss": 0.7071,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1840
},
{
"epoch": 2.338811630847029,
"grad_norm": 1.0894279479980469,
"learning_rate": 0.0001791936257954456,
"loss": 0.6882,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1850
},
{
"epoch": 2.351453855878635,
"grad_norm": 0.976393461227417,
"learning_rate": 0.0001789540231109863,
"loss": 0.6996,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1860
},
{
"epoch": 2.3640960809102403,
"grad_norm": 1.0295621156692505,
"learning_rate": 0.0001787132925332751,
"loss": 0.7212,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1870
},
{
"epoch": 2.3767383059418457,
"grad_norm": 0.9011755585670471,
"learning_rate": 0.00017847143960870792,
"loss": 0.6803,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1880
},
{
"epoch": 2.3893805309734515,
"grad_norm": 0.9422768354415894,
"learning_rate": 0.00017822846990953942,
"loss": 0.7172,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1890
},
{
"epoch": 2.402022756005057,
"grad_norm": 0.976975679397583,
"learning_rate": 0.00017798438903375452,
"loss": 0.6627,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1900
},
{
"epoch": 2.4146649810366623,
"grad_norm": 0.8325662016868591,
"learning_rate": 0.00017773920260493942,
"loss": 0.6819,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1910
},
{
"epoch": 2.427307206068268,
"grad_norm": 0.9316614866256714,
"learning_rate": 0.00017749291627215224,
"loss": 0.6842,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1920
},
{
"epoch": 2.4399494310998735,
"grad_norm": 0.8595056533813477,
"learning_rate": 0.0001772455357097927,
"loss": 0.7084,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1930
},
{
"epoch": 2.452591656131479,
"grad_norm": 0.834000825881958,
"learning_rate": 0.00017699706661747125,
"loss": 0.6951,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1940
},
{
"epoch": 2.4652338811630847,
"grad_norm": 0.7746726274490356,
"learning_rate": 0.0001767475147198781,
"loss": 0.7076,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1950
},
{
"epoch": 2.47787610619469,
"grad_norm": 5.648841857910156,
"learning_rate": 0.00017649688576665094,
"loss": 0.6874,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1960
},
{
"epoch": 2.490518331226296,
"grad_norm": 0.8709747195243835,
"learning_rate": 0.00017624518553224295,
"loss": 0.7033,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1970
},
{
"epoch": 2.5031605562579013,
"grad_norm": 1.2027637958526611,
"learning_rate": 0.00017599241981578904,
"loss": 0.6945,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1980
},
{
"epoch": 2.5158027812895067,
"grad_norm": 0.866089403629303,
"learning_rate": 0.00017573859444097308,
"loss": 0.6611,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 1990
},
{
"epoch": 2.5284450063211126,
"grad_norm": 2.6877481937408447,
"learning_rate": 0.00017548371525589302,
"loss": 0.6922,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2000
},
{
"epoch": 2.541087231352718,
"grad_norm": 1.8271033763885498,
"learning_rate": 0.0001752277881329266,
"loss": 0.7011,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2010
},
{
"epoch": 2.553729456384324,
"grad_norm": 3.121169328689575,
"learning_rate": 0.0001749708189685958,
"loss": 0.7012,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2020
},
{
"epoch": 2.566371681415929,
"grad_norm": 1.5094399452209473,
"learning_rate": 0.00017471281368343114,
"loss": 0.6682,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2030
},
{
"epoch": 2.5790139064475346,
"grad_norm": 1.1823444366455078,
"learning_rate": 0.00017445377822183518,
"loss": 0.6828,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2040
},
{
"epoch": 2.5916561314791404,
"grad_norm": 2.187333106994629,
"learning_rate": 0.00017419371855194551,
"loss": 0.65,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2050
},
{
"epoch": 2.604298356510746,
"grad_norm": 1.0692399740219116,
"learning_rate": 0.00017393264066549753,
"loss": 0.6652,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2060
},
{
"epoch": 2.6169405815423517,
"grad_norm": 0.8324422240257263,
"learning_rate": 0.00017367055057768588,
"loss": 0.6999,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2070
},
{
"epoch": 2.629582806573957,
"grad_norm": 0.9880168437957764,
"learning_rate": 0.00017340745432702654,
"loss": 0.6859,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2080
},
{
"epoch": 2.6422250316055624,
"grad_norm": 2.551191568374634,
"learning_rate": 0.00017314335797521705,
"loss": 0.6948,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2090
},
{
"epoch": 2.6548672566371683,
"grad_norm": 0.9405047297477722,
"learning_rate": 0.0001728782676069972,
"loss": 0.6906,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2100
},
{
"epoch": 2.6675094816687737,
"grad_norm": 5.015996932983398,
"learning_rate": 0.00017261218933000878,
"loss": 0.6867,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2110
},
{
"epoch": 2.6801517067003795,
"grad_norm": 0.932569682598114,
"learning_rate": 0.00017234512927465488,
"loss": 0.7304,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2120
},
{
"epoch": 2.692793931731985,
"grad_norm": 1.071932315826416,
"learning_rate": 0.0001720770935939586,
"loss": 0.7261,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2130
},
{
"epoch": 2.7054361567635903,
"grad_norm": 0.8238343596458435,
"learning_rate": 0.00017180808846342118,
"loss": 0.7313,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2140
},
{
"epoch": 2.718078381795196,
"grad_norm": 1.3495972156524658,
"learning_rate": 0.0001715381200808801,
"loss": 0.7418,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2150
},
{
"epoch": 2.7307206068268015,
"grad_norm": 0.8959026336669922,
"learning_rate": 0.00017126719466636572,
"loss": 0.6729,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2160
},
{
"epoch": 2.7433628318584073,
"grad_norm": 0.8978679180145264,
"learning_rate": 0.0001709953184619585,
"loss": 0.7,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2170
},
{
"epoch": 2.7560050568900127,
"grad_norm": 1.033858060836792,
"learning_rate": 0.00017072249773164485,
"loss": 0.7142,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2180
},
{
"epoch": 2.768647281921618,
"grad_norm": 0.9381289482116699,
"learning_rate": 0.0001704487387611729,
"loss": 0.7362,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2190
},
{
"epoch": 2.7812895069532235,
"grad_norm": 1.0184166431427002,
"learning_rate": 0.00017017404785790773,
"loss": 0.7133,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2200
},
{
"epoch": 2.7939317319848294,
"grad_norm": 0.9085473418235779,
"learning_rate": 0.00016989843135068605,
"loss": 0.6982,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2210
},
{
"epoch": 2.8065739570164348,
"grad_norm": 0.8378614783287048,
"learning_rate": 0.00016962189558967022,
"loss": 0.6794,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2220
},
{
"epoch": 2.8192161820480406,
"grad_norm": 0.9050717949867249,
"learning_rate": 0.00016934444694620217,
"loss": 0.6967,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2230
},
{
"epoch": 2.831858407079646,
"grad_norm": 0.8742629289627075,
"learning_rate": 0.00016906609181265654,
"loss": 0.679,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2240
},
{
"epoch": 2.8445006321112514,
"grad_norm": 1.250222086906433,
"learning_rate": 0.0001687868366022932,
"loss": 0.6866,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2250
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.7830986380577087,
"learning_rate": 0.0001685066877491098,
"loss": 0.7064,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2260
},
{
"epoch": 2.8697850821744626,
"grad_norm": 0.837334394454956,
"learning_rate": 0.0001682256517076933,
"loss": 0.676,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2270
},
{
"epoch": 2.8824273072060684,
"grad_norm": 1.7227693796157837,
"learning_rate": 0.00016794373495307148,
"loss": 0.6901,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2280
},
{
"epoch": 2.895069532237674,
"grad_norm": 0.7620822191238403,
"learning_rate": 0.00016766094398056337,
"loss": 0.687,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2290
},
{
"epoch": 2.907711757269279,
"grad_norm": 0.8214982748031616,
"learning_rate": 0.00016737728530563013,
"loss": 0.7061,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2300
},
{
"epoch": 2.920353982300885,
"grad_norm": 0.9066684246063232,
"learning_rate": 0.00016709276546372448,
"loss": 0.7271,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2310
},
{
"epoch": 2.9329962073324904,
"grad_norm": 0.9356798529624939,
"learning_rate": 0.00016680739101014024,
"loss": 0.6965,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2320
},
{
"epoch": 2.9456384323640963,
"grad_norm": 0.8414567112922668,
"learning_rate": 0.0001665211685198616,
"loss": 0.6829,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2330
},
{
"epoch": 2.9582806573957017,
"grad_norm": 0.9581737518310547,
"learning_rate": 0.0001662341045874111,
"loss": 0.6781,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2340
},
{
"epoch": 2.970922882427307,
"grad_norm": 0.7672229409217834,
"learning_rate": 0.0001659462058266982,
"loss": 0.7107,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2350
},
{
"epoch": 2.983565107458913,
"grad_norm": 0.8876848816871643,
"learning_rate": 0.0001656574788708665,
"loss": 0.682,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2360
},
{
"epoch": 2.9962073324905183,
"grad_norm": 0.7291796207427979,
"learning_rate": 0.00016536793037214134,
"loss": 0.7012,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2370
},
{
"epoch": 3.0088495575221237,
"grad_norm": 1.178667426109314,
"learning_rate": 0.00016507756700167588,
"loss": 0.5861,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2380
},
{
"epoch": 3.0214917825537295,
"grad_norm": 1.044280767440796,
"learning_rate": 0.00016478639544939826,
"loss": 0.5248,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2390
},
{
"epoch": 3.034134007585335,
"grad_norm": 0.8499409556388855,
"learning_rate": 0.00016449442242385672,
"loss": 0.5314,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2400
},
{
"epoch": 3.0467762326169407,
"grad_norm": 0.8145996332168579,
"learning_rate": 0.00016420165465206535,
"loss": 0.5681,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2410
},
{
"epoch": 3.059418457648546,
"grad_norm": 0.8090763688087463,
"learning_rate": 0.00016390809887934914,
"loss": 0.4982,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2420
},
{
"epoch": 3.0720606826801515,
"grad_norm": 0.7884716391563416,
"learning_rate": 0.00016361376186918846,
"loss": 0.5338,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2430
},
{
"epoch": 3.0847029077117574,
"grad_norm": 1.035247564315796,
"learning_rate": 0.00016331865040306335,
"loss": 0.521,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2440
},
{
"epoch": 3.0973451327433628,
"grad_norm": 1.029201865196228,
"learning_rate": 0.00016302277128029706,
"loss": 0.5391,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2450
},
{
"epoch": 3.1099873577749686,
"grad_norm": 0.8100953102111816,
"learning_rate": 0.00016272613131789964,
"loss": 0.5141,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2460
},
{
"epoch": 3.122629582806574,
"grad_norm": 1.0345860719680786,
"learning_rate": 0.0001624287373504107,
"loss": 0.5576,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2470
},
{
"epoch": 3.1352718078381794,
"grad_norm": 0.9381860494613647,
"learning_rate": 0.00016213059622974214,
"loss": 0.5373,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2480
},
{
"epoch": 3.147914032869785,
"grad_norm": 0.8504341244697571,
"learning_rate": 0.00016183171482502003,
"loss": 0.5312,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2490
},
{
"epoch": 3.1605562579013906,
"grad_norm": 1.0047380924224854,
"learning_rate": 0.00016153210002242644,
"loss": 0.5515,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2500
},
{
"epoch": 3.173198482932996,
"grad_norm": 0.8505437970161438,
"learning_rate": 0.00016123175872504098,
"loss": 0.5257,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2510
},
{
"epoch": 3.185840707964602,
"grad_norm": 1.0271879434585571,
"learning_rate": 0.00016093069785268137,
"loss": 0.5785,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2520
},
{
"epoch": 3.1984829329962072,
"grad_norm": 1.0047165155410767,
"learning_rate": 0.00016062892434174443,
"loss": 0.5373,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2530
},
{
"epoch": 3.211125158027813,
"grad_norm": 0.9564666152000427,
"learning_rate": 0.00016032644514504604,
"loss": 0.5285,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2540
},
{
"epoch": 3.2237673830594185,
"grad_norm": 0.959581732749939,
"learning_rate": 0.00016002326723166084,
"loss": 0.5813,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2550
},
{
"epoch": 3.236409608091024,
"grad_norm": 1.3242567777633667,
"learning_rate": 0.00015971939758676186,
"loss": 0.5669,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2560
},
{
"epoch": 3.2490518331226297,
"grad_norm": 0.9959767460823059,
"learning_rate": 0.00015941484321145953,
"loss": 0.5766,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2570
},
{
"epoch": 3.261694058154235,
"grad_norm": 0.8573315739631653,
"learning_rate": 0.0001591096111226405,
"loss": 0.5421,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2580
},
{
"epoch": 3.274336283185841,
"grad_norm": 0.8555790781974792,
"learning_rate": 0.00015880370835280553,
"loss": 0.5606,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2590
},
{
"epoch": 3.2869785082174463,
"grad_norm": 1.0024107694625854,
"learning_rate": 0.00015849714194990803,
"loss": 0.5406,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2600
},
{
"epoch": 3.2996207332490517,
"grad_norm": 0.867758572101593,
"learning_rate": 0.00015818991897719134,
"loss": 0.5825,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2610
},
{
"epoch": 3.3122629582806575,
"grad_norm": 0.828178346157074,
"learning_rate": 0.00015788204651302602,
"loss": 0.5528,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2620
},
{
"epoch": 3.324905183312263,
"grad_norm": 0.9778569936752319,
"learning_rate": 0.00015757353165074685,
"loss": 0.5857,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2630
},
{
"epoch": 3.3375474083438688,
"grad_norm": 0.9606329798698425,
"learning_rate": 0.0001572643814984894,
"loss": 0.6056,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2640
},
{
"epoch": 3.350189633375474,
"grad_norm": 0.8577843308448792,
"learning_rate": 0.00015695460317902615,
"loss": 0.6096,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2650
},
{
"epoch": 3.3628318584070795,
"grad_norm": 0.8798738718032837,
"learning_rate": 0.00015664420382960256,
"loss": 0.5979,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2660
},
{
"epoch": 3.3754740834386854,
"grad_norm": 0.887492835521698,
"learning_rate": 0.00015633319060177233,
"loss": 0.5962,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2670
},
{
"epoch": 3.3881163084702908,
"grad_norm": 0.8709145784378052,
"learning_rate": 0.00015602157066123311,
"loss": 0.5647,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2680
},
{
"epoch": 3.400758533501896,
"grad_norm": 0.8413789868354797,
"learning_rate": 0.00015570935118766087,
"loss": 0.5846,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2690
},
{
"epoch": 3.413400758533502,
"grad_norm": 0.9737523198127747,
"learning_rate": 0.00015539653937454487,
"loss": 0.5963,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2700
},
{
"epoch": 3.4260429835651074,
"grad_norm": 1.0053389072418213,
"learning_rate": 0.00015508314242902173,
"loss": 0.599,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2710
},
{
"epoch": 3.438685208596713,
"grad_norm": 0.9921556115150452,
"learning_rate": 0.00015476916757170943,
"loss": 0.5698,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2720
},
{
"epoch": 3.4513274336283186,
"grad_norm": 0.9468759298324585,
"learning_rate": 0.00015445462203654098,
"loss": 0.5886,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2730
},
{
"epoch": 3.463969658659924,
"grad_norm": 0.9463483095169067,
"learning_rate": 0.0001541395130705977,
"loss": 0.5829,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2740
},
{
"epoch": 3.47661188369153,
"grad_norm": 0.9554671049118042,
"learning_rate": 0.00015382384793394223,
"loss": 0.6186,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2750
},
{
"epoch": 3.4892541087231352,
"grad_norm": 0.7925019860267639,
"learning_rate": 0.0001535076338994514,
"loss": 0.5796,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2760
},
{
"epoch": 3.5018963337547406,
"grad_norm": 0.92326819896698,
"learning_rate": 0.00015319087825264846,
"loss": 0.5647,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2770
},
{
"epoch": 3.5145385587863465,
"grad_norm": 0.9871057868003845,
"learning_rate": 0.0001528735882915354,
"loss": 0.5622,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2780
},
{
"epoch": 3.527180783817952,
"grad_norm": 0.9997586607933044,
"learning_rate": 0.00015255577132642468,
"loss": 0.629,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2790
},
{
"epoch": 3.5398230088495577,
"grad_norm": 0.8749852180480957,
"learning_rate": 0.00015223743467977088,
"loss": 0.5883,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2800
},
{
"epoch": 3.552465233881163,
"grad_norm": 0.8085633516311646,
"learning_rate": 0.00015191858568600194,
"loss": 0.5713,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2810
},
{
"epoch": 3.5651074589127685,
"grad_norm": 0.951021134853363,
"learning_rate": 0.00015159923169135025,
"loss": 0.5965,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2820
},
{
"epoch": 3.5777496839443743,
"grad_norm": 0.9590179324150085,
"learning_rate": 0.00015127938005368323,
"loss": 0.5678,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2830
},
{
"epoch": 3.5903919089759797,
"grad_norm": 0.9921982884407043,
"learning_rate": 0.0001509590381423341,
"loss": 0.6115,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2840
},
{
"epoch": 3.6030341340075855,
"grad_norm": 0.8661071062088013,
"learning_rate": 0.00015063821333793172,
"loss": 0.6495,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2850
},
{
"epoch": 3.615676359039191,
"grad_norm": 0.8504185080528259,
"learning_rate": 0.00015031691303223088,
"loss": 0.5922,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2860
},
{
"epoch": 3.6283185840707963,
"grad_norm": 0.8301743865013123,
"learning_rate": 0.00014999514462794175,
"loss": 0.6227,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2870
},
{
"epoch": 3.640960809102402,
"grad_norm": 0.8586485385894775,
"learning_rate": 0.0001496729155385595,
"loss": 0.5801,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2880
},
{
"epoch": 3.6536030341340076,
"grad_norm": 0.8772161602973938,
"learning_rate": 0.00014935023318819334,
"loss": 0.5712,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2890
},
{
"epoch": 3.6662452591656134,
"grad_norm": 0.8610823750495911,
"learning_rate": 0.00014902710501139556,
"loss": 0.6007,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2900
},
{
"epoch": 3.678887484197219,
"grad_norm": 0.8283450603485107,
"learning_rate": 0.0001487035384529903,
"loss": 0.5757,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2910
},
{
"epoch": 3.691529709228824,
"grad_norm": 0.9658201336860657,
"learning_rate": 0.00014837954096790182,
"loss": 0.5899,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2920
},
{
"epoch": 3.7041719342604296,
"grad_norm": 0.8245115280151367,
"learning_rate": 0.000148055120020983,
"loss": 0.6165,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2930
},
{
"epoch": 3.7168141592920354,
"grad_norm": 0.9648094177246094,
"learning_rate": 0.00014773028308684308,
"loss": 0.6212,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2940
},
{
"epoch": 3.7294563843236412,
"grad_norm": 0.8854801654815674,
"learning_rate": 0.00014740503764967572,
"loss": 0.5777,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2950
},
{
"epoch": 3.7420986093552466,
"grad_norm": 0.8945504426956177,
"learning_rate": 0.0001470793912030863,
"loss": 0.6091,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2960
},
{
"epoch": 3.754740834386852,
"grad_norm": 0.8189816474914551,
"learning_rate": 0.00014675335124991946,
"loss": 0.6035,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2970
},
{
"epoch": 3.7673830594184574,
"grad_norm": 0.990737795829773,
"learning_rate": 0.0001464269253020862,
"loss": 0.5983,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2980
},
{
"epoch": 3.7800252844500632,
"grad_norm": 0.8247061371803284,
"learning_rate": 0.00014610012088039077,
"loss": 0.6056,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 2990
},
{
"epoch": 3.7926675094816686,
"grad_norm": 0.8422549962997437,
"learning_rate": 0.00014577294551435728,
"loss": 0.6077,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3000
},
{
"epoch": 3.8053097345132745,
"grad_norm": 0.9468559622764587,
"learning_rate": 0.00014544540674205647,
"loss": 0.592,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3010
},
{
"epoch": 3.81795195954488,
"grad_norm": 0.8015314340591431,
"learning_rate": 0.0001451175121099319,
"loss": 0.5701,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3020
},
{
"epoch": 3.8305941845764853,
"grad_norm": 0.896016001701355,
"learning_rate": 0.00014478926917262607,
"loss": 0.5985,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3030
},
{
"epoch": 3.843236409608091,
"grad_norm": 0.965329110622406,
"learning_rate": 0.00014446068549280633,
"loss": 0.5693,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3040
},
{
"epoch": 3.8558786346396965,
"grad_norm": 1.032674789428711,
"learning_rate": 0.0001441317686409907,
"loss": 0.6207,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3050
},
{
"epoch": 3.8685208596713023,
"grad_norm": 1.4689821004867554,
"learning_rate": 0.00014380252619537355,
"loss": 0.6192,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3060
},
{
"epoch": 3.8811630847029077,
"grad_norm": 0.9344895482063293,
"learning_rate": 0.00014347296574165067,
"loss": 0.5951,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3070
},
{
"epoch": 3.893805309734513,
"grad_norm": 0.9095802903175354,
"learning_rate": 0.00014314309487284486,
"loss": 0.609,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3080
},
{
"epoch": 3.906447534766119,
"grad_norm": 0.9843701720237732,
"learning_rate": 0.00014281292118913084,
"loss": 0.6107,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3090
},
{
"epoch": 3.9190897597977243,
"grad_norm": 0.8768340349197388,
"learning_rate": 0.00014248245229766005,
"loss": 0.6268,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3100
},
{
"epoch": 3.93173198482933,
"grad_norm": 0.9411798715591431,
"learning_rate": 0.00014215169581238558,
"loss": 0.6191,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3110
},
{
"epoch": 3.9443742098609356,
"grad_norm": 0.8732224106788635,
"learning_rate": 0.0001418206593538865,
"loss": 0.614,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3120
},
{
"epoch": 3.957016434892541,
"grad_norm": 0.8646383285522461,
"learning_rate": 0.00014148935054919258,
"loss": 0.6135,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3130
},
{
"epoch": 3.969658659924147,
"grad_norm": 0.8871061205863953,
"learning_rate": 0.00014115777703160824,
"loss": 0.5987,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3140
},
{
"epoch": 3.982300884955752,
"grad_norm": 0.7898637652397156,
"learning_rate": 0.00014082594644053702,
"loss": 0.6069,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3150
},
{
"epoch": 3.994943109987358,
"grad_norm": 0.8474721908569336,
"learning_rate": 0.00014049386642130522,
"loss": 0.5762,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3160
},
{
"epoch": 4.007585335018963,
"grad_norm": 1.1347519159317017,
"learning_rate": 0.0001401615446249861,
"loss": 0.4878,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3170
},
{
"epoch": 4.020227560050569,
"grad_norm": 1.0003758668899536,
"learning_rate": 0.00013982898870822322,
"loss": 0.4266,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3180
},
{
"epoch": 4.032869785082174,
"grad_norm": 1.7353389263153076,
"learning_rate": 0.00013949620633305445,
"loss": 0.4278,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3190
},
{
"epoch": 4.04551201011378,
"grad_norm": 0.8438617587089539,
"learning_rate": 0.00013916320516673512,
"loss": 0.4255,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3200
},
{
"epoch": 4.058154235145386,
"grad_norm": 0.9081391096115112,
"learning_rate": 0.00013882999288156145,
"loss": 0.4332,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3210
},
{
"epoch": 4.070796460176991,
"grad_norm": 0.8712509274482727,
"learning_rate": 0.00013849657715469385,
"loss": 0.4263,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3220
},
{
"epoch": 4.083438685208597,
"grad_norm": 0.8926701545715332,
"learning_rate": 0.00013816296566798006,
"loss": 0.4265,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3230
},
{
"epoch": 4.096080910240202,
"grad_norm": 1.0100903511047363,
"learning_rate": 0.00013782916610777793,
"loss": 0.4601,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3240
},
{
"epoch": 4.108723135271807,
"grad_norm": 0.9108811616897583,
"learning_rate": 0.00013749518616477867,
"loss": 0.4426,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3250
},
{
"epoch": 4.121365360303414,
"grad_norm": 1.0556674003601074,
"learning_rate": 0.00013716103353382937,
"loss": 0.4641,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3260
},
{
"epoch": 4.134007585335019,
"grad_norm": 0.8797064423561096,
"learning_rate": 0.0001368267159137559,
"loss": 0.4522,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3270
},
{
"epoch": 4.1466498103666245,
"grad_norm": 0.9286285042762756,
"learning_rate": 0.0001364922410071853,
"loss": 0.4684,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3280
},
{
"epoch": 4.15929203539823,
"grad_norm": 0.9558693170547485,
"learning_rate": 0.00013615761652036872,
"loss": 0.4597,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3290
},
{
"epoch": 4.171934260429835,
"grad_norm": 0.8957265615463257,
"learning_rate": 0.00013582285016300338,
"loss": 0.5033,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3300
},
{
"epoch": 4.184576485461442,
"grad_norm": 0.8720874786376953,
"learning_rate": 0.00013548794964805531,
"loss": 0.4636,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3310
},
{
"epoch": 4.197218710493047,
"grad_norm": 0.9207468628883362,
"learning_rate": 0.0001351529226915815,
"loss": 0.4555,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3320
},
{
"epoch": 4.209860935524652,
"grad_norm": 0.8886120319366455,
"learning_rate": 0.000134817777012552,
"loss": 0.4391,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3330
},
{
"epoch": 4.222503160556258,
"grad_norm": 0.9986599087715149,
"learning_rate": 0.00013448252033267246,
"loss": 0.4848,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3340
},
{
"epoch": 4.235145385587863,
"grad_norm": 3.081392288208008,
"learning_rate": 0.0001341471603762057,
"loss": 0.5096,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3350
},
{
"epoch": 4.247787610619469,
"grad_norm": 1.0110422372817993,
"learning_rate": 0.00013381170486979427,
"loss": 0.4758,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3360
},
{
"epoch": 4.260429835651075,
"grad_norm": 0.9332578182220459,
"learning_rate": 0.00013347616154228193,
"loss": 0.4607,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3370
},
{
"epoch": 4.27307206068268,
"grad_norm": 1.1386651992797852,
"learning_rate": 0.00013314053812453605,
"loss": 0.4882,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3380
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.8812234401702881,
"learning_rate": 0.0001328048423492691,
"loss": 0.454,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3390
},
{
"epoch": 4.298356510745891,
"grad_norm": 0.9429104328155518,
"learning_rate": 0.00013246908195086072,
"loss": 0.4724,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3400
},
{
"epoch": 4.310998735777497,
"grad_norm": 0.9410486817359924,
"learning_rate": 0.0001321332646651795,
"loss": 0.4516,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3410
},
{
"epoch": 4.323640960809103,
"grad_norm": 0.9896162748336792,
"learning_rate": 0.00013179739822940454,
"loss": 0.4949,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3420
},
{
"epoch": 4.336283185840708,
"grad_norm": 0.9165130853652954,
"learning_rate": 0.00013146149038184768,
"loss": 0.487,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3430
},
{
"epoch": 4.348925410872313,
"grad_norm": 2.110687494277954,
"learning_rate": 0.00013112554886177447,
"loss": 0.5062,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3440
},
{
"epoch": 4.361567635903919,
"grad_norm": 0.8859379887580872,
"learning_rate": 0.0001307895814092266,
"loss": 0.4587,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3450
},
{
"epoch": 4.374209860935524,
"grad_norm": 1.0231775045394897,
"learning_rate": 0.00013045359576484305,
"loss": 0.5083,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3460
},
{
"epoch": 4.3868520859671305,
"grad_norm": 1.0273702144622803,
"learning_rate": 0.00013011759966968204,
"loss": 0.4849,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3470
},
{
"epoch": 4.399494310998736,
"grad_norm": 0.9449805617332458,
"learning_rate": 0.0001297816008650425,
"loss": 0.493,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3480
},
{
"epoch": 4.412136536030341,
"grad_norm": 0.8178017735481262,
"learning_rate": 0.00012944560709228587,
"loss": 0.464,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3490
},
{
"epoch": 4.424778761061947,
"grad_norm": 1.0193867683410645,
"learning_rate": 0.00012910962609265754,
"loss": 0.4721,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3500
},
{
"epoch": 4.437420986093552,
"grad_norm": 1.1380479335784912,
"learning_rate": 0.00012877366560710868,
"loss": 0.4589,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3510
},
{
"epoch": 4.450063211125158,
"grad_norm": 0.8772681951522827,
"learning_rate": 0.00012843773337611788,
"loss": 0.4642,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3520
},
{
"epoch": 4.462705436156764,
"grad_norm": 0.9058607220649719,
"learning_rate": 0.00012810183713951264,
"loss": 0.5033,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3530
},
{
"epoch": 4.475347661188369,
"grad_norm": 0.938266932964325,
"learning_rate": 0.00012776598463629118,
"loss": 0.5098,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3540
},
{
"epoch": 4.4879898862199745,
"grad_norm": 1.0325732231140137,
"learning_rate": 0.00012743018360444422,
"loss": 0.4833,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3550
},
{
"epoch": 4.50063211125158,
"grad_norm": 0.8300301432609558,
"learning_rate": 0.0001270944417807763,
"loss": 0.4815,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3560
},
{
"epoch": 4.513274336283186,
"grad_norm": 0.941461443901062,
"learning_rate": 0.00012675876690072823,
"loss": 0.4942,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3570
},
{
"epoch": 4.525916561314792,
"grad_norm": 0.8629696369171143,
"learning_rate": 0.00012642316669819812,
"loss": 0.5091,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3580
},
{
"epoch": 4.538558786346397,
"grad_norm": 0.9793810844421387,
"learning_rate": 0.0001260876489053636,
"loss": 0.52,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3590
},
{
"epoch": 4.551201011378002,
"grad_norm": 0.9196791052818298,
"learning_rate": 0.00012575222125250365,
"loss": 0.4884,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3600
},
{
"epoch": 4.563843236409608,
"grad_norm": 1.0433666706085205,
"learning_rate": 0.00012541689146782048,
"loss": 0.5041,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3610
},
{
"epoch": 4.576485461441214,
"grad_norm": 1.0952868461608887,
"learning_rate": 0.00012508166727726128,
"loss": 0.5117,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3620
},
{
"epoch": 4.589127686472819,
"grad_norm": 1.039157748222351,
"learning_rate": 0.00012474655640434042,
"loss": 0.5028,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3630
},
{
"epoch": 4.601769911504425,
"grad_norm": 1.044838786125183,
"learning_rate": 0.00012441156656996155,
"loss": 0.4941,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3640
},
{
"epoch": 4.61441213653603,
"grad_norm": 1.0558874607086182,
"learning_rate": 0.00012407670549223953,
"loss": 0.516,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3650
},
{
"epoch": 4.627054361567636,
"grad_norm": 0.9311762452125549,
"learning_rate": 0.0001237419808863227,
"loss": 0.4933,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3660
},
{
"epoch": 4.639696586599241,
"grad_norm": 1.0576010942459106,
"learning_rate": 0.00012340740046421506,
"loss": 0.5119,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3670
},
{
"epoch": 4.652338811630847,
"grad_norm": 0.9502875208854675,
"learning_rate": 0.0001230729719345987,
"loss": 0.4875,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3680
},
{
"epoch": 4.664981036662453,
"grad_norm": 0.9513876438140869,
"learning_rate": 0.00012273870300265612,
"loss": 0.4836,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3690
},
{
"epoch": 4.677623261694058,
"grad_norm": 1.0516324043273926,
"learning_rate": 0.00012240460136989274,
"loss": 0.5168,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3700
},
{
"epoch": 4.6902654867256635,
"grad_norm": 0.9066925644874573,
"learning_rate": 0.00012207067473395935,
"loss": 0.4947,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3710
},
{
"epoch": 4.70290771175727,
"grad_norm": 0.9543781876564026,
"learning_rate": 0.00012173693078847487,
"loss": 0.5155,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3720
},
{
"epoch": 4.715549936788875,
"grad_norm": 0.9955562949180603,
"learning_rate": 0.00012140337722284914,
"loss": 0.5302,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3730
},
{
"epoch": 4.7281921618204805,
"grad_norm": 4.362971305847168,
"learning_rate": 0.00012107002172210559,
"loss": 0.5438,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3740
},
{
"epoch": 4.740834386852086,
"grad_norm": 1.0576658248901367,
"learning_rate": 0.00012073687196670429,
"loss": 0.536,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3750
},
{
"epoch": 4.753476611883691,
"grad_norm": 0.946419894695282,
"learning_rate": 0.00012040393563236494,
"loss": 0.5253,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3760
},
{
"epoch": 4.766118836915297,
"grad_norm": 0.9340927004814148,
"learning_rate": 0.00012007122038989012,
"loss": 0.5117,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3770
},
{
"epoch": 4.778761061946903,
"grad_norm": 0.9391945600509644,
"learning_rate": 0.00011973873390498841,
"loss": 0.5132,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3780
},
{
"epoch": 4.791403286978508,
"grad_norm": 0.9951459169387817,
"learning_rate": 0.00011940648383809794,
"loss": 0.5356,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3790
},
{
"epoch": 4.804045512010114,
"grad_norm": 1.0087045431137085,
"learning_rate": 0.00011907447784420974,
"loss": 0.4949,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3800
},
{
"epoch": 4.816687737041719,
"grad_norm": 1.0418733358383179,
"learning_rate": 0.00011874272357269138,
"loss": 0.5044,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3810
},
{
"epoch": 4.8293299620733245,
"grad_norm": 0.9647939801216125,
"learning_rate": 0.0001184112286671109,
"loss": 0.519,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3820
},
{
"epoch": 4.841972187104931,
"grad_norm": 0.9896367788314819,
"learning_rate": 0.00011808000076506056,
"loss": 0.5376,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3830
},
{
"epoch": 4.854614412136536,
"grad_norm": 1.1160699129104614,
"learning_rate": 0.00011774904749798086,
"loss": 0.4941,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3840
},
{
"epoch": 4.867256637168142,
"grad_norm": 0.9226526021957397,
"learning_rate": 0.00011741837649098477,
"loss": 0.5044,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3850
},
{
"epoch": 4.879898862199747,
"grad_norm": 0.959432065486908,
"learning_rate": 0.00011708799536268202,
"loss": 0.5051,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3860
},
{
"epoch": 4.892541087231352,
"grad_norm": 0.8908069729804993,
"learning_rate": 0.0001167579117250036,
"loss": 0.5226,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3870
},
{
"epoch": 4.905183312262958,
"grad_norm": 0.8914538025856018,
"learning_rate": 0.00011642813318302639,
"loss": 0.4971,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3880
},
{
"epoch": 4.917825537294564,
"grad_norm": 0.940838098526001,
"learning_rate": 0.00011609866733479784,
"loss": 0.5349,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3890
},
{
"epoch": 4.9304677623261695,
"grad_norm": 0.9459583759307861,
"learning_rate": 0.00011576952177116095,
"loss": 0.5137,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3900
},
{
"epoch": 4.943109987357775,
"grad_norm": 0.988993227481842,
"learning_rate": 0.00011544070407557961,
"loss": 0.5061,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3910
},
{
"epoch": 4.95575221238938,
"grad_norm": 0.8528466820716858,
"learning_rate": 0.00011511222182396349,
"loss": 0.4997,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3920
},
{
"epoch": 4.9683944374209865,
"grad_norm": 0.9346151351928711,
"learning_rate": 0.00011478408258449373,
"loss": 0.5347,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3930
},
{
"epoch": 4.981036662452592,
"grad_norm": 0.9937970638275146,
"learning_rate": 0.00011445629391744854,
"loss": 0.5138,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3940
},
{
"epoch": 4.993678887484197,
"grad_norm": 1.021466612815857,
"learning_rate": 0.00011412886337502894,
"loss": 0.4953,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3950
},
{
"epoch": 5.006321112515803,
"grad_norm": 0.8485009074211121,
"learning_rate": 0.00011380179850118495,
"loss": 0.4504,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3960
},
{
"epoch": 5.018963337547408,
"grad_norm": 0.9451215267181396,
"learning_rate": 0.00011347510683144151,
"loss": 0.3505,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3970
},
{
"epoch": 5.0316055625790135,
"grad_norm": 0.9910890460014343,
"learning_rate": 0.00011314879589272505,
"loss": 0.3889,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3980
},
{
"epoch": 5.04424778761062,
"grad_norm": 1.070092797279358,
"learning_rate": 0.00011282287320318996,
"loss": 0.3514,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 3990
},
{
"epoch": 5.056890012642225,
"grad_norm": 0.9985383749008179,
"learning_rate": 0.0001124973462720455,
"loss": 0.3563,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4000
},
{
"epoch": 5.0695322376738305,
"grad_norm": 0.8897594213485718,
"learning_rate": 0.00011217222259938272,
"loss": 0.3402,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4010
},
{
"epoch": 5.082174462705436,
"grad_norm": 0.981590211391449,
"learning_rate": 0.00011184750967600157,
"loss": 0.4163,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4020
},
{
"epoch": 5.094816687737041,
"grad_norm": 0.8742545247077942,
"learning_rate": 0.00011152321498323846,
"loss": 0.3477,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4030
},
{
"epoch": 5.107458912768648,
"grad_norm": 0.9774489402770996,
"learning_rate": 0.0001111993459927938,
"loss": 0.3722,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4040
},
{
"epoch": 5.120101137800253,
"grad_norm": 0.9024301171302795,
"learning_rate": 0.00011087591016656001,
"loss": 0.3531,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4050
},
{
"epoch": 5.132743362831858,
"grad_norm": 0.9952253103256226,
"learning_rate": 0.00011055291495644926,
"loss": 0.3762,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4060
},
{
"epoch": 5.145385587863464,
"grad_norm": 0.9904897809028625,
"learning_rate": 0.00011023036780422212,
"loss": 0.4032,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4070
},
{
"epoch": 5.158027812895069,
"grad_norm": 0.9370035529136658,
"learning_rate": 0.00010990827614131594,
"loss": 0.3717,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4080
},
{
"epoch": 5.1706700379266755,
"grad_norm": 1.055816650390625,
"learning_rate": 0.00010958664738867372,
"loss": 0.3958,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4090
},
{
"epoch": 5.183312262958281,
"grad_norm": 1.0066580772399902,
"learning_rate": 0.00010926548895657303,
"loss": 0.3793,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4100
},
{
"epoch": 5.195954487989886,
"grad_norm": 1.0231560468673706,
"learning_rate": 0.00010894480824445532,
"loss": 0.3813,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4110
},
{
"epoch": 5.208596713021492,
"grad_norm": 0.9747928977012634,
"learning_rate": 0.00010862461264075542,
"loss": 0.3594,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4120
},
{
"epoch": 5.221238938053097,
"grad_norm": 1.0806195735931396,
"learning_rate": 0.00010830490952273145,
"loss": 0.3956,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4130
},
{
"epoch": 5.233881163084703,
"grad_norm": 1.0321904420852661,
"learning_rate": 0.00010798570625629461,
"loss": 0.3585,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4140
},
{
"epoch": 5.246523388116309,
"grad_norm": 1.2540595531463623,
"learning_rate": 0.00010766701019583967,
"loss": 0.391,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4150
},
{
"epoch": 5.259165613147914,
"grad_norm": 1.035423994064331,
"learning_rate": 0.00010734882868407537,
"loss": 0.4028,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4160
},
{
"epoch": 5.2718078381795195,
"grad_norm": 1.2022385597229004,
"learning_rate": 0.00010703116905185541,
"loss": 0.3841,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4170
},
{
"epoch": 5.284450063211125,
"grad_norm": 1.045843482017517,
"learning_rate": 0.00010671403861800946,
"loss": 0.3939,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4180
},
{
"epoch": 5.29709228824273,
"grad_norm": 0.9559326767921448,
"learning_rate": 0.00010639744468917447,
"loss": 0.3801,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4190
},
{
"epoch": 5.3097345132743365,
"grad_norm": 1.033033847808838,
"learning_rate": 0.0001060813945596265,
"loss": 0.3846,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4200
},
{
"epoch": 5.322376738305942,
"grad_norm": 0.9737249612808228,
"learning_rate": 0.00010576589551111242,
"loss": 0.39,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4210
},
{
"epoch": 5.335018963337547,
"grad_norm": 1.0500105619430542,
"learning_rate": 0.00010545095481268241,
"loss": 0.3713,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4220
},
{
"epoch": 5.347661188369153,
"grad_norm": 1.1261670589447021,
"learning_rate": 0.00010513657972052228,
"loss": 0.4112,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4230
},
{
"epoch": 5.360303413400759,
"grad_norm": 0.9046671390533447,
"learning_rate": 0.0001048227774777864,
"loss": 0.3963,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4240
},
{
"epoch": 5.372945638432364,
"grad_norm": 1.0187987089157104,
"learning_rate": 0.00010450955531443067,
"loss": 0.3954,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4250
},
{
"epoch": 5.38558786346397,
"grad_norm": 0.9995326399803162,
"learning_rate": 0.00010419692044704624,
"loss": 0.3996,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4260
},
{
"epoch": 5.398230088495575,
"grad_norm": 0.9701279997825623,
"learning_rate": 0.00010388488007869282,
"loss": 0.3805,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4270
},
{
"epoch": 5.410872313527181,
"grad_norm": 0.9126356840133667,
"learning_rate": 0.00010357344139873315,
"loss": 0.3862,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4280
},
{
"epoch": 5.423514538558786,
"grad_norm": 0.9048483371734619,
"learning_rate": 0.00010326261158266701,
"loss": 0.3767,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4290
},
{
"epoch": 5.436156763590392,
"grad_norm": 0.9570040702819824,
"learning_rate": 0.0001029523977919662,
"loss": 0.3875,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4300
},
{
"epoch": 5.448798988621998,
"grad_norm": 1.0698267221450806,
"learning_rate": 0.00010264280717390927,
"loss": 0.4159,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4310
},
{
"epoch": 5.461441213653603,
"grad_norm": 1.03220796585083,
"learning_rate": 0.00010233384686141701,
"loss": 0.4062,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4320
},
{
"epoch": 5.474083438685208,
"grad_norm": 0.9866275787353516,
"learning_rate": 0.00010202552397288805,
"loss": 0.4064,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4330
},
{
"epoch": 5.486725663716814,
"grad_norm": 0.9090940356254578,
"learning_rate": 0.00010171784561203485,
"loss": 0.4178,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4340
},
{
"epoch": 5.49936788874842,
"grad_norm": 1.0094218254089355,
"learning_rate": 0.00010141081886772013,
"loss": 0.4046,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4350
},
{
"epoch": 5.5120101137800255,
"grad_norm": 0.9741319417953491,
"learning_rate": 0.00010110445081379343,
"loss": 0.3957,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4360
},
{
"epoch": 5.524652338811631,
"grad_norm": 1.186471700668335,
"learning_rate": 0.00010079874850892808,
"loss": 0.4112,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4370
},
{
"epoch": 5.537294563843236,
"grad_norm": 1.0046883821487427,
"learning_rate": 0.00010049371899645874,
"loss": 0.3976,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4380
},
{
"epoch": 5.549936788874842,
"grad_norm": 2.301224946975708,
"learning_rate": 0.00010018936930421907,
"loss": 0.4381,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4390
},
{
"epoch": 5.562579013906447,
"grad_norm": 1.1555812358856201,
"learning_rate": 9.988570644437969e-05,
"loss": 0.4139,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4400
},
{
"epoch": 5.575221238938053,
"grad_norm": 1.0925663709640503,
"learning_rate": 9.958273741328672e-05,
"loss": 0.376,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4410
},
{
"epoch": 5.587863463969659,
"grad_norm": 1.0395334959030151,
"learning_rate": 9.928046919130056e-05,
"loss": 0.4696,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4420
},
{
"epoch": 5.600505689001264,
"grad_norm": 1.0506666898727417,
"learning_rate": 9.897890874263518e-05,
"loss": 0.4165,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4430
},
{
"epoch": 5.6131479140328695,
"grad_norm": 0.9786500930786133,
"learning_rate": 9.867806301519742e-05,
"loss": 0.3949,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4440
},
{
"epoch": 5.625790139064476,
"grad_norm": 1.0455806255340576,
"learning_rate": 9.837793894042716e-05,
"loss": 0.3976,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4450
},
{
"epoch": 5.638432364096081,
"grad_norm": 0.9991239905357361,
"learning_rate": 9.807854343313739e-05,
"loss": 0.3862,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4460
},
{
"epoch": 5.651074589127687,
"grad_norm": 1.0253965854644775,
"learning_rate": 9.777988339135517e-05,
"loss": 0.3859,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4470
},
{
"epoch": 5.663716814159292,
"grad_norm": 0.9867163300514221,
"learning_rate": 9.748196569616245e-05,
"loss": 0.401,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4480
},
{
"epoch": 5.676359039190897,
"grad_norm": 0.9973002672195435,
"learning_rate": 9.718479721153764e-05,
"loss": 0.4055,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4490
},
{
"epoch": 5.689001264222503,
"grad_norm": 1.03886079788208,
"learning_rate": 9.688838478419746e-05,
"loss": 0.4031,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4500
},
{
"epoch": 5.701643489254109,
"grad_norm": 1.1662676334381104,
"learning_rate": 9.659273524343917e-05,
"loss": 0.3998,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4510
},
{
"epoch": 5.714285714285714,
"grad_norm": 0.9785062670707703,
"learning_rate": 9.629785540098329e-05,
"loss": 0.3925,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4520
},
{
"epoch": 5.72692793931732,
"grad_norm": 1.0249117612838745,
"learning_rate": 9.600375205081654e-05,
"loss": 0.4195,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4530
},
{
"epoch": 5.739570164348925,
"grad_norm": 1.0373821258544922,
"learning_rate": 9.571043196903541e-05,
"loss": 0.4197,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4540
},
{
"epoch": 5.752212389380531,
"grad_norm": 0.9370099306106567,
"learning_rate": 9.541790191368998e-05,
"loss": 0.39,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4550
},
{
"epoch": 5.764854614412137,
"grad_norm": 1.0252115726470947,
"learning_rate": 9.512616862462831e-05,
"loss": 0.408,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4560
},
{
"epoch": 5.777496839443742,
"grad_norm": 1.033614158630371,
"learning_rate": 9.483523882334102e-05,
"loss": 0.4194,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4570
},
{
"epoch": 5.790139064475348,
"grad_norm": 1.1127879619598389,
"learning_rate": 9.454511921280651e-05,
"loss": 0.4098,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4580
},
{
"epoch": 5.802781289506953,
"grad_norm": 0.9151955246925354,
"learning_rate": 9.425581647733652e-05,
"loss": 0.4202,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4590
},
{
"epoch": 5.815423514538558,
"grad_norm": 1.0775083303451538,
"learning_rate": 9.396733728242207e-05,
"loss": 0.4181,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4600
},
{
"epoch": 5.828065739570165,
"grad_norm": 0.9415781497955322,
"learning_rate": 9.367968827458003e-05,
"loss": 0.4538,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4610
},
{
"epoch": 5.84070796460177,
"grad_norm": 0.9953785538673401,
"learning_rate": 9.339287608119976e-05,
"loss": 0.4121,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4620
},
{
"epoch": 5.8533501896333755,
"grad_norm": 1.0544629096984863,
"learning_rate": 9.310690731039065e-05,
"loss": 0.4025,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4630
},
{
"epoch": 5.865992414664981,
"grad_norm": 0.9968181848526001,
"learning_rate": 9.282178855082963e-05,
"loss": 0.4179,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4640
},
{
"epoch": 5.878634639696586,
"grad_norm": 0.9884583353996277,
"learning_rate": 9.253752637160965e-05,
"loss": 0.4345,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4650
},
{
"epoch": 5.891276864728193,
"grad_norm": 1.0549771785736084,
"learning_rate": 9.225412732208815e-05,
"loss": 0.4171,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4660
},
{
"epoch": 5.903919089759798,
"grad_norm": 1.058349847793579,
"learning_rate": 9.19715979317361e-05,
"loss": 0.3954,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4670
},
{
"epoch": 5.916561314791403,
"grad_norm": 0.959523618221283,
"learning_rate": 9.168994470998771e-05,
"loss": 0.4078,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4680
},
{
"epoch": 5.929203539823009,
"grad_norm": 1.0451573133468628,
"learning_rate": 9.140917414609043e-05,
"loss": 0.4477,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4690
},
{
"epoch": 5.941845764854614,
"grad_norm": 1.0435268878936768,
"learning_rate": 9.112929270895536e-05,
"loss": 0.3955,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4700
},
{
"epoch": 5.9544879898862195,
"grad_norm": 1.001197338104248,
"learning_rate": 9.085030684700828e-05,
"loss": 0.4086,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4710
},
{
"epoch": 5.967130214917826,
"grad_norm": 1.0496070384979248,
"learning_rate": 9.057222298804104e-05,
"loss": 0.4342,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4720
},
{
"epoch": 5.979772439949431,
"grad_norm": 0.955414891242981,
"learning_rate": 9.029504753906348e-05,
"loss": 0.4041,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4730
},
{
"epoch": 5.992414664981037,
"grad_norm": 3.551063060760498,
"learning_rate": 9.001878688615582e-05,
"loss": 0.4304,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4740
},
{
"epoch": 6.005056890012642,
"grad_norm": 0.8560709953308105,
"learning_rate": 8.974344739432153e-05,
"loss": 0.3485,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4750
},
{
"epoch": 6.017699115044247,
"grad_norm": 1.048302412033081,
"learning_rate": 8.946903540734064e-05,
"loss": 0.2697,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4760
},
{
"epoch": 6.030341340075854,
"grad_norm": 1.1160005331039429,
"learning_rate": 8.919555724762359e-05,
"loss": 0.2732,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4770
},
{
"epoch": 6.042983565107459,
"grad_norm": 0.9840885400772095,
"learning_rate": 8.892301921606567e-05,
"loss": 0.2855,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4780
},
{
"epoch": 6.055625790139064,
"grad_norm": 0.9168655872344971,
"learning_rate": 8.865142759190168e-05,
"loss": 0.2657,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4790
},
{
"epoch": 6.06826801517067,
"grad_norm": 0.9473972916603088,
"learning_rate": 8.838078863256136e-05,
"loss": 0.2808,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4800
},
{
"epoch": 6.080910240202275,
"grad_norm": 1.079185962677002,
"learning_rate": 8.811110857352518e-05,
"loss": 0.2815,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4810
},
{
"epoch": 6.0935524652338815,
"grad_norm": 1.0252193212509155,
"learning_rate": 8.784239362818074e-05,
"loss": 0.2981,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4820
},
{
"epoch": 6.106194690265487,
"grad_norm": 0.9863188862800598,
"learning_rate": 8.757464998767951e-05,
"loss": 0.2817,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4830
},
{
"epoch": 6.118836915297092,
"grad_norm": 0.9947652220726013,
"learning_rate": 8.730788382079432e-05,
"loss": 0.2946,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4840
},
{
"epoch": 6.131479140328698,
"grad_norm": 1.0014346837997437,
"learning_rate": 8.704210127377708e-05,
"loss": 0.2902,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4850
},
{
"epoch": 6.144121365360303,
"grad_norm": 1.0144473314285278,
"learning_rate": 8.677730847021724e-05,
"loss": 0.2828,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4860
},
{
"epoch": 6.156763590391909,
"grad_norm": 1.0776128768920898,
"learning_rate": 8.651351151090082e-05,
"loss": 0.306,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4870
},
{
"epoch": 6.169405815423515,
"grad_norm": 1.13133704662323,
"learning_rate": 8.625071647366963e-05,
"loss": 0.2842,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4880
},
{
"epoch": 6.18204804045512,
"grad_norm": 1.0843030214309692,
"learning_rate": 8.598892941328137e-05,
"loss": 0.2938,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4890
},
{
"epoch": 6.1946902654867255,
"grad_norm": 1.0806282758712769,
"learning_rate": 8.572815636127013e-05,
"loss": 0.3009,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4900
},
{
"epoch": 6.207332490518331,
"grad_norm": 1.2078369855880737,
"learning_rate": 8.54684033258074e-05,
"loss": 0.3298,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4910
},
{
"epoch": 6.219974715549937,
"grad_norm": 1.0101124048233032,
"learning_rate": 8.520967629156365e-05,
"loss": 0.2938,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4920
},
{
"epoch": 6.232616940581543,
"grad_norm": 1.0761367082595825,
"learning_rate": 8.495198121957043e-05,
"loss": 0.3062,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4930
},
{
"epoch": 6.245259165613148,
"grad_norm": 1.1186556816101074,
"learning_rate": 8.469532404708298e-05,
"loss": 0.3024,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4940
},
{
"epoch": 6.257901390644753,
"grad_norm": 1.0951234102249146,
"learning_rate": 8.443971068744362e-05,
"loss": 0.2902,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4950
},
{
"epoch": 6.270543615676359,
"grad_norm": 0.9902530908584595,
"learning_rate": 8.418514702994525e-05,
"loss": 0.296,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4960
},
{
"epoch": 6.283185840707965,
"grad_norm": 1.1143983602523804,
"learning_rate": 8.393163893969586e-05,
"loss": 0.3114,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4970
},
{
"epoch": 6.29582806573957,
"grad_norm": 1.0336135625839233,
"learning_rate": 8.367919225748333e-05,
"loss": 0.3308,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4980
},
{
"epoch": 6.308470290771176,
"grad_norm": 0.9870953559875488,
"learning_rate": 8.34278127996408e-05,
"loss": 0.2956,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 4990
},
{
"epoch": 6.321112515802781,
"grad_norm": 1.0120779275894165,
"learning_rate": 8.317750635791284e-05,
"loss": 0.313,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5000
},
{
"epoch": 6.333754740834387,
"grad_norm": 0.9608586430549622,
"learning_rate": 8.292827869932179e-05,
"loss": 0.3005,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5010
},
{
"epoch": 6.346396965865992,
"grad_norm": 1.1399952173233032,
"learning_rate": 8.268013556603504e-05,
"loss": 0.302,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5020
},
{
"epoch": 6.359039190897598,
"grad_norm": 1.1939678192138672,
"learning_rate": 8.243308267523261e-05,
"loss": 0.3214,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5030
},
{
"epoch": 6.371681415929204,
"grad_norm": 1.0933220386505127,
"learning_rate": 8.218712571897564e-05,
"loss": 0.3145,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5040
},
{
"epoch": 6.384323640960809,
"grad_norm": 1.6601200103759766,
"learning_rate": 8.194227036407498e-05,
"loss": 0.3069,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5050
},
{
"epoch": 6.3969658659924145,
"grad_norm": 1.105997920036316,
"learning_rate": 8.169852225196077e-05,
"loss": 0.2998,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5060
},
{
"epoch": 6.40960809102402,
"grad_norm": 1.0879909992218018,
"learning_rate": 8.145588699855247e-05,
"loss": 0.3087,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5070
},
{
"epoch": 6.422250316055626,
"grad_norm": 1.011335015296936,
"learning_rate": 8.121437019412947e-05,
"loss": 0.2982,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5080
},
{
"epoch": 6.4348925410872315,
"grad_norm": 1.2018229961395264,
"learning_rate": 8.09739774032022e-05,
"loss": 0.3272,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5090
},
{
"epoch": 6.447534766118837,
"grad_norm": 1.0991839170455933,
"learning_rate": 8.073471416438405e-05,
"loss": 0.3434,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5100
},
{
"epoch": 6.460176991150442,
"grad_norm": 1.228576898574829,
"learning_rate": 8.049658599026369e-05,
"loss": 0.3113,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5110
},
{
"epoch": 6.472819216182048,
"grad_norm": 1.0694067478179932,
"learning_rate": 8.0259598367278e-05,
"loss": 0.3114,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5120
},
{
"epoch": 6.485461441213654,
"grad_norm": 1.0272830724716187,
"learning_rate": 8.002375675558586e-05,
"loss": 0.3103,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5130
},
{
"epoch": 6.498103666245259,
"grad_norm": 0.974769115447998,
"learning_rate": 7.978906658894213e-05,
"loss": 0.3093,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5140
},
{
"epoch": 6.510745891276865,
"grad_norm": 1.1441291570663452,
"learning_rate": 7.955553327457256e-05,
"loss": 0.3317,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5150
},
{
"epoch": 6.52338811630847,
"grad_norm": 1.0339381694793701,
"learning_rate": 7.932316219304925e-05,
"loss": 0.2997,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5160
},
{
"epoch": 6.5360303413400755,
"grad_norm": 1.0404632091522217,
"learning_rate": 7.90919586981666e-05,
"loss": 0.3095,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5170
},
{
"epoch": 6.548672566371682,
"grad_norm": 1.1902042627334595,
"learning_rate": 7.886192811681793e-05,
"loss": 0.2978,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5180
},
{
"epoch": 6.561314791403287,
"grad_norm": 1.089690089225769,
"learning_rate": 7.863307574887296e-05,
"loss": 0.3103,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5190
},
{
"epoch": 6.573957016434893,
"grad_norm": 1.1589289903640747,
"learning_rate": 7.840540686705539e-05,
"loss": 0.3425,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5200
},
{
"epoch": 6.586599241466498,
"grad_norm": 1.0016796588897705,
"learning_rate": 7.817892671682173e-05,
"loss": 0.3004,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5210
},
{
"epoch": 6.599241466498103,
"grad_norm": 1.1263011693954468,
"learning_rate": 7.795364051624015e-05,
"loss": 0.3124,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5220
},
{
"epoch": 6.611883691529709,
"grad_norm": 1.1125059127807617,
"learning_rate": 7.77295534558705e-05,
"loss": 0.331,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5230
},
{
"epoch": 6.624525916561315,
"grad_norm": 1.1294969320297241,
"learning_rate": 7.750667069864458e-05,
"loss": 0.308,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5240
},
{
"epoch": 6.6371681415929205,
"grad_norm": 1.0179051160812378,
"learning_rate": 7.728499737974723e-05,
"loss": 0.3057,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5250
},
{
"epoch": 6.649810366624526,
"grad_norm": 1.0046980381011963,
"learning_rate": 7.706453860649807e-05,
"loss": 0.2959,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5260
},
{
"epoch": 6.662452591656131,
"grad_norm": 1.110780954360962,
"learning_rate": 7.684529945823368e-05,
"loss": 0.3461,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5270
},
{
"epoch": 6.6750948166877375,
"grad_norm": 1.0861669778823853,
"learning_rate": 7.662728498619076e-05,
"loss": 0.2993,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5280
},
{
"epoch": 6.687737041719343,
"grad_norm": 1.0867419242858887,
"learning_rate": 7.641050021338954e-05,
"loss": 0.3354,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5290
},
{
"epoch": 6.700379266750948,
"grad_norm": 1.1115156412124634,
"learning_rate": 7.619495013451831e-05,
"loss": 0.3177,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5300
},
{
"epoch": 6.713021491782554,
"grad_norm": 1.0660215616226196,
"learning_rate": 7.59806397158181e-05,
"loss": 0.3141,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5310
},
{
"epoch": 6.725663716814159,
"grad_norm": 0.9811689257621765,
"learning_rate": 7.576757389496838e-05,
"loss": 0.3354,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5320
},
{
"epoch": 6.7383059418457645,
"grad_norm": 1.0768461227416992,
"learning_rate": 7.555575758097325e-05,
"loss": 0.3108,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5330
},
{
"epoch": 6.750948166877371,
"grad_norm": 1.1170628070831299,
"learning_rate": 7.534519565404843e-05,
"loss": 0.3206,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5340
},
{
"epoch": 6.763590391908976,
"grad_norm": 0.9863327145576477,
"learning_rate": 7.51358929655087e-05,
"loss": 0.2973,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5350
},
{
"epoch": 6.7762326169405815,
"grad_norm": 1.1545705795288086,
"learning_rate": 7.492785433765617e-05,
"loss": 0.3393,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5360
},
{
"epoch": 6.788874841972187,
"grad_norm": 1.0578138828277588,
"learning_rate": 7.472108456366925e-05,
"loss": 0.323,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5370
},
{
"epoch": 6.801517067003792,
"grad_norm": 1.0187878608703613,
"learning_rate": 7.451558840749207e-05,
"loss": 0.3386,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5380
},
{
"epoch": 6.814159292035399,
"grad_norm": 1.0566604137420654,
"learning_rate": 7.431137060372486e-05,
"loss": 0.3161,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5390
},
{
"epoch": 6.826801517067004,
"grad_norm": 0.9965440034866333,
"learning_rate": 7.410843585751477e-05,
"loss": 0.322,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5400
},
{
"epoch": 6.839443742098609,
"grad_norm": 1.1252332925796509,
"learning_rate": 7.390678884444751e-05,
"loss": 0.3421,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5410
},
{
"epoch": 6.852085967130215,
"grad_norm": 1.5158134698867798,
"learning_rate": 7.370643421043957e-05,
"loss": 0.3375,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5420
},
{
"epoch": 6.86472819216182,
"grad_norm": 1.0333036184310913,
"learning_rate": 7.350737657163133e-05,
"loss": 0.3173,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5430
},
{
"epoch": 6.877370417193426,
"grad_norm": 1.0372684001922607,
"learning_rate": 7.33096205142805e-05,
"loss": 0.3362,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5440
},
{
"epoch": 6.890012642225032,
"grad_norm": 0.9757832288742065,
"learning_rate": 7.311317059465658e-05,
"loss": 0.3255,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5450
},
{
"epoch": 6.902654867256637,
"grad_norm": 1.0241106748580933,
"learning_rate": 7.291803133893588e-05,
"loss": 0.3146,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5460
},
{
"epoch": 6.915297092288243,
"grad_norm": 1.095625638961792,
"learning_rate": 7.272420724309719e-05,
"loss": 0.3185,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5470
},
{
"epoch": 6.927939317319848,
"grad_norm": 1.1619679927825928,
"learning_rate": 7.25317027728182e-05,
"loss": 0.3149,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5480
},
{
"epoch": 6.940581542351454,
"grad_norm": 1.090199589729309,
"learning_rate": 7.234052236337267e-05,
"loss": 0.3194,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5490
},
{
"epoch": 6.95322376738306,
"grad_norm": 1.0253422260284424,
"learning_rate": 7.215067041952817e-05,
"loss": 0.3748,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5500
},
{
"epoch": 6.965865992414665,
"grad_norm": 0.9982818365097046,
"learning_rate": 7.196215131544458e-05,
"loss": 0.3315,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5510
},
{
"epoch": 6.9785082174462705,
"grad_norm": 1.0616456270217896,
"learning_rate": 7.177496939457349e-05,
"loss": 0.3197,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5520
},
{
"epoch": 6.991150442477876,
"grad_norm": 1.0430032014846802,
"learning_rate": 7.158912896955785e-05,
"loss": 0.332,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5530
},
{
"epoch": 7.003792667509481,
"grad_norm": 0.9125473499298096,
"learning_rate": 7.140463432213281e-05,
"loss": 0.2938,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5540
},
{
"epoch": 7.0164348925410875,
"grad_norm": 1.0734045505523682,
"learning_rate": 7.122148970302702e-05,
"loss": 0.2281,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5550
},
{
"epoch": 7.029077117572693,
"grad_norm": 1.0642218589782715,
"learning_rate": 7.103969933186467e-05,
"loss": 0.2096,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5560
},
{
"epoch": 7.041719342604298,
"grad_norm": 1.080702304840088,
"learning_rate": 7.085926739706828e-05,
"loss": 0.2014,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5570
},
{
"epoch": 7.054361567635904,
"grad_norm": 1.0507287979125977,
"learning_rate": 7.06801980557622e-05,
"loss": 0.2107,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5580
},
{
"epoch": 7.067003792667509,
"grad_norm": 1.0190140008926392,
"learning_rate": 7.050249543367683e-05,
"loss": 0.2106,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5590
},
{
"epoch": 7.079646017699115,
"grad_norm": 1.1010105609893799,
"learning_rate": 7.032616362505359e-05,
"loss": 0.2142,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5600
},
{
"epoch": 7.092288242730721,
"grad_norm": 0.9539241194725037,
"learning_rate": 7.015120669255053e-05,
"loss": 0.2138,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5610
},
{
"epoch": 7.104930467762326,
"grad_norm": 1.2524183988571167,
"learning_rate": 6.99776286671488e-05,
"loss": 0.2166,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5620
},
{
"epoch": 7.117572692793932,
"grad_norm": 1.0015676021575928,
"learning_rate": 6.980543354805969e-05,
"loss": 0.2075,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5630
},
{
"epoch": 7.130214917825537,
"grad_norm": 1.0855770111083984,
"learning_rate": 6.963462530263261e-05,
"loss": 0.2322,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5640
},
{
"epoch": 7.142857142857143,
"grad_norm": 1.1854267120361328,
"learning_rate": 6.946520786626358e-05,
"loss": 0.2192,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5650
},
{
"epoch": 7.155499367888749,
"grad_norm": 1.1590447425842285,
"learning_rate": 6.929718514230455e-05,
"loss": 0.2286,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5660
},
{
"epoch": 7.168141592920354,
"grad_norm": 1.0713489055633545,
"learning_rate": 6.913056100197355e-05,
"loss": 0.2101,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5670
},
{
"epoch": 7.180783817951959,
"grad_norm": 1.0067224502563477,
"learning_rate": 6.896533928426545e-05,
"loss": 0.2191,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5680
},
{
"epoch": 7.193426042983565,
"grad_norm": 1.0778611898422241,
"learning_rate": 6.880152379586353e-05,
"loss": 0.2242,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5690
},
{
"epoch": 7.206068268015171,
"grad_norm": 1.1107529401779175,
"learning_rate": 6.863911831105174e-05,
"loss": 0.236,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5700
},
{
"epoch": 7.2187104930467765,
"grad_norm": 1.1352819204330444,
"learning_rate": 6.847812657162774e-05,
"loss": 0.2306,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5710
},
{
"epoch": 7.231352718078382,
"grad_norm": 1.1808239221572876,
"learning_rate": 6.831855228681676e-05,
"loss": 0.2313,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5720
},
{
"epoch": 7.243994943109987,
"grad_norm": 1.161959171295166,
"learning_rate": 6.816039913318605e-05,
"loss": 0.2365,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5730
},
{
"epoch": 7.256637168141593,
"grad_norm": 1.102596402168274,
"learning_rate": 6.800367075456027e-05,
"loss": 0.2247,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5740
},
{
"epoch": 7.269279393173198,
"grad_norm": 0.9597683548927307,
"learning_rate": 6.78483707619374e-05,
"loss": 0.216,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5750
},
{
"epoch": 7.281921618204804,
"grad_norm": 1.1682482957839966,
"learning_rate": 6.769450273340572e-05,
"loss": 0.2481,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5760
},
{
"epoch": 7.29456384323641,
"grad_norm": 1.043906807899475,
"learning_rate": 6.754207021406114e-05,
"loss": 0.2284,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5770
},
{
"epoch": 7.307206068268015,
"grad_norm": 1.110894799232483,
"learning_rate": 6.73910767159258e-05,
"loss": 0.2605,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5780
},
{
"epoch": 7.3198482932996205,
"grad_norm": 1.06911039352417,
"learning_rate": 6.724152571786693e-05,
"loss": 0.2263,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5790
},
{
"epoch": 7.332490518331226,
"grad_norm": 1.144773006439209,
"learning_rate": 6.709342066551677e-05,
"loss": 0.2363,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5800
},
{
"epoch": 7.345132743362832,
"grad_norm": 3.6639792919158936,
"learning_rate": 6.694676497119325e-05,
"loss": 0.249,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5810
},
{
"epoch": 7.357774968394438,
"grad_norm": 0.9481773376464844,
"learning_rate": 6.680156201382128e-05,
"loss": 0.2531,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5820
},
{
"epoch": 7.370417193426043,
"grad_norm": 1.118088960647583,
"learning_rate": 6.66578151388549e-05,
"loss": 0.2158,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5830
},
{
"epoch": 7.383059418457648,
"grad_norm": 1.0164135694503784,
"learning_rate": 6.651552765820028e-05,
"loss": 0.256,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5840
},
{
"epoch": 7.395701643489254,
"grad_norm": 1.046364188194275,
"learning_rate": 6.637470285013933e-05,
"loss": 0.2344,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5850
},
{
"epoch": 7.40834386852086,
"grad_norm": 1.0682607889175415,
"learning_rate": 6.623534395925426e-05,
"loss": 0.2189,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5860
},
{
"epoch": 7.420986093552465,
"grad_norm": 1.1149200201034546,
"learning_rate": 6.609745419635272e-05,
"loss": 0.2313,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5870
},
{
"epoch": 7.433628318584071,
"grad_norm": 1.2037601470947266,
"learning_rate": 6.596103673839385e-05,
"loss": 0.239,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5880
},
{
"epoch": 7.446270543615676,
"grad_norm": 1.2147172689437866,
"learning_rate": 6.582609472841519e-05,
"loss": 0.253,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5890
},
{
"epoch": 7.458912768647282,
"grad_norm": 1.061748743057251,
"learning_rate": 6.569263127546012e-05,
"loss": 0.2491,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5900
},
{
"epoch": 7.471554993678888,
"grad_norm": 1.1806966066360474,
"learning_rate": 6.556064945450633e-05,
"loss": 0.2307,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5910
},
{
"epoch": 7.484197218710493,
"grad_norm": 1.0720311403274536,
"learning_rate": 6.54301523063949e-05,
"loss": 0.2567,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5920
},
{
"epoch": 7.496839443742099,
"grad_norm": 1.1361720561981201,
"learning_rate": 6.530114283776029e-05,
"loss": 0.221,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5930
},
{
"epoch": 7.509481668773704,
"grad_norm": 1.7318781614303589,
"learning_rate": 6.517362402096104e-05,
"loss": 0.2343,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5940
},
{
"epoch": 7.522123893805309,
"grad_norm": 1.2448699474334717,
"learning_rate": 6.504759879401134e-05,
"loss": 0.2487,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5950
},
{
"epoch": 7.534766118836915,
"grad_norm": 1.144116997718811,
"learning_rate": 6.492307006051322e-05,
"loss": 0.2246,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5960
},
{
"epoch": 7.547408343868521,
"grad_norm": 1.121053695678711,
"learning_rate": 6.480004068958982e-05,
"loss": 0.2345,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5970
},
{
"epoch": 7.5600505689001265,
"grad_norm": 0.9634986519813538,
"learning_rate": 6.46785135158191e-05,
"loss": 0.2206,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5980
},
{
"epoch": 7.572692793931732,
"grad_norm": 1.0400941371917725,
"learning_rate": 6.455849133916868e-05,
"loss": 0.2259,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 5990
},
{
"epoch": 7.585335018963337,
"grad_norm": 1.1151084899902344,
"learning_rate": 6.44399769249313e-05,
"loss": 0.2412,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6000
},
{
"epoch": 7.597977243994944,
"grad_norm": 1.2084640264511108,
"learning_rate": 6.432297300366104e-05,
"loss": 0.2469,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6010
},
{
"epoch": 7.610619469026549,
"grad_norm": 1.1408836841583252,
"learning_rate": 6.420748227111045e-05,
"loss": 0.2276,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6020
},
{
"epoch": 7.623261694058154,
"grad_norm": 1.132438063621521,
"learning_rate": 6.409350738816844e-05,
"loss": 0.2476,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6030
},
{
"epoch": 7.63590391908976,
"grad_norm": 1.0751878023147583,
"learning_rate": 6.398105098079903e-05,
"loss": 0.2527,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6040
},
{
"epoch": 7.648546144121365,
"grad_norm": 1.1522191762924194,
"learning_rate": 6.387011563998073e-05,
"loss": 0.2596,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6050
},
{
"epoch": 7.6611883691529705,
"grad_norm": 1.0497066974639893,
"learning_rate": 6.376070392164694e-05,
"loss": 0.2534,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6060
},
{
"epoch": 7.673830594184577,
"grad_norm": 2.2555932998657227,
"learning_rate": 6.3652818346627e-05,
"loss": 0.2413,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6070
},
{
"epoch": 7.686472819216182,
"grad_norm": 0.9901424646377563,
"learning_rate": 6.354646140058816e-05,
"loss": 0.2442,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6080
},
{
"epoch": 7.699115044247788,
"grad_norm": 1.066794753074646,
"learning_rate": 6.344163553397834e-05,
"loss": 0.2428,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6090
},
{
"epoch": 7.711757269279393,
"grad_norm": 1.0979070663452148,
"learning_rate": 6.333834316196953e-05,
"loss": 0.2457,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6100
},
{
"epoch": 7.724399494310998,
"grad_norm": 1.1070395708084106,
"learning_rate": 6.323658666440228e-05,
"loss": 0.25,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6110
},
{
"epoch": 7.737041719342605,
"grad_norm": 1.0736275911331177,
"learning_rate": 6.313636838573086e-05,
"loss": 0.2524,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6120
},
{
"epoch": 7.74968394437421,
"grad_norm": 1.217236042022705,
"learning_rate": 6.303769063496915e-05,
"loss": 0.2707,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6130
},
{
"epoch": 7.762326169405815,
"grad_norm": 1.180005669593811,
"learning_rate": 6.294055568563754e-05,
"loss": 0.2405,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6140
},
{
"epoch": 7.774968394437421,
"grad_norm": 1.116621971130371,
"learning_rate": 6.28449657757105e-05,
"loss": 0.2469,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6150
},
{
"epoch": 7.787610619469026,
"grad_norm": 1.0715476274490356,
"learning_rate": 6.2750923107565e-05,
"loss": 0.2482,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6160
},
{
"epoch": 7.8002528445006325,
"grad_norm": 1.0930267572402954,
"learning_rate": 6.265842984792986e-05,
"loss": 0.2872,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6170
},
{
"epoch": 7.812895069532238,
"grad_norm": 1.232857346534729,
"learning_rate": 6.25674881278357e-05,
"loss": 0.2536,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6180
},
{
"epoch": 7.825537294563843,
"grad_norm": 1.1025636196136475,
"learning_rate": 6.247810004256595e-05,
"loss": 0.2513,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6190
},
{
"epoch": 7.838179519595449,
"grad_norm": 2.9798877239227295,
"learning_rate": 6.23902676516085e-05,
"loss": 0.2668,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6200
},
{
"epoch": 7.850821744627054,
"grad_norm": 1.3299516439437866,
"learning_rate": 6.230399297860826e-05,
"loss": 0.2637,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6210
},
{
"epoch": 7.86346396965866,
"grad_norm": 1.1211531162261963,
"learning_rate": 6.221927801132061e-05,
"loss": 0.2385,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6220
},
{
"epoch": 7.876106194690266,
"grad_norm": 1.2004640102386475,
"learning_rate": 6.213612470156552e-05,
"loss": 0.2594,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6230
},
{
"epoch": 7.888748419721871,
"grad_norm": 1.0749276876449585,
"learning_rate": 6.205453496518261e-05,
"loss": 0.2551,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6240
},
{
"epoch": 7.9013906447534765,
"grad_norm": 1.2336843013763428,
"learning_rate": 6.197451068198699e-05,
"loss": 0.284,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6250
},
{
"epoch": 7.914032869785082,
"grad_norm": 1.194594383239746,
"learning_rate": 6.189605369572598e-05,
"loss": 0.2442,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6260
},
{
"epoch": 7.926675094816687,
"grad_norm": 1.0185377597808838,
"learning_rate": 6.181916581403667e-05,
"loss": 0.2523,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6270
},
{
"epoch": 7.939317319848294,
"grad_norm": 1.0479494333267212,
"learning_rate": 6.174384880840409e-05,
"loss": 0.2545,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6280
},
{
"epoch": 7.951959544879899,
"grad_norm": 1.0949984788894653,
"learning_rate": 6.167010441412064e-05,
"loss": 0.2513,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6290
},
{
"epoch": 7.964601769911504,
"grad_norm": 1.1074668169021606,
"learning_rate": 6.159793433024597e-05,
"loss": 0.2601,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6300
},
{
"epoch": 7.97724399494311,
"grad_norm": 1.2110705375671387,
"learning_rate": 6.152734021956782e-05,
"loss": 0.2685,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6310
},
{
"epoch": 7.989886219974716,
"grad_norm": 1.0655533075332642,
"learning_rate": 6.145832370856379e-05,
"loss": 0.2444,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6320
},
{
"epoch": 8.002528445006321,
"grad_norm": 0.8317849040031433,
"learning_rate": 6.139088638736378e-05,
"loss": 0.2416,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6330
},
{
"epoch": 8.015170670037927,
"grad_norm": 1.3935742378234863,
"learning_rate": 6.132502980971345e-05,
"loss": 0.1735,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6340
},
{
"epoch": 8.027812895069532,
"grad_norm": 1.0203521251678467,
"learning_rate": 6.12607554929383e-05,
"loss": 0.1674,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6350
},
{
"epoch": 8.040455120101138,
"grad_norm": 1.0844451189041138,
"learning_rate": 6.119806491790886e-05,
"loss": 0.1563,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6360
},
{
"epoch": 8.053097345132743,
"grad_norm": 1.0661518573760986,
"learning_rate": 6.113695952900643e-05,
"loss": 0.1579,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6370
},
{
"epoch": 8.065739570164348,
"grad_norm": 0.9967635869979858,
"learning_rate": 6.107744073408987e-05,
"loss": 0.1601,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6380
},
{
"epoch": 8.078381795195954,
"grad_norm": 1.1493229866027832,
"learning_rate": 6.10195099044632e-05,
"loss": 0.1586,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6390
},
{
"epoch": 8.09102402022756,
"grad_norm": 3.224154233932495,
"learning_rate": 6.096316837484391e-05,
"loss": 0.188,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6400
},
{
"epoch": 8.103666245259166,
"grad_norm": 1.0153775215148926,
"learning_rate": 6.090841744333229e-05,
"loss": 0.1821,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6410
},
{
"epoch": 8.116308470290772,
"grad_norm": 1.5251129865646362,
"learning_rate": 6.0855258371381465e-05,
"loss": 0.195,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6420
},
{
"epoch": 8.128950695322377,
"grad_norm": 1.1285451650619507,
"learning_rate": 6.0803692383768375e-05,
"loss": 0.1559,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6430
},
{
"epoch": 8.141592920353983,
"grad_norm": 1.0399773120880127,
"learning_rate": 6.075372066856554e-05,
"loss": 0.1609,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6440
},
{
"epoch": 8.154235145385588,
"grad_norm": 1.1441960334777832,
"learning_rate": 6.07053443771137e-05,
"loss": 0.1731,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6450
},
{
"epoch": 8.166877370417193,
"grad_norm": 1.0369312763214111,
"learning_rate": 6.065856462399524e-05,
"loss": 0.1661,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6460
},
{
"epoch": 8.179519595448799,
"grad_norm": 1.1654633283615112,
"learning_rate": 6.061338248700856e-05,
"loss": 0.2005,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6470
},
{
"epoch": 8.192161820480404,
"grad_norm": 1.0257656574249268,
"learning_rate": 6.0569799007143233e-05,
"loss": 0.1688,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6480
},
{
"epoch": 8.20480404551201,
"grad_norm": 1.05653977394104,
"learning_rate": 6.052781518855601e-05,
"loss": 0.1732,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6490
},
{
"epoch": 8.217446270543615,
"grad_norm": 0.9420139193534851,
"learning_rate": 6.0487431998547705e-05,
"loss": 0.1704,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6500
},
{
"epoch": 8.230088495575222,
"grad_norm": 1.0948173999786377,
"learning_rate": 6.044865036754086e-05,
"loss": 0.178,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6510
},
{
"epoch": 8.242730720606827,
"grad_norm": 1.1382850408554077,
"learning_rate": 6.0411471189058353e-05,
"loss": 0.1945,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6520
},
{
"epoch": 8.255372945638433,
"grad_norm": 1.1092077493667603,
"learning_rate": 6.037589531970283e-05,
"loss": 0.1628,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6530
},
{
"epoch": 8.268015170670038,
"grad_norm": 1.0578278303146362,
"learning_rate": 6.0341923579136886e-05,
"loss": 0.1815,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6540
},
{
"epoch": 8.280657395701644,
"grad_norm": 1.170258641242981,
"learning_rate": 6.030955675006428e-05,
"loss": 0.1633,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6550
},
{
"epoch": 8.293299620733249,
"grad_norm": 1.1795989274978638,
"learning_rate": 6.027879557821183e-05,
"loss": 0.1987,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6560
},
{
"epoch": 8.305941845764854,
"grad_norm": 1.1237478256225586,
"learning_rate": 6.0249640772312264e-05,
"loss": 0.1878,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6570
},
{
"epoch": 8.31858407079646,
"grad_norm": 1.2054628133773804,
"learning_rate": 6.022209300408786e-05,
"loss": 0.1765,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6580
},
{
"epoch": 8.331226295828065,
"grad_norm": 1.16087806224823,
"learning_rate": 6.019615290823503e-05,
"loss": 0.1779,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6590
},
{
"epoch": 8.34386852085967,
"grad_norm": 1.0747262239456177,
"learning_rate": 6.017182108240963e-05,
"loss": 0.1741,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6600
},
{
"epoch": 8.356510745891278,
"grad_norm": 1.171136498451233,
"learning_rate": 6.014909808721324e-05,
"loss": 0.1928,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6610
},
{
"epoch": 8.369152970922883,
"grad_norm": 1.09550940990448,
"learning_rate": 6.0127984446180196e-05,
"loss": 0.1745,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6620
},
{
"epoch": 8.381795195954489,
"grad_norm": 1.184849739074707,
"learning_rate": 6.010848064576561e-05,
"loss": 0.1889,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6630
},
{
"epoch": 8.394437420986094,
"grad_norm": 1.1877614259719849,
"learning_rate": 6.009058713533404e-05,
"loss": 0.1859,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6640
},
{
"epoch": 8.4070796460177,
"grad_norm": 1.2223458290100098,
"learning_rate": 6.007430432714928e-05,
"loss": 0.1901,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6650
},
{
"epoch": 8.419721871049305,
"grad_norm": 1.1974024772644043,
"learning_rate": 6.005963259636473e-05,
"loss": 0.2126,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6660
},
{
"epoch": 8.43236409608091,
"grad_norm": 1.180246353149414,
"learning_rate": 6.0046572281014854e-05,
"loss": 0.1883,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6670
},
{
"epoch": 8.445006321112515,
"grad_norm": 1.1506062746047974,
"learning_rate": 6.003512368200732e-05,
"loss": 0.186,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6680
},
{
"epoch": 8.45764854614412,
"grad_norm": 1.1646850109100342,
"learning_rate": 6.002528706311613e-05,
"loss": 0.1949,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6690
},
{
"epoch": 8.470290771175726,
"grad_norm": 1.0622496604919434,
"learning_rate": 6.001706265097548e-05,
"loss": 0.1958,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6700
},
{
"epoch": 8.482932996207332,
"grad_norm": 1.1623327732086182,
"learning_rate": 6.0010450635074554e-05,
"loss": 0.1857,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6710
},
{
"epoch": 8.495575221238939,
"grad_norm": 1.1403242349624634,
"learning_rate": 6.000545116775322e-05,
"loss": 0.1894,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6720
},
{
"epoch": 8.508217446270544,
"grad_norm": 1.1317553520202637,
"learning_rate": 6.000206436419843e-05,
"loss": 0.1847,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6730
},
{
"epoch": 8.52085967130215,
"grad_norm": 1.1617845296859741,
"learning_rate": 6.000029030244164e-05,
"loss": 0.1802,
"memory/device_mem_reserved(gib)": 34.32,
"memory/max_mem_active(gib)": 33.22,
"memory/max_mem_allocated(gib)": 33.22,
"step": 6740
}
],
"logging_steps": 10,
"max_steps": 6745,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.882959280114762e+18,
"train_batch_size": 28,
"trial_name": null,
"trial_params": null
}