Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8386a050c5a0d6ac30c29464246237b5f77f6acfd82b1279bfad77b0338f6639
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:61fa39fe72d3b07877b75fe39479b59104182f209a2615a3e426dd500c69d610
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e11b8e0c7e6ef4ebe8274ebcc4f99efbb33e30279c8935ed93a8c6d481a124af
 size 81730644

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9b75154a8ac4a6cfadd72600ba7b4722dee87df5c3d0af9be47b403fa2dcf82
 size 81730644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b7047cccb255e2e5a046a928df43a7a12de98b75b435e3b319fb412f41b97b6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d6b5d9c6f0c15be87f5391bf4c24c5abc24693b4a4ae1f1e316a59c22584e8b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:343f8f4bfa74d3f8aa3d5cc6579e91dcd37c40481c5d048478a6e3e523bb44f8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fee25a74f8614e14501bbc1e4dbf79c416bb01f3b851bb0f5f52f090982cd43
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7969969511032104,
-  "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.03482500435312554,
   "eval_steps": 100,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3555,6 +3555,714 @@
       "eval_samples_per_second": 7.14,
       "eval_steps_per_second": 1.785,
       "step": 500
     }
   ],
   "logging_steps": 1,
@@ -3583,7 +4291,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.359663190769664e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7635987401008606,
+  "best_model_checkpoint": "miner_id_24/checkpoint-600",
+  "epoch": 0.041790005223750655,
   "eval_steps": 100,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.14,
       "eval_steps_per_second": 1.785,
       "step": 500
+    },
+    {
+      "epoch": 0.0348946543618318,
+      "grad_norm": 0.5795607566833496,
+      "learning_rate": 9.572925909767412e-05,
+      "loss": 0.4495,
+      "step": 501
+    },
+    {
+      "epoch": 0.034964304370538046,
+      "grad_norm": 0.6874101161956787,
+      "learning_rate": 9.540096409419296e-05,
+      "loss": 0.8444,
+      "step": 502
+    },
+    {
+      "epoch": 0.035033954379244295,
+      "grad_norm": 0.5595911145210266,
+      "learning_rate": 9.507271875570381e-05,
+      "loss": 0.9391,
+      "step": 503
+    },
+    {
+      "epoch": 0.03510360438795055,
+      "grad_norm": 0.525644063949585,
+      "learning_rate": 9.474452662692838e-05,
+      "loss": 0.7833,
+      "step": 504
+    },
+    {
+      "epoch": 0.0351732543966568,
+      "grad_norm": 0.6366891264915466,
+      "learning_rate": 9.441639125201368e-05,
+      "loss": 1.0472,
+      "step": 505
+    },
+    {
+      "epoch": 0.035242904405363054,
+      "grad_norm": 0.8487269878387451,
+      "learning_rate": 9.408831617449385e-05,
+      "loss": 1.0513,
+      "step": 506
+    },
+    {
+      "epoch": 0.0353125544140693,
+      "grad_norm": 0.7027648091316223,
+      "learning_rate": 9.376030493725189e-05,
+      "loss": 0.9505,
+      "step": 507
+    },
+    {
+      "epoch": 0.03538220442277555,
+      "grad_norm": 0.6772575974464417,
+      "learning_rate": 9.343236108248139e-05,
+      "loss": 1.0417,
+      "step": 508
+    },
+    {
+      "epoch": 0.03545185443148181,
+      "grad_norm": 0.5657368898391724,
+      "learning_rate": 9.310448815164826e-05,
+      "loss": 0.9236,
+      "step": 509
+    },
+    {
+      "epoch": 0.035521504440188055,
+      "grad_norm": 0.64215087890625,
+      "learning_rate": 9.277668968545253e-05,
+      "loss": 1.0035,
+      "step": 510
+    },
+    {
+      "epoch": 0.035591154448894304,
+      "grad_norm": 0.6276829242706299,
+      "learning_rate": 9.244896922379007e-05,
+      "loss": 0.8375,
+      "step": 511
+    },
+    {
+      "epoch": 0.03566080445760056,
+      "grad_norm": 0.5804170966148376,
+      "learning_rate": 9.212133030571437e-05,
+      "loss": 0.4934,
+      "step": 512
+    },
+    {
+      "epoch": 0.03573045446630681,
+      "grad_norm": 0.7230868935585022,
+      "learning_rate": 9.17937764693983e-05,
+      "loss": 0.9427,
+      "step": 513
+    },
+    {
+      "epoch": 0.035800104475013056,
+      "grad_norm": 0.6632394194602966,
+      "learning_rate": 9.146631125209607e-05,
+      "loss": 0.4176,
+      "step": 514
+    },
+    {
+      "epoch": 0.03586975448371931,
+      "grad_norm": 0.5885234475135803,
+      "learning_rate": 9.113893819010475e-05,
+      "loss": 0.6042,
+      "step": 515
+    },
+    {
+      "epoch": 0.03593940449242556,
+      "grad_norm": 0.5666863322257996,
+      "learning_rate": 9.081166081872626e-05,
+      "loss": 1.5152,
+      "step": 516
+    },
+    {
+      "epoch": 0.036009054501131815,
+      "grad_norm": 0.7007538676261902,
+      "learning_rate": 9.048448267222918e-05,
+      "loss": 0.9444,
+      "step": 517
+    },
+    {
+      "epoch": 0.036078704509838064,
+      "grad_norm": 0.6212923526763916,
+      "learning_rate": 9.015740728381054e-05,
+      "loss": 0.634,
+      "step": 518
+    },
+    {
+      "epoch": 0.03614835451854431,
+      "grad_norm": 0.6189596056938171,
+      "learning_rate": 8.98304381855577e-05,
+      "loss": 1.1091,
+      "step": 519
+    },
+    {
+      "epoch": 0.03621800452725057,
+      "grad_norm": 0.6159670948982239,
+      "learning_rate": 8.95035789084102e-05,
+      "loss": 0.787,
+      "step": 520
+    },
+    {
+      "epoch": 0.036287654535956816,
+      "grad_norm": 0.6371515989303589,
+      "learning_rate": 8.917683298212158e-05,
+      "loss": 0.6172,
+      "step": 521
+    },
+    {
+      "epoch": 0.036357304544663065,
+      "grad_norm": 0.6314066052436829,
+      "learning_rate": 8.885020393522135e-05,
+      "loss": 0.9702,
+      "step": 522
+    },
+    {
+      "epoch": 0.03642695455336932,
+      "grad_norm": 0.6285626888275146,
+      "learning_rate": 8.852369529497679e-05,
+      "loss": 0.9819,
+      "step": 523
+    },
+    {
+      "epoch": 0.03649660456207557,
+      "grad_norm": 0.5257949233055115,
+      "learning_rate": 8.819731058735501e-05,
+      "loss": 0.8288,
+      "step": 524
+    },
+    {
+      "epoch": 0.036566254570781824,
+      "grad_norm": 0.611438512802124,
+      "learning_rate": 8.787105333698465e-05,
+      "loss": 0.9246,
+      "step": 525
+    },
+    {
+      "epoch": 0.03663590457948807,
+      "grad_norm": 0.5995710492134094,
+      "learning_rate": 8.754492706711798e-05,
+      "loss": 0.6855,
+      "step": 526
+    },
+    {
+      "epoch": 0.03670555458819432,
+      "grad_norm": 0.681425154209137,
+      "learning_rate": 8.721893529959287e-05,
+      "loss": 1.1644,
+      "step": 527
+    },
+    {
+      "epoch": 0.036775204596900576,
+      "grad_norm": 0.7111718654632568,
+      "learning_rate": 8.68930815547946e-05,
+      "loss": 0.9181,
+      "step": 528
+    },
+    {
+      "epoch": 0.036844854605606825,
+      "grad_norm": 0.5794047713279724,
+      "learning_rate": 8.656736935161802e-05,
+      "loss": 1.061,
+      "step": 529
+    },
+    {
+      "epoch": 0.03691450461431307,
+      "grad_norm": 0.5971503257751465,
+      "learning_rate": 8.624180220742946e-05,
+      "loss": 0.5903,
+      "step": 530
+    },
+    {
+      "epoch": 0.03698415462301933,
+      "grad_norm": 0.7091482281684875,
+      "learning_rate": 8.59163836380287e-05,
+      "loss": 0.8907,
+      "step": 531
+    },
+    {
+      "epoch": 0.03705380463172558,
+      "grad_norm": 0.6185580492019653,
+      "learning_rate": 8.559111715761114e-05,
+      "loss": 0.8452,
+      "step": 532
+    },
+    {
+      "epoch": 0.03712345464043183,
+      "grad_norm": 0.68827223777771,
+      "learning_rate": 8.52660062787297e-05,
+      "loss": 0.8711,
+      "step": 533
+    },
+    {
+      "epoch": 0.03719310464913808,
+      "grad_norm": 0.6279632449150085,
+      "learning_rate": 8.494105451225704e-05,
+      "loss": 0.6453,
+      "step": 534
+    },
+    {
+      "epoch": 0.03726275465784433,
+      "grad_norm": 0.7252237200737,
+      "learning_rate": 8.461626536734753e-05,
+      "loss": 1.1148,
+      "step": 535
+    },
+    {
+      "epoch": 0.037332404666550585,
+      "grad_norm": 0.6377342939376831,
+      "learning_rate": 8.429164235139931e-05,
+      "loss": 1.0532,
+      "step": 536
+    },
+    {
+      "epoch": 0.037402054675256834,
+      "grad_norm": 0.7409278154373169,
+      "learning_rate": 8.396718897001663e-05,
+      "loss": 1.0161,
+      "step": 537
+    },
+    {
+      "epoch": 0.03747170468396308,
+      "grad_norm": 0.6048555970191956,
+      "learning_rate": 8.364290872697173e-05,
+      "loss": 1.012,
+      "step": 538
+    },
+    {
+      "epoch": 0.03754135469266934,
+      "grad_norm": 0.7676815390586853,
+      "learning_rate": 8.331880512416724e-05,
+      "loss": 0.9402,
+      "step": 539
+    },
+    {
+      "epoch": 0.037611004701375586,
+      "grad_norm": 0.6360906958580017,
+      "learning_rate": 8.299488166159817e-05,
+      "loss": 0.4591,
+      "step": 540
+    },
+    {
+      "epoch": 0.03768065471008184,
+      "grad_norm": 0.6816183924674988,
+      "learning_rate": 8.267114183731421e-05,
+      "loss": 0.661,
+      "step": 541
+    },
+    {
+      "epoch": 0.03775030471878809,
+      "grad_norm": 0.6955873966217041,
+      "learning_rate": 8.234758914738199e-05,
+      "loss": 0.8015,
+      "step": 542
+    },
+    {
+      "epoch": 0.03781995472749434,
+      "grad_norm": 0.787493884563446,
+      "learning_rate": 8.20242270858472e-05,
+      "loss": 0.6941,
+      "step": 543
+    },
+    {
+      "epoch": 0.037889604736200594,
+      "grad_norm": 0.5939062833786011,
+      "learning_rate": 8.170105914469702e-05,
+      "loss": 0.9034,
+      "step": 544
+    },
+    {
+      "epoch": 0.03795925474490684,
+      "grad_norm": 0.5235042572021484,
+      "learning_rate": 8.137808881382226e-05,
+      "loss": 1.0283,
+      "step": 545
+    },
+    {
+      "epoch": 0.03802890475361309,
+      "grad_norm": 0.7017082571983337,
+      "learning_rate": 8.105531958097972e-05,
+      "loss": 1.0407,
+      "step": 546
+    },
+    {
+      "epoch": 0.038098554762319346,
+      "grad_norm": 0.7762130498886108,
+      "learning_rate": 8.073275493175464e-05,
+      "loss": 0.7814,
+      "step": 547
+    },
+    {
+      "epoch": 0.038168204771025595,
+      "grad_norm": 0.588405191898346,
+      "learning_rate": 8.041039834952287e-05,
+      "loss": 0.8832,
+      "step": 548
+    },
+    {
+      "epoch": 0.03823785477973185,
+      "grad_norm": 0.7792285084724426,
+      "learning_rate": 8.008825331541335e-05,
+      "loss": 1.051,
+      "step": 549
+    },
+    {
+      "epoch": 0.0383075047884381,
+      "grad_norm": 0.6209467649459839,
+      "learning_rate": 7.976632330827056e-05,
+      "loss": 0.8802,
+      "step": 550
+    },
+    {
+      "epoch": 0.03837715479714435,
+      "grad_norm": 0.5231680274009705,
+      "learning_rate": 7.944461180461686e-05,
+      "loss": 0.7529,
+      "step": 551
+    },
+    {
+      "epoch": 0.0384468048058506,
+      "grad_norm": 0.6021607518196106,
+      "learning_rate": 7.912312227861503e-05,
+      "loss": 1.1235,
+      "step": 552
+    },
+    {
+      "epoch": 0.03851645481455685,
+      "grad_norm": 0.5573668479919434,
+      "learning_rate": 7.880185820203065e-05,
+      "loss": 0.6753,
+      "step": 553
+    },
+    {
+      "epoch": 0.0385861048232631,
+      "grad_norm": 0.5354910492897034,
+      "learning_rate": 7.848082304419478e-05,
+      "loss": 0.6843,
+      "step": 554
+    },
+    {
+      "epoch": 0.038655754831969355,
+      "grad_norm": 0.606436014175415,
+      "learning_rate": 7.816002027196627e-05,
+      "loss": 1.0557,
+      "step": 555
+    },
+    {
+      "epoch": 0.038725404840675604,
+      "grad_norm": 0.6580552458763123,
+      "learning_rate": 7.783945334969451e-05,
+      "loss": 0.6222,
+      "step": 556
+    },
+    {
+      "epoch": 0.03879505484938186,
+      "grad_norm": 0.6174128651618958,
+      "learning_rate": 7.751912573918193e-05,
+      "loss": 0.8194,
+      "step": 557
+    },
+    {
+      "epoch": 0.03886470485808811,
+      "grad_norm": 0.6724019646644592,
+      "learning_rate": 7.719904089964658e-05,
+      "loss": 1.0095,
+      "step": 558
+    },
+    {
+      "epoch": 0.038934354866794356,
+      "grad_norm": 0.7200993299484253,
+      "learning_rate": 7.687920228768493e-05,
+      "loss": 0.8115,
+      "step": 559
+    },
+    {
+      "epoch": 0.03900400487550061,
+      "grad_norm": 0.5682472586631775,
+      "learning_rate": 7.655961335723433e-05,
+      "loss": 0.7034,
+      "step": 560
+    },
+    {
+      "epoch": 0.03907365488420686,
+      "grad_norm": 0.7236086130142212,
+      "learning_rate": 7.624027755953592e-05,
+      "loss": 0.9028,
+      "step": 561
+    },
+    {
+      "epoch": 0.03914330489291311,
+      "grad_norm": 0.5866789221763611,
+      "learning_rate": 7.592119834309715e-05,
+      "loss": 0.8919,
+      "step": 562
+    },
+    {
+      "epoch": 0.039212954901619364,
+      "grad_norm": 0.6271937489509583,
+      "learning_rate": 7.560237915365472e-05,
+      "loss": 0.6447,
+      "step": 563
+    },
+    {
+      "epoch": 0.03928260491032561,
+      "grad_norm": 0.5319473147392273,
+      "learning_rate": 7.528382343413734e-05,
+      "loss": 1.0977,
+      "step": 564
+    },
+    {
+      "epoch": 0.03935225491903187,
+      "grad_norm": 0.673537015914917,
+      "learning_rate": 7.49655346246284e-05,
+      "loss": 0.6669,
+      "step": 565
+    },
+    {
+      "epoch": 0.039421904927738116,
+      "grad_norm": 0.7043957114219666,
+      "learning_rate": 7.464751616232902e-05,
+      "loss": 0.6334,
+      "step": 566
+    },
+    {
+      "epoch": 0.039491554936444365,
+      "grad_norm": 0.6532731652259827,
+      "learning_rate": 7.432977148152074e-05,
+      "loss": 0.659,
+      "step": 567
+    },
+    {
+      "epoch": 0.03956120494515062,
+      "grad_norm": 0.6882482767105103,
+      "learning_rate": 7.401230401352866e-05,
+      "loss": 0.711,
+      "step": 568
+    },
+    {
+      "epoch": 0.03963085495385687,
+      "grad_norm": 0.7171745896339417,
+      "learning_rate": 7.369511718668418e-05,
+      "loss": 0.941,
+      "step": 569
+    },
+    {
+      "epoch": 0.03970050496256312,
+      "grad_norm": 0.6474679708480835,
+      "learning_rate": 7.337821442628805e-05,
+      "loss": 0.8192,
+      "step": 570
+    },
+    {
+      "epoch": 0.03977015497126937,
+      "grad_norm": 0.7054280042648315,
+      "learning_rate": 7.306159915457342e-05,
+      "loss": 0.6327,
+      "step": 571
+    },
+    {
+      "epoch": 0.03983980497997562,
+      "grad_norm": 0.7624709606170654,
+      "learning_rate": 7.274527479066883e-05,
+      "loss": 0.8132,
+      "step": 572
+    },
+    {
+      "epoch": 0.039909454988681876,
+      "grad_norm": 0.6930527687072754,
+      "learning_rate": 7.242924475056127e-05,
+      "loss": 0.8482,
+      "step": 573
+    },
+    {
+      "epoch": 0.039979104997388125,
+      "grad_norm": 0.6599513292312622,
+      "learning_rate": 7.211351244705946e-05,
+      "loss": 0.6787,
+      "step": 574
+    },
+    {
+      "epoch": 0.04004875500609437,
+      "grad_norm": 0.7311400771141052,
+      "learning_rate": 7.179808128975674e-05,
+      "loss": 0.9747,
+      "step": 575
+    },
+    {
+      "epoch": 0.04011840501480063,
+      "grad_norm": 0.615138828754425,
+      "learning_rate": 7.148295468499438e-05,
+      "loss": 0.9404,
+      "step": 576
+    },
+    {
+      "epoch": 0.04018805502350688,
+      "grad_norm": 0.6401761174201965,
+      "learning_rate": 7.116813603582482e-05,
+      "loss": 0.4915,
+      "step": 577
+    },
+    {
+      "epoch": 0.040257705032213126,
+      "grad_norm": 0.6191440224647522,
+      "learning_rate": 7.08536287419749e-05,
+      "loss": 0.6031,
+      "step": 578
+    },
+    {
+      "epoch": 0.04032735504091938,
+      "grad_norm": 0.5751050710678101,
+      "learning_rate": 7.053943619980907e-05,
+      "loss": 0.8371,
+      "step": 579
+    },
+    {
+      "epoch": 0.04039700504962563,
+      "grad_norm": 0.518409252166748,
+      "learning_rate": 7.022556180229285e-05,
+      "loss": 0.4333,
+      "step": 580
+    },
+    {
+      "epoch": 0.040466655058331885,
+      "grad_norm": 0.5712803602218628,
+      "learning_rate": 6.991200893895608e-05,
+      "loss": 0.796,
+      "step": 581
+    },
+    {
+      "epoch": 0.040536305067038134,
+      "grad_norm": 0.661482036113739,
+      "learning_rate": 6.959878099585635e-05,
+      "loss": 0.8585,
+      "step": 582
+    },
+    {
+      "epoch": 0.04060595507574438,
+      "grad_norm": 0.6602011322975159,
+      "learning_rate": 6.92858813555424e-05,
+      "loss": 0.9474,
+      "step": 583
+    },
+    {
+      "epoch": 0.04067560508445064,
+      "grad_norm": 0.5971815586090088,
+      "learning_rate": 6.897331339701776e-05,
+      "loss": 0.7689,
+      "step": 584
+    },
+    {
+      "epoch": 0.040745255093156886,
+      "grad_norm": 0.571740448474884,
+      "learning_rate": 6.866108049570397e-05,
+      "loss": 0.9023,
+      "step": 585
+    },
+    {
+      "epoch": 0.040814905101863135,
+      "grad_norm": 0.6928638219833374,
+      "learning_rate": 6.834918602340438e-05,
+      "loss": 0.8899,
+      "step": 586
+    },
+    {
+      "epoch": 0.04088455511056939,
+      "grad_norm": 0.6468199491500854,
+      "learning_rate": 6.803763334826763e-05,
+      "loss": 0.8841,
+      "step": 587
+    },
+    {
+      "epoch": 0.04095420511927564,
+      "grad_norm": 0.6777251362800598,
+      "learning_rate": 6.772642583475126e-05,
+      "loss": 0.8491,
+      "step": 588
+    },
+    {
+      "epoch": 0.041023855127981894,
+      "grad_norm": 0.5866687297821045,
+      "learning_rate": 6.741556684358545e-05,
+      "loss": 0.6435,
+      "step": 589
+    },
+    {
+      "epoch": 0.04109350513668814,
+      "grad_norm": 0.5522730350494385,
+      "learning_rate": 6.710505973173664e-05,
+      "loss": 0.9188,
+      "step": 590
+    },
+    {
+      "epoch": 0.04116315514539439,
+      "grad_norm": 0.7048250436782837,
+      "learning_rate": 6.679490785237137e-05,
+      "loss": 0.911,
+      "step": 591
+    },
+    {
+      "epoch": 0.041232805154100646,
+      "grad_norm": 0.849677324295044,
+      "learning_rate": 6.648511455482003e-05,
+      "loss": 1.0408,
+      "step": 592
+    },
+    {
+      "epoch": 0.041302455162806895,
+      "grad_norm": 0.653287947177887,
+      "learning_rate": 6.617568318454059e-05,
+      "loss": 1.187,
+      "step": 593
+    },
+    {
+      "epoch": 0.04137210517151314,
+      "grad_norm": 0.5278560519218445,
+      "learning_rate": 6.586661708308272e-05,
+      "loss": 0.8789,
+      "step": 594
+    },
+    {
+      "epoch": 0.0414417551802194,
+      "grad_norm": 0.7803817987442017,
+      "learning_rate": 6.555791958805147e-05,
+      "loss": 0.8788,
+      "step": 595
+    },
+    {
+      "epoch": 0.04151140518892565,
+      "grad_norm": 0.6425774097442627,
+      "learning_rate": 6.524959403307125e-05,
+      "loss": 0.9296,
+      "step": 596
+    },
+    {
+      "epoch": 0.0415810551976319,
+      "grad_norm": 0.5787883400917053,
+      "learning_rate": 6.494164374775e-05,
+      "loss": 1.0127,
+      "step": 597
+    },
+    {
+      "epoch": 0.04165070520633815,
+      "grad_norm": 0.5686517357826233,
+      "learning_rate": 6.463407205764305e-05,
+      "loss": 0.7869,
+      "step": 598
+    },
+    {
+      "epoch": 0.0417203552150444,
+      "grad_norm": 0.5126462578773499,
+      "learning_rate": 6.43268822842173e-05,
+      "loss": 1.2029,
+      "step": 599
+    },
+    {
+      "epoch": 0.041790005223750655,
+      "grad_norm": 0.5618976950645447,
+      "learning_rate": 6.402007774481536e-05,
+      "loss": 0.5725,
+      "step": 600
+    },
+    {
+      "epoch": 0.041790005223750655,
+      "eval_loss": 0.7635987401008606,
+      "eval_runtime": 701.6781,
+      "eval_samples_per_second": 7.126,
+      "eval_steps_per_second": 1.781,
+      "step": 600
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.6329142901994947e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null