Training in progress, step 1463, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +445 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62bb9bb9b77666d17bdc3df547411bff527086e2cbe55ec527b6c0bff62c3c6c
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:76c50a918ed6e25310310327d96a41f7be0e1c8dba72e17f5343f2601e20241d
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2240d1cf7bee9ac6f7ad5b8ee959d9b94749f2444ea20777ea0317b7f0d24c5
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:b696ff230e34810056a87e2deb52ecbe8fdb57a70eb3920f5040424edba8b000
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7fddfe0e996aab68b2e2a4ae0b2b5688241a5f64c97387ef9b362e165ee6f48
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a0be03340b8e06f61dec93cc182f3169930283c402a6cbbe5edbaf390086811
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad3eba5ecd76309cae37fb299b7255ca533e46092d32ba5488b2545bff9b369
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ddba364fa66c16b2353d38cd96d0db084fafe0b8f317c23722a5d9cf61bf020
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8337556719779968,
   "best_model_checkpoint": "miner_id_24/checkpoint-1400",
-  "epoch": 1.9148572405539408,
   "eval_steps": 100,
-  "global_step": 1400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9927,6 +9927,447 @@
       "eval_samples_per_second": 12.086,
       "eval_steps_per_second": 3.022,
       "step": 1400
     }
   ],
   "logging_steps": 1,
@@ -9950,12 +10391,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.5806932897014743e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.8337556719779968,
   "best_model_checkpoint": "miner_id_24/checkpoint-1400",
+  "epoch": 2.0010258163788683,
   "eval_steps": 100,
+  "global_step": 1463,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.086,
       "eval_steps_per_second": 3.022,
       "step": 1400
+    },
+    {
+      "epoch": 1.916224995725765,
+      "grad_norm": 0.461910605430603,
+      "learning_rate": 8.971631787046853e-07,
+      "loss": 0.8424,
+      "step": 1401
+    },
+    {
+      "epoch": 1.9175927508975894,
+      "grad_norm": 0.4228653907775879,
+      "learning_rate": 8.684974514604705e-07,
+      "loss": 0.6749,
+      "step": 1402
+    },
+    {
+      "epoch": 1.9189605060694137,
+      "grad_norm": 0.45295336842536926,
+      "learning_rate": 8.402951498328926e-07,
+      "loss": 0.8973,
+      "step": 1403
+    },
+    {
+      "epoch": 1.9203282612412378,
+      "grad_norm": 0.4639895558357239,
+      "learning_rate": 8.125564056637003e-07,
+      "loss": 0.6882,
+      "step": 1404
+    },
+    {
+      "epoch": 1.921696016413062,
+      "grad_norm": 0.5838170647621155,
+      "learning_rate": 7.852813486275423e-07,
+      "loss": 0.8531,
+      "step": 1405
+    },
+    {
+      "epoch": 1.9230637715848862,
+      "grad_norm": 0.44935956597328186,
+      "learning_rate": 7.584701062314237e-07,
+      "loss": 0.6856,
+      "step": 1406
+    },
+    {
+      "epoch": 1.9244315267567105,
+      "grad_norm": 0.5075057148933411,
+      "learning_rate": 7.321228038140726e-07,
+      "loss": 0.6377,
+      "step": 1407
+    },
+    {
+      "epoch": 1.9257992819285348,
+      "grad_norm": 0.4068799912929535,
+      "learning_rate": 7.062395645453634e-07,
+      "loss": 0.9096,
+      "step": 1408
+    },
+    {
+      "epoch": 1.927167037100359,
+      "grad_norm": 0.4617645740509033,
+      "learning_rate": 6.808205094257503e-07,
+      "loss": 0.6808,
+      "step": 1409
+    },
+    {
+      "epoch": 1.9285347922721834,
+      "grad_norm": 0.44651269912719727,
+      "learning_rate": 6.558657572856786e-07,
+      "loss": 0.5143,
+      "step": 1410
+    },
+    {
+      "epoch": 1.9299025474440077,
+      "grad_norm": 0.3676847517490387,
+      "learning_rate": 6.313754247850523e-07,
+      "loss": 0.611,
+      "step": 1411
+    },
+    {
+      "epoch": 1.9312703026158318,
+      "grad_norm": 0.4552142024040222,
+      "learning_rate": 6.073496264126788e-07,
+      "loss": 0.9821,
+      "step": 1412
+    },
+    {
+      "epoch": 1.9326380577876558,
+      "grad_norm": 0.4049084484577179,
+      "learning_rate": 5.837884744857358e-07,
+      "loss": 0.5781,
+      "step": 1413
+    },
+    {
+      "epoch": 1.9340058129594802,
+      "grad_norm": 0.49279075860977173,
+      "learning_rate": 5.606920791492387e-07,
+      "loss": 0.6507,
+      "step": 1414
+    },
+    {
+      "epoch": 1.9353735681313045,
+      "grad_norm": 0.47832998633384705,
+      "learning_rate": 5.380605483755408e-07,
+      "loss": 0.69,
+      "step": 1415
+    },
+    {
+      "epoch": 1.9367413233031288,
+      "grad_norm": 0.5709347128868103,
+      "learning_rate": 5.158939879638225e-07,
+      "loss": 0.8283,
+      "step": 1416
+    },
+    {
+      "epoch": 1.938109078474953,
+      "grad_norm": 0.44375723600387573,
+      "learning_rate": 4.941925015395699e-07,
+      "loss": 0.7771,
+      "step": 1417
+    },
+    {
+      "epoch": 1.9394768336467774,
+      "grad_norm": 0.48919472098350525,
+      "learning_rate": 4.729561905541524e-07,
+      "loss": 0.7034,
+      "step": 1418
+    },
+    {
+      "epoch": 1.9408445888186014,
+      "grad_norm": 0.38905012607574463,
+      "learning_rate": 4.521851542842681e-07,
+      "loss": 0.6598,
+      "step": 1419
+    },
+    {
+      "epoch": 1.9422123439904257,
+      "grad_norm": 0.4131908714771271,
+      "learning_rate": 4.31879489831577e-07,
+      "loss": 0.7236,
+      "step": 1420
+    },
+    {
+      "epoch": 1.9435800991622498,
+      "grad_norm": 0.37935328483581543,
+      "learning_rate": 4.12039292122135e-07,
+      "loss": 0.6964,
+      "step": 1421
+    },
+    {
+      "epoch": 1.9449478543340741,
+      "grad_norm": 0.3789116442203522,
+      "learning_rate": 3.9266465390603855e-07,
+      "loss": 0.6997,
+      "step": 1422
+    },
+    {
+      "epoch": 1.9463156095058984,
+      "grad_norm": 0.4881550669670105,
+      "learning_rate": 3.7375566575695854e-07,
+      "loss": 0.6896,
+      "step": 1423
+    },
+    {
+      "epoch": 1.9476833646777227,
+      "grad_norm": 0.4398065507411957,
+      "learning_rate": 3.5531241607170695e-07,
+      "loss": 0.6534,
+      "step": 1424
+    },
+    {
+      "epoch": 1.949051119849547,
+      "grad_norm": 0.45195892453193665,
+      "learning_rate": 3.373349910698487e-07,
+      "loss": 0.6845,
+      "step": 1425
+    },
+    {
+      "epoch": 1.9504188750213711,
+      "grad_norm": 0.38856571912765503,
+      "learning_rate": 3.1982347479327935e-07,
+      "loss": 0.6648,
+      "step": 1426
+    },
+    {
+      "epoch": 1.9517866301931954,
+      "grad_norm": 0.5333290100097656,
+      "learning_rate": 3.027779491058369e-07,
+      "loss": 0.8773,
+      "step": 1427
+    },
+    {
+      "epoch": 1.9531543853650195,
+      "grad_norm": 0.42469778656959534,
+      "learning_rate": 2.8619849369290185e-07,
+      "loss": 0.6108,
+      "step": 1428
+    },
+    {
+      "epoch": 1.9545221405368438,
+      "grad_norm": 0.4393293857574463,
+      "learning_rate": 2.7008518606108644e-07,
+      "loss": 0.7355,
+      "step": 1429
+    },
+    {
+      "epoch": 1.9558898957086681,
+      "grad_norm": 0.4575292468070984,
+      "learning_rate": 2.544381015377906e-07,
+      "loss": 0.7924,
+      "step": 1430
+    },
+    {
+      "epoch": 1.9572576508804924,
+      "grad_norm": 0.3630157709121704,
+      "learning_rate": 2.3925731327089086e-07,
+      "loss": 0.4605,
+      "step": 1431
+    },
+    {
+      "epoch": 1.9586254060523167,
+      "grad_norm": 0.48664095997810364,
+      "learning_rate": 2.2454289222842984e-07,
+      "loss": 0.7356,
+      "step": 1432
+    },
+    {
+      "epoch": 1.959993161224141,
+      "grad_norm": 0.49082890152931213,
+      "learning_rate": 2.1029490719819411e-07,
+      "loss": 0.654,
+      "step": 1433
+    },
+    {
+      "epoch": 1.961360916395965,
+      "grad_norm": 0.3563566207885742,
+      "learning_rate": 1.9651342478749223e-07,
+      "loss": 0.5259,
+      "step": 1434
+    },
+    {
+      "epoch": 1.9627286715677894,
+      "grad_norm": 0.4090525805950165,
+      "learning_rate": 1.8319850942278839e-07,
+      "loss": 0.8423,
+      "step": 1435
+    },
+    {
+      "epoch": 1.9640964267396135,
+      "grad_norm": 0.39165446162223816,
+      "learning_rate": 1.7035022334941364e-07,
+      "loss": 0.7934,
+      "step": 1436
+    },
+    {
+      "epoch": 1.9654641819114378,
+      "grad_norm": 0.4389599561691284,
+      "learning_rate": 1.5796862663126634e-07,
+      "loss": 0.5754,
+      "step": 1437
+    },
+    {
+      "epoch": 1.966831937083262,
+      "grad_norm": 0.47216951847076416,
+      "learning_rate": 1.4605377715053436e-07,
+      "loss": 0.789,
+      "step": 1438
+    },
+    {
+      "epoch": 1.9681996922550864,
+      "grad_norm": 0.39848554134368896,
+      "learning_rate": 1.3460573060745106e-07,
+      "loss": 0.7184,
+      "step": 1439
+    },
+    {
+      "epoch": 1.9695674474269107,
+      "grad_norm": 0.550101637840271,
+      "learning_rate": 1.2362454051998428e-07,
+      "loss": 0.7593,
+      "step": 1440
+    },
+    {
+      "epoch": 1.9709352025987348,
+      "grad_norm": 0.5361692905426025,
+      "learning_rate": 1.1311025822364762e-07,
+      "loss": 0.7345,
+      "step": 1441
+    },
+    {
+      "epoch": 1.972302957770559,
+      "grad_norm": 0.4945903420448303,
+      "learning_rate": 1.0306293287118962e-07,
+      "loss": 0.781,
+      "step": 1442
+    },
+    {
+      "epoch": 1.9736707129423832,
+      "grad_norm": 0.36988723278045654,
+      "learning_rate": 9.348261143243831e-08,
+      "loss": 0.7448,
+      "step": 1443
+    },
+    {
+      "epoch": 1.9750384681142075,
+      "grad_norm": 0.38076066970825195,
+      "learning_rate": 8.436933869402363e-08,
+      "loss": 0.4683,
+      "step": 1444
+    },
+    {
+      "epoch": 1.9764062232860318,
+      "grad_norm": 0.4106510281562805,
+      "learning_rate": 7.572315725918877e-08,
+      "loss": 0.9063,
+      "step": 1445
+    },
+    {
+      "epoch": 1.977773978457856,
+      "grad_norm": 0.486651211977005,
+      "learning_rate": 6.754410754759022e-08,
+      "loss": 0.864,
+      "step": 1446
+    },
+    {
+      "epoch": 1.9791417336296804,
+      "grad_norm": 0.4399421215057373,
+      "learning_rate": 5.983222779514242e-08,
+      "loss": 0.7105,
+      "step": 1447
+    },
+    {
+      "epoch": 1.9805094888015047,
+      "grad_norm": 0.3864593803882599,
+      "learning_rate": 5.2587554053751266e-08,
+      "loss": 0.7329,
+      "step": 1448
+    },
+    {
+      "epoch": 1.9818772439733288,
+      "grad_norm": 0.38582947850227356,
+      "learning_rate": 4.581012019125863e-08,
+      "loss": 0.7425,
+      "step": 1449
+    },
+    {
+      "epoch": 1.983244999145153,
+      "grad_norm": 0.37019839882850647,
+      "learning_rate": 3.949995789118699e-08,
+      "loss": 0.5574,
+      "step": 1450
+    },
+    {
+      "epoch": 1.9846127543169771,
+      "grad_norm": 0.44995346665382385,
+      "learning_rate": 3.36570966526395e-08,
+      "loss": 0.6735,
+      "step": 1451
+    },
+    {
+      "epoch": 1.9859805094888014,
+      "grad_norm": 0.4024134874343872,
+      "learning_rate": 2.8281563790166776e-08,
+      "loss": 0.8033,
+      "step": 1452
+    },
+    {
+      "epoch": 1.9873482646606258,
+      "grad_norm": 0.4701332449913025,
+      "learning_rate": 2.337338443361148e-08,
+      "loss": 0.8692,
+      "step": 1453
+    },
+    {
+      "epoch": 1.98871601983245,
+      "grad_norm": 0.4885278046131134,
+      "learning_rate": 1.8932581528008364e-08,
+      "loss": 0.7488,
+      "step": 1454
+    },
+    {
+      "epoch": 1.9900837750042744,
+      "grad_norm": 0.4467014670372009,
+      "learning_rate": 1.4959175833495486e-08,
+      "loss": 0.669,
+      "step": 1455
+    },
+    {
+      "epoch": 1.9914515301760984,
+      "grad_norm": 0.5788629651069641,
+      "learning_rate": 1.1453185925158761e-08,
+      "loss": 0.7926,
+      "step": 1456
+    },
+    {
+      "epoch": 1.9928192853479227,
+      "grad_norm": 0.5242011547088623,
+      "learning_rate": 8.414628192998652e-09,
+      "loss": 0.9303,
+      "step": 1457
+    },
+    {
+      "epoch": 1.9941870405197468,
+      "grad_norm": 0.3865733742713928,
+      "learning_rate": 5.84351684185247e-09,
+      "loss": 0.6104,
+      "step": 1458
+    },
+    {
+      "epoch": 1.9955547956915711,
+      "grad_norm": 0.5221365690231323,
+      "learning_rate": 3.739863891283335e-09,
+      "loss": 0.9627,
+      "step": 1459
+    },
+    {
+      "epoch": 1.9969225508633954,
+      "grad_norm": 0.5138529539108276,
+      "learning_rate": 2.1036791755801866e-09,
+      "loss": 0.613,
+      "step": 1460
+    },
+    {
+      "epoch": 1.9982903060352197,
+      "grad_norm": 0.5094766020774841,
+      "learning_rate": 9.349703436689617e-10,
+      "loss": 0.8272,
+      "step": 1461
+    },
+    {
+      "epoch": 1.999658061207044,
+      "grad_norm": 0.4561411142349243,
+      "learning_rate": 2.337428590903912e-10,
+      "loss": 0.7996,
+      "step": 1462
+    },
+    {
+      "epoch": 2.0010258163788683,
+      "grad_norm": 1.2804397344589233,
+      "learning_rate": 0.0,
+      "loss": 1.2298,
+      "step": 1463
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.6517521711772467e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null