Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22c6b3e3c851eace2649aace03a5b121a170c8977350dcd6665778a579a61dd6
 size 313820248

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bd09df8b25ea690336a9922d71544ae580c0eb08ee3270dfc982db56860688d
 size 313820248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7780f221aaa86fafc97811de5c9ed203493d4264fd41b7e4bcecde6ee2c0c18d
 size 627785018

 version https://git-lfs.github.com/spec/v1
+oid sha256:688bf0ba98a4ff80378bb6c915cb69357f03e966f1acea4f0b1a855e1bdb37c8
 size 627785018

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd0cf55dfa4a6aa2e7e0cd8395d24cb80e5f56c5925c8f467380805097d9eef7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2403f79b8e3200cacc9cf4f4901eb9cf443b2d6b483fa76094c3c74ad857ebf
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.707120180130005,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.49875311720698257,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 25.582,
       "eval_steps_per_second": 12.816,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.187559710988698e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.6704180240631104,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.6650041562759768,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.582,
       "eval_steps_per_second": 12.816,
       "step": 150
+    },
+    {
+      "epoch": 0.5020781379883624,
+      "grad_norm": 1.2132184505462646,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 2.2669,
+      "step": 151
+    },
+    {
+      "epoch": 0.5054031587697423,
+      "grad_norm": 1.2493829727172852,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 1.9384,
+      "step": 152
+    },
+    {
+      "epoch": 0.5087281795511222,
+      "grad_norm": 1.2181565761566162,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 1.6402,
+      "step": 153
+    },
+    {
+      "epoch": 0.5120532003325021,
+      "grad_norm": 1.399489402770996,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 1.9986,
+      "step": 154
+    },
+    {
+      "epoch": 0.515378221113882,
+      "grad_norm": 1.311328411102295,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 1.5615,
+      "step": 155
+    },
+    {
+      "epoch": 0.5187032418952618,
+      "grad_norm": 1.1772243976593018,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 1.5309,
+      "step": 156
+    },
+    {
+      "epoch": 0.5220282626766417,
+      "grad_norm": 1.1859638690948486,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 1.9999,
+      "step": 157
+    },
+    {
+      "epoch": 0.5253532834580216,
+      "grad_norm": 1.4204869270324707,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 2.0745,
+      "step": 158
+    },
+    {
+      "epoch": 0.5286783042394015,
+      "grad_norm": 1.552939772605896,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 2.3328,
+      "step": 159
+    },
+    {
+      "epoch": 0.5320033250207814,
+      "grad_norm": 1.430346965789795,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 2.1166,
+      "step": 160
+    },
+    {
+      "epoch": 0.5353283458021613,
+      "grad_norm": 1.476499319076538,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 2.2905,
+      "step": 161
+    },
+    {
+      "epoch": 0.5386533665835411,
+      "grad_norm": 1.6628141403198242,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 2.4906,
+      "step": 162
+    },
+    {
+      "epoch": 0.541978387364921,
+      "grad_norm": 1.6934216022491455,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 2.2191,
+      "step": 163
+    },
+    {
+      "epoch": 0.545303408146301,
+      "grad_norm": 1.7026147842407227,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.1326,
+      "step": 164
+    },
+    {
+      "epoch": 0.5486284289276808,
+      "grad_norm": 1.9999020099639893,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 2.2275,
+      "step": 165
+    },
+    {
+      "epoch": 0.5519534497090607,
+      "grad_norm": 1.8090580701828003,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 2.2169,
+      "step": 166
+    },
+    {
+      "epoch": 0.5552784704904405,
+      "grad_norm": 1.9155068397521973,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 2.4614,
+      "step": 167
+    },
+    {
+      "epoch": 0.5586034912718204,
+      "grad_norm": 1.7961502075195312,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 2.3737,
+      "step": 168
+    },
+    {
+      "epoch": 0.5619285120532004,
+      "grad_norm": 1.702222228050232,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 2.4067,
+      "step": 169
+    },
+    {
+      "epoch": 0.5652535328345802,
+      "grad_norm": 1.8775525093078613,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 2.5256,
+      "step": 170
+    },
+    {
+      "epoch": 0.5685785536159601,
+      "grad_norm": 1.8795543909072876,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 2.4253,
+      "step": 171
+    },
+    {
+      "epoch": 0.57190357439734,
+      "grad_norm": 1.8501660823822021,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 2.593,
+      "step": 172
+    },
+    {
+      "epoch": 0.5752285951787198,
+      "grad_norm": 1.9064832925796509,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 2.5077,
+      "step": 173
+    },
+    {
+      "epoch": 0.5785536159600998,
+      "grad_norm": 2.1155214309692383,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 2.7283,
+      "step": 174
+    },
+    {
+      "epoch": 0.5818786367414797,
+      "grad_norm": 2.1804487705230713,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 2.8406,
+      "step": 175
+    },
+    {
+      "epoch": 0.5852036575228595,
+      "grad_norm": 2.1474153995513916,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 2.5451,
+      "step": 176
+    },
+    {
+      "epoch": 0.5885286783042394,
+      "grad_norm": 2.4907212257385254,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 2.9744,
+      "step": 177
+    },
+    {
+      "epoch": 0.5918536990856192,
+      "grad_norm": 2.397883415222168,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 2.7055,
+      "step": 178
+    },
+    {
+      "epoch": 0.5951787198669992,
+      "grad_norm": 2.78507137298584,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 2.8479,
+      "step": 179
+    },
+    {
+      "epoch": 0.5985037406483791,
+      "grad_norm": 2.816236734390259,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 2.7895,
+      "step": 180
+    },
+    {
+      "epoch": 0.6018287614297589,
+      "grad_norm": 2.9630537033081055,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 3.0076,
+      "step": 181
+    },
+    {
+      "epoch": 0.6051537822111388,
+      "grad_norm": 2.715308904647827,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 2.5574,
+      "step": 182
+    },
+    {
+      "epoch": 0.6084788029925187,
+      "grad_norm": 3.2204487323760986,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 2.8902,
+      "step": 183
+    },
+    {
+      "epoch": 0.6118038237738986,
+      "grad_norm": 3.073136329650879,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 2.8232,
+      "step": 184
+    },
+    {
+      "epoch": 0.6151288445552785,
+      "grad_norm": 3.2657856941223145,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 3.029,
+      "step": 185
+    },
+    {
+      "epoch": 0.6184538653366584,
+      "grad_norm": 3.361992835998535,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 2.9692,
+      "step": 186
+    },
+    {
+      "epoch": 0.6217788861180382,
+      "grad_norm": 3.456545114517212,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 3.0761,
+      "step": 187
+    },
+    {
+      "epoch": 0.6251039068994181,
+      "grad_norm": 3.416513442993164,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 2.9308,
+      "step": 188
+    },
+    {
+      "epoch": 0.628428927680798,
+      "grad_norm": 3.8827366828918457,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 3.3537,
+      "step": 189
+    },
+    {
+      "epoch": 0.6317539484621779,
+      "grad_norm": 3.5609307289123535,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 3.0339,
+      "step": 190
+    },
+    {
+      "epoch": 0.6350789692435578,
+      "grad_norm": 3.7422616481781006,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 3.2621,
+      "step": 191
+    },
+    {
+      "epoch": 0.6384039900249376,
+      "grad_norm": 4.707045078277588,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 3.3281,
+      "step": 192
+    },
+    {
+      "epoch": 0.6417290108063175,
+      "grad_norm": 3.956191301345825,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 3.1874,
+      "step": 193
+    },
+    {
+      "epoch": 0.6450540315876975,
+      "grad_norm": 4.328230857849121,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 3.3214,
+      "step": 194
+    },
+    {
+      "epoch": 0.6483790523690773,
+      "grad_norm": 4.496963977813721,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 3.205,
+      "step": 195
+    },
+    {
+      "epoch": 0.6517040731504572,
+      "grad_norm": 4.632934093475342,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 3.5702,
+      "step": 196
+    },
+    {
+      "epoch": 0.6550290939318371,
+      "grad_norm": 5.255158424377441,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 3.499,
+      "step": 197
+    },
+    {
+      "epoch": 0.6583541147132169,
+      "grad_norm": 5.30635404586792,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 3.5308,
+      "step": 198
+    },
+    {
+      "epoch": 0.6616791354945969,
+      "grad_norm": 6.519288063049316,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 3.8701,
+      "step": 199
+    },
+    {
+      "epoch": 0.6650041562759768,
+      "grad_norm": 8.301348686218262,
+      "learning_rate": 0.0,
+      "loss": 4.3302,
+      "step": 200
+    },
+    {
+      "epoch": 0.6650041562759768,
+      "eval_loss": 2.6704180240631104,
+      "eval_runtime": 19.8665,
+      "eval_samples_per_second": 25.52,
+      "eval_steps_per_second": 12.785,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.243328322004582e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null