Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:319fb3c0aaa11a67903f0926624e9be499009cb8d2b91326ebe0c533ba723858
 size 313820248

 version https://git-lfs.github.com/spec/v1
+oid sha256:10262938e4f1856798bdc7a65e34f268fd918589b61c2ba1033eba233752ea14
 size 313820248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9aff0cb3a559e774fadc77fe30403a22d6fdf182c77d7a3f809f7501426d7df6
 size 159641092

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ec00fbb720eb697cbdcf859838348dceb8a3278b3b9318908b4cb504e834286
 size 159641092

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ca59ece6bb9b6c964660dfce5318d8f15fdc3a09843266260257f34fdb5abf1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:82338bc22dab7e61863a50f8a871541b83d60fe3013b13e8a41bb2602174a95e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.181919574737549,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.03900916715428126,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 28.454,
       "eval_steps_per_second": 7.117,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.07616338231296e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.1752114295959473,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.05201222287237501,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 28.454,
       "eval_steps_per_second": 7.117,
       "step": 150
+    },
+    {
+      "epoch": 0.03926922826864313,
+      "grad_norm": 2.149697780609131,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 2.0546,
+      "step": 151
+    },
+    {
+      "epoch": 0.039529289383005005,
+      "grad_norm": 2.1155319213867188,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 2.0647,
+      "step": 152
+    },
+    {
+      "epoch": 0.03978935049736688,
+      "grad_norm": 2.1451268196105957,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 2.1496,
+      "step": 153
+    },
+    {
+      "epoch": 0.04004941161172876,
+      "grad_norm": 2.4642882347106934,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 2.0772,
+      "step": 154
+    },
+    {
+      "epoch": 0.04030947272609063,
+      "grad_norm": 2.1916542053222656,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 2.2441,
+      "step": 155
+    },
+    {
+      "epoch": 0.040569533840452504,
+      "grad_norm": 2.1746671199798584,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 2.1504,
+      "step": 156
+    },
+    {
+      "epoch": 0.040829594954814384,
+      "grad_norm": 2.157935857772827,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 2.107,
+      "step": 157
+    },
+    {
+      "epoch": 0.04108965606917626,
+      "grad_norm": 2.062431812286377,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 2.081,
+      "step": 158
+    },
+    {
+      "epoch": 0.04134971718353813,
+      "grad_norm": 2.142822027206421,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 2.0843,
+      "step": 159
+    },
+    {
+      "epoch": 0.0416097782979,
+      "grad_norm": 2.1664884090423584,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 2.0942,
+      "step": 160
+    },
+    {
+      "epoch": 0.04186983941226188,
+      "grad_norm": 2.2922542095184326,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 2.205,
+      "step": 161
+    },
+    {
+      "epoch": 0.042129900526623756,
+      "grad_norm": 2.0822224617004395,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.1184,
+      "step": 162
+    },
+    {
+      "epoch": 0.04238996164098563,
+      "grad_norm": 2.0957558155059814,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 2.0645,
+      "step": 163
+    },
+    {
+      "epoch": 0.04265002275534751,
+      "grad_norm": 2.269651174545288,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 2.1873,
+      "step": 164
+    },
+    {
+      "epoch": 0.04291008386970938,
+      "grad_norm": 2.290019989013672,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 2.2227,
+      "step": 165
+    },
+    {
+      "epoch": 0.043170144984071256,
+      "grad_norm": 2.2755532264709473,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 2.2698,
+      "step": 166
+    },
+    {
+      "epoch": 0.04343020609843313,
+      "grad_norm": 2.457045316696167,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 2.1578,
+      "step": 167
+    },
+    {
+      "epoch": 0.04369026721279501,
+      "grad_norm": 2.1705782413482666,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 2.1767,
+      "step": 168
+    },
+    {
+      "epoch": 0.04395032832715688,
+      "grad_norm": 2.231419563293457,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 2.2779,
+      "step": 169
+    },
+    {
+      "epoch": 0.044210389441518755,
+      "grad_norm": 2.3254246711730957,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 2.2079,
+      "step": 170
+    },
+    {
+      "epoch": 0.044470450555880635,
+      "grad_norm": 2.2333338260650635,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 2.1548,
+      "step": 171
+    },
+    {
+      "epoch": 0.04473051167024251,
+      "grad_norm": 2.515716791152954,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 2.1873,
+      "step": 172
+    },
+    {
+      "epoch": 0.04499057278460438,
+      "grad_norm": 2.3713366985321045,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 2.1824,
+      "step": 173
+    },
+    {
+      "epoch": 0.045250633898966254,
+      "grad_norm": 2.3272628784179688,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 2.212,
+      "step": 174
+    },
+    {
+      "epoch": 0.045510695013328134,
+      "grad_norm": 2.1713898181915283,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 2.127,
+      "step": 175
+    },
+    {
+      "epoch": 0.04577075612769001,
+      "grad_norm": 2.2558517456054688,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 2.2112,
+      "step": 176
+    },
+    {
+      "epoch": 0.04603081724205188,
+      "grad_norm": 2.3417723178863525,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 2.0578,
+      "step": 177
+    },
+    {
+      "epoch": 0.04629087835641376,
+      "grad_norm": 2.1906392574310303,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 2.2454,
+      "step": 178
+    },
+    {
+      "epoch": 0.04655093947077563,
+      "grad_norm": 2.1303303241729736,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 1.9599,
+      "step": 179
+    },
+    {
+      "epoch": 0.046811000585137506,
+      "grad_norm": 2.1788878440856934,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 2.022,
+      "step": 180
+    },
+    {
+      "epoch": 0.04707106169949938,
+      "grad_norm": 2.2112619876861572,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 2.2099,
+      "step": 181
+    },
+    {
+      "epoch": 0.04733112281386126,
+      "grad_norm": 2.2090134620666504,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 2.1532,
+      "step": 182
+    },
+    {
+      "epoch": 0.04759118392822313,
+      "grad_norm": 2.212982416152954,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 2.1756,
+      "step": 183
+    },
+    {
+      "epoch": 0.047851245042585006,
+      "grad_norm": 2.3446829319000244,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 2.2293,
+      "step": 184
+    },
+    {
+      "epoch": 0.048111306156946886,
+      "grad_norm": 2.1704304218292236,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.9746,
+      "step": 185
+    },
+    {
+      "epoch": 0.04837136727130876,
+      "grad_norm": 2.264618158340454,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 2.2944,
+      "step": 186
+    },
+    {
+      "epoch": 0.04863142838567063,
+      "grad_norm": 2.402517080307007,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 2.1472,
+      "step": 187
+    },
+    {
+      "epoch": 0.048891489500032505,
+      "grad_norm": 2.2531824111938477,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 2.0401,
+      "step": 188
+    },
+    {
+      "epoch": 0.049151550614394385,
+      "grad_norm": 2.224325656890869,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 2.0878,
+      "step": 189
+    },
+    {
+      "epoch": 0.04941161172875626,
+      "grad_norm": 2.3533246517181396,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 2.0941,
+      "step": 190
+    },
+    {
+      "epoch": 0.04967167284311813,
+      "grad_norm": 2.397167921066284,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 2.1501,
+      "step": 191
+    },
+    {
+      "epoch": 0.04993173395748001,
+      "grad_norm": 2.233166456222534,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 2.0635,
+      "step": 192
+    },
+    {
+      "epoch": 0.050191795071841884,
+      "grad_norm": 2.260296583175659,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 2.0526,
+      "step": 193
+    },
+    {
+      "epoch": 0.05045185618620376,
+      "grad_norm": 2.3124306201934814,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 2.136,
+      "step": 194
+    },
+    {
+      "epoch": 0.05071191730056563,
+      "grad_norm": 2.3403406143188477,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 2.1909,
+      "step": 195
+    },
+    {
+      "epoch": 0.05097197841492751,
+      "grad_norm": 2.357811212539673,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 2.2516,
+      "step": 196
+    },
+    {
+      "epoch": 0.05123203952928938,
+      "grad_norm": 2.270812511444092,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 2.1596,
+      "step": 197
+    },
+    {
+      "epoch": 0.051492100643651256,
+      "grad_norm": 2.2893905639648438,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 2.1138,
+      "step": 198
+    },
+    {
+      "epoch": 0.051752161758013136,
+      "grad_norm": 2.342320442199707,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 2.3086,
+      "step": 199
+    },
+    {
+      "epoch": 0.05201222287237501,
+      "grad_norm": 2.4791676998138428,
+      "learning_rate": 0.0,
+      "loss": 2.4156,
+      "step": 200
+    },
+    {
+      "epoch": 0.05201222287237501,
+      "eval_loss": 2.1752114295959473,
+      "eval_runtime": 227.536,
+      "eval_samples_per_second": 28.466,
+      "eval_steps_per_second": 7.12,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.10155117641728e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null