Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28bd21acbc00b1e472dd68c2ebc2d79e866f310791bed0d4d9f32755069d1e5c
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9b6ceccd1873798abdaf48d168201c2d9819a24adffda1c2498263320a0411b
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7df159fcb13b0ec0710ddb4f4a400656a626f031c385e64f44746470db43c29d
 size 71878612

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a25be7b6627adae88a85d25337b67cc532f9682484af4829f4b8a6aa5ae241f
 size 71878612

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fab15fe3381359f3c7c3ef3c9d3e1a887eb469ca9e756a376245f940f408f683
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae2a148c4195870457fdaf4d653e7b1709f2c1d0330488792c6add6d72face1d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.17842844128608704,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.01005294551303532,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 42.827,
       "eval_steps_per_second": 10.708,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.6461667579723776e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.16398899257183075,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.01340392735071376,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 42.827,
       "eval_steps_per_second": 10.708,
       "step": 150
+    },
+    {
+      "epoch": 0.010119965149788888,
+      "grad_norm": 0.591659426689148,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 0.1871,
+      "step": 151
+    },
+    {
+      "epoch": 0.010186984786542458,
+      "grad_norm": 0.46691226959228516,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 0.1496,
+      "step": 152
+    },
+    {
+      "epoch": 0.010254004423296026,
+      "grad_norm": 0.47545602917671204,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 0.1978,
+      "step": 153
+    },
+    {
+      "epoch": 0.010321024060049595,
+      "grad_norm": 0.3175927698612213,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 0.124,
+      "step": 154
+    },
+    {
+      "epoch": 0.010388043696803163,
+      "grad_norm": 0.3581377863883972,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 0.1057,
+      "step": 155
+    },
+    {
+      "epoch": 0.010455063333556731,
+      "grad_norm": 0.41008394956588745,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 0.1132,
+      "step": 156
+    },
+    {
+      "epoch": 0.010522082970310301,
+      "grad_norm": 0.34069183468818665,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 0.073,
+      "step": 157
+    },
+    {
+      "epoch": 0.01058910260706387,
+      "grad_norm": 0.31148192286491394,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 0.078,
+      "step": 158
+    },
+    {
+      "epoch": 0.010656122243817439,
+      "grad_norm": 0.34437206387519836,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 0.1078,
+      "step": 159
+    },
+    {
+      "epoch": 0.010723141880571007,
+      "grad_norm": 0.21956300735473633,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 0.1135,
+      "step": 160
+    },
+    {
+      "epoch": 0.010790161517324577,
+      "grad_norm": 0.17520062625408173,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 0.1016,
+      "step": 161
+    },
+    {
+      "epoch": 0.010857181154078145,
+      "grad_norm": 0.2024766057729721,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.0934,
+      "step": 162
+    },
+    {
+      "epoch": 0.010924200790831713,
+      "grad_norm": 0.16580188274383545,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 0.0888,
+      "step": 163
+    },
+    {
+      "epoch": 0.010991220427585283,
+      "grad_norm": 0.21035680174827576,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.1323,
+      "step": 164
+    },
+    {
+      "epoch": 0.01105824006433885,
+      "grad_norm": 0.20366999506950378,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 0.1069,
+      "step": 165
+    },
+    {
+      "epoch": 0.01112525970109242,
+      "grad_norm": 0.21103376150131226,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.1359,
+      "step": 166
+    },
+    {
+      "epoch": 0.011192279337845988,
+      "grad_norm": 0.19466574490070343,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 0.084,
+      "step": 167
+    },
+    {
+      "epoch": 0.011259298974599558,
+      "grad_norm": 0.2798488140106201,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 0.1039,
+      "step": 168
+    },
+    {
+      "epoch": 0.011326318611353126,
+      "grad_norm": 0.18003518879413605,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 0.0671,
+      "step": 169
+    },
+    {
+      "epoch": 0.011393338248106696,
+      "grad_norm": 0.26317933201789856,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 0.072,
+      "step": 170
+    },
+    {
+      "epoch": 0.011460357884860264,
+      "grad_norm": 0.2690606117248535,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 0.0974,
+      "step": 171
+    },
+    {
+      "epoch": 0.011527377521613832,
+      "grad_norm": 0.310910701751709,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 0.0823,
+      "step": 172
+    },
+    {
+      "epoch": 0.011594397158367402,
+      "grad_norm": 0.6066146492958069,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 0.111,
+      "step": 173
+    },
+    {
+      "epoch": 0.01166141679512097,
+      "grad_norm": 0.3648902177810669,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 0.104,
+      "step": 174
+    },
+    {
+      "epoch": 0.01172843643187454,
+      "grad_norm": 0.4662618339061737,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 0.1277,
+      "step": 175
+    },
+    {
+      "epoch": 0.011795456068628108,
+      "grad_norm": 0.4023761749267578,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 0.1068,
+      "step": 176
+    },
+    {
+      "epoch": 0.011862475705381677,
+      "grad_norm": 0.6577183604240417,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 0.1718,
+      "step": 177
+    },
+    {
+      "epoch": 0.011929495342135246,
+      "grad_norm": 0.8494563698768616,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 0.145,
+      "step": 178
+    },
+    {
+      "epoch": 0.011996514978888815,
+      "grad_norm": 0.7966896891593933,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 0.1644,
+      "step": 179
+    },
+    {
+      "epoch": 0.012063534615642383,
+      "grad_norm": 1.1843880414962769,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.2375,
+      "step": 180
+    },
+    {
+      "epoch": 0.012130554252395951,
+      "grad_norm": 0.8674980998039246,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.1877,
+      "step": 181
+    },
+    {
+      "epoch": 0.012197573889149521,
+      "grad_norm": 0.6157689094543457,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 0.1449,
+      "step": 182
+    },
+    {
+      "epoch": 0.01226459352590309,
+      "grad_norm": 0.4933188557624817,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 0.1689,
+      "step": 183
+    },
+    {
+      "epoch": 0.012331613162656659,
+      "grad_norm": 0.7543010711669922,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 0.1883,
+      "step": 184
+    },
+    {
+      "epoch": 0.012398632799410227,
+      "grad_norm": 0.6339919567108154,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 0.2073,
+      "step": 185
+    },
+    {
+      "epoch": 0.012465652436163797,
+      "grad_norm": 0.6074450612068176,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.1771,
+      "step": 186
+    },
+    {
+      "epoch": 0.012532672072917365,
+      "grad_norm": 0.7933152318000793,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 0.1976,
+      "step": 187
+    },
+    {
+      "epoch": 0.012599691709670933,
+      "grad_norm": 0.47381430864334106,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 0.1852,
+      "step": 188
+    },
+    {
+      "epoch": 0.012666711346424503,
+      "grad_norm": 0.5671533942222595,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 0.1633,
+      "step": 189
+    },
+    {
+      "epoch": 0.01273373098317807,
+      "grad_norm": 0.7143679261207581,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 0.1901,
+      "step": 190
+    },
+    {
+      "epoch": 0.01280075061993164,
+      "grad_norm": 0.8540722727775574,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 0.2312,
+      "step": 191
+    },
+    {
+      "epoch": 0.012867770256685208,
+      "grad_norm": 0.6961427927017212,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.1761,
+      "step": 192
+    },
+    {
+      "epoch": 0.012934789893438778,
+      "grad_norm": 0.893753707408905,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 0.1982,
+      "step": 193
+    },
+    {
+      "epoch": 0.013001809530192346,
+      "grad_norm": 0.9567275047302246,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 0.2465,
+      "step": 194
+    },
+    {
+      "epoch": 0.013068829166945916,
+      "grad_norm": 1.6597427129745483,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 0.3058,
+      "step": 195
+    },
+    {
+      "epoch": 0.013135848803699484,
+      "grad_norm": 2.6701152324676514,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.3691,
+      "step": 196
+    },
+    {
+      "epoch": 0.013202868440453052,
+      "grad_norm": 3.0059423446655273,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 0.3061,
+      "step": 197
+    },
+    {
+      "epoch": 0.013269888077206622,
+      "grad_norm": 2.072479724884033,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.2964,
+      "step": 198
+    },
+    {
+      "epoch": 0.01333690771396019,
+      "grad_norm": 1.9728002548217773,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 0.2754,
+      "step": 199
+    },
+    {
+      "epoch": 0.01340392735071376,
+      "grad_norm": 5.895201206207275,
+      "learning_rate": 0.0,
+      "loss": 0.5173,
+      "step": 200
+    },
+    {
+      "epoch": 0.01340392735071376,
+      "eval_loss": 0.16398899257183075,
+      "eval_runtime": 586.2268,
+      "eval_samples_per_second": 42.867,
+      "eval_steps_per_second": 10.718,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.191024769413939e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null