Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c27948d93ceb1d068247f200b0fa61d54d3353218932311845053fbe376f41af
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:06aa34013a392528d6af9dcc0653d68adce7c12fa33450107d81b47358b1043e
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8b218f014b8acddf4408dc445dcfbd8758282239478979cd876efbf45391934
 size 71878612

 version https://git-lfs.github.com/spec/v1
+oid sha256:029e4e5b2ea5f26c667a7dcddc8c0a86e7f50d4debb59724ce0366b6e783b456
 size 71878612

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa072540e00ec6592118bb61eaedf81fe5019c743d59ac4ed5c04351de7901aa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:357a229135d2d65f5983fec29464ec04086ca052cab7c02645af53c05169b119
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7871350049972534,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.04032529067813697,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 42.489,
       "eval_steps_per_second": 10.627,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.6210491900690432e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7871350049972534,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.05376705423751596,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 42.489,
       "eval_steps_per_second": 10.627,
       "step": 150
+    },
+    {
+      "epoch": 0.04059412594932455,
+      "grad_norm": 1.0386604070663452,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 0.8937,
+      "step": 151
+    },
+    {
+      "epoch": 0.04086296122051213,
+      "grad_norm": 0.6060370206832886,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 0.9488,
+      "step": 152
+    },
+    {
+      "epoch": 0.041131796491699714,
+      "grad_norm": 0.4730580747127533,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 1.0604,
+      "step": 153
+    },
+    {
+      "epoch": 0.04140063176288729,
+      "grad_norm": 0.5737183690071106,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 1.0372,
+      "step": 154
+    },
+    {
+      "epoch": 0.04166946703407487,
+      "grad_norm": 0.48760518431663513,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 0.9442,
+      "step": 155
+    },
+    {
+      "epoch": 0.04193830230526245,
+      "grad_norm": 0.4955401122570038,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 0.8898,
+      "step": 156
+    },
+    {
+      "epoch": 0.04220713757645003,
+      "grad_norm": 0.5258156657218933,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 0.9747,
+      "step": 157
+    },
+    {
+      "epoch": 0.04247597284763761,
+      "grad_norm": 0.5105649828910828,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 0.8317,
+      "step": 158
+    },
+    {
+      "epoch": 0.04274480811882519,
+      "grad_norm": 0.5645738840103149,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 0.826,
+      "step": 159
+    },
+    {
+      "epoch": 0.04301364339001277,
+      "grad_norm": 0.6887422204017639,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 0.8181,
+      "step": 160
+    },
+    {
+      "epoch": 0.04328247866120035,
+      "grad_norm": 0.6888102293014526,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 0.8185,
+      "step": 161
+    },
+    {
+      "epoch": 0.04355131393238793,
+      "grad_norm": 0.8524068593978882,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.964,
+      "step": 162
+    },
+    {
+      "epoch": 0.043820149203575506,
+      "grad_norm": 1.0922106504440308,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 0.9402,
+      "step": 163
+    },
+    {
+      "epoch": 0.04408898447476309,
+      "grad_norm": 1.101603627204895,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.8673,
+      "step": 164
+    },
+    {
+      "epoch": 0.04435781974595067,
+      "grad_norm": 1.0851558446884155,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 0.7081,
+      "step": 165
+    },
+    {
+      "epoch": 0.04462665501713825,
+      "grad_norm": 1.3626710176467896,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.8184,
+      "step": 166
+    },
+    {
+      "epoch": 0.04489549028832583,
+      "grad_norm": 1.892257809638977,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 0.8043,
+      "step": 167
+    },
+    {
+      "epoch": 0.045164325559513406,
+      "grad_norm": 2.0069141387939453,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 1.005,
+      "step": 168
+    },
+    {
+      "epoch": 0.04543316083070099,
+      "grad_norm": 2.4379069805145264,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 0.8454,
+      "step": 169
+    },
+    {
+      "epoch": 0.04570199610188857,
+      "grad_norm": 2.227325439453125,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 1.6294,
+      "step": 170
+    },
+    {
+      "epoch": 0.04597083137307615,
+      "grad_norm": 1.0205013751983643,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 0.9245,
+      "step": 171
+    },
+    {
+      "epoch": 0.046239666644263724,
+      "grad_norm": 0.969859778881073,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 0.7725,
+      "step": 172
+    },
+    {
+      "epoch": 0.046508501915451306,
+      "grad_norm": 1.1012479066848755,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 0.6842,
+      "step": 173
+    },
+    {
+      "epoch": 0.04677733718663889,
+      "grad_norm": 1.1155883073806763,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 1.0141,
+      "step": 174
+    },
+    {
+      "epoch": 0.04704617245782647,
+      "grad_norm": 1.2785975933074951,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 0.7848,
+      "step": 175
+    },
+    {
+      "epoch": 0.04731500772901405,
+      "grad_norm": 1.0930967330932617,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 0.546,
+      "step": 176
+    },
+    {
+      "epoch": 0.047583843000201624,
+      "grad_norm": 1.4280908107757568,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 0.7626,
+      "step": 177
+    },
+    {
+      "epoch": 0.047852678271389205,
+      "grad_norm": 1.3137999773025513,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 0.87,
+      "step": 178
+    },
+    {
+      "epoch": 0.04812151354257679,
+      "grad_norm": 1.2226773500442505,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 0.6399,
+      "step": 179
+    },
+    {
+      "epoch": 0.04839034881376437,
+      "grad_norm": 1.2721233367919922,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.5023,
+      "step": 180
+    },
+    {
+      "epoch": 0.04865918408495194,
+      "grad_norm": 1.656267762184143,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.0463,
+      "step": 181
+    },
+    {
+      "epoch": 0.048928019356139524,
+      "grad_norm": 1.5332951545715332,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 0.7921,
+      "step": 182
+    },
+    {
+      "epoch": 0.049196854627327105,
+      "grad_norm": 1.7572109699249268,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 0.9806,
+      "step": 183
+    },
+    {
+      "epoch": 0.049465689898514686,
+      "grad_norm": 2.0103704929351807,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 0.6089,
+      "step": 184
+    },
+    {
+      "epoch": 0.04973452516970227,
+      "grad_norm": 1.6781842708587646,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 0.5944,
+      "step": 185
+    },
+    {
+      "epoch": 0.05000336044088984,
+      "grad_norm": 1.9478318691253662,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.642,
+      "step": 186
+    },
+    {
+      "epoch": 0.050272195712077423,
+      "grad_norm": 2.606834650039673,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 0.9156,
+      "step": 187
+    },
+    {
+      "epoch": 0.050541030983265005,
+      "grad_norm": 2.366358518600464,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 0.6252,
+      "step": 188
+    },
+    {
+      "epoch": 0.050809866254452586,
+      "grad_norm": 2.2159924507141113,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 0.7652,
+      "step": 189
+    },
+    {
+      "epoch": 0.05107870152564016,
+      "grad_norm": 1.9476014375686646,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 0.9363,
+      "step": 190
+    },
+    {
+      "epoch": 0.05134753679682774,
+      "grad_norm": 1.9521092176437378,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 0.9807,
+      "step": 191
+    },
+    {
+      "epoch": 0.05161637206801532,
+      "grad_norm": 2.0122740268707275,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.5521,
+      "step": 192
+    },
+    {
+      "epoch": 0.051885207339202905,
+      "grad_norm": 2.72161602973938,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 0.7826,
+      "step": 193
+    },
+    {
+      "epoch": 0.052154042610390486,
+      "grad_norm": 4.295709133148193,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 1.1679,
+      "step": 194
+    },
+    {
+      "epoch": 0.05242287788157806,
+      "grad_norm": 2.841387987136841,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 0.46,
+      "step": 195
+    },
+    {
+      "epoch": 0.05269171315276564,
+      "grad_norm": 2.1911866664886475,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.4877,
+      "step": 196
+    },
+    {
+      "epoch": 0.05296054842395322,
+      "grad_norm": 2.492327928543091,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 0.6648,
+      "step": 197
+    },
+    {
+      "epoch": 0.053229383695140804,
+      "grad_norm": 2.643150806427002,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.3063,
+      "step": 198
+    },
+    {
+      "epoch": 0.053498218966328386,
+      "grad_norm": 2.426671266555786,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 0.6598,
+      "step": 199
+    },
+    {
+      "epoch": 0.05376705423751596,
+      "grad_norm": 3.6637401580810547,
+      "learning_rate": 0.0,
+      "loss": 0.6994,
+      "step": 200
+    },
+    {
+      "epoch": 0.05376705423751596,
+      "eval_loss": 0.7902288436889648,
+      "eval_runtime": 147.8262,
+      "eval_samples_per_second": 42.381,
+      "eval_steps_per_second": 10.6,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.162042960294707e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null