aleegis12 committed
Commit 271e95c · verified · Parent: 305e0dc

Training in progress, step 224, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6fda2e763cacbf5384f32a5a88520cf1602185a56feea8f48caf776e6426b9b7
+ oid sha256:606d4ed2867ba0bf76578e3a2d28b7f5157628c4e08ad9b922fc1e50be900dbd
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5e3f09930179102320a21c6f9abea0e1cdb2e1ab1919021f171f803d82927b0c
+ oid sha256:3dc075e2ca009ca108b4be5ee95b4d150b2366246e5ba577b39e69328ab03c48
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:584af298efc3b2c3d46a2cfe15d836a3398932f3cab868e94c719298ccae2d97
+ oid sha256:a8941cfccbcfcdf233d1937f7300fd6c7036057879aed68046c10cd5ede20934
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:23ea67c3a25ae1d1c5130f8fae80127b447d72c85253a82322b78b264d40857f
+ oid sha256:93fe870d91bc8391b278b0e9eb02bd97d2a13abf3d921d0d4fefc96fbe029409
  size 1064
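
Each of the four files above is a Git LFS pointer, so only its oid sha256: line changes in this commit; the binary payload itself lives in LFS storage. Below is a minimal sketch (standard library only; the pointer text is copied from this commit, while the helper names are illustrative) of checking a locally downloaded file against such a pointer:

import hashlib
import os


def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer ('version', 'oid', 'size' lines) into a dict."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def matches_pointer(path: str, pointer_text: str) -> bool:
    """Return True if the local file's SHA-256 and size match the pointer."""
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].split("sha256:", 1)[1]
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size


# New adapter pointer from this commit:
ADAPTER_POINTER = """\
version https://git-lfs.github.com/spec/v1
oid sha256:606d4ed2867ba0bf76578e3a2d28b7f5157628c4e08ad9b922fc1e50be900dbd
size 671149168"""

# matches_pointer("last-checkpoint/adapter_model.safetensors", ADAPTER_POINTER)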
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 1.3149925470352173,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
- "epoch": 0.8948545861297539,
+ "epoch": 1.0022371364653244,
  "eval_steps": 100,
- "global_step": 200,
+ "global_step": 224,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1431,6 +1431,174 @@
  "eval_samples_per_second": 11.415,
  "eval_steps_per_second": 2.876,
  "step": 200
+ },
+ {
+ "epoch": 0.8993288590604027,
+ "grad_norm": 0.3248676657676697,
+ "learning_rate": 3.1037725843711062e-06,
+ "loss": 1.5013,
+ "step": 201
+ },
+ {
+ "epoch": 0.9038031319910514,
+ "grad_norm": 0.3875062167644501,
+ "learning_rate": 2.842278276436128e-06,
+ "loss": 2.0208,
+ "step": 202
+ },
+ {
+ "epoch": 0.9082774049217002,
+ "grad_norm": 0.36224886775016785,
+ "learning_rate": 2.591967620451707e-06,
+ "loss": 1.9816,
+ "step": 203
+ },
+ {
+ "epoch": 0.912751677852349,
+ "grad_norm": 0.30799493193626404,
+ "learning_rate": 2.3528999786421756e-06,
+ "loss": 1.4076,
+ "step": 204
+ },
+ {
+ "epoch": 0.9172259507829977,
+ "grad_norm": 0.2949186861515045,
+ "learning_rate": 2.1251320469037827e-06,
+ "loss": 1.1052,
+ "step": 205
+ },
+ {
+ "epoch": 0.9217002237136466,
+ "grad_norm": 0.27485114336013794,
+ "learning_rate": 1.908717841359048e-06,
+ "loss": 1.04,
+ "step": 206
+ },
+ {
+ "epoch": 0.9261744966442953,
+ "grad_norm": 0.30447426438331604,
+ "learning_rate": 1.70370868554659e-06,
+ "loss": 1.1437,
+ "step": 207
+ },
+ {
+ "epoch": 0.930648769574944,
+ "grad_norm": 0.3369496166706085,
+ "learning_rate": 1.5101531982495308e-06,
+ "loss": 1.3196,
+ "step": 208
+ },
+ {
+ "epoch": 0.9351230425055929,
+ "grad_norm": 0.308193564414978,
+ "learning_rate": 1.328097281965357e-06,
+ "loss": 0.899,
+ "step": 209
+ },
+ {
+ "epoch": 0.9395973154362416,
+ "grad_norm": 0.31419670581817627,
+ "learning_rate": 1.157584112019966e-06,
+ "loss": 1.0979,
+ "step": 210
+ },
+ {
+ "epoch": 0.9440715883668904,
+ "grad_norm": 0.29612240195274353,
+ "learning_rate": 9.986541263284077e-07,
+ "loss": 0.8449,
+ "step": 211
+ },
+ {
+ "epoch": 0.9485458612975392,
+ "grad_norm": 0.3415446877479553,
+ "learning_rate": 8.513450158049108e-07,
+ "loss": 1.0074,
+ "step": 212
+ },
+ {
+ "epoch": 0.9530201342281879,
+ "grad_norm": 0.3255971670150757,
+ "learning_rate": 7.156917154243048e-07,
+ "loss": 1.017,
+ "step": 213
+ },
+ {
+ "epoch": 0.9574944071588367,
+ "grad_norm": 0.3114960491657257,
+ "learning_rate": 5.917263959370312e-07,
+ "loss": 0.8636,
+ "step": 214
+ },
+ {
+ "epoch": 0.9619686800894854,
+ "grad_norm": 0.3541721999645233,
+ "learning_rate": 4.794784562397458e-07,
+ "loss": 0.9719,
+ "step": 215
+ },
+ {
+ "epoch": 0.9664429530201343,
+ "grad_norm": 0.37471628189086914,
+ "learning_rate": 3.7897451640321323e-07,
+ "loss": 1.1838,
+ "step": 216
+ },
+ {
+ "epoch": 0.970917225950783,
+ "grad_norm": 0.3450222611427307,
+ "learning_rate": 2.902384113592782e-07,
+ "loss": 0.964,
+ "step": 217
+ },
+ {
+ "epoch": 0.9753914988814317,
+ "grad_norm": 0.41780760884284973,
+ "learning_rate": 2.1329118524827662e-07,
+ "loss": 1.3496,
+ "step": 218
+ },
+ {
+ "epoch": 0.9798657718120806,
+ "grad_norm": 0.3904644548892975,
+ "learning_rate": 1.481510864283553e-07,
+ "loss": 1.0503,
+ "step": 219
+ },
+ {
+ "epoch": 0.9843400447427293,
+ "grad_norm": 0.441577672958374,
+ "learning_rate": 9.483356314779479e-08,
+ "loss": 1.2227,
+ "step": 220
+ },
+ {
+ "epoch": 0.9888143176733781,
+ "grad_norm": 0.48042798042297363,
+ "learning_rate": 5.3351259881379014e-08,
+ "loss": 1.2312,
+ "step": 221
+ },
+ {
+ "epoch": 0.9932885906040269,
+ "grad_norm": 0.5117903351783752,
+ "learning_rate": 2.371401433170495e-08,
+ "loss": 1.3979,
+ "step": 222
+ },
+ {
+ "epoch": 0.9977628635346756,
+ "grad_norm": 0.636166512966156,
+ "learning_rate": 5.928855096154484e-09,
+ "loss": 1.6438,
+ "step": 223
+ },
+ {
+ "epoch": 1.0022371364653244,
+ "grad_norm": 1.6414198875427246,
+ "learning_rate": 0.0,
+ "loss": 2.9316,
+ "step": 224
  }
  ],
  "logging_steps": 1,
@@ -1454,12 +1622,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.4092406879656346e+17,
+ "total_flos": 3.81795358570709e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null