Training in progress, step 53, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b401a8996e1094a3ea616fc7f7226bdf80cd30ccb2ddbd87762a19b280fd0f39
 size 121155320

 version https://git-lfs.github.com/spec/v1
+oid sha256:347755715d6412ed461b3e24f4c30cbc103765c379964a87826818d71b9ae1e7
 size 121155320

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd67a688192c0222264a6c41cb51fae902e274ef2e310a9285bad9253a74207d
 size 61896852

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bc8468acda25a8a77b68e9e04e9855d429892d8a94bf1a16bbb991a615b23c3
 size 61896852

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d9b21302b3043c033d4190a4eab85c75e7262c9b261a2438237e642de7538ff
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:edd21bfcd79332bda74bfee43e586f695faaab87015ca17dd9f4a8948d287855
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7e76384fe2e1907e44b199e48722ea251cbbcfea1285f875115318fffa6d887
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:62186d98e2e1a228b1a77580a02e85af5559d427fe6308d24fc721da049c3720
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.4689046144485474,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.857142857142857,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,27 @@
       "eval_samples_per_second": 57.17,
       "eval_steps_per_second": 15.245,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +417,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3308579507404800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.4689046144485474,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.0285714285714285,
   "eval_steps": 50,
+  "global_step": 53,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 57.17,
       "eval_steps_per_second": 15.245,
       "step": 50
+    },
+    {
+      "epoch": 2.914285714285714,
+      "grad_norm": 3.166837215423584,
+      "learning_rate": 5.328315962444874e-07,
+      "loss": 0.8571,
+      "step": 51
+    },
+    {
+      "epoch": 2.9714285714285715,
+      "grad_norm": 1.184228539466858,
+      "learning_rate": 1.333858168224178e-07,
+      "loss": 1.1258,
+      "step": 52
+    },
+    {
+      "epoch": 3.0285714285714285,
+      "grad_norm": 4.288901329040527,
+      "learning_rate": 0.0,
+      "loss": 1.5266,
+      "step": 53
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3497938460344320.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null