Training in progress, step 43, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +130 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b77fe3b7669d1b7d18e14b67bd2315112aba565bd1b048d461b5705959c3a5c
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e19f697d51eff60256b0f0de1ce71a1e64c1cbb2490a7a8c782b56815f41a1c
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:becd209e7aa6c1cca1d7b67ba571f59a49144adacd1d8236652dc0576d208008
 size 295765866

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2b930900c3b7fdef209f703df7b2deddc44ffe60e853552b246abd2ea9113ba
 size 295765866

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aac585c8e38965d3627b925420589cd3c9fe26b5c844a85bddbad040181b69c4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:892150506cf3a6995305bdb1cfcc76f0841edbc7d4e45bf06eb52bddd5e14d35
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f5e0fc2c89ce8c27a741eadd04f7c9b9502dc8b3cceab38f7c621bf65ba6668
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2cf44f27c25cbcc0a4425c840d0d5584aec3c391e9d68dae770cf46e324428d
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93d3a8c789021250edbf6751c2ae0c3a1aa1baf1a937527303af3998ef5e0e40
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3dcfc7ed163e783d579fa80c8fbf2babafa4a5b418b1daacc1e6f4bc146ae2da
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae21c6f869d426c470d5d27597da69d047733b6441a741492bc7935125a45d08
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:334e589a27d6f5da0176b498c8bddfadadd415ab00f9554f786fbbcca35b7b74
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36f0bc6b93defe30dc6f027b215341a162e86baeedeff10da11e3e51193535d5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4596f05f2825020af546e9fe75445728cf0ec904fa7301c7875d2ed533ffa193
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 2.9008495807647705,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.7699115044247788,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,132 @@
       "eval_samples_per_second": 39.26,
       "eval_steps_per_second": 4.959,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +347,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.29768176943104e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 2.9008495807647705,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 3.0442477876106193,
   "eval_steps": 25,
+  "global_step": 43,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 39.26,
       "eval_steps_per_second": 4.959,
       "step": 25
+    },
+    {
+      "epoch": 1.8407079646017699,
+      "grad_norm": 0.4535748362541199,
+      "learning_rate": 3.675092489016693e-05,
+      "loss": 2.9285,
+      "step": 26
+    },
+    {
+      "epoch": 1.911504424778761,
+      "grad_norm": 0.5968184471130371,
+      "learning_rate": 3.309915607957487e-05,
+      "loss": 2.8405,
+      "step": 27
+    },
+    {
+      "epoch": 1.9823008849557522,
+      "grad_norm": 0.7783849239349365,
+      "learning_rate": 2.9546568141433006e-05,
+      "loss": 3.0378,
+      "step": 28
+    },
+    {
+      "epoch": 2.0530973451327434,
+      "grad_norm": 0.8722625374794006,
+      "learning_rate": 2.6114009074386846e-05,
+      "loss": 5.2923,
+      "step": 29
+    },
+    {
+      "epoch": 2.1238938053097347,
+      "grad_norm": 0.5258104801177979,
+      "learning_rate": 2.282162249993895e-05,
+      "loss": 2.7651,
+      "step": 30
+    },
+    {
+      "epoch": 2.1946902654867255,
+      "grad_norm": 0.6721611618995667,
+      "learning_rate": 1.9688729451668114e-05,
+      "loss": 2.8586,
+      "step": 31
+    },
+    {
+      "epoch": 2.265486725663717,
+      "grad_norm": 0.4610719084739685,
+      "learning_rate": 1.673371499172174e-05,
+      "loss": 2.4631,
+      "step": 32
+    },
+    {
+      "epoch": 2.336283185840708,
+      "grad_norm": 0.5183499455451965,
+      "learning_rate": 1.3973920319960655e-05,
+      "loss": 3.2142,
+      "step": 33
+    },
+    {
+      "epoch": 2.4070796460176993,
+      "grad_norm": 0.5465492010116577,
+      "learning_rate": 1.1425541008902851e-05,
+      "loss": 2.848,
+      "step": 34
+    },
+    {
+      "epoch": 2.47787610619469,
+      "grad_norm": 0.8566042184829712,
+      "learning_rate": 9.103531961664118e-06,
+      "loss": 3.0925,
+      "step": 35
+    },
+    {
+      "epoch": 2.5486725663716814,
+      "grad_norm": 0.4418770968914032,
+      "learning_rate": 7.0215196506399515e-06,
+      "loss": 2.7127,
+      "step": 36
+    },
+    {
+      "epoch": 2.6194690265486726,
+      "grad_norm": 0.523547351360321,
+      "learning_rate": 5.191722151947226e-06,
+      "loss": 2.6927,
+      "step": 37
+    },
+    {
+      "epoch": 2.6902654867256635,
+      "grad_norm": 0.6238358616828918,
+      "learning_rate": 3.6248774448952695e-06,
+      "loss": 2.9245,
+      "step": 38
+    },
+    {
+      "epoch": 2.7610619469026547,
+      "grad_norm": 0.423245370388031,
+      "learning_rate": 2.330180397253473e-06,
+      "loss": 2.3783,
+      "step": 39
+    },
+    {
+      "epoch": 2.831858407079646,
+      "grad_norm": 0.5013735890388489,
+      "learning_rate": 1.3152288061110518e-06,
+      "loss": 3.1361,
+      "step": 40
+    },
+    {
+      "epoch": 2.9026548672566372,
+      "grad_norm": 0.5441763997077942,
+      "learning_rate": 5.859788109825793e-07,
+      "loss": 2.8389,
+      "step": 41
+    },
+    {
+      "epoch": 2.9734513274336285,
+      "grad_norm": 0.7712870836257935,
+      "learning_rate": 1.4670994081297795e-07,
+      "loss": 3.0242,
+      "step": 42
+    },
+    {
+      "epoch": 3.0442477876106193,
+      "grad_norm": 0.7878711223602295,
+      "learning_rate": 0.0,
+      "loss": 5.2321,
+      "step": 43
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9.112012643421389e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null