Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bea91234872996bf701226ffe97ccb200a63631c0201595002513f0ae7479970
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:542b3eb3574cd8cf22b0942149c9f0c62ac11a7462856ab6ebf4ef510baafe16
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae8cd75e970e1a4f14b57af33a6f7e58da9f6fd6fae94897f69bfcadb5d25108
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:50b0b19033c3ab8f13a1b2bcf1f0d51b19388f7a95ba045ba62fe60f971b4ed7
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ceb4011ff3d0181a6789cb9456a5b44b1148221592bde2640ba912e0474d33cb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:92fab4fd111c45cbb3a01f5f35b993db145577b14c814d572e2d2bdf1b11f6a4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.121528387069702,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.01968794605502781,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 8.603,
       "eval_steps_per_second": 2.151,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -745,7 +1103,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.508539348549632e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.9811595678329468,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.029531919082541713,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.603,
       "eval_steps_per_second": 2.151,
       "step": 100
+    },
+    {
+      "epoch": 0.01988482551557809,
+      "grad_norm": 4.464885711669922,
+      "learning_rate": 5.330452921628497e-05,
+      "loss": 2.2882,
+      "step": 101
+    },
+    {
+      "epoch": 0.020081704976128364,
+      "grad_norm": 4.608921051025391,
+      "learning_rate": 5.247918773366112e-05,
+      "loss": 2.3429,
+      "step": 102
+    },
+    {
+      "epoch": 0.020278584436678643,
+      "grad_norm": 3.279172658920288,
+      "learning_rate": 5.165316846586541e-05,
+      "loss": 2.3008,
+      "step": 103
+    },
+    {
+      "epoch": 0.020475463897228922,
+      "grad_norm": 2.4048798084259033,
+      "learning_rate": 5.0826697238317935e-05,
+      "loss": 1.9456,
+      "step": 104
+    },
+    {
+      "epoch": 0.0206723433577792,
+      "grad_norm": 1.8811132907867432,
+      "learning_rate": 5e-05,
+      "loss": 1.9413,
+      "step": 105
+    },
+    {
+      "epoch": 0.020869222818329477,
+      "grad_norm": 1.7084625959396362,
+      "learning_rate": 4.917330276168208e-05,
+      "loss": 2.1956,
+      "step": 106
+    },
+    {
+      "epoch": 0.021066102278879756,
+      "grad_norm": 1.6710535287857056,
+      "learning_rate": 4.834683153413459e-05,
+      "loss": 2.1052,
+      "step": 107
+    },
+    {
+      "epoch": 0.021262981739430035,
+      "grad_norm": 1.763252854347229,
+      "learning_rate": 4.7520812266338885e-05,
+      "loss": 1.9297,
+      "step": 108
+    },
+    {
+      "epoch": 0.021459861199980314,
+      "grad_norm": 1.7069237232208252,
+      "learning_rate": 4.669547078371504e-05,
+      "loss": 2.0104,
+      "step": 109
+    },
+    {
+      "epoch": 0.02165674066053059,
+      "grad_norm": 2.005214214324951,
+      "learning_rate": 4.5871032726383386e-05,
+      "loss": 2.1658,
+      "step": 110
+    },
+    {
+      "epoch": 0.021853620121080868,
+      "grad_norm": 2.3560802936553955,
+      "learning_rate": 4.504772348747687e-05,
+      "loss": 2.2162,
+      "step": 111
+    },
+    {
+      "epoch": 0.022050499581631147,
+      "grad_norm": 1.742017388343811,
+      "learning_rate": 4.4225768151520694e-05,
+      "loss": 1.8804,
+      "step": 112
+    },
+    {
+      "epoch": 0.022247379042181423,
+      "grad_norm": 1.8132730722427368,
+      "learning_rate": 4.3405391432896555e-05,
+      "loss": 2.0396,
+      "step": 113
+    },
+    {
+      "epoch": 0.0224442585027317,
+      "grad_norm": 1.8238927125930786,
+      "learning_rate": 4.2586817614407895e-05,
+      "loss": 2.0578,
+      "step": 114
+    },
+    {
+      "epoch": 0.02264113796328198,
+      "grad_norm": 1.8106420040130615,
+      "learning_rate": 4.17702704859633e-05,
+      "loss": 2.0014,
+      "step": 115
+    },
+    {
+      "epoch": 0.02283801742383226,
+      "grad_norm": 1.6731736660003662,
+      "learning_rate": 4.095597328339452e-05,
+      "loss": 1.8581,
+      "step": 116
+    },
+    {
+      "epoch": 0.023034896884382535,
+      "grad_norm": 1.6497639417648315,
+      "learning_rate": 4.0144148627425993e-05,
+      "loss": 1.9013,
+      "step": 117
+    },
+    {
+      "epoch": 0.023231776344932814,
+      "grad_norm": 1.7323822975158691,
+      "learning_rate": 3.933501846281267e-05,
+      "loss": 1.7526,
+      "step": 118
+    },
+    {
+      "epoch": 0.023428655805483093,
+      "grad_norm": 1.758415699005127,
+      "learning_rate": 3.852880399766243e-05,
+      "loss": 1.9007,
+      "step": 119
+    },
+    {
+      "epoch": 0.023625535266033372,
+      "grad_norm": 1.8373111486434937,
+      "learning_rate": 3.772572564296005e-05,
+      "loss": 2.1219,
+      "step": 120
+    },
+    {
+      "epoch": 0.023822414726583648,
+      "grad_norm": 1.7346097230911255,
+      "learning_rate": 3.6926002952309016e-05,
+      "loss": 1.9534,
+      "step": 121
+    },
+    {
+      "epoch": 0.024019294187133926,
+      "grad_norm": 1.764580488204956,
+      "learning_rate": 3.612985456190778e-05,
+      "loss": 1.9987,
+      "step": 122
+    },
+    {
+      "epoch": 0.024216173647684205,
+      "grad_norm": 1.750520944595337,
+      "learning_rate": 3.533749813077677e-05,
+      "loss": 1.9747,
+      "step": 123
+    },
+    {
+      "epoch": 0.024413053108234484,
+      "grad_norm": 1.7164589166641235,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 1.9209,
+      "step": 124
+    },
+    {
+      "epoch": 0.02460993256878476,
+      "grad_norm": 1.7653440237045288,
+      "learning_rate": 3.3765026539765834e-05,
+      "loss": 1.8264,
+      "step": 125
+    },
+    {
+      "epoch": 0.02480681202933504,
+      "grad_norm": 1.8179305791854858,
+      "learning_rate": 3.298534127791785e-05,
+      "loss": 1.8391,
+      "step": 126
+    },
+    {
+      "epoch": 0.025003691489885318,
+      "grad_norm": 1.7005746364593506,
+      "learning_rate": 3.221030765387417e-05,
+      "loss": 1.9018,
+      "step": 127
+    },
+    {
+      "epoch": 0.025200570950435597,
+      "grad_norm": 1.7188222408294678,
+      "learning_rate": 3.144013755408895e-05,
+      "loss": 1.8897,
+      "step": 128
+    },
+    {
+      "epoch": 0.025397450410985872,
+      "grad_norm": 1.712591290473938,
+      "learning_rate": 3.0675041535377405e-05,
+      "loss": 1.7492,
+      "step": 129
+    },
+    {
+      "epoch": 0.02559432987153615,
+      "grad_norm": 1.9163239002227783,
+      "learning_rate": 2.991522876735154e-05,
+      "loss": 2.0165,
+      "step": 130
+    },
+    {
+      "epoch": 0.02579120933208643,
+      "grad_norm": 1.9214532375335693,
+      "learning_rate": 2.916090697523549e-05,
+      "loss": 2.0788,
+      "step": 131
+    },
+    {
+      "epoch": 0.02598808879263671,
+      "grad_norm": 1.896920919418335,
+      "learning_rate": 2.8412282383075363e-05,
+      "loss": 1.9214,
+      "step": 132
+    },
+    {
+      "epoch": 0.026184968253186985,
+      "grad_norm": 1.6200294494628906,
+      "learning_rate": 2.766955965735968e-05,
+      "loss": 1.744,
+      "step": 133
+    },
+    {
+      "epoch": 0.026381847713737264,
+      "grad_norm": 1.626323938369751,
+      "learning_rate": 2.693294185106562e-05,
+      "loss": 1.8176,
+      "step": 134
+    },
+    {
+      "epoch": 0.026578727174287543,
+      "grad_norm": 1.9877479076385498,
+      "learning_rate": 2.6202630348146324e-05,
+      "loss": 2.0644,
+      "step": 135
+    },
+    {
+      "epoch": 0.02677560663483782,
+      "grad_norm": 1.8600821495056152,
+      "learning_rate": 2.547882480847461e-05,
+      "loss": 2.003,
+      "step": 136
+    },
+    {
+      "epoch": 0.026972486095388097,
+      "grad_norm": 1.773910641670227,
+      "learning_rate": 2.476172311325783e-05,
+      "loss": 1.9803,
+      "step": 137
+    },
+    {
+      "epoch": 0.027169365555938376,
+      "grad_norm": 1.6655477285385132,
+      "learning_rate": 2.405152131093926e-05,
+      "loss": 1.7294,
+      "step": 138
+    },
+    {
+      "epoch": 0.027366245016488655,
+      "grad_norm": 1.8801628351211548,
+      "learning_rate": 2.3348413563600325e-05,
+      "loss": 1.8536,
+      "step": 139
+    },
+    {
+      "epoch": 0.027563124477038934,
+      "grad_norm": 1.880556344985962,
+      "learning_rate": 2.2652592093878666e-05,
+      "loss": 1.924,
+      "step": 140
+    },
+    {
+      "epoch": 0.02776000393758921,
+      "grad_norm": 1.9137473106384277,
+      "learning_rate": 2.196424713241637e-05,
+      "loss": 2.0049,
+      "step": 141
+    },
+    {
+      "epoch": 0.02795688339813949,
+      "grad_norm": 1.8949143886566162,
+      "learning_rate": 2.128356686585282e-05,
+      "loss": 1.8792,
+      "step": 142
+    },
+    {
+      "epoch": 0.028153762858689767,
+      "grad_norm": 1.9418630599975586,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 2.05,
+      "step": 143
+    },
+    {
+      "epoch": 0.028350642319240046,
+      "grad_norm": 2.0430312156677246,
+      "learning_rate": 1.9945942635848748e-05,
+      "loss": 2.0717,
+      "step": 144
+    },
+    {
+      "epoch": 0.028547521779790322,
+      "grad_norm": 1.998457908630371,
+      "learning_rate": 1.928936436551661e-05,
+      "loss": 1.9202,
+      "step": 145
+    },
+    {
+      "epoch": 0.0287444012403406,
+      "grad_norm": 2.010180950164795,
+      "learning_rate": 1.8641182076323148e-05,
+      "loss": 1.979,
+      "step": 146
+    },
+    {
+      "epoch": 0.02894128070089088,
+      "grad_norm": 2.290452241897583,
+      "learning_rate": 1.800157297483417e-05,
+      "loss": 1.8416,
+      "step": 147
+    },
+    {
+      "epoch": 0.02913816016144116,
+      "grad_norm": 2.139098644256592,
+      "learning_rate": 1.7370711923791567e-05,
+      "loss": 2.0033,
+      "step": 148
+    },
+    {
+      "epoch": 0.029335039621991434,
+      "grad_norm": 2.1091532707214355,
+      "learning_rate": 1.6748771394307585e-05,
+      "loss": 1.7023,
+      "step": 149
+    },
+    {
+      "epoch": 0.029531919082541713,
+      "grad_norm": 2.765894651412964,
+      "learning_rate": 1.6135921418712956e-05,
+      "loss": 1.7839,
+      "step": 150
+    },
+    {
+      "epoch": 0.029531919082541713,
+      "eval_loss": 1.9811595678329468,
+      "eval_runtime": 995.7334,
+      "eval_samples_per_second": 8.592,
+      "eval_steps_per_second": 2.148,
+      "step": 150
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.262809022824448e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null