Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62e87acdbfcdd9dc20bfef94827a0ce59aa38bc2d132df573bd3d9744d1c99e1
 size 800116456

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6e40d174af6da6ffb7f7ba6bf3da71f2cd40cd59028a5b6726c4a08d821fcc4
 size 800116456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5718d0be5b17072815299de74071187ad008215d002410e5308ab4a9a40f404
 size 406743412

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b72cca2793ea8e8fb522768eac1e8f9e5d9e4b6cc2b1b32d6d1546e8773be4f
 size 406743412

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ba8ede8b1ce1289bd047fa1b015e236ff2970647049d9ac45b408f024313221
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4c4b49ebd10e72da64aaeee9a2568e4948c257827da60f7c3a9021a559f2182
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8685a85e01d0081c4ee6b3d27083bc45de61653fc346f2b531f3e09e6eff0d83
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f96196bd3544de2c28f6af356470f327df948539b0e3259c46b8a6786b633fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9501329064369202,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.005254768702597608,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 5.839,
       "eval_steps_per_second": 1.46,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.914116459003904e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.907011091709137,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.007006358270130143,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.839,
       "eval_steps_per_second": 1.46,
       "step": 150
+    },
+    {
+      "epoch": 0.005289800493948258,
+      "grad_norm": 4.7826056480407715,
+      "learning_rate": 2.599578947368421e-05,
+      "loss": 0.8471,
+      "step": 151
+    },
+    {
+      "epoch": 0.0053248322852989085,
+      "grad_norm": 4.294804573059082,
+      "learning_rate": 2.5465263157894738e-05,
+      "loss": 0.8585,
+      "step": 152
+    },
+    {
+      "epoch": 0.00535986407664956,
+      "grad_norm": 5.511264324188232,
+      "learning_rate": 2.493473684210526e-05,
+      "loss": 1.0528,
+      "step": 153
+    },
+    {
+      "epoch": 0.00539489586800021,
+      "grad_norm": 6.201894760131836,
+      "learning_rate": 2.440421052631579e-05,
+      "loss": 1.1332,
+      "step": 154
+    },
+    {
+      "epoch": 0.0054299276593508606,
+      "grad_norm": 4.838656425476074,
+      "learning_rate": 2.3873684210526313e-05,
+      "loss": 0.8937,
+      "step": 155
+    },
+    {
+      "epoch": 0.005464959450701512,
+      "grad_norm": 5.65955924987793,
+      "learning_rate": 2.3343157894736843e-05,
+      "loss": 1.0686,
+      "step": 156
+    },
+    {
+      "epoch": 0.005499991242052162,
+      "grad_norm": 6.030940055847168,
+      "learning_rate": 2.281263157894737e-05,
+      "loss": 1.0709,
+      "step": 157
+    },
+    {
+      "epoch": 0.005535023033402813,
+      "grad_norm": 5.607932090759277,
+      "learning_rate": 2.2282105263157892e-05,
+      "loss": 1.0595,
+      "step": 158
+    },
+    {
+      "epoch": 0.005570054824753464,
+      "grad_norm": 4.631815433502197,
+      "learning_rate": 2.175157894736842e-05,
+      "loss": 0.7808,
+      "step": 159
+    },
+    {
+      "epoch": 0.005605086616104114,
+      "grad_norm": 9.391894340515137,
+      "learning_rate": 2.1221052631578944e-05,
+      "loss": 1.6867,
+      "step": 160
+    },
+    {
+      "epoch": 0.005640118407454766,
+      "grad_norm": 6.0099616050720215,
+      "learning_rate": 2.0690526315789474e-05,
+      "loss": 0.9337,
+      "step": 161
+    },
+    {
+      "epoch": 0.005675150198805416,
+      "grad_norm": 7.833316802978516,
+      "learning_rate": 2.016e-05,
+      "loss": 1.6899,
+      "step": 162
+    },
+    {
+      "epoch": 0.0057101819901560665,
+      "grad_norm": 7.131737232208252,
+      "learning_rate": 1.9629473684210526e-05,
+      "loss": 1.3336,
+      "step": 163
+    },
+    {
+      "epoch": 0.005745213781506718,
+      "grad_norm": 7.738104343414307,
+      "learning_rate": 1.9098947368421053e-05,
+      "loss": 1.3881,
+      "step": 164
+    },
+    {
+      "epoch": 0.005780245572857368,
+      "grad_norm": 7.788103103637695,
+      "learning_rate": 1.856842105263158e-05,
+      "loss": 1.0198,
+      "step": 165
+    },
+    {
+      "epoch": 0.0058152773642080186,
+      "grad_norm": 7.89583158493042,
+      "learning_rate": 1.8037894736842105e-05,
+      "loss": 1.2767,
+      "step": 166
+    },
+    {
+      "epoch": 0.00585030915555867,
+      "grad_norm": 11.539169311523438,
+      "learning_rate": 1.750736842105263e-05,
+      "loss": 1.9096,
+      "step": 167
+    },
+    {
+      "epoch": 0.00588534094690932,
+      "grad_norm": 7.769778251647949,
+      "learning_rate": 1.6976842105263157e-05,
+      "loss": 1.4205,
+      "step": 168
+    },
+    {
+      "epoch": 0.005920372738259971,
+      "grad_norm": 4.522551536560059,
+      "learning_rate": 1.6446315789473684e-05,
+      "loss": 0.6861,
+      "step": 169
+    },
+    {
+      "epoch": 0.005955404529610622,
+      "grad_norm": 4.317171096801758,
+      "learning_rate": 1.591578947368421e-05,
+      "loss": 0.6416,
+      "step": 170
+    },
+    {
+      "epoch": 0.005990436320961272,
+      "grad_norm": 5.301867485046387,
+      "learning_rate": 1.5385263157894736e-05,
+      "loss": 0.8472,
+      "step": 171
+    },
+    {
+      "epoch": 0.006025468112311923,
+      "grad_norm": 3.946662425994873,
+      "learning_rate": 1.485473684210526e-05,
+      "loss": 0.505,
+      "step": 172
+    },
+    {
+      "epoch": 0.006060499903662574,
+      "grad_norm": 5.64309024810791,
+      "learning_rate": 1.4324210526315789e-05,
+      "loss": 0.9617,
+      "step": 173
+    },
+    {
+      "epoch": 0.0060955316950132244,
+      "grad_norm": 6.693427085876465,
+      "learning_rate": 1.3793684210526316e-05,
+      "loss": 0.8974,
+      "step": 174
+    },
+    {
+      "epoch": 0.006130563486363875,
+      "grad_norm": 4.707422733306885,
+      "learning_rate": 1.3263157894736841e-05,
+      "loss": 0.7757,
+      "step": 175
+    },
+    {
+      "epoch": 0.006165595277714526,
+      "grad_norm": 4.7554826736450195,
+      "learning_rate": 1.2732631578947369e-05,
+      "loss": 0.6396,
+      "step": 176
+    },
+    {
+      "epoch": 0.0062006270690651765,
+      "grad_norm": 4.768486499786377,
+      "learning_rate": 1.2202105263157895e-05,
+      "loss": 0.6989,
+      "step": 177
+    },
+    {
+      "epoch": 0.006235658860415827,
+      "grad_norm": 4.941887378692627,
+      "learning_rate": 1.1671578947368421e-05,
+      "loss": 0.6704,
+      "step": 178
+    },
+    {
+      "epoch": 0.006270690651766478,
+      "grad_norm": 5.827295780181885,
+      "learning_rate": 1.1141052631578946e-05,
+      "loss": 0.9484,
+      "step": 179
+    },
+    {
+      "epoch": 0.006305722443117129,
+      "grad_norm": 5.031542778015137,
+      "learning_rate": 1.0610526315789472e-05,
+      "loss": 0.7449,
+      "step": 180
+    },
+    {
+      "epoch": 0.00634075423446778,
+      "grad_norm": 5.917794704437256,
+      "learning_rate": 1.008e-05,
+      "loss": 1.0857,
+      "step": 181
+    },
+    {
+      "epoch": 0.00637578602581843,
+      "grad_norm": 4.596070766448975,
+      "learning_rate": 9.549473684210526e-06,
+      "loss": 0.6569,
+      "step": 182
+    },
+    {
+      "epoch": 0.006410817817169081,
+      "grad_norm": 4.4000325202941895,
+      "learning_rate": 9.018947368421052e-06,
+      "loss": 0.6524,
+      "step": 183
+    },
+    {
+      "epoch": 0.006445849608519732,
+      "grad_norm": 7.547094345092773,
+      "learning_rate": 8.488421052631579e-06,
+      "loss": 1.0409,
+      "step": 184
+    },
+    {
+      "epoch": 0.0064808813998703824,
+      "grad_norm": 5.421209335327148,
+      "learning_rate": 7.957894736842105e-06,
+      "loss": 0.8219,
+      "step": 185
+    },
+    {
+      "epoch": 0.006515913191221033,
+      "grad_norm": 6.741072177886963,
+      "learning_rate": 7.42736842105263e-06,
+      "loss": 0.8836,
+      "step": 186
+    },
+    {
+      "epoch": 0.006550944982571684,
+      "grad_norm": 6.431979656219482,
+      "learning_rate": 6.896842105263158e-06,
+      "loss": 0.9018,
+      "step": 187
+    },
+    {
+      "epoch": 0.0065859767739223345,
+      "grad_norm": 4.790152549743652,
+      "learning_rate": 6.3663157894736845e-06,
+      "loss": 0.5921,
+      "step": 188
+    },
+    {
+      "epoch": 0.006621008565272985,
+      "grad_norm": 5.673473358154297,
+      "learning_rate": 5.835789473684211e-06,
+      "loss": 0.6023,
+      "step": 189
+    },
+    {
+      "epoch": 0.006656040356623636,
+      "grad_norm": 4.413316249847412,
+      "learning_rate": 5.305263157894736e-06,
+      "loss": 0.6352,
+      "step": 190
+    },
+    {
+      "epoch": 0.006691072147974287,
+      "grad_norm": 5.780127048492432,
+      "learning_rate": 4.774736842105263e-06,
+      "loss": 0.9693,
+      "step": 191
+    },
+    {
+      "epoch": 0.006726103939324937,
+      "grad_norm": 4.126491069793701,
+      "learning_rate": 4.244210526315789e-06,
+      "loss": 0.5177,
+      "step": 192
+    },
+    {
+      "epoch": 0.006761135730675588,
+      "grad_norm": 8.283818244934082,
+      "learning_rate": 3.713684210526315e-06,
+      "loss": 0.874,
+      "step": 193
+    },
+    {
+      "epoch": 0.006796167522026239,
+      "grad_norm": 5.534949779510498,
+      "learning_rate": 3.1831578947368422e-06,
+      "loss": 0.8445,
+      "step": 194
+    },
+    {
+      "epoch": 0.006831199313376889,
+      "grad_norm": 6.650455951690674,
+      "learning_rate": 2.652631578947368e-06,
+      "loss": 1.0534,
+      "step": 195
+    },
+    {
+      "epoch": 0.00686623110472754,
+      "grad_norm": 6.208735942840576,
+      "learning_rate": 2.1221052631578947e-06,
+      "loss": 0.7896,
+      "step": 196
+    },
+    {
+      "epoch": 0.006901262896078191,
+      "grad_norm": 4.9419732093811035,
+      "learning_rate": 1.5915789473684211e-06,
+      "loss": 0.641,
+      "step": 197
+    },
+    {
+      "epoch": 0.006936294687428841,
+      "grad_norm": 5.195125579833984,
+      "learning_rate": 1.0610526315789473e-06,
+      "loss": 0.7821,
+      "step": 198
+    },
+    {
+      "epoch": 0.0069713264787794925,
+      "grad_norm": 5.101795196533203,
+      "learning_rate": 5.305263157894737e-07,
+      "loss": 0.5665,
+      "step": 199
+    },
+    {
+      "epoch": 0.007006358270130143,
+      "grad_norm": 4.9422173500061035,
+      "learning_rate": 0.0,
+      "loss": 0.5634,
+      "step": 200
+    },
+    {
+      "epoch": 0.007006358270130143,
+      "eval_loss": 0.907011091709137,
+      "eval_runtime": 2060.6424,
+      "eval_samples_per_second": 5.833,
+      "eval_steps_per_second": 1.458,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0571696306965709e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null