Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e62767e389492d8710832d54169831da017919bc5b541aa3a5c992507dc7d386
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a5d386f2a591f967daf2c056eb100b71300a1d45d299f2299c500bfb0e3da84
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2fa4019a8e43d03eb169dd8766c527944cb87ed2ca55cf70e5ae36eef1835c62
 size 71878612

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea56bbf9f5dd63f387c9225932e4edf76ba710a43326d0be7a08b14fbf6ea26d
 size 71878612

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcfb14afe93f959544217f7d88d17aa9a687f9ef7c2d1681ce29e0ac4381aed9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7b0c10a052c98a95e920221b51817a18a4d8f7ff658979b529c87379ae874da
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:191b4f74b8892fe464b31b446bc6f50032359ce22cb38236d5fdccf47f27920e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9abccd3ade815397c3a4e9cae178fd4a326a690915052661d8621974d592484a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.8144692182540894,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.004051535531966615,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 48.428,
       "eval_steps_per_second": 12.109,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4018810864533504.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.8002580404281616,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.005402047375955487,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 48.428,
       "eval_steps_per_second": 12.109,
       "step": 150
+    },
+    {
+      "epoch": 0.004078545768846393,
+      "grad_norm": 2.3892409801483154,
+      "learning_rate": 2.583589473684211e-05,
+      "loss": 1.8418,
+      "step": 151
+    },
+    {
+      "epoch": 0.00410555600572617,
+      "grad_norm": 2.7030460834503174,
+      "learning_rate": 2.530863157894737e-05,
+      "loss": 1.9883,
+      "step": 152
+    },
+    {
+      "epoch": 0.004132566242605947,
+      "grad_norm": 2.809255838394165,
+      "learning_rate": 2.4781368421052633e-05,
+      "loss": 1.994,
+      "step": 153
+    },
+    {
+      "epoch": 0.004159576479485725,
+      "grad_norm": 2.7749948501586914,
+      "learning_rate": 2.4254105263157896e-05,
+      "loss": 1.7347,
+      "step": 154
+    },
+    {
+      "epoch": 0.0041865867163655025,
+      "grad_norm": 2.7523014545440674,
+      "learning_rate": 2.372684210526316e-05,
+      "loss": 1.4725,
+      "step": 155
+    },
+    {
+      "epoch": 0.00421359695324528,
+      "grad_norm": 3.1755049228668213,
+      "learning_rate": 2.3199578947368422e-05,
+      "loss": 1.9171,
+      "step": 156
+    },
+    {
+      "epoch": 0.004240607190125058,
+      "grad_norm": 3.1159286499023438,
+      "learning_rate": 2.2672315789473688e-05,
+      "loss": 2.0677,
+      "step": 157
+    },
+    {
+      "epoch": 0.004267617427004835,
+      "grad_norm": 2.927952527999878,
+      "learning_rate": 2.2145052631578948e-05,
+      "loss": 1.6294,
+      "step": 158
+    },
+    {
+      "epoch": 0.004294627663884612,
+      "grad_norm": 2.920624256134033,
+      "learning_rate": 2.1617789473684214e-05,
+      "loss": 1.9123,
+      "step": 159
+    },
+    {
+      "epoch": 0.00432163790076439,
+      "grad_norm": 3.3584253787994385,
+      "learning_rate": 2.1090526315789473e-05,
+      "loss": 2.0167,
+      "step": 160
+    },
+    {
+      "epoch": 0.004348648137644167,
+      "grad_norm": 4.036590099334717,
+      "learning_rate": 2.056326315789474e-05,
+      "loss": 2.3696,
+      "step": 161
+    },
+    {
+      "epoch": 0.004375658374523945,
+      "grad_norm": 3.8149144649505615,
+      "learning_rate": 2.0036000000000003e-05,
+      "loss": 2.3095,
+      "step": 162
+    },
+    {
+      "epoch": 0.004402668611403722,
+      "grad_norm": 3.711400032043457,
+      "learning_rate": 1.9508736842105266e-05,
+      "loss": 1.9094,
+      "step": 163
+    },
+    {
+      "epoch": 0.004429678848283499,
+      "grad_norm": 3.479764699935913,
+      "learning_rate": 1.898147368421053e-05,
+      "loss": 2.1776,
+      "step": 164
+    },
+    {
+      "epoch": 0.004456689085163277,
+      "grad_norm": 4.012089729309082,
+      "learning_rate": 1.8454210526315788e-05,
+      "loss": 1.9562,
+      "step": 165
+    },
+    {
+      "epoch": 0.004483699322043054,
+      "grad_norm": 5.2212815284729,
+      "learning_rate": 1.7926947368421054e-05,
+      "loss": 2.3301,
+      "step": 166
+    },
+    {
+      "epoch": 0.0045107095589228315,
+      "grad_norm": 4.928096294403076,
+      "learning_rate": 1.7399684210526317e-05,
+      "loss": 2.0416,
+      "step": 167
+    },
+    {
+      "epoch": 0.0045377197958026095,
+      "grad_norm": 3.51525616645813,
+      "learning_rate": 1.687242105263158e-05,
+      "loss": 2.1433,
+      "step": 168
+    },
+    {
+      "epoch": 0.004564730032682387,
+      "grad_norm": 2.5053954124450684,
+      "learning_rate": 1.6345157894736843e-05,
+      "loss": 1.2284,
+      "step": 169
+    },
+    {
+      "epoch": 0.004591740269562164,
+      "grad_norm": 3.1079280376434326,
+      "learning_rate": 1.5817894736842106e-05,
+      "loss": 1.8039,
+      "step": 170
+    },
+    {
+      "epoch": 0.004618750506441942,
+      "grad_norm": 2.9641072750091553,
+      "learning_rate": 1.529063157894737e-05,
+      "loss": 1.7875,
+      "step": 171
+    },
+    {
+      "epoch": 0.004645760743321719,
+      "grad_norm": 2.7184035778045654,
+      "learning_rate": 1.4763368421052632e-05,
+      "loss": 1.7067,
+      "step": 172
+    },
+    {
+      "epoch": 0.004672770980201496,
+      "grad_norm": 2.5093767642974854,
+      "learning_rate": 1.4236105263157895e-05,
+      "loss": 1.5895,
+      "step": 173
+    },
+    {
+      "epoch": 0.004699781217081274,
+      "grad_norm": 3.6973133087158203,
+      "learning_rate": 1.370884210526316e-05,
+      "loss": 1.8151,
+      "step": 174
+    },
+    {
+      "epoch": 0.004726791453961051,
+      "grad_norm": 2.5547432899475098,
+      "learning_rate": 1.318157894736842e-05,
+      "loss": 1.5454,
+      "step": 175
+    },
+    {
+      "epoch": 0.004753801690840828,
+      "grad_norm": 3.5522589683532715,
+      "learning_rate": 1.2654315789473685e-05,
+      "loss": 1.8471,
+      "step": 176
+    },
+    {
+      "epoch": 0.004780811927720606,
+      "grad_norm": 2.897728681564331,
+      "learning_rate": 1.2127052631578948e-05,
+      "loss": 1.7892,
+      "step": 177
+    },
+    {
+      "epoch": 0.0048078221646003835,
+      "grad_norm": 3.0070700645446777,
+      "learning_rate": 1.1599789473684211e-05,
+      "loss": 1.7458,
+      "step": 178
+    },
+    {
+      "epoch": 0.004834832401480161,
+      "grad_norm": 3.040104389190674,
+      "learning_rate": 1.1072526315789474e-05,
+      "loss": 1.6547,
+      "step": 179
+    },
+    {
+      "epoch": 0.004861842638359939,
+      "grad_norm": 2.3258042335510254,
+      "learning_rate": 1.0545263157894737e-05,
+      "loss": 1.2709,
+      "step": 180
+    },
+    {
+      "epoch": 0.004888852875239716,
+      "grad_norm": 2.8469483852386475,
+      "learning_rate": 1.0018000000000001e-05,
+      "loss": 1.6427,
+      "step": 181
+    },
+    {
+      "epoch": 0.004915863112119493,
+      "grad_norm": 3.239359140396118,
+      "learning_rate": 9.490736842105264e-06,
+      "loss": 1.6716,
+      "step": 182
+    },
+    {
+      "epoch": 0.004942873348999271,
+      "grad_norm": 2.6865952014923096,
+      "learning_rate": 8.963473684210527e-06,
+      "loss": 1.693,
+      "step": 183
+    },
+    {
+      "epoch": 0.004969883585879048,
+      "grad_norm": 3.1464898586273193,
+      "learning_rate": 8.43621052631579e-06,
+      "loss": 1.9052,
+      "step": 184
+    },
+    {
+      "epoch": 0.004996893822758826,
+      "grad_norm": 2.902247905731201,
+      "learning_rate": 7.908947368421053e-06,
+      "loss": 1.8767,
+      "step": 185
+    },
+    {
+      "epoch": 0.005023904059638603,
+      "grad_norm": 2.765345573425293,
+      "learning_rate": 7.381684210526316e-06,
+      "loss": 1.4926,
+      "step": 186
+    },
+    {
+      "epoch": 0.00505091429651838,
+      "grad_norm": 3.451422691345215,
+      "learning_rate": 6.85442105263158e-06,
+      "loss": 1.9693,
+      "step": 187
+    },
+    {
+      "epoch": 0.005077924533398158,
+      "grad_norm": 2.784844398498535,
+      "learning_rate": 6.3271578947368425e-06,
+      "loss": 1.4707,
+      "step": 188
+    },
+    {
+      "epoch": 0.005104934770277935,
+      "grad_norm": 3.7692813873291016,
+      "learning_rate": 5.7998947368421054e-06,
+      "loss": 2.1994,
+      "step": 189
+    },
+    {
+      "epoch": 0.0051319450071577125,
+      "grad_norm": 3.0001394748687744,
+      "learning_rate": 5.272631578947368e-06,
+      "loss": 1.5634,
+      "step": 190
+    },
+    {
+      "epoch": 0.0051589552440374905,
+      "grad_norm": 2.9124486446380615,
+      "learning_rate": 4.745368421052632e-06,
+      "loss": 1.5113,
+      "step": 191
+    },
+    {
+      "epoch": 0.005185965480917268,
+      "grad_norm": 2.946713924407959,
+      "learning_rate": 4.218105263157895e-06,
+      "loss": 1.6126,
+      "step": 192
+    },
+    {
+      "epoch": 0.005212975717797045,
+      "grad_norm": 3.2477729320526123,
+      "learning_rate": 3.690842105263158e-06,
+      "loss": 1.362,
+      "step": 193
+    },
+    {
+      "epoch": 0.005239985954676823,
+      "grad_norm": 3.283214807510376,
+      "learning_rate": 3.1635789473684213e-06,
+      "loss": 1.6263,
+      "step": 194
+    },
+    {
+      "epoch": 0.0052669961915566,
+      "grad_norm": 2.9746735095977783,
+      "learning_rate": 2.636315789473684e-06,
+      "loss": 1.5695,
+      "step": 195
+    },
+    {
+      "epoch": 0.005294006428436377,
+      "grad_norm": 3.2501778602600098,
+      "learning_rate": 2.1090526315789475e-06,
+      "loss": 1.733,
+      "step": 196
+    },
+    {
+      "epoch": 0.005321016665316155,
+      "grad_norm": 2.839373826980591,
+      "learning_rate": 1.5817894736842106e-06,
+      "loss": 1.6794,
+      "step": 197
+    },
+    {
+      "epoch": 0.005348026902195932,
+      "grad_norm": 3.3260462284088135,
+      "learning_rate": 1.0545263157894738e-06,
+      "loss": 1.5539,
+      "step": 198
+    },
+    {
+      "epoch": 0.005375037139075709,
+      "grad_norm": 2.837555408477783,
+      "learning_rate": 5.272631578947369e-07,
+      "loss": 1.4777,
+      "step": 199
+    },
+    {
+      "epoch": 0.005402047375955487,
+      "grad_norm": 2.7860605716705322,
+      "learning_rate": 0.0,
+      "loss": 1.6072,
+      "step": 200
+    },
+    {
+      "epoch": 0.005402047375955487,
+      "eval_loss": 1.8002580404281616,
+      "eval_runtime": 323.0678,
+      "eval_samples_per_second": 48.253,
+      "eval_steps_per_second": 12.066,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5332652877938688.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null