Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63f9c561d26913b0d45756fcfafa8863a23122c66e0dbe7dbc82ffcc042b2537
 size 389074464

 version https://git-lfs.github.com/spec/v1
+oid sha256:88394326c6bf61b6ca37609a77a3a8995ad0ea036c280091024e776229d4f51b
 size 389074464

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9973fa49dd1818e61fab7b962848c47b97ec0a22af70fe2755a32ce360687c55
 size 198011252

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b1429234fa94663056b0fa0c050763ad0b86040011535e31b3ea11d624e8a72
 size 198011252

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:741b188532d5ed7f83ef7d58a413d797deb4f38779f0427ddda8341b9145f402
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a1a13967d83048ef1e171f81ca5ff66f6dd7ae7af3b300a1f19451ef4991f7a5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed88ff7cbe02e7c054abfce3510882248a01917e1eb8cf2c358f612dd7312357
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c09a3e270daa077e38bac4ad605b0e6c9b43938bcf49dc3d62c3671f11248c14
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.8138833045959473,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.0028818720640928345,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 22.821,
       "eval_steps_per_second": 5.705,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.900629702587187e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.8127896785736084,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.003842496085457113,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.821,
       "eval_steps_per_second": 5.705,
       "step": 150
+    },
+    {
+      "epoch": 0.0029010845445201204,
+      "grad_norm": 0.3780750036239624,
+      "learning_rate": 1.397909658966454e-05,
+      "loss": 2.022,
+      "step": 151
+    },
+    {
+      "epoch": 0.0029202970249474058,
+      "grad_norm": 0.4781886637210846,
+      "learning_rate": 1.3444344707738015e-05,
+      "loss": 2.0794,
+      "step": 152
+    },
+    {
+      "epoch": 0.002939509505374691,
+      "grad_norm": 0.8120487332344055,
+      "learning_rate": 1.2918219826887136e-05,
+      "loss": 1.4991,
+      "step": 153
+    },
+    {
+      "epoch": 0.002958721985801977,
+      "grad_norm": 0.6794188618659973,
+      "learning_rate": 1.2400865784400998e-05,
+      "loss": 1.6916,
+      "step": 154
+    },
+    {
+      "epoch": 0.0029779344662292625,
+      "grad_norm": 0.8753079771995544,
+      "learning_rate": 1.189242401970908e-05,
+      "loss": 1.6724,
+      "step": 155
+    },
+    {
+      "epoch": 0.0029971469466565483,
+      "grad_norm": 0.9768186211585999,
+      "learning_rate": 1.139303353571309e-05,
+      "loss": 2.0687,
+      "step": 156
+    },
+    {
+      "epoch": 0.0030163594270838337,
+      "grad_norm": 0.9926120042800903,
+      "learning_rate": 1.0902830860784946e-05,
+      "loss": 1.5077,
+      "step": 157
+    },
+    {
+      "epoch": 0.003035571907511119,
+      "grad_norm": 0.7764493823051453,
+      "learning_rate": 1.0421950011441355e-05,
+      "loss": 1.8041,
+      "step": 158
+    },
+    {
+      "epoch": 0.003054784387938405,
+      "grad_norm": 1.189863681793213,
+      "learning_rate": 9.950522455704946e-06,
+      "loss": 1.5459,
+      "step": 159
+    },
+    {
+      "epoch": 0.0030739968683656904,
+      "grad_norm": 0.9329153895378113,
+      "learning_rate": 9.488677077162294e-06,
+      "loss": 2.1531,
+      "step": 160
+    },
+    {
+      "epoch": 0.003093209348792976,
+      "grad_norm": 0.9131903052330017,
+      "learning_rate": 9.03654013972839e-06,
+      "loss": 2.0871,
+      "step": 161
+    },
+    {
+      "epoch": 0.0031124218292202617,
+      "grad_norm": 1.1890541315078735,
+      "learning_rate": 8.59423525312737e-06,
+      "loss": 1.2826,
+      "step": 162
+    },
+    {
+      "epoch": 0.003131634309647547,
+      "grad_norm": 0.9106007218360901,
+      "learning_rate": 8.161883339098845e-06,
+      "loss": 1.9189,
+      "step": 163
+    },
+    {
+      "epoch": 0.0031508467900748325,
+      "grad_norm": 1.041857123374939,
+      "learning_rate": 7.739602598339099e-06,
+      "loss": 1.1295,
+      "step": 164
+    },
+    {
+      "epoch": 0.0031700592705021183,
+      "grad_norm": 1.1270183324813843,
+      "learning_rate": 7.327508478186216e-06,
+      "loss": 1.3541,
+      "step": 165
+    },
+    {
+      "epoch": 0.0031892717509294038,
+      "grad_norm": 1.1173748970031738,
+      "learning_rate": 6.925713641057902e-06,
+      "loss": 0.7724,
+      "step": 166
+    },
+    {
+      "epoch": 0.003208484231356689,
+      "grad_norm": 1.116767406463623,
+      "learning_rate": 6.53432793365074e-06,
+      "loss": 0.8932,
+      "step": 167
+    },
+    {
+      "epoch": 0.003227696711783975,
+      "grad_norm": 0.9970417618751526,
+      "learning_rate": 6.153458356909174e-06,
+      "loss": 1.0993,
+      "step": 168
+    },
+    {
+      "epoch": 0.0032469091922112604,
+      "grad_norm": 1.0910117626190186,
+      "learning_rate": 5.783209036772518e-06,
+      "loss": 0.9567,
+      "step": 169
+    },
+    {
+      "epoch": 0.003266121672638546,
+      "grad_norm": 1.4644685983657837,
+      "learning_rate": 5.423681195707997e-06,
+      "loss": 0.977,
+      "step": 170
+    },
+    {
+      "epoch": 0.0032853341530658317,
+      "grad_norm": 1.073829174041748,
+      "learning_rate": 5.074973125037469e-06,
+      "loss": 1.2876,
+      "step": 171
+    },
+    {
+      "epoch": 0.003304546633493117,
+      "grad_norm": 1.233479380607605,
+      "learning_rate": 4.737180158065644e-06,
+      "loss": 1.3618,
+      "step": 172
+    },
+    {
+      "epoch": 0.0033237591139204025,
+      "grad_norm": 1.34217369556427,
+      "learning_rate": 4.41039464401685e-06,
+      "loss": 1.3816,
+      "step": 173
+    },
+    {
+      "epoch": 0.0033429715943476884,
+      "grad_norm": 1.662285566329956,
+      "learning_rate": 4.094705922787687e-06,
+      "loss": 0.9877,
+      "step": 174
+    },
+    {
+      "epoch": 0.003362184074774974,
+      "grad_norm": 1.2599389553070068,
+      "learning_rate": 3.7902003005224126e-06,
+      "loss": 1.0554,
+      "step": 175
+    },
+    {
+      "epoch": 0.0033813965552022592,
+      "grad_norm": 1.3760629892349243,
+      "learning_rate": 3.4969610260176865e-06,
+      "loss": 1.3303,
+      "step": 176
+    },
+    {
+      "epoch": 0.003400609035629545,
+      "grad_norm": 1.4048978090286255,
+      "learning_rate": 3.2150682679631867e-06,
+      "loss": 1.2862,
+      "step": 177
+    },
+    {
+      "epoch": 0.0034198215160568305,
+      "grad_norm": 1.169244408607483,
+      "learning_rate": 2.9445990930242668e-06,
+      "loss": 1.0257,
+      "step": 178
+    },
+    {
+      "epoch": 0.003439033996484116,
+      "grad_norm": 1.1791973114013672,
+      "learning_rate": 2.6856274447727475e-06,
+      "loss": 1.061,
+      "step": 179
+    },
+    {
+      "epoch": 0.0034582464769114018,
+      "grad_norm": 1.7341974973678589,
+      "learning_rate": 2.4382241234714413e-06,
+      "loss": 1.2628,
+      "step": 180
+    },
+    {
+      "epoch": 0.003477458957338687,
+      "grad_norm": 1.8596497774124146,
+      "learning_rate": 2.2024567667180914e-06,
+      "loss": 1.8631,
+      "step": 181
+    },
+    {
+      "epoch": 0.0034966714377659726,
+      "grad_norm": 1.7305415868759155,
+      "learning_rate": 1.978389830953906e-06,
+      "loss": 1.1139,
+      "step": 182
+    },
+    {
+      "epoch": 0.0035158839181932584,
+      "grad_norm": 1.690251350402832,
+      "learning_rate": 1.7660845738418336e-06,
+      "loss": 1.1278,
+      "step": 183
+    },
+    {
+      "epoch": 0.003535096398620544,
+      "grad_norm": 1.4446978569030762,
+      "learning_rate": 1.5655990375193147e-06,
+      "loss": 0.899,
+      "step": 184
+    },
+    {
+      "epoch": 0.0035543088790478293,
+      "grad_norm": 1.9374878406524658,
+      "learning_rate": 1.3769880327301332e-06,
+      "loss": 0.9448,
+      "step": 185
+    },
+    {
+      "epoch": 0.003573521359475115,
+      "grad_norm": 1.8252419233322144,
+      "learning_rate": 1.2003031238397417e-06,
+      "loss": 0.7858,
+      "step": 186
+    },
+    {
+      "epoch": 0.0035927338399024005,
+      "grad_norm": 2.349891424179077,
+      "learning_rate": 1.035592614738033e-06,
+      "loss": 1.245,
+      "step": 187
+    },
+    {
+      "epoch": 0.0036119463203296864,
+      "grad_norm": 1.9561939239501953,
+      "learning_rate": 8.829015356335791e-07,
+      "loss": 1.5753,
+      "step": 188
+    },
+    {
+      "epoch": 0.003631158800756972,
+      "grad_norm": 1.7813923358917236,
+      "learning_rate": 7.422716307427936e-07,
+      "loss": 1.5121,
+      "step": 189
+    },
+    {
+      "epoch": 0.003650371281184257,
+      "grad_norm": 2.0774621963500977,
+      "learning_rate": 6.137413468774955e-07,
+      "loss": 1.9692,
+      "step": 190
+    },
+    {
+      "epoch": 0.003669583761611543,
+      "grad_norm": 1.7934973239898682,
+      "learning_rate": 4.973458229339179e-07,
+      "loss": 1.7271,
+      "step": 191
+    },
+    {
+      "epoch": 0.0036887962420388285,
+      "grad_norm": 2.4085757732391357,
+      "learning_rate": 3.9311688028611627e-07,
+      "loss": 1.9639,
+      "step": 192
+    },
+    {
+      "epoch": 0.003708008722466114,
+      "grad_norm": 2.4580140113830566,
+      "learning_rate": 3.010830140862836e-07,
+      "loss": 1.2718,
+      "step": 193
+    },
+    {
+      "epoch": 0.0037272212028933997,
+      "grad_norm": 2.0989091396331787,
+      "learning_rate": 2.2126938547448627e-07,
+      "loss": 2.1295,
+      "step": 194
+    },
+    {
+      "epoch": 0.003746433683320685,
+      "grad_norm": 2.8977036476135254,
+      "learning_rate": 1.536978146998569e-07,
+      "loss": 2.605,
+      "step": 195
+    },
+    {
+      "epoch": 0.0037656461637479706,
+      "grad_norm": 2.9574570655822754,
+      "learning_rate": 9.838677515514594e-08,
+      "loss": 2.2732,
+      "step": 196
+    },
+    {
+      "epoch": 0.0037848586441752564,
+      "grad_norm": 3.1725268363952637,
+      "learning_rate": 5.5351388326286834e-08,
+      "loss": 2.3167,
+      "step": 197
+    },
+    {
+      "epoch": 0.003804071124602542,
+      "grad_norm": 3.776048421859741,
+      "learning_rate": 2.4603419658327797e-08,
+      "loss": 2.1604,
+      "step": 198
+    },
+    {
+      "epoch": 0.0038232836050298273,
+      "grad_norm": 3.629805088043213,
+      "learning_rate": 6.151275338894813e-09,
+      "loss": 2.3745,
+      "step": 199
+    },
+    {
+      "epoch": 0.003842496085457113,
+      "grad_norm": 3.931309461593628,
+      "learning_rate": 0.0,
+      "loss": 2.6561,
+      "step": 200
+    },
+    {
+      "epoch": 0.003842496085457113,
+      "eval_loss": 1.8127896785736084,
+      "eval_runtime": 3843.9208,
+      "eval_samples_per_second": 22.805,
+      "eval_steps_per_second": 5.701,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1853173533075046e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null