Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6e4e6f1eb0abaa0d8d2cab98b34bcc459b3e576688459d60fec600ed9cc5f41
 size 800116456

 version https://git-lfs.github.com/spec/v1
+oid sha256:62e87acdbfcdd9dc20bfef94827a0ce59aa38bc2d132df573bd3d9744d1c99e1
 size 800116456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b30f29727e23641c9ab5b64772f1b01ab5c0f66212c68c72a8bfcb2f331932ab
 size 406743412

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5718d0be5b17072815299de74071187ad008215d002410e5308ab4a9a40f404
 size 406743412

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcbf7d5e4aa7b38b8ebe707846794c5b3732e8a4cdd09857174a2fb83ed221a8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ba8ede8b1ce1289bd047fa1b015e236ff2970647049d9ac45b408f024313221
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:429ac0058d053852a57e244c528ad2a9ef374afdf80fd23f716aee5119cd57d1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8685a85e01d0081c4ee6b3d27083bc45de61653fc346f2b531f3e09e6eff0d83
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.029884696006775,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.0035031791350650715,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 5.831,
       "eval_steps_per_second": 1.458,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.236995582748262e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9501329064369202,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.005254768702597608,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.831,
       "eval_steps_per_second": 1.458,
       "step": 100
+    },
+    {
+      "epoch": 0.0035382109264157223,
+      "grad_norm": 4.1783599853515625,
+      "learning_rate": 5.252210526315789e-05,
+      "loss": 0.9496,
+      "step": 101
+    },
+    {
+      "epoch": 0.003573242717766373,
+      "grad_norm": 5.620020866394043,
+      "learning_rate": 5.199157894736842e-05,
+      "loss": 1.3367,
+      "step": 102
+    },
+    {
+      "epoch": 0.0036082745091170236,
+      "grad_norm": 5.590014457702637,
+      "learning_rate": 5.1461052631578946e-05,
+      "loss": 1.0153,
+      "step": 103
+    },
+    {
+      "epoch": 0.0036433063004676744,
+      "grad_norm": 5.261481285095215,
+      "learning_rate": 5.0930526315789476e-05,
+      "loss": 0.9291,
+      "step": 104
+    },
+    {
+      "epoch": 0.0036783380918183253,
+      "grad_norm": 6.13337516784668,
+      "learning_rate": 5.04e-05,
+      "loss": 1.0299,
+      "step": 105
+    },
+    {
+      "epoch": 0.0037133698831689757,
+      "grad_norm": 4.944069862365723,
+      "learning_rate": 4.986947368421052e-05,
+      "loss": 0.9319,
+      "step": 106
+    },
+    {
+      "epoch": 0.0037484016745196265,
+      "grad_norm": 5.500107765197754,
+      "learning_rate": 4.933894736842105e-05,
+      "loss": 0.9859,
+      "step": 107
+    },
+    {
+      "epoch": 0.0037834334658702774,
+      "grad_norm": 4.677202224731445,
+      "learning_rate": 4.880842105263158e-05,
+      "loss": 0.9123,
+      "step": 108
+    },
+    {
+      "epoch": 0.003818465257220928,
+      "grad_norm": 6.256136417388916,
+      "learning_rate": 4.8277894736842103e-05,
+      "loss": 1.1807,
+      "step": 109
+    },
+    {
+      "epoch": 0.0038534970485715786,
+      "grad_norm": 8.950167655944824,
+      "learning_rate": 4.7747368421052626e-05,
+      "loss": 1.1984,
+      "step": 110
+    },
+    {
+      "epoch": 0.0038885288399222295,
+      "grad_norm": 7.130908966064453,
+      "learning_rate": 4.7216842105263156e-05,
+      "loss": 1.4225,
+      "step": 111
+    },
+    {
+      "epoch": 0.00392356063127288,
+      "grad_norm": 7.4759931564331055,
+      "learning_rate": 4.6686315789473686e-05,
+      "loss": 1.3882,
+      "step": 112
+    },
+    {
+      "epoch": 0.003958592422623531,
+      "grad_norm": 10.55746078491211,
+      "learning_rate": 4.615578947368421e-05,
+      "loss": 1.5642,
+      "step": 113
+    },
+    {
+      "epoch": 0.0039936242139741816,
+      "grad_norm": 7.839519500732422,
+      "learning_rate": 4.562526315789474e-05,
+      "loss": 1.2803,
+      "step": 114
+    },
+    {
+      "epoch": 0.004028656005324832,
+      "grad_norm": 7.067453861236572,
+      "learning_rate": 4.509473684210526e-05,
+      "loss": 1.0843,
+      "step": 115
+    },
+    {
+      "epoch": 0.004063687796675483,
+      "grad_norm": 7.410902500152588,
+      "learning_rate": 4.4564210526315784e-05,
+      "loss": 1.1034,
+      "step": 116
+    },
+    {
+      "epoch": 0.004098719588026134,
+      "grad_norm": 12.82276439666748,
+      "learning_rate": 4.403368421052632e-05,
+      "loss": 1.9388,
+      "step": 117
+    },
+    {
+      "epoch": 0.004133751379376784,
+      "grad_norm": 11.6984281539917,
+      "learning_rate": 4.350315789473684e-05,
+      "loss": 2.2554,
+      "step": 118
+    },
+    {
+      "epoch": 0.004168783170727435,
+      "grad_norm": 9.550557136535645,
+      "learning_rate": 4.2972631578947366e-05,
+      "loss": 1.2014,
+      "step": 119
+    },
+    {
+      "epoch": 0.004203814962078086,
+      "grad_norm": 6.161247730255127,
+      "learning_rate": 4.244210526315789e-05,
+      "loss": 0.8151,
+      "step": 120
+    },
+    {
+      "epoch": 0.004238846753428736,
+      "grad_norm": 7.186214447021484,
+      "learning_rate": 4.191157894736842e-05,
+      "loss": 0.9695,
+      "step": 121
+    },
+    {
+      "epoch": 0.0042738785447793875,
+      "grad_norm": 6.048009395599365,
+      "learning_rate": 4.138105263157895e-05,
+      "loss": 0.9508,
+      "step": 122
+    },
+    {
+      "epoch": 0.004308910336130038,
+      "grad_norm": 5.062438011169434,
+      "learning_rate": 4.085052631578947e-05,
+      "loss": 0.7397,
+      "step": 123
+    },
+    {
+      "epoch": 0.004343942127480689,
+      "grad_norm": 5.460752010345459,
+      "learning_rate": 4.032e-05,
+      "loss": 0.8562,
+      "step": 124
+    },
+    {
+      "epoch": 0.0043789739188313396,
+      "grad_norm": 4.651021480560303,
+      "learning_rate": 3.978947368421052e-05,
+      "loss": 0.7181,
+      "step": 125
+    },
+    {
+      "epoch": 0.00441400571018199,
+      "grad_norm": 7.1053667068481445,
+      "learning_rate": 3.925894736842105e-05,
+      "loss": 0.9492,
+      "step": 126
+    },
+    {
+      "epoch": 0.004449037501532641,
+      "grad_norm": 4.558400630950928,
+      "learning_rate": 3.8728421052631575e-05,
+      "loss": 0.8316,
+      "step": 127
+    },
+    {
+      "epoch": 0.004484069292883292,
+      "grad_norm": 4.557405948638916,
+      "learning_rate": 3.8197894736842105e-05,
+      "loss": 0.6187,
+      "step": 128
+    },
+    {
+      "epoch": 0.004519101084233942,
+      "grad_norm": 6.3636298179626465,
+      "learning_rate": 3.766736842105263e-05,
+      "loss": 0.7996,
+      "step": 129
+    },
+    {
+      "epoch": 0.004554132875584593,
+      "grad_norm": 4.011357307434082,
+      "learning_rate": 3.713684210526316e-05,
+      "loss": 0.7043,
+      "step": 130
+    },
+    {
+      "epoch": 0.004589164666935244,
+      "grad_norm": 5.260727405548096,
+      "learning_rate": 3.660631578947369e-05,
+      "loss": 0.6685,
+      "step": 131
+    },
+    {
+      "epoch": 0.004624196458285894,
+      "grad_norm": 5.384383678436279,
+      "learning_rate": 3.607578947368421e-05,
+      "loss": 0.8229,
+      "step": 132
+    },
+    {
+      "epoch": 0.0046592282496365454,
+      "grad_norm": 5.44893217086792,
+      "learning_rate": 3.554526315789473e-05,
+      "loss": 0.6983,
+      "step": 133
+    },
+    {
+      "epoch": 0.004694260040987196,
+      "grad_norm": 5.031147003173828,
+      "learning_rate": 3.501473684210526e-05,
+      "loss": 0.6835,
+      "step": 134
+    },
+    {
+      "epoch": 0.004729291832337846,
+      "grad_norm": 5.930494785308838,
+      "learning_rate": 3.448421052631579e-05,
+      "loss": 0.9263,
+      "step": 135
+    },
+    {
+      "epoch": 0.0047643236236884975,
+      "grad_norm": 5.080932140350342,
+      "learning_rate": 3.3953684210526315e-05,
+      "loss": 0.7292,
+      "step": 136
+    },
+    {
+      "epoch": 0.004799355415039148,
+      "grad_norm": 5.154940128326416,
+      "learning_rate": 3.342315789473684e-05,
+      "loss": 0.6465,
+      "step": 137
+    },
+    {
+      "epoch": 0.004834387206389798,
+      "grad_norm": 5.2444939613342285,
+      "learning_rate": 3.289263157894737e-05,
+      "loss": 0.7776,
+      "step": 138
+    },
+    {
+      "epoch": 0.00486941899774045,
+      "grad_norm": 5.0661211013793945,
+      "learning_rate": 3.23621052631579e-05,
+      "loss": 0.7375,
+      "step": 139
+    },
+    {
+      "epoch": 0.0049044507890911,
+      "grad_norm": 4.658995628356934,
+      "learning_rate": 3.183157894736842e-05,
+      "loss": 0.5983,
+      "step": 140
+    },
+    {
+      "epoch": 0.0049394825804417505,
+      "grad_norm": 5.3926920890808105,
+      "learning_rate": 3.130105263157895e-05,
+      "loss": 0.9688,
+      "step": 141
+    },
+    {
+      "epoch": 0.004974514371792402,
+      "grad_norm": 6.5495805740356445,
+      "learning_rate": 3.077052631578947e-05,
+      "loss": 0.912,
+      "step": 142
+    },
+    {
+      "epoch": 0.005009546163143052,
+      "grad_norm": 6.6911702156066895,
+      "learning_rate": 3.024e-05,
+      "loss": 1.257,
+      "step": 143
+    },
+    {
+      "epoch": 0.0050445779544937034,
+      "grad_norm": 5.760495662689209,
+      "learning_rate": 2.970947368421052e-05,
+      "loss": 0.8503,
+      "step": 144
+    },
+    {
+      "epoch": 0.005079609745844354,
+      "grad_norm": 3.4422309398651123,
+      "learning_rate": 2.9178947368421054e-05,
+      "loss": 0.4197,
+      "step": 145
+    },
+    {
+      "epoch": 0.005114641537195004,
+      "grad_norm": 4.122685432434082,
+      "learning_rate": 2.8648421052631577e-05,
+      "loss": 0.6266,
+      "step": 146
+    },
+    {
+      "epoch": 0.0051496733285456555,
+      "grad_norm": 4.601315975189209,
+      "learning_rate": 2.8117894736842103e-05,
+      "loss": 0.5214,
+      "step": 147
+    },
+    {
+      "epoch": 0.005184705119896306,
+      "grad_norm": 4.702164173126221,
+      "learning_rate": 2.7587368421052633e-05,
+      "loss": 0.6284,
+      "step": 148
+    },
+    {
+      "epoch": 0.005219736911246956,
+      "grad_norm": 5.033868789672852,
+      "learning_rate": 2.7056842105263156e-05,
+      "loss": 0.6065,
+      "step": 149
+    },
+    {
+      "epoch": 0.005254768702597608,
+      "grad_norm": 5.247815132141113,
+      "learning_rate": 2.6526315789473682e-05,
+      "loss": 0.887,
+      "step": 150
+    },
+    {
+      "epoch": 0.005254768702597608,
+      "eval_loss": 0.9501329064369202,
+      "eval_runtime": 2058.4097,
+      "eval_samples_per_second": 5.839,
+      "eval_steps_per_second": 1.46,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.914116459003904e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null