Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0675efc70287c078a42a75ad0639d9c1275179512f51e2ba1fd62f66e807ecc
 size 912336848

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccbb69308e6619236e069cfa4b82f81f35acefdee3e1de9792796b520b022b53
 size 912336848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24b7be8d2e34be1f251f2dd5a2f1f607b836b34c0939ec3935a01ed9cdb03ffd
 size 463916180

 version https://git-lfs.github.com/spec/v1
+oid sha256:39b204061753812622c3c9356024f8550f2a4f70ea90b5f47046311764b0c72b
 size 463916180

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f482617e7c1ac553fc0137b2a4690def09a150f1b2522fb547710a0c2ca846f4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3eb432bf4fdd232e3e2c88fda68ed8993ded682f4637d2df5152d3ee3c8d3bc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb15fdd4c0ab874f4afa0ea95fae119cbc74554df5fbe92ab825fe27063eaa04
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6961973905563354,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.005243288590604027,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 18.237,
       "eval_steps_per_second": 2.28,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.930148957487104e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.6961973905563354,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.010486577181208054,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 18.237,
       "eval_steps_per_second": 2.28,
       "step": 25
+    },
+    {
+      "epoch": 0.005453020134228188,
+      "grad_norm": 3.5189576148986816,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 7.1497,
+      "step": 26
+    },
+    {
+      "epoch": 0.005662751677852349,
+      "grad_norm": 3.4907236099243164,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 6.8121,
+      "step": 27
+    },
+    {
+      "epoch": 0.00587248322147651,
+      "grad_norm": 3.352301597595215,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 6.9369,
+      "step": 28
+    },
+    {
+      "epoch": 0.006082214765100671,
+      "grad_norm": 3.8302619457244873,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 6.9222,
+      "step": 29
+    },
+    {
+      "epoch": 0.006291946308724832,
+      "grad_norm": 3.51759672164917,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 6.6552,
+      "step": 30
+    },
+    {
+      "epoch": 0.0065016778523489934,
+      "grad_norm": 3.75423526763916,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 6.5789,
+      "step": 31
+    },
+    {
+      "epoch": 0.006711409395973154,
+      "grad_norm": 4.301468372344971,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 6.8843,
+      "step": 32
+    },
+    {
+      "epoch": 0.006921140939597316,
+      "grad_norm": 4.287465572357178,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 6.3121,
+      "step": 33
+    },
+    {
+      "epoch": 0.007130872483221477,
+      "grad_norm": 4.683781147003174,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 6.3466,
+      "step": 34
+    },
+    {
+      "epoch": 0.007340604026845637,
+      "grad_norm": 4.297049045562744,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 6.5177,
+      "step": 35
+    },
+    {
+      "epoch": 0.007550335570469799,
+      "grad_norm": 5.890731334686279,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 6.3405,
+      "step": 36
+    },
+    {
+      "epoch": 0.00776006711409396,
+      "grad_norm": 5.064823627471924,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 5.9357,
+      "step": 37
+    },
+    {
+      "epoch": 0.007969798657718121,
+      "grad_norm": 4.774162292480469,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 5.8669,
+      "step": 38
+    },
+    {
+      "epoch": 0.008179530201342282,
+      "grad_norm": 4.649093151092529,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 5.9236,
+      "step": 39
+    },
+    {
+      "epoch": 0.008389261744966443,
+      "grad_norm": 5.287595272064209,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 6.5715,
+      "step": 40
+    },
+    {
+      "epoch": 0.008598993288590604,
+      "grad_norm": 5.230982780456543,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 5.8544,
+      "step": 41
+    },
+    {
+      "epoch": 0.008808724832214765,
+      "grad_norm": 5.401068210601807,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 5.6195,
+      "step": 42
+    },
+    {
+      "epoch": 0.009018456375838927,
+      "grad_norm": 5.703489303588867,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 6.1054,
+      "step": 43
+    },
+    {
+      "epoch": 0.009228187919463088,
+      "grad_norm": 6.4749274253845215,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 5.2847,
+      "step": 44
+    },
+    {
+      "epoch": 0.009437919463087249,
+      "grad_norm": 8.451642036437988,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 5.1672,
+      "step": 45
+    },
+    {
+      "epoch": 0.00964765100671141,
+      "grad_norm": 6.819253444671631,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 4.9192,
+      "step": 46
+    },
+    {
+      "epoch": 0.00985738255033557,
+      "grad_norm": 6.859738349914551,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 5.0944,
+      "step": 47
+    },
+    {
+      "epoch": 0.010067114093959731,
+      "grad_norm": 8.738635063171387,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 5.6795,
+      "step": 48
+    },
+    {
+      "epoch": 0.010276845637583893,
+      "grad_norm": 19.29412841796875,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 4.5156,
+      "step": 49
+    },
+    {
+      "epoch": 0.010486577181208054,
+      "grad_norm": 61.366493225097656,
+      "learning_rate": 0.0,
+      "loss": 6.8393,
+      "step": 50
+    },
+    {
+      "epoch": 0.010486577181208054,
+      "eval_loss": 1.8221542835235596,
+      "eval_runtime": 440.2648,
+      "eval_samples_per_second": 18.239,
+      "eval_steps_per_second": 2.28,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.831286539157504e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null