Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa94b20e98465ff50becad5b95af7319e7d524361503c6dddeb8c4b911d810df
 size 800116456

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c5eb2d6ea4db9103c40128f80a6b1e9c61c74361d322d35133c0a28f24cb09a
 size 800116456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67fe41bcfdb8dbdfd4f8abcec82e6a75ae6e39a18131ff23f98c3fbc4de03878
 size 406743412

 version https://git-lfs.github.com/spec/v1
+oid sha256:06af85245b156b52abd3f96b264adf158da36ce02e88802b626699b9b1efe90d
 size 406743412

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ba8ede8b1ce1289bd047fa1b015e236ff2970647049d9ac45b408f024313221
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4c4b49ebd10e72da64aaeee9a2568e4948c257827da60f7c3a9021a559f2182
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c16a9a2b8856e8f64eb7194578c6fcbc8625033d1caa318cc7b80ad824088880
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ba2dfce0937a08ee177ecd4ee2c0d73bc25d6c21d394bb71764724c742c652e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9562696814537048,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.005254768702597608,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 5.877,
       "eval_steps_per_second": 1.469,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.914116459003904e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9112640619277954,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.007006358270130143,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.877,
       "eval_steps_per_second": 1.469,
       "step": 150
+    },
+    {
+      "epoch": 0.005289800493948258,
+      "grad_norm": 4.701993465423584,
+      "learning_rate": 2.5833315789473687e-05,
+      "loss": 0.8749,
+      "step": 151
+    },
+    {
+      "epoch": 0.0053248322852989085,
+      "grad_norm": 4.541272163391113,
+      "learning_rate": 2.5306105263157895e-05,
+      "loss": 0.8578,
+      "step": 152
+    },
+    {
+      "epoch": 0.00535986407664956,
+      "grad_norm": 6.385204792022705,
+      "learning_rate": 2.4778894736842104e-05,
+      "loss": 1.0571,
+      "step": 153
+    },
+    {
+      "epoch": 0.00539489586800021,
+      "grad_norm": 6.275009632110596,
+      "learning_rate": 2.4251684210526315e-05,
+      "loss": 1.1863,
+      "step": 154
+    },
+    {
+      "epoch": 0.0054299276593508606,
+      "grad_norm": 5.322296619415283,
+      "learning_rate": 2.3724473684210524e-05,
+      "loss": 0.9377,
+      "step": 155
+    },
+    {
+      "epoch": 0.005464959450701512,
+      "grad_norm": 6.121055603027344,
+      "learning_rate": 2.3197263157894735e-05,
+      "loss": 1.1512,
+      "step": 156
+    },
+    {
+      "epoch": 0.005499991242052162,
+      "grad_norm": 6.184394359588623,
+      "learning_rate": 2.2670052631578947e-05,
+      "loss": 1.1134,
+      "step": 157
+    },
+    {
+      "epoch": 0.005535023033402813,
+      "grad_norm": 6.6305437088012695,
+      "learning_rate": 2.214284210526316e-05,
+      "loss": 1.0825,
+      "step": 158
+    },
+    {
+      "epoch": 0.005570054824753464,
+      "grad_norm": 8.778796195983887,
+      "learning_rate": 2.161563157894737e-05,
+      "loss": 0.8035,
+      "step": 159
+    },
+    {
+      "epoch": 0.005605086616104114,
+      "grad_norm": 8.88864803314209,
+      "learning_rate": 2.108842105263158e-05,
+      "loss": 1.6535,
+      "step": 160
+    },
+    {
+      "epoch": 0.005640118407454766,
+      "grad_norm": 7.5164713859558105,
+      "learning_rate": 2.056121052631579e-05,
+      "loss": 0.9809,
+      "step": 161
+    },
+    {
+      "epoch": 0.005675150198805416,
+      "grad_norm": 7.345613956451416,
+      "learning_rate": 2.0034e-05,
+      "loss": 1.6875,
+      "step": 162
+    },
+    {
+      "epoch": 0.0057101819901560665,
+      "grad_norm": 6.933657646179199,
+      "learning_rate": 1.950678947368421e-05,
+      "loss": 1.3107,
+      "step": 163
+    },
+    {
+      "epoch": 0.005745213781506718,
+      "grad_norm": 7.372086048126221,
+      "learning_rate": 1.897957894736842e-05,
+      "loss": 1.4343,
+      "step": 164
+    },
+    {
+      "epoch": 0.005780245572857368,
+      "grad_norm": 6.22285270690918,
+      "learning_rate": 1.845236842105263e-05,
+      "loss": 0.9742,
+      "step": 165
+    },
+    {
+      "epoch": 0.0058152773642080186,
+      "grad_norm": 8.803730964660645,
+      "learning_rate": 1.792515789473684e-05,
+      "loss": 1.3178,
+      "step": 166
+    },
+    {
+      "epoch": 0.00585030915555867,
+      "grad_norm": 12.660039901733398,
+      "learning_rate": 1.7397947368421053e-05,
+      "loss": 1.9204,
+      "step": 167
+    },
+    {
+      "epoch": 0.00588534094690932,
+      "grad_norm": 7.340970039367676,
+      "learning_rate": 1.687073684210526e-05,
+      "loss": 1.4054,
+      "step": 168
+    },
+    {
+      "epoch": 0.005920372738259971,
+      "grad_norm": 4.593935489654541,
+      "learning_rate": 1.6343526315789473e-05,
+      "loss": 0.7033,
+      "step": 169
+    },
+    {
+      "epoch": 0.005955404529610622,
+      "grad_norm": 4.283302307128906,
+      "learning_rate": 1.5816315789473685e-05,
+      "loss": 0.6679,
+      "step": 170
+    },
+    {
+      "epoch": 0.005990436320961272,
+      "grad_norm": 5.512580394744873,
+      "learning_rate": 1.5289105263157896e-05,
+      "loss": 0.8829,
+      "step": 171
+    },
+    {
+      "epoch": 0.006025468112311923,
+      "grad_norm": 4.068453311920166,
+      "learning_rate": 1.4761894736842103e-05,
+      "loss": 0.5252,
+      "step": 172
+    },
+    {
+      "epoch": 0.006060499903662574,
+      "grad_norm": 6.23944091796875,
+      "learning_rate": 1.4234684210526314e-05,
+      "loss": 0.9646,
+      "step": 173
+    },
+    {
+      "epoch": 0.0060955316950132244,
+      "grad_norm": 6.521369934082031,
+      "learning_rate": 1.3707473684210528e-05,
+      "loss": 0.8696,
+      "step": 174
+    },
+    {
+      "epoch": 0.006130563486363875,
+      "grad_norm": 4.816810607910156,
+      "learning_rate": 1.3180263157894736e-05,
+      "loss": 0.786,
+      "step": 175
+    },
+    {
+      "epoch": 0.006165595277714526,
+      "grad_norm": 4.4193501472473145,
+      "learning_rate": 1.2653052631578948e-05,
+      "loss": 0.6175,
+      "step": 176
+    },
+    {
+      "epoch": 0.0062006270690651765,
+      "grad_norm": 4.809851169586182,
+      "learning_rate": 1.2125842105263158e-05,
+      "loss": 0.6917,
+      "step": 177
+    },
+    {
+      "epoch": 0.006235658860415827,
+      "grad_norm": 4.602668285369873,
+      "learning_rate": 1.1598631578947368e-05,
+      "loss": 0.7125,
+      "step": 178
+    },
+    {
+      "epoch": 0.006270690651766478,
+      "grad_norm": 5.745820045471191,
+      "learning_rate": 1.107142105263158e-05,
+      "loss": 0.9309,
+      "step": 179
+    },
+    {
+      "epoch": 0.006305722443117129,
+      "grad_norm": 5.068687438964844,
+      "learning_rate": 1.054421052631579e-05,
+      "loss": 0.7615,
+      "step": 180
+    },
+    {
+      "epoch": 0.00634075423446778,
+      "grad_norm": 5.800779342651367,
+      "learning_rate": 1.0017e-05,
+      "loss": 1.0696,
+      "step": 181
+    },
+    {
+      "epoch": 0.00637578602581843,
+      "grad_norm": 4.405969142913818,
+      "learning_rate": 9.48978947368421e-06,
+      "loss": 0.6197,
+      "step": 182
+    },
+    {
+      "epoch": 0.006410817817169081,
+      "grad_norm": 4.871210098266602,
+      "learning_rate": 8.96257894736842e-06,
+      "loss": 0.6756,
+      "step": 183
+    },
+    {
+      "epoch": 0.006445849608519732,
+      "grad_norm": 6.21035623550415,
+      "learning_rate": 8.43536842105263e-06,
+      "loss": 1.0503,
+      "step": 184
+    },
+    {
+      "epoch": 0.0064808813998703824,
+      "grad_norm": 5.283048152923584,
+      "learning_rate": 7.908157894736842e-06,
+      "loss": 0.8885,
+      "step": 185
+    },
+    {
+      "epoch": 0.006515913191221033,
+      "grad_norm": 6.8126349449157715,
+      "learning_rate": 7.380947368421051e-06,
+      "loss": 0.8942,
+      "step": 186
+    },
+    {
+      "epoch": 0.006550944982571684,
+      "grad_norm": 6.314110279083252,
+      "learning_rate": 6.853736842105264e-06,
+      "loss": 0.8951,
+      "step": 187
+    },
+    {
+      "epoch": 0.0065859767739223345,
+      "grad_norm": 4.6299333572387695,
+      "learning_rate": 6.326526315789474e-06,
+      "loss": 0.5277,
+      "step": 188
+    },
+    {
+      "epoch": 0.006621008565272985,
+      "grad_norm": 4.60901403427124,
+      "learning_rate": 5.799315789473684e-06,
+      "loss": 0.6088,
+      "step": 189
+    },
+    {
+      "epoch": 0.006656040356623636,
+      "grad_norm": 4.363032341003418,
+      "learning_rate": 5.272105263157895e-06,
+      "loss": 0.6022,
+      "step": 190
+    },
+    {
+      "epoch": 0.006691072147974287,
+      "grad_norm": 6.278995990753174,
+      "learning_rate": 4.744894736842105e-06,
+      "loss": 0.9941,
+      "step": 191
+    },
+    {
+      "epoch": 0.006726103939324937,
+      "grad_norm": 4.180647850036621,
+      "learning_rate": 4.217684210526315e-06,
+      "loss": 0.5374,
+      "step": 192
+    },
+    {
+      "epoch": 0.006761135730675588,
+      "grad_norm": 7.0714111328125,
+      "learning_rate": 3.6904736842105257e-06,
+      "loss": 0.8828,
+      "step": 193
+    },
+    {
+      "epoch": 0.006796167522026239,
+      "grad_norm": 4.836719036102295,
+      "learning_rate": 3.163263157894737e-06,
+      "loss": 0.8575,
+      "step": 194
+    },
+    {
+      "epoch": 0.006831199313376889,
+      "grad_norm": 6.586860656738281,
+      "learning_rate": 2.6360526315789473e-06,
+      "loss": 1.0594,
+      "step": 195
+    },
+    {
+      "epoch": 0.00686623110472754,
+      "grad_norm": 6.527482986450195,
+      "learning_rate": 2.1088421052631577e-06,
+      "loss": 0.7441,
+      "step": 196
+    },
+    {
+      "epoch": 0.006901262896078191,
+      "grad_norm": 5.505125045776367,
+      "learning_rate": 1.5816315789473685e-06,
+      "loss": 0.6487,
+      "step": 197
+    },
+    {
+      "epoch": 0.006936294687428841,
+      "grad_norm": 5.350231170654297,
+      "learning_rate": 1.0544210526315788e-06,
+      "loss": 0.7856,
+      "step": 198
+    },
+    {
+      "epoch": 0.0069713264787794925,
+      "grad_norm": 5.1863179206848145,
+      "learning_rate": 5.272105263157894e-07,
+      "loss": 0.5979,
+      "step": 199
+    },
+    {
+      "epoch": 0.007006358270130143,
+      "grad_norm": 4.770188808441162,
+      "learning_rate": 0.0,
+      "loss": 0.5592,
+      "step": 200
+    },
+    {
+      "epoch": 0.007006358270130143,
+      "eval_loss": 0.9112640619277954,
+      "eval_runtime": 2048.644,
+      "eval_samples_per_second": 5.867,
+      "eval_steps_per_second": 1.467,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0571696306965709e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null