Training in progress, step 966, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +466 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:022acefac6286471ee8d2c71c3690fd8a565a014ef058c4e3af4f78951dbd0c6
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:48b44ffedd3bcc2378fb4b6ff819562642a49efef4ae3d053c11cf98092a991c
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6815cc9faa2ed01fbe0f7494c50e2dad1e8d76d810e7a58f8875d331c5a18757
 size 81730644

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c65324517999537d1a713978c190754cee80df1b8d07519564e3bdbbe6f6f00
 size 81730644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a51e93265e2c6a6f43a552e36ccd1c9901c5296dbb2ff8ac30a43fe5205e60a5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:901b58ab73b39325bfea8aed8a9f472d920fed35a9e5a296018097c13d84b1ca
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f7ccd40217515b1adb168de499560d4c6c803e2bb562e36a2dfe417244db986
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:823c6cdea481d02c5473910d781463e3c449c8632b36bf92b3536a92203bd40d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.723136305809021,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
-  "epoch": 0.06268500783562599,
   "eval_steps": 100,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6387,6 +6387,468 @@
       "eval_samples_per_second": 7.139,
       "eval_steps_per_second": 1.785,
       "step": 900
     }
   ],
   "logging_steps": 1,
@@ -6410,12 +6872,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.448712204661293e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.723136305809021,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
+  "epoch": 0.06728190841023855,
   "eval_steps": 100,
+  "global_step": 966,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.139,
       "eval_steps_per_second": 1.785,
       "step": 900
+    },
+    {
+      "epoch": 0.06275465784433223,
+      "grad_norm": 0.7109830379486084,
+      "learning_rate": 2.272630321321023e-06,
+      "loss": 0.704,
+      "step": 901
+    },
+    {
+      "epoch": 0.06282430785303848,
+      "grad_norm": 0.4886980950832367,
+      "learning_rate": 2.20349711463943e-06,
+      "loss": 0.4915,
+      "step": 902
+    },
+    {
+      "epoch": 0.06289395786174473,
+      "grad_norm": 0.6534592509269714,
+      "learning_rate": 2.135420012462619e-06,
+      "loss": 0.6073,
+      "step": 903
+    },
+    {
+      "epoch": 0.06296360787045098,
+      "grad_norm": 0.5471417903900146,
+      "learning_rate": 2.0683997499552632e-06,
+      "loss": 0.6319,
+      "step": 904
+    },
+    {
+      "epoch": 0.06303325787915723,
+      "grad_norm": 0.765691876411438,
+      "learning_rate": 2.0024370508692104e-06,
+      "loss": 0.9544,
+      "step": 905
+    },
+    {
+      "epoch": 0.06310290788786349,
+      "grad_norm": 0.6834742426872253,
+      "learning_rate": 1.9375326275357208e-06,
+      "loss": 0.8162,
+      "step": 906
+    },
+    {
+      "epoch": 0.06317255789656974,
+      "grad_norm": 0.7233893871307373,
+      "learning_rate": 1.8736871808576861e-06,
+      "loss": 1.0311,
+      "step": 907
+    },
+    {
+      "epoch": 0.06324220790527599,
+      "grad_norm": 0.6150738000869751,
+      "learning_rate": 1.8109014003021452e-06,
+      "loss": 0.9241,
+      "step": 908
+    },
+    {
+      "epoch": 0.06331185791398224,
+      "grad_norm": 0.7470687031745911,
+      "learning_rate": 1.7491759638927686e-06,
+      "loss": 1.1686,
+      "step": 909
+    },
+    {
+      "epoch": 0.06338150792268848,
+      "grad_norm": 0.7098023295402527,
+      "learning_rate": 1.6885115382026085e-06,
+      "loss": 1.1531,
+      "step": 910
+    },
+    {
+      "epoch": 0.06345115793139475,
+      "grad_norm": 0.6397354006767273,
+      "learning_rate": 1.628908778346827e-06,
+      "loss": 0.9153,
+      "step": 911
+    },
+    {
+      "epoch": 0.063520807940101,
+      "grad_norm": 0.6609793305397034,
+      "learning_rate": 1.5703683279756797e-06,
+      "loss": 0.641,
+      "step": 912
+    },
+    {
+      "epoch": 0.06359045794880724,
+      "grad_norm": 0.7062059640884399,
+      "learning_rate": 1.5128908192675318e-06,
+      "loss": 0.7182,
+      "step": 913
+    },
+    {
+      "epoch": 0.06366010795751349,
+      "grad_norm": 0.6093196272850037,
+      "learning_rate": 1.4564768729220412e-06,
+      "loss": 0.6793,
+      "step": 914
+    },
+    {
+      "epoch": 0.06372975796621974,
+      "grad_norm": 0.6978054642677307,
+      "learning_rate": 1.401127098153443e-06,
+      "loss": 0.7592,
+      "step": 915
+    },
+    {
+      "epoch": 0.063799407974926,
+      "grad_norm": 0.5635403394699097,
+      "learning_rate": 1.3468420926840197e-06,
+      "loss": 0.869,
+      "step": 916
+    },
+    {
+      "epoch": 0.06386905798363225,
+      "grad_norm": 0.6903446912765503,
+      "learning_rate": 1.2936224427375521e-06,
+      "loss": 0.7401,
+      "step": 917
+    },
+    {
+      "epoch": 0.0639387079923385,
+      "grad_norm": 0.6210869550704956,
+      "learning_rate": 1.2414687230331123e-06,
+      "loss": 0.5908,
+      "step": 918
+    },
+    {
+      "epoch": 0.06400835800104475,
+      "grad_norm": 0.6113409399986267,
+      "learning_rate": 1.1903814967787253e-06,
+      "loss": 0.5493,
+      "step": 919
+    },
+    {
+      "epoch": 0.064078008009751,
+      "grad_norm": 0.9400643706321716,
+      "learning_rate": 1.1403613156654059e-06,
+      "loss": 1.0418,
+      "step": 920
+    },
+    {
+      "epoch": 0.06414765801845725,
+      "grad_norm": 0.683574378490448,
+      "learning_rate": 1.091408719861109e-06,
+      "loss": 0.9345,
+      "step": 921
+    },
+    {
+      "epoch": 0.06421730802716351,
+      "grad_norm": 0.7595987915992737,
+      "learning_rate": 1.0435242380049559e-06,
+      "loss": 0.8716,
+      "step": 922
+    },
+    {
+      "epoch": 0.06428695803586976,
+      "grad_norm": 0.6851724982261658,
+      "learning_rate": 9.967083872015282e-07,
+      "loss": 0.5158,
+      "step": 923
+    },
+    {
+      "epoch": 0.064356608044576,
+      "grad_norm": 0.6724770069122314,
+      "learning_rate": 9.509616730151827e-07,
+      "loss": 0.5133,
+      "step": 924
+    },
+    {
+      "epoch": 0.06442625805328225,
+      "grad_norm": 0.6596947312355042,
+      "learning_rate": 9.062845894647676e-07,
+      "loss": 0.6722,
+      "step": 925
+    },
+    {
+      "epoch": 0.0644959080619885,
+      "grad_norm": 0.5619158148765564,
+      "learning_rate": 8.626776190181041e-07,
+      "loss": 0.9499,
+      "step": 926
+    },
+    {
+      "epoch": 0.06456555807069476,
+      "grad_norm": 0.7573150992393494,
+      "learning_rate": 8.20141232586924e-07,
+      "loss": 0.7521,
+      "step": 927
+    },
+    {
+      "epoch": 0.06463520807940101,
+      "grad_norm": 0.6126770377159119,
+      "learning_rate": 7.786758895216629e-07,
+      "loss": 0.6616,
+      "step": 928
+    },
+    {
+      "epoch": 0.06470485808810726,
+      "grad_norm": 0.7481774687767029,
+      "learning_rate": 7.382820376066302e-07,
+      "loss": 0.8779,
+      "step": 929
+    },
+    {
+      "epoch": 0.06477450809681351,
+      "grad_norm": 0.7029200792312622,
+      "learning_rate": 6.98960113055025e-07,
+      "loss": 0.7685,
+      "step": 930
+    },
+    {
+      "epoch": 0.06484415810551976,
+      "grad_norm": 0.6455416679382324,
+      "learning_rate": 6.607105405043612e-07,
+      "loss": 1.0069,
+      "step": 931
+    },
+    {
+      "epoch": 0.06491380811422602,
+      "grad_norm": 0.7011751532554626,
+      "learning_rate": 6.23533733011783e-07,
+      "loss": 0.6548,
+      "step": 932
+    },
+    {
+      "epoch": 0.06498345812293227,
+      "grad_norm": 0.7533524036407471,
+      "learning_rate": 5.8743009204969e-07,
+      "loss": 0.7463,
+      "step": 933
+    },
+    {
+      "epoch": 0.06505310813163852,
+      "grad_norm": 0.5586950182914734,
+      "learning_rate": 5.52400007501297e-07,
+      "loss": 0.6125,
+      "step": 934
+    },
+    {
+      "epoch": 0.06512275814034477,
+      "grad_norm": 0.6539096832275391,
+      "learning_rate": 5.184438576565253e-07,
+      "loss": 0.8559,
+      "step": 935
+    },
+    {
+      "epoch": 0.06519240814905101,
+      "grad_norm": 0.7584323883056641,
+      "learning_rate": 4.855620092078627e-07,
+      "loss": 1.1142,
+      "step": 936
+    },
+    {
+      "epoch": 0.06526205815775726,
+      "grad_norm": 0.6609397530555725,
+      "learning_rate": 4.537548172464101e-07,
+      "loss": 0.8978,
+      "step": 937
+    },
+    {
+      "epoch": 0.06533170816646353,
+      "grad_norm": 0.6159988641738892,
+      "learning_rate": 4.230226252580516e-07,
+      "loss": 0.6993,
+      "step": 938
+    },
+    {
+      "epoch": 0.06540135817516977,
+      "grad_norm": 0.6153664588928223,
+      "learning_rate": 3.9336576511976863e-07,
+      "loss": 0.4574,
+      "step": 939
+    },
+    {
+      "epoch": 0.06547100818387602,
+      "grad_norm": 0.6489300727844238,
+      "learning_rate": 3.6478455709598734e-07,
+      "loss": 0.7568,
+      "step": 940
+    },
+    {
+      "epoch": 0.06554065819258227,
+      "grad_norm": 0.6248874664306641,
+      "learning_rate": 3.372793098352256e-07,
+      "loss": 0.6879,
+      "step": 941
+    },
+    {
+      "epoch": 0.06561030820128852,
+      "grad_norm": 0.5801978707313538,
+      "learning_rate": 3.108503203666402e-07,
+      "loss": 0.7331,
+      "step": 942
+    },
+    {
+      "epoch": 0.06567995820999478,
+      "grad_norm": 0.605501115322113,
+      "learning_rate": 2.8549787409691833e-07,
+      "loss": 0.6179,
+      "step": 943
+    },
+    {
+      "epoch": 0.06574960821870103,
+      "grad_norm": 0.5972608327865601,
+      "learning_rate": 2.6122224480715775e-07,
+      "loss": 0.6514,
+      "step": 944
+    },
+    {
+      "epoch": 0.06581925822740728,
+      "grad_norm": 0.7556172609329224,
+      "learning_rate": 2.380236946498693e-07,
+      "loss": 0.8719,
+      "step": 945
+    },
+    {
+      "epoch": 0.06588890823611353,
+      "grad_norm": 0.6486802101135254,
+      "learning_rate": 2.1590247414624566e-07,
+      "loss": 0.5719,
+      "step": 946
+    },
+    {
+      "epoch": 0.06595855824481978,
+      "grad_norm": 0.638469398021698,
+      "learning_rate": 1.948588221833303e-07,
+      "loss": 0.6393,
+      "step": 947
+    },
+    {
+      "epoch": 0.06602820825352604,
+      "grad_norm": 0.7082604765892029,
+      "learning_rate": 1.7489296601156392e-07,
+      "loss": 1.0018,
+      "step": 948
+    },
+    {
+      "epoch": 0.06609785826223229,
+      "grad_norm": 0.6530460119247437,
+      "learning_rate": 1.5600512124221978e-07,
+      "loss": 0.7418,
+      "step": 949
+    },
+    {
+      "epoch": 0.06616750827093854,
+      "grad_norm": 0.653685986995697,
+      "learning_rate": 1.3819549184516112e-07,
+      "loss": 0.9309,
+      "step": 950
+    },
+    {
+      "epoch": 0.06623715827964478,
+      "grad_norm": 0.5263675451278687,
+      "learning_rate": 1.2146427014657625e-07,
+      "loss": 0.7189,
+      "step": 951
+    },
+    {
+      "epoch": 0.06630680828835103,
+      "grad_norm": 0.6783672571182251,
+      "learning_rate": 1.0581163682695793e-07,
+      "loss": 0.5871,
+      "step": 952
+    },
+    {
+      "epoch": 0.06637645829705728,
+      "grad_norm": 0.4727168083190918,
+      "learning_rate": 9.123776091908287e-08,
+      "loss": 0.3484,
+      "step": 953
+    },
+    {
+      "epoch": 0.06644610830576354,
+      "grad_norm": 0.5385925769805908,
+      "learning_rate": 7.774279980626853e-08,
+      "loss": 0.5899,
+      "step": 954
+    },
+    {
+      "epoch": 0.06651575831446979,
+      "grad_norm": 0.6668855547904968,
+      "learning_rate": 6.532689922059687e-08,
+      "loss": 1.0131,
+      "step": 955
+    },
+    {
+      "epoch": 0.06658540832317604,
+      "grad_norm": 0.6244344115257263,
+      "learning_rate": 5.3990193241393313e-08,
+      "loss": 0.7458,
+      "step": 956
+    },
+    {
+      "epoch": 0.06665505833188229,
+      "grad_norm": 0.6702743768692017,
+      "learning_rate": 4.373280429375015e-08,
+      "loss": 0.8924,
+      "step": 957
+    },
+    {
+      "epoch": 0.06672470834058854,
+      "grad_norm": 0.6103947758674622,
+      "learning_rate": 3.4554843147216464e-08,
+      "loss": 1.0036,
+      "step": 958
+    },
+    {
+      "epoch": 0.0667943583492948,
+      "grad_norm": 0.622797966003418,
+      "learning_rate": 2.6456408914599108e-08,
+      "loss": 0.8497,
+      "step": 959
+    },
+    {
+      "epoch": 0.06686400835800105,
+      "grad_norm": 0.7076674699783325,
+      "learning_rate": 1.9437589050907977e-08,
+      "loss": 0.5629,
+      "step": 960
+    },
+    {
+      "epoch": 0.0669336583667073,
+      "grad_norm": 0.7682867050170898,
+      "learning_rate": 1.3498459352367931e-08,
+      "loss": 0.7463,
+      "step": 961
+    },
+    {
+      "epoch": 0.06700330837541355,
+      "grad_norm": 0.7987236380577087,
+      "learning_rate": 8.639083955663818e-09,
+      "loss": 1.1664,
+      "step": 962
+    },
+    {
+      "epoch": 0.0670729583841198,
+      "grad_norm": 0.7837391495704651,
+      "learning_rate": 4.859515337174436e-09,
+      "loss": 0.6505,
+      "step": 963
+    },
+    {
+      "epoch": 0.06714260839282606,
+      "grad_norm": 0.6566223502159119,
+      "learning_rate": 2.1597943124729292e-09,
+      "loss": 0.8524,
+      "step": 964
+    },
+    {
+      "epoch": 0.0672122584015323,
+      "grad_norm": 0.6998875737190247,
+      "learning_rate": 5.399500358493903e-10,
+      "loss": 0.8817,
+      "step": 965
+    },
+    {
+      "epoch": 0.06728190841023855,
+      "grad_norm": 0.6083624362945557,
+      "learning_rate": 0.0,
+      "loss": 0.8767,
+      "step": 966
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.628352553502376e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null