Training in progress, epoch 2, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +235 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fccf64734ff924caa5eddb5c6f6b322eabbd44a5ed01712af59ac3d42ed0938
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ace832017d535a2701404defed5440bdb64ee82aca5c376dddbe27cf7b88b38
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e787ded47db8e1a655c87ff1cd78a9c576fb9c34818ded3b16e2ab0b15d313b
 size 671473298

 version https://git-lfs.github.com/spec/v1
+oid sha256:936af1b63fb498f0091dfc26166ad62fc436853ab06bd1f27c2633956894ba8a
 size 671473298

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efcbef283f1e087a50e99a91600bcaba3fb5d5d9c71a2c10ff9c48425ae427ac
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1a984a18d8a1e358ff31fb23848c08d5854b29401bd2aa1d0a25d5617579e98
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6dfefdd350a9c50a5ce8f17f77222cc43ade97bd1bc4dfe20825b1649c6776a6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:891cad020bf7bee78efa739dc10e1e4315e34b096ed70226b38590ec81d7d418
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9988177339901478,
   "eval_steps": 500,
-  "global_step": 1268,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1274,6 +1274,237 @@
       "learning_rate": 6.680041969810203e-06,
       "loss": 0.5018,
       "step": 1267
     }
   ],
   "logging_steps": 7,
@@ -1288,12 +1519,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.8930015223676928e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.3645320197044333,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.680041969810203e-06,
       "loss": 0.5018,
       "step": 1267
+    },
+    {
+      "epoch": 2.0082758620689654,
+      "grad_norm": 0.39984405040740967,
+      "learning_rate": 6.293212171147206e-06,
+      "loss": 0.5029,
+      "step": 1274
+    },
+    {
+      "epoch": 2.0193103448275864,
+      "grad_norm": 0.409271776676178,
+      "learning_rate": 5.917166368382277e-06,
+      "loss": 0.4303,
+      "step": 1281
+    },
+    {
+      "epoch": 2.030344827586207,
+      "grad_norm": 0.4450385570526123,
+      "learning_rate": 5.5519973451903405e-06,
+      "loss": 0.4304,
+      "step": 1288
+    },
+    {
+      "epoch": 2.0413793103448277,
+      "grad_norm": 0.4327320456504822,
+      "learning_rate": 5.197795201563743e-06,
+      "loss": 0.4258,
+      "step": 1295
+    },
+    {
+      "epoch": 2.0524137931034483,
+      "grad_norm": 0.42840683460235596,
+      "learning_rate": 4.8546473315813856e-06,
+      "loss": 0.4298,
+      "step": 1302
+    },
+    {
+      "epoch": 2.063448275862069,
+      "grad_norm": 0.41301754117012024,
+      "learning_rate": 4.522638401845547e-06,
+      "loss": 0.4287,
+      "step": 1309
+    },
+    {
+      "epoch": 2.0744827586206895,
+      "grad_norm": 0.4132091701030731,
+      "learning_rate": 4.2018503305916775e-06,
+      "loss": 0.4211,
+      "step": 1316
+    },
+    {
+      "epoch": 2.08551724137931,
+      "grad_norm": 0.43061113357543945,
+      "learning_rate": 3.892362267476313e-06,
+      "loss": 0.429,
+      "step": 1323
+    },
+    {
+      "epoch": 2.0965517241379312,
+      "grad_norm": 0.41242024302482605,
+      "learning_rate": 3.5942505740480582e-06,
+      "loss": 0.4254,
+      "step": 1330
+    },
+    {
+      "epoch": 2.107586206896552,
+      "grad_norm": 0.4244064390659332,
+      "learning_rate": 3.3075888049065196e-06,
+      "loss": 0.4263,
+      "step": 1337
+    },
+    {
+      "epoch": 2.1186206896551725,
+      "grad_norm": 0.4177404046058655,
+      "learning_rate": 3.03244768955383e-06,
+      "loss": 0.431,
+      "step": 1344
+    },
+    {
+      "epoch": 2.129655172413793,
+      "grad_norm": 0.42242953181266785,
+      "learning_rate": 2.7688951149431595e-06,
+      "loss": 0.4312,
+      "step": 1351
+    },
+    {
+      "epoch": 2.1406896551724137,
+      "grad_norm": 0.43014487624168396,
+      "learning_rate": 2.5169961087286974e-06,
+      "loss": 0.4265,
+      "step": 1358
+    },
+    {
+      "epoch": 2.1517241379310343,
+      "grad_norm": 0.43356141448020935,
+      "learning_rate": 2.276812823220964e-06,
+      "loss": 0.4384,
+      "step": 1365
+    },
+    {
+      "epoch": 2.162758620689655,
+      "grad_norm": 0.4384445548057556,
+      "learning_rate": 2.048404520051722e-06,
+      "loss": 0.4187,
+      "step": 1372
+    },
+    {
+      "epoch": 2.173793103448276,
+      "grad_norm": 0.4111485183238983,
+      "learning_rate": 1.8318275555520237e-06,
+      "loss": 0.4313,
+      "step": 1379
+    },
+    {
+      "epoch": 2.1848275862068967,
+      "grad_norm": 0.4494364261627197,
+      "learning_rate": 1.6271353668471655e-06,
+      "loss": 0.4305,
+      "step": 1386
+    },
+    {
+      "epoch": 2.1958620689655173,
+      "grad_norm": 0.422455757856369,
+      "learning_rate": 1.4343784586718311e-06,
+      "loss": 0.4174,
+      "step": 1393
+    },
+    {
+      "epoch": 2.206896551724138,
+      "grad_norm": 0.4379701614379883,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 0.4177,
+      "step": 1400
+    },
+    {
+      "epoch": 2.2179310344827585,
+      "grad_norm": 0.4343680143356323,
+      "learning_rate": 1.0848577668543802e-06,
+      "loss": 0.4176,
+      "step": 1407
+    },
+    {
+      "epoch": 2.228965517241379,
+      "grad_norm": 0.42176929116249084,
+      "learning_rate": 9.281802222129765e-07,
+      "loss": 0.417,
+      "step": 1414
+    },
+    {
+      "epoch": 2.24,
+      "grad_norm": 0.4197026491165161,
+      "learning_rate": 7.836104148243484e-07,
+      "loss": 0.4201,
+      "step": 1421
+    },
+    {
+      "epoch": 2.251034482758621,
+      "grad_norm": 0.42568454146385193,
+      "learning_rate": 6.511840151252169e-07,
+      "loss": 0.4164,
+      "step": 1428
+    },
+    {
+      "epoch": 2.2620689655172415,
+      "grad_norm": 0.4492688775062561,
+      "learning_rate": 5.309336973481683e-07,
+      "loss": 0.4289,
+      "step": 1435
+    },
+    {
+      "epoch": 2.273103448275862,
+      "grad_norm": 0.43771427869796753,
+      "learning_rate": 4.228891314597694e-07,
+      "loss": 0.4362,
+      "step": 1442
+    },
+    {
+      "epoch": 2.2841379310344827,
+      "grad_norm": 0.4285426139831543,
+      "learning_rate": 3.2707697583995167e-07,
+      "loss": 0.4161,
+      "step": 1449
+    },
+    {
+      "epoch": 2.2951724137931033,
+      "grad_norm": 0.433178573846817,
+      "learning_rate": 2.4352087070443895e-07,
+      "loss": 0.4252,
+      "step": 1456
+    },
+    {
+      "epoch": 2.306206896551724,
+      "grad_norm": 0.43515661358833313,
+      "learning_rate": 1.7224143227190236e-07,
+      "loss": 0.4155,
+      "step": 1463
+    },
+    {
+      "epoch": 2.317241379310345,
+      "grad_norm": 0.45461907982826233,
+      "learning_rate": 1.132562476771959e-07,
+      "loss": 0.4276,
+      "step": 1470
+    },
+    {
+      "epoch": 2.3282758620689656,
+      "grad_norm": 0.42468348145484924,
+      "learning_rate": 6.657987063200533e-08,
+      "loss": 0.4256,
+      "step": 1477
+    },
+    {
+      "epoch": 2.3393103448275863,
+      "grad_norm": 0.4367210865020752,
+      "learning_rate": 3.2223817833931805e-08,
+      "loss": 0.429,
+      "step": 1484
+    },
+    {
+      "epoch": 2.350344827586207,
+      "grad_norm": 0.43067917227745056,
+      "learning_rate": 1.019656612492592e-08,
+      "loss": 0.4242,
+      "step": 1491
+    },
+    {
+      "epoch": 2.3613793103448275,
+      "grad_norm": 0.44099918007850647,
+      "learning_rate": 5.035503997385949e-10,
+      "loss": 0.424,
+      "step": 1498
     }
   ],
   "logging_steps": 7,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.238031356493824e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null