Training in progress, epoch 1, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +403 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5a4db17a6b0d3aa294ab149bdab58b500912c49a77a26b84efeaa8c5ba0a8cf
 size 194563400

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6869b2d71e2f9a989f469926b109e48afa90079a4410cb2bc4606b2f558a88b
 size 194563400

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:000693a5cee3243f5f119e4269236d0545278d41e28f23ba8f0185105a641f78
 size 389358058

 version https://git-lfs.github.com/spec/v1
+oid sha256:d666fc4f02e4046fb59081b8bc0c0045b07bcfff4cdf17054f52eb6207791394
 size 389358058

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b22b562a9fd534225eadfb95f9c8a88b6901d71ec1a24ce1b5d77950baecd454
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb59e5d55e15032974abe5598466b524fa64fa84b0cbe8c580722b1f9d299a9b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a6586a314fcf3dc86442a2142990493b4f7a7df2410791efb9baa5882c5e09a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:891cad020bf7bee78efa739dc10e1e4315e34b096ed70226b38590ec81d7d418
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9998866855524079,
   "eval_steps": 500,
-  "global_step": 1103,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1106,6 +1106,405 @@
       "learning_rate": 1.8913309610379015e-05,
       "loss": 1.0895,
       "step": 1099
     }
   ],
   "logging_steps": 7,
@@ -1120,12 +1519,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.111012984516772e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.3597733711048159,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.8913309610379015e-05,
       "loss": 1.0895,
       "step": 1099
+    },
+    {
+      "epoch": 1.0026062322946176,
+      "grad_norm": 1.0843678712844849,
+      "learning_rate": 1.8302023674591935e-05,
+      "loss": 1.0332,
+      "step": 1106
+    },
+    {
+      "epoch": 1.0089518413597733,
+      "grad_norm": 1.0455607175827026,
+      "learning_rate": 1.7698558740156135e-05,
+      "loss": 0.733,
+      "step": 1113
+    },
+    {
+      "epoch": 1.0152974504249292,
+      "grad_norm": 1.5471065044403076,
+      "learning_rate": 1.7103063703014372e-05,
+      "loss": 0.8191,
+      "step": 1120
+    },
+    {
+      "epoch": 1.021643059490085,
+      "grad_norm": 1.0794677734375,
+      "learning_rate": 1.6515685492656467e-05,
+      "loss": 0.6541,
+      "step": 1127
+    },
+    {
+      "epoch": 1.0279886685552408,
+      "grad_norm": 1.2111717462539673,
+      "learning_rate": 1.59365690358667e-05,
+      "loss": 0.6832,
+      "step": 1134
+    },
+    {
+      "epoch": 1.0343342776203965,
+      "grad_norm": 1.243001937866211,
+      "learning_rate": 1.5365857220965275e-05,
+      "loss": 0.7858,
+      "step": 1141
+    },
+    {
+      "epoch": 1.0406798866855524,
+      "grad_norm": 1.0979713201522827,
+      "learning_rate": 1.4803690862552755e-05,
+      "loss": 0.6973,
+      "step": 1148
+    },
+    {
+      "epoch": 1.0470254957507081,
+      "grad_norm": 1.1694271564483643,
+      "learning_rate": 1.4250208666766235e-05,
+      "loss": 0.7052,
+      "step": 1155
+    },
+    {
+      "epoch": 1.053371104815864,
+      "grad_norm": 1.2045671939849854,
+      "learning_rate": 1.3705547197055584e-05,
+      "loss": 0.7855,
+      "step": 1162
+    },
+    {
+      "epoch": 1.0597167138810197,
+      "grad_norm": 1.1639701128005981,
+      "learning_rate": 1.3169840840488501e-05,
+      "loss": 0.6912,
+      "step": 1169
+    },
+    {
+      "epoch": 1.0660623229461756,
+      "grad_norm": 1.2501615285873413,
+      "learning_rate": 1.2643221774592518e-05,
+      "loss": 0.6945,
+      "step": 1176
+    },
+    {
+      "epoch": 1.0724079320113313,
+      "grad_norm": 0.9910327792167664,
+      "learning_rate": 1.2125819934742188e-05,
+      "loss": 0.6741,
+      "step": 1183
+    },
+    {
+      "epoch": 1.0787535410764872,
+      "grad_norm": 1.535058617591858,
+      "learning_rate": 1.1617762982099446e-05,
+      "loss": 0.7266,
+      "step": 1190
+    },
+    {
+      "epoch": 1.0850991501416432,
+      "grad_norm": 1.055584192276001,
+      "learning_rate": 1.1119176272115128e-05,
+      "loss": 0.6585,
+      "step": 1197
+    },
+    {
+      "epoch": 1.0914447592067988,
+      "grad_norm": 1.0781307220458984,
+      "learning_rate": 1.0630182823599399e-05,
+      "loss": 0.7043,
+      "step": 1204
+    },
+    {
+      "epoch": 1.0977903682719548,
+      "grad_norm": 1.3469446897506714,
+      "learning_rate": 1.0150903288368741e-05,
+      "loss": 0.7075,
+      "step": 1211
+    },
+    {
+      "epoch": 1.1041359773371104,
+      "grad_norm": 1.3512985706329346,
+      "learning_rate": 9.681455921476839e-06,
+      "loss": 0.6877,
+      "step": 1218
+    },
+    {
+      "epoch": 1.1104815864022664,
+      "grad_norm": 1.4418903589248657,
+      "learning_rate": 9.221956552036992e-06,
+      "loss": 0.7409,
+      "step": 1225
+    },
+    {
+      "epoch": 1.116827195467422,
+      "grad_norm": 1.3517122268676758,
+      "learning_rate": 8.772518554642973e-06,
+      "loss": 0.7251,
+      "step": 1232
+    },
+    {
+      "epoch": 1.123172804532578,
+      "grad_norm": 1.2315598726272583,
+      "learning_rate": 8.333252821395526e-06,
+      "loss": 0.7255,
+      "step": 1239
+    },
+    {
+      "epoch": 1.1295184135977336,
+      "grad_norm": 1.1964752674102783,
+      "learning_rate": 7.904267734541498e-06,
+      "loss": 0.6761,
+      "step": 1246
+    },
+    {
+      "epoch": 1.1358640226628895,
+      "grad_norm": 1.4459127187728882,
+      "learning_rate": 7.485669139732004e-06,
+      "loss": 0.681,
+      "step": 1253
+    },
+    {
+      "epoch": 1.1422096317280452,
+      "grad_norm": 1.2472503185272217,
+      "learning_rate": 7.077560319906695e-06,
+      "loss": 0.6352,
+      "step": 1260
+    },
+    {
+      "epoch": 1.1485552407932011,
+      "grad_norm": 1.5689338445663452,
+      "learning_rate": 6.680041969810203e-06,
+      "loss": 0.729,
+      "step": 1267
+    },
+    {
+      "epoch": 1.154900849858357,
+      "grad_norm": 1.3047913312911987,
+      "learning_rate": 6.293212171147206e-06,
+      "loss": 0.7166,
+      "step": 1274
+    },
+    {
+      "epoch": 1.1612464589235127,
+      "grad_norm": 1.1112953424453735,
+      "learning_rate": 5.917166368382277e-06,
+      "loss": 0.7171,
+      "step": 1281
+    },
+    {
+      "epoch": 1.1675920679886684,
+      "grad_norm": 1.3600448369979858,
+      "learning_rate": 5.5519973451903405e-06,
+      "loss": 0.8326,
+      "step": 1288
+    },
+    {
+      "epoch": 1.1739376770538243,
+      "grad_norm": 1.092890977859497,
+      "learning_rate": 5.197795201563743e-06,
+      "loss": 0.6963,
+      "step": 1295
+    },
+    {
+      "epoch": 1.1802832861189803,
+      "grad_norm": 1.1682928800582886,
+      "learning_rate": 4.8546473315813856e-06,
+      "loss": 0.667,
+      "step": 1302
+    },
+    {
+      "epoch": 1.186628895184136,
+      "grad_norm": 1.8733927011489868,
+      "learning_rate": 4.522638401845547e-06,
+      "loss": 0.743,
+      "step": 1309
+    },
+    {
+      "epoch": 1.1929745042492919,
+      "grad_norm": 1.3126906156539917,
+      "learning_rate": 4.2018503305916775e-06,
+      "loss": 0.7463,
+      "step": 1316
+    },
+    {
+      "epoch": 1.1993201133144475,
+      "grad_norm": 1.325699806213379,
+      "learning_rate": 3.892362267476313e-06,
+      "loss": 0.6793,
+      "step": 1323
+    },
+    {
+      "epoch": 1.2056657223796035,
+      "grad_norm": 1.2438085079193115,
+      "learning_rate": 3.5942505740480582e-06,
+      "loss": 0.7638,
+      "step": 1330
+    },
+    {
+      "epoch": 1.2120113314447591,
+      "grad_norm": 1.0289039611816406,
+      "learning_rate": 3.3075888049065196e-06,
+      "loss": 0.7364,
+      "step": 1337
+    },
+    {
+      "epoch": 1.218356940509915,
+      "grad_norm": 1.4916263818740845,
+      "learning_rate": 3.03244768955383e-06,
+      "loss": 0.7621,
+      "step": 1344
+    },
+    {
+      "epoch": 1.2247025495750707,
+      "grad_norm": 1.6056360006332397,
+      "learning_rate": 2.7688951149431595e-06,
+      "loss": 0.6288,
+      "step": 1351
+    },
+    {
+      "epoch": 1.2310481586402267,
+      "grad_norm": 1.4644125699996948,
+      "learning_rate": 2.5169961087286974e-06,
+      "loss": 0.7109,
+      "step": 1358
+    },
+    {
+      "epoch": 1.2373937677053823,
+      "grad_norm": 1.2924115657806396,
+      "learning_rate": 2.276812823220964e-06,
+      "loss": 0.6827,
+      "step": 1365
+    },
+    {
+      "epoch": 1.2437393767705383,
+      "grad_norm": 1.34138023853302,
+      "learning_rate": 2.048404520051722e-06,
+      "loss": 0.7187,
+      "step": 1372
+    },
+    {
+      "epoch": 1.2500849858356942,
+      "grad_norm": 1.1116788387298584,
+      "learning_rate": 1.8318275555520237e-06,
+      "loss": 0.7556,
+      "step": 1379
+    },
+    {
+      "epoch": 1.2564305949008499,
+      "grad_norm": 1.0539860725402832,
+      "learning_rate": 1.6271353668471655e-06,
+      "loss": 0.6949,
+      "step": 1386
+    },
+    {
+      "epoch": 1.2627762039660055,
+      "grad_norm": 1.5152783393859863,
+      "learning_rate": 1.4343784586718311e-06,
+      "loss": 0.7225,
+      "step": 1393
+    },
+    {
+      "epoch": 1.2691218130311614,
+      "grad_norm": 1.1229016780853271,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 0.6508,
+      "step": 1400
+    },
+    {
+      "epoch": 1.2754674220963174,
+      "grad_norm": 1.4070674180984497,
+      "learning_rate": 1.0848577668543802e-06,
+      "loss": 0.7195,
+      "step": 1407
+    },
+    {
+      "epoch": 1.281813031161473,
+      "grad_norm": 1.3908133506774902,
+      "learning_rate": 9.281802222129765e-07,
+      "loss": 0.6959,
+      "step": 1414
+    },
+    {
+      "epoch": 1.288158640226629,
+      "grad_norm": 1.5261811017990112,
+      "learning_rate": 7.836104148243484e-07,
+      "loss": 0.6707,
+      "step": 1421
+    },
+    {
+      "epoch": 1.2945042492917846,
+      "grad_norm": 1.4674750566482544,
+      "learning_rate": 6.511840151252169e-07,
+      "loss": 0.6538,
+      "step": 1428
+    },
+    {
+      "epoch": 1.3008498583569406,
+      "grad_norm": 1.7782832384109497,
+      "learning_rate": 5.309336973481683e-07,
+      "loss": 0.6876,
+      "step": 1435
+    },
+    {
+      "epoch": 1.3071954674220962,
+      "grad_norm": 1.080048680305481,
+      "learning_rate": 4.228891314597694e-07,
+      "loss": 0.6361,
+      "step": 1442
+    },
+    {
+      "epoch": 1.3135410764872522,
+      "grad_norm": 1.295278549194336,
+      "learning_rate": 3.2707697583995167e-07,
+      "loss": 0.6829,
+      "step": 1449
+    },
+    {
+      "epoch": 1.319886685552408,
+      "grad_norm": 1.4356597661972046,
+      "learning_rate": 2.4352087070443895e-07,
+      "loss": 0.6593,
+      "step": 1456
+    },
+    {
+      "epoch": 1.3262322946175638,
+      "grad_norm": 1.2196033000946045,
+      "learning_rate": 1.7224143227190236e-07,
+      "loss": 0.7315,
+      "step": 1463
+    },
+    {
+      "epoch": 1.3325779036827194,
+      "grad_norm": 1.365475058555603,
+      "learning_rate": 1.132562476771959e-07,
+      "loss": 0.6959,
+      "step": 1470
+    },
+    {
+      "epoch": 1.3389235127478754,
+      "grad_norm": 1.3029407262802124,
+      "learning_rate": 6.657987063200533e-08,
+      "loss": 0.7561,
+      "step": 1477
+    },
+    {
+      "epoch": 1.3452691218130313,
+      "grad_norm": 1.1381126642227173,
+      "learning_rate": 3.2223817833931805e-08,
+      "loss": 0.687,
+      "step": 1484
+    },
+    {
+      "epoch": 1.351614730878187,
+      "grad_norm": 1.1441847085952759,
+      "learning_rate": 1.019656612492592e-08,
+      "loss": 0.7373,
+      "step": 1491
+    },
+    {
+      "epoch": 1.3579603399433426,
+      "grad_norm": 1.493086814880371,
+      "learning_rate": 5.035503997385949e-10,
+      "loss": 0.7458,
+      "step": 1498
     }
   ],
   "logging_steps": 7,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.230406863197307e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null