aleegis commited on
Commit
15b7eb0
·
verified ·
1 Parent(s): c21898d

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fccf64734ff924caa5eddb5c6f6b322eabbd44a5ed01712af59ac3d42ed0938
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ace832017d535a2701404defed5440bdb64ee82aca5c376dddbe27cf7b88b38
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e787ded47db8e1a655c87ff1cd78a9c576fb9c34818ded3b16e2ab0b15d313b
3
  size 671473298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:936af1b63fb498f0091dfc26166ad62fc436853ab06bd1f27c2633956894ba8a
3
  size 671473298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efcbef283f1e087a50e99a91600bcaba3fb5d5d9c71a2c10ff9c48425ae427ac
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1a984a18d8a1e358ff31fb23848c08d5854b29401bd2aa1d0a25d5617579e98
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dfefdd350a9c50a5ce8f17f77222cc43ade97bd1bc4dfe20825b1649c6776a6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:891cad020bf7bee78efa739dc10e1e4315e34b096ed70226b38590ec81d7d418
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9988177339901478,
5
  "eval_steps": 500,
6
- "global_step": 1268,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1274,6 +1274,237 @@
1274
  "learning_rate": 6.680041969810203e-06,
1275
  "loss": 0.5018,
1276
  "step": 1267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1277
  }
1278
  ],
1279
  "logging_steps": 7,
@@ -1288,12 +1519,12 @@
1288
  "should_evaluate": false,
1289
  "should_log": false,
1290
  "should_save": true,
1291
- "should_training_stop": false
1292
  },
1293
  "attributes": {}
1294
  }
1295
  },
1296
- "total_flos": 1.8930015223676928e+18,
1297
  "train_batch_size": 2,
1298
  "trial_name": null,
1299
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3645320197044333,
5
  "eval_steps": 500,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1274
  "learning_rate": 6.680041969810203e-06,
1275
  "loss": 0.5018,
1276
  "step": 1267
1277
+ },
1278
+ {
1279
+ "epoch": 2.0082758620689654,
1280
+ "grad_norm": 0.39984405040740967,
1281
+ "learning_rate": 6.293212171147206e-06,
1282
+ "loss": 0.5029,
1283
+ "step": 1274
1284
+ },
1285
+ {
1286
+ "epoch": 2.0193103448275864,
1287
+ "grad_norm": 0.409271776676178,
1288
+ "learning_rate": 5.917166368382277e-06,
1289
+ "loss": 0.4303,
1290
+ "step": 1281
1291
+ },
1292
+ {
1293
+ "epoch": 2.030344827586207,
1294
+ "grad_norm": 0.4450385570526123,
1295
+ "learning_rate": 5.5519973451903405e-06,
1296
+ "loss": 0.4304,
1297
+ "step": 1288
1298
+ },
1299
+ {
1300
+ "epoch": 2.0413793103448277,
1301
+ "grad_norm": 0.4327320456504822,
1302
+ "learning_rate": 5.197795201563743e-06,
1303
+ "loss": 0.4258,
1304
+ "step": 1295
1305
+ },
1306
+ {
1307
+ "epoch": 2.0524137931034483,
1308
+ "grad_norm": 0.42840683460235596,
1309
+ "learning_rate": 4.8546473315813856e-06,
1310
+ "loss": 0.4298,
1311
+ "step": 1302
1312
+ },
1313
+ {
1314
+ "epoch": 2.063448275862069,
1315
+ "grad_norm": 0.41301754117012024,
1316
+ "learning_rate": 4.522638401845547e-06,
1317
+ "loss": 0.4287,
1318
+ "step": 1309
1319
+ },
1320
+ {
1321
+ "epoch": 2.0744827586206895,
1322
+ "grad_norm": 0.4132091701030731,
1323
+ "learning_rate": 4.2018503305916775e-06,
1324
+ "loss": 0.4211,
1325
+ "step": 1316
1326
+ },
1327
+ {
1328
+ "epoch": 2.08551724137931,
1329
+ "grad_norm": 0.43061113357543945,
1330
+ "learning_rate": 3.892362267476313e-06,
1331
+ "loss": 0.429,
1332
+ "step": 1323
1333
+ },
1334
+ {
1335
+ "epoch": 2.0965517241379312,
1336
+ "grad_norm": 0.41242024302482605,
1337
+ "learning_rate": 3.5942505740480582e-06,
1338
+ "loss": 0.4254,
1339
+ "step": 1330
1340
+ },
1341
+ {
1342
+ "epoch": 2.107586206896552,
1343
+ "grad_norm": 0.4244064390659332,
1344
+ "learning_rate": 3.3075888049065196e-06,
1345
+ "loss": 0.4263,
1346
+ "step": 1337
1347
+ },
1348
+ {
1349
+ "epoch": 2.1186206896551725,
1350
+ "grad_norm": 0.4177404046058655,
1351
+ "learning_rate": 3.03244768955383e-06,
1352
+ "loss": 0.431,
1353
+ "step": 1344
1354
+ },
1355
+ {
1356
+ "epoch": 2.129655172413793,
1357
+ "grad_norm": 0.42242953181266785,
1358
+ "learning_rate": 2.7688951149431595e-06,
1359
+ "loss": 0.4312,
1360
+ "step": 1351
1361
+ },
1362
+ {
1363
+ "epoch": 2.1406896551724137,
1364
+ "grad_norm": 0.43014487624168396,
1365
+ "learning_rate": 2.5169961087286974e-06,
1366
+ "loss": 0.4265,
1367
+ "step": 1358
1368
+ },
1369
+ {
1370
+ "epoch": 2.1517241379310343,
1371
+ "grad_norm": 0.43356141448020935,
1372
+ "learning_rate": 2.276812823220964e-06,
1373
+ "loss": 0.4384,
1374
+ "step": 1365
1375
+ },
1376
+ {
1377
+ "epoch": 2.162758620689655,
1378
+ "grad_norm": 0.4384445548057556,
1379
+ "learning_rate": 2.048404520051722e-06,
1380
+ "loss": 0.4187,
1381
+ "step": 1372
1382
+ },
1383
+ {
1384
+ "epoch": 2.173793103448276,
1385
+ "grad_norm": 0.4111485183238983,
1386
+ "learning_rate": 1.8318275555520237e-06,
1387
+ "loss": 0.4313,
1388
+ "step": 1379
1389
+ },
1390
+ {
1391
+ "epoch": 2.1848275862068967,
1392
+ "grad_norm": 0.4494364261627197,
1393
+ "learning_rate": 1.6271353668471655e-06,
1394
+ "loss": 0.4305,
1395
+ "step": 1386
1396
+ },
1397
+ {
1398
+ "epoch": 2.1958620689655173,
1399
+ "grad_norm": 0.422455757856369,
1400
+ "learning_rate": 1.4343784586718311e-06,
1401
+ "loss": 0.4174,
1402
+ "step": 1393
1403
+ },
1404
+ {
1405
+ "epoch": 2.206896551724138,
1406
+ "grad_norm": 0.4379701614379883,
1407
+ "learning_rate": 1.2536043909088191e-06,
1408
+ "loss": 0.4177,
1409
+ "step": 1400
1410
+ },
1411
+ {
1412
+ "epoch": 2.2179310344827585,
1413
+ "grad_norm": 0.4343680143356323,
1414
+ "learning_rate": 1.0848577668543802e-06,
1415
+ "loss": 0.4176,
1416
+ "step": 1407
1417
+ },
1418
+ {
1419
+ "epoch": 2.228965517241379,
1420
+ "grad_norm": 0.42176929116249084,
1421
+ "learning_rate": 9.281802222129765e-07,
1422
+ "loss": 0.417,
1423
+ "step": 1414
1424
+ },
1425
+ {
1426
+ "epoch": 2.24,
1427
+ "grad_norm": 0.4197026491165161,
1428
+ "learning_rate": 7.836104148243484e-07,
1429
+ "loss": 0.4201,
1430
+ "step": 1421
1431
+ },
1432
+ {
1433
+ "epoch": 2.251034482758621,
1434
+ "grad_norm": 0.42568454146385193,
1435
+ "learning_rate": 6.511840151252169e-07,
1436
+ "loss": 0.4164,
1437
+ "step": 1428
1438
+ },
1439
+ {
1440
+ "epoch": 2.2620689655172415,
1441
+ "grad_norm": 0.4492688775062561,
1442
+ "learning_rate": 5.309336973481683e-07,
1443
+ "loss": 0.4289,
1444
+ "step": 1435
1445
+ },
1446
+ {
1447
+ "epoch": 2.273103448275862,
1448
+ "grad_norm": 0.43771427869796753,
1449
+ "learning_rate": 4.228891314597694e-07,
1450
+ "loss": 0.4362,
1451
+ "step": 1442
1452
+ },
1453
+ {
1454
+ "epoch": 2.2841379310344827,
1455
+ "grad_norm": 0.4285426139831543,
1456
+ "learning_rate": 3.2707697583995167e-07,
1457
+ "loss": 0.4161,
1458
+ "step": 1449
1459
+ },
1460
+ {
1461
+ "epoch": 2.2951724137931033,
1462
+ "grad_norm": 0.433178573846817,
1463
+ "learning_rate": 2.4352087070443895e-07,
1464
+ "loss": 0.4252,
1465
+ "step": 1456
1466
+ },
1467
+ {
1468
+ "epoch": 2.306206896551724,
1469
+ "grad_norm": 0.43515661358833313,
1470
+ "learning_rate": 1.7224143227190236e-07,
1471
+ "loss": 0.4155,
1472
+ "step": 1463
1473
+ },
1474
+ {
1475
+ "epoch": 2.317241379310345,
1476
+ "grad_norm": 0.45461907982826233,
1477
+ "learning_rate": 1.132562476771959e-07,
1478
+ "loss": 0.4276,
1479
+ "step": 1470
1480
+ },
1481
+ {
1482
+ "epoch": 2.3282758620689656,
1483
+ "grad_norm": 0.42468348145484924,
1484
+ "learning_rate": 6.657987063200533e-08,
1485
+ "loss": 0.4256,
1486
+ "step": 1477
1487
+ },
1488
+ {
1489
+ "epoch": 2.3393103448275863,
1490
+ "grad_norm": 0.4367210865020752,
1491
+ "learning_rate": 3.2223817833931805e-08,
1492
+ "loss": 0.429,
1493
+ "step": 1484
1494
+ },
1495
+ {
1496
+ "epoch": 2.350344827586207,
1497
+ "grad_norm": 0.43067917227745056,
1498
+ "learning_rate": 1.019656612492592e-08,
1499
+ "loss": 0.4242,
1500
+ "step": 1491
1501
+ },
1502
+ {
1503
+ "epoch": 2.3613793103448275,
1504
+ "grad_norm": 0.44099918007850647,
1505
+ "learning_rate": 5.035503997385949e-10,
1506
+ "loss": 0.424,
1507
+ "step": 1498
1508
  }
1509
  ],
1510
  "logging_steps": 7,
 
1519
  "should_evaluate": false,
1520
  "should_log": false,
1521
  "should_save": true,
1522
+ "should_training_stop": true
1523
  },
1524
  "attributes": {}
1525
  }
1526
  },
1527
+ "total_flos": 2.238031356493824e+18,
1528
  "train_batch_size": 2,
1529
  "trial_name": null,
1530
  "trial_params": null