aleegis12 committed
Commit 271e95c · verified · Parent: 305e0dc

Training in progress, step 224, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6fda2e763cacbf5384f32a5a88520cf1602185a56feea8f48caf776e6426b9b7
+ oid sha256:606d4ed2867ba0bf76578e3a2d28b7f5157628c4e08ad9b922fc1e50be900dbd
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5e3f09930179102320a21c6f9abea0e1cdb2e1ab1919021f171f803d82927b0c
+ oid sha256:3dc075e2ca009ca108b4be5ee95b4d150b2366246e5ba577b39e69328ab03c48
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:584af298efc3b2c3d46a2cfe15d836a3398932f3cab868e94c719298ccae2d97
+ oid sha256:a8941cfccbcfcdf233d1937f7300fd6c7036057879aed68046c10cd5ede20934
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:23ea67c3a25ae1d1c5130f8fae80127b447d72c85253a82322b78b264d40857f
+ oid sha256:93fe870d91bc8391b278b0e9eb02bd97d2a13abf3d921d0d4fefc96fbe029409
  size 1064
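
Each of the four files above is a Git LFS pointer, so only its oid sha256: line changes in this commit; the binary payload itself lives in LFS storage. Below is a minimal sketch (standard library only; the pointer text is copied from this commit, while the helper names are illustrative) of checking a locally downloaded file against such a pointer:

import hashlib
import os


def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer ('version', 'oid', 'size' lines) into a dict."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def matches_pointer(path: str, pointer_text: str) -> bool:
    """Return True if the local file's SHA-256 and size match the pointer."""
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].split("sha256:", 1)[1]
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size


# New adapter pointer from this commit:
ADAPTER_POINTER = """\
version https://git-lfs.github.com/spec/v1
oid sha256:606d4ed2867ba0bf76578e3a2d28b7f5157628c4e08ad9b922fc1e50be900dbd
size 671149168"""

# matches_pointer("last-checkpoint/adapter_model.safetensors", ADAPTER_POINTER)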
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 1.3149925470352173,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
- "epoch": 0.8948545861297539,
+ "epoch": 1.0022371364653244,
  "eval_steps": 100,
- "global_step": 200,
+ "global_step": 224,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1431,6 +1431,174 @@
  "eval_samples_per_second": 11.415,
  "eval_steps_per_second": 2.876,
  "step": 200
+ },
+ {
+ "epoch": 0.8993288590604027,
+ "grad_norm": 0.3248676657676697,
+ "learning_rate": 3.1037725843711062e-06,
+ "loss": 1.5013,
+ "step": 201
+ },
+ {
+ "epoch": 0.9038031319910514,
+ "grad_norm": 0.3875062167644501,
+ "learning_rate": 2.842278276436128e-06,
+ "loss": 2.0208,
+ "step": 202
+ },
+ {
+ "epoch": 0.9082774049217002,
+ "grad_norm": 0.36224886775016785,
+ "learning_rate": 2.591967620451707e-06,
+ "loss": 1.9816,
+ "step": 203
+ },
+ {
+ "epoch": 0.912751677852349,
+ "grad_norm": 0.30799493193626404,
+ "learning_rate": 2.3528999786421756e-06,
+ "loss": 1.4076,
+ "step": 204
+ },
+ {
+ "epoch": 0.9172259507829977,
+ "grad_norm": 0.2949186861515045,
+ "learning_rate": 2.1251320469037827e-06,
+ "loss": 1.1052,
+ "step": 205
+ },
+ {
+ "epoch": 0.9217002237136466,
+ "grad_norm": 0.27485114336013794,
+ "learning_rate": 1.908717841359048e-06,
+ "loss": 1.04,
+ "step": 206
+ },
+ {
+ "epoch": 0.9261744966442953,
+ "grad_norm": 0.30447426438331604,
+ "learning_rate": 1.70370868554659e-06,
+ "loss": 1.1437,
+ "step": 207
+ },
+ {
+ "epoch": 0.930648769574944,
+ "grad_norm": 0.3369496166706085,
+ "learning_rate": 1.5101531982495308e-06,
+ "loss": 1.3196,
+ "step": 208
+ },
+ {
+ "epoch": 0.9351230425055929,
+ "grad_norm": 0.308193564414978,
+ "learning_rate": 1.328097281965357e-06,
+ "loss": 0.899,
+ "step": 209
+ },
+ {
+ "epoch": 0.9395973154362416,
+ "grad_norm": 0.31419670581817627,
+ "learning_rate": 1.157584112019966e-06,
+ "loss": 1.0979,
+ "step": 210
+ },
+ {
+ "epoch": 0.9440715883668904,
+ "grad_norm": 0.29612240195274353,
+ "learning_rate": 9.986541263284077e-07,
+ "loss": 0.8449,
+ "step": 211
+ },
+ {
+ "epoch": 0.9485458612975392,
+ "grad_norm": 0.3415446877479553,
+ "learning_rate": 8.513450158049108e-07,
+ "loss": 1.0074,
+ "step": 212
+ },
+ {
+ "epoch": 0.9530201342281879,
+ "grad_norm": 0.3255971670150757,
+ "learning_rate": 7.156917154243048e-07,
+ "loss": 1.017,
+ "step": 213
+ },
+ {
+ "epoch": 0.9574944071588367,
+ "grad_norm": 0.3114960491657257,
+ "learning_rate": 5.917263959370312e-07,
+ "loss": 0.8636,
+ "step": 214
+ },
+ {
+ "epoch": 0.9619686800894854,
+ "grad_norm": 0.3541721999645233,
+ "learning_rate": 4.794784562397458e-07,
+ "loss": 0.9719,
+ "step": 215
+ },
+ {
+ "epoch": 0.9664429530201343,
+ "grad_norm": 0.37471628189086914,
+ "learning_rate": 3.7897451640321323e-07,
+ "loss": 1.1838,
+ "step": 216
+ },
+ {
+ "epoch": 0.970917225950783,
+ "grad_norm": 0.3450222611427307,
+ "learning_rate": 2.902384113592782e-07,
+ "loss": 0.964,
+ "step": 217
+ },
+ {
+ "epoch": 0.9753914988814317,
+ "grad_norm": 0.41780760884284973,
+ "learning_rate": 2.1329118524827662e-07,
+ "loss": 1.3496,
+ "step": 218
+ },
+ {
+ "epoch": 0.9798657718120806,
+ "grad_norm": 0.3904644548892975,
+ "learning_rate": 1.481510864283553e-07,
+ "loss": 1.0503,
+ "step": 219
+ },
+ {
+ "epoch": 0.9843400447427293,
+ "grad_norm": 0.441577672958374,
+ "learning_rate": 9.483356314779479e-08,
+ "loss": 1.2227,
+ "step": 220
+ },
+ {
+ "epoch": 0.9888143176733781,
+ "grad_norm": 0.48042798042297363,
+ "learning_rate": 5.3351259881379014e-08,
+ "loss": 1.2312,
+ "step": 221
+ },
+ {
+ "epoch": 0.9932885906040269,
+ "grad_norm": 0.5117903351783752,
+ "learning_rate": 2.371401433170495e-08,
+ "loss": 1.3979,
+ "step": 222
+ },
+ {
+ "epoch": 0.9977628635346756,
+ "grad_norm": 0.636166512966156,
+ "learning_rate": 5.928855096154484e-09,
+ "loss": 1.6438,
+ "step": 223
+ },
+ {
+ "epoch": 1.0022371364653244,
+ "grad_norm": 1.6414198875427246,
+ "learning_rate": 0.0,
+ "loss": 2.9316,
+ "step": 224
  }
  ],
  "logging_steps": 1,
@@ -1454,12 +1622,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.4092406879656346e+17,
+ "total_flos": 3.81795358570709e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null