aseratus1 committed
Commit 523636e · verified · 1 Parent(s): d9a953f

Training in progress, step 230, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90cf86756df572135f03c0f7b02e368ed11704950657d207cb96e852a67974c0
+oid sha256:67781916596e6cc2f2fac9ce421bfaa2f0acabebdb950101d67bb016d1ab4fa2
 size 645975704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c49898add5a21782109bcea17e094f097a2730fa5548b1a51e188fe8213625b0
+oid sha256:abe2309002b0ed2be8f8a2fbe4dd663580b8c9421017827ac1ff4450f7ad68fc
 size 328468404
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c1ef41deb3863cb4e4ca9f914e8469725fb857615262228d60028b63b24c217
+oid sha256:7c1050e20827c53ea3b03c4a195af3abf926f5fe4840e67e6093132eeba079cf
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a94566b02bf28490606f2fb9cb895b81b322144d6c373560bb6cc082ffc05f1
+oid sha256:fe9cf7f3790980c81262679dcebf11d7a584b88476e5dbdf63e7cf839a7cd620
 size 1064
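
Note: the four files above are Git LFS pointers, not the binaries themselves. Each pointer records only the spec version, the sha256 oid of the real object, and its byte size; this commit swaps the oids while the sizes stay the same. Below is a minimal sketch of how a downloaded object can be checked against its pointer, assuming only the three-line pointer layout shown above (paths and function names are illustrative, not part of this repo).

# Minimal sketch: verify a downloaded checkpoint file against its Git LFS pointer.
import hashlib
from pathlib import Path

def read_lfs_pointer(pointer_path):
    # Parse the "key value" lines of an LFS pointer file into a dict.
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def sha256_of(path, chunk_size=1 << 20):
    # Hash in chunks so large checkpoint files are not read into memory at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

def verify_lfs_object(pointer_path, object_path):
    # True when the object's size and sha256 match what the pointer records.
    pointer = read_lfs_pointer(pointer_path)
    expected = pointer["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    size_ok = Path(object_path).stat().st_size == int(pointer["size"])
    return size_ok and sha256_of(object_path) == expected

# Example (illustrative paths): a download of adapter_model.safetensors from this
# commit should hash to the new oid 67781916... recorded in the pointer above.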
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.10709268599748611,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.8705114254624592,
+  "epoch": 1.001088139281828,
   "eval_steps": 100,
-  "global_step": 200,
+  "global_step": 230,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1431,6 +1431,216 @@
       "eval_samples_per_second": 12.048,
       "eval_steps_per_second": 3.02,
       "step": 200
+    },
+    {
+      "epoch": 0.8748639825897715,
+      "grad_norm": 0.04893573746085167,
+      "learning_rate": 4.632065271606756e-06,
+      "loss": 0.0464,
+      "step": 201
+    },
+    {
+      "epoch": 0.8792165397170838,
+      "grad_norm": 0.054525069892406464,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.0653,
+      "step": 202
+    },
+    {
+      "epoch": 0.8835690968443961,
+      "grad_norm": 0.06352470815181732,
+      "learning_rate": 4.023611372427471e-06,
+      "loss": 0.07,
+      "step": 203
+    },
+    {
+      "epoch": 0.8879216539717084,
+      "grad_norm": 0.05715398117899895,
+      "learning_rate": 3.734784976300165e-06,
+      "loss": 0.0565,
+      "step": 204
+    },
+    {
+      "epoch": 0.8922742110990207,
+      "grad_norm": 0.06614100933074951,
+      "learning_rate": 3.4563125677897932e-06,
+      "loss": 0.0706,
+      "step": 205
+    },
+    {
+      "epoch": 0.8966267682263329,
+      "grad_norm": 0.07538996636867523,
+      "learning_rate": 3.18825646801314e-06,
+      "loss": 0.081,
+      "step": 206
+    },
+    {
+      "epoch": 0.9009793253536452,
+      "grad_norm": 0.07782811671495438,
+      "learning_rate": 2.930676666954846e-06,
+      "loss": 0.0761,
+      "step": 207
+    },
+    {
+      "epoch": 0.9053318824809575,
+      "grad_norm": 0.07623429596424103,
+      "learning_rate": 2.6836308100417873e-06,
+      "loss": 0.0808,
+      "step": 208
+    },
+    {
+      "epoch": 0.9096844396082698,
+      "grad_norm": 0.06650186330080032,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.0677,
+      "step": 209
+    },
+    {
+      "epoch": 0.9140369967355821,
+      "grad_norm": 0.0781843438744545,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 0.0835,
+      "step": 210
+    },
+    {
+      "epoch": 0.9183895538628944,
+      "grad_norm": 0.08650253713130951,
+      "learning_rate": 2.006237922855553e-06,
+      "loss": 0.0895,
+      "step": 211
+    },
+    {
+      "epoch": 0.9227421109902068,
+      "grad_norm": 0.08480913937091827,
+      "learning_rate": 1.8018569652073381e-06,
+      "loss": 0.0849,
+      "step": 212
+    },
+    {
+      "epoch": 0.9270946681175191,
+      "grad_norm": 0.07606717199087143,
+      "learning_rate": 1.6082625774666794e-06,
+      "loss": 0.0723,
+      "step": 213
+    },
+    {
+      "epoch": 0.9314472252448314,
+      "grad_norm": 0.08265078812837601,
+      "learning_rate": 1.4254980853566247e-06,
+      "loss": 0.0782,
+      "step": 214
+    },
+    {
+      "epoch": 0.9357997823721437,
+      "grad_norm": 0.07758332788944244,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 0.0664,
+      "step": 215
+    },
+    {
+      "epoch": 0.940152339499456,
+      "grad_norm": 0.08595745265483856,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.0833,
+      "step": 216
+    },
+    {
+      "epoch": 0.9445048966267682,
+      "grad_norm": 0.09878195077180862,
+      "learning_rate": 9.42580830291373e-07,
+      "loss": 0.1022,
+      "step": 217
+    },
+    {
+      "epoch": 0.9488574537540805,
+      "grad_norm": 0.1008504182100296,
+      "learning_rate": 8.035205700685167e-07,
+      "loss": 0.0786,
+      "step": 218
+    },
+    {
+      "epoch": 0.9532100108813928,
+      "grad_norm": 0.10530146956443787,
+      "learning_rate": 6.75470303823933e-07,
+      "loss": 0.0882,
+      "step": 219
+    },
+    {
+      "epoch": 0.9575625680087051,
+      "grad_norm": 0.11855965107679367,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 0.0952,
+      "step": 220
+    },
+    {
+      "epoch": 0.9619151251360174,
+      "grad_norm": 0.129081130027771,
+      "learning_rate": 4.52511911603265e-07,
+      "loss": 0.0809,
+      "step": 221
+    },
+    {
+      "epoch": 0.9662676822633297,
+      "grad_norm": 0.16669493913650513,
+      "learning_rate": 3.576536829081323e-07,
+      "loss": 0.1204,
+      "step": 222
+    },
+    {
+      "epoch": 0.970620239390642,
+      "grad_norm": 0.16019439697265625,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.1158,
+      "step": 223
+    },
+    {
+      "epoch": 0.9749727965179543,
+      "grad_norm": 0.2115449756383896,
+      "learning_rate": 2.012853002380466e-07,
+      "loss": 0.1401,
+      "step": 224
+    },
+    {
+      "epoch": 0.9793253536452666,
+      "grad_norm": 0.23436211049556732,
+      "learning_rate": 1.3981014094099353e-07,
+      "loss": 0.1593,
+      "step": 225
+    },
+    {
+      "epoch": 0.9836779107725789,
+      "grad_norm": 0.24148912727832794,
+      "learning_rate": 8.949351161324227e-08,
+      "loss": 0.1317,
+      "step": 226
+    },
+    {
+      "epoch": 0.9880304678998912,
+      "grad_norm": 0.30770501494407654,
+      "learning_rate": 5.0346672934270534e-08,
+      "loss": 0.162,
+      "step": 227
+    },
+    {
+      "epoch": 0.9923830250272034,
+      "grad_norm": 0.2943860590457916,
+      "learning_rate": 2.237838582483387e-08,
+      "loss": 0.1882,
+      "step": 228
+    },
+    {
+      "epoch": 0.9967355821545157,
+      "grad_norm": 0.3997349441051483,
+      "learning_rate": 5.594909486328348e-09,
+      "loss": 0.2187,
+      "step": 229
+    },
+    {
+      "epoch": 1.001088139281828,
+      "grad_norm": 0.5895123481750488,
+      "learning_rate": 0.0,
+      "loss": 0.2661,
+      "step": 230
     }
   ],
   "logging_steps": 1,
@@ -1454,12 +1664,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.238351480868045e+17,
+  "total_flos": 3.7320134588104704e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null