Krisbiantoro committed
Commit ea87049 · 1 Parent(s): de5d682

Upload folder using huggingface_hub

Files changed (6)
  1. adapter_model.bin +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +735 -3
  6. training_args.bin +1 -1
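
Note: the commit message above is the default message generated by huggingface_hub's upload_folder API. For reference, a minimal sketch of the kind of call that produces a commit like this one; the local folder path and repo id below are hypothetical placeholders, not taken from this repo:

# Python sketch: push a local training checkpoint folder to the Hub.
# Assumes a prior `huggingface-cli login`; folder_path and repo_id are hypothetical.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./checkpoint-1500",        # dir containing adapter_model.bin, optimizer.pt, ...
    repo_id="Krisbiantoro/my-adapter",      # hypothetical target repo
    commit_message="Upload folder using huggingface_hub",
)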
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9af4c3a65b7a773b24b082fa3d2bdf3b889f0c61459c6c6dc53f6e339785bcd4
+oid sha256:9055d8916d0c01135ce2a001532dec396deba87cc0a07dbf0c98d4dace46d428
 size 75641741
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:461887c9ec08fe4b1692b9fa6def1741d6a60b64bb08c37d3fdc064aa795bd26
+oid sha256:7e8f78d2228b6babafffa91ef4beb07ff87afac189965b622082f7cfda273dcc
 size 151222021
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdda7482499b855a06935901db3ef5c0346fd12eb58a510bec30c9e4dab13b1e
+oid sha256:6dd3a816ab8628e6038ecf426e93a907752049203fbc39b63fcde557182a866f
 size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:749e3338e97b1ab6783e7f614fd50b2475fd9e06f92f74c2d73b555d52907729
+oid sha256:518177a2ab7cf6b410a68ee416a0234864cb6c8e1fae811ce6d16681d4149303
 size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.19331142470520007,
+  "epoch": 0.2648831203231574,
   "eval_steps": 200,
-  "global_step": 1000,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1487,10 +1487,742 @@
       "eval_samples_per_second": 0.729,
       "eval_steps_per_second": 0.729,
       "step": 1000
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0004917027842051741,
+      "logits/chosen": -2.3743691444396973,
+      "logits/rejected": -2.3645715713500977,
+      "logps/chosen": -467.918701171875,
+      "logps/rejected": -392.2208557128906,
+      "loss": 13.2044,
+      "rewards/accuracies": 0.4124999940395355,
+      "rewards/chosen": -34.59047317504883,
+      "rewards/margins": -6.761924743652344,
+      "rewards/rejected": -27.82854652404785,
+      "step": 1010
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0004913043488808868,
+      "logits/chosen": -2.2389774322509766,
+      "logits/rejected": -2.229212760925293,
+      "logps/chosen": -486.40362548828125,
+      "logps/rejected": -460.124267578125,
+      "loss": 12.247,
+      "rewards/accuracies": 0.4749999940395355,
+      "rewards/chosen": -36.01793670654297,
+      "rewards/margins": -2.4814937114715576,
+      "rewards/rejected": -33.53643798828125,
+      "step": 1020
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.0004909379125837757,
+      "logits/chosen": -2.3661270141601562,
+      "logits/rejected": -2.35339093208313,
+      "logps/chosen": -450.0957946777344,
+      "logps/rejected": -419.64453125,
+      "loss": 9.5754,
+      "rewards/accuracies": 0.4625000059604645,
+      "rewards/chosen": -32.51788330078125,
+      "rewards/margins": -2.8007171154022217,
+      "rewards/rejected": -29.717166900634766,
+      "step": 1030
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.000490689498823928,
+      "logits/chosen": -2.502885341644287,
+      "logits/rejected": -2.475445032119751,
+      "logps/chosen": -544.1683349609375,
+      "logps/rejected": -447.0438537597656,
+      "loss": 14.0802,
+      "rewards/accuracies": 0.38749998807907104,
+      "rewards/chosen": -40.440181732177734,
+      "rewards/margins": -7.666708946228027,
+      "rewards/rejected": -32.773475646972656,
+      "step": 1040
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0004903107023416835,
+      "logits/chosen": -2.5913939476013184,
+      "logits/rejected": -2.5680718421936035,
+      "logps/chosen": -486.11907958984375,
+      "logps/rejected": -368.26251220703125,
+      "loss": 13.0584,
+      "rewards/accuracies": 0.32499998807907104,
+      "rewards/chosen": -35.45328140258789,
+      "rewards/margins": -9.003652572631836,
+      "rewards/rejected": -26.449630737304688,
+      "step": 1050
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0004898811381307269,
+      "logits/chosen": -2.5621819496154785,
+      "logits/rejected": -2.5630691051483154,
+      "logps/chosen": -415.2149353027344,
+      "logps/rejected": -372.393798828125,
+      "loss": 10.9069,
+      "rewards/accuracies": 0.48750001192092896,
+      "rewards/chosen": -30.74907875061035,
+      "rewards/margins": -3.5560336112976074,
+      "rewards/rejected": -27.193042755126953,
+      "step": 1060
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0004894424536834149,
+      "logits/chosen": -2.6120645999908447,
+      "logits/rejected": -2.604825019836426,
+      "logps/chosen": -513.4226684570312,
+      "logps/rejected": -459.19647216796875,
+      "loss": 11.3672,
+      "rewards/accuracies": 0.4375,
+      "rewards/chosen": -39.00333023071289,
+      "rewards/margins": -4.482884883880615,
+      "rewards/rejected": -34.520442962646484,
+      "step": 1070
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.000488994665678449,
+      "logits/chosen": -2.880282163619995,
+      "logits/rejected": -2.8795719146728516,
+      "logps/chosen": -424.77874755859375,
+      "logps/rejected": -367.4000244140625,
+      "loss": 11.0388,
+      "rewards/accuracies": 0.4375,
+      "rewards/chosen": -31.353382110595703,
+      "rewards/margins": -5.197685718536377,
+      "rewards/rejected": -26.15569496154785,
+      "step": 1080
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0004885377911406459,
+      "logits/chosen": -2.947252035140991,
+      "logits/rejected": -2.940441370010376,
+      "logps/chosen": -442.61651611328125,
+      "logps/rejected": -374.4497985839844,
+      "loss": 11.9975,
+      "rewards/accuracies": 0.44999998807907104,
+      "rewards/chosen": -31.82815170288086,
+      "rewards/margins": -5.211056709289551,
+      "rewards/rejected": -26.617095947265625,
+      "step": 1090
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.00048807184744029076,
+      "logits/chosen": -2.9204514026641846,
+      "logits/rejected": -2.922818660736084,
+      "logps/chosen": -416.2978515625,
+      "logps/rejected": -395.44378662109375,
+      "loss": 9.0951,
+      "rewards/accuracies": 0.4749999940395355,
+      "rewards/chosen": -30.16245460510254,
+      "rewards/margins": -1.954272985458374,
+      "rewards/rejected": -28.208179473876953,
+      "step": 1100
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00048759685229247675,
+      "logits/chosen": -2.950378179550171,
+      "logits/rejected": -2.9522385597229004,
+      "logps/chosen": -456.4190368652344,
+      "logps/rejected": -417.6458435058594,
+      "loss": 10.6616,
+      "rewards/accuracies": 0.44999998807907104,
+      "rewards/chosen": -33.50844192504883,
+      "rewards/margins": -3.696442127227783,
+      "rewards/rejected": -29.811996459960938,
+      "step": 1110
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.0004872103512563103,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 24.7455,
+      "rewards/accuracies": 0.4000000059604645,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1120
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00048716163259071837,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 41.8228,
+      "rewards/accuracies": 0.375,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1130
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.000487112823756431,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 110.7265,
+      "rewards/accuracies": 0.25,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1140
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.000487112823756431,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 245.3322,
+      "rewards/accuracies": 0.2750000059604645,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1150
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.000487112823756431,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 1041.9771,
+      "rewards/accuracies": 0.32499998807907104,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1160
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0004870639247720053,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 437.3346,
+      "rewards/accuracies": 0.3375000059604645,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1170
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0004870639247720053,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 123.7268,
+      "rewards/accuracies": 0.22499999403953552,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1180
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0004870639247720053,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 315.4085,
+      "rewards/accuracies": 0.23749999701976776,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1190
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0004870639247720053,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 52542.0625,
+      "rewards/accuracies": 0.25,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1200
+    },
+    {
+      "epoch": 0.21,
+      "eval_logits/chosen": NaN,
+      "eval_logits/rejected": NaN,
+      "eval_logps/chosen": NaN,
+      "eval_logps/rejected": NaN,
+      "eval_loss": NaN,
+      "eval_rewards/accuracies": 0.2244604378938675,
+      "eval_rewards/chosen": NaN,
+      "eval_rewards/margins": NaN,
+      "eval_rewards/rejected": NaN,
+      "eval_runtime": 988.4543,
+      "eval_samples_per_second": 0.703,
+      "eval_steps_per_second": 0.703,
+      "step": 1200
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 197.8537,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1210
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 4813.8465,
+      "rewards/accuracies": 0.13750000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1220
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 2048.3082,
+      "rewards/accuracies": 0.25,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1230
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 143.0693,
+      "rewards/accuracies": 0.16249999403953552,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1240
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 16874.5531,
+      "rewards/accuracies": 0.21250000596046448,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1250
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 2509.9437,
+      "rewards/accuracies": 0.17499999701976776,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1260
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 9635.2938,
+      "rewards/accuracies": 0.1875,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1270
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 3857.9113,
+      "rewards/accuracies": 0.13750000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1280
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 4572.7609,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1290
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 606.0347,
+      "rewards/accuracies": 0.15000000596046448,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1300
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 1759.0598,
+      "rewards/accuracies": 0.17499999701976776,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1310
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 14436.2953,
+      "rewards/accuracies": 0.21250000596046448,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1320
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 386264.375,
+      "rewards/accuracies": 0.0625,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1330
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 8430.657,
+      "rewards/accuracies": 0.1875,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1340
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 6939.275,
+      "rewards/accuracies": 0.13750000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1350
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 4295.0949,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1360
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 17283.3672,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1370
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 24895.7469,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1380
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 45136.4094,
+      "rewards/accuracies": 0.17499999701976776,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1390
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 165.0189,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1400
+    },
+    {
+      "epoch": 0.25,
+      "eval_logits/chosen": NaN,
+      "eval_logits/rejected": NaN,
+      "eval_logps/chosen": NaN,
+      "eval_logps/rejected": NaN,
+      "eval_loss": NaN,
+      "eval_rewards/accuracies": 0.17553956806659698,
+      "eval_rewards/chosen": NaN,
+      "eval_rewards/margins": NaN,
+      "eval_rewards/rejected": NaN,
+      "eval_runtime": 987.1061,
+      "eval_samples_per_second": 0.704,
+      "eval_steps_per_second": 0.704,
+      "step": 1400
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 8688.7203,
+      "rewards/accuracies": 0.22499999403953552,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1410
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 9484.9836,
+      "rewards/accuracies": 0.1875,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1420
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 9308.6922,
+      "rewards/accuracies": 0.17499999701976776,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1430
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 2905.9373,
+      "rewards/accuracies": 0.22499999403953552,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1440
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 4738.7867,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1450
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 424.0728,
+      "rewards/accuracies": 0.20000000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1460
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 2181.5133,
+      "rewards/accuracies": 0.13750000298023224,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1470
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 23597.5219,
+      "rewards/accuracies": 0.16249999403953552,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1480
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 1259.9596,
+      "rewards/accuracies": 0.25,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1490
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0004870149356560326,
+      "logits/chosen": NaN,
+      "logits/rejected": NaN,
+      "logps/chosen": NaN,
+      "logps/rejected": NaN,
+      "loss": 7475.4719,
+      "rewards/accuracies": 0.21250000596046448,
+      "rewards/chosen": NaN,
+      "rewards/margins": NaN,
+      "rewards/rejected": NaN,
+      "step": 1500
     }
   ],
   "logging_steps": 10,
-  "max_steps": 5173,
+  "max_steps": 5662,
   "num_train_epochs": 1,
   "save_steps": 100,
   "total_flos": 0.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5958bf8104a088a450e0b4310623d9428611ebdb5fcbb92ab0f567ab6883bfbd
+oid sha256:396a28579afc547a6f46654c7646d9a3770584a235915799960ff8d28e97bcac
 size 4091