Training in progress, step 4375
Browse files- adapter_model.safetensors +1 -1
- train.log +125 -0
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140991056
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d72193233033e5c6c0c99c40c7040346b97f0f0198750f409969f2c3d4faa937
|
3 |
size 1140991056
|
train.log
CHANGED
@@ -8720,3 +8720,128 @@ Time to load cpu_adam op: 2.2494730949401855 seconds
|
|
8720 |
|
8721 |
[Rank 1] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
|
8722 |
{'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07, 'epoch': 0.9}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8720 |
|
8721 |
[Rank 1] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
|
8722 |
{'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07, 'epoch': 0.9}
|
8723 |
+
[Rank 0] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}[Rank 3] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}[Rank 1] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}
|
8724 |
+
|
8725 |
+
|
8726 |
+
[Rank 2] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}
|
8727 |
+
{'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07, 'epoch': 0.9}
|
8728 |
+
[Rank 3] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}[Rank 0] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}
|
8729 |
+
[Rank 2] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}[Rank 1] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}
|
8730 |
+
|
8731 |
+
|
8732 |
+
{'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07, 'epoch': 0.9}
|
8733 |
+
[Rank 1] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}
|
8734 |
+
[Rank 3] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}
|
8735 |
+
[Rank 0] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}[Rank 2] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}
|
8736 |
+
|
8737 |
+
{'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07, 'epoch': 0.9}
|
8738 |
+
[Rank 3] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}
|
8739 |
+
[Rank 0] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}[Rank 1] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}
|
8740 |
+
[Rank 2] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}
|
8741 |
+
|
8742 |
+
{'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07, 'epoch': 0.9}
|
8743 |
+
[Rank 3] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}[Rank 2] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}
|
8744 |
+
|
8745 |
+
[Rank 1] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}
|
8746 |
+
[Rank 0] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}
|
8747 |
+
{'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07, 'epoch': 0.9}
|
8748 |
+
[Rank 1] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}
|
8749 |
+
[Rank 2] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}
|
8750 |
+
[Rank 3] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}[Rank 0] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}
|
8751 |
+
|
8752 |
+
{'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07, 'epoch': 0.9}
|
8753 |
+
[Rank 3] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}
|
8754 |
+
[Rank 0] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}[Rank 2] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}
|
8755 |
+
|
8756 |
+
[Rank 1] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}
|
8757 |
+
{'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07, 'epoch': 0.9}
|
8758 |
+
[Rank 2] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}[Rank 0] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}[Rank 3] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}
|
8759 |
+
|
8760 |
+
|
8761 |
+
[Rank 1] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}
|
8762 |
+
{'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07, 'epoch': 0.9}
|
8763 |
+
[Rank 3] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}[Rank 1] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}
|
8764 |
+
[Rank 0] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}
|
8765 |
+
[Rank 2] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}
|
8766 |
+
|
8767 |
+
{'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07, 'epoch': 0.9}
|
8768 |
+
[Rank 3] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}[Rank 2] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}
|
8769 |
+
[Rank 1] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}
|
8770 |
+
|
8771 |
+
[Rank 0] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}
|
8772 |
+
{'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07, 'epoch': 0.9}
|
8773 |
+
[Rank 0] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}[Rank 3] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}[Rank 1] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}
|
8774 |
+
|
8775 |
+
|
8776 |
+
[Rank 2] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}
|
8777 |
+
{'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07, 'epoch': 0.9}
|
8778 |
+
[Rank 0] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}[Rank 2] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}[Rank 3] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}
|
8779 |
+
[Rank 1] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}
|
8780 |
+
|
8781 |
+
|
8782 |
+
{'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07, 'epoch': 0.9}
|
8783 |
+
[Rank 3] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}[Rank 2] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}
|
8784 |
+
[Rank 0] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}
|
8785 |
+
|
8786 |
+
[Rank 1] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}
|
8787 |
+
{'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07, 'epoch': 0.9}
|
8788 |
+
[Rank 3] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}[Rank 0] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}[Rank 1] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}
|
8789 |
+
|
8790 |
+
|
8791 |
+
[Rank 2] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}
|
8792 |
+
{'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07, 'epoch': 0.9}
|
8793 |
+
[Rank 0] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}[Rank 3] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}
|
8794 |
+
[Rank 2] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}
|
8795 |
+
|
8796 |
+
[Rank 1] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}
|
8797 |
+
{'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07, 'epoch': 0.9}
|
8798 |
+
[Rank 1] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}[Rank 0] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}[Rank 3] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}
|
8799 |
+
|
8800 |
+
|
8801 |
+
[Rank 2] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}
|
8802 |
+
{'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07, 'epoch': 0.9}
|
8803 |
+
[Rank 3] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}
|
8804 |
+
[Rank 1] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}
|
8805 |
+
[Rank 0] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}[Rank 2] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}
|
8806 |
+
|
8807 |
+
{'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07, 'epoch': 0.9}
|
8808 |
+
[Rank 3] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}
|
8809 |
+
[Rank 0] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}[Rank 2] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}
|
8810 |
+
|
8811 |
+
[Rank 1] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}
|
8812 |
+
{'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07, 'epoch': 0.9}
|
8813 |
+
[Rank 1] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}[Rank 3] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}[Rank 0] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}
|
8814 |
+
|
8815 |
+
[Rank 2] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}
|
8816 |
+
|
8817 |
+
{'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07, 'epoch': 0.9}
|
8818 |
+
[Rank 3] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}
|
8819 |
+
[Rank 0] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}[Rank 1] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}
|
8820 |
+
|
8821 |
+
[Rank 2] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}
|
8822 |
+
{'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07, 'epoch': 0.9}
|
8823 |
+
[Rank 2] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}
|
8824 |
+
[Rank 0] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}[Rank 3] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}
|
8825 |
+
[Rank 1] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}
|
8826 |
+
|
8827 |
+
{'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07, 'epoch': 0.9}
|
8828 |
+
[Rank 2] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}
|
8829 |
+
[Rank 0] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}[Rank 3] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}
|
8830 |
+
|
8831 |
+
[Rank 1] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}
|
8832 |
+
{'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07, 'epoch': 0.9}
|
8833 |
+
[Rank 3] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}[Rank 1] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}[Rank 0] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}
|
8834 |
+
|
8835 |
+
[Rank 2] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}
|
8836 |
+
|
8837 |
+
{'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07, 'epoch': 0.9}
|
8838 |
+
[Rank 3] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}[Rank 2] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}[Rank 1] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}
|
8839 |
+
|
8840 |
+
|
8841 |
+
[Rank 0] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}
|
8842 |
+
{'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07, 'epoch': 0.9}
|
8843 |
+
[Rank 3] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}[Rank 2] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}[Rank 1] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
|
8844 |
+
|
8845 |
+
|
8846 |
+
[Rank 0] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
|
8847 |
+
{'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07, 'epoch': 0.9}
|