aiden200 committed
Commit bc27afe · verified · 1 parent: e48275b

Training in progress, step 4375

Files changed (2)
  1. adapter_model.safetensors +1 -1
  2. train.log +125 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:002a23ea4ec33d68169cf04be99ea953375df47ab38dedc2e7ebb6618e79408a
+oid sha256:d72193233033e5c6c0c99c40c7040346b97f0f0198750f409969f2c3d4faa937
 size 1140991056
train.log CHANGED
@@ -8720,3 +8720,128 @@ Time to load cpu_adam op: 2.2494730949401855 seconds
 
 [Rank 1] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
 {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07, 'epoch': 0.9}
+[Rank 0] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}[Rank 3] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}[Rank 1] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}
+
+
+[Rank 2] Trainer log: {'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07}
+{'loss': 0.9234, 'grad_norm': 8.1272554397583, 'learning_rate': 5.688334469706446e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}[Rank 0] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}
+[Rank 2] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}[Rank 1] Trainer log: {'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07}
+
+
+{'loss': 0.9453, 'grad_norm': 4.378366947174072, 'learning_rate': 5.665685243730068e-07, 'epoch': 0.9}
+[Rank 1] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}
+[Rank 3] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}
+[Rank 0] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}[Rank 2] Trainer log: {'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07}
+
+{'loss': 1.0494, 'grad_norm': 3.2552075386047363, 'learning_rate': 5.643079884255565e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}
+[Rank 0] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}[Rank 1] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}
+[Rank 2] Trainer log: {'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07}
+
+{'loss': 0.8545, 'grad_norm': 22.334938049316406, 'learning_rate': 5.620518401794672e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}[Rank 2] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}
+
+[Rank 1] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}
+[Rank 0] Trainer log: {'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07}
+{'loss': 0.8423, 'grad_norm': 4.628236293792725, 'learning_rate': 5.598000806838766e-07, 'epoch': 0.9}
+[Rank 1] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}
+[Rank 2] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}
+[Rank 3] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}[Rank 0] Trainer log: {'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07}
+
+{'loss': 0.8247, 'grad_norm': 3.0932765007019043, 'learning_rate': 5.575527109858747e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}
+[Rank 0] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}[Rank 2] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}
+
+[Rank 1] Trainer log: {'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07}
+{'loss': 1.0044, 'grad_norm': 4.8956298828125, 'learning_rate': 5.553097321305134e-07, 'epoch': 0.9}
+[Rank 2] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}[Rank 0] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}[Rank 3] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}
+
+
+[Rank 1] Trainer log: {'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07}
+{'loss': 0.7957, 'grad_norm': 2.652259111404419, 'learning_rate': 5.53071145160804e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}[Rank 1] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}
+[Rank 0] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}
+[Rank 2] Trainer log: {'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07}
+
+{'loss': 1.0521, 'grad_norm': 5.651650905609131, 'learning_rate': 5.508369511177136e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}[Rank 2] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}
+[Rank 1] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}
+
+[Rank 0] Trainer log: {'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07}
+{'loss': 0.8885, 'grad_norm': 3.7264742851257324, 'learning_rate': 5.486071510401658e-07, 'epoch': 0.9}
+[Rank 0] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}[Rank 3] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}[Rank 1] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}
+
+
+[Rank 2] Trainer log: {'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07}
+{'loss': 0.8208, 'grad_norm': 6.894049644470215, 'learning_rate': 5.463817459650467e-07, 'epoch': 0.9}
+[Rank 0] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}[Rank 2] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}[Rank 3] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}
+[Rank 1] Trainer log: {'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07}
+
+
+{'loss': 0.8222, 'grad_norm': 3.1531314849853516, 'learning_rate': 5.441607369271906e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}[Rank 2] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}
+[Rank 0] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}
+
+[Rank 1] Trainer log: {'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07}
+{'loss': 0.8004, 'grad_norm': 5.507090091705322, 'learning_rate': 5.419441249593916e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}[Rank 0] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}[Rank 1] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}
+
+
+[Rank 2] Trainer log: {'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07}
+{'loss': 0.8163, 'grad_norm': 4.2549591064453125, 'learning_rate': 5.397319110924016e-07, 'epoch': 0.9}
+[Rank 0] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}[Rank 3] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}
+[Rank 2] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}
+
+[Rank 1] Trainer log: {'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07}
+{'loss': 0.725, 'grad_norm': 4.9870924949646, 'learning_rate': 5.375240963549211e-07, 'epoch': 0.9}
+[Rank 1] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}[Rank 0] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}[Rank 3] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}
+
+
+[Rank 2] Trainer log: {'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07}
+{'loss': 0.7699, 'grad_norm': 7.060013771057129, 'learning_rate': 5.353206817736101e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}
+[Rank 1] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}
+[Rank 0] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}[Rank 2] Trainer log: {'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07}
+
+{'loss': 0.7494, 'grad_norm': 3.444916248321533, 'learning_rate': 5.331216683730789e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}
+[Rank 0] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}[Rank 2] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}
+
+[Rank 1] Trainer log: {'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07}
+{'loss': 0.7373, 'grad_norm': 3.4014649391174316, 'learning_rate': 5.309270571758951e-07, 'epoch': 0.9}
+[Rank 1] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}[Rank 3] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}[Rank 0] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}
+
+[Rank 2] Trainer log: {'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07}
+
+{'loss': 0.7781, 'grad_norm': 5.809092998504639, 'learning_rate': 5.28736849202578e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}
+[Rank 0] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}[Rank 1] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}
+
+[Rank 2] Trainer log: {'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07}
+{'loss': 0.9076, 'grad_norm': 2.3170580863952637, 'learning_rate': 5.265510454715961e-07, 'epoch': 0.9}
+[Rank 2] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}
+[Rank 0] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}[Rank 3] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}
+[Rank 1] Trainer log: {'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07}
+
+{'loss': 0.9457, 'grad_norm': 6.048744201660156, 'learning_rate': 5.243696469993753e-07, 'epoch': 0.9}
+[Rank 2] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}
+[Rank 0] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}[Rank 3] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}
+
+[Rank 1] Trainer log: {'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07}
+{'loss': 0.8399, 'grad_norm': 4.671056270599365, 'learning_rate': 5.221926548002876e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}[Rank 1] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}[Rank 0] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}
+
+[Rank 2] Trainer log: {'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07}
+
+{'loss': 0.5527, 'grad_norm': 22.148271560668945, 'learning_rate': 5.200200698866587e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}[Rank 2] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}[Rank 1] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}
+
+
+[Rank 0] Trainer log: {'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07}
+{'loss': 1.0388, 'grad_norm': 3.8115804195404053, 'learning_rate': 5.178518932687671e-07, 'epoch': 0.9}
+[Rank 3] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}[Rank 2] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}[Rank 1] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
+
+
+[Rank 0] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
+{'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07, 'epoch': 0.9}