aiden200 committed
Commit e48275b · verified · 1 Parent(s): 07396c1

Training in progress, step 4350

Files changed (2):
  1. adapter_model.safetensors +1 -1
  2. train.log +125 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:badb32843897978102b35b7bbcd3f3463077dd48911a93591713c6836235fdfc
+ oid sha256:002a23ea4ec33d68169cf04be99ea953375df47ab38dedc2e7ebb6618e79408a
  size 1140991056
train.log CHANGED
@@ -8595,3 +8595,128 @@ Time to load cpu_adam op: 2.2494730949401855 seconds
  [Rank 0] Trainer log: {'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07}[Rank 2] Trainer log: {'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07}

  {'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07, 'epoch': 0.89}
+ [Rank 0] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}[Rank 1] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}[Rank 3] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}
+ {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}[Rank 0] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}[Rank 1] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}
+ {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07, 'epoch': 0.89}
+ [Rank 1] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}[Rank 3] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}
+ [Rank 2] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}
+ {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07, 'epoch': 0.89}
+ [Rank 0] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}[Rank 1] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}
+ [Rank 3] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}
+ {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}[Rank 1] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}[Rank 2] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}
+
+
+ [Rank 0] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}
+ {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}
+ [Rank 0] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}[Rank 2] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}
+ [Rank 1] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}
+
+ {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}
+ [Rank 1] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}[Rank 0] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}
+ [Rank 2] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}
+
+ {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07, 'epoch': 0.89}
+ [Rank 1] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}[Rank 0] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}
+ [Rank 3] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}
+ {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
+ [Rank 1] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
+ [Rank 2] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
+ [Rank 0] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
+ {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07, 'epoch': 0.89}
+ [Rank 2] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}[Rank 3] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}[Rank 1] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}
+
+
+ [Rank 0] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}
+ {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07, 'epoch': 0.89}
+ [Rank 0] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}[Rank 3] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}[Rank 1] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}
+ {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}[Rank 1] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}
+ [Rank 2] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}
+ {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}
+ [Rank 2] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}
+ [Rank 0] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}[Rank 1] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}
+
+ {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07, 'epoch': 0.89}
+ [Rank 3] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}
+ [Rank 0] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}[Rank 1] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}
+ {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07, 'epoch': 0.89}
+ [Rank 0] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}[Rank 3] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}[Rank 1] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}
+
+ {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07, 'epoch': 0.9}
+ [Rank 1] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}[Rank 3] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}[Rank 0] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}
+ {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}[Rank 1] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}[Rank 2] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}
+
+ {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}[Rank 2] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}[Rank 1] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}
+
+ {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07, 'epoch': 0.9}
+ [Rank 1] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}[Rank 3] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}
+ [Rank 2] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}
+ {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}[Rank 0] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}[Rank 1] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}
+ {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07, 'epoch': 0.9}
+ [Rank 2] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}[Rank 3] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}
+
+ [Rank 1] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}
+ [Rank 0] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}
+ {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07, 'epoch': 0.9}
+ [Rank 0] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}[Rank 1] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}
+ [Rank 3] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}
+ {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}[Rank 0] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}
+ [Rank 1] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}
+ {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07, 'epoch': 0.9}
+ [Rank 2] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
+ [Rank 0] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
+ [Rank 1] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
+ [Rank 3] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
+ {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07, 'epoch': 0.9}
+ [Rank 2] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
+ [Rank 3] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}[Rank 0] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
+
+ [Rank 1] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
+ {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07, 'epoch': 0.9}