Training in progress, step 4350
Browse files- adapter_model.safetensors +1 -1
- train.log +125 -0
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140991056
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:002a23ea4ec33d68169cf04be99ea953375df47ab38dedc2e7ebb6618e79408a
|
3 |
size 1140991056
|
train.log
CHANGED
@@ -8595,3 +8595,128 @@ Time to load cpu_adam op: 2.2494730949401855 seconds
|
|
8595 |
[Rank 0] Trainer log: {'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07}[Rank 2] Trainer log: {'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07}
|
8596 |
|
8597 |
{'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07, 'epoch': 0.89}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8595 |
[Rank 0] Trainer log: {'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07}[Rank 2] Trainer log: {'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07}
|
8596 |
|
8597 |
{'loss': 0.7007, 'grad_norm': 5.15255069732666, 'learning_rate': 6.292576566887787e-07, 'epoch': 0.89}
|
8598 |
+
[Rank 0] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}[Rank 1] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}[Rank 3] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}
|
8599 |
+
|
8600 |
+
|
8601 |
+
[Rank 2] Trainer log: {'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07}
|
8602 |
+
{'loss': 0.7336, 'grad_norm': 8.90782642364502, 'learning_rate': 6.268790568208116e-07, 'epoch': 0.89}
|
8603 |
+
[Rank 3] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}[Rank 0] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}[Rank 1] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}
|
8604 |
+
|
8605 |
+
|
8606 |
+
[Rank 2] Trainer log: {'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07}
|
8607 |
+
{'loss': 0.9483, 'grad_norm': 5.516266345977783, 'learning_rate': 6.24504815557967e-07, 'epoch': 0.89}
|
8608 |
+
[Rank 1] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}[Rank 3] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}
|
8609 |
+
[Rank 2] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}
|
8610 |
+
|
8611 |
+
[Rank 0] Trainer log: {'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07}
|
8612 |
+
{'loss': 0.9115, 'grad_norm': 5.219529628753662, 'learning_rate': 6.221349340042937e-07, 'epoch': 0.89}
|
8613 |
+
[Rank 0] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}[Rank 1] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}
|
8614 |
+
[Rank 3] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}
|
8615 |
+
|
8616 |
+
[Rank 2] Trainer log: {'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07}
|
8617 |
+
{'loss': 0.799, 'grad_norm': 3.442587375640869, 'learning_rate': 6.197694132618115e-07, 'epoch': 0.89}
|
8618 |
+
[Rank 3] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}[Rank 1] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}[Rank 2] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}
|
8619 |
+
|
8620 |
+
|
8621 |
+
[Rank 0] Trainer log: {'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07}
|
8622 |
+
{'loss': 0.6708, 'grad_norm': 7.776933193206787, 'learning_rate': 6.174082544305149e-07, 'epoch': 0.89}
|
8623 |
+
[Rank 3] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}
|
8624 |
+
[Rank 0] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}[Rank 2] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}
|
8625 |
+
[Rank 1] Trainer log: {'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07}
|
8626 |
+
|
8627 |
+
{'loss': 0.8275, 'grad_norm': 4.444672107696533, 'learning_rate': 6.1505145860837e-07, 'epoch': 0.89}
|
8628 |
+
[Rank 3] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}
|
8629 |
+
[Rank 1] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}[Rank 0] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}
|
8630 |
+
[Rank 2] Trainer log: {'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07}
|
8631 |
+
|
8632 |
+
{'loss': 0.8794, 'grad_norm': 4.909470558166504, 'learning_rate': 6.126990268913091e-07, 'epoch': 0.89}
|
8633 |
+
[Rank 1] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}[Rank 0] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}
|
8634 |
+
[Rank 3] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}
|
8635 |
+
|
8636 |
+
[Rank 2] Trainer log: {'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07}
|
8637 |
+
{'loss': 0.691, 'grad_norm': 6.546082019805908, 'learning_rate': 6.103509603732416e-07, 'epoch': 0.89}
|
8638 |
+
[Rank 3] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
|
8639 |
+
[Rank 1] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
|
8640 |
+
[Rank 2] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
|
8641 |
+
[Rank 0] Trainer log: {'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07}
|
8642 |
+
{'loss': 0.921, 'grad_norm': 5.066746234893799, 'learning_rate': 6.080072601460451e-07, 'epoch': 0.89}
|
8643 |
+
[Rank 2] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}[Rank 3] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}[Rank 1] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}
|
8644 |
+
|
8645 |
+
|
8646 |
+
[Rank 0] Trainer log: {'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07}
|
8647 |
+
{'loss': 0.7493, 'grad_norm': 9.553936958312988, 'learning_rate': 6.056679272995647e-07, 'epoch': 0.89}
|
8648 |
+
[Rank 0] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}[Rank 3] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}[Rank 1] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}
|
8649 |
+
|
8650 |
+
|
8651 |
+
[Rank 2] Trainer log: {'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07}
|
8652 |
+
{'loss': 0.7182, 'grad_norm': 8.36179256439209, 'learning_rate': 6.033329629216189e-07, 'epoch': 0.89}
|
8653 |
+
[Rank 3] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}[Rank 1] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}
|
8654 |
+
[Rank 2] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}
|
8655 |
+
|
8656 |
+
[Rank 0] Trainer log: {'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07}
|
8657 |
+
{'loss': 0.8551, 'grad_norm': 2.233321189880371, 'learning_rate': 6.010023680979893e-07, 'epoch': 0.89}
|
8658 |
+
[Rank 3] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}
|
8659 |
+
[Rank 2] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}
|
8660 |
+
[Rank 0] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}[Rank 1] Trainer log: {'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07}
|
8661 |
+
|
8662 |
+
{'loss': 0.9446, 'grad_norm': 5.3710737228393555, 'learning_rate': 5.986761439124289e-07, 'epoch': 0.89}
|
8663 |
+
[Rank 3] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}
|
8664 |
+
[Rank 0] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}[Rank 1] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}
|
8665 |
+
|
8666 |
+
[Rank 2] Trainer log: {'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07}
|
8667 |
+
{'loss': 0.8222, 'grad_norm': 5.1239848136901855, 'learning_rate': 5.963542914466569e-07, 'epoch': 0.89}
|
8668 |
+
[Rank 0] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}[Rank 3] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}[Rank 1] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}
|
8669 |
+
|
8670 |
+
[Rank 2] Trainer log: {'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07}
|
8671 |
+
|
8672 |
+
{'loss': 0.9728, 'grad_norm': 2.581252336502075, 'learning_rate': 5.94036811780363e-07, 'epoch': 0.9}
|
8673 |
+
[Rank 1] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}[Rank 3] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}[Rank 0] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}
|
8674 |
+
|
8675 |
+
|
8676 |
+
[Rank 2] Trainer log: {'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07}
|
8677 |
+
{'loss': 0.8425, 'grad_norm': 2.5363917350769043, 'learning_rate': 5.917237059911963e-07, 'epoch': 0.9}
|
8678 |
+
[Rank 3] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}[Rank 1] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}
|
8679 |
+
|
8680 |
+
[Rank 0] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}[Rank 2] Trainer log: {'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07}
|
8681 |
+
|
8682 |
+
{'loss': 0.6119, 'grad_norm': 8.582449913024902, 'learning_rate': 5.894149751547806e-07, 'epoch': 0.9}
|
8683 |
+
[Rank 3] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}[Rank 2] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}
|
8684 |
+
|
8685 |
+
[Rank 0] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}[Rank 1] Trainer log: {'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07}
|
8686 |
+
|
8687 |
+
{'loss': 0.9873, 'grad_norm': 9.198247909545898, 'learning_rate': 5.871106203447019e-07, 'epoch': 0.9}
|
8688 |
+
[Rank 1] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}[Rank 3] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}
|
8689 |
+
[Rank 2] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}
|
8690 |
+
|
8691 |
+
[Rank 0] Trainer log: {'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07}
|
8692 |
+
{'loss': 0.8318, 'grad_norm': 4.501786231994629, 'learning_rate': 5.848106426325095e-07, 'epoch': 0.9}
|
8693 |
+
[Rank 3] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}[Rank 0] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}[Rank 1] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}
|
8694 |
+
|
8695 |
+
|
8696 |
+
[Rank 2] Trainer log: {'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07}
|
8697 |
+
{'loss': 0.7579, 'grad_norm': 4.943948268890381, 'learning_rate': 5.825150430877157e-07, 'epoch': 0.9}
|
8698 |
+
[Rank 2] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}[Rank 3] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}
|
8699 |
+
|
8700 |
+
[Rank 1] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}
|
8701 |
+
[Rank 0] Trainer log: {'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07}
|
8702 |
+
{'loss': 1.0215, 'grad_norm': 2.7522549629211426, 'learning_rate': 5.802238227778045e-07, 'epoch': 0.9}
|
8703 |
+
[Rank 0] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}[Rank 1] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}
|
8704 |
+
[Rank 3] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}
|
8705 |
+
|
8706 |
+
[Rank 2] Trainer log: {'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07}
|
8707 |
+
{'loss': 0.738, 'grad_norm': 4.653674125671387, 'learning_rate': 5.779369827682158e-07, 'epoch': 0.9}
|
8708 |
+
[Rank 3] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}[Rank 0] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}
|
8709 |
+
|
8710 |
+
[Rank 2] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}
|
8711 |
+
[Rank 1] Trainer log: {'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07}
|
8712 |
+
{'loss': 0.7365, 'grad_norm': 4.448984622955322, 'learning_rate': 5.756545241223554e-07, 'epoch': 0.9}
|
8713 |
+
[Rank 2] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
|
8714 |
+
[Rank 0] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
|
8715 |
+
[Rank 1] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
|
8716 |
+
[Rank 3] Trainer log: {'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07}
|
8717 |
+
{'loss': 0.9334, 'grad_norm': 3.2630815505981445, 'learning_rate': 5.733764479015935e-07, 'epoch': 0.9}
|
8718 |
+
[Rank 2] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
|
8719 |
+
[Rank 3] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}[Rank 0] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
|
8720 |
+
|
8721 |
+
[Rank 1] Trainer log: {'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07}
|
8722 |
+
{'loss': 0.6217, 'grad_norm': 7.506957530975342, 'learning_rate': 5.711027551652593e-07, 'epoch': 0.9}
|