Training in progress, step 4400
Browse files- adapter_model.safetensors +1 -1
- train.log +125 -0
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140991056
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adadadd83a5ad68250f3e1b4189b3e81254529415f9d29dce99337e536692f04
|
3 |
size 1140991056
|
train.log
CHANGED
@@ -8845,3 +8845,128 @@ Time to load cpu_adam op: 2.2494730949401855 seconds
|
|
8845 |
|
8846 |
[Rank 0] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
|
8847 |
{'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07, 'epoch': 0.9}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8845 |
|
8846 |
[Rank 0] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
|
8847 |
{'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07, 'epoch': 0.9}
|
8848 |
+
[Rank 3] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}[Rank 1] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}
|
8849 |
+
|
8850 |
+
[Rank 0] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}[Rank 2] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}
|
8851 |
+
|
8852 |
+
{'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07, 'epoch': 0.9}
|
8853 |
+
[Rank 2] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}[Rank 1] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}[Rank 3] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}
|
8854 |
+
|
8855 |
+
|
8856 |
+
[Rank 0] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}
|
8857 |
+
{'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07, 'epoch': 0.9}
|
8858 |
+
[Rank 1] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}[Rank 3] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
|
8859 |
+
|
8860 |
+
[Rank 0] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
|
8861 |
+
[Rank 2] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
|
8862 |
+
{'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07, 'epoch': 0.9}
|
8863 |
+
[Rank 1] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}[Rank 3] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}
|
8864 |
+
|
8865 |
+
[Rank 0] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}[Rank 2] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}
|
8866 |
+
|
8867 |
+
{'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07, 'epoch': 0.9}
|
8868 |
+
[Rank 3] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
|
8869 |
+
[Rank 0] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}[Rank 1] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
|
8870 |
+
[Rank 2] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
|
8871 |
+
|
8872 |
+
{'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07, 'epoch': 0.9}
|
8873 |
+
[Rank 3] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}[Rank 1] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}[Rank 0] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}
|
8874 |
+
|
8875 |
+
|
8876 |
+
[Rank 2] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}
|
8877 |
+
{'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07, 'epoch': 0.9}
|
8878 |
+
[Rank 0] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}[Rank 1] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}[Rank 3] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}
|
8879 |
+
|
8880 |
+
|
8881 |
+
[Rank 2] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}
|
8882 |
+
{'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07, 'epoch': 0.9}
|
8883 |
+
[Rank 3] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}[Rank 1] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}[Rank 0] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}
|
8884 |
+
|
8885 |
+
|
8886 |
+
[Rank 2] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}
|
8887 |
+
{'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07, 'epoch': 0.9}
|
8888 |
+
[Rank 3] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}
|
8889 |
+
[Rank 0] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}[Rank 1] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}[Rank 2] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}
|
8890 |
+
|
8891 |
+
|
8892 |
+
{'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07, 'epoch': 0.9}
|
8893 |
+
[Rank 0] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
8894 |
+
[Rank 1] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
8895 |
+
[Rank 2] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
8896 |
+
[Rank 3] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
8897 |
+
{'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07, 'epoch': 0.9}
|
8898 |
+
[Rank 3] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}[Rank 0] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}[Rank 2] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}
|
8899 |
+
|
8900 |
+
[Rank 1] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}
|
8901 |
+
|
8902 |
+
{'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07, 'epoch': 0.9}
|
8903 |
+
[Rank 3] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}[Rank 1] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
|
8904 |
+
|
8905 |
+
[Rank 2] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
|
8906 |
+
[Rank 0] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
|
8907 |
+
{'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07, 'epoch': 0.9}
|
8908 |
+
[Rank 2] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
|
8909 |
+
[Rank 3] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
|
8910 |
+
[Rank 0] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}[Rank 1] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
|
8911 |
+
|
8912 |
+
{'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07, 'epoch': 0.91}
|
8913 |
+
[Rank 3] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}[Rank 0] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
|
8914 |
+
[Rank 1] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
|
8915 |
+
|
8916 |
+
[Rank 2] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
|
8917 |
+
{'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07, 'epoch': 0.91}
|
8918 |
+
[Rank 0] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
8919 |
+
[Rank 3] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
8920 |
+
[Rank 1] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
8921 |
+
[Rank 2] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
8922 |
+
{'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07, 'epoch': 0.91}
|
8923 |
+
[Rank 1] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}[Rank 0] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
|
8924 |
+
[Rank 3] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
|
8925 |
+
|
8926 |
+
[Rank 2] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
|
8927 |
+
{'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07, 'epoch': 0.91}
|
8928 |
+
[Rank 0] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}[Rank 3] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
|
8929 |
+
[Rank 1] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
|
8930 |
+
[Rank 2] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
|
8931 |
+
|
8932 |
+
{'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07, 'epoch': 0.91}
|
8933 |
+
[Rank 0] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}[Rank 3] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}[Rank 1] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}
|
8934 |
+
|
8935 |
+
|
8936 |
+
[Rank 2] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}
|
8937 |
+
{'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07, 'epoch': 0.91}
|
8938 |
+
[Rank 3] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
|
8939 |
+
[Rank 0] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}[Rank 2] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
|
8940 |
+
|
8941 |
+
[Rank 1] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
|
8942 |
+
{'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07, 'epoch': 0.91}
|
8943 |
+
[Rank 1] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
|
8944 |
+
[Rank 0] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}[Rank 3] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
|
8945 |
+
|
8946 |
+
[Rank 2] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
|
8947 |
+
{'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07, 'epoch': 0.91}
|
8948 |
+
[Rank 1] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}[Rank 3] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}[Rank 0] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}
|
8949 |
+
|
8950 |
+
|
8951 |
+
[Rank 2] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}
|
8952 |
+
{'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07, 'epoch': 0.91}
|
8953 |
+
[Rank 1] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
|
8954 |
+
[Rank 0] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}[Rank 3] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
|
8955 |
+
[Rank 2] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
|
8956 |
+
|
8957 |
+
{'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07, 'epoch': 0.91}
|
8958 |
+
[Rank 3] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}[Rank 0] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}[Rank 2] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}
|
8959 |
+
|
8960 |
+
|
8961 |
+
[Rank 1] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}
|
8962 |
+
{'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07, 'epoch': 0.91}
|
8963 |
+
[Rank 2] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}[Rank 1] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}[Rank 3] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}
|
8964 |
+
|
8965 |
+
|
8966 |
+
[Rank 0] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}
|
8967 |
+
{'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07, 'epoch': 0.91}
|
8968 |
+
[Rank 3] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}[Rank 0] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
|
8969 |
+
[Rank 1] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
|
8970 |
+
|
8971 |
+
[Rank 2] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
|
8972 |
+
{'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07, 'epoch': 0.91}
|