Training in progress, step 4400
Browse files- adapter_model.safetensors +1 -1
- train.log +125 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1140991056
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adadadd83a5ad68250f3e1b4189b3e81254529415f9d29dce99337e536692f04
|
| 3 |
size 1140991056
|
train.log
CHANGED
|
@@ -8845,3 +8845,128 @@ Time to load cpu_adam op: 2.2494730949401855 seconds
|
|
| 8845 |
|
| 8846 |
[Rank 0] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
|
| 8847 |
{'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07, 'epoch': 0.9}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8845 |
|
| 8846 |
[Rank 0] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
|
| 8847 |
{'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07, 'epoch': 0.9}
|
| 8848 |
+
[Rank 3] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}[Rank 1] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}
|
| 8849 |
+
|
| 8850 |
+
[Rank 0] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}[Rank 2] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}
|
| 8851 |
+
|
| 8852 |
+
{'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07, 'epoch': 0.9}
|
| 8853 |
+
[Rank 2] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}[Rank 1] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}[Rank 3] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}
|
| 8854 |
+
|
| 8855 |
+
|
| 8856 |
+
[Rank 0] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}
|
| 8857 |
+
{'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07, 'epoch': 0.9}
|
| 8858 |
+
[Rank 1] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}[Rank 3] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
|
| 8859 |
+
|
| 8860 |
+
[Rank 0] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
|
| 8861 |
+
[Rank 2] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
|
| 8862 |
+
{'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07, 'epoch': 0.9}
|
| 8863 |
+
[Rank 1] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}[Rank 3] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}
|
| 8864 |
+
|
| 8865 |
+
[Rank 0] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}[Rank 2] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}
|
| 8866 |
+
|
| 8867 |
+
{'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07, 'epoch': 0.9}
|
| 8868 |
+
[Rank 3] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
|
| 8869 |
+
[Rank 0] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}[Rank 1] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
|
| 8870 |
+
[Rank 2] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
|
| 8871 |
+
|
| 8872 |
+
{'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07, 'epoch': 0.9}
|
| 8873 |
+
[Rank 3] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}[Rank 1] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}[Rank 0] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}
|
| 8874 |
+
|
| 8875 |
+
|
| 8876 |
+
[Rank 2] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}
|
| 8877 |
+
{'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07, 'epoch': 0.9}
|
| 8878 |
+
[Rank 0] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}[Rank 1] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}[Rank 3] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}
|
| 8879 |
+
|
| 8880 |
+
|
| 8881 |
+
[Rank 2] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}
|
| 8882 |
+
{'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07, 'epoch': 0.9}
|
| 8883 |
+
[Rank 3] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}[Rank 1] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}[Rank 0] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}
|
| 8884 |
+
|
| 8885 |
+
|
| 8886 |
+
[Rank 2] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}
|
| 8887 |
+
{'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07, 'epoch': 0.9}
|
| 8888 |
+
[Rank 3] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}
|
| 8889 |
+
[Rank 0] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}[Rank 1] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}[Rank 2] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}
|
| 8890 |
+
|
| 8891 |
+
|
| 8892 |
+
{'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07, 'epoch': 0.9}
|
| 8893 |
+
[Rank 0] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
| 8894 |
+
[Rank 1] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
| 8895 |
+
[Rank 2] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
| 8896 |
+
[Rank 3] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
|
| 8897 |
+
{'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07, 'epoch': 0.9}
|
| 8898 |
+
[Rank 3] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}[Rank 0] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}[Rank 2] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}
|
| 8899 |
+
|
| 8900 |
+
[Rank 1] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}
|
| 8901 |
+
|
| 8902 |
+
{'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07, 'epoch': 0.9}
|
| 8903 |
+
[Rank 3] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}[Rank 1] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
|
| 8904 |
+
|
| 8905 |
+
[Rank 2] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
|
| 8906 |
+
[Rank 0] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
|
| 8907 |
+
{'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07, 'epoch': 0.9}
|
| 8908 |
+
[Rank 2] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
|
| 8909 |
+
[Rank 3] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
|
| 8910 |
+
[Rank 0] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}[Rank 1] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
|
| 8911 |
+
|
| 8912 |
+
{'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07, 'epoch': 0.91}
|
| 8913 |
+
[Rank 3] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}[Rank 0] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
|
| 8914 |
+
[Rank 1] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
|
| 8915 |
+
|
| 8916 |
+
[Rank 2] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
|
| 8917 |
+
{'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07, 'epoch': 0.91}
|
| 8918 |
+
[Rank 0] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
| 8919 |
+
[Rank 3] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
| 8920 |
+
[Rank 1] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
| 8921 |
+
[Rank 2] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
|
| 8922 |
+
{'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07, 'epoch': 0.91}
|
| 8923 |
+
[Rank 1] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}[Rank 0] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
|
| 8924 |
+
[Rank 3] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
|
| 8925 |
+
|
| 8926 |
+
[Rank 2] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
|
| 8927 |
+
{'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07, 'epoch': 0.91}
|
| 8928 |
+
[Rank 0] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}[Rank 3] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
|
| 8929 |
+
[Rank 1] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
|
| 8930 |
+
[Rank 2] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
|
| 8931 |
+
|
| 8932 |
+
{'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07, 'epoch': 0.91}
|
| 8933 |
+
[Rank 0] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}[Rank 3] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}[Rank 1] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}
|
| 8934 |
+
|
| 8935 |
+
|
| 8936 |
+
[Rank 2] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}
|
| 8937 |
+
{'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07, 'epoch': 0.91}
|
| 8938 |
+
[Rank 3] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
|
| 8939 |
+
[Rank 0] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}[Rank 2] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
|
| 8940 |
+
|
| 8941 |
+
[Rank 1] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
|
| 8942 |
+
{'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07, 'epoch': 0.91}
|
| 8943 |
+
[Rank 1] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
|
| 8944 |
+
[Rank 0] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}[Rank 3] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
|
| 8945 |
+
|
| 8946 |
+
[Rank 2] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
|
| 8947 |
+
{'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07, 'epoch': 0.91}
|
| 8948 |
+
[Rank 1] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}[Rank 3] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}[Rank 0] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}
|
| 8949 |
+
|
| 8950 |
+
|
| 8951 |
+
[Rank 2] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}
|
| 8952 |
+
{'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07, 'epoch': 0.91}
|
| 8953 |
+
[Rank 1] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
|
| 8954 |
+
[Rank 0] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}[Rank 3] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
|
| 8955 |
+
[Rank 2] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
|
| 8956 |
+
|
| 8957 |
+
{'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07, 'epoch': 0.91}
|
| 8958 |
+
[Rank 3] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}[Rank 0] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}[Rank 2] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}
|
| 8959 |
+
|
| 8960 |
+
|
| 8961 |
+
[Rank 1] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}
|
| 8962 |
+
{'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07, 'epoch': 0.91}
|
| 8963 |
+
[Rank 2] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}[Rank 1] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}[Rank 3] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}
|
| 8964 |
+
|
| 8965 |
+
|
| 8966 |
+
[Rank 0] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}
|
| 8967 |
+
{'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07, 'epoch': 0.91}
|
| 8968 |
+
[Rank 3] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}[Rank 0] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
|
| 8969 |
+
[Rank 1] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
|
| 8970 |
+
|
| 8971 |
+
[Rank 2] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
|
| 8972 |
+
{'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07, 'epoch': 0.91}
|