aiden200 committed on
Commit 284aab5 · verified · 1 Parent(s): bc27afe

Training in progress, step 4400

Files changed (2)
  1. adapter_model.safetensors +1 -1
  2. train.log +125 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d72193233033e5c6c0c99c40c7040346b97f0f0198750f409969f2c3d4faa937
+ oid sha256:adadadd83a5ad68250f3e1b4189b3e81254529415f9d29dce99337e536692f04
  size 1140991056
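
The block above is a Git LFS pointer file, not the weights themselves: the repository tracks only the version/oid/size triplet, and this commit swaps the sha256 oid while the size (1140991056 bytes) is unchanged, i.e. the adapter was overwritten in place by a newer checkpoint. A minimal sketch of validating a downloaded blob against such a pointer, assuming local copies of both (paths and the helper name are illustrative; standard library only):

```python
import hashlib
import os

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against a Git LFS pointer's oid and size."""
    # Pointer files are "key value" lines: version, oid, size.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value

    # Size check is cheap; a mismatch usually means a truncated download.
    if os.path.getsize(blob_path) != int(fields["size"]):
        return False

    # oid is "sha256:<hexdigest>"; hash the blob in 1 MiB chunks.
    algo, _, expected = fields["oid"].partition(":")
    digest = hashlib.new(algo)
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected

# Hypothetical usage:
# verify_lfs_pointer("adapter_model.safetensors.pointer", "adapter_model.safetensors")
```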
train.log CHANGED
@@ -8845,3 +8845,128 @@ Time to load cpu_adam op: 2.2494730949401855 seconds
 
  [Rank 0] Trainer log: {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07}
  {'loss': 0.8972, 'grad_norm': 2.9750633239746094, 'learning_rate': 5.156881259548363e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}[Rank 1] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}[Rank 2] Trainer log: {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07}
+
+ {'loss': 0.9015, 'grad_norm': 5.232257843017578, 'learning_rate': 5.135287689510415e-07, 'epoch': 0.9}
+ [Rank 2] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}[Rank 1] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}[Rank 3] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}
+
+
+ [Rank 0] Trainer log: {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07}
+ {'loss': 0.9241, 'grad_norm': 4.551783084869385, 'learning_rate': 5.113738232615096e-07, 'epoch': 0.9}
+ [Rank 1] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}[Rank 3] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
+ [Rank 2] Trainer log: {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07}
+ {'loss': 0.912, 'grad_norm': 20.157852172851562, 'learning_rate': 5.092232898883143e-07, 'epoch': 0.9}
+ [Rank 1] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}[Rank 3] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}
+
+ [Rank 0] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}[Rank 2] Trainer log: {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07}
+
+ {'loss': 0.6931, 'grad_norm': 6.071656703948975, 'learning_rate': 5.070771698314758e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
+ [Rank 0] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}[Rank 1] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
+ [Rank 2] Trainer log: {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07}
+
+ {'loss': 0.9125, 'grad_norm': 11.450650215148926, 'learning_rate': 5.04935464088967e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}[Rank 1] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}[Rank 0] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07}
+ {'loss': 0.6252, 'grad_norm': 5.621769428253174, 'learning_rate': 5.027981736567012e-07, 'epoch': 0.9}
+ [Rank 0] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}[Rank 1] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}[Rank 3] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07}
+ {'loss': 0.7758, 'grad_norm': 2.5873305797576904, 'learning_rate': 5.006652995285433e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}[Rank 1] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}[Rank 0] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07}
+ {'loss': 0.7091, 'grad_norm': 6.8832926750183105, 'learning_rate': 4.985368426963044e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}
+ [Rank 0] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}[Rank 1] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}[Rank 2] Trainer log: {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07}
+
+
+ {'loss': 0.8563, 'grad_norm': 6.846954822540283, 'learning_rate': 4.964128041497395e-07, 'epoch': 0.9}
+ [Rank 0] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
+ [Rank 1] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
+ [Rank 2] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
+ [Rank 3] Trainer log: {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07}
+ {'loss': 0.9188, 'grad_norm': 2.3758630752563477, 'learning_rate': 4.942931848765497e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}[Rank 0] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}[Rank 2] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}
+
+ [Rank 1] Trainer log: {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07}
+
+ {'loss': 0.8354, 'grad_norm': 6.890372276306152, 'learning_rate': 4.92177985862382e-07, 'epoch': 0.9}
+ [Rank 3] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}[Rank 1] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
+ [Rank 0] Trainer log: {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07}
+ {'loss': 0.9419, 'grad_norm': 3.9403789043426514, 'learning_rate': 4.900672080908275e-07, 'epoch': 0.9}
+ [Rank 2] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
+ [Rank 3] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
+ [Rank 0] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}[Rank 1] Trainer log: {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07}
+
+ {'loss': 0.8959, 'grad_norm': 5.921530246734619, 'learning_rate': 4.87960852543421e-07, 'epoch': 0.91}
+ [Rank 3] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}[Rank 0] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
+ [Rank 1] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
+
+ [Rank 2] Trainer log: {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07}
+ {'loss': 1.055, 'grad_norm': 2.1771347522735596, 'learning_rate': 4.858589201996433e-07, 'epoch': 0.91}
+ [Rank 0] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
+ [Rank 3] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
+ [Rank 1] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
+ [Rank 2] Trainer log: {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07}
+ {'loss': 0.8018, 'grad_norm': 9.169463157653809, 'learning_rate': 4.837614120369128e-07, 'epoch': 0.91}
+ [Rank 1] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}[Rank 0] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
+ [Rank 3] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07}
+ {'loss': 0.7282, 'grad_norm': 2.8889811038970947, 'learning_rate': 4.816683290305968e-07, 'epoch': 0.91}
+ [Rank 0] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}[Rank 3] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
+ [Rank 1] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
+ [Rank 2] Trainer log: {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07}
+
+ {'loss': 0.5659, 'grad_norm': 1.881841778755188, 'learning_rate': 4.79579672153998e-07, 'epoch': 0.91}
+ [Rank 0] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}[Rank 3] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}[Rank 1] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07}
+ {'loss': 0.8104, 'grad_norm': 3.9715282917022705, 'learning_rate': 4.774954423783706e-07, 'epoch': 0.91}
+ [Rank 3] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
+ [Rank 0] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}[Rank 2] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
+
+ [Rank 1] Trainer log: {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07}
+ {'loss': 0.7233, 'grad_norm': 6.943088054656982, 'learning_rate': 4.7541564067290046e-07, 'epoch': 0.91}
+ [Rank 1] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
+ [Rank 0] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}[Rank 3] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07}
+ {'loss': 0.7962, 'grad_norm': 6.085477828979492, 'learning_rate': 4.7334026800471945e-07, 'epoch': 0.91}
+ [Rank 1] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}[Rank 3] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}[Rank 0] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}
+
+
+ [Rank 2] Trainer log: {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07}
+ {'loss': 0.6936, 'grad_norm': 3.458329677581787, 'learning_rate': 4.712693253389e-07, 'epoch': 0.91}
+ [Rank 1] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
+ [Rank 0] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}[Rank 3] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
+ [Rank 2] Trainer log: {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07}
+
+ {'loss': 0.6519, 'grad_norm': 4.977847099304199, 'learning_rate': 4.6920281363845297e-07, 'epoch': 0.91}
+ [Rank 3] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}[Rank 0] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}[Rank 2] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}
+
+
+ [Rank 1] Trainer log: {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07}
+ {'loss': 0.9911, 'grad_norm': 5.911740303039551, 'learning_rate': 4.6714073386432745e-07, 'epoch': 0.91}
+ [Rank 2] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}[Rank 1] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}[Rank 3] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}
+
+
+ [Rank 0] Trainer log: {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07}
+ {'loss': 1.0587, 'grad_norm': 3.858769416809082, 'learning_rate': 4.6508308697541525e-07, 'epoch': 0.91}
+ [Rank 3] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}[Rank 0] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
+ [Rank 1] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
+
+ [Rank 2] Trainer log: {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07}
+ {'loss': 0.863, 'grad_norm': 2.815195083618164, 'learning_rate': 4.6302987392854547e-07, 'epoch': 0.91}
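
The 125 appended lines show four training ranks appending to one train.log: per-rank entries interleave and sometimes fuse onto a single line (e.g. [Rank 3] ...}[Rank 1] ...), while the aggregate entry carrying 'epoch' appears exactly once per optimizer step. A minimal sketch of recovering a clean per-step series from a log in this shape (path and function name are illustrative):

```python
import ast
import re

# The aggregate line per step is a plain Python dict literal that, unlike
# the per-rank echoes, contains an 'epoch' key -- use that to filter.
STEP_RE = re.compile(r"\{[^{}]*'epoch'[^{}]*\}")

def parse_train_log(path: str) -> list[dict]:
    """Extract one metrics dict per step from an interleaved multi-rank log."""
    steps = []
    with open(path) as f:
        for line in f:
            for match in STEP_RE.findall(line):
                steps.append(ast.literal_eval(match))
    return steps

# Hypothetical usage:
# metrics = parse_train_log("train.log")
# losses = [m["loss"] for m in metrics]        # e.g. 0.9015, 0.9241, ...
# lrs = [m["learning_rate"] for m in metrics]  # decaying as training nears epoch 1
```

Interleaving like this is typical when every rank prints to the same file; guarding the print with a rank check (only rank 0 logs) or funneling output through a single logging process keeps the file clean.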