sandernotenbaert commited on
Commit
e1c8f51
·
verified ·
1 Parent(s): 6c75a2d

Training in progress, step 1900, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ffea88dc725e20729d532103e571a95a4de3551370932d9d4e61da3ea133608
3
  size 66690264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b697653fb1fadb3338a18ba8ca2e508da3603eebebb92cd2d4d0a19bf2b728f
3
  size 66690264
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f7af3b482cdc49aae6cc245c60e2136652a9553b92d1c5fe46630580e99a200
3
  size 133393631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741805ded654520994730dcf337a922e9f9e0b80f29b4d60e7af05b2ee502050
3
  size 133393631
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45acb58335aba7afa287af2edce6d3a48d6f29a6175f1f95ebd9de42d1629344
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ac6cc9f96cb23f38dad14b3c8b94857940a2c54443bb178825363cab7260c0
3
  size 13990
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d20175a7ce58b36f63cec288bb82ff2aed63eaabfb6a48072fbb4e518a9945a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2375bb704062abe009d16c81c5208b4a77486ea2b7e1b6be5a4f624a4401587f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1000,
3
  "best_metric": 9.911575317382812,
4
  "best_model_checkpoint": "./models/v-001/checkpoint-1000",
5
- "epoch": 51.65217391304348,
6
  "eval_steps": 100,
7
- "global_step": 1600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -712,6 +712,138 @@
712
  "eval_samples_per_second": 9.516,
713
  "eval_steps_per_second": 0.599,
714
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
  }
716
  ],
717
  "logging_steps": 20,
@@ -731,7 +863,7 @@
731
  "attributes": {}
732
  }
733
  },
734
- "total_flos": 2088670266432000.0,
735
  "train_batch_size": 8,
736
  "trial_name": null,
737
  "trial_params": null
 
2
  "best_global_step": 1000,
3
  "best_metric": 9.911575317382812,
4
  "best_model_checkpoint": "./models/v-001/checkpoint-1000",
5
+ "epoch": 61.32608695652174,
6
  "eval_steps": 100,
7
+ "global_step": 1900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
712
  "eval_samples_per_second": 9.516,
713
  "eval_steps_per_second": 0.599,
714
  "step": 1600
715
+ },
716
+ {
717
+ "epoch": 52.29347826086956,
718
+ "grad_norm": 0.8501729369163513,
719
+ "learning_rate": 1.7187640879434553e-05,
720
+ "loss": 3.7589,
721
+ "step": 1620
722
+ },
723
+ {
724
+ "epoch": 52.94565217391305,
725
+ "grad_norm": 0.8114346861839294,
726
+ "learning_rate": 1.552805187300389e-05,
727
+ "loss": 3.8172,
728
+ "step": 1640
729
+ },
730
+ {
731
+ "epoch": 53.58695652173913,
732
+ "grad_norm": 0.8227590918540955,
733
+ "learning_rate": 1.3937884782483484e-05,
734
+ "loss": 3.6933,
735
+ "step": 1660
736
+ },
737
+ {
738
+ "epoch": 54.22826086956522,
739
+ "grad_norm": 0.7768607139587402,
740
+ "learning_rate": 1.242034199277008e-05,
741
+ "loss": 3.8079,
742
+ "step": 1680
743
+ },
744
+ {
745
+ "epoch": 54.880434782608695,
746
+ "grad_norm": 0.8110019564628601,
747
+ "learning_rate": 1.097847963308351e-05,
748
+ "loss": 3.681,
749
+ "step": 1700
750
+ },
751
+ {
752
+ "epoch": 54.880434782608695,
753
+ "eval_accuracy": 0.0008273447429635729,
754
+ "eval_loss": 10.178533554077148,
755
+ "eval_runtime": 31.6166,
756
+ "eval_samples_per_second": 9.552,
757
+ "eval_steps_per_second": 0.601,
758
+ "step": 1700
759
+ },
760
+ {
761
+ "epoch": 55.52173913043478,
762
+ "grad_norm": 0.8320772647857666,
763
+ "learning_rate": 9.615201422329406e-06,
764
+ "loss": 3.6494,
765
+ "step": 1720
766
+ },
767
+ {
768
+ "epoch": 56.16304347826087,
769
+ "grad_norm": 0.7713989019393921,
770
+ "learning_rate": 8.333252821395526e-06,
771
+ "loss": 3.7021,
772
+ "step": 1740
773
+ },
774
+ {
775
+ "epoch": 56.81521739130435,
776
+ "grad_norm": 0.7743974924087524,
777
+ "learning_rate": 7.135215504159115e-06,
778
+ "loss": 3.7404,
779
+ "step": 1760
780
+ },
781
+ {
782
+ "epoch": 57.45652173913044,
783
+ "grad_norm": 0.7438375353813171,
784
+ "learning_rate": 6.023502158339078e-06,
785
+ "loss": 3.6467,
786
+ "step": 1780
787
+ },
788
+ {
789
+ "epoch": 58.09782608695652,
790
+ "grad_norm": 0.8585782051086426,
791
+ "learning_rate": 5.000351626664207e-06,
792
+ "loss": 3.688,
793
+ "step": 1800
794
+ },
795
+ {
796
+ "epoch": 58.09782608695652,
797
+ "eval_accuracy": 0.0008273447429635729,
798
+ "eval_loss": 10.187094688415527,
799
+ "eval_runtime": 31.7031,
800
+ "eval_samples_per_second": 9.526,
801
+ "eval_steps_per_second": 0.599,
802
+ "step": 1800
803
+ },
804
+ {
805
+ "epoch": 58.75,
806
+ "grad_norm": 0.7883967161178589,
807
+ "learning_rate": 4.067824398141701e-06,
808
+ "loss": 3.6471,
809
+ "step": 1820
810
+ },
811
+ {
812
+ "epoch": 59.391304347826086,
813
+ "grad_norm": 0.7768418192863464,
814
+ "learning_rate": 3.2277984585066366e-06,
815
+ "loss": 3.6824,
816
+ "step": 1840
817
+ },
818
+ {
819
+ "epoch": 60.03260869565217,
820
+ "grad_norm": 0.7814875245094299,
821
+ "learning_rate": 2.4819655082085835e-06,
822
+ "loss": 3.6767,
823
+ "step": 1860
824
+ },
825
+ {
826
+ "epoch": 60.68478260869565,
827
+ "grad_norm": 0.8055542707443237,
828
+ "learning_rate": 1.8318275555520237e-06,
829
+ "loss": 3.5899,
830
+ "step": 1880
831
+ },
832
+ {
833
+ "epoch": 61.32608695652174,
834
+ "grad_norm": 0.8155117630958557,
835
+ "learning_rate": 1.2786938918515568e-06,
836
+ "loss": 3.6685,
837
+ "step": 1900
838
+ },
839
+ {
840
+ "epoch": 61.32608695652174,
841
+ "eval_accuracy": 0.0008233479084565024,
842
+ "eval_loss": 10.19116497039795,
843
+ "eval_runtime": 36.7079,
844
+ "eval_samples_per_second": 8.227,
845
+ "eval_steps_per_second": 0.518,
846
+ "step": 1900
847
  }
848
  ],
849
  "logging_steps": 20,
 
863
  "attributes": {}
864
  }
865
  },
866
+ "total_flos": 2479524451392000.0,
867
  "train_batch_size": 8,
868
  "trial_name": null,
869
  "trial_params": null