sandernotenbaert commited on
Commit
f2bdb0a
·
verified ·
1 Parent(s): 31ff66e

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7066ad238f161a22d0683e51ba0e6258706bc9aa90c5787542555b1a4a97128d
3
  size 141303176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee9cd99fd4e0bce962b6a15a1efc71256eb22dd10414b17bbba9e4d7ba5afef6
3
  size 141303176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:163c91ff02e2ddb5dacee5972ca6bb5038e19cbb07bed2cea725f9d8a4a6a70f
3
  size 282653387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99dda54b707289404dfd51e3037728795d5829b695ffde00ebd856d63ac56334
3
  size 282653387
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b652c6269b998b96ab924b2734c0818fab436c642524e13fc6cd4d9082e62b5
3
  size 14455
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a86db31fed9ea7cb26ae0c97afab5343450ff022d57fcc9367fce247f2ff49e
3
  size 14455
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77fc95ff7d80af03c17939ca85b9568e652a4cf9d2e9aaf323a942fee4510f31
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e60c4242800a43debb114b88afb278b08fae0786db1dbcddf2d0081073c925c
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0805199801384049,
6
  "eval_steps": 500,
7
- "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -710,6 +710,84 @@
710
  "eval_samples_per_second": 10.826,
711
  "eval_steps_per_second": 2.707,
712
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
  }
714
  ],
715
  "logging_steps": 50,
@@ -729,7 +807,7 @@
729
  "attributes": {}
730
  }
731
  },
732
- "total_flos": 1.6735071633408e+16,
733
  "train_batch_size": 4,
734
  "trial_name": null,
735
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08946664459822766,
6
  "eval_steps": 500,
7
+ "global_step": 5000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
710
  "eval_samples_per_second": 10.826,
711
  "eval_steps_per_second": 2.707,
712
  "step": 4500
713
+ },
714
+ {
715
+ "epoch": 0.08141464658438717,
716
+ "grad_norm": 0.6884807348251343,
717
+ "learning_rate": 4.521868787276342e-05,
718
+ "loss": 0.5328,
719
+ "step": 4550
720
+ },
721
+ {
722
+ "epoch": 0.08230931303036945,
723
+ "grad_norm": 0.8444722890853882,
724
+ "learning_rate": 4.5715705765407554e-05,
725
+ "loss": 0.5296,
726
+ "step": 4600
727
+ },
728
+ {
729
+ "epoch": 0.08320397947635173,
730
+ "grad_norm": 0.7077602744102478,
731
+ "learning_rate": 4.621272365805169e-05,
732
+ "loss": 0.5303,
733
+ "step": 4650
734
+ },
735
+ {
736
+ "epoch": 0.084098645922334,
737
+ "grad_norm": 0.6824318766593933,
738
+ "learning_rate": 4.670974155069583e-05,
739
+ "loss": 0.5311,
740
+ "step": 4700
741
+ },
742
+ {
743
+ "epoch": 0.08499331236831628,
744
+ "grad_norm": 0.8206039071083069,
745
+ "learning_rate": 4.720675944333996e-05,
746
+ "loss": 0.5283,
747
+ "step": 4750
748
+ },
749
+ {
750
+ "epoch": 0.08588797881429856,
751
+ "grad_norm": 0.7358501553535461,
752
+ "learning_rate": 4.7703777335984095e-05,
753
+ "loss": 0.5325,
754
+ "step": 4800
755
+ },
756
+ {
757
+ "epoch": 0.08678264526028083,
758
+ "grad_norm": 0.5400606393814087,
759
+ "learning_rate": 4.8200795228628234e-05,
760
+ "loss": 0.5271,
761
+ "step": 4850
762
+ },
763
+ {
764
+ "epoch": 0.08767731170626311,
765
+ "grad_norm": 0.6589324474334717,
766
+ "learning_rate": 4.8697813121272365e-05,
767
+ "loss": 0.5265,
768
+ "step": 4900
769
+ },
770
+ {
771
+ "epoch": 0.08857197815224539,
772
+ "grad_norm": 0.6744178533554077,
773
+ "learning_rate": 4.9194831013916504e-05,
774
+ "loss": 0.5186,
775
+ "step": 4950
776
+ },
777
+ {
778
+ "epoch": 0.08946664459822766,
779
+ "grad_norm": 0.7791246771812439,
780
+ "learning_rate": 4.969184890656064e-05,
781
+ "loss": 0.5273,
782
+ "step": 5000
783
+ },
784
+ {
785
+ "epoch": 0.08946664459822766,
786
+ "eval_loss": 0.503763735294342,
787
+ "eval_runtime": 833.0418,
788
+ "eval_samples_per_second": 10.843,
789
+ "eval_steps_per_second": 2.712,
790
+ "step": 5000
791
  }
792
  ],
793
  "logging_steps": 50,
 
807
  "attributes": {}
808
  }
809
  },
810
+ "total_flos": 1.859452403712e+16,
811
  "train_batch_size": 4,
812
  "trial_name": null,
813
  "trial_params": null