sandernotenbaert commited on
Commit
0b38d0e
·
verified ·
1 Parent(s): 9185d76

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04e15b8643b4c494ce4eb0063c82104b4508cb5e6567ed58c27b238b56f6c53
3
  size 141303176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7066ad238f161a22d0683e51ba0e6258706bc9aa90c5787542555b1a4a97128d
3
  size 141303176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a500937edc6135ea093707089bbb73d696f423887f0a26cbb5eaf549882a8e4
3
  size 282653387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:163c91ff02e2ddb5dacee5972ca6bb5038e19cbb07bed2cea725f9d8a4a6a70f
3
  size 282653387
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74b5a01f40080db725add4e3d836250ec8c7db0a9e994d36deb55546885f2ea9
3
  size 14455
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b652c6269b998b96ab924b2734c0818fab436c642524e13fc6cd4d9082e62b5
3
  size 14455
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e2af7e9421c7a30caace99544c124328400803d9b4a09499769e6d71fad6ae9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77fc95ff7d80af03c17939ca85b9568e652a4cf9d2e9aaf323a942fee4510f31
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.07157331567858213,
6
  "eval_steps": 500,
7
- "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -632,6 +632,84 @@
632
  "eval_samples_per_second": 10.579,
633
  "eval_steps_per_second": 2.646,
634
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  }
636
  ],
637
  "logging_steps": 50,
@@ -651,7 +729,7 @@
651
  "attributes": {}
652
  }
653
  },
654
- "total_flos": 1.4875619229696e+16,
655
  "train_batch_size": 4,
656
  "trial_name": null,
657
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0805199801384049,
6
  "eval_steps": 500,
7
+ "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
632
  "eval_samples_per_second": 10.579,
633
  "eval_steps_per_second": 2.646,
634
  "step": 4000
635
+ },
636
+ {
637
+ "epoch": 0.07246798212456441,
638
+ "grad_norm": 0.6943932175636292,
639
+ "learning_rate": 4.024850894632207e-05,
640
+ "loss": 0.5381,
641
+ "step": 4050
642
+ },
643
+ {
644
+ "epoch": 0.07336264857054668,
645
+ "grad_norm": 0.7221015095710754,
646
+ "learning_rate": 4.07455268389662e-05,
647
+ "loss": 0.5521,
648
+ "step": 4100
649
+ },
650
+ {
651
+ "epoch": 0.07425731501652896,
652
+ "grad_norm": 0.6903452277183533,
653
+ "learning_rate": 4.124254473161034e-05,
654
+ "loss": 0.5389,
655
+ "step": 4150
656
+ },
657
+ {
658
+ "epoch": 0.07515198146251124,
659
+ "grad_norm": 0.7749171853065491,
660
+ "learning_rate": 4.173956262425447e-05,
661
+ "loss": 0.539,
662
+ "step": 4200
663
+ },
664
+ {
665
+ "epoch": 0.07604664790849351,
666
+ "grad_norm": 0.7912935614585876,
667
+ "learning_rate": 4.223658051689861e-05,
668
+ "loss": 0.5363,
669
+ "step": 4250
670
+ },
671
+ {
672
+ "epoch": 0.07694131435447579,
673
+ "grad_norm": 0.7532743811607361,
674
+ "learning_rate": 4.273359840954275e-05,
675
+ "loss": 0.5376,
676
+ "step": 4300
677
+ },
678
+ {
679
+ "epoch": 0.07783598080045807,
680
+ "grad_norm": 0.6006184220314026,
681
+ "learning_rate": 4.323061630218688e-05,
682
+ "loss": 0.5348,
683
+ "step": 4350
684
+ },
685
+ {
686
+ "epoch": 0.07873064724644034,
687
+ "grad_norm": 0.6265826225280762,
688
+ "learning_rate": 4.372763419483102e-05,
689
+ "loss": 0.5346,
690
+ "step": 4400
691
+ },
692
+ {
693
+ "epoch": 0.07962531369242262,
694
+ "grad_norm": 0.6724209189414978,
695
+ "learning_rate": 4.422465208747515e-05,
696
+ "loss": 0.5312,
697
+ "step": 4450
698
+ },
699
+ {
700
+ "epoch": 0.0805199801384049,
701
+ "grad_norm": 0.8128370046615601,
702
+ "learning_rate": 4.472166998011928e-05,
703
+ "loss": 0.5365,
704
+ "step": 4500
705
+ },
706
+ {
707
+ "epoch": 0.0805199801384049,
708
+ "eval_loss": 0.509487509727478,
709
+ "eval_runtime": 834.3525,
710
+ "eval_samples_per_second": 10.826,
711
+ "eval_steps_per_second": 2.707,
712
+ "step": 4500
713
  }
714
  ],
715
  "logging_steps": 50,
 
729
  "attributes": {}
730
  }
731
  },
732
+ "total_flos": 1.6735071633408e+16,
733
  "train_batch_size": 4,
734
  "trial_name": null,
735
  "trial_params": null