souging committed
Commit f4aca0a · verified · 1 Parent(s): 9f80601

Training in progress, epoch 5, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a29ac7ba6f53f92462f92ab317dac22f616978288aa0a0bf5eb7b73a93e8623b
+ oid sha256:ff84ff1b2afe3c81648f69c662425e5133939e807416e00c8d1b88dd05b828ee
  size 97307544
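
Each changed file in this commit is a Git LFS pointer: the repository tracks only the pointer's version line, sha256 oid, and byte size, while the binary payload lives in LFS storage. As a minimal illustrative sketch (not part of the commit), the following Python snippet verifies a locally downloaded adapter_model.safetensors against the new pointer values above; the local path is an assumption.

```python
import hashlib
import os

# Expected values taken from the updated LFS pointer above.
EXPECTED_OID = "ff84ff1b2afe3c81648f69c662425e5133939e807416e00c8d1b88dd05b828ee"
EXPECTED_SIZE = 97307544

# Hypothetical local path to the downloaded checkpoint file.
path = "last-checkpoint/adapter_model.safetensors"

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    # Hash in 1 MiB chunks so large checkpoint files need not fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size does not match LFS pointer"
assert sha256.hexdigest() == EXPECTED_OID, "sha256 does not match LFS pointer"
print("local file matches the LFS pointer")
```

The same check applies to optimizer.pt, the rng_state_*.pth files, and scheduler.pt below, each with its own oid and size.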
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:87a7f4d414d1b3ef9f1439d2a5c336e25a673350c1f7c1a36bae8691277656ef
+ oid sha256:10e34e4100b5f60fc0bc15eaee4d6276d4e620ba5bfdf7a6a550ec8399c77cd5
  size 49846644
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b34c69f6d6c32635ff50b0a1faed18792675796ed61b4917357f7fa723205e2f
+ oid sha256:aed20ba68feaa8e3d536e2039713340ed613cdb4247d843b649089065ce841e9
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f00b65fe360af6cac79aca5d512b7d43a8aea8109a2419986cb6ec4493d90572
+ oid sha256:4f095da1de3720c4e8b58cb81dca9e1af4e0f6355ea16173b6c815307c3df5b3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3338a4e0a24655344a2e4fe71d8047a4351bfcfb4de0042932ed9ef74e6a9a04
+ oid sha256:b17c7c306ef73d802004550d50a4263de1aea4f38798606e532d283cc52d5499
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a28ffcb98b6e03917e67aa0d155cd2bcb5bba7adff5ab9f65b2a312750f07526
+ oid sha256:336df95e2b6b6d4f80034891f2f2867ce34241dc9a53efc7bdcba0fceba4bb0f
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1f34acd8813d6b9c98be80489c6f644fc7604245d4167815c19292650ca21464
+ oid sha256:96e143ad827376f99ce0dd7da519bd5666cb85c9dfcd6fffd79ebe3195919c83
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:359fcea393c9e357ac9329090b6808db28e869fcad41f7d4e9d5f4ea556aac70
+ oid sha256:c5725c1870961018862c368a373dab7c0f2aedede8c986506b38291d26b170bc
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d9ec9b30c1aa25c7c28c666461ba2a0d9c5781d925d9a3f624531498737156b1
+ oid sha256:63227904d9c4715c227588ec3c511ae61e475d692c6bb4f2db8cc9e3a7643d3f
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1b73a0419b8ce412c9ab0e7b0fff0b7e12dd9a30c2645954ee9b29ede96197d8
+ oid sha256:7f5b6ebc45a3c99a84ff77360ea3679108eed2a18f52093642827fbeac96f83d
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f3b5e41a3049ed9c6f775655d186804bf7846bdec4fbbd638a092baab2602b8a
+ oid sha256:b4ac31dcfba988823188c44099086577dd09e162577217892c3f4fd5ad8489c2
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 4.99812382739212,
+ "epoch": 5.25328330206379,
  "eval_steps": 500,
- "global_step": 666,
+ "global_step": 700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4669,6 +4669,244 @@
  "learning_rate": 1.59624359510657e-06,
  "loss": 0.8788,
  "step": 666
+ },
+ {
+ "epoch": 5.00562851782364,
+ "grad_norm": 0.5438546538352966,
+ "learning_rate": 1.5039580583678393e-06,
+ "loss": 1.7329,
+ "step": 667
+ },
+ {
+ "epoch": 5.01313320825516,
+ "grad_norm": 0.2500527501106262,
+ "learning_rate": 1.414400255879008e-06,
+ "loss": 1.012,
+ "step": 668
+ },
+ {
+ "epoch": 5.0206378986866795,
+ "grad_norm": 0.2738587558269501,
+ "learning_rate": 1.327572642912468e-06,
+ "loss": 1.1039,
+ "step": 669
+ },
+ {
+ "epoch": 5.028142589118199,
+ "grad_norm": 0.2560110092163086,
+ "learning_rate": 1.2434775998910964e-06,
+ "loss": 1.027,
+ "step": 670
+ },
+ {
+ "epoch": 5.0356472795497185,
+ "grad_norm": 0.2611444890499115,
+ "learning_rate": 1.1621174323229612e-06,
+ "loss": 1.0072,
+ "step": 671
+ },
+ {
+ "epoch": 5.043151969981238,
+ "grad_norm": 0.24755235016345978,
+ "learning_rate": 1.0834943707381784e-06,
+ "loss": 0.9297,
+ "step": 672
+ },
+ {
+ "epoch": 5.050656660412758,
+ "grad_norm": 0.2472180277109146,
+ "learning_rate": 1.0076105706276888e-06,
+ "loss": 0.8869,
+ "step": 673
+ },
+ {
+ "epoch": 5.058161350844277,
+ "grad_norm": 0.2653760313987732,
+ "learning_rate": 9.344681123841967e-07,
+ "loss": 0.8558,
+ "step": 674
+ },
+ {
+ "epoch": 5.065666041275797,
+ "grad_norm": 0.2679085433483124,
+ "learning_rate": 8.640690012451515e-07,
+ "loss": 1.1758,
+ "step": 675
+ },
+ {
+ "epoch": 5.073170731707317,
+ "grad_norm": 0.23908266425132751,
+ "learning_rate": 7.964151672377458e-07,
+ "loss": 0.8884,
+ "step": 676
+ },
+ {
+ "epoch": 5.080675422138837,
+ "grad_norm": 0.2759111225605011,
+ "learning_rate": 7.315084651260009e-07,
+ "loss": 1.1085,
+ "step": 677
+ },
+ {
+ "epoch": 5.088180112570356,
+ "grad_norm": 0.2899288833141327,
+ "learning_rate": 6.69350674359959e-07,
+ "loss": 1.1352,
+ "step": 678
+ },
+ {
+ "epoch": 5.095684803001876,
+ "grad_norm": 0.27089551091194153,
+ "learning_rate": 6.099434990268609e-07,
+ "loss": 0.9914,
+ "step": 679
+ },
+ {
+ "epoch": 5.103189493433396,
+ "grad_norm": 0.29111695289611816,
+ "learning_rate": 5.532885678043977e-07,
+ "loss": 0.971,
+ "step": 680
+ },
+ {
+ "epoch": 5.110694183864916,
+ "grad_norm": 0.2626079022884369,
+ "learning_rate": 4.9938743391615e-07,
+ "loss": 0.9628,
+ "step": 681
+ },
+ {
+ "epoch": 5.118198874296436,
+ "grad_norm": 0.27037861943244934,
+ "learning_rate": 4.482415750889204e-07,
+ "loss": 1.0561,
+ "step": 682
+ },
+ {
+ "epoch": 5.125703564727955,
+ "grad_norm": 0.2747006118297577,
+ "learning_rate": 3.998523935122772e-07,
+ "loss": 1.0828,
+ "step": 683
+ },
+ {
+ "epoch": 5.133208255159475,
+ "grad_norm": 0.2585286498069763,
+ "learning_rate": 3.5422121580005864e-07,
+ "loss": 0.9272,
+ "step": 684
+ },
+ {
+ "epoch": 5.140712945590995,
+ "grad_norm": 0.2688470184803009,
+ "learning_rate": 3.1134929295407564e-07,
+ "loss": 1.0374,
+ "step": 685
+ },
+ {
+ "epoch": 5.1482176360225145,
+ "grad_norm": 0.25770020484924316,
+ "learning_rate": 2.7123780032973235e-07,
+ "loss": 0.9986,
+ "step": 686
+ },
+ {
+ "epoch": 5.1557223264540335,
+ "grad_norm": 0.25769320130348206,
+ "learning_rate": 2.3388783760386601e-07,
+ "loss": 1.0169,
+ "step": 687
+ },
+ {
+ "epoch": 5.163227016885553,
+ "grad_norm": 0.29741230607032776,
+ "learning_rate": 1.9930042874457254e-07,
+ "loss": 1.0703,
+ "step": 688
+ },
+ {
+ "epoch": 5.170731707317073,
+ "grad_norm": 0.2753787636756897,
+ "learning_rate": 1.6747652198313957e-07,
+ "loss": 1.0371,
+ "step": 689
+ },
+ {
+ "epoch": 5.178236397748593,
+ "grad_norm": 0.2647511959075928,
+ "learning_rate": 1.3841698978804285e-07,
+ "loss": 1.0168,
+ "step": 690
+ },
+ {
+ "epoch": 5.185741088180112,
+ "grad_norm": 0.27614572644233704,
+ "learning_rate": 1.1212262884103974e-07,
+ "loss": 1.0725,
+ "step": 691
+ },
+ {
+ "epoch": 5.193245778611632,
+ "grad_norm": 0.2685404121875763,
+ "learning_rate": 8.85941600153033e-08,
+ "loss": 1.0614,
+ "step": 692
+ },
+ {
+ "epoch": 5.200750469043152,
+ "grad_norm": 0.2600098252296448,
+ "learning_rate": 6.783222835572055e-08,
+ "loss": 1.0501,
+ "step": 693
+ },
+ {
+ "epoch": 5.208255159474672,
+ "grad_norm": 0.25625452399253845,
+ "learning_rate": 4.98374030611084e-08,
+ "loss": 0.9857,
+ "step": 694
+ },
+ {
+ "epoch": 5.215759849906191,
+ "grad_norm": 0.26039209961891174,
+ "learning_rate": 3.461017746871675e-08,
+ "loss": 1.017,
+ "step": 695
+ },
+ {
+ "epoch": 5.223264540337711,
+ "grad_norm": 0.2616319954395294,
+ "learning_rate": 2.215096904060454e-08,
+ "loss": 1.0131,
+ "step": 696
+ },
+ {
+ "epoch": 5.230769230769231,
+ "grad_norm": 0.264681875705719,
+ "learning_rate": 1.246011935228064e-08,
+ "loss": 1.0703,
+ "step": 697
+ },
+ {
+ "epoch": 5.238273921200751,
+ "grad_norm": 0.26076602935791016,
+ "learning_rate": 5.537894083273543e-09,
+ "loss": 0.9102,
+ "step": 698
+ },
+ {
+ "epoch": 5.24577861163227,
+ "grad_norm": 0.25187480449676514,
+ "learning_rate": 1.384483009898796e-09,
+ "loss": 0.9244,
+ "step": 699
+ },
+ {
+ "epoch": 5.25328330206379,
+ "grad_norm": 0.2918677031993866,
+ "learning_rate": 0.0,
+ "loss": 1.1823,
+ "step": 700
  }
  ],
  "logging_steps": 1,
@@ -4683,12 +4921,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.4625303974966723e+18,
+ "total_flos": 1.5363404536186143e+18,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null