0x1202 commited on
Commit
31926dd
·
verified ·
1 Parent(s): ce9d980

Training in progress, step 1050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:686c2baefc99317843ca16b54b88b2e76160991d944cc00803f7f08a289215cb
3
  size 389074464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59266812baaf7d8e90c63f0bb7ca8c79f2b5b010097451c73c885e1f300daf35
3
  size 389074464
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb850aa633249af1404c19533fb60dabbd3a6700aa8b9a66912558b1ed529914
3
  size 198011700
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a927c375ee983878c51e7210ed2422e48b31710cb736d343bcb2185df58c66ae
3
  size 198011700
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac891541b8baaf3cdb8037980b509b8620ed381d4ed63ca08ac8517d1928ff03
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de91cf14d0d38fb6339c62deb3669ffdf5c3c5a71414c1585f253bcb3233eae
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f791a846752ffe0fc581ae9fe60078e654108249fdbba946c80897dc152a53ff
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88236662c0aa97434d741d8e87aa4b49487e3d51a7d8edb811a91fbf0268db4c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8035673499107361,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-900",
4
- "epoch": 0.011964067916025537,
5
  "eval_steps": 150,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -693,6 +693,119 @@
693
  "eval_samples_per_second": 43.135,
694
  "eval_steps_per_second": 10.784,
695
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
  }
697
  ],
698
  "logging_steps": 10,
@@ -721,7 +834,7 @@
721
  "attributes": {}
722
  }
723
  },
724
- "total_flos": 2.589208946493358e+17,
725
  "train_batch_size": 8,
726
  "trial_name": null,
727
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7980687022209167,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1050",
4
+ "epoch": 0.013958079235363127,
5
  "eval_steps": 150,
6
+ "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
693
  "eval_samples_per_second": 43.135,
694
  "eval_steps_per_second": 10.784,
695
  "step": 900
696
+ },
697
+ {
698
+ "epoch": 0.012097002003981377,
699
+ "grad_norm": 0.820439338684082,
700
+ "learning_rate": 9.106417448664306e-05,
701
+ "loss": 0.8856,
702
+ "step": 910
703
+ },
704
+ {
705
+ "epoch": 0.012229936091937215,
706
+ "grad_norm": 1.0313884019851685,
707
+ "learning_rate": 9.086176605071805e-05,
708
+ "loss": 0.8814,
709
+ "step": 920
710
+ },
711
+ {
712
+ "epoch": 0.012362870179893055,
713
+ "grad_norm": 1.2534911632537842,
714
+ "learning_rate": 9.06573210649077e-05,
715
+ "loss": 0.8069,
716
+ "step": 930
717
+ },
718
+ {
719
+ "epoch": 0.012495804267848893,
720
+ "grad_norm": 1.3286960124969482,
721
+ "learning_rate": 9.045084971874738e-05,
722
+ "loss": 0.7651,
723
+ "step": 940
724
+ },
725
+ {
726
+ "epoch": 0.012628738355804733,
727
+ "grad_norm": 3.241807460784912,
728
+ "learning_rate": 9.024236230276629e-05,
729
+ "loss": 0.8244,
730
+ "step": 950
731
+ },
732
+ {
733
+ "epoch": 0.012761672443760572,
734
+ "grad_norm": 0.8718632459640503,
735
+ "learning_rate": 9.003186920797452e-05,
736
+ "loss": 0.8792,
737
+ "step": 960
738
+ },
739
+ {
740
+ "epoch": 0.012894606531716412,
741
+ "grad_norm": 0.9083179235458374,
742
+ "learning_rate": 8.981938092534517e-05,
743
+ "loss": 0.8644,
744
+ "step": 970
745
+ },
746
+ {
747
+ "epoch": 0.013027540619672252,
748
+ "grad_norm": 1.1338489055633545,
749
+ "learning_rate": 8.960490804529144e-05,
750
+ "loss": 0.8368,
751
+ "step": 980
752
+ },
753
+ {
754
+ "epoch": 0.01316047470762809,
755
+ "grad_norm": 1.466839075088501,
756
+ "learning_rate": 8.938846125713891e-05,
757
+ "loss": 0.7183,
758
+ "step": 990
759
+ },
760
+ {
761
+ "epoch": 0.01329340879558393,
762
+ "grad_norm": 2.3635900020599365,
763
+ "learning_rate": 8.917005134859263e-05,
764
+ "loss": 0.7475,
765
+ "step": 1000
766
+ },
767
+ {
768
+ "epoch": 0.013426342883539768,
769
+ "grad_norm": 0.8753401041030884,
770
+ "learning_rate": 8.894968920519959e-05,
771
+ "loss": 0.9097,
772
+ "step": 1010
773
+ },
774
+ {
775
+ "epoch": 0.013559276971495608,
776
+ "grad_norm": 1.0394352674484253,
777
+ "learning_rate": 8.872738580980615e-05,
778
+ "loss": 0.8533,
779
+ "step": 1020
780
+ },
781
+ {
782
+ "epoch": 0.013692211059451448,
783
+ "grad_norm": 1.3622921705245972,
784
+ "learning_rate": 8.850315224201063e-05,
785
+ "loss": 0.8018,
786
+ "step": 1030
787
+ },
788
+ {
789
+ "epoch": 0.013825145147407287,
790
+ "grad_norm": 1.4305646419525146,
791
+ "learning_rate": 8.827699967761108e-05,
792
+ "loss": 0.8168,
793
+ "step": 1040
794
+ },
795
+ {
796
+ "epoch": 0.013958079235363127,
797
+ "grad_norm": 2.2436411380767822,
798
+ "learning_rate": 8.80489393880484e-05,
799
+ "loss": 0.7678,
800
+ "step": 1050
801
+ },
802
+ {
803
+ "epoch": 0.013958079235363127,
804
+ "eval_loss": 0.7980687022209167,
805
+ "eval_runtime": 2936.4148,
806
+ "eval_samples_per_second": 43.146,
807
+ "eval_steps_per_second": 10.787,
808
+ "step": 1050
809
  }
810
  ],
811
  "logging_steps": 10,
 
834
  "attributes": {}
835
  }
836
  },
837
+ "total_flos": 3.0206243314335744e+17,
838
  "train_batch_size": 8,
839
  "trial_name": null,
840
  "trial_params": null