ncbateman committed on
Commit 7255a1f · verified · 1 Parent(s): f8c4344

Training in progress, step 420, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f3589878ef8c0ccabc0dfaf9fc1f2093bf0b9e36f86facf8e0d277a263a6fbe1
+ oid sha256:630cf3c2bcece4a6f38060d2903813ff7a951cedd54e2bc72b9f2607b14a70fd
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9c82faa383347f9a66cc669f03b87baec13337b2db57e610e3deb7405a5e82ad
+ oid sha256:ea13f39701c5f79159d5868ea33377233b14b510a34125092be22e18107dee32
  size 85723732
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:eba64919589245b61d7fdff757f934e96ccc4e53bfe4f67aa18658ed422c3f73
+ oid sha256:6d3301910976c38e17fe4c27ae989f885599778135e1335beb8e8e72e5c10e3c
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9432da3919ae09332aefc4d1aaa6096141e1756766415ba5bba478d58a6c8edd
+ oid sha256:af347bd6e9a64e9d103a4bd43a4757792a255b273b942d70aeddb5b7a243efa6
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0af7110163e9993572ed4e63f812642004987cdabb74edc789bc7391e1ab0f15
+ oid sha256:7a821a31a15e8c39e25415cc4954957cfbae7d4d73e706211955c475fdac1633
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4174c84ac9512d0ebb0c57b6ee4e9def3943b7376106f0123b689d81dee53fc5
+ oid sha256:3bcfbc84a1d987ab5465db7241354afa4d4494c36e9c1879e23aee4df617417f
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fa2f50ba8ccd569e36bb5b9125e41b80ac00866bb3946942d8b5dd6a6e75f034
+ oid sha256:d7957f0bede386b9e7dfa8afbce84db80049c290d0482836d6c77ca519d043dd
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.9260304912478825,
+ "epoch": 0.9486166007905138,
  "eval_steps": 222,
- "global_step": 410,
+ "global_step": 420,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2893,6 +2893,76 @@
  "learning_rate": 1.567060735921344e-06,
  "loss": 1.5086,
  "step": 410
+ },
+ {
+ "epoch": 0.9282891022021457,
+ "grad_norm": 0.6804593801498413,
+ "learning_rate": 1.4739887303249877e-06,
+ "loss": 1.4831,
+ "step": 411
+ },
+ {
+ "epoch": 0.9305477131564088,
+ "grad_norm": 0.5399026274681091,
+ "learning_rate": 1.383724563345451e-06,
+ "loss": 1.6713,
+ "step": 412
+ },
+ {
+ "epoch": 0.932806324110672,
+ "grad_norm": 0.6251387596130371,
+ "learning_rate": 1.2962734578973568e-06,
+ "loss": 1.3986,
+ "step": 413
+ },
+ {
+ "epoch": 0.935064935064935,
+ "grad_norm": 0.6313952803611755,
+ "learning_rate": 1.2116404741244203e-06,
+ "loss": 1.4082,
+ "step": 414
+ },
+ {
+ "epoch": 0.9373235460191982,
+ "grad_norm": 0.5223222970962524,
+ "learning_rate": 1.1298305091066664e-06,
+ "loss": 1.3455,
+ "step": 415
+ },
+ {
+ "epoch": 0.9395821569734614,
+ "grad_norm": 0.6576501131057739,
+ "learning_rate": 1.0508482965770505e-06,
+ "loss": 1.5945,
+ "step": 416
+ },
+ {
+ "epoch": 0.9418407679277244,
+ "grad_norm": 0.5625395774841309,
+ "learning_rate": 9.746984066475729e-07,
+ "loss": 1.3081,
+ "step": 417
+ },
+ {
+ "epoch": 0.9440993788819876,
+ "grad_norm": 0.7256038188934326,
+ "learning_rate": 9.013852455448335e-07,
+ "loss": 1.2728,
+ "step": 418
+ },
+ {
+ "epoch": 0.9463579898362507,
+ "grad_norm": 0.9706465005874634,
+ "learning_rate": 8.309130553550815e-07,
+ "loss": 1.4802,
+ "step": 419
+ },
+ {
+ "epoch": 0.9486166007905138,
+ "grad_norm": 0.41532036662101746,
+ "learning_rate": 7.63285913778733e-07,
+ "loss": 1.3479,
+ "step": 420
  }
  ],
  "logging_steps": 1,
@@ -2912,7 +2982,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.4341321769591767e+18,
+ "total_flos": 2.4934831596440125e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null