ncbateman commited on
Commit
fcf0a3d
·
verified ·
1 Parent(s): 9da4192

Training in progress, step 440, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53beea0bcf2f0cdbe55aacc7b382970aab72a5280a75677185475e3f7e194d77
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a6ffca18df63c3794d60ab499f1b48e2f158228670c80877c8b50425888456b
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c74711845e45cdefb1ea114594ec076703a6568f23e6200b1e79a139e6e5e049
3
  size 85723732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d467c9948855ae875e2d5d2d2c0dc42d3d0e6cd43ce40db3c97511f54ffa61c2
3
  size 85723732
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:027ab1e02da1e657cb0e6e7794fa85bd909800b97027112d2b1f64c0e103d497
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cca9f96c96b922acaa204e7459a265396d5e1c98f1f296f230d8e331885b8ac
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87bfdda30185873c0a9cb229d25fc230d1146196d05e81747db223517c4eafcf
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62fb05d1751019ac24743a26e17e60ff4857e4af1f72362a4b410a6c8410a65b
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cd61f66c9dd6299c5f3f99f7ceb5a152a1d64c4909020bec811205c4243a070
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54547a23ea029fe03b5f58871d4527a748e7ac8f837926dd1a6e08b721fbdc33
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66172525291e0f467bfc09091521e14e32a3eaaef79955a2826a4348133fc8e0
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bef39a40db1a483262af7570836839950da73fbe882edbfe93922ade4768db34
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79b607b3724fadd616761069f9d370b3b216d512a0fbae133eb5c1bd8e06f7d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8323834fccc4192732a390e4554f47c9681faaee6040082de0c2920fe02fd450
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9712027103331451,
5
  "eval_steps": 222,
6
- "global_step": 430,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3033,6 +3033,76 @@
3033
  "learning_rate": 2.442712862748775e-07,
3034
  "loss": 1.7907,
3035
  "step": 430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3036
  }
3037
  ],
3038
  "logging_steps": 1,
@@ -3052,7 +3122,7 @@
3052
  "attributes": {}
3053
  }
3054
  },
3055
- "total_flos": 2.5528341423288484e+18,
3056
  "train_batch_size": 2,
3057
  "trial_name": null,
3058
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9937888198757764,
5
  "eval_steps": 222,
6
+ "global_step": 440,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3033
  "learning_rate": 2.442712862748775e-07,
3034
  "loss": 1.7907,
3035
  "step": 430
3036
+ },
3037
+ {
3038
+ "epoch": 0.9734613212874083,
3039
+ "grad_norm": 0.7090250849723816,
3040
+ "learning_rate": 2.0816157625706545e-07,
3041
+ "loss": 1.3354,
3042
+ "step": 431
3043
+ },
3044
+ {
3045
+ "epoch": 0.9757199322416714,
3046
+ "grad_norm": 0.6182876825332642,
3047
+ "learning_rate": 1.749329488395124e-07,
3048
+ "loss": 1.6426,
3049
+ "step": 432
3050
+ },
3051
+ {
3052
+ "epoch": 0.9779785431959345,
3053
+ "grad_norm": 0.42393407225608826,
3054
+ "learning_rate": 1.4458732671523977e-07,
3055
+ "loss": 1.2865,
3056
+ "step": 433
3057
+ },
3058
+ {
3059
+ "epoch": 0.9802371541501976,
3060
+ "grad_norm": 0.7182570695877075,
3061
+ "learning_rate": 1.1712646575922637e-07,
3062
+ "loss": 1.3558,
3063
+ "step": 434
3064
+ },
3065
+ {
3066
+ "epoch": 0.9824957651044608,
3067
+ "grad_norm": 0.5384182929992676,
3068
+ "learning_rate": 9.255195492685609e-08,
3069
+ "loss": 1.4792,
3070
+ "step": 435
3071
+ },
3072
+ {
3073
+ "epoch": 0.9847543760587238,
3074
+ "grad_norm": 0.6862145662307739,
3075
+ "learning_rate": 7.086521616190279e-08,
3076
+ "loss": 1.4509,
3077
+ "step": 436
3078
+ },
3079
+ {
3080
+ "epoch": 0.987012987012987,
3081
+ "grad_norm": 0.9429339170455933,
3082
+ "learning_rate": 5.2067504314323723e-08,
3083
+ "loss": 1.2235,
3084
+ "step": 437
3085
+ },
3086
+ {
3087
+ "epoch": 0.9892715979672502,
3088
+ "grad_norm": 0.48626038432121277,
3089
+ "learning_rate": 3.6159907067601085e-08,
3090
+ "loss": 1.3711,
3091
+ "step": 438
3092
+ },
3093
+ {
3094
+ "epoch": 0.9915302089215132,
3095
+ "grad_norm": 0.6334356665611267,
3096
+ "learning_rate": 2.3143344875831142e-08,
3097
+ "loss": 1.3192,
3098
+ "step": 439
3099
+ },
3100
+ {
3101
+ "epoch": 0.9937888198757764,
3102
+ "grad_norm": 0.49483799934387207,
3103
+ "learning_rate": 1.3018570910466877e-08,
3104
+ "loss": 1.1924,
3105
+ "step": 440
3106
  }
3107
  ],
3108
  "logging_steps": 1,
 
3122
  "attributes": {}
3123
  }
3124
  },
3125
+ "total_flos": 2.6129270118945915e+18,
3126
  "train_batch_size": 2,
3127
  "trial_name": null,
3128
  "trial_params": null