0x1202 committed on
Commit
39f7d94
·
verified ·
1 Parent(s): 18ade69

Training in progress, step 219, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36c515db32d3d7f09379932d8af8a180603557ea1ceb0780e1597e26233f2d44
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b2678287b7c7ef4f6654abac8603d0ce7a466cab7c09e008a66b5345600e4b
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:901764707d693145d88f726ca25c4ebcf5a68f2f4445887acb0bdbb8135f4ad4
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab68acaceb01f23f60015123483035808654ef4e1d525149158036836fdaf50
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4d565184f9f5cda7759630e9b34000ae1d43c3424fb3dfd30d7bf55a7baafda
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0cae3ee409db3bdf9e402fd2df770d0dec0a2a308b35538b2bf87b8effeccd2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f8713d9f688bebdbd86b1fb4350ff186e7695576b2c8da59fde7edc7e2e209
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb42eaa476d66164742d8e57c54ee1c2ea541bedefcf1ea63778a36c221c9563
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7080769538879395,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.6857142857142857,
5
  "eval_steps": 150,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -128,6 +128,48 @@
128
  "eval_samples_per_second": 14.012,
129
  "eval_steps_per_second": 3.532,
130
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "logging_steps": 10,
@@ -151,12 +193,12 @@
151
  "should_evaluate": false,
152
  "should_log": false,
153
  "should_save": true,
154
- "should_training_stop": false
155
  },
156
  "attributes": {}
157
  }
158
  },
159
- "total_flos": 1.9497347872456704e+17,
160
  "train_batch_size": 8,
161
  "trial_name": null,
162
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7080769538879395,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 1.0022857142857142,
5
  "eval_steps": 150,
6
+ "global_step": 219,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
128
  "eval_samples_per_second": 14.012,
129
  "eval_steps_per_second": 3.532,
130
  "step": 150
131
+ },
132
+ {
133
+ "epoch": 0.7314285714285714,
134
+ "grad_norm": 3.1380746364593506,
135
+ "learning_rate": 2.717633947550651e-05,
136
+ "loss": 3.0081,
137
+ "step": 160
138
+ },
139
+ {
140
+ "epoch": 0.7771428571428571,
141
+ "grad_norm": 3.2932016849517822,
142
+ "learning_rate": 1.934730363037237e-05,
143
+ "loss": 3.0259,
144
+ "step": 170
145
+ },
146
+ {
147
+ "epoch": 0.8228571428571428,
148
+ "grad_norm": 3.7868399620056152,
149
+ "learning_rate": 1.257446259144494e-05,
150
+ "loss": 3.1174,
151
+ "step": 180
152
+ },
153
+ {
154
+ "epoch": 0.8685714285714285,
155
+ "grad_norm": 3.2612147331237793,
156
+ "learning_rate": 7.0911870039138015e-06,
157
+ "loss": 2.7663,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 0.9142857142857143,
162
+ "grad_norm": 22.563203811645508,
163
+ "learning_rate": 3.0864131729807398e-06,
164
+ "loss": 1.9854,
165
+ "step": 200
166
+ },
167
+ {
168
+ "epoch": 0.96,
169
+ "grad_norm": 3.5667941570281982,
170
+ "learning_rate": 6.98132917350991e-07,
171
+ "loss": 3.0253,
172
+ "step": 210
173
  }
174
  ],
175
  "logging_steps": 10,
 
193
  "should_evaluate": false,
194
  "should_log": false,
195
  "should_save": true,
196
+ "should_training_stop": true
197
  },
198
  "attributes": {}
199
  }
200
  },
201
+ "total_flos": 2.8530522804191232e+17,
202
  "train_batch_size": 8,
203
  "trial_name": null,
204
  "trial_params": null