0x1202 committed on
Commit
c90ec2a
·
verified ·
1 Parent(s): d07b7f1

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84c54a9afbc4eb7c75bfb55a139724528a733d50ecc9d9480c15b439b13db8e5
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c67cb69054ec58aa440717cdfb6bb73eed282ad38d6f1d8876b92f689dce4a3
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a80b398665c89896098c06ce44d0aab9ee6573cafe59740768c374973eee82c0
3
- size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3277342f65874d847ea16268cf966c19181a740039e3a76dc9dfa826e4bb748
3
+ size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07f82241b9e643be3241205d5eb1a2f2a9fc078760341a2caefd27106208888a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835154b511893bb89c75363ca2b5e39fbfa155a8e7afb051eb41d91d2f9e28a0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1eaa4c8e8f9133451ddb25d9cc07f61c13ab6775afadbfb025eb8a3e054e859c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61b5a82b906ef255ed934ff84d648a93ce52daed15c002b1d3821ff35829eb10
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 3.1905009746551514,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.08915304606240713,
5
  "eval_steps": 150,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -128,6 +128,119 @@
128
  "eval_samples_per_second": 13.326,
129
  "eval_steps_per_second": 3.334,
130
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "logging_steps": 10,
@@ -142,7 +255,7 @@
142
  "early_stopping_threshold": 0.0
143
  },
144
  "attributes": {
145
- "early_stopping_patience_counter": 0
146
  }
147
  },
148
  "TrainerControl": {
@@ -156,7 +269,7 @@
156
  "attributes": {}
157
  }
158
  },
159
- "total_flos": 2.2194006027298406e+17,
160
  "train_batch_size": 8,
161
  "trial_name": null,
162
  "trial_params": null
 
1
  {
2
  "best_metric": 3.1905009746551514,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 0.17830609212481427,
5
  "eval_steps": 150,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
128
  "eval_samples_per_second": 13.326,
129
  "eval_steps_per_second": 3.334,
130
  "step": 150
131
+ },
132
+ {
133
+ "epoch": 0.0950965824665676,
134
+ "grad_norm": 1.8121700286865234,
135
+ "learning_rate": 9.888459804735679e-05,
136
+ "loss": 3.1284,
137
+ "step": 160
138
+ },
139
+ {
140
+ "epoch": 0.10104011887072809,
141
+ "grad_norm": 2.5314412117004395,
142
+ "learning_rate": 9.867352156521488e-05,
143
+ "loss": 3.1552,
144
+ "step": 170
145
+ },
146
+ {
147
+ "epoch": 0.10698365527488855,
148
+ "grad_norm": 2.6681604385375977,
149
+ "learning_rate": 9.844443122444238e-05,
150
+ "loss": 2.9376,
151
+ "step": 180
152
+ },
153
+ {
154
+ "epoch": 0.11292719167904904,
155
+ "grad_norm": 2.87355375289917,
156
+ "learning_rate": 9.819741181037799e-05,
157
+ "loss": 2.8799,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 0.1188707280832095,
162
+ "grad_norm": 4.919130325317383,
163
+ "learning_rate": 9.793255474383249e-05,
164
+ "loss": 2.8424,
165
+ "step": 200
166
+ },
167
+ {
168
+ "epoch": 0.12481426448736999,
169
+ "grad_norm": 2.0036211013793945,
170
+ "learning_rate": 9.764995804725424e-05,
171
+ "loss": 3.2481,
172
+ "step": 210
173
+ },
174
+ {
175
+ "epoch": 0.13075780089153047,
176
+ "grad_norm": 2.4777944087982178,
177
+ "learning_rate": 9.734972630845151e-05,
178
+ "loss": 3.0006,
179
+ "step": 220
180
+ },
181
+ {
182
+ "epoch": 0.13670133729569092,
183
+ "grad_norm": 2.4493539333343506,
184
+ "learning_rate": 9.703197064188498e-05,
185
+ "loss": 2.9379,
186
+ "step": 230
187
+ },
188
+ {
189
+ "epoch": 0.1426448736998514,
190
+ "grad_norm": 3.2666187286376953,
191
+ "learning_rate": 9.669680864754484e-05,
192
+ "loss": 2.8128,
193
+ "step": 240
194
+ },
195
+ {
196
+ "epoch": 0.1485884101040119,
197
+ "grad_norm": 7.568844795227051,
198
+ "learning_rate": 9.63443643674274e-05,
199
+ "loss": 2.8483,
200
+ "step": 250
201
+ },
202
+ {
203
+ "epoch": 0.15453194650817237,
204
+ "grad_norm": 1.986729383468628,
205
+ "learning_rate": 9.597476823962784e-05,
206
+ "loss": 3.2472,
207
+ "step": 260
208
+ },
209
+ {
210
+ "epoch": 0.16047548291233285,
211
+ "grad_norm": 2.4101827144622803,
212
+ "learning_rate": 9.558815705006555e-05,
213
+ "loss": 3.0007,
214
+ "step": 270
215
+ },
216
+ {
217
+ "epoch": 0.1664190193164933,
218
+ "grad_norm": 2.539492607116699,
219
+ "learning_rate": 9.51846738818602e-05,
220
+ "loss": 2.9353,
221
+ "step": 280
222
+ },
223
+ {
224
+ "epoch": 0.1723625557206538,
225
+ "grad_norm": 2.762788772583008,
226
+ "learning_rate": 9.476446806237749e-05,
227
+ "loss": 2.8221,
228
+ "step": 290
229
+ },
230
+ {
231
+ "epoch": 0.17830609212481427,
232
+ "grad_norm": 7.846682071685791,
233
+ "learning_rate": 9.432769510796353e-05,
234
+ "loss": 2.8917,
235
+ "step": 300
236
+ },
237
+ {
238
+ "epoch": 0.17830609212481427,
239
+ "eval_loss": 3.3436031341552734,
240
+ "eval_runtime": 212.4479,
241
+ "eval_samples_per_second": 13.34,
242
+ "eval_steps_per_second": 3.337,
243
+ "step": 300
244
  }
245
  ],
246
  "logging_steps": 10,
 
255
  "early_stopping_threshold": 0.0
256
  },
257
  "attributes": {
258
+ "early_stopping_patience_counter": 1
259
  }
260
  },
261
  "TrainerControl": {
 
269
  "attributes": {}
270
  }
271
  },
272
+ "total_flos": 4.436915563486249e+17,
273
  "train_batch_size": 8,
274
  "trial_name": null,
275
  "trial_params": null