DeepDream2045 commited on
Commit
ffac7e3
·
verified ·
1 Parent(s): 8ba6569

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -21,12 +21,12 @@
21
  "revision": null,
22
  "target_modules": [
23
  "down_proj",
24
- "up_proj",
25
- "v_proj",
26
- "q_proj",
27
- "o_proj",
28
  "gate_proj",
29
- "k_proj"
 
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "down_proj",
 
 
 
 
24
  "gate_proj",
25
+ "k_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "up_proj",
29
+ "q_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05746f7ea36e1c8d2f86e95b550f932f9925b9d6d6fe6f7c4a246ce454cb3548
3
  size 432223744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6194354092db75cf52814663d1970d75ded31798a74533293f5e68869152658e
3
  size 432223744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaa92982403fc4fc2cbc06653a6892d8e76b97b5eb9a9b05e8d043b0e8fe4d32
3
  size 864785974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a564288a72a567f9b461e5fd1d0f0ea4f3832498afb3f2d825f3cbcb809ee90
3
  size 864785974
last-checkpoint/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.03170052915811539,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
  "epoch": 0.721370604147881,
5
  "eval_steps": 25,
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.014427412082957619,
13
- "grad_norm": 12.593785285949707,
14
  "learning_rate": 5e-05,
15
  "loss": 4.8158,
16
  "step": 1
@@ -18,368 +18,368 @@
18
  {
19
  "epoch": 0.014427412082957619,
20
  "eval_loss": 4.899957656860352,
21
- "eval_runtime": 29.6783,
22
- "eval_samples_per_second": 15.735,
23
- "eval_steps_per_second": 1.988,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.028854824165915238,
28
- "grad_norm": 11.860882759094238,
29
  "learning_rate": 0.0001,
30
  "loss": 4.3372,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.04328223624887286,
35
- "grad_norm": 10.129796028137207,
36
  "learning_rate": 9.989294616193017e-05,
37
- "loss": 3.5527,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.057709648331830475,
42
- "grad_norm": 9.78841781616211,
43
  "learning_rate": 9.957224306869053e-05,
44
- "loss": 1.9134,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.0721370604147881,
49
- "grad_norm": 7.427072525024414,
50
  "learning_rate": 9.903926402016153e-05,
51
- "loss": 1.0155,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.08656447249774572,
56
- "grad_norm": 3.731661081314087,
57
  "learning_rate": 9.829629131445342e-05,
58
- "loss": 0.6265,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 0.10099188458070334,
63
- "grad_norm": 5.106438159942627,
64
  "learning_rate": 9.73465064747553e-05,
65
- "loss": 0.4308,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 0.11541929666366095,
70
- "grad_norm": 2.3000714778900146,
71
  "learning_rate": 9.619397662556435e-05,
72
- "loss": 0.3065,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 0.12984670874661858,
77
- "grad_norm": 2.5791616439819336,
78
  "learning_rate": 9.484363707663442e-05,
79
- "loss": 0.154,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 0.1442741208295762,
84
- "grad_norm": 2.1033170223236084,
85
  "learning_rate": 9.330127018922194e-05,
86
  "loss": 0.3065,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 0.1587015329125338,
91
- "grad_norm": 1.8672364950180054,
92
  "learning_rate": 9.157348061512727e-05,
93
- "loss": 0.2196,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 0.17312894499549145,
98
- "grad_norm": 1.7648990154266357,
99
  "learning_rate": 8.966766701456177e-05,
100
- "loss": 0.1896,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 0.18755635707844906,
105
- "grad_norm": 2.424722909927368,
106
  "learning_rate": 8.759199037394887e-05,
107
- "loss": 0.3486,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 0.20198376916140667,
112
- "grad_norm": 2.8545210361480713,
113
  "learning_rate": 8.535533905932738e-05,
114
- "loss": 0.3139,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 0.2164111812443643,
119
- "grad_norm": 1.8271303176879883,
120
  "learning_rate": 8.296729075500344e-05,
121
- "loss": 0.2364,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 0.2308385933273219,
126
- "grad_norm": 1.1875014305114746,
127
  "learning_rate": 8.043807145043604e-05,
128
- "loss": 0.1507,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 0.24526600541027954,
133
- "grad_norm": 1.014682650566101,
134
  "learning_rate": 7.777851165098012e-05,
135
- "loss": 0.1298,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 0.25969341749323716,
140
- "grad_norm": 1.273716688156128,
141
  "learning_rate": 7.500000000000001e-05,
142
- "loss": 0.1948,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 0.27412082957619477,
147
- "grad_norm": 0.9742581248283386,
148
  "learning_rate": 7.211443451095007e-05,
149
- "loss": 0.1087,
150
  "step": 19
151
  },
152
  {
153
  "epoch": 0.2885482416591524,
154
- "grad_norm": 1.013121485710144,
155
  "learning_rate": 6.91341716182545e-05,
156
- "loss": 0.1107,
157
  "step": 20
158
  },
159
  {
160
  "epoch": 0.30297565374211,
161
- "grad_norm": 0.9588230848312378,
162
  "learning_rate": 6.607197326515808e-05,
163
- "loss": 0.1007,
164
  "step": 21
165
  },
166
  {
167
  "epoch": 0.3174030658250676,
168
- "grad_norm": 1.2048404216766357,
169
  "learning_rate": 6.294095225512603e-05,
170
  "loss": 0.1354,
171
  "step": 22
172
  },
173
  {
174
  "epoch": 0.3318304779080252,
175
- "grad_norm": 1.152151346206665,
176
  "learning_rate": 5.9754516100806423e-05,
177
- "loss": 0.1269,
178
  "step": 23
179
  },
180
  {
181
  "epoch": 0.3462578899909829,
182
- "grad_norm": 1.1883742809295654,
183
  "learning_rate": 5.6526309611002594e-05,
184
- "loss": 0.1032,
185
  "step": 24
186
  },
187
  {
188
  "epoch": 0.3606853020739405,
189
- "grad_norm": 1.3539164066314697,
190
  "learning_rate": 5.327015646150716e-05,
191
- "loss": 0.1446,
192
  "step": 25
193
  },
194
  {
195
  "epoch": 0.3606853020739405,
196
- "eval_loss": 0.08934932947158813,
197
- "eval_runtime": 25.0942,
198
- "eval_samples_per_second": 18.61,
199
- "eval_steps_per_second": 2.351,
200
  "step": 25
201
  },
202
  {
203
  "epoch": 0.3751127141568981,
204
- "grad_norm": 1.3412762880325317,
205
  "learning_rate": 5e-05,
206
- "loss": 0.132,
207
  "step": 26
208
  },
209
  {
210
  "epoch": 0.38954012623985573,
211
- "grad_norm": 1.1308883428573608,
212
  "learning_rate": 4.6729843538492847e-05,
213
- "loss": 0.1017,
214
  "step": 27
215
  },
216
  {
217
  "epoch": 0.40396753832281335,
218
- "grad_norm": 0.8957684636116028,
219
  "learning_rate": 4.347369038899744e-05,
220
- "loss": 0.0924,
221
  "step": 28
222
  },
223
  {
224
  "epoch": 0.41839495040577096,
225
- "grad_norm": 0.8164778351783752,
226
  "learning_rate": 4.0245483899193595e-05,
227
- "loss": 0.0701,
228
  "step": 29
229
  },
230
  {
231
  "epoch": 0.4328223624887286,
232
- "grad_norm": 0.5023563504219055,
233
  "learning_rate": 3.705904774487396e-05,
234
- "loss": 0.0358,
235
  "step": 30
236
  },
237
  {
238
  "epoch": 0.4472497745716862,
239
- "grad_norm": 1.1154674291610718,
240
  "learning_rate": 3.392802673484193e-05,
241
- "loss": 0.115,
242
  "step": 31
243
  },
244
  {
245
  "epoch": 0.4616771866546438,
246
- "grad_norm": 1.0004397630691528,
247
  "learning_rate": 3.086582838174551e-05,
248
- "loss": 0.077,
249
  "step": 32
250
  },
251
  {
252
  "epoch": 0.47610459873760147,
253
- "grad_norm": 0.5923359990119934,
254
  "learning_rate": 2.7885565489049946e-05,
255
- "loss": 0.0419,
256
  "step": 33
257
  },
258
  {
259
  "epoch": 0.4905320108205591,
260
- "grad_norm": 0.723777711391449,
261
  "learning_rate": 2.500000000000001e-05,
262
- "loss": 0.0508,
263
  "step": 34
264
  },
265
  {
266
  "epoch": 0.5049594229035167,
267
- "grad_norm": 0.3847024142742157,
268
  "learning_rate": 2.2221488349019903e-05,
269
- "loss": 0.027,
270
  "step": 35
271
  },
272
  {
273
  "epoch": 0.5193868349864743,
274
- "grad_norm": 0.49491414427757263,
275
  "learning_rate": 1.9561928549563968e-05,
276
- "loss": 0.0316,
277
  "step": 36
278
  },
279
  {
280
  "epoch": 0.5338142470694319,
281
- "grad_norm": 0.8395834565162659,
282
  "learning_rate": 1.703270924499656e-05,
283
- "loss": 0.0529,
284
  "step": 37
285
  },
286
  {
287
  "epoch": 0.5482416591523895,
288
- "grad_norm": 0.8114633560180664,
289
  "learning_rate": 1.4644660940672627e-05,
290
- "loss": 0.0757,
291
  "step": 38
292
  },
293
  {
294
  "epoch": 0.5626690712353472,
295
- "grad_norm": 1.0842156410217285,
296
  "learning_rate": 1.2408009626051137e-05,
297
- "loss": 0.0637,
298
  "step": 39
299
  },
300
  {
301
  "epoch": 0.5770964833183048,
302
- "grad_norm": 0.648339033126831,
303
  "learning_rate": 1.0332332985438248e-05,
304
- "loss": 0.0313,
305
  "step": 40
306
  },
307
  {
308
  "epoch": 0.5915238954012624,
309
- "grad_norm": 0.689995527267456,
310
  "learning_rate": 8.426519384872733e-06,
311
- "loss": 0.0442,
312
  "step": 41
313
  },
314
  {
315
  "epoch": 0.60595130748422,
316
- "grad_norm": 0.36629754304885864,
317
  "learning_rate": 6.698729810778065e-06,
318
- "loss": 0.019,
319
  "step": 42
320
  },
321
  {
322
  "epoch": 0.6203787195671776,
323
- "grad_norm": 0.6916483640670776,
324
  "learning_rate": 5.156362923365588e-06,
325
- "loss": 0.0566,
326
  "step": 43
327
  },
328
  {
329
  "epoch": 0.6348061316501352,
330
- "grad_norm": 0.7285714745521545,
331
  "learning_rate": 3.8060233744356633e-06,
332
- "loss": 0.0803,
333
  "step": 44
334
  },
335
  {
336
  "epoch": 0.6492335437330928,
337
- "grad_norm": 0.2885792553424835,
338
  "learning_rate": 2.653493525244721e-06,
339
- "loss": 0.021,
340
  "step": 45
341
  },
342
  {
343
  "epoch": 0.6636609558160504,
344
- "grad_norm": 0.6232315301895142,
345
  "learning_rate": 1.70370868554659e-06,
346
- "loss": 0.0502,
347
  "step": 46
348
  },
349
  {
350
  "epoch": 0.6780883678990082,
351
- "grad_norm": 0.5776230692863464,
352
  "learning_rate": 9.607359798384785e-07,
353
- "loss": 0.0221,
354
  "step": 47
355
  },
356
  {
357
  "epoch": 0.6925157799819658,
358
- "grad_norm": 0.3192145526409149,
359
  "learning_rate": 4.277569313094809e-07,
360
- "loss": 0.0192,
361
  "step": 48
362
  },
363
  {
364
  "epoch": 0.7069431920649234,
365
- "grad_norm": 0.3708704710006714,
366
  "learning_rate": 1.0705383806982606e-07,
367
- "loss": 0.017,
368
  "step": 49
369
  },
370
  {
371
  "epoch": 0.721370604147881,
372
- "grad_norm": 0.8798678517341614,
373
  "learning_rate": 0.0,
374
- "loss": 0.0488,
375
  "step": 50
376
  },
377
  {
378
  "epoch": 0.721370604147881,
379
- "eval_loss": 0.03170052915811539,
380
- "eval_runtime": 25.0939,
381
- "eval_samples_per_second": 18.61,
382
- "eval_steps_per_second": 2.351,
383
  "step": 50
384
  }
385
  ],
 
1
  {
2
+ "best_metric": 0.03202689066529274,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
  "epoch": 0.721370604147881,
5
  "eval_steps": 25,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.014427412082957619,
13
+ "grad_norm": 12.869671821594238,
14
  "learning_rate": 5e-05,
15
  "loss": 4.8158,
16
  "step": 1
 
18
  {
19
  "epoch": 0.014427412082957619,
20
  "eval_loss": 4.899957656860352,
21
+ "eval_runtime": 25.2347,
22
+ "eval_samples_per_second": 18.506,
23
+ "eval_steps_per_second": 2.338,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.028854824165915238,
28
+ "grad_norm": 12.127988815307617,
29
  "learning_rate": 0.0001,
30
  "loss": 4.3372,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.04328223624887286,
35
+ "grad_norm": 10.47082233428955,
36
  "learning_rate": 9.989294616193017e-05,
37
+ "loss": 3.537,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.057709648331830475,
42
+ "grad_norm": 10.397321701049805,
43
  "learning_rate": 9.957224306869053e-05,
44
+ "loss": 1.8995,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.0721370604147881,
49
+ "grad_norm": 7.817731857299805,
50
  "learning_rate": 9.903926402016153e-05,
51
+ "loss": 1.0094,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.08656447249774572,
56
+ "grad_norm": 3.884652614593506,
57
  "learning_rate": 9.829629131445342e-05,
58
+ "loss": 0.6253,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 0.10099188458070334,
63
+ "grad_norm": 5.4003143310546875,
64
  "learning_rate": 9.73465064747553e-05,
65
+ "loss": 0.4278,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 0.11541929666366095,
70
+ "grad_norm": 2.3437485694885254,
71
  "learning_rate": 9.619397662556435e-05,
72
+ "loss": 0.3056,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 0.12984670874661858,
77
+ "grad_norm": 3.3503763675689697,
78
  "learning_rate": 9.484363707663442e-05,
79
+ "loss": 0.1621,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 0.1442741208295762,
84
+ "grad_norm": 2.092745065689087,
85
  "learning_rate": 9.330127018922194e-05,
86
  "loss": 0.3065,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 0.1587015329125338,
91
+ "grad_norm": 1.8460384607315063,
92
  "learning_rate": 9.157348061512727e-05,
93
+ "loss": 0.2195,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 0.17312894499549145,
98
+ "grad_norm": 1.7845020294189453,
99
  "learning_rate": 8.966766701456177e-05,
100
+ "loss": 0.1868,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 0.18755635707844906,
105
+ "grad_norm": 2.3783392906188965,
106
  "learning_rate": 8.759199037394887e-05,
107
+ "loss": 0.3424,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 0.20198376916140667,
112
+ "grad_norm": 2.957718849182129,
113
  "learning_rate": 8.535533905932738e-05,
114
+ "loss": 0.3271,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 0.2164111812443643,
119
+ "grad_norm": 1.7753095626831055,
120
  "learning_rate": 8.296729075500344e-05,
121
+ "loss": 0.2367,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 0.2308385933273219,
126
+ "grad_norm": 1.1912699937820435,
127
  "learning_rate": 8.043807145043604e-05,
128
+ "loss": 0.152,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 0.24526600541027954,
133
+ "grad_norm": 1.0135375261306763,
134
  "learning_rate": 7.777851165098012e-05,
135
+ "loss": 0.1297,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 0.25969341749323716,
140
+ "grad_norm": 1.2794404029846191,
141
  "learning_rate": 7.500000000000001e-05,
142
+ "loss": 0.1951,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 0.27412082957619477,
147
+ "grad_norm": 0.9989929795265198,
148
  "learning_rate": 7.211443451095007e-05,
149
+ "loss": 0.1089,
150
  "step": 19
151
  },
152
  {
153
  "epoch": 0.2885482416591524,
154
+ "grad_norm": 1.0387895107269287,
155
  "learning_rate": 6.91341716182545e-05,
156
+ "loss": 0.1125,
157
  "step": 20
158
  },
159
  {
160
  "epoch": 0.30297565374211,
161
+ "grad_norm": 0.9766268134117126,
162
  "learning_rate": 6.607197326515808e-05,
163
+ "loss": 0.1028,
164
  "step": 21
165
  },
166
  {
167
  "epoch": 0.3174030658250676,
168
+ "grad_norm": 1.1917319297790527,
169
  "learning_rate": 6.294095225512603e-05,
170
  "loss": 0.1354,
171
  "step": 22
172
  },
173
  {
174
  "epoch": 0.3318304779080252,
175
+ "grad_norm": 1.130652904510498,
176
  "learning_rate": 5.9754516100806423e-05,
177
+ "loss": 0.1233,
178
  "step": 23
179
  },
180
  {
181
  "epoch": 0.3462578899909829,
182
+ "grad_norm": 1.20221745967865,
183
  "learning_rate": 5.6526309611002594e-05,
184
+ "loss": 0.1017,
185
  "step": 24
186
  },
187
  {
188
  "epoch": 0.3606853020739405,
189
+ "grad_norm": 1.392185091972351,
190
  "learning_rate": 5.327015646150716e-05,
191
+ "loss": 0.1485,
192
  "step": 25
193
  },
194
  {
195
  "epoch": 0.3606853020739405,
196
+ "eval_loss": 0.08833841979503632,
197
+ "eval_runtime": 25.2222,
198
+ "eval_samples_per_second": 18.515,
199
+ "eval_steps_per_second": 2.339,
200
  "step": 25
201
  },
202
  {
203
  "epoch": 0.3751127141568981,
204
+ "grad_norm": 1.3019057512283325,
205
  "learning_rate": 5e-05,
206
+ "loss": 0.1304,
207
  "step": 26
208
  },
209
  {
210
  "epoch": 0.38954012623985573,
211
+ "grad_norm": 1.0852798223495483,
212
  "learning_rate": 4.6729843538492847e-05,
213
+ "loss": 0.0982,
214
  "step": 27
215
  },
216
  {
217
  "epoch": 0.40396753832281335,
218
+ "grad_norm": 0.9120009541511536,
219
  "learning_rate": 4.347369038899744e-05,
220
+ "loss": 0.0921,
221
  "step": 28
222
  },
223
  {
224
  "epoch": 0.41839495040577096,
225
+ "grad_norm": 0.8051550984382629,
226
  "learning_rate": 4.0245483899193595e-05,
227
+ "loss": 0.0685,
228
  "step": 29
229
  },
230
  {
231
  "epoch": 0.4328223624887286,
232
+ "grad_norm": 0.5153581500053406,
233
  "learning_rate": 3.705904774487396e-05,
234
+ "loss": 0.0357,
235
  "step": 30
236
  },
237
  {
238
  "epoch": 0.4472497745716862,
239
+ "grad_norm": 1.1851726770401,
240
  "learning_rate": 3.392802673484193e-05,
241
+ "loss": 0.1203,
242
  "step": 31
243
  },
244
  {
245
  "epoch": 0.4616771866546438,
246
+ "grad_norm": 0.9966608881950378,
247
  "learning_rate": 3.086582838174551e-05,
248
+ "loss": 0.0767,
249
  "step": 32
250
  },
251
  {
252
  "epoch": 0.47610459873760147,
253
+ "grad_norm": 0.602455735206604,
254
  "learning_rate": 2.7885565489049946e-05,
255
+ "loss": 0.0417,
256
  "step": 33
257
  },
258
  {
259
  "epoch": 0.4905320108205591,
260
+ "grad_norm": 0.760809600353241,
261
  "learning_rate": 2.500000000000001e-05,
262
+ "loss": 0.053,
263
  "step": 34
264
  },
265
  {
266
  "epoch": 0.5049594229035167,
267
+ "grad_norm": 0.3891076445579529,
268
  "learning_rate": 2.2221488349019903e-05,
269
+ "loss": 0.0259,
270
  "step": 35
271
  },
272
  {
273
  "epoch": 0.5193868349864743,
274
+ "grad_norm": 0.4962649345397949,
275
  "learning_rate": 1.9561928549563968e-05,
276
+ "loss": 0.0317,
277
  "step": 36
278
  },
279
  {
280
  "epoch": 0.5338142470694319,
281
+ "grad_norm": 0.8641317486763,
282
  "learning_rate": 1.703270924499656e-05,
283
+ "loss": 0.0545,
284
  "step": 37
285
  },
286
  {
287
  "epoch": 0.5482416591523895,
288
+ "grad_norm": 0.7959501147270203,
289
  "learning_rate": 1.4644660940672627e-05,
290
+ "loss": 0.0723,
291
  "step": 38
292
  },
293
  {
294
  "epoch": 0.5626690712353472,
295
+ "grad_norm": 1.0905723571777344,
296
  "learning_rate": 1.2408009626051137e-05,
297
+ "loss": 0.0648,
298
  "step": 39
299
  },
300
  {
301
  "epoch": 0.5770964833183048,
302
+ "grad_norm": 0.646719753742218,
303
  "learning_rate": 1.0332332985438248e-05,
304
+ "loss": 0.0293,
305
  "step": 40
306
  },
307
  {
308
  "epoch": 0.5915238954012624,
309
+ "grad_norm": 0.6736879348754883,
310
  "learning_rate": 8.426519384872733e-06,
311
+ "loss": 0.043,
312
  "step": 41
313
  },
314
  {
315
  "epoch": 0.60595130748422,
316
+ "grad_norm": 0.36716800928115845,
317
  "learning_rate": 6.698729810778065e-06,
318
+ "loss": 0.018,
319
  "step": 42
320
  },
321
  {
322
  "epoch": 0.6203787195671776,
323
+ "grad_norm": 0.7141299247741699,
324
  "learning_rate": 5.156362923365588e-06,
325
+ "loss": 0.0563,
326
  "step": 43
327
  },
328
  {
329
  "epoch": 0.6348061316501352,
330
+ "grad_norm": 0.748298168182373,
331
  "learning_rate": 3.8060233744356633e-06,
332
+ "loss": 0.0834,
333
  "step": 44
334
  },
335
  {
336
  "epoch": 0.6492335437330928,
337
+ "grad_norm": 0.29328033328056335,
338
  "learning_rate": 2.653493525244721e-06,
339
+ "loss": 0.0223,
340
  "step": 45
341
  },
342
  {
343
  "epoch": 0.6636609558160504,
344
+ "grad_norm": 0.6507622599601746,
345
  "learning_rate": 1.70370868554659e-06,
346
+ "loss": 0.0515,
347
  "step": 46
348
  },
349
  {
350
  "epoch": 0.6780883678990082,
351
+ "grad_norm": 0.6655824780464172,
352
  "learning_rate": 9.607359798384785e-07,
353
+ "loss": 0.0271,
354
  "step": 47
355
  },
356
  {
357
  "epoch": 0.6925157799819658,
358
+ "grad_norm": 0.3097899258136749,
359
  "learning_rate": 4.277569313094809e-07,
360
+ "loss": 0.018,
361
  "step": 48
362
  },
363
  {
364
  "epoch": 0.7069431920649234,
365
+ "grad_norm": 0.2763792872428894,
366
  "learning_rate": 1.0705383806982606e-07,
367
+ "loss": 0.0143,
368
  "step": 49
369
  },
370
  {
371
  "epoch": 0.721370604147881,
372
+ "grad_norm": 0.8340288400650024,
373
  "learning_rate": 0.0,
374
+ "loss": 0.0485,
375
  "step": 50
376
  },
377
  {
378
  "epoch": 0.721370604147881,
379
+ "eval_loss": 0.03202689066529274,
380
+ "eval_runtime": 25.7723,
381
+ "eval_samples_per_second": 18.12,
382
+ "eval_steps_per_second": 2.289,
383
  "step": 50
384
  }
385
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7acc0cf6c02e346a49a9f44b29df69885aac295931248d24b77f45bc3730482
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e04ff39143d3a08df1503997c88c750664e522682c1aba5a43153d07c12a6873
3
  size 6776