Barleysack commited on
Commit
d8cfd17
Β·
1 Parent(s): 9f96c80

hard time tho

Browse files
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_samples": 474,
4
+ "exact_match": 68.75,
5
+ "f1": 76.5876322751323,
6
+ "init_mem_cpu_alloc_delta": 17783119,
7
+ "init_mem_cpu_peaked_delta": 18230,
8
+ "init_mem_gpu_alloc_delta": 1414861824,
9
+ "init_mem_gpu_peaked_delta": 67165696,
10
+ "train_mem_cpu_alloc_delta": 2991504,
11
+ "train_mem_cpu_peaked_delta": 264499229,
12
+ "train_mem_gpu_alloc_delta": 4256099328,
13
+ "train_mem_gpu_peaked_delta": 11330810880,
14
+ "train_runtime": 8323.4424,
15
+ "train_samples": 7978,
16
+ "train_samples_per_second": 1.199
17
+ }
optimizer.pt β†’ checkpoint-9500/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db17e7ecb5d8209144c77f48530f20623e5012d732c516b793a3d7d34df7949b
3
  size 2819470077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2939c7c78324e191101544dc3cb7ab8bdc8c8bf6ac6956b10a9ebe29a2c032b5
3
  size 2819470077
pytorch_model.bin β†’ checkpoint-9500/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46083e65954b4d081bd00995edde7772af0098346f2d0c19df14480d01797636
3
  size 1413985490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d1edd8b3db3558e5ad79f866e47ff5a4a502a2e99b60d82805de41bcbf3ad0f
3
  size 1413985490
scheduler.pt β†’ checkpoint-9500/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04876499127dd83ce05f49419379d3ce78d67b20405c3a81867244d553097304
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58b1a986b1dea83d69bef3937079a177d0187daf845e863be53b4af16af445a6
3
  size 623
special_tokens_map.json β†’ checkpoint-9500/special_tokens_map.json RENAMED
File without changes
tokenizer_config.json β†’ checkpoint-9500/tokenizer_config.json RENAMED
@@ -1 +1 @@
1
- {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "never_split": null, "bos_token": "[CLS]", "eos_token": "[SEP]", "model_max_length": 512, "special_tokens_map_file": "/opt/ml/.cache/huggingface/transformers/1a24ab4628028ed80dea35ce3334a636dc656fd9a17a09bad377f88f0cbecdac.70c17d6e4d492c8f24f5bb97ab56c7f272e947112c6faf9dd846da42ba13eb23", "name_or_path": "klue/roberta-large", "tokenizer_class": "BertTokenizer"}
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "never_split": null, "bos_token": "[CLS]", "eos_token": "[SEP]", "model_max_length": 512, "tokenizer_class": "BertTokenizer", "special_tokens_map_file": "/opt/ml/.cache/huggingface/transformers/1a24ab4628028ed80dea35ce3334a636dc656fd9a17a09bad377f88f0cbecdac.70c17d6e4d492c8f24f5bb97ab56c7f272e947112c6faf9dd846da42ba13eb23", "name_or_path": "klue/roberta-large"}
checkpoint-9500/trainer_state.json ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.519038076152304,
5
+ "global_step": 9500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "learning_rate": 9.997522904167843e-07,
13
+ "loss": 26.4633,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.2,
18
+ "learning_rate": 9.990094071072878e-07,
19
+ "loss": 9.8101,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.3,
24
+ "learning_rate": 9.977720861487699e-07,
25
+ "loss": 5.8569,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.4,
30
+ "learning_rate": 9.96041553526267e-07,
31
+ "loss": 5.5213,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.5,
36
+ "learning_rate": 9.938195239178374e-07,
37
+ "loss": 5.1344,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.6,
42
+ "learning_rate": 9.911081989955939e-07,
43
+ "loss": 4.9245,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 0.7,
48
+ "learning_rate": 9.879102652442023e-07,
49
+ "loss": 4.5925,
50
+ "step": 700
51
+ },
52
+ {
53
+ "epoch": 0.8,
54
+ "learning_rate": 9.842288912990095e-07,
55
+ "loss": 4.676,
56
+ "step": 800
57
+ },
58
+ {
59
+ "epoch": 0.9,
60
+ "learning_rate": 9.80067724806438e-07,
61
+ "loss": 4.4576,
62
+ "step": 900
63
+ },
64
+ {
65
+ "epoch": 1.0,
66
+ "learning_rate": 9.754308888097582e-07,
67
+ "loss": 4.2986,
68
+ "step": 1000
69
+ },
70
+ {
71
+ "epoch": 1.1,
72
+ "learning_rate": 9.703229776638185e-07,
73
+ "loss": 3.8739,
74
+ "step": 1100
75
+ },
76
+ {
77
+ "epoch": 1.2,
78
+ "learning_rate": 9.647490524827833e-07,
79
+ "loss": 3.5629,
80
+ "step": 1200
81
+ },
82
+ {
83
+ "epoch": 1.3,
84
+ "learning_rate": 9.587146361253867e-07,
85
+ "loss": 2.9575,
86
+ "step": 1300
87
+ },
88
+ {
89
+ "epoch": 1.4,
90
+ "learning_rate": 9.522257077226716e-07,
91
+ "loss": 2.8326,
92
+ "step": 1400
93
+ },
94
+ {
95
+ "epoch": 1.5,
96
+ "learning_rate": 9.452886967536388e-07,
97
+ "loss": 2.7891,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 1.6,
102
+ "learning_rate": 9.379104766746722e-07,
103
+ "loss": 2.4971,
104
+ "step": 1600
105
+ },
106
+ {
107
+ "epoch": 1.7,
108
+ "learning_rate": 9.30098358109054e-07,
109
+ "loss": 2.4297,
110
+ "step": 1700
111
+ },
112
+ {
113
+ "epoch": 1.8,
114
+ "learning_rate": 9.218600816033199e-07,
115
+ "loss": 2.183,
116
+ "step": 1800
117
+ },
118
+ {
119
+ "epoch": 1.9,
120
+ "learning_rate": 9.132038099576289e-07,
121
+ "loss": 2.0732,
122
+ "step": 1900
123
+ },
124
+ {
125
+ "epoch": 2.0,
126
+ "learning_rate": 9.041381201377467e-07,
127
+ "loss": 2.2732,
128
+ "step": 2000
129
+ },
130
+ {
131
+ "epoch": 2.1,
132
+ "learning_rate": 8.94671994776661e-07,
133
+ "loss": 1.7384,
134
+ "step": 2100
135
+ },
136
+ {
137
+ "epoch": 2.2,
138
+ "learning_rate": 8.84814813274243e-07,
139
+ "loss": 1.8955,
140
+ "step": 2200
141
+ },
142
+ {
143
+ "epoch": 2.3,
144
+ "learning_rate": 8.745763425037795e-07,
145
+ "loss": 2.0546,
146
+ "step": 2300
147
+ },
148
+ {
149
+ "epoch": 2.4,
150
+ "learning_rate": 8.639667271345798e-07,
151
+ "loss": 1.6855,
152
+ "step": 2400
153
+ },
154
+ {
155
+ "epoch": 2.51,
156
+ "learning_rate": 8.529964795802484e-07,
157
+ "loss": 1.7512,
158
+ "step": 2500
159
+ },
160
+ {
161
+ "epoch": 2.61,
162
+ "learning_rate": 8.416764695825834e-07,
163
+ "loss": 1.7856,
164
+ "step": 2600
165
+ },
166
+ {
167
+ "epoch": 2.71,
168
+ "learning_rate": 8.300179134414187e-07,
169
+ "loss": 1.7027,
170
+ "step": 2700
171
+ },
172
+ {
173
+ "epoch": 2.81,
174
+ "learning_rate": 8.180323629010848e-07,
175
+ "loss": 1.5552,
176
+ "step": 2800
177
+ },
178
+ {
179
+ "epoch": 2.91,
180
+ "learning_rate": 8.057316937044976e-07,
181
+ "loss": 1.6427,
182
+ "step": 2900
183
+ },
184
+ {
185
+ "epoch": 3.01,
186
+ "learning_rate": 7.931280938262168e-07,
187
+ "loss": 1.7996,
188
+ "step": 3000
189
+ },
190
+ {
191
+ "epoch": 3.11,
192
+ "learning_rate": 7.802340513961341e-07,
193
+ "loss": 1.4622,
194
+ "step": 3100
195
+ },
196
+ {
197
+ "epoch": 3.21,
198
+ "learning_rate": 7.670623423257547e-07,
199
+ "loss": 1.4137,
200
+ "step": 3200
201
+ },
202
+ {
203
+ "epoch": 3.31,
204
+ "learning_rate": 7.536260176493347e-07,
205
+ "loss": 1.4894,
206
+ "step": 3300
207
+ },
208
+ {
209
+ "epoch": 3.41,
210
+ "learning_rate": 7.399383905924165e-07,
211
+ "loss": 1.4263,
212
+ "step": 3400
213
+ },
214
+ {
215
+ "epoch": 3.51,
216
+ "learning_rate": 7.26013023380574e-07,
217
+ "loss": 1.491,
218
+ "step": 3500
219
+ },
220
+ {
221
+ "epoch": 3.61,
222
+ "learning_rate": 7.118637138014395e-07,
223
+ "loss": 1.2858,
224
+ "step": 3600
225
+ },
226
+ {
227
+ "epoch": 3.71,
228
+ "learning_rate": 6.975044815333281e-07,
229
+ "loss": 1.2851,
230
+ "step": 3700
231
+ },
232
+ {
233
+ "epoch": 3.81,
234
+ "learning_rate": 6.829495542540013e-07,
235
+ "loss": 1.3229,
236
+ "step": 3800
237
+ },
238
+ {
239
+ "epoch": 3.91,
240
+ "learning_rate": 6.682133535433393e-07,
241
+ "loss": 1.3675,
242
+ "step": 3900
243
+ },
244
+ {
245
+ "epoch": 4.01,
246
+ "learning_rate": 6.533104805938873e-07,
247
+ "loss": 1.3686,
248
+ "step": 4000
249
+ },
250
+ {
251
+ "epoch": 4.11,
252
+ "learning_rate": 6.382557017434331e-07,
253
+ "loss": 1.0781,
254
+ "step": 4100
255
+ },
256
+ {
257
+ "epoch": 4.21,
258
+ "learning_rate": 6.230639338439549e-07,
259
+ "loss": 1.1388,
260
+ "step": 4200
261
+ },
262
+ {
263
+ "epoch": 4.31,
264
+ "learning_rate": 6.077502294814311e-07,
265
+ "loss": 1.0966,
266
+ "step": 4300
267
+ },
268
+ {
269
+ "epoch": 4.41,
270
+ "learning_rate": 5.923297620611622e-07,
271
+ "loss": 1.1764,
272
+ "step": 4400
273
+ },
274
+ {
275
+ "epoch": 4.51,
276
+ "learning_rate": 5.76817810773379e-07,
277
+ "loss": 1.2625,
278
+ "step": 4500
279
+ },
280
+ {
281
+ "epoch": 4.61,
282
+ "learning_rate": 5.612297454540351e-07,
283
+ "loss": 1.1718,
284
+ "step": 4600
285
+ },
286
+ {
287
+ "epoch": 4.71,
288
+ "learning_rate": 5.455810113557839e-07,
289
+ "loss": 1.218,
290
+ "step": 4700
291
+ },
292
+ {
293
+ "epoch": 4.81,
294
+ "learning_rate": 5.298871138442307e-07,
295
+ "loss": 1.2203,
296
+ "step": 4800
297
+ },
298
+ {
299
+ "epoch": 4.91,
300
+ "learning_rate": 5.14163603034622e-07,
301
+ "loss": 1.0743,
302
+ "step": 4900
303
+ },
304
+ {
305
+ "epoch": 5.01,
306
+ "learning_rate": 4.984260583841952e-07,
307
+ "loss": 1.1825,
308
+ "step": 5000
309
+ },
310
+ {
311
+ "epoch": 5.11,
312
+ "learning_rate": 4.82690073255455e-07,
313
+ "loss": 0.8899,
314
+ "step": 5100
315
+ },
316
+ {
317
+ "epoch": 5.21,
318
+ "learning_rate": 4.6697123946567224e-07,
319
+ "loss": 0.9867,
320
+ "step": 5200
321
+ },
322
+ {
323
+ "epoch": 5.31,
324
+ "learning_rate": 4.512851318379138e-07,
325
+ "loss": 0.9842,
326
+ "step": 5300
327
+ },
328
+ {
329
+ "epoch": 5.41,
330
+ "learning_rate": 4.3564729276891087e-07,
331
+ "loss": 1.1441,
332
+ "step": 5400
333
+ },
334
+ {
335
+ "epoch": 5.51,
336
+ "learning_rate": 4.2007321682905594e-07,
337
+ "loss": 0.9965,
338
+ "step": 5500
339
+ },
340
+ {
341
+ "epoch": 5.61,
342
+ "learning_rate": 4.0457833540978923e-07,
343
+ "loss": 0.9929,
344
+ "step": 5600
345
+ },
346
+ {
347
+ "epoch": 5.71,
348
+ "learning_rate": 3.89178001433584e-07,
349
+ "loss": 1.152,
350
+ "step": 5700
351
+ },
352
+ {
353
+ "epoch": 5.81,
354
+ "learning_rate": 3.7388747414168295e-07,
355
+ "loss": 0.8896,
356
+ "step": 5800
357
+ },
358
+ {
359
+ "epoch": 5.91,
360
+ "learning_rate": 3.5872190397465635e-07,
361
+ "loss": 1.1022,
362
+ "step": 5900
363
+ },
364
+ {
365
+ "epoch": 6.01,
366
+ "learning_rate": 3.436963175607656e-07,
367
+ "loss": 0.8925,
368
+ "step": 6000
369
+ },
370
+ {
371
+ "epoch": 6.11,
372
+ "learning_rate": 3.288256028270033e-07,
373
+ "loss": 0.9647,
374
+ "step": 6100
375
+ },
376
+ {
377
+ "epoch": 6.21,
378
+ "learning_rate": 3.141244942475647e-07,
379
+ "loss": 0.7659,
380
+ "step": 6200
381
+ },
382
+ {
383
+ "epoch": 6.31,
384
+ "learning_rate": 2.996075582443658e-07,
385
+ "loss": 1.0098,
386
+ "step": 6300
387
+ },
388
+ {
389
+ "epoch": 6.41,
390
+ "learning_rate": 2.8528917875407433e-07,
391
+ "loss": 1.0704,
392
+ "step": 6400
393
+ },
394
+ {
395
+ "epoch": 6.51,
396
+ "learning_rate": 2.711835429759539e-07,
397
+ "loss": 1.0039,
398
+ "step": 6500
399
+ },
400
+ {
401
+ "epoch": 6.61,
402
+ "learning_rate": 2.573046273146427e-07,
403
+ "loss": 0.9493,
404
+ "step": 6600
405
+ },
406
+ {
407
+ "epoch": 6.71,
408
+ "learning_rate": 2.4366618353179644e-07,
409
+ "loss": 0.9582,
410
+ "step": 6700
411
+ },
412
+ {
413
+ "epoch": 6.81,
414
+ "learning_rate": 2.30281725120316e-07,
415
+ "loss": 0.8875,
416
+ "step": 6800
417
+ },
418
+ {
419
+ "epoch": 6.91,
420
+ "learning_rate": 2.1716451391466006e-07,
421
+ "loss": 0.8132,
422
+ "step": 6900
423
+ },
424
+ {
425
+ "epoch": 7.01,
426
+ "learning_rate": 2.0432754695051136e-07,
427
+ "loss": 1.0703,
428
+ "step": 7000
429
+ },
430
+ {
431
+ "epoch": 7.11,
432
+ "learning_rate": 1.9178354358681548e-07,
433
+ "loss": 1.0679,
434
+ "step": 7100
435
+ },
436
+ {
437
+ "epoch": 7.21,
438
+ "learning_rate": 1.7954493290295309e-07,
439
+ "loss": 0.8124,
440
+ "step": 7200
441
+ },
442
+ {
443
+ "epoch": 7.31,
444
+ "learning_rate": 1.6762384138353075e-07,
445
+ "loss": 0.9779,
446
+ "step": 7300
447
+ },
448
+ {
449
+ "epoch": 7.41,
450
+ "learning_rate": 1.5603208090299496e-07,
451
+ "loss": 0.7439,
452
+ "step": 7400
453
+ },
454
+ {
455
+ "epoch": 7.52,
456
+ "learning_rate": 1.4478113702197569e-07,
457
+ "loss": 0.858,
458
+ "step": 7500
459
+ },
460
+ {
461
+ "epoch": 7.62,
462
+ "learning_rate": 1.3388215760695098e-07,
463
+ "loss": 1.0146,
464
+ "step": 7600
465
+ },
466
+ {
467
+ "epoch": 7.72,
468
+ "learning_rate": 1.2334594178451424e-07,
469
+ "loss": 0.7714,
470
+ "step": 7700
471
+ },
472
+ {
473
+ "epoch": 7.82,
474
+ "learning_rate": 1.1318292924118584e-07,
475
+ "loss": 0.8889,
476
+ "step": 7800
477
+ },
478
+ {
479
+ "epoch": 7.92,
480
+ "learning_rate": 1.0340318987937097e-07,
481
+ "loss": 1.0001,
482
+ "step": 7900
483
+ },
484
+ {
485
+ "epoch": 8.02,
486
+ "learning_rate": 9.401641383971476e-08,
487
+ "loss": 0.9567,
488
+ "step": 8000
489
+ },
490
+ {
491
+ "epoch": 8.12,
492
+ "learning_rate": 8.503190189973914e-08,
493
+ "loss": 0.834,
494
+ "step": 8100
495
+ },
496
+ {
497
+ "epoch": 8.22,
498
+ "learning_rate": 7.645855625827657e-08,
499
+ "loss": 0.9169,
500
+ "step": 8200
501
+ },
502
+ {
503
+ "epoch": 8.32,
504
+ "learning_rate": 6.830487171482935e-08,
505
+ "loss": 0.841,
506
+ "step": 8300
507
+ },
508
+ {
509
+ "epoch": 8.42,
510
+ "learning_rate": 6.057892725259717e-08,
511
+ "loss": 1.0763,
512
+ "step": 8400
513
+ },
514
+ {
515
+ "epoch": 8.52,
516
+ "learning_rate": 5.328837803351083e-08,
517
+ "loss": 0.8885,
518
+ "step": 8500
519
+ },
520
+ {
521
+ "epoch": 8.62,
522
+ "learning_rate": 4.644044781320422e-08,
523
+ "loss": 0.7986,
524
+ "step": 8600
525
+ },
526
+ {
527
+ "epoch": 8.72,
528
+ "learning_rate": 4.004192178344029e-08,
529
+ "loss": 0.701,
530
+ "step": 8700
531
+ },
532
+ {
533
+ "epoch": 8.82,
534
+ "learning_rate": 3.4099139849083304e-08,
535
+ "loss": 0.9347,
536
+ "step": 8800
537
+ },
538
+ {
539
+ "epoch": 8.92,
540
+ "learning_rate": 2.8617990346277655e-08,
541
+ "loss": 1.0685,
542
+ "step": 8900
543
+ },
544
+ {
545
+ "epoch": 9.02,
546
+ "learning_rate": 2.3603904208058688e-08,
547
+ "loss": 0.8046,
548
+ "step": 9000
549
+ },
550
+ {
551
+ "epoch": 9.12,
552
+ "learning_rate": 1.9061849583176636e-08,
553
+ "loss": 0.8515,
554
+ "step": 9100
555
+ },
556
+ {
557
+ "epoch": 9.22,
558
+ "learning_rate": 1.499632691346375e-08,
559
+ "loss": 0.8998,
560
+ "step": 9200
561
+ },
562
+ {
563
+ "epoch": 9.32,
564
+ "learning_rate": 1.1411364474624264e-08,
565
+ "loss": 0.8923,
566
+ "step": 9300
567
+ },
568
+ {
569
+ "epoch": 9.42,
570
+ "learning_rate": 8.31051438486441e-09,
571
+ "loss": 0.7543,
572
+ "step": 9400
573
+ },
574
+ {
575
+ "epoch": 9.52,
576
+ "learning_rate": 5.696849085317645e-09,
577
+ "loss": 0.883,
578
+ "step": 9500
579
+ }
580
+ ],
581
+ "max_steps": 9980,
582
+ "num_train_epochs": 10,
583
+ "total_flos": 0.0,
584
+ "trial_name": null,
585
+ "trial_params": null
586
+ }
training_args.bin β†’ checkpoint-9500/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afe3ac874b2d51fc44874710f9192cd0e67d40eba3403f25e0f6abd7d2dab16f
3
- size 2863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574e5e2a956b3e59901c3ed9848a90b98bd67a72f27a5e69b189b003d4e05fd4
3
+ size 2287
vocab.txt β†’ checkpoint-9500/vocab.txt RENAMED
File without changes
config copy.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "QAWithLSTMModel",
3
+ "architectures": [
4
+ "QAWithLSTMModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "tokenizer_class": "BertTokenizer",
23
+ "transformers_version": "4.4.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "architectures": [
3
- "klue/roberta-large"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "bos_token_id": 0,
7
- "classifier_dropout": null,
8
- "eos_token_id": 2,
9
- "gradient_checkpointing": false,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 1024,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 4096,
15
- "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 514,
17
- "model_type": "roberta",
18
- "num_attention_heads": 16,
19
- "num_hidden_layers": 24,
20
- "pad_token_id": 1,
21
- "position_embedding_type": "absolute",
22
- "tokenizer_class": "BertTokenizer",
23
- "transformers_version": "4.11.3",
24
- "type_vocab_size": 1,
25
- "use_cache": true,
26
- "vocab_size": 32000
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_samples": 474,
4
+ "exact_match": 68.75,
5
+ "f1": 76.5876322751323
6
+ }
nbest_predictions.json ADDED
The diff for this file is too large to render. See raw diff
 
predictions.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mrc-0-003264": "ν•œλ³΄μ² κ°•",
3
+ "mrc-0-004762": "1871λ…„",
4
+ "mrc-1-001810": "λ‚˜λ­‡μžŽ",
5
+ "mrc-1-000219": "κΈˆλŒ€μ•Ό",
6
+ "mrc-1-000285": "μˆ˜ν‰μ  관계",
7
+ "mrc-0-005106": "쇼와 μ²œν™©μ˜ μ˜₯μŒλ°©μ†‘",
8
+ "mrc-0-002076": "μ½”μΉ­ μŠ€ν‹°μΉ˜",
9
+ "mrc-1-000414": "감염병",
10
+ "mrc-0-002875": "슀페인",
11
+ "mrc-0-003828": "20μ„ΈκΈ° 초",
12
+ "mrc-0-002778": "\"5μ›”μ˜ μ™•\"",
13
+ "mrc-0-003931": "'일급 λΉ„λ°€ ν”„λ‘œμ νŠΈ 2501'",
14
+ "mrc-0-002485": "ν…Œν—€λž€",
15
+ "mrc-0-004483": "μ—­μ‚¬κ΅μœ‘κ³Όμ •κ°œλ°œμΆ”μ§„μœ„μ›νšŒ",
16
+ "mrc-0-003032": "1967λ…„ 8μ›” 16일",
17
+ "mrc-1-000724": "1963λ…„",
18
+ "mrc-0-002138": "쿠빌라이",
19
+ "mrc-0-003727": "γ€ˆμ€‘μ•™μΌλ³΄γ€‰",
20
+ "mrc-0-003115": "이이노야 μ„±",
21
+ "mrc-0-003088": "전체 4μˆœμœ„",
22
+ "mrc-0-005296": "λ‡Œλ¬Ό",
23
+ "mrc-1-001495": "μ€€μœ΅ν•©μ„± μ²œμ—°λ‘",
24
+ "mrc-0-005289": "λ°μŠ€νƒ± μž₯κ΅°",
25
+ "mrc-0-002240": "λ°•νŠΈλ¦¬μ•„",
26
+ "mrc-0-000248": "'μ§„μ „(ι™³η”°)'이라 μƒˆκ²¨μ§„ 기와쑰각",
27
+ "mrc-0-001846": "γ€Šκ΅­κ°€γ€‹",
28
+ "mrc-1-001653": "2011λ…„ 3μ›” 19일",
29
+ "mrc-0-000785": "브라질 포λ₯΄νˆ¬κ°ˆμ–΄",
30
+ "mrc-0-001519": "λ¦Ώμ§€λ‹Ή",
31
+ "mrc-0-002457": "강도역왕",
32
+ "mrc-1-001479": "ν¬λ“œ κ·Ήμž₯",
33
+ "mrc-0-001707": "만주ꡭ 관리",
34
+ "mrc-0-002280": "γ€Žν˜‘λ™μ‘°ν•©μ— κ΄€ν•˜μ—¬γ€",
35
+ "mrc-1-001023": "도고쿠",
36
+ "mrc-0-001022": "λ£¨ν”Όμ˜ 할아버지인 λͺ½ν‚€ D. κ°€ν”„",
37
+ "mrc-0-001150": "μ‘°λ₯˜",
38
+ "mrc-0-001807": "κ΄‘μ£Όκ΅λ„μ†Œ",
39
+ "mrc-0-001161": "λ² κ²Œν‹°μš°μŠ€",
40
+ "mrc-0-004654": "κ°€μ˜€μŠ μ‹œ",
41
+ "mrc-0-002468": "망치",
42
+ "mrc-0-001870": "마리즈 ꡐ수",
43
+ "mrc-1-000753": "\"νŠΈλ‘œμ΄μ•„ λ…Έλ°”\"",
44
+ "mrc-0-001073": "ν† μš”μΌκ³Ό μΌμš”μΌ 이틀",
45
+ "mrc-1-001116": "μ˜μƒλŒ€μ‚¬",
46
+ "mrc-1-000995": "μ‹œλͺ¨ν‚€νƒ€ λ°˜λ„",
47
+ "mrc-0-002175": "β€˜μš°μœ μ˜ 바닀’",
48
+ "mrc-0-001894": "μž₯μ–΄",
49
+ "mrc-0-000337": "μ†Œλ ¨",
50
+ "mrc-0-002013": "μ „μΉ˜",
51
+ "mrc-1-001766": "ν‘μƒ‰μœ‘(黑色肉)",
52
+ "mrc-0-002333": "욱",
53
+ "mrc-1-001814": "독일 인민당",
54
+ "mrc-0-001554": "ν•˜κΈ°λ…Έ μ—­",
55
+ "mrc-0-002247": "회칠",
56
+ "mrc-0-000547": "황강닀리",
57
+ "mrc-1-001177": "직업 ꡐ윑",
58
+ "mrc-0-000939": "곡산당",
59
+ "mrc-0-000223": "음λ ₯ μ •μ›”",
60
+ "mrc-0-005386": "쑰계쒅",
61
+ "mrc-0-000118": "베이징",
62
+ "mrc-0-003529": "독일ꡰ",
63
+ "mrc-1-000459": "1932λ…„",
64
+ "mrc-0-003801": "μ œμ„œμ§€μ „(齊θ₯ΏδΉ‹ζˆ°)",
65
+ "mrc-0-004342": "μž¦μ€ μ΄μŠ¬λΉ„",
66
+ "mrc-0-001311": "μΆœμƒ μ²œκΆλ„",
67
+ "mrc-0-002931": "λ‚˜λ§μ—¬μ΄ˆ λΆˆμƒ",
68
+ "mrc-0-002767": "1939λ…„",
69
+ "mrc-1-000961": "창 절제술",
70
+ "mrc-1-000796": "도버 λ°€",
71
+ "mrc-1-000449": "μ—¬μ •ν˜„(呂正鉉)",
72
+ "mrc-0-004133": "κ΄‘λ°°",
73
+ "mrc-0-003576": "μšΈμ‚°",
74
+ "mrc-0-002692": "λ•…(은색)κ³Ό ν•˜λŠ˜(κΈˆμƒ‰)을 λ§Ίκ³  ν‘ΈλŠ” κΆŒν•œ",
75
+ "mrc-0-004899": "μ €μˆ˜μ§€",
76
+ "mrc-0-003677": "ν€˜μ΄μ»€ ꡐ도",
77
+ "mrc-1-000127": "피에λ₯΄ 였주둜",
78
+ "mrc-0-005412": "μ— λ„·λ―Έλ””μ–΄ μ§€λΆ„μ˜ 일뢀",
79
+ "mrc-0-000561": "μŠ€νƒˆλ¦°κ·ΈλΌλ“œ μ „νˆ¬",
80
+ "mrc-0-004268": "제 3자",
81
+ "mrc-1-000835": "νŽ˜μ΄νŒ”",
82
+ "mrc-0-001704": "가리타 νžˆμ‚¬λ…Έλ¦¬μ—",
83
+ "mrc-1-001132": "점수",
84
+ "mrc-0-002512": "μ½”μŠ€λͺ¨μΌ€λΌν†±μŠ€",
85
+ "mrc-0-003017": "μ‚¬μ±„νšŒμ‚¬",
86
+ "mrc-0-000215": "μŠ€μœ„μŠ€",
87
+ "mrc-0-002873": "\"κ³΅μœ μ§€μ˜ λΉ„κ·Ή\"",
88
+ "mrc-0-003118": "계급 λͺ¨μˆœ",
89
+ "mrc-1-000384": "해리 트루먼 λŒ€ν†΅λ Ή",
90
+ "mrc-0-005270": "λ¬΄μœ„νƒœμˆ˜",
91
+ "mrc-0-002981": "ν¬λΌμš΄λΌμ΄ν„°",
92
+ "mrc-1-000158": "1990λ…„",
93
+ "mrc-0-002189": "\"인간과 μš”κ΄΄μ˜ μ™„μ „ν•œ 평등\"",
94
+ "mrc-0-000905": "κ²½μ• μ™•",
95
+ "mrc-0-001198": "λ„λ„λ“œ νŠΈλŸΌν”„ λ―Έκ΅­ λŒ€ν†΅λ Ή",
96
+ "mrc-0-003947": "λ°”λ₯΄ν†¨λ‘œλ©”μ˜€ λΈŒλ€ΌκΈ°μ—λ₯΄ μ‹ λΆ€",
97
+ "mrc-1-001328": "유ꡐ",
98
+ "mrc-0-000166": "κ²½μœ„λŒ€μ‹ 망원경",
99
+ "mrc-0-004090": "λ‚¨νμŠˆ μžλ™μ°¨ μ „μš© λ„λ‘œ",
100
+ "mrc-0-003522": "세싀리아 페인",
101
+ "mrc-1-001398": "μ›Ή 2.0",
102
+ "mrc-0-000355": "μˆ˜λ…€",
103
+ "mrc-0-002906": "λ§¨ν•΄νŠΌ",
104
+ "mrc-0-001590": "μƒλŒ€μ„±μ΄λ‘ ",
105
+ "mrc-1-001522": "λŒ€μ œ μ†κΆŒ",
106
+ "mrc-0-004307": "λ§ˆκ·Έλ„¨ν‹°μš°μŠ€",
107
+ "mrc-0-002471": "μ œμž„μŠ€ λ·°μΊλ„Œ",
108
+ "mrc-1-001313": "트렁크",
109
+ "mrc-0-004083": "데코행진",
110
+ "mrc-0-005186": "μ•½ 600λ…„",
111
+ "mrc-0-004197": "κΉƒν„Έ μ…”ν‹€μ½•μ˜ 타ꡬ감을 μ„ ν˜Έν•˜κ³ , λ˜ν•œ ν”ŒλΌμŠ€ν‹±λ³΄λ‹€ κΉƒν„Έ 셔틀콕이 μ •κ΅ν•œ μ»¨νŠΈλ‘€μ„ ν•˜κΈ°μ— 보닀 더 μ ν•©ν•˜κΈ° λ•Œλ¬Έ",
112
+ "mrc-0-003208": "1998λ…„",
113
+ "mrc-1-000297": "λ“œλΌ",
114
+ "mrc-1-000358": "κ°€λ₯΄λ―ΈμŠˆνŒŒλ₯΄ν…ν‚€λ₯΄ν—¨",
115
+ "mrc-1-001785": "μ •νƒœμ μΈ 자본주의",
116
+ "mrc-0-005042": "λ°λ―Έμ•ˆ",
117
+ "mrc-1-000839": "λͺ©",
118
+ "mrc-0-004677": "34자",
119
+ "mrc-0-003564": "ν˜Όλ¬˜μ§€",
120
+ "mrc-0-004202": "λ°”λ₯΄λ°”λ‘œμ‚¬ μž‘μ „",
121
+ "mrc-0-001486": "동계건쑰(wintertrocken)",
122
+ "mrc-1-000291": "이질",
123
+ "mrc-0-003753": "쀑간 생���물",
124
+ "mrc-0-003033": "귀인 평ν–₯κ³Ό ν–‰λ™μ˜ 상관 관계",
125
+ "mrc-0-005478": "μ „λ₯˜",
126
+ "mrc-0-005155": "기독ꡐ",
127
+ "mrc-1-000516": "1945λ…„",
128
+ "mrc-0-002679": "λΈŒλ¦¬νŠΌμΈλ“€",
129
+ "mrc-0-004527": "λ―Έκ΅­",
130
+ "mrc-0-001980": "8기의 수혈유ꡬ, 1기",
131
+ "mrc-0-004495": "μ „μŸ",
132
+ "mrc-1-000037": "λ‡Œμ‘Έμ€‘",
133
+ "mrc-0-004092": "κΉ€μˆ˜ν™˜ μΆ”κΈ°κ²½",
134
+ "mrc-0-004015": "4λ…„ μ—°μƒμ˜ κΉ€μ˜₯μ„±(ι‡‘ηŽ‰θ²)κ³Ό κ²°ν˜Όν–ˆλ‹€. 이후 κ·ΈλŠ” 60λ…„",
135
+ "mrc-0-000707": "νŽœμ‹€λ² μ΄λ‹ˆμ•„κΈ°",
136
+ "mrc-1-000658": "μ„œλ‚¨μͺ½",
137
+ "mrc-1-001446": "ν™”μ΄νŠΈ 베이슀",
138
+ "mrc-0-000412": "κ΅μˆ˜ν˜•",
139
+ "mrc-0-001288": "곽상",
140
+ "mrc-1-000197": "신도 케이",
141
+ "mrc-0-002253": "λ””μ˜€κ²Œλ„€μŠ€",
142
+ "mrc-1-000367": "λΉ„λ°€ νˆ¬ν‘œ",
143
+ "mrc-0-003906": "111ν™”",
144
+ "mrc-0-001254": "μΉ΄λˆ„",
145
+ "mrc-0-003289": "μ•Œν”„μŠ€ μ „κΈ° μ£Όμ‹νšŒμ‚¬",
146
+ "mrc-0-004435": "ν¬λ¦¬μŠ€νƒ€",
147
+ "mrc-0-003844": "λ°°μƒκΈˆ",
148
+ "mrc-0-005355": "β€œμ–΄λ–€ κ²½μš°μ—λ„ ν–‰ν•  μˆ˜β€",
149
+ "mrc-0-000521": "속도별 배치",
150
+ "mrc-1-000066": "1975λ…„",
151
+ "mrc-0-001952": "μ œλΉ„μ΄ˆλ¦¬ν˜•",
152
+ "mrc-0-002462": "석달",
153
+ "mrc-1-000899": "보은ꡰ 남μͺ½ μƒμš©λ¦¬(ν˜„ μ˜λ™κ΅° μš©μ‚°λ©΄ μƒμš©λ¦¬) λ§ˆμ„",
154
+ "mrc-0-002886": "νƒœν™”κ΄€",
155
+ "mrc-0-004454": "κ±°λŒ€ 고래",
156
+ "mrc-0-001646": "ν•˜λ…Έμ΄",
157
+ "mrc-1-001534": "토끼λ₯Ό 보고도 κ·Έλƒ₯ 뛰어갔기에",
158
+ "mrc-1-000918": "μ’Œμš°λŒ€μΉ­",
159
+ "mrc-0-005046": "1913λ…„",
160
+ "mrc-1-001611": "λΌμžκ·Έλ¦¬ν•˜",
161
+ "mrc-0-005222": "λŒ€λ Ή",
162
+ "mrc-0-004879": "κ±Έν”„ μΉ΄λ₯΄ν…”",
163
+ "mrc-1-001481": "ν”Όν„° μΌ€μŠ€μΉ΄νŠΈ μ™“μŠ¨",
164
+ "mrc-0-003637": "좩격파",
165
+ "mrc-0-003882": "λŒ€ν•œλ―Όκ΅­ 3보병사단",
166
+ "mrc-1-001285": "ν† λ¨ΈμŠ€ λ°”ν΄λ ˆμ΄",
167
+ "mrc-0-005109": "감정 μ „μ—Ό",
168
+ "mrc-1-001455": "μŠ€μœ„μΉ˜ 이더넷",
169
+ "mrc-0-001274": "μ†ŒλΉ„κ°μ†Œ",
170
+ "mrc-1-000855": "λ§ˆμΈ ν‚€ 촌",
171
+ "mrc-0-003587": "ν™˜κ°, 망상, 사고μž₯μ• ",
172
+ "mrc-0-005031": "μ„Έν¬μ§ˆ",
173
+ "mrc-0-005154": "μ•„μΌ€λ””μ•„",
174
+ "mrc-0-000477": "κ³Όμ‹œν•˜λŠ” λ“―ν•œ 신앙심",
175
+ "mrc-0-002513": "μ†ŒλΉ„μžλ“€μ˜ λ°˜μ‘",
176
+ "mrc-0-003022": "골룸바노",
177
+ "mrc-1-000946": "λ‹€μΌ€λ‹€",
178
+ "mrc-0-001240": "μ•…λ‹Ή",
179
+ "mrc-0-001110": "μœ μ΄μ—”",
180
+ "mrc-0-003149": "ν”Όλ„€μŠ€ 리먼",
181
+ "mrc-0-004132": "포슀카λ₯΄λ„€νŠΈ",
182
+ "mrc-0-005300": "면세증",
183
+ "mrc-0-004203": "μˆ™μ˜ 정씨",
184
+ "mrc-0-003566": "λ‚¨μ„±μ—°λŒ€ ν™ˆνŽ˜μ΄μ§€",
185
+ "mrc-0-002926": "1895λ…„",
186
+ "mrc-0-000536": "γ€Šμ²œμ²΄μ˜ νšŒμ „μ— κ΄€ν•˜μ—¬γ€‹",
187
+ "mrc-0-003848": "νžˆλ‘œμ‹œλ§ˆ μ’…ν•© ꡬμž₯μ—μ„œ μ—΄λ¦° νžˆλ‘œμ‹œλ§ˆ λ„μš” μΉ΄ν”„μ™€μ˜ κ²½κΈ°μ—μ„œ μž‘μ€ λ…ΌμŸμœΌλ‘œ 인해 ν˜„μ§€μ˜ νžˆλ‘œμ‹œλ§ˆ",
188
+ "mrc-0-003760": "5Β·16 κ΄‘μž₯(μ—¬μ˜λ„ κ΄‘μž₯)",
189
+ "mrc-0-001989": "6m",
190
+ "mrc-0-004863": "μ΄λ“±κ³΅μ†‘λ•λΉ„κ±΄μ˜μ†Œ",
191
+ "mrc-1-000714": "ν—¬λ¦¬μ˜€μ‹œμŠ€",
192
+ "mrc-0-003211": "ν• λ¨Έλ‹ˆ",
193
+ "mrc-1-001173": "탄광 ν™˜κ²½ 문제",
194
+ "mrc-0-005360": "1796λ…„",
195
+ "mrc-1-000096": "ν”Όμžν—›",
196
+ "mrc-0-003467": "κ°€μ‚°",
197
+ "mrc-1-000593": "마거릿 λŒ€μ²˜",
198
+ "mrc-0-001001": "λ””μ§€ν„Έ 컴퓨터",
199
+ "mrc-1-001567": "35λ…„κ°„",
200
+ "mrc-1-000132": "문치미λ₯΄",
201
+ "mrc-0-000787": "1947λ…„",
202
+ "mrc-0-001494": "μ•„μ΄μΉ˜ μ „κΈ° μ „μš©μ„ ",
203
+ "mrc-0-003146": "'달빛 정원'(Moonlight Garden)",
204
+ "mrc-0-004411": "λž€λ‹€μš° 튜브",
205
+ "mrc-1-001622": "우천",
206
+ "mrc-0-002887": "οΌœμ Šμ€ κ·Όμœ„λŒ€οΌž",
207
+ "mrc-0-003437": "10λ§Œν•„",
208
+ "mrc-1-001121": "κΈ°μ‘΄ λŠ₯λ ₯ μ„±λŠ₯ ν…ŒμŠ€νŠΈλ₯Ό 기반으둜 개발되며, 미래 λŠ₯λ ₯ 결정은 κ°€λŠ₯ν•œ λ‹€μ–‘ν•œ 미래 μ‹œλ‚˜λ¦¬μ˜€μ˜ 도전을 ν•΄κ²°ν•  μˆ˜μžˆλŠ” κ΅°λŒ€",
209
+ "mrc-0-005286": "μ°¨κ³ λ‚˜λΌ μ „νˆ¬",
210
+ "mrc-1-000295": "λ΄…μŠ¬λ ˆμ΄",
211
+ "mrc-0-000159": "일등병 ν† λ¨ΈμŠ€",
212
+ "mrc-0-004725": "1966λ…„",
213
+ "mrc-0-004837": "클레λͺ½νŠΈ",
214
+ "mrc-1-000507": "17μ„ΈκΈ° μ‘°μ„  μ‹œλŒ€",
215
+ "mrc-0-001719": "μ°½",
216
+ "mrc-0-001253": "μˆ˜μž… κΈˆμ§€ 쑰치",
217
+ "mrc-1-001270": "\"κ΄‘νœ˜μ—μ˜ 각성\"",
218
+ "mrc-0-005324": "μΉ΄λ°”λ ˆλ‚˜ 클럽",
219
+ "mrc-0-005105": "보수 μ„±ν–₯",
220
+ "mrc-1-000983": "리퍼",
221
+ "mrc-0-002718": "ꡭ무회의",
222
+ "mrc-0-001552": "비단",
223
+ "mrc-0-003752": "μ‚¬ν˜κ°„",
224
+ "mrc-0-004530": "λŒ€κ³Ό κΈ‰μ œμž",
225
+ "mrc-0-003057": "μ˜€μ‚¬μΉ΄ μ‹œμ˜ μ§€ν•˜μ² ",
226
+ "mrc-0-003850": "νƒœμ‘° 왕건",
227
+ "mrc-0-003262": "맘루크λ₯Ό λͺ°μ•„λ‚΄κΈ° μœ„ν•œ μ‹­μžκ΅° 섀ꡐλ₯Ό μ§€μ‹œν–ˆλ‹€. κ·Έ κ²°κ³Ό 1309λ…„ 7μ›” 아비뇽 μ„±λ¬Έ μ•žμ— 일λͺ… 빈자의 μ‹­μžκ΅°",
228
+ "mrc-0-001650": "λ©•μ‹œμ½”",
229
+ "mrc-0-004662": "μ§€μ§ˆ κ³Όμ‚°ν™” 생성물",
230
+ "mrc-0-003759": "독감",
231
+ "mrc-1-001279": "μ–‘λ‹Ή(ζ₯Šι»¨)",
232
+ "mrc-0-001960": "μŠˆλ°”μ΄μΈ  μ•” 쑴탁",
233
+ "mrc-0-001162": "μ•¨λ¦¬μŠ€ νŽ˜μ–΄μ‚΄ 슀미슀",
234
+ "mrc-0-004565": "λ‹€μˆ˜κ²°",
235
+ "mrc-0-000754": "μ½”ν”„λ¨Όκ³Ό μ €μŠ€ν‹°μŠ¨",
236
+ "mrc-1-000024": "물적 μ„±κ³Ό",
237
+ "mrc-0-000484": "제 μ–‘μ™•",
238
+ "mrc-0-002095": "'일곱 개의 μ‹ μ „ οΏ½οΏ½οΏ½μž₯'",
239
+ "mrc-0-003083": "윀치호",
240
+ "mrc-0-002978": "200,000λͺ…",
241
+ "mrc-1-000622": "μ˜€κ·€μŠ€νŠΈ λΈ”λž‘ν‚€(Auguste Blanqui)의 λΉ„λ°€κ²°μ‚¬μ£Όμ˜"
242
+ }
pytorch_model copy.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16a336762ed49849ec3f97bd08769e616293835b98b9041cb9e15b4bc1b38cd2
3
+ size 1413985490
special_tokens_map copy.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config copy.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "never_split": null, "bos_token": "[CLS]", "eos_token": "[SEP]", "model_max_length": 512, "tokenizer_class": "BertTokenizer", "special_tokens_map_file": "/opt/ml/.cache/huggingface/transformers/1a24ab4628028ed80dea35ce3334a636dc656fd9a17a09bad377f88f0cbecdac.70c17d6e4d492c8f24f5bb97ab56c7f272e947112c6faf9dd846da42ba13eb23", "name_or_path": "klue/roberta-large"}
train_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "init_mem_cpu_alloc_delta": 17783119,
4
+ "init_mem_cpu_peaked_delta": 18230,
5
+ "init_mem_gpu_alloc_delta": 1414861824,
6
+ "init_mem_gpu_peaked_delta": 67165696,
7
+ "train_mem_cpu_alloc_delta": 2991504,
8
+ "train_mem_cpu_peaked_delta": 264499229,
9
+ "train_mem_gpu_alloc_delta": 4256099328,
10
+ "train_mem_gpu_peaked_delta": 11330810880,
11
+ "train_runtime": 8323.4424,
12
+ "train_samples": 7978,
13
+ "train_samples_per_second": 1.199
14
+ }
train_results.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch = 10.0
2
+ init_mem_cpu_alloc_delta = 17783119
3
+ init_mem_cpu_peaked_delta = 18230
4
+ init_mem_gpu_alloc_delta = 1414861824
5
+ init_mem_gpu_peaked_delta = 67165696
6
+ train_mem_cpu_alloc_delta = 2991504
7
+ train_mem_cpu_peaked_delta = 264499229
8
+ train_mem_gpu_alloc_delta = 4256099328
9
+ train_mem_gpu_peaked_delta = 11330810880
10
+ train_runtime = 8323.4424
11
+ train_samples = 7978
12
+ train_samples_per_second = 1.199
trainer_state copy.json ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 9980,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "learning_rate": 9.997522904167843e-07,
13
+ "loss": 26.4633,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.2,
18
+ "learning_rate": 9.990094071072878e-07,
19
+ "loss": 9.8101,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.3,
24
+ "learning_rate": 9.977720861487699e-07,
25
+ "loss": 5.8569,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.4,
30
+ "learning_rate": 9.96041553526267e-07,
31
+ "loss": 5.5213,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.5,
36
+ "learning_rate": 9.938195239178374e-07,
37
+ "loss": 5.1344,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.6,
42
+ "learning_rate": 9.911081989955939e-07,
43
+ "loss": 4.9245,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 0.7,
48
+ "learning_rate": 9.879102652442023e-07,
49
+ "loss": 4.5925,
50
+ "step": 700
51
+ },
52
+ {
53
+ "epoch": 0.8,
54
+ "learning_rate": 9.842288912990095e-07,
55
+ "loss": 4.676,
56
+ "step": 800
57
+ },
58
+ {
59
+ "epoch": 0.9,
60
+ "learning_rate": 9.80067724806438e-07,
61
+ "loss": 4.4576,
62
+ "step": 900
63
+ },
64
+ {
65
+ "epoch": 1.0,
66
+ "learning_rate": 9.754308888097582e-07,
67
+ "loss": 4.2986,
68
+ "step": 1000
69
+ },
70
+ {
71
+ "epoch": 1.1,
72
+ "learning_rate": 9.703229776638185e-07,
73
+ "loss": 3.8739,
74
+ "step": 1100
75
+ },
76
+ {
77
+ "epoch": 1.2,
78
+ "learning_rate": 9.647490524827833e-07,
79
+ "loss": 3.5629,
80
+ "step": 1200
81
+ },
82
+ {
83
+ "epoch": 1.3,
84
+ "learning_rate": 9.587146361253867e-07,
85
+ "loss": 2.9575,
86
+ "step": 1300
87
+ },
88
+ {
89
+ "epoch": 1.4,
90
+ "learning_rate": 9.522257077226716e-07,
91
+ "loss": 2.8326,
92
+ "step": 1400
93
+ },
94
+ {
95
+ "epoch": 1.5,
96
+ "learning_rate": 9.452886967536388e-07,
97
+ "loss": 2.7891,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 1.6,
102
+ "learning_rate": 9.379104766746722e-07,
103
+ "loss": 2.4971,
104
+ "step": 1600
105
+ },
106
+ {
107
+ "epoch": 1.7,
108
+ "learning_rate": 9.30098358109054e-07,
109
+ "loss": 2.4297,
110
+ "step": 1700
111
+ },
112
+ {
113
+ "epoch": 1.8,
114
+ "learning_rate": 9.218600816033199e-07,
115
+ "loss": 2.183,
116
+ "step": 1800
117
+ },
118
+ {
119
+ "epoch": 1.9,
120
+ "learning_rate": 9.132038099576289e-07,
121
+ "loss": 2.0732,
122
+ "step": 1900
123
+ },
124
+ {
125
+ "epoch": 2.0,
126
+ "learning_rate": 9.041381201377467e-07,
127
+ "loss": 2.2732,
128
+ "step": 2000
129
+ },
130
+ {
131
+ "epoch": 2.1,
132
+ "learning_rate": 8.94671994776661e-07,
133
+ "loss": 1.7384,
134
+ "step": 2100
135
+ },
136
+ {
137
+ "epoch": 2.2,
138
+ "learning_rate": 8.84814813274243e-07,
139
+ "loss": 1.8955,
140
+ "step": 2200
141
+ },
142
+ {
143
+ "epoch": 2.3,
144
+ "learning_rate": 8.745763425037795e-07,
145
+ "loss": 2.0546,
146
+ "step": 2300
147
+ },
148
+ {
149
+ "epoch": 2.4,
150
+ "learning_rate": 8.639667271345798e-07,
151
+ "loss": 1.6855,
152
+ "step": 2400
153
+ },
154
+ {
155
+ "epoch": 2.51,
156
+ "learning_rate": 8.529964795802484e-07,
157
+ "loss": 1.7512,
158
+ "step": 2500
159
+ },
160
+ {
161
+ "epoch": 2.61,
162
+ "learning_rate": 8.416764695825834e-07,
163
+ "loss": 1.7856,
164
+ "step": 2600
165
+ },
166
+ {
167
+ "epoch": 2.71,
168
+ "learning_rate": 8.300179134414187e-07,
169
+ "loss": 1.7027,
170
+ "step": 2700
171
+ },
172
+ {
173
+ "epoch": 2.81,
174
+ "learning_rate": 8.180323629010848e-07,
175
+ "loss": 1.5552,
176
+ "step": 2800
177
+ },
178
+ {
179
+ "epoch": 2.91,
180
+ "learning_rate": 8.057316937044976e-07,
181
+ "loss": 1.6427,
182
+ "step": 2900
183
+ },
184
+ {
185
+ "epoch": 3.01,
186
+ "learning_rate": 7.931280938262168e-07,
187
+ "loss": 1.7996,
188
+ "step": 3000
189
+ },
190
+ {
191
+ "epoch": 3.11,
192
+ "learning_rate": 7.802340513961341e-07,
193
+ "loss": 1.4622,
194
+ "step": 3100
195
+ },
196
+ {
197
+ "epoch": 3.21,
198
+ "learning_rate": 7.670623423257547e-07,
199
+ "loss": 1.4137,
200
+ "step": 3200
201
+ },
202
+ {
203
+ "epoch": 3.31,
204
+ "learning_rate": 7.536260176493347e-07,
205
+ "loss": 1.4894,
206
+ "step": 3300
207
+ },
208
+ {
209
+ "epoch": 3.41,
210
+ "learning_rate": 7.399383905924165e-07,
211
+ "loss": 1.4263,
212
+ "step": 3400
213
+ },
214
+ {
215
+ "epoch": 3.51,
216
+ "learning_rate": 7.26013023380574e-07,
217
+ "loss": 1.491,
218
+ "step": 3500
219
+ },
220
+ {
221
+ "epoch": 3.61,
222
+ "learning_rate": 7.118637138014395e-07,
223
+ "loss": 1.2858,
224
+ "step": 3600
225
+ },
226
+ {
227
+ "epoch": 3.71,
228
+ "learning_rate": 6.975044815333281e-07,
229
+ "loss": 1.2851,
230
+ "step": 3700
231
+ },
232
+ {
233
+ "epoch": 3.81,
234
+ "learning_rate": 6.829495542540013e-07,
235
+ "loss": 1.3229,
236
+ "step": 3800
237
+ },
238
+ {
239
+ "epoch": 3.91,
240
+ "learning_rate": 6.682133535433393e-07,
241
+ "loss": 1.3675,
242
+ "step": 3900
243
+ },
244
+ {
245
+ "epoch": 4.01,
246
+ "learning_rate": 6.533104805938873e-07,
247
+ "loss": 1.3686,
248
+ "step": 4000
249
+ },
250
+ {
251
+ "epoch": 4.11,
252
+ "learning_rate": 6.382557017434331e-07,
253
+ "loss": 1.0781,
254
+ "step": 4100
255
+ },
256
+ {
257
+ "epoch": 4.21,
258
+ "learning_rate": 6.230639338439549e-07,
259
+ "loss": 1.1388,
260
+ "step": 4200
261
+ },
262
+ {
263
+ "epoch": 4.31,
264
+ "learning_rate": 6.077502294814311e-07,
265
+ "loss": 1.0966,
266
+ "step": 4300
267
+ },
268
+ {
269
+ "epoch": 4.41,
270
+ "learning_rate": 5.923297620611622e-07,
271
+ "loss": 1.1764,
272
+ "step": 4400
273
+ },
274
+ {
275
+ "epoch": 4.51,
276
+ "learning_rate": 5.76817810773379e-07,
277
+ "loss": 1.2625,
278
+ "step": 4500
279
+ },
280
+ {
281
+ "epoch": 4.61,
282
+ "learning_rate": 5.612297454540351e-07,
283
+ "loss": 1.1718,
284
+ "step": 4600
285
+ },
286
+ {
287
+ "epoch": 4.71,
288
+ "learning_rate": 5.455810113557839e-07,
289
+ "loss": 1.218,
290
+ "step": 4700
291
+ },
292
+ {
293
+ "epoch": 4.81,
294
+ "learning_rate": 5.298871138442307e-07,
295
+ "loss": 1.2203,
296
+ "step": 4800
297
+ },
298
+ {
299
+ "epoch": 4.91,
300
+ "learning_rate": 5.14163603034622e-07,
301
+ "loss": 1.0743,
302
+ "step": 4900
303
+ },
304
+ {
305
+ "epoch": 5.01,
306
+ "learning_rate": 4.984260583841952e-07,
307
+ "loss": 1.1825,
308
+ "step": 5000
309
+ },
310
+ {
311
+ "epoch": 5.11,
312
+ "learning_rate": 4.82690073255455e-07,
313
+ "loss": 0.8899,
314
+ "step": 5100
315
+ },
316
+ {
317
+ "epoch": 5.21,
318
+ "learning_rate": 4.6697123946567224e-07,
319
+ "loss": 0.9867,
320
+ "step": 5200
321
+ },
322
+ {
323
+ "epoch": 5.31,
324
+ "learning_rate": 4.512851318379138e-07,
325
+ "loss": 0.9842,
326
+ "step": 5300
327
+ },
328
+ {
329
+ "epoch": 5.41,
330
+ "learning_rate": 4.3564729276891087e-07,
331
+ "loss": 1.1441,
332
+ "step": 5400
333
+ },
334
+ {
335
+ "epoch": 5.51,
336
+ "learning_rate": 4.2007321682905594e-07,
337
+ "loss": 0.9965,
338
+ "step": 5500
339
+ },
340
+ {
341
+ "epoch": 5.61,
342
+ "learning_rate": 4.0457833540978923e-07,
343
+ "loss": 0.9929,
344
+ "step": 5600
345
+ },
346
+ {
347
+ "epoch": 5.71,
348
+ "learning_rate": 3.89178001433584e-07,
349
+ "loss": 1.152,
350
+ "step": 5700
351
+ },
352
+ {
353
+ "epoch": 5.81,
354
+ "learning_rate": 3.7388747414168295e-07,
355
+ "loss": 0.8896,
356
+ "step": 5800
357
+ },
358
+ {
359
+ "epoch": 5.91,
360
+ "learning_rate": 3.5872190397465635e-07,
361
+ "loss": 1.1022,
362
+ "step": 5900
363
+ },
364
+ {
365
+ "epoch": 6.01,
366
+ "learning_rate": 3.436963175607656e-07,
367
+ "loss": 0.8925,
368
+ "step": 6000
369
+ },
370
+ {
371
+ "epoch": 6.11,
372
+ "learning_rate": 3.288256028270033e-07,
373
+ "loss": 0.9647,
374
+ "step": 6100
375
+ },
376
+ {
377
+ "epoch": 6.21,
378
+ "learning_rate": 3.141244942475647e-07,
379
+ "loss": 0.7659,
380
+ "step": 6200
381
+ },
382
+ {
383
+ "epoch": 6.31,
384
+ "learning_rate": 2.996075582443658e-07,
385
+ "loss": 1.0098,
386
+ "step": 6300
387
+ },
388
+ {
389
+ "epoch": 6.41,
390
+ "learning_rate": 2.8528917875407433e-07,
391
+ "loss": 1.0704,
392
+ "step": 6400
393
+ },
394
+ {
395
+ "epoch": 6.51,
396
+ "learning_rate": 2.711835429759539e-07,
397
+ "loss": 1.0039,
398
+ "step": 6500
399
+ },
400
+ {
401
+ "epoch": 6.61,
402
+ "learning_rate": 2.573046273146427e-07,
403
+ "loss": 0.9493,
404
+ "step": 6600
405
+ },
406
+ {
407
+ "epoch": 6.71,
408
+ "learning_rate": 2.4366618353179644e-07,
409
+ "loss": 0.9582,
410
+ "step": 6700
411
+ },
412
+ {
413
+ "epoch": 6.81,
414
+ "learning_rate": 2.30281725120316e-07,
415
+ "loss": 0.8875,
416
+ "step": 6800
417
+ },
418
+ {
419
+ "epoch": 6.91,
420
+ "learning_rate": 2.1716451391466006e-07,
421
+ "loss": 0.8132,
422
+ "step": 6900
423
+ },
424
+ {
425
+ "epoch": 7.01,
426
+ "learning_rate": 2.0432754695051136e-07,
427
+ "loss": 1.0703,
428
+ "step": 7000
429
+ },
430
+ {
431
+ "epoch": 7.11,
432
+ "learning_rate": 1.9178354358681548e-07,
433
+ "loss": 1.0679,
434
+ "step": 7100
435
+ },
436
+ {
437
+ "epoch": 7.21,
438
+ "learning_rate": 1.7954493290295309e-07,
439
+ "loss": 0.8124,
440
+ "step": 7200
441
+ },
442
+ {
443
+ "epoch": 7.31,
444
+ "learning_rate": 1.6762384138353075e-07,
445
+ "loss": 0.9779,
446
+ "step": 7300
447
+ },
448
+ {
449
+ "epoch": 7.41,
450
+ "learning_rate": 1.5603208090299496e-07,
451
+ "loss": 0.7439,
452
+ "step": 7400
453
+ },
454
+ {
455
+ "epoch": 7.52,
456
+ "learning_rate": 1.4478113702197569e-07,
457
+ "loss": 0.858,
458
+ "step": 7500
459
+ },
460
+ {
461
+ "epoch": 7.62,
462
+ "learning_rate": 1.3388215760695098e-07,
463
+ "loss": 1.0146,
464
+ "step": 7600
465
+ },
466
+ {
467
+ "epoch": 7.72,
468
+ "learning_rate": 1.2334594178451424e-07,
469
+ "loss": 0.7714,
470
+ "step": 7700
471
+ },
472
+ {
473
+ "epoch": 7.82,
474
+ "learning_rate": 1.1318292924118584e-07,
475
+ "loss": 0.8889,
476
+ "step": 7800
477
+ },
478
+ {
479
+ "epoch": 7.92,
480
+ "learning_rate": 1.0340318987937097e-07,
481
+ "loss": 1.0001,
482
+ "step": 7900
483
+ },
484
+ {
485
+ "epoch": 8.02,
486
+ "learning_rate": 9.401641383971476e-08,
487
+ "loss": 0.9567,
488
+ "step": 8000
489
+ },
490
+ {
491
+ "epoch": 8.12,
492
+ "learning_rate": 8.503190189973914e-08,
493
+ "loss": 0.834,
494
+ "step": 8100
495
+ },
496
+ {
497
+ "epoch": 8.22,
498
+ "learning_rate": 7.645855625827657e-08,
499
+ "loss": 0.9169,
500
+ "step": 8200
501
+ },
502
+ {
503
+ "epoch": 8.32,
504
+ "learning_rate": 6.830487171482935e-08,
505
+ "loss": 0.841,
506
+ "step": 8300
507
+ },
508
+ {
509
+ "epoch": 8.42,
510
+ "learning_rate": 6.057892725259717e-08,
511
+ "loss": 1.0763,
512
+ "step": 8400
513
+ },
514
+ {
515
+ "epoch": 8.52,
516
+ "learning_rate": 5.328837803351083e-08,
517
+ "loss": 0.8885,
518
+ "step": 8500
519
+ },
520
+ {
521
+ "epoch": 8.62,
522
+ "learning_rate": 4.644044781320422e-08,
523
+ "loss": 0.7986,
524
+ "step": 8600
525
+ },
526
+ {
527
+ "epoch": 8.72,
528
+ "learning_rate": 4.004192178344029e-08,
529
+ "loss": 0.701,
530
+ "step": 8700
531
+ },
532
+ {
533
+ "epoch": 8.82,
534
+ "learning_rate": 3.4099139849083304e-08,
535
+ "loss": 0.9347,
536
+ "step": 8800
537
+ },
538
+ {
539
+ "epoch": 8.92,
540
+ "learning_rate": 2.8617990346277655e-08,
541
+ "loss": 1.0685,
542
+ "step": 8900
543
+ },
544
+ {
545
+ "epoch": 9.02,
546
+ "learning_rate": 2.3603904208058688e-08,
547
+ "loss": 0.8046,
548
+ "step": 9000
549
+ },
550
+ {
551
+ "epoch": 9.12,
552
+ "learning_rate": 1.9061849583176636e-08,
553
+ "loss": 0.8515,
554
+ "step": 9100
555
+ },
556
+ {
557
+ "epoch": 9.22,
558
+ "learning_rate": 1.499632691346375e-08,
559
+ "loss": 0.8998,
560
+ "step": 9200
561
+ },
562
+ {
563
+ "epoch": 9.32,
564
+ "learning_rate": 1.1411364474624264e-08,
565
+ "loss": 0.8923,
566
+ "step": 9300
567
+ },
568
+ {
569
+ "epoch": 9.42,
570
+ "learning_rate": 8.31051438486441e-09,
571
+ "loss": 0.7543,
572
+ "step": 9400
573
+ },
574
+ {
575
+ "epoch": 9.52,
576
+ "learning_rate": 5.696849085317645e-09,
577
+ "loss": 0.883,
578
+ "step": 9500
579
+ },
580
+ {
581
+ "epoch": 9.62,
582
+ "learning_rate": 3.5729582957520486e-09,
583
+ "loss": 0.8342,
584
+ "step": 9600
585
+ },
586
+ {
587
+ "epoch": 9.72,
588
+ "learning_rate": 1.9409464485766746e-09,
589
+ "loss": 0.9553,
590
+ "step": 9700
591
+ },
592
+ {
593
+ "epoch": 9.82,
594
+ "learning_rate": 8.024306036893968e-10,
595
+ "loss": 0.9193,
596
+ "step": 9800
597
+ },
598
+ {
599
+ "epoch": 9.92,
600
+ "learning_rate": 1.5853884623195922e-10,
601
+ "loss": 0.8958,
602
+ "step": 9900
603
+ },
604
+ {
605
+ "epoch": 10.0,
606
+ "step": 9980,
607
+ "total_flos": 0.0,
608
+ "train_runtime": 8323.4424,
609
+ "train_samples_per_second": 1.199
610
+ }
611
+ ],
612
+ "max_steps": 9980,
613
+ "num_train_epochs": 10,
614
+ "total_flos": 0.0,
615
+ "trial_name": null,
616
+ "trial_params": null
617
+ }
trainer_state.json DELETED
@@ -1,130 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 9.519038076152304,
5
- "global_step": 9500,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.5,
12
- "learning_rate": 9.498997995991983e-07,
13
- "loss": 10.6313,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 1.0,
18
- "learning_rate": 8.997995991983968e-07,
19
- "loss": 5.4942,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 1.5,
24
- "learning_rate": 8.496993987975952e-07,
25
- "loss": 3.0541,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 2.0,
30
- "learning_rate": 7.995991983967935e-07,
31
- "loss": 2.0122,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 2.51,
36
- "learning_rate": 7.494989979959919e-07,
37
- "loss": 1.6182,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 3.01,
42
- "learning_rate": 6.993987975951904e-07,
43
- "loss": 1.6316,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 3.51,
48
- "learning_rate": 6.492985971943887e-07,
49
- "loss": 1.4137,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 4.01,
54
- "learning_rate": 5.991983967935872e-07,
55
- "loss": 1.2858,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 4.51,
60
- "learning_rate": 5.490981963927856e-07,
61
- "loss": 1.1587,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 5.01,
66
- "learning_rate": 4.989979959919839e-07,
67
- "loss": 1.1424,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 5.51,
72
- "learning_rate": 4.4889779559118236e-07,
73
- "loss": 0.9812,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 6.01,
78
- "learning_rate": 3.987975951903808e-07,
79
- "loss": 1.0498,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 6.51,
84
- "learning_rate": 3.4869739478957914e-07,
85
- "loss": 0.9131,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 7.01,
90
- "learning_rate": 2.9859719438877756e-07,
91
- "loss": 0.9897,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 7.52,
96
- "learning_rate": 2.4849699398797593e-07,
97
- "loss": 0.8315,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 8.02,
102
- "learning_rate": 1.9839679358717435e-07,
103
- "loss": 0.9413,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 8.52,
108
- "learning_rate": 1.4829659318637274e-07,
109
- "loss": 0.9108,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 9.02,
114
- "learning_rate": 9.819639278557115e-08,
115
- "loss": 0.7817,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 9.52,
120
- "learning_rate": 4.809619238476954e-08,
121
- "loss": 0.7999,
122
- "step": 9500
123
- }
124
- ],
125
- "max_steps": 9980,
126
- "num_train_epochs": 10,
127
- "total_flos": 0.0,
128
- "trial_name": null,
129
- "trial_params": null
130
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rng_state.pth β†’ training_args copy.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a2d729fdcde9734ee5b1b7d1c85b821418dfebfff8b57f99daf6a35b9e78ee3
3
- size 14657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574e5e2a956b3e59901c3ed9848a90b98bd67a72f27a5e69b189b003d4e05fd4
3
+ size 2287
vocab copy.txt ADDED
The diff for this file is too large to render. See raw diff