hllj committed on
Commit
d4a23ae
·
1 Parent(s): c6fcbd7

Model save

Browse files
adapter_config.json CHANGED
@@ -16,9 +16,9 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "q_proj",
20
  "k_proj",
21
  "v_proj",
 
22
  "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
19
  "k_proj",
20
  "v_proj",
21
+ "q_proj",
22
  "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 2.5,
3
  "eval_loss": 0.3125712275505066,
4
- "eval_runtime": 13.3953,
5
  "eval_samples": 120,
6
- "eval_samples_per_second": 8.958,
7
- "eval_steps_per_second": 2.24,
8
  "train_loss": 0.4085692544042328,
9
- "train_runtime": 1490.0921,
10
  "train_samples": 1076,
11
  "train_samples_per_second": 2.166,
12
  "train_steps_per_second": 0.542
 
1
  {
2
  "epoch": 2.5,
3
  "eval_loss": 0.3125712275505066,
4
+ "eval_runtime": 13.3998,
5
  "eval_samples": 120,
6
+ "eval_samples_per_second": 8.955,
7
+ "eval_steps_per_second": 2.239,
8
  "train_loss": 0.4085692544042328,
9
+ "train_runtime": 1490.1076,
10
  "train_samples": 1076,
11
  "train_samples_per_second": 2.166,
12
  "train_steps_per_second": 0.542
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.5,
3
  "eval_loss": 0.3125712275505066,
4
- "eval_runtime": 13.3953,
5
  "eval_samples": 120,
6
- "eval_samples_per_second": 8.958,
7
- "eval_steps_per_second": 2.24
8
  }
 
1
  {
2
  "epoch": 2.5,
3
  "eval_loss": 0.3125712275505066,
4
+ "eval_runtime": 13.3998,
5
  "eval_samples": 120,
6
+ "eval_samples_per_second": 8.955,
7
+ "eval_steps_per_second": 2.239
8
  }
runs/Nov17_19-26-11_7a59b30c842e/events.out.tfevents.1700249177.7a59b30c842e.5618.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90d523fd6593c2045ac62fede13556623b7ffac702fc4664a3942fb4d9a4e455
3
+ size 13242
runs/Nov17_19-26-11_7a59b30c842e/events.out.tfevents.1700250680.7a59b30c842e.5618.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2afcbcc6d2b8fe5842707556ab62faa8b66ce093b4383fd4950e31f0ac2c9258
3
+ size 359
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 2.5,
3
  "train_loss": 0.4085692544042328,
4
- "train_runtime": 1490.0921,
5
  "train_samples": 1076,
6
  "train_samples_per_second": 2.166,
7
  "train_steps_per_second": 0.542
 
1
  {
2
  "epoch": 2.5,
3
  "train_loss": 0.4085692544042328,
4
+ "train_runtime": 1490.1076,
5
  "train_samples": 1076,
6
  "train_samples_per_second": 2.166,
7
  "train_steps_per_second": 0.542
trainer_state.json CHANGED
@@ -47,9 +47,9 @@
47
  {
48
  "epoch": 0.19,
49
  "eval_loss": 0.5209892392158508,
50
- "eval_runtime": 13.386,
51
- "eval_samples_per_second": 8.965,
52
- "eval_steps_per_second": 2.241,
53
  "step": 50
54
  },
55
  {
@@ -85,8 +85,8 @@
85
  {
86
  "epoch": 0.37,
87
  "eval_loss": 0.3655029237270355,
88
- "eval_runtime": 13.3923,
89
- "eval_samples_per_second": 8.96,
90
  "eval_steps_per_second": 2.24,
91
  "step": 100
92
  },
@@ -123,8 +123,8 @@
123
  {
124
  "epoch": 1.06,
125
  "eval_loss": 0.33579277992248535,
126
- "eval_runtime": 13.3932,
127
- "eval_samples_per_second": 8.96,
128
  "eval_steps_per_second": 2.24,
129
  "step": 150
130
  },
@@ -161,9 +161,9 @@
161
  {
162
  "epoch": 1.24,
163
  "eval_loss": 0.32932165265083313,
164
- "eval_runtime": 13.3968,
165
- "eval_samples_per_second": 8.957,
166
- "eval_steps_per_second": 2.239,
167
  "step": 200
168
  },
169
  {
@@ -199,9 +199,9 @@
199
  {
200
  "epoch": 1.43,
201
  "eval_loss": 0.3206278085708618,
202
- "eval_runtime": 13.3982,
203
- "eval_samples_per_second": 8.956,
204
- "eval_steps_per_second": 2.239,
205
  "step": 250
206
  },
207
  {
@@ -237,9 +237,9 @@
237
  {
238
  "epoch": 2.11,
239
  "eval_loss": 0.3176751732826233,
240
- "eval_runtime": 13.3874,
241
- "eval_samples_per_second": 8.964,
242
- "eval_steps_per_second": 2.241,
243
  "step": 300
244
  },
245
  {
@@ -275,8 +275,8 @@
275
  {
276
  "epoch": 2.3,
277
  "eval_loss": 0.31533822417259216,
278
- "eval_runtime": 13.3997,
279
- "eval_samples_per_second": 8.955,
280
  "eval_steps_per_second": 2.239,
281
  "step": 350
282
  },
@@ -313,7 +313,7 @@
313
  {
314
  "epoch": 2.48,
315
  "eval_loss": 0.31296682357788086,
316
- "eval_runtime": 13.3926,
317
  "eval_samples_per_second": 8.96,
318
  "eval_steps_per_second": 2.24,
319
  "step": 400
@@ -323,7 +323,7 @@
323
  "step": 405,
324
  "total_flos": 7.077875641902694e+16,
325
  "train_loss": 0.4085692544042328,
326
- "train_runtime": 1490.0921,
327
  "train_samples_per_second": 2.166,
328
  "train_steps_per_second": 0.542
329
  }
 
47
  {
48
  "epoch": 0.19,
49
  "eval_loss": 0.5209892392158508,
50
+ "eval_runtime": 13.3799,
51
+ "eval_samples_per_second": 8.969,
52
+ "eval_steps_per_second": 2.242,
53
  "step": 50
54
  },
55
  {
 
85
  {
86
  "epoch": 0.37,
87
  "eval_loss": 0.3655029237270355,
88
+ "eval_runtime": 13.394,
89
+ "eval_samples_per_second": 8.959,
90
  "eval_steps_per_second": 2.24,
91
  "step": 100
92
  },
 
123
  {
124
  "epoch": 1.06,
125
  "eval_loss": 0.33579277992248535,
126
+ "eval_runtime": 13.3918,
127
+ "eval_samples_per_second": 8.961,
128
  "eval_steps_per_second": 2.24,
129
  "step": 150
130
  },
 
161
  {
162
  "epoch": 1.24,
163
  "eval_loss": 0.32932165265083313,
164
+ "eval_runtime": 13.3944,
165
+ "eval_samples_per_second": 8.959,
166
+ "eval_steps_per_second": 2.24,
167
  "step": 200
168
  },
169
  {
 
199
  {
200
  "epoch": 1.43,
201
  "eval_loss": 0.3206278085708618,
202
+ "eval_runtime": 13.393,
203
+ "eval_samples_per_second": 8.96,
204
+ "eval_steps_per_second": 2.24,
205
  "step": 250
206
  },
207
  {
 
237
  {
238
  "epoch": 2.11,
239
  "eval_loss": 0.3176751732826233,
240
+ "eval_runtime": 13.3957,
241
+ "eval_samples_per_second": 8.958,
242
+ "eval_steps_per_second": 2.24,
243
  "step": 300
244
  },
245
  {
 
275
  {
276
  "epoch": 2.3,
277
  "eval_loss": 0.31533822417259216,
278
+ "eval_runtime": 13.396,
279
+ "eval_samples_per_second": 8.958,
280
  "eval_steps_per_second": 2.239,
281
  "step": 350
282
  },
 
313
  {
314
  "epoch": 2.48,
315
  "eval_loss": 0.31296682357788086,
316
+ "eval_runtime": 13.3929,
317
  "eval_samples_per_second": 8.96,
318
  "eval_steps_per_second": 2.24,
319
  "step": 400
 
323
  "step": 405,
324
  "total_flos": 7.077875641902694e+16,
325
  "train_loss": 0.4085692544042328,
326
+ "train_runtime": 1490.1076,
327
  "train_samples_per_second": 2.166,
328
  "train_steps_per_second": 0.542
329
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66793634e8d7f0f1ba7604b0abaf49c95b94b5297f3a1a786e7ecf55c6231a4d
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8afa4b1ac7680ace81555ad3d37c75491f2ba1a85f21347a2fcd7fb4e08d8f6
3
  size 4664