Augusto777 committed on
Commit
397bd35
·
verified ·
1 Parent(s): 81437cc

End of training

Browse files
README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
- value: 0.45161290322580644
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,8 +31,8 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on the imagefolder dataset.
33
  It achieves the following results on the evaluation set:
34
- - Loss: 4.2860
35
- - Accuracy: 0.4516
36
 
37
  ## Model description
38
 
@@ -51,7 +51,7 @@ More information needed
51
  ### Training hyperparameters
52
 
53
  The following hyperparameters were used during training:
54
- - learning_rate: 0.1
55
  - train_batch_size: 16
56
  - eval_batch_size: 16
57
  - seed: 42
@@ -63,44 +63,44 @@ The following hyperparameters were used during training:
63
 
64
  ### Training results
65
 
66
- | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
- |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
- | No log | 0.89 | 4 | 1107108.5 | 0.3226 |
69
- | No log | 2.0 | 9 | 6932819.5 | 0.0806 |
70
- | 85.3671 | 2.89 | 13 | 164.3687 | 0.3065 |
71
- | 85.3671 | 4.0 | 18 | 94.7157 | 0.3226 |
72
- | 17.1959 | 4.89 | 22 | 15.0014 | 0.3226 |
73
- | 17.1959 | 6.0 | 27 | 14.2037 | 0.3226 |
74
- | 5.0119 | 6.89 | 31 | 9.6017 | 0.3226 |
75
- | 5.0119 | 8.0 | 36 | 3.5761 | 0.1452 |
76
- | 4.5338 | 8.89 | 40 | 6.3193 | 0.3226 |
77
- | 4.5338 | 10.0 | 45 | 6.0539 | 0.3226 |
78
- | 4.5338 | 10.89 | 49 | 4.3710 | 0.1452 |
79
- | 3.8092 | 12.0 | 54 | 2.4122 | 0.3226 |
80
- | 3.8092 | 12.89 | 58 | 4.2860 | 0.4516 |
81
- | 3.9126 | 14.0 | 63 | 2.8649 | 0.3226 |
82
- | 3.9126 | 14.89 | 67 | 2.8482 | 0.4516 |
83
- | 2.3835 | 16.0 | 72 | 1.7011 | 0.4516 |
84
- | 2.3835 | 16.89 | 76 | 2.2730 | 0.3226 |
85
- | 1.5977 | 18.0 | 81 | 22.8877 | 0.0806 |
86
- | 1.5977 | 18.89 | 85 | 13.0729 | 0.0806 |
87
- | 1.538 | 20.0 | 90 | 9.3500 | 0.0806 |
88
- | 1.538 | 20.89 | 94 | 4.1034 | 0.0806 |
89
- | 1.538 | 22.0 | 99 | 2.0537 | 0.0806 |
90
- | 1.98 | 22.89 | 103 | 2.4364 | 0.3226 |
91
- | 1.98 | 24.0 | 108 | 1.4017 | 0.4516 |
92
- | 1.9173 | 24.89 | 112 | 2.1187 | 0.4516 |
93
- | 1.9173 | 26.0 | 117 | 1.6016 | 0.3226 |
94
- | 1.4335 | 26.89 | 121 | 1.7112 | 0.4516 |
95
- | 1.4335 | 28.0 | 126 | 1.3195 | 0.4516 |
96
- | 1.5525 | 28.89 | 130 | 1.5629 | 0.4516 |
97
- | 1.5525 | 30.0 | 135 | 1.2883 | 0.4516 |
98
- | 1.5525 | 30.89 | 139 | 1.4228 | 0.3226 |
99
- | 1.3748 | 32.0 | 144 | 1.2587 | 0.4516 |
100
- | 1.3748 | 32.89 | 148 | 1.3212 | 0.3226 |
101
- | 1.2849 | 34.0 | 153 | 1.2401 | 0.4516 |
102
- | 1.2849 | 34.89 | 157 | 1.2035 | 0.4516 |
103
- | 1.2221 | 35.56 | 160 | 1.1935 | 0.4516 |
104
 
105
 
106
  ### Framework versions
 
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
+ value: 0.6129032258064516
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on the imagefolder dataset.
33
  It achieves the following results on the evaluation set:
34
+ - Loss: 1064469725184.0
35
+ - Accuracy: 0.6129
36
 
37
  ## Model description
38
 
 
51
  ### Training hyperparameters
52
 
53
  The following hyperparameters were used during training:
54
+ - learning_rate: 1.5e-05
55
  - train_batch_size: 16
56
  - eval_batch_size: 16
57
  - seed: 42
 
63
 
64
  ### Training results
65
 
66
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
+ |:---------------:|:-----:|:----:|:---------------:|:--------:|
68
+ | No log | 0.89 | 4 | 1064469725184.0 | 0.5484 |
69
+ | No log | 2.0 | 9 | 1064469725184.0 | 0.6129 |
70
+ | 1134325609267.2 | 2.89 | 13 | 1064469725184.0 | 0.5806 |
71
+ | 1134325609267.2 | 4.0 | 18 | 1064469725184.0 | 0.5645 |
72
+ | 1175573862809.6 | 4.89 | 22 | 1064469725184.0 | 0.5484 |
73
+ | 1175573862809.6 | 6.0 | 27 | 1064469725184.0 | 0.5645 |
74
+ | 1175573967667.2 | 6.89 | 31 | 1064469725184.0 | 0.5323 |
75
+ | 1175573967667.2 | 8.0 | 36 | 1064469725184.0 | 0.5323 |
76
+ | 1093077460582.4 | 8.89 | 40 | 1064469725184.0 | 0.5484 |
77
+ | 1093077460582.4 | 10.0 | 45 | 1064469725184.0 | 0.5484 |
78
+ | 1093077460582.4 | 10.89 | 49 | 1064469725184.0 | 0.5323 |
79
+ | 1134325399552.0 | 12.0 | 54 | 1064469725184.0 | 0.5806 |
80
+ | 1134325399552.0 | 12.89 | 58 | 1064469725184.0 | 0.5806 |
81
+ | 1134325609267.2 | 14.0 | 63 | 1064469725184.0 | 0.5645 |
82
+ | 1134325609267.2 | 14.89 | 67 | 1064469725184.0 | 0.5484 |
83
+ | 1154949736038.4 | 16.0 | 72 | 1064469725184.0 | 0.5968 |
84
+ | 1154949736038.4 | 16.89 | 76 | 1064469725184.0 | 0.5645 |
85
+ | 1093077355724.8 | 18.0 | 81 | 1064469725184.0 | 0.5484 |
86
+ | 1093077355724.8 | 18.89 | 85 | 1064469725184.0 | 0.5161 |
87
+ | 1216822116352.0 | 20.0 | 90 | 1064469725184.0 | 0.5484 |
88
+ | 1216822116352.0 | 20.89 | 94 | 1064469725184.0 | 0.5323 |
89
+ | 1216822116352.0 | 22.0 | 99 | 1064469725184.0 | 0.5968 |
90
+ | 1134325609267.2 | 22.89 | 103 | 1064469725184.0 | 0.5806 |
91
+ | 1134325609267.2 | 24.0 | 108 | 1064469725184.0 | 0.5484 |
92
+ | 1196197884723.2 | 24.89 | 112 | 1064469725184.0 | 0.5484 |
93
+ | 1196197884723.2 | 26.0 | 117 | 1064469725184.0 | 0.5323 |
94
+ | 1072453333811.2 | 26.89 | 121 | 1064469725184.0 | 0.5645 |
95
+ | 1072453333811.2 | 28.0 | 126 | 1064469725184.0 | 0.5484 |
96
+ | 1175573862809.6 | 28.89 | 130 | 1064469725184.0 | 0.5323 |
97
+ | 1175573862809.6 | 30.0 | 135 | 1064469725184.0 | 0.5484 |
98
+ | 1175573862809.6 | 30.89 | 139 | 1064469725184.0 | 0.5484 |
99
+ | 1134325609267.2 | 32.0 | 144 | 1064469725184.0 | 0.5323 |
100
+ | 1134325609267.2 | 32.89 | 148 | 1064469725184.0 | 0.5484 |
101
+ | 1216822011494.4 | 34.0 | 153 | 1064469725184.0 | 0.5323 |
102
+ | 1216822011494.4 | 34.89 | 157 | 1064469725184.0 | 0.5323 |
103
+ | 1051829102182.4 | 35.56 | 160 | 1064469725184.0 | 0.5323 |
104
 
105
 
106
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 35.56,
3
- "eval_accuracy": 0.45161290322580644,
4
- "eval_loss": 4.285992622375488,
5
- "eval_runtime": 2.2878,
6
- "eval_samples_per_second": 27.101,
7
- "eval_steps_per_second": 1.748,
8
- "train_loss": 8.507176971435547,
9
- "train_runtime": 445.2603,
10
- "train_samples_per_second": 25.873,
11
- "train_steps_per_second": 0.359
12
  }
 
1
  {
2
  "epoch": 35.56,
3
+ "eval_accuracy": 0.6129032258064516,
4
+ "eval_loss": 1064469725184.0,
5
+ "eval_runtime": 2.1855,
6
+ "eval_samples_per_second": 28.369,
7
+ "eval_steps_per_second": 1.83,
8
+ "train_loss": 1143348658176.0,
9
+ "train_runtime": 448.7824,
10
+ "train_samples_per_second": 25.669,
11
+ "train_steps_per_second": 0.357
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 35.56,
3
- "eval_accuracy": 0.45161290322580644,
4
- "eval_loss": 4.285992622375488,
5
- "eval_runtime": 2.2878,
6
- "eval_samples_per_second": 27.101,
7
- "eval_steps_per_second": 1.748
8
  }
 
1
  {
2
  "epoch": 35.56,
3
+ "eval_accuracy": 0.6129032258064516,
4
+ "eval_loss": 1064469725184.0,
5
+ "eval_runtime": 2.1855,
6
+ "eval_samples_per_second": 28.369,
7
+ "eval_steps_per_second": 1.83
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:494f4498c2eaf731a0ca9fe7923786e7eb74d9092cbdd9322d1062db17972c9c
3
  size 12203648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4fbc425797df76034fbcbf070ccc3b206a5df727efd25a94bfe5cde03a7307c
3
  size 12203648
runs/Dec02_11-23-24_DESKTOP-SKBE9FB/events.out.tfevents.1733160205.DESKTOP-SKBE9FB.19844.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06f06f9cb8ecf13d9420ef56245ab8ef0513f1a11a2da70eb9be4b1820d8aaf1
3
+ size 18844
runs/Dec02_11-23-24_DESKTOP-SKBE9FB/events.out.tfevents.1733160656.DESKTOP-SKBE9FB.19844.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea33030e3c2957f91897698e9767905f2c29318ddc3d0279ed60d2309517d3cf
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 35.56,
3
- "train_loss": 8.507176971435547,
4
- "train_runtime": 445.2603,
5
- "train_samples_per_second": 25.873,
6
- "train_steps_per_second": 0.359
7
  }
 
1
  {
2
  "epoch": 35.56,
3
+ "train_loss": 1143348658176.0,
4
+ "train_runtime": 448.7824,
5
+ "train_samples_per_second": 25.669,
6
+ "train_steps_per_second": 0.357
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.45161290322580644,
3
- "best_model_checkpoint": "swiftformer-xs-OT\\checkpoint-58",
4
  "epoch": 35.55555555555556,
5
  "eval_steps": 500,
6
  "global_step": 160,
@@ -10,432 +10,432 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.89,
13
- "eval_accuracy": 0.3225806451612903,
14
- "eval_loss": 1107108.5,
15
- "eval_runtime": 2.147,
16
- "eval_samples_per_second": 28.877,
17
- "eval_steps_per_second": 1.863,
18
  "step": 4
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_accuracy": 0.08064516129032258,
23
- "eval_loss": 6932819.5,
24
- "eval_runtime": 2.0075,
25
- "eval_samples_per_second": 30.885,
26
- "eval_steps_per_second": 1.993,
27
  "step": 9
28
  },
29
  {
30
  "epoch": 2.22,
31
- "learning_rate": 0.09375,
32
- "loss": 85.3671,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 2.89,
37
- "eval_accuracy": 0.3064516129032258,
38
- "eval_loss": 164.3686981201172,
39
- "eval_runtime": 2.0995,
40
- "eval_samples_per_second": 29.531,
41
- "eval_steps_per_second": 1.905,
42
  "step": 13
43
  },
44
  {
45
  "epoch": 4.0,
46
- "eval_accuracy": 0.3225806451612903,
47
- "eval_loss": 94.71566772460938,
48
- "eval_runtime": 2.1915,
49
- "eval_samples_per_second": 28.291,
50
- "eval_steps_per_second": 1.825,
51
  "step": 18
52
  },
53
  {
54
  "epoch": 4.44,
55
- "learning_rate": 0.08750000000000001,
56
- "loss": 17.1959,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 4.89,
61
- "eval_accuracy": 0.3225806451612903,
62
- "eval_loss": 15.001376152038574,
63
- "eval_runtime": 2.123,
64
- "eval_samples_per_second": 29.204,
65
- "eval_steps_per_second": 1.884,
66
  "step": 22
67
  },
68
  {
69
  "epoch": 6.0,
70
- "eval_accuracy": 0.3225806451612903,
71
- "eval_loss": 14.203704833984375,
72
- "eval_runtime": 1.9425,
73
- "eval_samples_per_second": 31.918,
74
- "eval_steps_per_second": 2.059,
75
  "step": 27
76
  },
77
  {
78
  "epoch": 6.67,
79
- "learning_rate": 0.08125,
80
- "loss": 5.0119,
81
  "step": 30
82
  },
83
  {
84
  "epoch": 6.89,
85
- "eval_accuracy": 0.3225806451612903,
86
- "eval_loss": 9.601675033569336,
87
- "eval_runtime": 2.1205,
88
- "eval_samples_per_second": 29.238,
89
- "eval_steps_per_second": 1.886,
90
  "step": 31
91
  },
92
  {
93
  "epoch": 8.0,
94
- "eval_accuracy": 0.14516129032258066,
95
- "eval_loss": 3.5760791301727295,
96
- "eval_runtime": 1.986,
97
- "eval_samples_per_second": 31.218,
98
- "eval_steps_per_second": 2.014,
99
  "step": 36
100
  },
101
  {
102
  "epoch": 8.89,
103
- "learning_rate": 0.07500000000000001,
104
- "loss": 4.5338,
105
  "step": 40
106
  },
107
  {
108
  "epoch": 8.89,
109
- "eval_accuracy": 0.3225806451612903,
110
- "eval_loss": 6.319271087646484,
111
- "eval_runtime": 2.1545,
112
- "eval_samples_per_second": 28.777,
113
- "eval_steps_per_second": 1.857,
114
  "step": 40
115
  },
116
  {
117
  "epoch": 10.0,
118
- "eval_accuracy": 0.3225806451612903,
119
- "eval_loss": 6.053876876831055,
120
- "eval_runtime": 2.0175,
121
- "eval_samples_per_second": 30.731,
122
- "eval_steps_per_second": 1.983,
123
  "step": 45
124
  },
125
  {
126
  "epoch": 10.89,
127
- "eval_accuracy": 0.14516129032258066,
128
- "eval_loss": 4.370992183685303,
129
- "eval_runtime": 2.0745,
130
- "eval_samples_per_second": 29.887,
131
- "eval_steps_per_second": 1.928,
132
  "step": 49
133
  },
134
  {
135
  "epoch": 11.11,
136
- "learning_rate": 0.06875,
137
- "loss": 3.8092,
138
  "step": 50
139
  },
140
  {
141
  "epoch": 12.0,
142
- "eval_accuracy": 0.3225806451612903,
143
- "eval_loss": 2.4121696949005127,
144
- "eval_runtime": 1.9905,
145
- "eval_samples_per_second": 31.148,
146
- "eval_steps_per_second": 2.01,
147
  "step": 54
148
  },
149
  {
150
  "epoch": 12.89,
151
- "eval_accuracy": 0.45161290322580644,
152
- "eval_loss": 4.285992622375488,
153
- "eval_runtime": 1.937,
154
- "eval_samples_per_second": 32.009,
155
- "eval_steps_per_second": 2.065,
156
  "step": 58
157
  },
158
  {
159
  "epoch": 13.33,
160
- "learning_rate": 0.0625,
161
- "loss": 3.9126,
162
  "step": 60
163
  },
164
  {
165
  "epoch": 14.0,
166
- "eval_accuracy": 0.3225806451612903,
167
- "eval_loss": 2.8649463653564453,
168
- "eval_runtime": 1.965,
169
- "eval_samples_per_second": 31.553,
170
- "eval_steps_per_second": 2.036,
171
  "step": 63
172
  },
173
  {
174
  "epoch": 14.89,
175
- "eval_accuracy": 0.45161290322580644,
176
- "eval_loss": 2.8482446670532227,
177
- "eval_runtime": 2.215,
178
- "eval_samples_per_second": 27.991,
179
- "eval_steps_per_second": 1.806,
180
  "step": 67
181
  },
182
  {
183
  "epoch": 15.56,
184
- "learning_rate": 0.05625,
185
- "loss": 2.3835,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 16.0,
190
- "eval_accuracy": 0.45161290322580644,
191
- "eval_loss": 1.701102375984192,
192
- "eval_runtime": 2.132,
193
- "eval_samples_per_second": 29.081,
194
- "eval_steps_per_second": 1.876,
195
  "step": 72
196
  },
197
  {
198
  "epoch": 16.89,
199
- "eval_accuracy": 0.3225806451612903,
200
- "eval_loss": 2.2729971408843994,
201
- "eval_runtime": 2.0795,
202
- "eval_samples_per_second": 29.815,
203
- "eval_steps_per_second": 1.924,
204
  "step": 76
205
  },
206
  {
207
  "epoch": 17.78,
208
- "learning_rate": 0.05,
209
- "loss": 1.5977,
210
  "step": 80
211
  },
212
  {
213
  "epoch": 18.0,
214
- "eval_accuracy": 0.08064516129032258,
215
- "eval_loss": 22.88765525817871,
216
- "eval_runtime": 2.0645,
217
- "eval_samples_per_second": 30.032,
218
- "eval_steps_per_second": 1.938,
219
  "step": 81
220
  },
221
  {
222
  "epoch": 18.89,
223
- "eval_accuracy": 0.08064516129032258,
224
- "eval_loss": 13.072916984558105,
225
- "eval_runtime": 1.9475,
226
- "eval_samples_per_second": 31.836,
227
- "eval_steps_per_second": 2.054,
228
  "step": 85
229
  },
230
  {
231
  "epoch": 20.0,
232
- "learning_rate": 0.043750000000000004,
233
- "loss": 1.538,
234
  "step": 90
235
  },
236
  {
237
  "epoch": 20.0,
238
- "eval_accuracy": 0.08064516129032258,
239
- "eval_loss": 9.349954605102539,
240
- "eval_runtime": 2.0785,
241
- "eval_samples_per_second": 29.829,
242
- "eval_steps_per_second": 1.924,
243
  "step": 90
244
  },
245
  {
246
  "epoch": 20.89,
247
- "eval_accuracy": 0.08064516129032258,
248
- "eval_loss": 4.103433609008789,
249
- "eval_runtime": 1.946,
250
- "eval_samples_per_second": 31.861,
251
- "eval_steps_per_second": 2.056,
252
  "step": 94
253
  },
254
  {
255
  "epoch": 22.0,
256
- "eval_accuracy": 0.08064516129032258,
257
- "eval_loss": 2.053744077682495,
258
- "eval_runtime": 1.9975,
259
- "eval_samples_per_second": 31.039,
260
- "eval_steps_per_second": 2.003,
261
  "step": 99
262
  },
263
  {
264
  "epoch": 22.22,
265
- "learning_rate": 0.037500000000000006,
266
- "loss": 1.98,
267
  "step": 100
268
  },
269
  {
270
  "epoch": 22.89,
271
- "eval_accuracy": 0.3225806451612903,
272
- "eval_loss": 2.4363696575164795,
273
- "eval_runtime": 2.071,
274
- "eval_samples_per_second": 29.937,
275
- "eval_steps_per_second": 1.931,
276
  "step": 103
277
  },
278
  {
279
  "epoch": 24.0,
280
- "eval_accuracy": 0.45161290322580644,
281
- "eval_loss": 1.4017163515090942,
282
- "eval_runtime": 2.0465,
283
- "eval_samples_per_second": 30.296,
284
- "eval_steps_per_second": 1.955,
285
  "step": 108
286
  },
287
  {
288
  "epoch": 24.44,
289
- "learning_rate": 0.03125,
290
- "loss": 1.9173,
291
  "step": 110
292
  },
293
  {
294
  "epoch": 24.89,
295
- "eval_accuracy": 0.45161290322580644,
296
- "eval_loss": 2.11865234375,
297
- "eval_runtime": 1.8829,
298
- "eval_samples_per_second": 32.927,
299
- "eval_steps_per_second": 2.124,
300
  "step": 112
301
  },
302
  {
303
  "epoch": 26.0,
304
- "eval_accuracy": 0.3225806451612903,
305
- "eval_loss": 1.6016442775726318,
306
- "eval_runtime": 2.0465,
307
- "eval_samples_per_second": 30.296,
308
- "eval_steps_per_second": 1.955,
309
  "step": 117
310
  },
311
  {
312
  "epoch": 26.67,
313
- "learning_rate": 0.025,
314
- "loss": 1.4335,
315
  "step": 120
316
  },
317
  {
318
  "epoch": 26.89,
319
- "eval_accuracy": 0.45161290322580644,
320
- "eval_loss": 1.7111594676971436,
321
- "eval_runtime": 1.9185,
322
- "eval_samples_per_second": 32.318,
323
- "eval_steps_per_second": 2.085,
324
  "step": 121
325
  },
326
  {
327
  "epoch": 28.0,
328
- "eval_accuracy": 0.45161290322580644,
329
- "eval_loss": 1.3195487260818481,
330
- "eval_runtime": 1.8439,
331
- "eval_samples_per_second": 33.624,
332
- "eval_steps_per_second": 2.169,
333
  "step": 126
334
  },
335
  {
336
  "epoch": 28.89,
337
- "learning_rate": 0.018750000000000003,
338
- "loss": 1.5525,
339
  "step": 130
340
  },
341
  {
342
  "epoch": 28.89,
343
- "eval_accuracy": 0.45161290322580644,
344
- "eval_loss": 1.5628976821899414,
345
- "eval_runtime": 1.933,
346
- "eval_samples_per_second": 32.075,
347
- "eval_steps_per_second": 2.069,
348
  "step": 130
349
  },
350
  {
351
  "epoch": 30.0,
352
- "eval_accuracy": 0.45161290322580644,
353
- "eval_loss": 1.288311243057251,
354
- "eval_runtime": 2.036,
355
- "eval_samples_per_second": 30.452,
356
- "eval_steps_per_second": 1.965,
357
  "step": 135
358
  },
359
  {
360
  "epoch": 30.89,
361
- "eval_accuracy": 0.3225806451612903,
362
- "eval_loss": 1.422782301902771,
363
- "eval_runtime": 2.1235,
364
- "eval_samples_per_second": 29.197,
365
- "eval_steps_per_second": 1.884,
366
  "step": 139
367
  },
368
  {
369
  "epoch": 31.11,
370
- "learning_rate": 0.0125,
371
- "loss": 1.3748,
372
  "step": 140
373
  },
374
  {
375
  "epoch": 32.0,
376
- "eval_accuracy": 0.45161290322580644,
377
- "eval_loss": 1.258745789527893,
378
- "eval_runtime": 1.916,
379
- "eval_samples_per_second": 32.36,
380
- "eval_steps_per_second": 2.088,
381
  "step": 144
382
  },
383
  {
384
  "epoch": 32.89,
385
- "eval_accuracy": 0.3225806451612903,
386
- "eval_loss": 1.3211716413497925,
387
- "eval_runtime": 1.9079,
388
- "eval_samples_per_second": 32.496,
389
- "eval_steps_per_second": 2.096,
390
  "step": 148
391
  },
392
  {
393
  "epoch": 33.33,
394
- "learning_rate": 0.00625,
395
- "loss": 1.2849,
396
  "step": 150
397
  },
398
  {
399
  "epoch": 34.0,
400
- "eval_accuracy": 0.45161290322580644,
401
- "eval_loss": 1.2401268482208252,
402
- "eval_runtime": 1.933,
403
- "eval_samples_per_second": 32.075,
404
- "eval_steps_per_second": 2.069,
405
  "step": 153
406
  },
407
  {
408
  "epoch": 34.89,
409
- "eval_accuracy": 0.45161290322580644,
410
- "eval_loss": 1.203456163406372,
411
- "eval_runtime": 2.085,
412
- "eval_samples_per_second": 29.736,
413
- "eval_steps_per_second": 1.918,
414
  "step": 157
415
  },
416
  {
417
  "epoch": 35.56,
418
  "learning_rate": 0.0,
419
- "loss": 1.2221,
420
  "step": 160
421
  },
422
  {
423
  "epoch": 35.56,
424
- "eval_accuracy": 0.45161290322580644,
425
- "eval_loss": 1.1935251951217651,
426
- "eval_runtime": 2.0055,
427
- "eval_samples_per_second": 30.915,
428
- "eval_steps_per_second": 1.995,
429
  "step": 160
430
  },
431
  {
432
  "epoch": 35.56,
433
  "step": 160,
434
  "total_flos": 2.807020017156096e+16,
435
- "train_loss": 8.507176971435547,
436
- "train_runtime": 445.2603,
437
- "train_samples_per_second": 25.873,
438
- "train_steps_per_second": 0.359
439
  }
440
  ],
441
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.6129032258064516,
3
+ "best_model_checkpoint": "swiftformer-xs-OT\\checkpoint-9",
4
  "epoch": 35.55555555555556,
5
  "eval_steps": 500,
6
  "global_step": 160,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.89,
13
+ "eval_accuracy": 0.5483870967741935,
14
+ "eval_loss": 1064469725184.0,
15
+ "eval_runtime": 2.21,
16
+ "eval_samples_per_second": 28.054,
17
+ "eval_steps_per_second": 1.81,
18
  "step": 4
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.6129032258064516,
23
+ "eval_loss": 1064469725184.0,
24
+ "eval_runtime": 2.2095,
25
+ "eval_samples_per_second": 28.06,
26
+ "eval_steps_per_second": 1.81,
27
  "step": 9
28
  },
29
  {
30
  "epoch": 2.22,
31
+ "learning_rate": 1.40625e-05,
32
+ "loss": 1134325609267.2,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 2.89,
37
+ "eval_accuracy": 0.5806451612903226,
38
+ "eval_loss": 1064469725184.0,
39
+ "eval_runtime": 2.153,
40
+ "eval_samples_per_second": 28.797,
41
+ "eval_steps_per_second": 1.858,
42
  "step": 13
43
  },
44
  {
45
  "epoch": 4.0,
46
+ "eval_accuracy": 0.5645161290322581,
47
+ "eval_loss": 1064469725184.0,
48
+ "eval_runtime": 1.9785,
49
+ "eval_samples_per_second": 31.337,
50
+ "eval_steps_per_second": 2.022,
51
  "step": 18
52
  },
53
  {
54
  "epoch": 4.44,
55
+ "learning_rate": 1.3125e-05,
56
+ "loss": 1175573862809.6,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 4.89,
61
+ "eval_accuracy": 0.5483870967741935,
62
+ "eval_loss": 1064469725184.0,
63
+ "eval_runtime": 2.109,
64
+ "eval_samples_per_second": 29.398,
65
+ "eval_steps_per_second": 1.897,
66
  "step": 22
67
  },
68
  {
69
  "epoch": 6.0,
70
+ "eval_accuracy": 0.5645161290322581,
71
+ "eval_loss": 1064469725184.0,
72
+ "eval_runtime": 2.1312,
73
+ "eval_samples_per_second": 29.091,
74
+ "eval_steps_per_second": 1.877,
75
  "step": 27
76
  },
77
  {
78
  "epoch": 6.67,
79
+ "learning_rate": 1.21875e-05,
80
+ "loss": 1175573967667.2,
81
  "step": 30
82
  },
83
  {
84
  "epoch": 6.89,
85
+ "eval_accuracy": 0.532258064516129,
86
+ "eval_loss": 1064469725184.0,
87
+ "eval_runtime": 2.0865,
88
+ "eval_samples_per_second": 29.715,
89
+ "eval_steps_per_second": 1.917,
90
  "step": 31
91
  },
92
  {
93
  "epoch": 8.0,
94
+ "eval_accuracy": 0.532258064516129,
95
+ "eval_loss": 1064469725184.0,
96
+ "eval_runtime": 2.1275,
97
+ "eval_samples_per_second": 29.142,
98
+ "eval_steps_per_second": 1.88,
99
  "step": 36
100
  },
101
  {
102
  "epoch": 8.89,
103
+ "learning_rate": 1.125e-05,
104
+ "loss": 1093077460582.4,
105
  "step": 40
106
  },
107
  {
108
  "epoch": 8.89,
109
+ "eval_accuracy": 0.5483870967741935,
110
+ "eval_loss": 1064469725184.0,
111
+ "eval_runtime": 1.995,
112
+ "eval_samples_per_second": 31.078,
113
+ "eval_steps_per_second": 2.005,
114
  "step": 40
115
  },
116
  {
117
  "epoch": 10.0,
118
+ "eval_accuracy": 0.5483870967741935,
119
+ "eval_loss": 1064469725184.0,
120
+ "eval_runtime": 1.964,
121
+ "eval_samples_per_second": 31.569,
122
+ "eval_steps_per_second": 2.037,
123
  "step": 45
124
  },
125
  {
126
  "epoch": 10.89,
127
+ "eval_accuracy": 0.532258064516129,
128
+ "eval_loss": 1064469725184.0,
129
+ "eval_runtime": 2.145,
130
+ "eval_samples_per_second": 28.904,
131
+ "eval_steps_per_second": 1.865,
132
  "step": 49
133
  },
134
  {
135
  "epoch": 11.11,
136
+ "learning_rate": 1.03125e-05,
137
+ "loss": 1134325399552.0,
138
  "step": 50
139
  },
140
  {
141
  "epoch": 12.0,
142
+ "eval_accuracy": 0.5806451612903226,
143
+ "eval_loss": 1064469725184.0,
144
+ "eval_runtime": 2.2306,
145
+ "eval_samples_per_second": 27.795,
146
+ "eval_steps_per_second": 1.793,
147
  "step": 54
148
  },
149
  {
150
  "epoch": 12.89,
151
+ "eval_accuracy": 0.5806451612903226,
152
+ "eval_loss": 1064469725184.0,
153
+ "eval_runtime": 2.0875,
154
+ "eval_samples_per_second": 29.701,
155
+ "eval_steps_per_second": 1.916,
156
  "step": 58
157
  },
158
  {
159
  "epoch": 13.33,
160
+ "learning_rate": 9.375000000000001e-06,
161
+ "loss": 1134325609267.2,
162
  "step": 60
163
  },
164
  {
165
  "epoch": 14.0,
166
+ "eval_accuracy": 0.5645161290322581,
167
+ "eval_loss": 1064469725184.0,
168
+ "eval_runtime": 2.1535,
169
+ "eval_samples_per_second": 28.79,
170
+ "eval_steps_per_second": 1.857,
171
  "step": 63
172
  },
173
  {
174
  "epoch": 14.89,
175
+ "eval_accuracy": 0.5483870967741935,
176
+ "eval_loss": 1064469725184.0,
177
+ "eval_runtime": 2.2291,
178
+ "eval_samples_per_second": 27.814,
179
+ "eval_steps_per_second": 1.794,
180
  "step": 67
181
  },
182
  {
183
  "epoch": 15.56,
184
+ "learning_rate": 8.4375e-06,
185
+ "loss": 1154949736038.4,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 16.0,
190
+ "eval_accuracy": 0.5967741935483871,
191
+ "eval_loss": 1064469725184.0,
192
+ "eval_runtime": 2.08,
193
+ "eval_samples_per_second": 29.808,
194
+ "eval_steps_per_second": 1.923,
195
  "step": 72
196
  },
197
  {
198
  "epoch": 16.89,
199
+ "eval_accuracy": 0.5645161290322581,
200
+ "eval_loss": 1064469725184.0,
201
+ "eval_runtime": 2.035,
202
+ "eval_samples_per_second": 30.467,
203
+ "eval_steps_per_second": 1.966,
204
  "step": 76
205
  },
206
  {
207
  "epoch": 17.78,
208
+ "learning_rate": 7.5e-06,
209
+ "loss": 1093077355724.8,
210
  "step": 80
211
  },
212
  {
213
  "epoch": 18.0,
214
+ "eval_accuracy": 0.5483870967741935,
215
+ "eval_loss": 1064469725184.0,
216
+ "eval_runtime": 2.2025,
217
+ "eval_samples_per_second": 28.149,
218
+ "eval_steps_per_second": 1.816,
219
  "step": 81
220
  },
221
  {
222
  "epoch": 18.89,
223
+ "eval_accuracy": 0.5161290322580645,
224
+ "eval_loss": 1064469725184.0,
225
+ "eval_runtime": 1.971,
226
+ "eval_samples_per_second": 31.457,
227
+ "eval_steps_per_second": 2.029,
228
  "step": 85
229
  },
230
  {
231
  "epoch": 20.0,
232
+ "learning_rate": 6.5625e-06,
233
+ "loss": 1216822116352.0,
234
  "step": 90
235
  },
236
  {
237
  "epoch": 20.0,
238
+ "eval_accuracy": 0.5483870967741935,
239
+ "eval_loss": 1064469725184.0,
240
+ "eval_runtime": 1.985,
241
+ "eval_samples_per_second": 31.235,
242
+ "eval_steps_per_second": 2.015,
243
  "step": 90
244
  },
245
  {
246
  "epoch": 20.89,
247
+ "eval_accuracy": 0.532258064516129,
248
+ "eval_loss": 1064469725184.0,
249
+ "eval_runtime": 1.9935,
250
+ "eval_samples_per_second": 31.101,
251
+ "eval_steps_per_second": 2.007,
252
  "step": 94
253
  },
254
  {
255
  "epoch": 22.0,
256
+ "eval_accuracy": 0.5967741935483871,
257
+ "eval_loss": 1064469725184.0,
258
+ "eval_runtime": 2.075,
259
+ "eval_samples_per_second": 29.88,
260
+ "eval_steps_per_second": 1.928,
261
  "step": 99
262
  },
263
  {
264
  "epoch": 22.22,
265
+ "learning_rate": 5.625e-06,
266
+ "loss": 1134325609267.2,
267
  "step": 100
268
  },
269
  {
270
  "epoch": 22.89,
271
+ "eval_accuracy": 0.5806451612903226,
272
+ "eval_loss": 1064469725184.0,
273
+ "eval_runtime": 2.0175,
274
+ "eval_samples_per_second": 30.731,
275
+ "eval_steps_per_second": 1.983,
276
  "step": 103
277
  },
278
  {
279
  "epoch": 24.0,
280
+ "eval_accuracy": 0.5483870967741935,
281
+ "eval_loss": 1064469725184.0,
282
+ "eval_runtime": 2.0675,
283
+ "eval_samples_per_second": 29.988,
284
+ "eval_steps_per_second": 1.935,
285
  "step": 108
286
  },
287
  {
288
  "epoch": 24.44,
289
+ "learning_rate": 4.6875000000000004e-06,
290
+ "loss": 1196197884723.2,
291
  "step": 110
292
  },
293
  {
294
  "epoch": 24.89,
295
+ "eval_accuracy": 0.5483870967741935,
296
+ "eval_loss": 1064469725184.0,
297
+ "eval_runtime": 2.075,
298
+ "eval_samples_per_second": 29.879,
299
+ "eval_steps_per_second": 1.928,
300
  "step": 112
301
  },
302
  {
303
  "epoch": 26.0,
304
+ "eval_accuracy": 0.532258064516129,
305
+ "eval_loss": 1064469725184.0,
306
+ "eval_runtime": 2.1282,
307
+ "eval_samples_per_second": 29.133,
308
+ "eval_steps_per_second": 1.88,
309
  "step": 117
310
  },
311
  {
312
  "epoch": 26.67,
313
+ "learning_rate": 3.75e-06,
314
+ "loss": 1072453333811.2,
315
  "step": 120
316
  },
317
  {
318
  "epoch": 26.89,
319
+ "eval_accuracy": 0.5645161290322581,
320
+ "eval_loss": 1064469725184.0,
321
+ "eval_runtime": 1.9508,
322
+ "eval_samples_per_second": 31.782,
323
+ "eval_steps_per_second": 2.05,
324
  "step": 121
325
  },
326
  {
327
  "epoch": 28.0,
328
+ "eval_accuracy": 0.5483870967741935,
329
+ "eval_loss": 1064469725184.0,
330
+ "eval_runtime": 1.9533,
331
+ "eval_samples_per_second": 31.742,
332
+ "eval_steps_per_second": 2.048,
333
  "step": 126
334
  },
335
  {
336
  "epoch": 28.89,
337
+ "learning_rate": 2.8125e-06,
338
+ "loss": 1175573862809.6,
339
  "step": 130
340
  },
341
  {
342
  "epoch": 28.89,
343
+ "eval_accuracy": 0.532258064516129,
344
+ "eval_loss": 1064469725184.0,
345
+ "eval_runtime": 2.1424,
346
+ "eval_samples_per_second": 28.94,
347
+ "eval_steps_per_second": 1.867,
348
  "step": 130
349
  },
350
  {
351
  "epoch": 30.0,
352
+ "eval_accuracy": 0.5483870967741935,
353
+ "eval_loss": 1064469725184.0,
354
+ "eval_runtime": 2.0848,
355
+ "eval_samples_per_second": 29.739,
356
+ "eval_steps_per_second": 1.919,
357
  "step": 135
358
  },
359
  {
360
  "epoch": 30.89,
361
+ "eval_accuracy": 0.5483870967741935,
362
+ "eval_loss": 1064469725184.0,
363
+ "eval_runtime": 1.9558,
364
+ "eval_samples_per_second": 31.7,
365
+ "eval_steps_per_second": 2.045,
366
  "step": 139
367
  },
368
  {
369
  "epoch": 31.11,
370
+ "learning_rate": 1.875e-06,
371
+ "loss": 1134325609267.2,
372
  "step": 140
373
  },
374
  {
375
  "epoch": 32.0,
376
+ "eval_accuracy": 0.532258064516129,
377
+ "eval_loss": 1064469725184.0,
378
+ "eval_runtime": 1.9384,
379
+ "eval_samples_per_second": 31.984,
380
+ "eval_steps_per_second": 2.064,
381
  "step": 144
382
  },
383
  {
384
  "epoch": 32.89,
385
+ "eval_accuracy": 0.5483870967741935,
386
+ "eval_loss": 1064469725184.0,
387
+ "eval_runtime": 1.997,
388
+ "eval_samples_per_second": 31.046,
389
+ "eval_steps_per_second": 2.003,
390
  "step": 148
391
  },
392
  {
393
  "epoch": 33.33,
394
+ "learning_rate": 9.375e-07,
395
+ "loss": 1216822011494.4,
396
  "step": 150
397
  },
398
  {
399
  "epoch": 34.0,
400
+ "eval_accuracy": 0.532258064516129,
401
+ "eval_loss": 1064469725184.0,
402
+ "eval_runtime": 2.2368,
403
+ "eval_samples_per_second": 27.718,
404
+ "eval_steps_per_second": 1.788,
405
  "step": 153
406
  },
407
  {
408
  "epoch": 34.89,
409
+ "eval_accuracy": 0.532258064516129,
410
+ "eval_loss": 1064469725184.0,
411
+ "eval_runtime": 1.9757,
412
+ "eval_samples_per_second": 31.382,
413
+ "eval_steps_per_second": 2.025,
414
  "step": 157
415
  },
416
  {
417
  "epoch": 35.56,
418
  "learning_rate": 0.0,
419
+ "loss": 1051829102182.4,
420
  "step": 160
421
  },
422
  {
423
  "epoch": 35.56,
424
+ "eval_accuracy": 0.532258064516129,
425
+ "eval_loss": 1064469725184.0,
426
+ "eval_runtime": 1.937,
427
+ "eval_samples_per_second": 32.009,
428
+ "eval_steps_per_second": 2.065,
429
  "step": 160
430
  },
431
  {
432
  "epoch": 35.56,
433
  "step": 160,
434
  "total_flos": 2.807020017156096e+16,
435
+ "train_loss": 1143348658176.0,
436
+ "train_runtime": 448.7824,
437
+ "train_samples_per_second": 25.669,
438
+ "train_steps_per_second": 0.357
439
  }
440
  ],
441
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf38d914e5eaa698758645526a9651b15c22134f6ea1860cf5f99a8d040ef183
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d0202a7e131b7554da06d99dc90e4f4622b4cd33fba64e736dbc93ac4016a88
3
  size 4728