mtzig committed on
Commit
a16714f
·
verified ·
1 Parent(s): 37f8cbf

Model save

Browse files
Files changed (4) hide show
  1. README.md +158 -158
  2. config.json +2 -2
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of an unspecified base model on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.6781
20
  - Accuracy: 0.546
21
 
22
  ## Model description
@@ -49,163 +49,163 @@ The following hyperparameters were used during training:
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:------:|:-----:|:---------------:|:--------:|
52
- | No log | 0 | 0 | 2.6912 | 0.0 |
53
- | 2.687 | 0.0064 | 100 | 2.6906 | 0.0 |
54
- | 2.6844 | 0.0128 | 200 | 2.6886 | 0.0 |
55
- | 2.6844 | 0.0192 | 300 | 2.6851 | 0.0 |
56
- | 2.6854 | 0.0256 | 400 | 2.6796 | 0.0 |
57
- | 2.6752 | 0.032 | 500 | 2.6708 | 0.0 |
58
- | 2.6582 | 0.0384 | 600 | 2.6574 | 0.0 |
59
- | 2.6385 | 0.0448 | 700 | 2.6386 | 0.0 |
60
- | 2.6156 | 0.0512 | 800 | 2.6157 | 0.0 |
61
- | 2.5971 | 0.0576 | 900 | 2.5925 | 0.0 |
62
- | 2.5815 | 0.064 | 1000 | 2.5722 | 0.546 |
63
- | 2.5522 | 0.0704 | 1100 | 2.5549 | 0.546 |
64
- | 2.5304 | 0.0768 | 1200 | 2.5381 | 0.546 |
65
- | 2.5259 | 0.0832 | 1300 | 2.5216 | 0.546 |
66
- | 2.5204 | 0.0896 | 1400 | 2.5049 | 0.546 |
67
- | 2.4877 | 0.096 | 1500 | 2.4880 | 0.546 |
68
- | 2.4693 | 0.1024 | 1600 | 2.4704 | 0.546 |
69
- | 2.458 | 0.1088 | 1700 | 2.4534 | 0.546 |
70
- | 2.443 | 0.1152 | 1800 | 2.4378 | 0.546 |
71
- | 2.4251 | 0.1216 | 1900 | 2.4235 | 0.546 |
72
- | 2.4096 | 0.128 | 2000 | 2.4099 | 0.631 |
73
- | 2.4012 | 0.1344 | 2100 | 2.3970 | 0.684 |
74
- | 2.3777 | 0.1408 | 2200 | 2.3844 | 0.743 |
75
- | 2.3667 | 0.1472 | 2300 | 2.3722 | 0.743 |
76
- | 2.3664 | 0.1536 | 2400 | 2.3603 | 0.743 |
77
- | 2.3481 | 0.16 | 2500 | 2.3486 | 0.743 |
78
- | 2.3298 | 0.1664 | 2600 | 2.3369 | 0.743 |
79
- | 2.3234 | 0.1728 | 2700 | 2.3255 | 0.743 |
80
- | 2.3098 | 0.1792 | 2800 | 2.3141 | 0.743 |
81
- | 2.3039 | 0.1856 | 2900 | 2.3029 | 0.743 |
82
- | 2.2921 | 0.192 | 3000 | 2.2918 | 0.743 |
83
- | 2.2831 | 0.1984 | 3100 | 2.2807 | 0.743 |
84
- | 2.2629 | 0.2048 | 3200 | 2.2697 | 0.743 |
85
- | 2.2523 | 0.2112 | 3300 | 2.2587 | 0.743 |
86
- | 2.25 | 0.2176 | 3400 | 2.2478 | 0.743 |
87
- | 2.2391 | 0.224 | 3500 | 2.2370 | 0.743 |
88
- | 2.2191 | 0.2304 | 3600 | 2.2262 | 0.743 |
89
- | 2.2187 | 0.2368 | 3700 | 2.2155 | 0.684 |
90
- | 2.2022 | 0.2432 | 3800 | 2.2049 | 0.684 |
91
- | 2.1899 | 0.2496 | 3900 | 2.1943 | 0.631 |
92
- | 2.1841 | 0.256 | 4000 | 2.1838 | 0.631 |
93
- | 2.171 | 0.2624 | 4100 | 2.1734 | 0.631 |
94
- | 2.1758 | 0.2688 | 4200 | 2.1631 | 0.631 |
95
- | 2.1515 | 0.2752 | 4300 | 2.1528 | 0.631 |
96
- | 2.1436 | 0.2816 | 4400 | 2.1427 | 0.631 |
97
- | 2.1283 | 0.288 | 4500 | 2.1326 | 0.631 |
98
- | 2.1226 | 0.2944 | 4600 | 2.1226 | 0.631 |
99
- | 2.1094 | 0.3008 | 4700 | 2.1127 | 0.583 |
100
- | 2.0977 | 0.3072 | 4800 | 2.1029 | 0.583 |
101
- | 2.0957 | 0.3136 | 4900 | 2.0932 | 0.546 |
102
- | 2.0891 | 0.32 | 5000 | 2.0835 | 0.546 |
103
- | 2.0747 | 0.3264 | 5100 | 2.0740 | 0.546 |
104
- | 2.0697 | 0.3328 | 5200 | 2.0646 | 0.546 |
105
- | 2.0644 | 0.3392 | 5300 | 2.0552 | 0.546 |
106
- | 2.0489 | 0.3456 | 5400 | 2.0460 | 0.546 |
107
- | 2.0377 | 0.352 | 5500 | 2.0369 | 0.546 |
108
- | 2.0253 | 0.3584 | 5600 | 2.0278 | 0.546 |
109
- | 2.0204 | 0.3648 | 5700 | 2.0189 | 0.546 |
110
- | 2.0073 | 0.3712 | 5800 | 2.0101 | 0.546 |
111
- | 2.0001 | 0.3776 | 5900 | 2.0014 | 0.546 |
112
- | 2.0007 | 0.384 | 6000 | 1.9928 | 0.546 |
113
- | 1.983 | 0.3904 | 6100 | 1.9843 | 0.546 |
114
- | 1.9755 | 0.3968 | 6200 | 1.9760 | 0.546 |
115
- | 1.9607 | 0.4032 | 6300 | 1.9678 | 0.546 |
116
- | 1.966 | 0.4096 | 6400 | 1.9596 | 0.546 |
117
- | 1.9511 | 0.416 | 6500 | 1.9516 | 0.546 |
118
- | 1.9506 | 0.4224 | 6600 | 1.9437 | 0.546 |
119
- | 1.933 | 0.4288 | 6700 | 1.9360 | 0.546 |
120
- | 1.9257 | 0.4352 | 6800 | 1.9283 | 0.546 |
121
- | 1.9197 | 0.4416 | 6900 | 1.9208 | 0.546 |
122
- | 1.9103 | 0.448 | 7000 | 1.9134 | 0.546 |
123
- | 1.9039 | 0.4544 | 7100 | 1.9062 | 0.546 |
124
- | 1.8954 | 0.4608 | 7200 | 1.8990 | 0.546 |
125
- | 1.8918 | 0.4672 | 7300 | 1.8919 | 0.546 |
126
- | 1.8791 | 0.4736 | 7400 | 1.8851 | 0.546 |
127
- | 1.8713 | 0.48 | 7500 | 1.8783 | 0.546 |
128
- | 1.869 | 0.4864 | 7600 | 1.8716 | 0.546 |
129
- | 1.8617 | 0.4928 | 7700 | 1.8651 | 0.546 |
130
- | 1.8579 | 0.4992 | 7800 | 1.8587 | 0.546 |
131
- | 1.8546 | 0.5056 | 7900 | 1.8524 | 0.546 |
132
- | 1.8395 | 0.512 | 8000 | 1.8462 | 0.546 |
133
- | 1.8351 | 0.5184 | 8100 | 1.8402 | 0.546 |
134
- | 1.8368 | 0.5248 | 8200 | 1.8343 | 0.546 |
135
- | 1.8282 | 0.5312 | 8300 | 1.8285 | 0.546 |
136
- | 1.8264 | 0.5376 | 8400 | 1.8229 | 0.546 |
137
- | 1.8133 | 0.544 | 8500 | 1.8174 | 0.546 |
138
- | 1.8156 | 0.5504 | 8600 | 1.8120 | 0.546 |
139
- | 1.8119 | 0.5568 | 8700 | 1.8067 | 0.546 |
140
- | 1.805 | 0.5632 | 8800 | 1.8016 | 0.546 |
141
- | 1.7968 | 0.5696 | 8900 | 1.7965 | 0.546 |
142
- | 1.7948 | 0.576 | 9000 | 1.7917 | 0.546 |
143
- | 1.7882 | 0.5824 | 9100 | 1.7869 | 0.546 |
144
- | 1.7901 | 0.5888 | 9200 | 1.7822 | 0.546 |
145
- | 1.7753 | 0.5952 | 9300 | 1.7777 | 0.546 |
146
- | 1.7721 | 0.6016 | 9400 | 1.7733 | 0.546 |
147
- | 1.7653 | 0.608 | 9500 | 1.7690 | 0.546 |
148
- | 1.767 | 0.6144 | 9600 | 1.7649 | 0.546 |
149
- | 1.7554 | 0.6208 | 9700 | 1.7608 | 0.546 |
150
- | 1.7674 | 0.6272 | 9800 | 1.7569 | 0.546 |
151
- | 1.751 | 0.6336 | 9900 | 1.7531 | 0.546 |
152
- | 1.7567 | 0.64 | 10000 | 1.7494 | 0.546 |
153
- | 1.745 | 0.6464 | 10100 | 1.7458 | 0.546 |
154
- | 1.7365 | 0.6528 | 10200 | 1.7424 | 0.546 |
155
- | 1.7361 | 0.6592 | 10300 | 1.7390 | 0.546 |
156
- | 1.7411 | 0.6656 | 10400 | 1.7358 | 0.546 |
157
- | 1.73 | 0.672 | 10500 | 1.7327 | 0.546 |
158
- | 1.7308 | 0.6784 | 10600 | 1.7297 | 0.546 |
159
- | 1.7237 | 0.6848 | 10700 | 1.7268 | 0.546 |
160
- | 1.7205 | 0.6912 | 10800 | 1.7239 | 0.546 |
161
- | 1.7357 | 0.6976 | 10900 | 1.7212 | 0.546 |
162
- | 1.7142 | 0.704 | 11000 | 1.7186 | 0.546 |
163
- | 1.7261 | 0.7104 | 11100 | 1.7161 | 0.546 |
164
- | 1.7127 | 0.7168 | 11200 | 1.7137 | 0.546 |
165
- | 1.7097 | 0.7232 | 11300 | 1.7114 | 0.546 |
166
- | 1.7037 | 0.7296 | 11400 | 1.7092 | 0.546 |
167
- | 1.7071 | 0.736 | 11500 | 1.7071 | 0.546 |
168
- | 1.7015 | 0.7424 | 11600 | 1.7051 | 0.546 |
169
- | 1.7005 | 0.7488 | 11700 | 1.7032 | 0.546 |
170
- | 1.7013 | 0.7552 | 11800 | 1.7014 | 0.546 |
171
- | 1.7037 | 0.7616 | 11900 | 1.6996 | 0.546 |
172
- | 1.704 | 0.768 | 12000 | 1.6980 | 0.546 |
173
- | 1.6848 | 0.7744 | 12100 | 1.6964 | 0.546 |
174
- | 1.6937 | 0.7808 | 12200 | 1.6949 | 0.546 |
175
- | 1.6961 | 0.7872 | 12300 | 1.6935 | 0.546 |
176
- | 1.6927 | 0.7936 | 12400 | 1.6922 | 0.546 |
177
- | 1.6875 | 0.8 | 12500 | 1.6909 | 0.546 |
178
- | 1.6861 | 0.8064 | 12600 | 1.6897 | 0.546 |
179
- | 1.6852 | 0.8128 | 12700 | 1.6886 | 0.546 |
180
- | 1.6952 | 0.8192 | 12800 | 1.6876 | 0.546 |
181
- | 1.6886 | 0.8256 | 12900 | 1.6866 | 0.546 |
182
- | 1.6861 | 0.832 | 13000 | 1.6858 | 0.546 |
183
- | 1.6854 | 0.8384 | 13100 | 1.6849 | 0.546 |
184
- | 1.6805 | 0.8448 | 13200 | 1.6841 | 0.546 |
185
- | 1.6831 | 0.8512 | 13300 | 1.6834 | 0.546 |
186
- | 1.6805 | 0.8576 | 13400 | 1.6828 | 0.546 |
187
- | 1.6843 | 0.864 | 13500 | 1.6822 | 0.546 |
188
- | 1.6899 | 0.8704 | 13600 | 1.6816 | 0.546 |
189
- | 1.6789 | 0.8768 | 13700 | 1.6811 | 0.546 |
190
- | 1.6852 | 0.8832 | 13800 | 1.6807 | 0.546 |
191
- | 1.683 | 0.8896 | 13900 | 1.6803 | 0.546 |
192
- | 1.685 | 0.896 | 14000 | 1.6799 | 0.546 |
193
- | 1.6816 | 0.9024 | 14100 | 1.6796 | 0.546 |
194
- | 1.6831 | 0.9088 | 14200 | 1.6793 | 0.546 |
195
- | 1.6819 | 0.9152 | 14300 | 1.6791 | 0.546 |
196
- | 1.6726 | 0.9216 | 14400 | 1.6789 | 0.546 |
197
- | 1.6788 | 0.928 | 14500 | 1.6787 | 0.546 |
198
- | 1.679 | 0.9344 | 14600 | 1.6786 | 0.546 |
199
- | 1.6809 | 0.9408 | 14700 | 1.6785 | 0.546 |
200
- | 1.6732 | 0.9472 | 14800 | 1.6784 | 0.546 |
201
- | 1.6722 | 0.9536 | 14900 | 1.6783 | 0.546 |
202
- | 1.6701 | 0.96 | 15000 | 1.6782 | 0.546 |
203
- | 1.6866 | 0.9664 | 15100 | 1.6782 | 0.546 |
204
- | 1.6767 | 0.9728 | 15200 | 1.6781 | 0.546 |
205
- | 1.6699 | 0.9792 | 15300 | 1.6781 | 0.546 |
206
- | 1.6794 | 0.9856 | 15400 | 1.6781 | 0.546 |
207
- | 1.6697 | 0.992 | 15500 | 1.6781 | 0.546 |
208
- | 1.6732 | 0.9984 | 15600 | 1.6781 | 0.546 |
209
 
210
 
211
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of an unspecified base model on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 2.3269
20
  - Accuracy: 0.546
21
 
22
  ## Model description
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
  |:-------------:|:------:|:-----:|:---------------:|:--------:|
52
+ | No log | 0 | 0 | 2.6414 | 0.0 |
53
+ | 2.6425 | 0.0064 | 100 | 2.6413 | 0.0 |
54
+ | 2.6424 | 0.0128 | 200 | 2.6410 | 0.0 |
55
+ | 2.6407 | 0.0192 | 300 | 2.6405 | 0.0 |
56
+ | 2.6374 | 0.0256 | 400 | 2.6398 | 0.0 |
57
+ | 2.6367 | 0.032 | 500 | 2.6389 | 0.0 |
58
+ | 2.6372 | 0.0384 | 600 | 2.6379 | 0.0 |
59
+ | 2.6372 | 0.0448 | 700 | 2.6366 | 0.0 |
60
+ | 2.634 | 0.0512 | 800 | 2.6350 | 0.0 |
61
+ | 2.6349 | 0.0576 | 900 | 2.6330 | 0.0 |
62
+ | 2.6337 | 0.064 | 1000 | 2.6308 | 0.0 |
63
+ | 2.6276 | 0.0704 | 1100 | 2.6284 | 0.0 |
64
+ | 2.6237 | 0.0768 | 1200 | 2.6256 | 0.0 |
65
+ | 2.6237 | 0.0832 | 1300 | 2.6227 | 0.0 |
66
+ | 2.6234 | 0.0896 | 1400 | 2.6195 | 0.0 |
67
+ | 2.6157 | 0.096 | 1500 | 2.6160 | 0.0 |
68
+ | 2.6118 | 0.1024 | 1600 | 2.6123 | 0.0 |
69
+ | 2.6095 | 0.1088 | 1700 | 2.6085 | 0.0 |
70
+ | 2.6063 | 0.1152 | 1800 | 2.6047 | 0.0 |
71
+ | 2.6017 | 0.1216 | 1900 | 2.6009 | 0.546 |
72
+ | 2.5971 | 0.128 | 2000 | 2.5971 | 0.546 |
73
+ | 2.5954 | 0.1344 | 2100 | 2.5933 | 0.546 |
74
+ | 2.5872 | 0.1408 | 2200 | 2.5895 | 0.546 |
75
+ | 2.5839 | 0.1472 | 2300 | 2.5857 | 0.546 |
76
+ | 2.5847 | 0.1536 | 2400 | 2.5819 | 0.546 |
77
+ | 2.5778 | 0.16 | 2500 | 2.5781 | 0.546 |
78
+ | 2.5714 | 0.1664 | 2600 | 2.5742 | 0.546 |
79
+ | 2.5699 | 0.1728 | 2700 | 2.5704 | 0.546 |
80
+ | 2.565 | 0.1792 | 2800 | 2.5666 | 0.546 |
81
+ | 2.5638 | 0.1856 | 2900 | 2.5628 | 0.546 |
82
+ | 2.5592 | 0.192 | 3000 | 2.5589 | 0.546 |
83
+ | 2.5564 | 0.1984 | 3100 | 2.5551 | 0.546 |
84
+ | 2.5486 | 0.2048 | 3200 | 2.5513 | 0.546 |
85
+ | 2.5454 | 0.2112 | 3300 | 2.5475 | 0.546 |
86
+ | 2.5448 | 0.2176 | 3400 | 2.5437 | 0.546 |
87
+ | 2.541 | 0.224 | 3500 | 2.5399 | 0.546 |
88
+ | 2.5337 | 0.2304 | 3600 | 2.5361 | 0.546 |
89
+ | 2.5337 | 0.2368 | 3700 | 2.5324 | 0.546 |
90
+ | 2.5278 | 0.2432 | 3800 | 2.5286 | 0.546 |
91
+ | 2.5233 | 0.2496 | 3900 | 2.5249 | 0.546 |
92
+ | 2.5214 | 0.256 | 4000 | 2.5212 | 0.546 |
93
+ | 2.5166 | 0.2624 | 4100 | 2.5175 | 0.546 |
94
+ | 2.5189 | 0.2688 | 4200 | 2.5138 | 0.546 |
95
+ | 2.5098 | 0.2752 | 4300 | 2.5101 | 0.546 |
96
+ | 2.507 | 0.2816 | 4400 | 2.5065 | 0.546 |
97
+ | 2.5015 | 0.288 | 4500 | 2.5028 | 0.546 |
98
+ | 2.4993 | 0.2944 | 4600 | 2.4992 | 0.546 |
99
+ | 2.4946 | 0.3008 | 4700 | 2.4957 | 0.546 |
100
+ | 2.4905 | 0.3072 | 4800 | 2.4921 | 0.546 |
101
+ | 2.4897 | 0.3136 | 4900 | 2.4886 | 0.546 |
102
+ | 2.4873 | 0.32 | 5000 | 2.4851 | 0.546 |
103
+ | 2.4822 | 0.3264 | 5100 | 2.4816 | 0.546 |
104
+ | 2.4801 | 0.3328 | 5200 | 2.4782 | 0.546 |
105
+ | 2.4784 | 0.3392 | 5300 | 2.4747 | 0.546 |
106
+ | 2.4728 | 0.3456 | 5400 | 2.4714 | 0.546 |
107
+ | 2.4686 | 0.352 | 5500 | 2.4680 | 0.546 |
108
+ | 2.4635 | 0.3584 | 5600 | 2.4647 | 0.546 |
109
+ | 2.4619 | 0.3648 | 5700 | 2.4613 | 0.546 |
110
+ | 2.4572 | 0.3712 | 5800 | 2.4581 | 0.546 |
111
+ | 2.4545 | 0.3776 | 5900 | 2.4548 | 0.546 |
112
+ | 2.4547 | 0.384 | 6000 | 2.4516 | 0.546 |
113
+ | 2.4482 | 0.3904 | 6100 | 2.4484 | 0.546 |
114
+ | 2.4453 | 0.3968 | 6200 | 2.4453 | 0.546 |
115
+ | 2.4399 | 0.4032 | 6300 | 2.4422 | 0.546 |
116
+ | 2.4417 | 0.4096 | 6400 | 2.4391 | 0.546 |
117
+ | 2.4361 | 0.416 | 6500 | 2.4361 | 0.546 |
118
+ | 2.436 | 0.4224 | 6600 | 2.4331 | 0.546 |
119
+ | 2.4293 | 0.4288 | 6700 | 2.4302 | 0.546 |
120
+ | 2.4264 | 0.4352 | 6800 | 2.4272 | 0.546 |
121
+ | 2.4241 | 0.4416 | 6900 | 2.4244 | 0.546 |
122
+ | 2.4206 | 0.448 | 7000 | 2.4215 | 0.546 |
123
+ | 2.4178 | 0.4544 | 7100 | 2.4187 | 0.546 |
124
+ | 2.4148 | 0.4608 | 7200 | 2.4160 | 0.546 |
125
+ | 2.4135 | 0.4672 | 7300 | 2.4132 | 0.546 |
126
+ | 2.4085 | 0.4736 | 7400 | 2.4106 | 0.546 |
127
+ | 2.4053 | 0.48 | 7500 | 2.4079 | 0.546 |
128
+ | 2.4044 | 0.4864 | 7600 | 2.4053 | 0.546 |
129
+ | 2.4016 | 0.4928 | 7700 | 2.4028 | 0.546 |
130
+ | 2.4 | 0.4992 | 7800 | 2.4003 | 0.546 |
131
+ | 2.3987 | 0.5056 | 7900 | 2.3978 | 0.546 |
132
+ | 2.393 | 0.512 | 8000 | 2.3954 | 0.546 |
133
+ | 2.3912 | 0.5184 | 8100 | 2.3930 | 0.546 |
134
+ | 2.3918 | 0.5248 | 8200 | 2.3907 | 0.546 |
135
+ | 2.3884 | 0.5312 | 8300 | 2.3884 | 0.546 |
136
+ | 2.3876 | 0.5376 | 8400 | 2.3861 | 0.546 |
137
+ | 2.3825 | 0.544 | 8500 | 2.3839 | 0.546 |
138
+ | 2.3833 | 0.5504 | 8600 | 2.3818 | 0.546 |
139
+ | 2.3817 | 0.5568 | 8700 | 2.3797 | 0.546 |
140
+ | 2.3791 | 0.5632 | 8800 | 2.3776 | 0.546 |
141
+ | 2.3759 | 0.5696 | 8900 | 2.3756 | 0.546 |
142
+ | 2.3751 | 0.576 | 9000 | 2.3737 | 0.546 |
143
+ | 2.3723 | 0.5824 | 9100 | 2.3717 | 0.546 |
144
+ | 2.3731 | 0.5888 | 9200 | 2.3699 | 0.546 |
145
+ | 2.3674 | 0.5952 | 9300 | 2.3680 | 0.546 |
146
+ | 2.3659 | 0.6016 | 9400 | 2.3663 | 0.546 |
147
+ | 2.3633 | 0.608 | 9500 | 2.3645 | 0.546 |
148
+ | 2.3637 | 0.6144 | 9600 | 2.3628 | 0.546 |
149
+ | 2.3594 | 0.6208 | 9700 | 2.3612 | 0.546 |
150
+ | 2.3637 | 0.6272 | 9800 | 2.3596 | 0.546 |
151
+ | 2.3574 | 0.6336 | 9900 | 2.3580 | 0.546 |
152
+ | 2.3595 | 0.64 | 10000 | 2.3565 | 0.546 |
153
+ | 2.355 | 0.6464 | 10100 | 2.3551 | 0.546 |
154
+ | 2.3515 | 0.6528 | 10200 | 2.3536 | 0.546 |
155
+ | 2.3514 | 0.6592 | 10300 | 2.3523 | 0.546 |
156
+ | 2.353 | 0.6656 | 10400 | 2.3509 | 0.546 |
157
+ | 2.3487 | 0.672 | 10500 | 2.3496 | 0.546 |
158
+ | 2.349 | 0.6784 | 10600 | 2.3484 | 0.546 |
159
+ | 2.3463 | 0.6848 | 10700 | 2.3472 | 0.546 |
160
+ | 2.3448 | 0.6912 | 10800 | 2.3460 | 0.546 |
161
+ | 2.3506 | 0.6976 | 10900 | 2.3449 | 0.546 |
162
+ | 2.3423 | 0.704 | 11000 | 2.3438 | 0.546 |
163
+ | 2.3467 | 0.7104 | 11100 | 2.3428 | 0.546 |
164
+ | 2.3415 | 0.7168 | 11200 | 2.3418 | 0.546 |
165
+ | 2.3402 | 0.7232 | 11300 | 2.3408 | 0.546 |
166
+ | 2.3381 | 0.7296 | 11400 | 2.3399 | 0.546 |
167
+ | 2.3393 | 0.736 | 11500 | 2.3390 | 0.546 |
168
+ | 2.337 | 0.7424 | 11600 | 2.3382 | 0.546 |
169
+ | 2.3365 | 0.7488 | 11700 | 2.3374 | 0.546 |
170
+ | 2.3366 | 0.7552 | 11800 | 2.3366 | 0.546 |
171
+ | 2.3374 | 0.7616 | 11900 | 2.3359 | 0.546 |
172
+ | 2.3376 | 0.768 | 12000 | 2.3352 | 0.546 |
173
+ | 2.3303 | 0.7744 | 12100 | 2.3346 | 0.546 |
174
+ | 2.3336 | 0.7808 | 12200 | 2.3339 | 0.546 |
175
+ | 2.3345 | 0.7872 | 12300 | 2.3333 | 0.546 |
176
+ | 2.3331 | 0.7936 | 12400 | 2.3328 | 0.546 |
177
+ | 2.331 | 0.8 | 12500 | 2.3323 | 0.546 |
178
+ | 2.3305 | 0.8064 | 12600 | 2.3318 | 0.546 |
179
+ | 2.3301 | 0.8128 | 12700 | 2.3313 | 0.546 |
180
+ | 2.3338 | 0.8192 | 12800 | 2.3309 | 0.546 |
181
+ | 2.3313 | 0.8256 | 12900 | 2.3305 | 0.546 |
182
+ | 2.3304 | 0.832 | 13000 | 2.3301 | 0.546 |
183
+ | 2.33 | 0.8384 | 13100 | 2.3297 | 0.546 |
184
+ | 2.3282 | 0.8448 | 13200 | 2.3294 | 0.546 |
185
+ | 2.3291 | 0.8512 | 13300 | 2.3291 | 0.546 |
186
+ | 2.3282 | 0.8576 | 13400 | 2.3288 | 0.546 |
187
+ | 2.3295 | 0.864 | 13500 | 2.3286 | 0.546 |
188
+ | 2.3316 | 0.8704 | 13600 | 2.3284 | 0.546 |
189
+ | 2.3275 | 0.8768 | 13700 | 2.3281 | 0.546 |
190
+ | 2.3297 | 0.8832 | 13800 | 2.3280 | 0.546 |
191
+ | 2.329 | 0.8896 | 13900 | 2.3278 | 0.546 |
192
+ | 2.3297 | 0.896 | 14000 | 2.3276 | 0.546 |
193
+ | 2.3284 | 0.9024 | 14100 | 2.3275 | 0.546 |
194
+ | 2.3289 | 0.9088 | 14200 | 2.3274 | 0.546 |
195
+ | 2.3285 | 0.9152 | 14300 | 2.3273 | 0.546 |
196
+ | 2.3251 | 0.9216 | 14400 | 2.3272 | 0.546 |
197
+ | 2.3274 | 0.928 | 14500 | 2.3271 | 0.546 |
198
+ | 2.3273 | 0.9344 | 14600 | 2.3271 | 0.546 |
199
+ | 2.3279 | 0.9408 | 14700 | 2.3270 | 0.546 |
200
+ | 2.3251 | 0.9472 | 14800 | 2.3270 | 0.546 |
201
+ | 2.3248 | 0.9536 | 14900 | 2.3269 | 0.546 |
202
+ | 2.3239 | 0.96 | 15000 | 2.3269 | 0.546 |
203
+ | 2.3302 | 0.9664 | 15100 | 2.3269 | 0.546 |
204
+ | 2.3265 | 0.9728 | 15200 | 2.3269 | 0.546 |
205
+ | 2.3238 | 0.9792 | 15300 | 2.3269 | 0.546 |
206
+ | 2.3274 | 0.9856 | 15400 | 2.3269 | 0.546 |
207
+ | 2.3238 | 0.992 | 15500 | 2.3269 | 0.546 |
208
+ | 2.325 | 0.9984 | 15600 | 2.3269 | 0.546 |
209
 
210
 
211
  ### Framework versions
config.json CHANGED
@@ -5,9 +5,9 @@
5
  "bias": true,
6
  "block_size": 256,
7
  "dropout": 0.0,
8
- "mlp_dim": 1,
9
  "model_type": "nanogpt",
10
- "n_embd": 6,
11
  "n_head": 1,
12
  "n_layer": 1,
13
  "nonlinearity": "RELU",
 
5
  "bias": true,
6
  "block_size": 256,
7
  "dropout": 0.0,
8
+ "mlp_dim": 12,
9
  "model_type": "nanogpt",
10
+ "n_embd": 2,
11
  "n_head": 1,
12
  "n_layer": 1,
13
  "nonlinearity": "RELU",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aade9790166feb8dc654c0ad7eb06b557cb56edf72334f41e2d38715f094459
3
- size 3240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39bd7c42d798309e1ff8e61e70b37187f481fbc4d4824365cc620e6c0d7dfa8a
3
+ size 2248
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b8771e94380d5a22f9f2c18550eb28573db1ae00f28340556c7fc63571dccfc
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523c24cbcc8ad0066fa26b506e22771ed04a4f483dbde192af2f51a13d70869d
3
  size 5240