Add files using upload-large-folder tool
Browse files
.gitattributes
CHANGED
@@ -39,3 +39,5 @@ Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task filter=lfs diff=lfs merg
|
|
39 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
40 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv1280.task filter=lfs diff=lfs merge=lfs -text
|
41 |
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
39 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
40 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv1280.task filter=lfs diff=lfs merge=lfs -text
|
41 |
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
42 |
+
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.litertlm filter=lfs diff=lfs merge=lfs -text
|
43 |
+
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.litertlm filter=lfs diff=lfs merge=lfs -text
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.litertlm
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:708483abb477c10dd8db8cc6b4995bbd9cd2deda5539249a37c218028a669391
|
3 |
+
size 6182436864
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:addf45e6d75f60c9cd34bdf42d84f896f5b7b4faba6ee9f631bf6139de6af087
|
3 |
+
size 6182391796
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.litertlm
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ac49902133f729199bbd23ad6714df32b20585c786f2546d2293cc371f35278
|
3 |
+
size 1598603264
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82968d0a6c3872cf016fdbcfc591571605f4c7fd2b0f64d2533df502cc6596b3
|
3 |
+
size 1598556720
|
README.md
CHANGED
@@ -62,8 +62,8 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
62 |
<th></th>
|
63 |
</tr>
|
64 |
<tr>
|
65 |
-
<td rowspan="
|
66 |
-
<td><p style="text-align: left">fp32 (baseline)</p></td>
|
67 |
<td><p style="text-align: right">1280</p></td>
|
68 |
<td><p style="text-align: right">27 tk/s</p></td>
|
69 |
<td><p style="text-align: right">6 tk/s</p></td>
|
@@ -74,8 +74,7 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
74 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv1280.task">🔗</a></p></td>
|
75 |
</tr>
|
76 |
<tr>
|
77 |
-
<td rowspan="
|
78 |
-
<td><p style="text-align: right">1280</p></td>
|
79 |
<td><p style="text-align: right">106 tk/s</p></td>
|
80 |
<td><p style="text-align: right">23 tk/s</p></td>
|
81 |
<td><p style="text-align: right">2.74 s</p></td>
|
@@ -85,7 +84,6 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
85 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
86 |
</tr>
|
87 |
<tr>
|
88 |
-
<td><p style="text-align: right">4096</p></td>
|
89 |
<td><p style="text-align: right">63 tk/s</p></td>
|
90 |
<td><p style="text-align: right">20 tk/s</p></td>
|
91 |
<td><p style="text-align: right">4.40 s</p></td>
|
@@ -95,8 +93,8 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
95 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task">🔗</a></p></td>
|
96 |
</tr>
|
97 |
<tr>
|
98 |
-
<td rowspan="2"><p style="text-align: left">
|
99 |
-
<td><p style="text-align: right">1280</p></td>
|
100 |
<td><p style="text-align: right">706 tk/s</p></td>
|
101 |
<td><p style="text-align: right">24 tk/s</p></td>
|
102 |
<td><p style="text-align: right">6.94 s</p></td>
|
@@ -106,7 +104,6 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
106 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
107 |
</tr>
|
108 |
<tr>
|
109 |
-
<td><p style="text-align: right">4096</p></td>
|
110 |
<td><p style="text-align: right">417 tk/s</p></td>
|
111 |
<td><p style="text-align: right">22 tk/s</p></td>
|
112 |
<td><p style="text-align: right">7.93 s</p></td>
|
|
|
62 |
<th></th>
|
63 |
</tr>
|
64 |
<tr>
|
65 |
+
<td rowspan="5"><p style="text-align: left">CPU</p></td>
|
66 |
+
<td rowspan="3"><p style="text-align: left">fp32 (baseline)</p></td>
|
67 |
<td><p style="text-align: right">1280</p></td>
|
68 |
<td><p style="text-align: right">27 tk/s</p></td>
|
69 |
<td><p style="text-align: right">6 tk/s</p></td>
|
|
|
74 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv1280.task">🔗</a></p></td>
|
75 |
</tr>
|
76 |
<tr>
|
77 |
+
<td rowspan="2"><p style="text-align: right">1280</p></td>
|
|
|
78 |
<td><p style="text-align: right">106 tk/s</p></td>
|
79 |
<td><p style="text-align: right">23 tk/s</p></td>
|
80 |
<td><p style="text-align: right">2.74 s</p></td>
|
|
|
84 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
85 |
</tr>
|
86 |
<tr>
|
|
|
87 |
<td><p style="text-align: right">63 tk/s</p></td>
|
88 |
<td><p style="text-align: right">20 tk/s</p></td>
|
89 |
<td><p style="text-align: right">4.40 s</p></td>
|
|
|
93 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task">🔗</a></p></td>
|
94 |
</tr>
|
95 |
<tr>
|
96 |
+
<td rowspan="2"><p style="text-align: left">dynamic_int8</p></td>
|
97 |
+
<td rowspan="2"><p style="text-align: right">1280</p></td>
|
98 |
<td><p style="text-align: right">706 tk/s</p></td>
|
99 |
<td><p style="text-align: right">24 tk/s</p></td>
|
100 |
<td><p style="text-align: right">6.94 s</p></td>
|
|
|
104 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
105 |
</tr>
|
106 |
<tr>
|
|
|
107 |
<td><p style="text-align: right">417 tk/s</p></td>
|
108 |
<td><p style="text-align: right">22 tk/s</p></td>
|
109 |
<td><p style="text-align: right">7.93 s</p></td>
|