fengwuyao committed on
Commit
4526a9f
·
verified ·
1 Parent(s): 45f5995

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -39,3 +39,5 @@ Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task filter=lfs diff=lfs merg
39
  Qwen2.5-1.5B-Instruct_seq128_q8_ekv4096.task filter=lfs diff=lfs merge=lfs -text
40
  Qwen2.5-1.5B-Instruct_seq128_q8_ekv1280.task filter=lfs diff=lfs merge=lfs -text
41
  Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task filter=lfs diff=lfs merge=lfs -text
 
 
 
39
  Qwen2.5-1.5B-Instruct_seq128_q8_ekv4096.task filter=lfs diff=lfs merge=lfs -text
40
  Qwen2.5-1.5B-Instruct_seq128_q8_ekv1280.task filter=lfs diff=lfs merge=lfs -text
41
  Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task filter=lfs diff=lfs merge=lfs -text
42
+ Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.litertlm filter=lfs diff=lfs merge=lfs -text
43
+ Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.litertlm filter=lfs diff=lfs merge=lfs -text
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.litertlm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:708483abb477c10dd8db8cc6b4995bbd9cd2deda5539249a37c218028a669391
3
+ size 6182436864
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a6bf44f952218b22a76cc1b94ac46a5db8dc43c7f31b7e635f2ca608e3f35ed
3
- size 4296491008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:addf45e6d75f60c9cd34bdf42d84f896f5b7b4faba6ee9f631bf6139de6af087
3
+ size 6182391796
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.litertlm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ac49902133f729199bbd23ad6714df32b20585c786f2546d2293cc371f35278
3
+ size 1598603264
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a662d1af6ca2c750bc047738d6f48441d4b6968062c24ef52fcff5d693291f6
3
- size 1597913616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82968d0a6c3872cf016fdbcfc591571605f4c7fd2b0f64d2533df502cc6596b3
3
+ size 1598556720
README.md CHANGED
@@ -62,8 +62,8 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
62
  <th></th>
63
  </tr>
64
  <tr>
65
- <td rowspan="3"><p style="text-align: left">CPU</p></td>
66
- <td><p style="text-align: left">fp32 (baseline)</p></td>
67
  <td><p style="text-align: right">1280</p></td>
68
  <td><p style="text-align: right">27 tk/s</p></td>
69
  <td><p style="text-align: right">6 tk/s</p></td>
@@ -74,8 +74,7 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
74
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv1280.task">&#128279;</a></p></td>
75
  </tr>
76
  <tr>
77
- <td rowspan="4"><p style="text-align: left">dynamic_int8</p></td>
78
- <td><p style="text-align: right">1280</p></td>
79
  <td><p style="text-align: right">106 tk/s</p></td>
80
  <td><p style="text-align: right">23 tk/s</p></td>
81
  <td><p style="text-align: right">2.74 s</p></td>
@@ -85,7 +84,6 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
85
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">&#128279;</a></p></td>
86
  </tr>
87
  <tr>
88
- <td><p style="text-align: right">4096</p></td>
89
  <td><p style="text-align: right">63 tk/s</p></td>
90
  <td><p style="text-align: right">20 tk/s</p></td>
91
  <td><p style="text-align: right">4.40 s</p></td>
@@ -95,8 +93,8 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
95
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task">&#128279;</a></p></td>
96
  </tr>
97
  <tr>
98
- <td rowspan="2"><p style="text-align: left">GPU</p></td>
99
- <td><p style="text-align: right">1280</p></td>
100
  <td><p style="text-align: right">706 tk/s</p></td>
101
  <td><p style="text-align: right">24 tk/s</p></td>
102
  <td><p style="text-align: right">6.94 s</p></td>
@@ -106,7 +104,6 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
106
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">&#128279;</a></p></td>
107
  </tr>
108
  <tr>
109
- <td><p style="text-align: right">4096</p></td>
110
  <td><p style="text-align: right">417 tk/s</p></td>
111
  <td><p style="text-align: right">22 tk/s</p></td>
112
  <td><p style="text-align: right">7.93 s</p></td>
 
62
  <th></th>
63
  </tr>
64
  <tr>
65
+ <td rowspan="5"><p style="text-align: left">CPU</p></td>
66
+ <td rowspan="3"><p style="text-align: left">fp32 (baseline)</p></td>
67
  <td><p style="text-align: right">1280</p></td>
68
  <td><p style="text-align: right">27 tk/s</p></td>
69
  <td><p style="text-align: right">6 tk/s</p></td>
 
74
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv1280.task">&#128279;</a></p></td>
75
  </tr>
76
  <tr>
77
+ <td rowspan="2"><p style="text-align: right">1280</p></td>
 
78
  <td><p style="text-align: right">106 tk/s</p></td>
79
  <td><p style="text-align: right">23 tk/s</p></td>
80
  <td><p style="text-align: right">2.74 s</p></td>
 
84
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">&#128279;</a></p></td>
85
  </tr>
86
  <tr>
 
87
  <td><p style="text-align: right">63 tk/s</p></td>
88
  <td><p style="text-align: right">20 tk/s</p></td>
89
  <td><p style="text-align: right">4.40 s</p></td>
 
93
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task">&#128279;</a></p></td>
94
  </tr>
95
  <tr>
96
+ <td rowspan="2"><p style="text-align: left">dynamic_int8</p></td>
97
+ <td rowspan="2"><p style="text-align: right">1280</p></td>
98
  <td><p style="text-align: right">706 tk/s</p></td>
99
  <td><p style="text-align: right">24 tk/s</p></td>
100
  <td><p style="text-align: right">6.94 s</p></td>
 
104
  <td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">&#128279;</a></p></td>
105
  </tr>
106
  <tr>
 
107
  <td><p style="text-align: right">417 tk/s</p></td>
108
  <td><p style="text-align: right">22 tk/s</p></td>
109
  <td><p style="text-align: right">7.93 s</p></td>