diff --git a/.gitattributes b/.gitattributes index a19f152129828108ce35f75719f949203a994a97..49818db780de24c5f10f612d9b159158154dd0d9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -66,3 +66,33 @@ deepseek-r1-7b-ax650/qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text main_prefill_postprocess filter=lfs diff=lfs merge=lfs -text main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text main_axcl_x86 filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l24_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l25_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l26_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l27_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-7b-int4-ax650/qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text +main_ax650 filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 556f1b5016aebbb466d7427664082a8aeab2b003..b5a6075690c8a10cc3f7a51d25eaca99dd92db71 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ license: bsd-3-clause - This model has been optimized with the following LoRA: - - Compatible with Pulsar2 version: 3.3 + - Compatible with Pulsar2 version: 4.2 - Due to the current quantization scheme of w8a16, the CMM consumes about 7.6GiB of memory, so a 16GiB development board is required to run. diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l0_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l0_together.axmodel index 4849f6cf93c71738dfb2ea98371c4e21796e0f1a..9957f26fc484c09ce44012034f4c41a0dc6d0504 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l0_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l0_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:303e6894ee10c13050dbfc099be9a0f4d597c9c8bc009257dc2b4b5f3a0412be -size 259184164 +oid sha256:b5f8d6036c69ee3b1c5ab4b8252e69b5116dea7fc9311403a1b468cb7a670ebf +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l10_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l10_together.axmodel index 26ff3960c177841ea745fd5f09c8e8c3d45a96d9..bbe24093324d33ecfce671808b00a42fdd6e089a 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l10_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l10_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:401ba7b02327f3388a0f0e55385f8c315f0d7b96d9f56f8e53f52085147df259 -size 259184164 +oid sha256:281a0b417c238234d24f820f19a90c8e28d0d2fac1b2b41a94cf83f48b716966 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l11_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l11_together.axmodel index c8706d7d70c53e0bc9cc83bdd53a2b16957c0b1a..0c9f683bab2e51a70a5b3d931c4eb761649f58c8 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l11_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l11_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b067a5f287cb567d2cdfe33f4adf1411194258887978e0d8adf7365abb28365d -size 259184164 +oid sha256:5a5869cbb6920c58b22b5e84e64db09603024b894bf6e035ebc0d58ef73aaff2 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l12_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l12_together.axmodel index 6c2954955cb5ad03619cf9eec504f12db8153a10..5a95431b7b225502a95dec841b516598671bdbdc 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l12_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l12_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a521dcc5cb291ccf84bfd99612077747f785bc60e1bb42dcc10a149df1be0ffc -size 259184164 +oid sha256:2596b97ab4532e255f09fd1df7775491a9521e77a14ad1298f447d443cfa420f +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l13_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l13_together.axmodel index 05f2d1d16e03cfb20d02a86a1a43a59218b9ab69..9e246396a913c3c1300382633b6b0e66ca7467be 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l13_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l13_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c37e61ac80a88515d4b47cc989163ccdccc9f9c50b86654ba737692bfb2799b -size 259184164 +oid sha256:2d105507c5f6f2f3f711d345d9fc084fba68299ac73b2bcb7a16d761e310264c +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l14_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l14_together.axmodel index 807ebe86c8dfcef8fef3ce2cc2e9892bdfbb7a26..aaa103bc04068833d69dde757a55812e65f52d6b 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l14_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l14_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c8b0b1f1ce942749ba03916ffa59598a9333fd1b0a00369017a4e1ad306e88d -size 259184164 +oid sha256:40c48a46ab3bb7b28f9b61f484a2537a965a66b62625feb44dc6326a986f51f0 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l15_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l15_together.axmodel index 8fa1c7fe4a35edb2627e90a49de50dd16619f33e..37d4acd88c2668206edb42f694367a6a3d156c5c 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l15_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l15_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8dcd3da2574f75ee5e96cee629be330904712274823027bef81882af10a0f890 -size 259184164 +oid sha256:e75b0b28d781d9a93e070ea9d8a4cf9c9183a098227f0db5739149d1108326ca +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l16_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l16_together.axmodel index 1757695cd3706f4d53197626fe974aeaf3d3d922..f575f03946d2fec2326c831b41f0ec4976b3d095 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l16_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l16_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2d457dd154542a6d8ee2dfd35539f6a86a74c8a134d7cc4d282a02f7ad4b699 -size 259184164 +oid sha256:ccd119d940055cf5383486e6f8dc44f4c9fcc0c7475342097eaa48c746073e99 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l17_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l17_together.axmodel index 24117aebed2c938421cd809e6b24f4061537f987..ecd9b618cc04adb92b1f9432c601def3812be482 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l17_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l17_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:656e47264a7d63035d761126d005318af87742c267652765ce72a1708aac893a -size 259184164 +oid sha256:f1b23aa38f665abda50296c8200561d10500eaf04beb2ef7a0dc2a0e10707201 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l18_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l18_together.axmodel index df9d97e1b64cf864a06631a1f50aa9f09f01305c..3c132dac17a1547e876945424c9c40751c6867c1 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l18_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l18_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca66d28e3b9924f141908bb262cef744c659c8816acba09837acb66227d902e2 -size 259184164 +oid sha256:4b4cafc6d2606e3193c306d88ab612b351da7fc6b16004654dccd5f8b7c74296 +size 278814797 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l19_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l19_together.axmodel index c8dd2f77337f371d548c4cb87e515b1c21cf184f..b50eef39072560054aef429ce5d4d288684c33dd 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l19_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l19_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4de66fe58abee934ee4e86e62fdf9fcd03b2658fa9a2157129e1e86aa8c79839 -size 259184548 +oid sha256:4d18eaffe4ff0171546e431caaf40fc2d607d785a527649df468f32ec72d6c5e +size 278814861 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l1_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l1_together.axmodel index 00a191b7d218bf12943c4494aeeda4084111b517..1a741bfd92dde88aa46a965bd2dce4323e22a573 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l1_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l1_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:157100c043862354278a83c0dbad79e0471272bc6b02c297b69b098b6dc2b6a9 -size 259184356 +oid sha256:2f3b47078fc4359180a30073553103bc063241e35fa79c36cd79d4abbf94eda8 +size 278815085 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l20_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l20_together.axmodel index e0c4fc9671234bcb62c2afd7d5739fa96703352a..d997a9b01c5f9c31470fc4e37718eabfa823d8b9 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l20_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l20_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c0d5cc99383f29e541baea272da4fbd51d5eadebf4a5e62545e42941d1cba24 -size 259184164 +oid sha256:e511b9db661633730080c1e8272a074d4069029aed0fd3be48e296c3a775d3ec +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l21_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l21_together.axmodel index 4b0c854b4063a37f41f44dbf27f0fa37be902d51..69c1abf83079022c570a848edf91043d955f59bd 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l21_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l21_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f7455f6447d86f2d2fdced11f70e476349df5c5b72baa2ded78d9e702881718 -size 259184164 +oid sha256:d968ad8bb77d014ae6a4ec6842ec48e9fce7b4a212b3705a96f42d0e2b322f8b +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l22_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l22_together.axmodel index 6a076b79c34faa62a5bd1fabefda16c689bac659..62c76b09986260f92b32759ebd2a207e0bb778fe 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l22_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l22_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93f428ecaf973422d0fb54731587f687308e764b2c0343db8fccd47e2282cc17 -size 259184164 +oid sha256:b39bdf44cfb27821cf891e44b13bcbaf60fdbb77dbaac11bb087d9d6b541c9f5 +size 278814893 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l23_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l23_together.axmodel index 19cccd7e32f25b0b6248b429117c8275a42d6cf7..63c2fe0ee56110365d4bee96970b7a77d5e3269f 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l23_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l23_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5075979d91cba2149759be63282e0a1a7dbc07191c97bc58113f91e3bf9633a -size 259184164 +oid sha256:673e66ff5a4a3194c951999b8debabb54ac38bb487327adf112cdcaf1269079b +size 278815085 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l24_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l24_together.axmodel index 6825c4e310547fdc33c8079a7032858e3f5cbf7a..118081a395ccac5647148ee5bcf840d0744e4ffe 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l24_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l24_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4891fd3dd3d890d09cb21bdc3d24c977dd099043d2e6ac3a3ff8905f3167d5f8 -size 259184132 +oid sha256:b5888498b9d4dd52cb92400c0108cf51801ba0012832a0f1e606909d9ff3be80 +size 278814925 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l25_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l25_together.axmodel index 1e2e040967237334b7e4f6ad2ce30d37492c7b44..b348d5c8fc6cc5b8857bb1cf6bfa22f405a7d138 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l25_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l25_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b506464b548318eb8f80853f5c3f5837ae15c19c48ff5ab68ced038110698f19 -size 259184196 +oid sha256:ee7e5469b188be7ae1424643c5e2190304db695934d50933741834b36515757f +size 278815085 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l26_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l26_together.axmodel index 027c0d69ae8e666a585e325e9c7041123ef88092..42eda3dc38f8bea5d7df13159b00bc12a4f25a86 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l26_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l26_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b906358f13967257369bb86288cdb2ded05ac5a463ef5e0155191fb703db3f46 -size 259184228 +oid sha256:c30a547ac27394884dfc3a96e3277a5fef8737c96123fa05f25e18509125cdee +size 278815309 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l27_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l27_together.axmodel index 58ae9ed29c1d7a96e465a8f2fa829a32322a022c..da6581bbc803b9c34360be1503206540f8b06400 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l27_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l27_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a18bd9f3137194dc8f692b231bccb10975a5f8a5b3496c6282acc6b10d7079f -size 259184388 +oid sha256:03c74f116c6c876f54bd4ecdc810d0cd870fa52b58c83552eeb23a4ee5126804 +size 278815757 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l2_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l2_together.axmodel index b9c95583be22f5e06bae31ed1df9bd18e6cb697b..dd02b32ea74d1c7f1bc239f1f996c908385080d9 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l2_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l2_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:687382daccf1f2f758efefa57b4012c975de925219adc865fafc7d431a6491a1 -size 259184164 +oid sha256:477b3f26f77e3071a17de7fca0d5c13c6fdee46ab67c67e86e8faa212229e5a5 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l3_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l3_together.axmodel index 953978ce60c6203f13c9aa66a314496f6ff2dab7..3016d74c6fab7cb2c58f96c52baaeb082d33377c 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l3_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l3_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef1673f6117c4802c8f8faef463b5403fd764d2835eb7346264adbd578eb43c7 -size 259184164 +oid sha256:d1bd5ded6fa46fd08cca63527e60ba9c3fa76e2f9a1b90f8b4c89edcecc32119 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l4_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l4_together.axmodel index 18bcd9b1944fcdbf6e69e79a822cccd814a6fa16..3351847567d5931399a3b29b870b75adc5252da0 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l4_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l4_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76e2deec639eb896305d452756a64ce3dfbdf0a92e3f10b80f1b17b00c64c6db -size 259184164 +oid sha256:36d2d88ae1e561cd75ee5e712b3555ba0cdff188a26024b5eafae1ba993668d4 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l5_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l5_together.axmodel index a0fa0662e9a6b93cf9550187fd2d4c1b5be1f248..7e7dd93cefb5a751f4e4b7448327fc6ae2496437 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l5_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l5_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc1bbba1c7f6cabd2ff0e8123e68f10e6afc45b6b65d49b324f23a0373122d04 -size 259184164 +oid sha256:58c5840138cb4b5251925ec3d06b9011037f83a54c481849cf0b4b2c690dd927 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l6_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l6_together.axmodel index 0e4333f6e104a846d7b2f553c42f675e5cb39db0..e8cc7d225acd5ad2b2b15e4eacd303121a2c71f8 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l6_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l6_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a6b18a6110b5df02de0624fe21e832b3da0117fb182f3bfde0bb9cb1d08dd11 -size 259184164 +oid sha256:3a1c8dd0c243e2229e8509709f18f328cc9ad256953983ed8d65b8f558ecbbe1 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l7_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l7_together.axmodel index be83349e460deecd2c7492fa9bf5abf6938bb498..7c2087c2fc0bc67e9c42d789dec42b90c7db5a12 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l7_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l7_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:334b935bc5e3cfd7d70bcce061cc533b95175a1bb087343c673ee7e073c1d481 -size 259184164 +oid sha256:5af5e416ff918d5e1511fcb393d196266edc569f3b343f4600d6b8ae6e74b8e2 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l8_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l8_together.axmodel index b465d080517a8ab6d081bb7e084f4a69af143751..3eaa166d1a1750c069532b996bd6b8ffa2f99557 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l8_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l8_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:853cd028403031ccefadca177379c60aea3ec69b1f314b3eb475364d797b56ad -size 259184164 +oid sha256:c885ebb42178867519a1ebc5caf3f8068ffb7cb92e89260ea1764cd7b3a9d11c +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_p128_l9_together.axmodel b/deepseek-r1-7b-ax650/qwen2_p128_l9_together.axmodel index 47ca460a4c9b7d3e196f36e39079773298b0c1ab..3f010739d8ddaa8c48da68c6bf521df15d7551c7 100644 --- a/deepseek-r1-7b-ax650/qwen2_p128_l9_together.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_p128_l9_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8a11c35e9d56dca995561e9a61560663bdf1072287b911a7f58f8c944cc3829 -size 259184164 +oid sha256:0b7f6ebd4f9f6c66208170e617cf3d4ea04d3a0d7d94235d16c6d3a99b90c583 +size 278814541 diff --git a/deepseek-r1-7b-ax650/qwen2_post.axmodel b/deepseek-r1-7b-ax650/qwen2_post.axmodel index df36a0d5a04978b02fb710a56c8edef0946ce5a4..5f782173ea3046d5a2c973392ee6603da2bf4081 100644 --- a/deepseek-r1-7b-ax650/qwen2_post.axmodel +++ b/deepseek-r1-7b-ax650/qwen2_post.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:062c8a2f364f30887ce58d1f3ed2a940b45f7518d60b02581a70523abfe7f214 +oid sha256:4b3c5a8772a3c2c8dd27305cccd4745f8d322391903c25d19911eb22bd1e1f78 size 593952384 diff --git a/deepseek-r1-7b-int4-ax650/model.embed_tokens.weight.bfloat16.bin b/deepseek-r1-7b-int4-ax650/model.embed_tokens.weight.bfloat16.bin new file mode 100644 index 0000000000000000000000000000000000000000..f19ee48074b1810129183b2800671fceb06f639d --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/model.embed_tokens.weight.bfloat16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e0121d3c22891f60b4a9b119a90953514004548d4ee88162fb82e7910941395 +size 1089994752 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l0_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l0_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..a3c2717bacb35fb07cc1d2ab3da3da3f1279ed26 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l0_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ce81ef63a91d52ffd5efccdd8e6974dc1d8cb9493198661e13752a9b7bedb0 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l10_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l10_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..cc99f73734f5c2884f8d38b7bcc50ac47b777788 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l10_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8094d12602d1dfa02a76236dab7c5626cc3ac8a39a303e30ec1d153023105beb +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l11_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l11_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..57849181996f57f47e704b4eb5a3f1b3c8de29b5 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l11_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08db726d73fcfc7b831405ae6afc0733f0b24ad202cbe69748f5c107bf38b79a +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l12_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l12_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..f86d43b75f77dd0c8963eb6c71fb010b4a9a0e6f --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l12_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55dd9e383da125a1c5d5330e9339c62f5994be74e0ba29493b69186b9498f615 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l13_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l13_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..e5a85e016f7c378ccb851b2827d8087e7bbb6137 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l13_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e6b4fa4d7dcb61f6959b76629f2d8d42d50753ecfb76d9f8c1ea83c6451389 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l14_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l14_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..3445163c1c0d2574bd1711b7970260df1f423797 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l14_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f37e4db27d70c13c01548383c9264a66331bb4175485f2306a61b0bb463ded +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l15_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l15_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..139feee19221d00d46afd0499c3005691d05aa12 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l15_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0310e4faa466c15c0fbe19f6d48b711edced025d58f7d37a0037f2d3dd397306 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l16_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l16_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..a9c1b7854e377ef6d6cc1151dac2d4f7a7c37a39 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l16_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dedce591a30b6e7912e0c2d7bf65ee3ac783e09d5feb95c6a68a93875594237b +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l17_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l17_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..51a808152bd38c2591eb3e58829a5a5930c6fd7a --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l17_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac7c519aff5ac13d2a60b2b15dd66e307e5849130186eb124f9235f5c4a233c5 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l18_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l18_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..ade64b0b1f1f04a052a138ebd0fee1827821f468 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l18_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab73e718c8c06c505ffbdff71c7477e07f4501adaf8662151891c49edbee734 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l19_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l19_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..c390054ed79e2cabcbe0fc2011f718c72d93c3cf --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l19_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e263367c2b74ad1b2217ba89d17c40c2fda4ad20096fc979a80e1d9b7fb3fccb +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l1_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l1_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..5729d1eb5d4869811411fbf318e02ef9c0885abf --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l1_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b088a5407487bf735d66c03321d7b8794c8571471d10cd3a955e754c45d7eba +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l20_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l20_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..a1b85bff9bfe7df1f7dd43ff4092c11937020c3d --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l20_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba25ec7376b00bfc2bc2a48897e3246e2660f97e3c9e6dbee87c17a251fa1a3 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l21_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l21_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..0f22e749d93d32f0ef4d9138b4e641cc2822fa2e --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l21_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb532ed6189f3f3492e08ccf6eda4489c79c350e6a88fe4e5549cb60c34ad7d +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l22_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l22_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..23b8fe6c3d5de877ef67285c6e53413a83c7aa36 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l22_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210248a57666378e132bc50916a08f2072e0a412e4d69cd62682eb0bff017de3 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l23_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l23_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..c8178ffef7c0a6582f3882c3f053f4ca3331169f --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l23_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1a0e8cf87f56c155799fed556729aad08fea9854ff632d901aa94a0ff908eb +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l24_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l24_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..4050e97060145c3ee8f9a8ae94332593e97921bd --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l24_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccae1a55a943deebc8c97c46b3e241691eb94079ba74ac33ba3038238889a2a1 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l25_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l25_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..7683035d8f75a11b3b75166c6f85f4c0048086c7 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l25_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e7cdde3f0aa8eaf04a3a2c43682537789d622c5981ab2977d0b2b1e7dd1453 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l26_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l26_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..db5f2b202da8dbd462edf9cd7775c6f181495a40 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l26_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066646f4dfa78c6777abeb491a6a3fa6511cb324f53ab2b78c28cdd42282e57b +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l27_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l27_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..5de089cef32389378e8fa1c659dfb9add7388c39 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l27_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea89b96eb5c0288e85c305f24c9cd8a07f9d37a29ba67de44e7ac458e65bec39 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l2_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l2_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..f389ddded79afaf4a560caf5f4549dc6aaf04bd6 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l2_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11930cb01ee13ee48d79ff275752fdfa96e39740f0c2a03b5f4aa9a118064f42 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l3_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l3_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..9dda56bb09cacff83142a1af066d70c9d927a0ac --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l3_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7661d4ce430d98fb152f80ea5f121ed8fab68b992e03ec83df3488f797fe0d +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l4_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l4_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..8ba2d160ef0267bbfde193c3612e4f1fa25380f1 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l4_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837389308125da507063433790edbc21b0723d7fcafeb625066b39bf09976775 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l5_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l5_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..7775144717911911a24c2c5977366d7b38f13341 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l5_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6903d0a6eb70cd133b7b9555fd26c8e66c0e06c5da41f581c32696ea4d55a4d6 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l6_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l6_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..a448f685a57d25744f582a41cae922d4c4c9bf64 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l6_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706ac7e9422c072f28ac9bc1be8666426ac46917dd43943b33abf77ca83758f1 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l7_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l7_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..ed4a3f386188cd719de295f0bcdbcf6fabcfbae7 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l7_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8842ac74d212513caedaa3161c4c528f60f1fbf71c108da4c8c64ee4a63c230 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l8_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l8_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..994b97b58f33b5665d988f15b23e3dccbb53b2be --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l8_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d4ba61ff76c6619289792ec840258923a4e148ffa8f1bf36bb7641396296f7 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_p128_l9_together.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_p128_l9_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..37ad1fc9dad2b12444545cfd005562c674862270 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_p128_l9_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5bcb8dea1cc5c77239a2c765870a5fec3c9418008c1d553d5d6d862b3e24a5 +size 154877904 diff --git a/deepseek-r1-7b-int4-ax650/qwen2_post.axmodel b/deepseek-r1-7b-int4-ax650/qwen2_post.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..9a0116fefa5f4e97b8d752dc5708d3ea7b50b507 --- /dev/null +++ b/deepseek-r1-7b-int4-ax650/qwen2_post.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1966864d1c953c7075d5d8a78bd790f7e8a3ab08dc809c81f55780c23f8622b +size 593952384 diff --git a/deepseek-r1_tokenizer.py b/deepseek-r1_tokenizer.py index 0ebd3e555fb68497b5723de8cea4dcc622a7b28f..00b96326ac95088ccb57bddef0731c9c72d62431 100644 --- a/deepseek-r1_tokenizer.py +++ b/deepseek-r1_tokenizer.py @@ -2,30 +2,57 @@ from transformers import AutoTokenizer, PreTrainedTokenizerFast from http.server import HTTPServer, BaseHTTPRequestHandler import json import argparse +import uuid +# 全局字典:存储 uid 到 Tokenizer_Http 实例的映射 +tokenizers = {} class Tokenizer_Http(): - def __init__(self): model_id = "deepseek-r1_tokenizer" self.tokenizer = AutoTokenizer.from_pretrained(model_id) - - def encode(self, prompt): - messages = [ - {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."}, - {"role": "user", "content": prompt} + self.messages = [ + {"role": "system", "content": "You are DeepSeek. You are a helpful assistant."}, ] + self.token_ids = [] + + self.token_ids_cache = [] + + def encode(self, prompt, last_reply=None): + if last_reply is not None: + self.messages.append({"role": "assistant", "content": last_reply}) + text = self.tokenizer.apply_chat_template( + self.messages, + tokenize=False, + add_generation_prompt=True + ) + # print("生成的文本:\n============\n", text, "============\n") + self.token_ids = self.tokenizer.encode(text)[:-3] + self.messages.append({"role": "user", "content": prompt}) + text = self.tokenizer.apply_chat_template( - messages, + self.messages, tokenize=False, add_generation_prompt=True ) - print(text) + print("生成的文本:\n============\n", text, "============\n") token_ids = self.tokenizer.encode(text) - return token_ids + # 找出新增部分 + diff = token_ids[len(self.token_ids):] + self.token_ids = token_ids + print(self.decode(diff)) + return token_ids, diff def decode(self, token_ids): - return self.tokenizer.decode(token_ids) + self.token_ids_cache += token_ids + text = self.tokenizer.decode(self.token_ids_cache) + if "\ufffd" in text: + print("text 中包含非法字符") + return "" + else: + self.token_ids_cache.clear() + return text + @property def bos_id(self): @@ -42,92 +69,131 @@ class Tokenizer_Http(): @property def eos_token(self): return self.tokenizer.eos_token - - -tokenizer = Tokenizer_Http() - -print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token) -print(tokenizer.encode("hello world")) - + + def reset(self, system_prompt="You are DeepSeek. You are a helpful assistant."): + self.messages = [ + {"role": "system", "content": system_prompt}, + ] + text = self.tokenizer.apply_chat_template( + self.messages, + tokenize=False, + add_generation_prompt=True + ) + token_ids = self.tokenizer.encode(text)[:-3] + self.token_ids = token_ids + print(self.decode(token_ids)) + return token_ids + class Request(BaseHTTPRequestHandler): - #通过类继承,新定义类 timeout = 5 server_version = 'Apache' def do_GET(self): - print(self.path) - #在新类中定义get的内容(当客户端向该服务端使用get请求时,本服务端将如下运行) + print("GET 请求路径:", self.path) self.send_response(200) - self.send_header("type", "get") #设置响应头,可省略或设置多个 + self.send_header("Content-Type", "application/json") self.end_headers() - if self.path == '/bos_id': - bos_id = tokenizer.bos_id - # print(bos_id) - # to json - if bos_id is None: - msg = json.dumps({'bos_id': -1}) + # 新增接口:获取 uid + if '/get_uid' in self.path: + new_uid = str(uuid.uuid4()) + print("新 uid:", new_uid) + # 为该 uid 创建一个新的 Tokenizer_Http 实例 + tokenizers[new_uid] = Tokenizer_Http() + msg = json.dumps({'uid': new_uid}) + elif '/bos_id' in self.path: + # 获取 uid 参数(例如 ?uid=xxx) + uid = self.get_query_param("uid") + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) else: - msg = json.dumps({'bos_id': bos_id}) - elif self.path == '/eos_id': - eos_id = tokenizer.eos_id - if eos_id is None: - msg = json.dumps({'eos_id': -1}) + bos_id = instance.bos_id + msg = json.dumps({'bos_id': bos_id if bos_id is not None else -1}) + elif '/eos_id' in self.path: + uid = self.get_query_param("uid") + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) else: - msg = json.dumps({'eos_id': eos_id}) + eos_id = instance.eos_id + msg = json.dumps({'eos_id': eos_id if eos_id is not None else -1}) else: - msg = 'error' - - print(msg) - msg = str(msg).encode() #转为str再转为byte格式 + msg = json.dumps({'error': 'Invalid GET endpoint'}) - self.wfile.write(msg) #将byte格式的信息返回给客户端 + print("响应消息:", msg) + self.wfile.write(msg.encode()) def do_POST(self): - #在新类中定义post的内容(当客户端向该服务端使用post请求时,本服务端将如下运行) - data = self.rfile.read(int( - self.headers['content-length'])) #获取从客户端传入的参数(byte格式) - data = data.decode() #将byte格式转为str格式 + content_length = int(self.headers.get('content-length', 0)) + data = self.rfile.read(content_length).decode() + print("POST 请求路径:", self.path) + print("接收到的数据:", data) + req = json.loads(data) self.send_response(200) - self.send_header("type", "post") #设置响应头,可省略或设置多个 + self.send_header("Content-Type", "application/json") self.end_headers() - if self.path == '/encode': - req = json.loads(data) - prompt = req['text'] - - token_ids = tokenizer.encode(prompt) - if token_ids is None: - msg = json.dumps({'token_ids': -1}) + if '/encode' in self.path: + # 请求数据中必须包含 uid, text, 和可选的 last_reply + uid = req.get('uid') + prompt = req.get('text') + last_reply = req.get('last_reply') + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) else: - msg = json.dumps({'token_ids': token_ids}) - - elif self.path == '/decode': - req = json.loads(data) - token_ids = req['token_ids'] - text = tokenizer.decode(token_ids) - if text is None: - msg = json.dumps({'text': ""}) + token_ids, diff = instance.encode(prompt, last_reply) + msg = json.dumps({'token_ids': token_ids, 'diff': diff}) + elif '/decode' in self.path: + uid = req.get('uid') + token_ids = req.get('token_ids') + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) else: + text = instance.decode(token_ids) msg = json.dumps({'text': text}) + elif '/reset' in self.path: + uid = req.get("uid") + system_prompt = req.get("system_prompt") + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) + else: + if system_prompt is not None: + print("system_prompt:", system_prompt) + token_ids = instance.reset(system_prompt) + msg = json.dumps({'token_ids': token_ids}) + else: + token_ids = instance.reset() + msg = json.dumps({'token_ids': token_ids}) else: - msg = 'error' - print(msg) - msg = str(msg).encode() #转为str再转为byte格式 - - self.wfile.write(msg) #将byte格式的信息返回给客户端 - + msg = json.dumps({'error': 'Invalid POST endpoint'}) + + print("响应消息:", msg) + self.wfile.write(msg.encode()) + + def get_query_param(self, key): + """ + 辅助函数:从 GET 请求的 URL 中获取查询参数的值 + 例如:/bos_id?uid=xxx + """ + from urllib.parse import urlparse, parse_qs + query = urlparse(self.path).query + params = parse_qs(query) + values = params.get(key) + return values[0] if values else None if __name__ == "__main__": - - args = argparse.ArgumentParser() - args.add_argument('--host', type=str, default='localhost') - args.add_argument('--port', type=int, default=8080) - args = args.parse_args() - - host = (args.host, args.port) #设定地址与端口号,'localhost'等价于'127.0.0.1' - print('http://%s:%s' % host) - server = HTTPServer(host, Request) #根据地址端口号和新定义的类,创建服务器实例 - server.serve_forever() #开启服务 + parser = argparse.ArgumentParser() + parser.add_argument('--host', type=str, default='0.0.0.0') + parser.add_argument('--port', type=int, default=12345) + args = parser.parse_args() + + host = (args.host, args.port) + print('Server running at http://%s:%s' % host) + server = HTTPServer(host, Request) + server.serve_forever() \ No newline at end of file diff --git a/main_ax650 b/main_ax650 new file mode 100644 index 0000000000000000000000000000000000000000..e356c8a1ba8d909137ed8fadbb8954d107033154 --- /dev/null +++ b/main_ax650 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19ddeb193769b16aa8c5d9bba887558aa0a4ed10eb50a19d9bc117f1ba527e5 +size 985352 diff --git a/main_axcl_aarch64 b/main_axcl_aarch64 index 1790d7620d629cefb0823c254849aacdb51bbad2..50c89e82d5aa8deff6f067f48603f2a301014891 100644 --- a/main_axcl_aarch64 +++ b/main_axcl_aarch64 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb111fc00c54abb6142a8f44df087bf104c8150a1cefa6be55c6b174b932c4ec -size 999008 +oid sha256:1f9f1a1ca329b47f70840e8b6d104ce8248a82326aa2402bccb31144590a8fb2 +size 1725008 diff --git a/main_axcl_x86 b/main_axcl_x86 index 6711952241de8943efec1350e53f93684dcc2186..1bd83db9edd6d195da54a1a37820c20d45a8415c 100644 --- a/main_axcl_x86 +++ b/main_axcl_x86 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cba0be8df523e351789fcfa745772423096f52a3b0a760f8d8c9f5b8bb2ec82 -size 1022384 +oid sha256:928d36be31c15d081a7d346464f41458e9624d8b68d5f7dfb3d3189686ce2754 +size 8421624 diff --git a/run_deepseek-r1_7b_ax650.sh b/run_deepseek-r1_7b_ax650.sh index d6287cf157a752d96e828aa60dfc1da73b925b7b..6bc436b2ac3f23f25dacf8a315bd57ac6d8a6c42 100644 --- a/run_deepseek-r1_7b_ax650.sh +++ b/run_deepseek-r1_7b_ax650.sh @@ -1,14 +1,10 @@ -./main_prefill \ +./main_ax650 \ --template_filename_axmodel "deepseek-r1-7b-ax650/qwen2_p128_l%d_together.axmodel" \ --axmodel_num 28 \ ---tokenizer_type 2 \ ---filename_tokenizer_model "http://127.0.0.1:12345" \ ---bos 0 --eos 0 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ --filename_post_axmodel "deepseek-r1-7b-ax650/qwen2_post.axmodel" \ --filename_tokens_embed "deepseek-r1-7b-ax650/model.embed_tokens.weight.bfloat16.bin" \ --tokens_embed_num 152064 \ --tokens_embed_size 3584 \ --use_mmap_load_embed 1 \ ---live_print 1 \ ---continue 1 \ ---prompt "$1" +--live_print 1 \ No newline at end of file diff --git a/run_deepseek-r1_7b_axcl_aarch64.sh b/run_deepseek-r1_7b_axcl_aarch64.sh index a82a818fd5793e4a34af1af910686e0e866e9fc2..2f244ff961ea7e2ab4c5f1072fa6ced016600fcd 100644 --- a/run_deepseek-r1_7b_axcl_aarch64.sh +++ b/run_deepseek-r1_7b_axcl_aarch64.sh @@ -1,14 +1,10 @@ ./main_axcl_aarch64 \ --template_filename_axmodel "deepseek-r1-7b-ax650/qwen2_p128_l%d_together.axmodel" \ --axmodel_num 28 \ ---tokenizer_type 2 \ ---filename_tokenizer_model "http://127.0.0.1:12345" \ ---bos 0 --eos 0 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ --filename_post_axmodel "deepseek-r1-7b-ax650/qwen2_post.axmodel" \ --filename_tokens_embed "deepseek-r1-7b-ax650/model.embed_tokens.weight.bfloat16.bin" \ --tokens_embed_num 152064 \ --tokens_embed_size 3584 \ ---use_mmap_load_embed 0 \ ---live_print 1 \ ---continue 1 \ ---prompt "$1" +--use_mmap_load_embed 1 \ +--live_print 1 diff --git a/run_deepseek-r1_7b_axcl_x86.sh b/run_deepseek-r1_7b_axcl_x86.sh index 9c846566cc4678c4b6d962bfeace43457a346869..c4259fda7f21a335781a66ef2781012a668fe5b8 100644 --- a/run_deepseek-r1_7b_axcl_x86.sh +++ b/run_deepseek-r1_7b_axcl_x86.sh @@ -1,14 +1,10 @@ ./main_axcl_x86 \ --template_filename_axmodel "deepseek-r1-7b-ax650/qwen2_p128_l%d_together.axmodel" \ --axmodel_num 28 \ ---tokenizer_type 2 \ ---filename_tokenizer_model "http://127.0.0.1:12345" \ ---bos 0 --eos 0 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ --filename_post_axmodel "deepseek-r1-7b-ax650/qwen2_post.axmodel" \ --filename_tokens_embed "deepseek-r1-7b-ax650/model.embed_tokens.weight.bfloat16.bin" \ --tokens_embed_num 152064 \ --tokens_embed_size 3584 \ ---use_mmap_load_embed 0 \ ---live_print 1 \ ---continue 1 \ ---prompt "$1" +--use_mmap_load_embed 1 \ +--live_print 1 \ No newline at end of file diff --git a/run_deepseek-r1_7b_int4_ax650.sh b/run_deepseek-r1_7b_int4_ax650.sh new file mode 100644 index 0000000000000000000000000000000000000000..b51d038fc0ef2814c874140b0a5037129d720a8d --- /dev/null +++ b/run_deepseek-r1_7b_int4_ax650.sh @@ -0,0 +1,10 @@ +./main_ax650 \ +--template_filename_axmodel "deepseek-r1-7b-int4-ax650/qwen2_p128_l%d_together.axmodel" \ +--axmodel_num 28 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-7b-int4-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-7b-int4-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--tokens_embed_num 152064 \ +--tokens_embed_size 3584 \ +--use_mmap_load_embed 1 \ +--live_print 1 diff --git a/run_deepseek-r1_7b_int4_axcl_aarch64.sh b/run_deepseek-r1_7b_int4_axcl_aarch64.sh new file mode 100644 index 0000000000000000000000000000000000000000..450cb463e72066f0e1a4264a59bcef7399cab497 --- /dev/null +++ b/run_deepseek-r1_7b_int4_axcl_aarch64.sh @@ -0,0 +1,10 @@ +./main_axcl_aarch64 \ +--template_filename_axmodel "deepseek-r1-7b-int4-ax650/qwen2_p128_l%d_together.axmodel" \ +--axmodel_num 28 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-7b-int4-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-7b-int4-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--tokens_embed_num 152064 \ +--tokens_embed_size 3584 \ +--use_mmap_load_embed 1 \ +--live_print 1 \ No newline at end of file diff --git a/run_deepseek-r1_7b_int4_axcl_x86.sh b/run_deepseek-r1_7b_int4_axcl_x86.sh new file mode 100644 index 0000000000000000000000000000000000000000..bd1065e77d3c64f6ceece202e122885c4f043d3d --- /dev/null +++ b/run_deepseek-r1_7b_int4_axcl_x86.sh @@ -0,0 +1,10 @@ +./main_axcl_x86 \ +--template_filename_axmodel "deepseek-r1-7b-int4-ax650/qwen2_p128_l%d_together.axmodel" \ +--axmodel_num 28 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-7b-int4-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-7b-int4-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--tokens_embed_num 152064 \ +--tokens_embed_size 3584 \ +--use_mmap_load_embed 1 \ +--live_print 1 \ No newline at end of file