Spaces:
Running
on
L40S
Running
on
L40S
miaoyibo
committed on
Commit
·
1d5f555
1
Parent(s):
a7f820f
- kimi_dev/serve/inference.py +0 -3
- requirements.txt +1 -2
kimi_dev/serve/inference.py
CHANGED
@@ -5,7 +5,6 @@ from transformers import (
|
|
5 |
AutoConfig,
|
6 |
AutoTokenizer
|
7 |
)
|
8 |
-
import tensor_parallel as tp
|
9 |
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
@@ -21,8 +20,6 @@ def load_model(model_path: str = "moonshotai/Kimi-Dev-72B"):
|
|
21 |
device_map="auto",
|
22 |
trust_remote_code=True,
|
23 |
)
|
24 |
-
model = tp.tensor_parallel(model, ["cuda:0", "cuda:1", "cuda:2", "cuda:3"]) # <- each GPU has half the weights
|
25 |
-
|
26 |
|
27 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
28 |
|
|
|
5 |
AutoConfig,
|
6 |
AutoTokenizer
|
7 |
)
|
|
|
8 |
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
|
|
20 |
device_map="auto",
|
21 |
trust_remote_code=True,
|
22 |
)
|
|
|
|
|
23 |
|
24 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
25 |
|
requirements.txt
CHANGED
@@ -17,5 +17,4 @@ tqdm
|
|
17 |
colorama
|
18 |
Pygments
|
19 |
markdown
|
20 |
-
SentencePiece
|
21 |
-
tensor_parallel
|
|
|
17 |
colorama
|
18 |
Pygments
|
19 |
markdown
|
20 |
+
SentencePiece
|
|