miaoyibo committed
Commit 1d5f555 · 1 Parent(s): a7f820f
Files changed (2)
  1. kimi_dev/serve/inference.py +0 -3
  2. requirements.txt +1 -2
kimi_dev/serve/inference.py CHANGED
@@ -5,7 +5,6 @@ from transformers import (
     AutoConfig,
     AutoTokenizer
 )
-import tensor_parallel as tp
 
 logger = logging.getLogger(__name__)
 
@@ -21,8 +20,6 @@ def load_model(model_path: str = "moonshotai/Kimi-Dev-72B"):
         device_map="auto",
         trust_remote_code=True,
     )
-    model = tp.tensor_parallel(model, ["cuda:0", "cuda:1", "cuda:2", "cuda:3"]) # <- each GPU has half the weights
-
 
     tokenizer = AutoTokenizer.from_pretrained(model_path)
 
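For context, a minimal sketch of what load_model reduces to after this commit: weight placement is left entirely to device_map="auto" (via accelerate) instead of the removed tensor_parallel call. The AutoModelForCausalLM class, the torch_dtype argument, and the return statement are assumptions, since the hunks above show only part of the file.

import logging

import torch
from transformers import (
    AutoConfig,              # kept from the original import block
    AutoModelForCausalLM,    # assumed model class; not visible in the hunks
    AutoTokenizer
)

logger = logging.getLogger(__name__)


def load_model(model_path: str = "moonshotai/Kimi-Dev-72B"):
    # device_map="auto" lets accelerate place and shard the weights across all
    # visible GPUs, which is what made the explicit tensor_parallel call redundant.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,  # assumed dtype; not visible in the hunks
        device_map="auto",
        trust_remote_code=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer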
 
requirements.txt CHANGED
@@ -17,5 +17,4 @@ tqdm
 colorama
 Pygments
 markdown
-SentencePiece
-tensor_parallel
+SentencePiece