Spaces:
Running
on
L40S
Running
on
L40S
miaoyibo
committed on
Commit
·
a7f820f
1
Parent(s):
2689cfa
add tp
Browse files
- app.py +1 -1
- kimi_dev/serve/inference.py +3 -0
- requirements.txt +1 -0
app.py
CHANGED
@@ -257,7 +257,7 @@ def predict(
|
|
257 |
repair_time = repair_end_time - repair_start_time
|
258 |
|
259 |
encoded_answer = tokenizer(partial_output_repair, padding=True, truncation=True, return_tensors='pt')
|
260 |
-
print("
|
261 |
|
262 |
# yield response, "null test", "Generate: Success"
|
263 |
yield [[prompt,highlight_response],[repair_prompt,highlight_response_repair]], [["null test","null test2"]], "Generate: Success"
|
|
|
257 |
repair_time = repair_end_time - repair_start_time
|
258 |
|
259 |
encoded_answer = tokenizer(partial_output_repair, padding=True, truncation=True, return_tensors='pt')
|
260 |
+
print("repair token/s:",len(encoded_answer['input_ids'][0])/repair_time)
|
261 |
|
262 |
# yield response, "null test", "Generate: Success"
|
263 |
yield [[prompt,highlight_response],[repair_prompt,highlight_response_repair]], [["null test","null test2"]], "Generate: Success"
|
kimi_dev/serve/inference.py
CHANGED
@@ -5,6 +5,7 @@ from transformers import (
|
|
5 |
AutoConfig,
|
6 |
AutoTokenizer
|
7 |
)
|
|
|
8 |
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
@@ -20,6 +21,8 @@ def load_model(model_path: str = "moonshotai/Kimi-Dev-72B"):
|
|
20 |
device_map="auto",
|
21 |
trust_remote_code=True,
|
22 |
)
|
|
|
|
|
23 |
|
24 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
25 |
|
|
|
5 |
AutoConfig,
|
6 |
AutoTokenizer
|
7 |
)
|
8 |
+
import tensor_parallel as tp
|
9 |
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
|
|
21 |
device_map="auto",
|
22 |
trust_remote_code=True,
|
23 |
)
|
24 |
+
model = tp.tensor_parallel(model, ["cuda:0", "cuda:1", "cuda:2", "cuda:3"]) # <- sharded across 4 GPUs, so each GPU holds roughly a quarter of the weights
|
25 |
+
|
26 |
|
27 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
28 |
|
requirements.txt
CHANGED
@@ -18,3 +18,4 @@ colorama
|
|
18 |
Pygments
|
19 |
markdown
|
20 |
SentencePiece
|
|
|
|
18 |
Pygments
|
19 |
markdown
|
20 |
SentencePiece
|
21 |
+
tensor_parallel
|