Update model files
Browse files
- .vscode/settings.json +0 -3
- README.md +24 -16
.vscode/settings.json
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"kwaipilot.settings.proxy": "https://kinsight.corp.kuaishou.com"
|
3 |
-
}
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -9,7 +9,7 @@
|
|
9 |
[[π Home Page](https://kwai-keye.github.io/)]
|
10 |
[[π Technique Report](https://arxiv.org/abs/2507.01949)]
|
11 |
[[π Keye-VL-8B-Preview](https://huggingface.co/Kwai-Keye/Keye-VL-8B-Preview) ]
|
12 |
-
[[π Keye-VL-1.5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-
|
13 |
[[π Demo](https://huggingface.co/spaces/Kwai-Keye/Keye-VL-8B-Preview)]
|
14 |
</div></font>
|
15 |
|
@@ -28,15 +28,23 @@
|
|
28 |
|
29 |
## Contents <!-- omit in toc -->
|
30 |
|
31 |
-
- [
|
32 |
-
- [
|
33 |
-
- [
|
34 |
-
|
35 |
-
- [
|
36 |
-
|
37 |
-
|
38 |
-
- [
|
39 |
-
- [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
## 🚀 Quick Start
|
42 |
### Preprocess and Inference
|
@@ -56,7 +64,7 @@ from transformers import AutoModel, AutoTokenizer, AutoProcessor
|
|
56 |
from keye_vl_utils import process_vision_info
|
57 |
|
58 |
# default: Load the model on the available device(s)
|
59 |
-
model_path = "Kwai-Keye/Keye-VL-
|
60 |
|
61 |
model = AutoModel.from_pretrained(
|
62 |
model_path,
|
@@ -215,7 +223,7 @@ from transformers import AutoProcessor
|
|
215 |
from vllm import LLM, SamplingParams
|
216 |
from keye_vl_utils import process_vision_info
|
217 |
|
218 |
-
model_path = "/
|
219 |
|
220 |
llm = LLM(
|
221 |
model=model_path,
|
@@ -292,7 +300,7 @@ print(generated_text)
|
|
292 |
- Serve
|
293 |
```bash
|
294 |
vllm serve \
|
295 |
-
Kwai-Keye/Keye-VL-8B
|
296 |
--tensor-parallel-size 8 \
|
297 |
--enable-prefix-caching \
|
298 |
--gpu-memory-utilization 0.8 \
|
@@ -338,7 +346,7 @@ image_messages = [
|
|
338 |
]
|
339 |
|
340 |
chat_response = client.chat.completions.create(
|
341 |
-
model="Kwai-Keye/Keye-VL-8B
|
342 |
messages=image_messages,
|
343 |
)
|
344 |
print("Chat response:", chat_response)
|
@@ -367,7 +375,7 @@ image_messages = [
|
|
367 |
]
|
368 |
|
369 |
chat_response = client.chat.completions.create(
|
370 |
-
model="Kwai-Keye/Keye-VL-8B
|
371 |
messages=image_messages,
|
372 |
)
|
373 |
print("Chat response:", chat_response)
|
@@ -421,7 +429,7 @@ video_messages, video_kwargs = prepare_message_for_vllm(video_messages)
|
|
421 |
|
422 |
|
423 |
chat_response = client.chat.completions.create(
|
424 |
-
model="Kwai-Keye/Keye-VL-8B
|
425 |
messages=video_messages,
|
426 |
max_tokens=128,
|
427 |
extra_body={
|
|
|
9 |
[[π Home Page](https://kwai-keye.github.io/)]
|
10 |
[[π Technique Report](https://arxiv.org/abs/2507.01949)]
|
11 |
[[π Keye-VL-8B-Preview](https://huggingface.co/Kwai-Keye/Keye-VL-8B-Preview) ]
|
12 |
+
[[π Keye-VL-1.5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-1_5-8B/) ]
|
13 |
[[π Demo](https://huggingface.co/spaces/Kwai-Keye/Keye-VL-8B-Preview)]
|
14 |
</div></font>
|
15 |
|
|
|
28 |
|
29 |
## Contents <!-- omit in toc -->
|
30 |
|
31 |
+
- [Kwai Keye-VL](#kwai-keye-vl)
|
32 |
+
- [🔥 News](#-news)
|
33 |
+
- [🚀 Quick Start](#-quick-start)
|
34 |
+
- [Preprocess and Inference](#preprocess-and-inference)
|
35 |
+
- [Install](#install)
|
36 |
+
- [Keye-VL-1.5 Inference](#keye-vl-15-inference)
|
37 |
+
- [Deployment](#deployment)
|
38 |
+
- [Install](#install-1)
|
39 |
+
- [Offline Inference](#offline-inference)
|
40 |
+
- [Online Serving](#online-serving)
|
41 |
+
- [Evaluation](#evaluation)
|
42 |
+
- [π Architecture and Training Strategy](#-architecture-and-training-strategy)
|
43 |
+
- [π Pre-Train](#-pre-train)
|
44 |
+
- [π Post-Train](#-post-train)
|
45 |
+
- [π Experimental Results](#-experimental-results)
|
46 |
+
- [✍️ Citation](#️-citation)
|
47 |
+
- [Acknowledgement](#acknowledgement)
|
48 |
|
49 |
## 🚀 Quick Start
|
50 |
### Preprocess and Inference
|
|
|
64 |
from keye_vl_utils import process_vision_info
|
65 |
|
66 |
# default: Load the model on the available device(s)
|
67 |
+
model_path = "Kwai-Keye/Keye-VL-1_5-8B"
|
68 |
|
69 |
model = AutoModel.from_pretrained(
|
70 |
model_path,
|
|
|
223 |
from vllm import LLM, SamplingParams
|
224 |
from keye_vl_utils import process_vision_info
|
225 |
|
226 |
+
model_path = "/home/keye/Keye-VL-1_5-8B"
|
227 |
|
228 |
llm = LLM(
|
229 |
model=model_path,
|
|
|
300 |
- Serve
|
301 |
```bash
|
302 |
vllm serve \
|
303 |
+
Kwai-Keye/Keye-VL-1_5-8B \
|
304 |
--tensor-parallel-size 8 \
|
305 |
--enable-prefix-caching \
|
306 |
--gpu-memory-utilization 0.8 \
|
|
|
346 |
]
|
347 |
|
348 |
chat_response = client.chat.completions.create(
|
349 |
+
model="Kwai-Keye/Keye-VL-1_5-8B",
|
350 |
messages=image_messages,
|
351 |
)
|
352 |
print("Chat response:", chat_response)
|
|
|
375 |
]
|
376 |
|
377 |
chat_response = client.chat.completions.create(
|
378 |
+
model="Kwai-Keye/Keye-VL-1_5-8B",
|
379 |
messages=image_messages,
|
380 |
)
|
381 |
print("Chat response:", chat_response)
|
|
|
429 |
|
430 |
|
431 |
chat_response = client.chat.completions.create(
|
432 |
+
model="Kwai-Keye/Keye-VL-1_5-8B",
|
433 |
messages=video_messages,
|
434 |
max_tokens=128,
|
435 |
extra_body={
|