Update model files
Browse files
- .vscode/settings.json +0 -3
- README.md +24 -16
.vscode/settings.json
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"kwaipilot.settings.proxy": "https://kinsight.corp.kuaishou.com"
|
3 |
-
}
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -9,7 +9,7 @@
|
|
9 |
[[π Home Page](https://kwai-keye.github.io/)]
|
10 |
[[π Technique Report](https://arxiv.org/abs/2507.01949)]
|
11 |
[[π Keye-VL-8B-Preview](https://huggingface.co/Kwai-Keye/Keye-VL-8B-Preview) ]
|
12 |
-
[[π Keye-VL-1.5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-
|
13 |
[[π Demo](https://huggingface.co/spaces/Kwai-Keye/Keye-VL-8B-Preview)]
|
14 |
</div></font>
|
15 |
|
@@ -28,15 +28,23 @@
|
|
28 |
|
29 |
## Contents <!-- omit in toc -->
|
30 |
|
31 |
-
- [
|
32 |
-
- [
|
33 |
-
- [
|
34 |
-
|
35 |
-
- [
|
36 |
-
|
37 |
-
|
38 |
-
- [
|
39 |
-
- [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
## 🚀 Quick Start
|
42 |
### Preprocess and Inference
|
@@ -56,7 +64,7 @@ from transformers import AutoModel, AutoTokenizer, AutoProcessor
|
|
56 |
from keye_vl_utils import process_vision_info
|
57 |
|
58 |
# default: Load the model on the available device(s)
|
59 |
-
model_path = "Kwai-Keye/Keye-VL-
|
60 |
|
61 |
model = AutoModel.from_pretrained(
|
62 |
model_path,
|
@@ -215,7 +223,7 @@ from transformers import AutoProcessor
|
|
215 |
from vllm import LLM, SamplingParams
|
216 |
from keye_vl_utils import process_vision_info
|
217 |
|
218 |
-
model_path = "/
|
219 |
|
220 |
llm = LLM(
|
221 |
model=model_path,
|
@@ -292,7 +300,7 @@ print(generated_text)
|
|
292 |
- Serve
|
293 |
```bash
|
294 |
vllm serve \
|
295 |
-
Kwai-Keye/Keye-VL-8B
|
296 |
--tensor-parallel-size 8 \
|
297 |
--enable-prefix-caching \
|
298 |
--gpu-memory-utilization 0.8 \
|
@@ -338,7 +346,7 @@ image_messages = [
|
|
338 |
]
|
339 |
|
340 |
chat_response = client.chat.completions.create(
|
341 |
-
model="Kwai-Keye/Keye-VL-8B
|
342 |
messages=image_messages,
|
343 |
)
|
344 |
print("Chat response:", chat_response)
|
@@ -367,7 +375,7 @@ image_messages = [
|
|
367 |
]
|
368 |
|
369 |
chat_response = client.chat.completions.create(
|
370 |
-
model="Kwai-Keye/Keye-VL-8B
|
371 |
messages=image_messages,
|
372 |
)
|
373 |
print("Chat response:", chat_response)
|
@@ -421,7 +429,7 @@ video_messages, video_kwargs = prepare_message_for_vllm(video_messages)
|
|
421 |
|
422 |
|
423 |
chat_response = client.chat.completions.create(
|
424 |
-
model="Kwai-Keye/Keye-VL-8B
|
425 |
messages=video_messages,
|
426 |
max_tokens=128,
|
427 |
extra_body={
|
|
|
9 |
[[π Home Page](https://kwai-keye.github.io/)]
|
10 |
[[π Technique Report](https://arxiv.org/abs/2507.01949)]
|
11 |
[[π Keye-VL-8B-Preview](https://huggingface.co/Kwai-Keye/Keye-VL-8B-Preview) ]
|
12 |
+
[[π Keye-VL-1.5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-1_5-8B/) ]
|
13 |
[[π Demo](https://huggingface.co/spaces/Kwai-Keye/Keye-VL-8B-Preview)]
|
14 |
</div></font>
|
15 |
|
|
|
28 |
|
29 |
## Contents <!-- omit in toc -->
|
30 |
|
31 |
+
- [Kwai Keye-VL](#kwai-keye-vl)
|
32 |
+
- [🔥 News](#-news)
|
33 |
+
- [🚀 Quick Start](#-quick-start)
|
34 |
+
- [Preprocess and Inference](#preprocess-and-inference)
|
35 |
+
- [Install](#install)
|
36 |
+
- [Keye-VL-1.5 Inference](#keye-vl-15-inference)
|
37 |
+
- [Deployment](#deployment)
|
38 |
+
- [Install](#install-1)
|
39 |
+
- [Offline Inference](#offline-inference)
|
40 |
+
- [Online Serving](#online-serving)
|
41 |
+
- [Evaluation](#evaluation)
|
42 |
+
- [π Architecture and Training Strategy](#-architecture-and-training-strategy)
|
43 |
+
- [π Pre-Train](#-pre-train)
|
44 |
+
- [π Post-Train](#-post-train)
|
45 |
+
- [π Experimental Results](#-experimental-results)
|
46 |
+
- [✍️ Citation](#️-citation)
|
47 |
+
- [Acknowledgement](#acknowledgement)
|
48 |
|
49 |
## 🚀 Quick Start
|
50 |
### Preprocess and Inference
|
|
|
64 |
from keye_vl_utils import process_vision_info
|
65 |
|
66 |
# default: Load the model on the available device(s)
|
67 |
+
model_path = "Kwai-Keye/Keye-VL-1_5-8B"
|
68 |
|
69 |
model = AutoModel.from_pretrained(
|
70 |
model_path,
|
|
|
223 |
from vllm import LLM, SamplingParams
|
224 |
from keye_vl_utils import process_vision_info
|
225 |
|
226 |
+
model_path = "/home/keye/Keye-VL-1_5-8B"
|
227 |
|
228 |
llm = LLM(
|
229 |
model=model_path,
|
|
|
300 |
- Serve
|
301 |
```bash
|
302 |
vllm serve \
|
303 |
+
Kwai-Keye/Keye-VL-1_5-8B \
|
304 |
--tensor-parallel-size 8 \
|
305 |
--enable-prefix-caching \
|
306 |
--gpu-memory-utilization 0.8 \
|
|
|
346 |
]
|
347 |
|
348 |
chat_response = client.chat.completions.create(
|
349 |
+
model="Kwai-Keye/Keye-VL-1_5-8B",
|
350 |
messages=image_messages,
|
351 |
)
|
352 |
print("Chat response:", chat_response)
|
|
|
375 |
]
|
376 |
|
377 |
chat_response = client.chat.completions.create(
|
378 |
+
model="Kwai-Keye/Keye-VL-1_5-8B",
|
379 |
messages=image_messages,
|
380 |
)
|
381 |
print("Chat response:", chat_response)
|
|
|
429 |
|
430 |
|
431 |
chat_response = client.chat.completions.create(
|
432 |
+
model="Kwai-Keye/Keye-VL-1_5-8B",
|
433 |
messages=video_messages,
|
434 |
max_tokens=128,
|
435 |
extra_body={
|