miaoyibo committed on
Commit 8cf3ee6 · 1 Parent(s): 4079598
app.py CHANGED
@@ -1,10 +1,10 @@
 import argparse
 import gradio as gr
 import os
-os.environ["HF_HOME"] = "/mnt/moonfs/miaoyibo-ksyun/hf_home"
 from PIL import Image
 import spaces
 import copy
+import time
 
 from kimi_vl.serve.frontend import reload_javascript
 from kimi_vl.serve.utils import (
@@ -137,7 +137,32 @@ def predict(
         yield [[text, "No Model Found"]], [], "No Model Found"
         return
 
-
+
+    prompt = "Give me a short introduction to large language model."
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    print(response)
+    time.sleep(2600)
+
 
     if images is None:
         images = []
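
Note on the block added to predict() above: it follows the standard transformers chat-template flow (render the messages to a prompt string, generate, strip the prompt tokens, then decode only the newly generated tokens). A minimal self-contained sketch of that flow is shown below; the checkpoint id is a placeholder and not the model this Space actually loads.

    # Hedged sketch of the same prompt -> generate -> trim -> decode flow; the
    # checkpoint id is a placeholder, assumed for illustration only.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Give me a short introduction to large language model."},
    ]
    # Render the chat template to a prompt string, then tokenize it.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Keep only the tokens produced after the prompt before decoding.
    generated_ids = [out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)]
    print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])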
kimi_vl/serve/chat_utils.py CHANGED
@@ -267,7 +267,7 @@ def generate_prompt_with_history(text, images, history, processor, max_length=20
     bot_role_ind = 1
 
     # Initialize conversation
-    conversation = new_chat_template(sft_format="kimi-vl")
+    conversation = new_chat_template(sft_format="plain")
 
     if history:
         conversation.messages = history
kimi_vl/serve/inference.py CHANGED
@@ -71,88 +71,6 @@ def format_messages(
     return converstion
 
 
-def preprocess(
-    messages: list[dict],
-    processor,
-    sft_format: Optional[str] = "kimi-vl",
-):
-    """
-    Build messages from the conversations and images.
-    """
-    # get images from conversations
-    results = []
-    images = []
-
-    # get texts from conversations
-    converstion = get_conv_template(sft_format)
-    # only use the last 3 round of messages
-    latest_messages = messages[-3:]
-    for mid, message in enumerate(latest_messages):
-        if message["role"] == converstion.roles[0] or message["role"] == "user":
-            record = {
-                "role": message["role"],
-                "content": [],
-            }
-            if "images" in message:
-                per_round_images = message["images"]
-                if len(per_round_images) > 2:
-                    per_round_images = per_round_images[-2:]
-                    print(f"Only use the last 2 images in the {mid}-th round")
-
-                images.extend(per_round_images)
-                for image in per_round_images:
-                    record["content"].append(
-                        {
-                            "type": "image",
-                            "image": image,
-                        }
-                    )
-            if 'content' in message:
-                record["content"].append(
-                    {
-                        "type": "text",
-                        "text": str(message["content"]).strip(),
-                    }
-                )
-            results.append(record)
-        elif message["role"] == converstion.roles[1] or message["role"] == "assistant":
-            formatted_answer = message["content"].strip()
-            # ◁think▷The user said "你好" (hello), a very simple greeting that usually opens a conversation. I need to judge the user's intent. Possibility one: the user is just greeting politely and wants to start a chat; possibility two: the user may have a more specific need, such as asking about my capabilities or wanting help. Since no further information was given, I should stay open while guiding the user to describe their need.
-            # My reply should be both friendly and open, neither overly formal nor cold. I should also avoid assuming a specific need, and instead give a relaxed response that encourages further conversation.◁/think▷Hello! Nice to meet you. Is there anything I can help you with?
-            # delete all the texts between ◁think▷ and ◁/think▷
-            # FIXME: this is a hack to remove the thinking texts
-            # formatted_answer = re.sub(r"◁think▷.*◁/think▷", "", formatted_answer)
-            think_end_token = '◁/think▷'
-            formatted_answer = formatted_answer.split(think_end_token)[-1]
-            results.append(
-                {
-                    "role": message["role"],
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": formatted_answer,
-                        }
-                    ],
-                }
-            )
-            assert (
-                formatted_answer.count(processor.image_token) == 0
-            ), f"there should be no {processor.image_token} in the assistant's reply, but got {messages}"
-            converstion.append_message(converstion.roles[1], formatted_answer)
-
-    text = processor.apply_chat_template(results, add_generation_prompt=True)
-    print(f"raw text = {text}")
-    if len(images) == 0:
-        images = None
-
-    inputs = processor(
-        images=images,
-        text=[text],
-        return_tensors="pt",
-        padding=True,
-        truncation=True,
-    )
-    return inputs
 
 
 @torch.no_grad()
@@ -176,6 +94,7 @@ def kimi_dev_generate(
 
     return generate(
         model,
+        tokenizer,
         inputs,
         max_gen_len=max_length,
         temperature=temperature,
@@ -187,7 +106,7 @@ def kimi_dev_generate(
 
 def generate(
     model,
-    processor,
+    tokenizer,
     inputs,
     max_gen_len: int = 256,
     temperature: float = 0,
@@ -196,7 +115,6 @@ def generate(
     chunk_size: int = -1,
 ):
     """Stream the text output from the multimodality model with prompt and image inputs."""
-    tokenizer = processor.tokenizer
     stop_words_ids = [torch.tensor(tokenizer.encode(stop_word)) for stop_word in stop_words]
     stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
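
Note on the generate() change above: the function now receives the tokenizer directly instead of reading it from processor.tokenizer, and it builds stop-word stopping criteria plus a TextIteratorStreamer so output can be streamed while model.generate runs in the background. The sketch below illustrates that pattern under stated assumptions: StoppingCriteriaSub is defined elsewhere in this repo, so the class shown here is only an assumed approximation, and stream_generate is a hypothetical wrapper, not the repo's actual generate().

    # Hedged sketch of the stop-word + streaming pattern; StoppingCriteriaSub is an
    # assumed approximation of the repo's class, and stream_generate is hypothetical.
    from threading import Thread

    import torch
    from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer

    class StoppingCriteriaSub(StoppingCriteria):
        def __init__(self, stops):
            super().__init__()
            self.stops = stops  # list of token-id tensors, one per stop word

        def __call__(self, input_ids, scores, **kwargs):
            # Stop once the generated sequence ends with any stop-word token sequence.
            for stop in self.stops:
                stop = stop.to(input_ids.device)
                if input_ids.shape[1] >= len(stop) and torch.equal(input_ids[0, -len(stop):], stop):
                    return True
            return False

    def stream_generate(model, tokenizer, inputs, stop_words, max_gen_len=256):
        stop_words_ids = [torch.tensor(tokenizer.encode(stop_word)) for stop_word in stop_words]
        stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
        generation_kwargs = dict(
            **inputs,
            max_new_tokens=max_gen_len,
            stopping_criteria=stopping_criteria,
            streamer=streamer,
        )
        # generate() blocks, so run it in a thread and read decoded chunks from the streamer.
        Thread(target=model.generate, kwargs=generation_kwargs).start()
        for chunk in streamer:
            yield chunk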