Spaces: Running on L40S
miaoyibo committed
Commit · 8cf3ee6
1 Parent(s): 4079598
- app.py +27 -2
- kimi_vl/serve/chat_utils.py +1 -1
- kimi_vl/serve/inference.py +2 -84
app.py
CHANGED
@@ -1,10 +1,10 @@
 import argparse
 import gradio as gr
 import os
-os.environ["HF_HOME"] = "/mnt/moonfs/miaoyibo-ksyun/hf_home"
 from PIL import Image
 import spaces
 import copy
+import time
 
 from kimi_vl.serve.frontend import reload_javascript
 from kimi_vl.serve.utils import (
@@ -137,7 +137,32 @@ def predict(
         yield [[text, "No Model Found"]], [], "No Model Found"
         return
 
-
+
+    prompt = "Give me a short introduction to large language model."
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    print(response)
+    time.sleep(2600)
+
 
     if images is None:
         images = []
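
Note: for readers who want to try the added block outside this Space, the sketch below reproduces the same flow (render the chat template, generate, then decode only the newly generated tokens) as a self-contained script. The model id is a hypothetical stand-in, and the trailing time.sleep(2600) is omitted; in app.py the model and tokenizer come from the Space's own loading code.

# Self-contained sketch of the generation flow added to predict() above.
# Assumption: "Qwen/Qwen2-0.5B-Instruct" is only a stand-in model id, not the
# model this Space actually loads.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# Render the chat messages to a plain-text prompt, keeping the generation suffix.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate, then slice off the prompt tokens so only new tokens are decoded.
generated_ids = model.generate(**model_inputs, max_new_tokens=512)
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
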
kimi_vl/serve/chat_utils.py
CHANGED
@@ -267,7 +267,7 @@ def generate_prompt_with_history(text, images, history, processor, max_length=20
     bot_role_ind = 1
 
     # Initialize conversation
-    conversation = new_chat_template(sft_format="
+    conversation = new_chat_template(sft_format="plain")
 
     if history:
         conversation.messages = history
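
Note: the single change above switches the conversation template to the "plain" format. A rough usage sketch follows, assuming only what other files in this commit rely on (a conversation object exposing .messages, .roles, and .append_message); the conversation class internals are not part of this diff.

# Illustrative only; the conversation object's behaviour is assumed, not shown in this diff.
from kimi_vl.serve.chat_utils import new_chat_template

conversation = new_chat_template(sft_format="plain")  # format selected by this commit
history = []                                          # prior turns, if any
if history:
    conversation.messages = history
conversation.append_message(conversation.roles[0], "Hello!")  # roles[0] is the user role elsewhere in this repo
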
kimi_vl/serve/inference.py
CHANGED
@@ -71,88 +71,6 @@ def format_messages(
     return converstion
 
 
-def preprocess(
-    messages: list[dict],
-    processor,
-    sft_format: Optional[str] = "kimi-vl",
-):
-    """
-    Build messages from the conversations and images.
-    """
-    # get images from conversations
-    results = []
-    images = []
-
-    # get texts from conversations
-    converstion = get_conv_template(sft_format)
-    # only use the last 3 round of messages
-    latest_messages = messages[-3:]
-    for mid, message in enumerate(latest_messages):
-        if message["role"] == converstion.roles[0] or message["role"] == "user":
-            record = {
-                "role": message["role"],
-                "content": [],
-            }
-            if "images" in message:
-                per_round_images = message["images"]
-                if len(per_round_images) > 2:
-                    per_round_images = per_round_images[-2:]
-                    print(f"Only use the last 2 images in the {mid}-th round")
-
-                images.extend(per_round_images)
-                for image in per_round_images:
-                    record["content"].append(
-                        {
-                            "type": "image",
-                            "image": image,
-                        }
-                    )
-            if 'content' in message:
-                record["content"].append(
-                    {
-                        "type": "text",
-                        "text": str(message["content"]).strip(),
-                    }
-                )
-            results.append(record)
-        elif message["role"] == converstion.roles[1] or message["role"] == "assistant":
-            formatted_answer = message["content"].strip()
-            # ◁think▷The user said "你好" ("hello"), a very simple greeting usually used to open a conversation. I need to judge the user's intent. Possibility one: the user is just greeting politely and wants to start a conversation; possibility two: the user may have a more specific need, such as asking about my capabilities or needing help. Since the user has given no further information, I should stay open while guiding them to explain what they need.
-            # My reply should be friendly and open, not overly formal or cold. I should avoid assuming the user's specific needs and instead give a relaxed response that encourages the conversation to continue.◁/think▷Hello! Nice to meet you. Is there anything I can help you with?
-            # delete all the texts between ◁think▷ and ◁/think▷
-            # FIXME: this is a hack to remove the thinking texts
-            # formatted_answer = re.sub(r"◁think▷.*◁/think▷", "", formatted_answer)
-            think_end_token = '◁/think▷'
-            formatted_answer = formatted_answer.split(think_end_token)[-1]
-            results.append(
-                {
-                    "role": message["role"],
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": formatted_answer,
-                        }
-                    ],
-                }
-            )
-            assert (
-                formatted_answer.count(processor.image_token) == 0
-            ), f"there should be no {processor.image_token} in the assistant's reply, but got {messages}"
-            converstion.append_message(converstion.roles[1], formatted_answer)
-
-    text = processor.apply_chat_template(results, add_generation_prompt=True)
-    print(f"raw text = {text}")
-    if len(images) == 0:
-        images = None
-
-    inputs = processor(
-        images=images,
-        text=[text],
-        return_tensors="pt",
-        padding=True,
-        truncation=True,
-    )
-    return inputs
 
 
 @torch.no_grad()
@@ -176,6 +94,7 @@ def kimi_dev_generate(
 
     return generate(
         model,
+        tokenizer,
         inputs,
         max_gen_len=max_length,
         temperature=temperature,
@@ -187,7 +106,7 @@ def kimi_dev_generate(
 
 def generate(
     model,
-
+    tokenizer,
     inputs,
     max_gen_len: int = 256,
     temperature: float = 0,
@@ -196,7 +115,6 @@ def generate(
     chunk_size: int = -1,
 ):
     """Stream the text output from the multimodality model with prompt and image inputs."""
-    tokenizer = processor.tokenizer
     stop_words_ids = [torch.tensor(tokenizer.encode(stop_word)) for stop_word in stop_words]
     stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
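
Note: because generate() now receives the tokenizer explicitly instead of pulling it from a processor, the streaming path can be driven with just a language model and a tokenizer. The sketch below is a hedged reconstruction of that pattern using the transformers primitives visible in the hunk (TextIteratorStreamer, StoppingCriteriaList); the StoppingCriteriaSub class here is a plausible stand-in for the repo's own class, not a copy of it.

# Hedged sketch of the streaming generation pattern used by generate() above.
from threading import Thread

import torch
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer


class StoppingCriteriaSub(StoppingCriteria):
    """Stop when the generated suffix matches any of the stop-word id sequences."""

    def __init__(self, stops):
        super().__init__()
        self.stops = stops

    def __call__(self, input_ids, scores, **kwargs):
        for stop in self.stops:
            stop = stop.to(input_ids.device)
            if input_ids.shape[1] >= stop.shape[0] and torch.equal(input_ids[0, -stop.shape[0]:], stop):
                return True
        return False


def stream_generate(model, tokenizer, inputs, stop_words, max_gen_len=256):
    # Build stopping criteria from the tokenizer that is now passed in explicitly.
    stop_words_ids = [torch.tensor(tokenizer.encode(w)) for w in stop_words]
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

    # Run generation in a background thread; the streamer yields decoded text chunks.
    kwargs = dict(**inputs, max_new_tokens=max_gen_len, streamer=streamer, stopping_criteria=stopping_criteria)
    Thread(target=model.generate, kwargs=kwargs).start()
    for chunk in streamer:
        yield chunk

In the repo, kimi_dev_generate() forwards its tokenizer into this call, which is exactly what the two "+ tokenizer," lines above add.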