miaoyibo committed on
Commit
46a0b0f
·
1 Parent(s): 8cf3ee6
.gitignore CHANGED
@@ -3,3 +3,6 @@
3
  __pycache__
4
  *.pyc
5
  *.pyo
3
  __pycache__
4
  *.pyc
5
  *.pyo
6
+
7
+ .gradio
8
+ local_path/
app.py CHANGED
@@ -1,44 +1,38 @@
1
  import argparse
2
  import gradio as gr
3
  import os
4
- from PIL import Image
5
  import spaces
6
  import copy
7
  import time
8
 
9
- from kimi_vl.serve.frontend import reload_javascript
10
- from kimi_vl.serve.utils import (
 
 
11
  configure_logger,
12
- pil_to_base64,
13
- parse_ref_bbox,
14
- strip_stop_words,
15
- is_variable_assigned,
16
  )
17
- from kimi_vl.serve.gradio_utils import (
18
- cancel_outputing,
19
- delete_last_conversation,
20
  reset_state,
21
  reset_textbox,
22
  transfer_input,
23
  wrap_gen_fn,
24
  )
25
- from kimi_vl.serve.chat_utils import (
26
- generate_prompt_with_history,
27
- convert_conversation_to_prompts,
28
- to_gradio_chatbot,
29
- to_gradio_history,
30
- )
31
- from kimi_vl.serve.inference import kimi_dev_generate, load_model
32
- from kimi_vl.serve.examples import get_examples
33
 
34
- TITLE = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with Kimi-Dev-72BπŸ€” </h1>"""
35
- DESCRIPTION_TOP = """<a href="https://github.com/MoonshotAI/Kimi-VL" target="_blank">Kimi-Dev-72B</a> is a multi-modal LLM that can understand text and images, and generate text with thinking processes. For non-thinking version, please try [Kimi-VL-A3B](https://huggingface.co/spaces/moonshotai/Kimi-VL-A3B)."""
36
- DESCRIPTION = """"""
37
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
38
  DEPLOY_MODELS = dict()
39
  logger = configure_logger()
40
 
41
-
42
  def parse_args():
43
  parser = argparse.ArgumentParser()
44
  parser.add_argument("--model", type=str, default="Kimi-Dev-72B")
@@ -73,16 +67,6 @@ def fetch_model(model_name: str):
73
  return model_info
74
 
75
 
76
- def preview_images(files) -> list[str]:
77
- if files is None:
78
- return []
79
-
80
- image_paths = []
81
- for file in files:
82
- image_paths.append(file.name)
83
- return image_paths
84
-
85
-
86
  def get_prompt(conversation) -> str:
87
  """
88
  Get the prompt for the conversation.
@@ -103,30 +87,29 @@ def highlight_thinking(msg: str) -> str:
103
  @spaces.GPU(duration=180)
104
  def predict(
105
  text,
106
- images,
107
  chatbot,
108
  history,
109
  top_p,
110
  temperature,
111
  max_length_tokens,
112
- max_context_length_tokens,
113
  chunk_size: int = 512,
114
  ):
115
  """
116
- Predict the response for the input text and images.
117
  Args:
118
  text (str): The input text.
119
- images (list[PIL.Image.Image]): The input images.
120
  chatbot (list): The chatbot.
121
  history (list): The history.
122
  top_p (float): The top-p value.
123
  temperature (float): The temperature value.
124
  repetition_penalty (float): The repetition penalty value.
125
  max_length_tokens (int): The max length tokens.
126
- max_context_length_tokens (int): The max context length tokens.
127
  chunk_size (int): The chunk size.
128
  """
129
  print("running the prediction function")
 
130
  try:
131
  model, tokenizer = fetch_model(args.model)
132
 
@@ -137,131 +120,161 @@ def predict(
137
  yield [[text, "No Model Found"]], [], "No Model Found"
138
  return
139
 
140
 
141
- prompt = "Give me a short introduction to large language model."
142
  messages = [
143
  {"role": "system", "content": "You are a helpful assistant."},
144
- {"role": "user", "content": prompt}
145
  ]
146
- text = tokenizer.apply_chat_template(
147
  messages,
148
  tokenize=False,
149
  add_generation_prompt=True
150
  )
151
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
152
 
153
- generated_ids = model.generate(
154
- **model_inputs,
155
- max_new_tokens=512
156
- )
157
- generated_ids = [
158
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
159
- ]
160
 
161
- response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
162
 
 
 
163
  print(response)
164
- time.sleep(2600)
165
-
166
-
167
- if images is None:
168
- images = []
169
-
170
- # load images
171
- pil_images = []
172
- for img_or_file in images:
173
- try:
174
- # load as pil image
175
- if isinstance(images, Image.Image):
176
- pil_images.append(img_or_file)
177
- else:
178
- image = Image.open(img_or_file.name).convert("RGB")
179
- pil_images.append(image)
180
- except Exception as e:
181
- print(f"Error loading image: {e}")
182
-
183
- # generate prompt
184
- conversation = generate_prompt_with_history(
185
- text,
186
- pil_images,
187
- history,
188
- max_length=max_context_length_tokens,
 
 
189
  )
190
- print(conversation)
191
- all_conv, last_image = convert_conversation_to_prompts(conversation)
192
- stop_words = conversation.stop_str
193
- gradio_chatbot_output = to_gradio_chatbot(conversation)
194
-
195
- full_response = ""
196
- for x in kimi_dev_generate(
197
- conversations=all_conv,
198
- model=model,
199
- tokneizer=tokenizer,
200
- # processor=processor,
201
- stop_words=stop_words,
202
- max_length=max_length_tokens,
203
  temperature=temperature,
204
  top_p=top_p,
205
- ):
206
- full_response += x
207
- response = strip_stop_words(full_response, stop_words)
208
- conversation.update_last_message(response)
209
- gradio_chatbot_output[-1][1] = highlight_thinking(response)
210
-
211
- yield gradio_chatbot_output, to_gradio_history(conversation), "Generating..."
212
-
213
- if last_image is not None:
214
- vg_image = parse_ref_bbox(response, last_image)
215
- if vg_image is not None:
216
- vg_base64 = pil_to_base64(vg_image, "vg", max_size=800, min_size=400)
217
- gradio_chatbot_output[-1][1] += vg_base64
218
- yield gradio_chatbot_output, to_gradio_history(conversation), "Generating..."
219
-
220
- logger.info("flushed result to gradio")
221
-
222
- if is_variable_assigned("x"):
223
- print(
224
- f"temperature: {temperature}, "
225
- f"top_p: {top_p}, "
226
- f"max_length_tokens: {max_length_tokens}"
227
  )
228
 
229
- yield gradio_chatbot_output, to_gradio_history(conversation), "Generate: Success"
 
 
 
230
 
231
 
232
  def retry(
233
  text,
234
- images,
235
  chatbot,
236
  history,
237
  top_p,
238
  temperature,
239
  max_length_tokens,
240
- max_context_length_tokens,
241
  chunk_size: int = 512,
242
  ):
243
  """
244
- Retry the response for the input text and images.
245
  """
246
  if len(history) == 0:
247
  yield (chatbot, history, "Empty context")
248
  return
249
 
250
- chatbot.pop()
251
- history.pop()
252
- text = history.pop()[-1]
253
  if type(text) is tuple:
254
  text, _ = text
255
 
256
  yield from predict(
257
  text,
258
- images,
259
  chatbot,
260
  history,
261
  top_p,
262
  temperature,
263
  max_length_tokens,
264
- max_context_length_tokens,
265
  chunk_size,
266
  )
267
 
@@ -270,12 +283,13 @@ def build_demo(args: argparse.Namespace) -> gr.Blocks:
270
  with gr.Blocks(theme=gr.themes.Soft(), delete_cache=(1800, 1800)) as demo:
271
  history = gr.State([])
272
  input_text = gr.State()
273
- input_images = gr.State()
274
 
275
  with gr.Row():
276
  gr.HTML(TITLE)
277
  status_display = gr.Markdown("Success", elem_id="status_display")
278
  gr.Markdown(DESCRIPTION_TOP)
 
279
 
280
  with gr.Row(equal_height=True):
281
  with gr.Column(scale=4):
@@ -284,63 +298,59 @@ def build_demo(args: argparse.Namespace) -> gr.Blocks:
284
  elem_id="Kimi-Dev-72B",
285
  show_share_button=True,
286
  bubble_full_width=False,
287
- height=600,
 
288
  )
289
  with gr.Row():
290
  with gr.Column(scale=4):
291
- text_box = gr.Textbox(show_label=False, placeholder="Enter text", container=False)
292
  with gr.Column(min_width=70):
293
  submit_btn = gr.Button("Send")
294
- with gr.Column(min_width=70):
295
- cancel_btn = gr.Button("Stop")
296
  with gr.Row():
297
  empty_btn = gr.Button("🧹 New Conversation")
298
  retry_btn = gr.Button("🔄 Regenerate")
299
- del_last_btn = gr.Button("🗑️ Remove Last Turn")
300
-
 
301
  with gr.Column():
302
- # add note no more than 2 images once
303
- gr.Markdown("Note: you can upload no more than 2 images once")
304
- upload_images = gr.Files(file_types=["image"], show_label=True)
305
- gallery = gr.Gallery(columns=[3], height="200px", show_label=True)
306
- upload_images.change(preview_images, inputs=upload_images, outputs=gallery)
307
  # Parameter Setting Tab for control the generation parameters
308
  with gr.Tab(label="Parameter Setting"):
309
- top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p")
310
  temperature = gr.Slider(
311
- minimum=0, maximum=1.0, value=0.6, step=0.1, interactive=True, label="Temperature"
312
  )
313
  max_length_tokens = gr.Slider(
314
- minimum=512, maximum=8192, value=2048, step=64, interactive=True, label="Max Length Tokens"
315
- )
316
- max_context_length_tokens = gr.Slider(
317
- minimum=512, maximum=8192, value=2048, step=64, interactive=True, label="Max Context Length Tokens"
318
  )
319
 
320
- show_images = gr.HTML(visible=False)
321
-
322
  gr.Examples(
323
  examples=get_examples(ROOT_DIR),
324
- inputs=[upload_images, show_images, text_box],
325
  )
326
- gr.Markdown()
327
 
328
  input_widgets = [
329
  input_text,
330
- input_images,
331
  chatbot,
332
  history,
333
  top_p,
334
  temperature,
335
  max_length_tokens,
336
- max_context_length_tokens,
337
  ]
338
  output_widgets = [chatbot, history, status_display]
339
 
340
  transfer_input_args = dict(
341
  fn=transfer_input,
342
- inputs=[text_box, upload_images],
343
- outputs=[input_text, input_images, text_box, upload_images, submit_btn],
344
  show_progress=True,
345
  )
346
 
@@ -356,8 +366,6 @@ def build_demo(args: argparse.Namespace) -> gr.Blocks:
356
  empty_btn.click(reset_state, outputs=output_widgets, show_progress=True)
357
  empty_btn.click(**reset_args)
358
  retry_btn.click(**retry_args)
359
- del_last_btn.click(delete_last_conversation, [chatbot, history], output_widgets, show_progress=True)
360
- cancel_btn.click(cancel_outputing, [], [status_display], cancels=predict_events)
361
 
362
  demo.title = "Kimi-Dev-72B"
363
  return demo
@@ -367,8 +375,7 @@ def main(args: argparse.Namespace):
367
  demo = build_demo(args)
368
  reload_javascript()
369
 
370
- # concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS
371
- favicon_path = os.path.join("kimi_vl/serve/assets/favicon.ico")
372
  # demo.queue().launch(
373
  # favicon_path=favicon_path,
374
  # server_name=args.ip,
@@ -378,7 +385,7 @@ def main(args: argparse.Namespace):
378
  favicon_path=favicon_path,
379
  server_name=args.ip,
380
  server_port=args.port,
381
- share=True # for local debugging
382
  )
383
 
384
  if __name__ == "__main__":
 
1
  import argparse
2
  import gradio as gr
3
  import os
 
4
  import spaces
5
  import copy
6
  import time
7
+ import json
8
+ import subprocess
9
+ import ast
10
+ import pdb
11
+ from transformers import TextIteratorStreamer
12
 
13
+ import threading
14
+
15
+ from kimi_dev.serve.frontend import reload_javascript
16
+ from kimi_dev.serve.utils import (
17
  configure_logger,
18
  )
19
+ from kimi_dev.serve.gradio_utils import (
 
 
20
  reset_state,
21
  reset_textbox,
22
  transfer_input,
23
  wrap_gen_fn,
24
  )
25
+ from kimi_dev.serve.inference import load_model
26
+ from kimi_dev.serve.examples import get_examples
27
+ from kimi_dev.serve.templates import post_process, get_loc_prompt, clone_github_repo, build_repo_structure, show_project_structure, get_repair_prompt, get_repo_files, get_full_file_paths_and_classes_and_functions, correct_file_path_in_structure
28
 
29
+ TITLE = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with Kimi-Dev-72BπŸ”₯ </h1>"""
30
+ DESCRIPTION_TOP = """<a href="https://github.com/MoonshotAI/Kimi-VL" target="_blank">Kimi-Dev-72B</a> is a strong and open-source coding LLM for software engineering tasks."""
31
+ USAGE_TOP = """Usage: 1. Input a Github url like "https://github.com/astropy/astropy" and submit it. \n2. Input your issue description and chat with Kimi-Dev-72B!"""
32
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
33
  DEPLOY_MODELS = dict()
34
  logger = configure_logger()
35
 
 
36
  def parse_args():
37
  parser = argparse.ArgumentParser()
38
  parser.add_argument("--model", type=str, default="Kimi-Dev-72B")
 
67
  return model_info
68
 
69
 
70
  def get_prompt(conversation) -> str:
71
  """
72
  Get the prompt for the conversation.
 
87
  @spaces.GPU(duration=180)
88
  def predict(
89
  text,
90
+ url,
91
  chatbot,
92
  history,
93
  top_p,
94
  temperature,
95
  max_length_tokens,
 
96
  chunk_size: int = 512,
97
  ):
98
  """
99
+ Predict the response for the input text and url.
100
  Args:
101
  text (str): The input text.
102
+ url (str): The input url.
103
  chatbot (list): The chatbot.
104
  history (list): The history.
105
  top_p (float): The top-p value.
106
  temperature (float): The temperature value.
107
  repetition_penalty (float): The repetition penalty value.
108
  max_length_tokens (int): The max length tokens.
 
109
  chunk_size (int): The chunk size.
110
  """
111
  print("running the prediction function")
112
+
113
  try:
114
  model, tokenizer = fetch_model(args.model)
115
 
 
120
  yield [[text, "No Model Found"]], [], "No Model Found"
121
  return
122
 
123
+ prompt = text
124
+ repo_name = url.split("/")[-1]
125
+
126
+ repo_path = './local_path/'+repo_name # Local clone path
127
+
128
+ clone_github_repo(url, repo_path)
129
+ structure = build_repo_structure(repo_path)
130
+ string_structure = show_project_structure(structure)
131
+
132
+ loc_prompt = get_loc_prompt(prompt, string_structure)
133
+
134
 
 
135
  messages = [
136
  {"role": "system", "content": "You are a helpful assistant."},
137
+ {"role": "user", "content": loc_prompt}
138
  ]
139
+ text_for_model = tokenizer.apply_chat_template(
140
  messages,
141
  tokenize=False,
142
  add_generation_prompt=True
143
  )
144
+ model_inputs = tokenizer([text_for_model], return_tensors="pt").to(model.device)
145
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
146
+ # print("start generating")
147
+ if temperature > 0:
148
+ generation_kwargs = dict(
149
+ **model_inputs,
150
+ do_sample=True,
151
+ temperature=temperature,
152
+ top_p=top_p,
153
+ max_new_tokens=max_length_tokens,
154
+ streamer=streamer
155
+ )
156
+ else:
157
+ generation_kwargs = dict(
158
+ **model_inputs,
159
+ do_sample=False,
160
+ max_new_tokens=max_length_tokens,
161
+ streamer=streamer
162
+ )
163
+ gen_thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
164
+ gen_thread.start()
165
 
166
 
167
+ partial_output = "Start Locating...\n"
168
+
169
+ for new_text in streamer:
170
+ partial_output += new_text
171
+ highlight_response = highlight_thinking(partial_output)
172
+ yield [[prompt, highlight_response]], [["null test", "null test2"]], "Generating file locations..."
173
+
174
+ gen_thread.join()
175
+
176
+ response = partial_output
177
 
178
+ raw_answer=post_process(response)
179
+ model_found_files = raw_answer.strip().split("\n")
180
  print(response)
181
+
182
+ highlight_response = highlight_thinking(response)
183
+ yield [[prompt,highlight_response]], [["null test","null test2"]], "Generate: Success"
184
+
185
+ # reading file content
186
+ contents = ""
187
+ for file_path in model_found_files:
188
+ file_name = file_path.replace("```","")
189
+ print(file_name)
190
+ # pdb.set_trace()
191
+ to_open_path = repo_path + "/" + file_name
192
+ print("to_open_path,",to_open_path)
193
+ with open(to_open_path, "r", encoding="utf-8") as f:
194
+ content = f.read()
195
+ contents += f"{file_name}\n{content}\n\n"
196
+
197
+
198
+ repair_prompt = get_repair_prompt(prompt,contents)
199
+
200
+ messages = [
201
+ {"role": "system", "content": "You are a helpful assistant."},
202
+ {"role": "user", "content": repair_prompt}
203
+ ]
204
+ text = tokenizer.apply_chat_template(
205
+ messages,
206
+ tokenize=False,
207
+ add_generation_prompt=True
208
  )
209
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
210
+
211
+ subprocess.run(["rm", "-rf", repo_path], check=True)
212
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
213
+ if temperature > 0:
214
+ generation_kwargs = dict(
215
+ **model_inputs,
216
+ do_sample=True,
217
  temperature=temperature,
218
  top_p=top_p,
219
+ max_new_tokens=max_length_tokens,
220
+ streamer=streamer
221
  )
222
+ else:
223
+ generation_kwargs = dict(
224
+ **model_inputs,
225
+ do_sample=False,
226
+ max_new_tokens=max_length_tokens,
227
+ streamer=streamer
228
+ )
229
+ gen_thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
230
+ gen_thread.start()
231
+
232
+ partial_output_repair = "Start Repairing...\n"
233
+ yield [[prompt,highlight_response],[repair_prompt,partial_output_repair]], [["null test","null test2"]], "Starting repair..."
234
+ time.sleep(5)
235
+ for new_text in streamer:
236
+ partial_output_repair += new_text
237
+ highlight_response = highlight_thinking(partial_output)
238
+ highlight_response_repair = highlight_thinking(partial_output_repair)
239
+ yield [[prompt, highlight_response], [repair_prompt, highlight_response_repair]], [["null test", "null test2"]], "Generating repair suggestion..."
240
 
241
+ gen_thread.join()
242
+
243
+ # yield response, "null test", "Generate: Success"
244
+ yield [[prompt,highlight_response],[repair_prompt,highlight_response_repair]], [["null test","null test2"]], "Generate: Success"
245
 
246
 
247
  def retry(
248
  text,
249
+ url,
250
  chatbot,
251
  history,
252
  top_p,
253
  temperature,
254
  max_length_tokens,
 
255
  chunk_size: int = 512,
256
  ):
257
  """
258
+ Retry the response for the input text and url.
259
  """
260
  if len(history) == 0:
261
  yield (chatbot, history, "Empty context")
262
  return
263
 
264
+ # chatbot.pop()
265
+ # history.pop()
266
+ # text = history.pop()[-1]
267
  if type(text) is tuple:
268
  text, _ = text
269
 
270
  yield from predict(
271
  text,
272
+ url,
273
  chatbot,
274
  history,
275
  top_p,
276
  temperature,
277
  max_length_tokens,
 
278
  chunk_size,
279
  )
280
 
 
283
  with gr.Blocks(theme=gr.themes.Soft(), delete_cache=(1800, 1800)) as demo:
284
  history = gr.State([])
285
  input_text = gr.State()
286
+ upload_url = gr.State()
287
 
288
  with gr.Row():
289
  gr.HTML(TITLE)
290
  status_display = gr.Markdown("Success", elem_id="status_display")
291
  gr.Markdown(DESCRIPTION_TOP)
292
+ gr.Markdown(USAGE_TOP)
293
 
294
  with gr.Row(equal_height=True):
295
  with gr.Column(scale=4):
 
298
  elem_id="Kimi-Dev-72B",
299
  show_share_button=True,
300
  bubble_full_width=False,
301
+ height=400,
302
+ # render_markdown=False
303
  )
304
  with gr.Row():
305
  with gr.Column(scale=4):
306
+ text_box = gr.Textbox(label="Issue Description", placeholder="Enter issue description", container=False)
307
  with gr.Column(min_width=70):
308
  submit_btn = gr.Button("Send")
309
+ # with gr.Column(min_width=70):
310
+ # cancel_btn = gr.Button("Stop")
311
  with gr.Row():
312
  empty_btn = gr.Button("🧹 New Conversation")
313
  retry_btn = gr.Button("🔄 Regenerate")
314
+ # del_last_btn = gr.Button("🗑️ Remove Last Turn")
315
+ def respond(message):
316
+ return f"Url submitted!"
317
  with gr.Column():
318
+ url_box = gr.Textbox(label="Please input a Github url here",placeholder="Input your url", lines=1)
319
+ url_submit_btn = gr.Button("Submit")
320
+ output = gr.Textbox(label="Submitted url")
321
+ url_submit_btn.click(fn=respond, inputs=upload_url, outputs=output)
322
+
323
  # Parameter Setting Tab for control the generation parameters
324
  with gr.Tab(label="Parameter Setting"):
325
+ top_p = gr.Slider(minimum=0, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p")
326
  temperature = gr.Slider(
327
+ minimum=0, maximum=1.0, value=1.0, step=0.1, interactive=True, label="Temperature"
328
  )
329
  max_length_tokens = gr.Slider(
330
+ minimum=512, maximum=16384, value=8192, step=64, interactive=True, label="Max Length Tokens"
 
 
 
331
  )
332
 
 
 
333
  gr.Examples(
334
  examples=get_examples(ROOT_DIR),
335
+ inputs=[url_box, text_box],
336
  )
337
+ # gr.Markdown()
338
 
339
  input_widgets = [
340
  input_text,
341
+ upload_url,
342
  chatbot,
343
  history,
344
  top_p,
345
  temperature,
346
  max_length_tokens,
 
347
  ]
348
  output_widgets = [chatbot, history, status_display]
349
 
350
  transfer_input_args = dict(
351
  fn=transfer_input,
352
+ inputs=[text_box, url_box],
353
+ outputs=[input_text, upload_url, text_box, url_box, submit_btn],
354
  show_progress=True,
355
  )
356
 
 
366
  empty_btn.click(reset_state, outputs=output_widgets, show_progress=True)
367
  empty_btn.click(**reset_args)
368
  retry_btn.click(**retry_args)
 
 
369
 
370
  demo.title = "Kimi-Dev-72B"
371
  return demo
 
375
  demo = build_demo(args)
376
  reload_javascript()
377
 
378
+ favicon_path = os.path.join("kimi_dev/serve/assets/favicon.ico")
 
379
  # demo.queue().launch(
380
  # favicon_path=favicon_path,
381
  # server_name=args.ip,
 
385
  favicon_path=favicon_path,
386
  server_name=args.ip,
387
  server_port=args.port,
388
+ share=True
389
  )
390
 
391
  if __name__ == "__main__":
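For context on the new generation path above: app.py now streams tokens by running `model.generate` on a background thread and reading decoded chunks from a `TextIteratorStreamer`. Below is a minimal, self-contained sketch of that pattern; the prompt text and token budget are illustrative placeholders, not the Space's exact configuration.

```python
# Minimal sketch of the streaming pattern app.py uses: run model.generate on a
# background thread and consume decoded chunks from a TextIteratorStreamer.
# The prompt and max_new_tokens are illustrative placeholders.
import threading

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_path = "moonshotai/Kimi-Dev-72B"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain what a SEARCH/REPLACE edit is."},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = threading.Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=512, do_sample=False, streamer=streamer),
)
thread.start()

partial = ""
for new_text in streamer:  # yields text chunks as soon as they are decoded
    partial += new_text
    print(new_text, end="", flush=True)
thread.join()
```

Running `generate` off the main thread is what allows the Gradio callback to `yield` partial chatbot updates while decoding is still in progress.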
{kimi_vl → kimi_dev}/__init__.py RENAMED
File without changes
{kimi_vl → kimi_dev}/serve/__init__.py RENAMED
File without changes
{kimi_vl → kimi_dev}/serve/assets/Kelpy-Codos.js RENAMED
File without changes
{kimi_vl → kimi_dev}/serve/assets/avatar.png RENAMED
File without changes
{kimi_vl → kimi_dev}/serve/assets/custom.css RENAMED
File without changes
{kimi_vl → kimi_dev}/serve/assets/custom.js RENAMED
File without changes
{kimi_vl → kimi_dev}/serve/assets/favicon.ico RENAMED
File without changes
kimi_dev/serve/examples.py ADDED
@@ -0,0 +1,26 @@
1
+ import os
2
+ import io
3
+ import base64
4
+
5
+ EXAMPLES_LIST = [
6
+ [
7
+ "https://github.com/astropy/astropy",
8
+ "units.quantity_input decorator fails for constructors with type hinted return value -> None\n### Summary\r\nI am using the `units.quantity_input` decorator with typing hints for constructors, however when I add the correct return value for the constructor (`None`) then I get an exception, because `None` has no attribute `to`.\r\n\r\n### Reproducer\r\nThe issue can be reproduced with the following file:\r\n``` Python\r\nimport astropy.units as u\r\n\r\n\r\nclass PoC(object):\r\n\r\n @u.quantity_input\r\n def __init__(self, voltage: u.V) -> None:\r\n pass\r\n\r\n\r\nif __name__ == '__main__':\r\n poc = PoC(1.*u.V)\r\n```\r\nwhich results in the following error:\r\n```\r\n$ python3 poc.py\r\nTraceback (most recent call last):\r\n File \"poc.py\", line 12, in <module>\r\n poc = PoC(1.*u.V)\r\n File \"/usr/lib64/python3.6/site-packages/astropy/utils/decorators.py\", line 868, in __init__\r\n func = make_function_with_signature(func, name=name, **wrapped_args)\r\n File \"/usr/lib64/python3.6/site-packages/astropy/units/decorators.py\", line 225, in wrapper\r\n return return_.to(wrapped_signature.return_annotation)\r\nAttributeError: 'NoneType' object has no attribute 'to'\r\n```\r\n\r\nThis has been tested on Fedora 27 with python 3.6.3, astropy 2.0.2 and numpy 1.13.3 all from Fedora's repository.\r\n\r\n### Workaround\r\nThe issue can be circumvented by not adding the return type typing hint. Unfortunately, then a static type checker cannot infer that this function returns nothing.\r\n\r\n### Possible fix\r\nMaybe the decorator could explicitly check whether None is returned and then omit the unit check.\n\n\n",
9
+ ],
10
+ [
11
+ "https://github.com/sympy/sympy",
12
+ "evalf does not call _imp_ recursively\nExample from https://stackoverflow.com/questions/41818842/why-cant-i-evaluate-a-composition-of-implemented-functions-in-sympy-at-a-point:\r\n\r\n```\r\n>>> from sympy.utilities.lambdify import implemented_function\r\n>>> f = implemented_function('f', lambda x: x ** 2)\r\n>>> g = implemented_function('g', lambda x: 2 * x)\r\n>>> print(f( 2 ).evalf())\r\n4.00000000000000\r\n>>> print( g(2) .evalf())\r\n4.00000000000000\r\n>>> print(f(g(2)).evalf())\r\nf(g(2))\r\n```\r\n\r\nThe code for this is in `Function._eval_evalf`. It isn't calling evalf recursively on the return of `_imp_`. \n\n\n",
13
+ ],
14
+ [
15
+ "https://github.com/matplotlib/matplotlib",
16
+ "[ENH]: ContourSet.set_paths\n### Problem\n\nTo get contour labelling working with its special transforms, Cartopy has a [workaround](https://github.com/SciTools/cartopy/blob/2ed668c17b4e52421f15c5be3761719c75c5311a/lib/cartopy/mpl/contour.py#L89-L108) where it replaces all the paths on the `ContourSet` with transformed versions. This currently looks like\r\n\r\n```python\r\npaths = cs.get_paths()\r\npaths[:] = transformed_paths\r\n``` \r\n\r\nwhich doesn’t smell very good.\n\n### Proposed solution\n\nThe above would smell better as \r\n\r\n```python\r\ncs.set_paths(transformed_paths)\r\n``` \n\n\n"
17
+ ]
18
+ ]
19
+
20
+
21
+ def get_examples(root_dir: str = None):
22
+ examples = []
23
+ for github_url, issue_text in EXAMPLES_LIST:
24
+ examples.append([github_url, issue_text])
25
+
26
+ return examples
{kimi_vl → kimi_dev}/serve/frontend.py RENAMED
File without changes
{kimi_vl → kimi_dev}/serve/gradio_utils.py RENAMED
File without changes
kimi_dev/serve/inference.py ADDED
@@ -0,0 +1,26 @@
1
+ import logging
2
+
3
+ from transformers import (
4
+ AutoModelForCausalLM,
5
+ AutoConfig,
6
+ AutoTokenizer
7
+ )
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ def load_model(model_path: str = "moonshotai/Kimi-Dev-72B"):
13
+ # load the model configuration
14
+ config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
15
+
16
+ model = AutoModelForCausalLM.from_pretrained(
17
+ model_path,
18
+ config=config,
19
+ torch_dtype="auto",
20
+ device_map="auto",
21
+ trust_remote_code=True,
22
+ )
23
+
24
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
25
+
26
+ return model, tokenizer
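A possible usage sketch for this helper, assuming enough GPU memory for the 72B checkpoint; the prompt is a placeholder and mirrors how app.py drives the tokenizer's chat template.

```python
# Hypothetical usage of kimi_dev.serve.inference.load_model (the prompt is illustrative).
from kimi_dev.serve.inference import load_model

model, tokenizer = load_model("moonshotai/Kimi-Dev-72B")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize this GitHub issue in one sentence: ..."},
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer([text], return_tensors="pt").to(model.device)

output_ids = model.generate(**inputs, max_new_tokens=128)
new_tokens = output_ids[0][inputs.input_ids.shape[1]:]  # drop the echoed prompt
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```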
kimi_dev/serve/templates.py ADDED
@@ -0,0 +1,337 @@
1
+ import os
2
+ import re
3
+ import json
4
+ import subprocess
5
+ import ast
6
+
7
+ def show_project_structure(structure, spacing=0) -> str:
8
+ """pprint the project structure"""
9
+
10
+ pp_string = ''
11
+
12
+ for key, value in structure.items():
13
+ if '.' in key and '.py' not in key:
14
+ continue # skip non-Python files
15
+
16
+ # TODO: maybe we should skip the test files...
17
+ if key.startswith('test'):
18
+ continue # skip the test files as well...
19
+
20
+ if '.' in key:
21
+ pp_string += ' ' * spacing + str(key) + '\n'
22
+ else:
23
+ pp_string += ' ' * spacing + str(key) + '/' + '\n'
24
+ if 'classes' not in value:
25
+ pp_string += show_project_structure(value, spacing + 4)
26
+
27
+ return pp_string
28
+
33
+ def clone_github_repo(github_url, local_path):
34
+ """Clone GitHub repository to local path"""
35
+ try:
36
+ subprocess.run(['git', 'clone', github_url, local_path], check=True)
37
+ print(f"Successfully cloned repository to: {local_path}")
38
+ except subprocess.CalledProcessError as e:
39
+ print(f"Warning: Repository cloning may have failed: {e}")
40
+
41
+ def parse_python_file(file_path, file_content=None):
42
+ """Parse a Python file to extract class and function definitions with their line numbers.
43
+ :param file_path: Path to the Python file.
44
+ :return: Class names, function names, and file contents
45
+ """
46
+ if file_content is None:
47
+ try:
48
+ with open(file_path, "r") as file:
49
+ file_content = file.read()
50
+ parsed_data = ast.parse(file_content)
51
+ except Exception as e: # Catch all types of exceptions
52
+ print(f"Error in file {file_path}: {e}")
53
+ return [], [], ""
54
+ else:
55
+ try:
56
+ parsed_data = ast.parse(file_content)
57
+ except Exception as e: # Catch all types of exceptions
58
+ print(f"Error in file {file_path}: {e}")
59
+ return [], [], ""
60
+ class_info = []
61
+ function_names = []
62
+ class_methods = set()
63
+ for node in ast.walk(parsed_data):
64
+ if isinstance(node, ast.ClassDef):
65
+ methods = []
66
+ for n in node.body:
67
+ if isinstance(n, ast.FunctionDef):
68
+ methods.append(
69
+ {
70
+ "name": n.name,
71
+ "start_line": n.lineno,
72
+ "end_line": n.end_lineno,
73
+ "text": file_content.splitlines()[
74
+ n.lineno - 1 : n.end_lineno
75
+ ],
76
+ }
77
+ )
78
+ class_methods.add(n.name)
79
+ class_info.append(
80
+ {
81
+ "name": node.name,
82
+ "start_line": node.lineno,
83
+ "end_line": node.end_lineno,
84
+ "text": file_content.splitlines()[
85
+ node.lineno - 1 : node.end_lineno
86
+ ],
87
+ "methods": methods,
88
+ }
89
+ )
90
+ elif isinstance(node, ast.FunctionDef) and not isinstance(
91
+ node, ast.AsyncFunctionDef
92
+ ):
93
+ if node.name not in class_methods:
94
+ function_names.append(
95
+ {
96
+ "name": node.name,
97
+ "start_line": node.lineno,
98
+ "end_line": node.end_lineno,
99
+ "text": file_content.splitlines()[
100
+ node.lineno - 1 : node.end_lineno
101
+ ],
102
+ }
103
+ )
104
+ return class_info, function_names, file_content.splitlines()
105
+
106
+ def create_structure(directory_path):
107
+ """Create the structure of the repository directory by parsing Python files.
108
+ :param directory_path: Path to the repository directory.
109
+ :return: A dictionary representing the structure.
110
+ """
111
+ structure = {}
112
+ for root, _, files in os.walk(directory_path):
113
+ repo_name = os.path.basename(directory_path)
114
+ relative_root = os.path.relpath(root, directory_path)
115
+ if relative_root == ".":
116
+ relative_root = repo_name
117
+ curr_struct = structure
118
+ for part in relative_root.split(os.sep):
119
+ if part not in curr_struct:
120
+ curr_struct[part] = {}
121
+ curr_struct = curr_struct[part]
122
+ for file_name in files:
123
+ if file_name.endswith(".py"):
124
+ file_path = os.path.join(root, file_name)
125
+ class_info, function_names, file_lines = parse_python_file(file_path)
126
+ curr_struct[file_name] = {
127
+ "classes": class_info,
128
+ "functions": function_names,
129
+ "text": file_lines,
130
+ }
131
+ else:
132
+ curr_struct[file_name] = {}
133
+ return structure
134
+
135
+ def build_repo_structure(root_path):
136
+ """Build repository structure using improved parsing method"""
137
+ return create_structure(root_path)
138
+
139
+
140
+
141
+ def get_loc_prompt(issue_text,repo_structure):
142
+ obtain_relevant_files_prompt = """
143
+ Please look through the following GitHub problem description and Repository structure and provide a list of files that one would need to edit to fix the problem.
144
+
145
+ ### GitHub Problem Description ###
146
+ {problem_statement}
147
+
148
+ ###
149
+
150
+ ### Repository Structure ###
151
+ {structure}
152
+
153
+ ###
154
+
155
+ Please only provide the full path and return at most 5 files.
156
+ The returned files should be separated by new lines ordered by most to least important and wrapped with ```
157
+ For example:
158
+ ```
159
+ file1.py
160
+ file2.py
161
+ ```
162
+ """
163
+ prompt_content = obtain_relevant_files_prompt.format(problem_statement=issue_text,structure=repo_structure)
164
+ return prompt_content
165
+
166
+ def get_repair_prompt(issue_text,file_content):
167
+ repair_prompt_combine_topn_cot_diff = """
168
+ We are currently solving the following issue within our repository. Here is the issue text:
169
+ --- BEGIN ISSUE ---
170
+ {problem_statement}
171
+ --- END ISSUE ---
172
+
173
+ Below are some code segments, each from a relevant file. One or more of these files may contain bugs.
174
+ --- BEGIN FILE ---
175
+ ```
176
+ {content}
177
+ ```
178
+ --- END FILE ---
179
+
180
+ Please first localize the bug based on the issue statement, and then generate *SEARCH/REPLACE* edits to fix the issue.
181
+
182
+ Every *SEARCH/REPLACE* edit must use this format:
183
+ 1. The file path
184
+ 2. The start of search block: <<<<<<< SEARCH
185
+ 3. A contiguous chunk of lines to search for in the existing source code
186
+ 4. The dividing line: =======
187
+ 5. The lines to replace into the source code
188
+ 6. The end of the replace block: >>>>>>> REPLACE
189
+
190
+ Here is an example:
191
+
192
+ ```python
193
+ ### mathweb/flask/app.py
194
+ <<<<<<< SEARCH
195
+ from flask import Flask
196
+ =======
197
+ import math
198
+ from flask import Flask
199
+ >>>>>>> REPLACE
200
+ ```
201
+
202
+ Please note that the *SEARCH/REPLACE* edit REQUIRES PROPER INDENTATION. If you would like to add the line ' print(x)', you must fully write that out, with all those spaces before the code!
203
+ Wrap the *SEARCH/REPLACE* edit in blocks ```python...```.
204
+ """
205
+ prompt_content = repair_prompt_combine_topn_cot_diff.format(problem_statement=issue_text,content=file_content.rstrip())
206
+ return prompt_content
207
+
208
+ def get_repo_files(structure, filepaths: list[str]):
209
+ files, classes, functions = get_full_file_paths_and_classes_and_functions(structure)
210
+ file_contents = dict()
211
+ for filepath in filepaths:
212
+ content = None
213
+
214
+ for file_content in files:
215
+ if file_content[0] == filepath:
216
+ content = '\n'.join(file_content[1])
217
+ file_contents[filepath] = content
218
+ break
219
+
220
+ # assert content is not None, "file not found"
221
+ return file_contents
222
+
223
+ def correct_file_path_in_structure(file_name, structure):
224
+ """
225
+ Search for the correct file path in the structure, mainly checking first-level subdirectories
226
+
227
+ Args:
228
+ file_name (str): File name to search for
229
+ structure (dict): Repository structure
230
+
231
+ Returns:
232
+ str: Correct file path if found, otherwise returns original file_name
233
+ """
234
+ # Search in current directory
235
+ file_contents = get_repo_files(structure, [file_name])
236
+ if file_contents != {}:
237
+ return file_name
238
+
239
+ # Only check first-level subdirectories
240
+ for sub_dir in structure.keys():
241
+ if isinstance(structure[sub_dir], dict):
242
+ file_contents = get_repo_files(structure[sub_dir], [file_name])
243
+ if file_contents != {}:
244
+ return f'{sub_dir}/{file_name}'
245
+
246
+ return file_name
247
+
248
+ def get_full_file_paths_and_classes_and_functions(structure, current_path=''):
249
+ """
250
+ Recursively retrieve all file paths, classes, and functions within a directory structure.
251
+
252
+ Arguments:
253
+ structure -- a dictionary representing the directory structure
254
+ current_path -- the path accumulated so far, used during recursion (default="")
255
+
256
+ Returns:
257
+ A tuple containing:
258
+ - files: list of full file paths
259
+ - classes: list of class details with file paths
260
+ - functions: list of function details with file paths
261
+ """
262
+ files = []
263
+ classes = []
264
+ functions = []
265
+ for name, content in structure.items():
266
+ if isinstance(content, dict):
267
+ if (
268
+ (
269
+ 'functions' not in content.keys()
270
+ and 'classes' not in content.keys()
271
+ and 'text' not in content.keys()
272
+ )
273
+ or not len(content.keys()) == 3
274
+ or (
275
+ isinstance(content.get('text', []), dict)
276
+ or isinstance(content.get('functions', []), dict)
277
+ or isinstance(content.get('classes', []), dict)
278
+ )
279
+ ):
280
+ # or guards against case where functions and classes are somehow part of the structure.
281
+ next_path = f'{current_path}/{name}' if current_path else name
282
+ (
283
+ sub_files,
284
+ sub_classes,
285
+ sub_functions,
286
+ ) = get_full_file_paths_and_classes_and_functions(content, next_path)
287
+ files.extend(sub_files)
288
+ classes.extend(sub_classes)
289
+ functions.extend(sub_functions)
290
+ else:
291
+ next_path = f'{current_path}/{name}' if current_path else name
292
+ files.append((next_path, content.get('text', [])))
293
+ if content.get('text', []) == []:
294
+ continue
295
+ if 'classes' in content:
296
+ for clazz in content['classes']:
297
+ classes.append(
298
+ {
299
+ 'file': next_path,
300
+ 'name': clazz['name'],
301
+ 'start_line': clazz['start_line'],
302
+ 'end_line': clazz['end_line'],
303
+ 'methods': [
304
+ {
305
+ 'name': method['name'],
306
+ 'start_line': method['start_line'],
307
+ 'end_line': method['end_line'],
308
+ }
309
+ for method in clazz.get('methods', [])
310
+ ],
311
+ },
312
+ )
313
+ if 'functions' in content:
314
+ for function in content['functions']:
315
+ try:
316
+ function['file'] = next_path
317
+ except TypeError:
318
+ continue
319
+ functions.append(function)
320
+ else:
321
+ next_path = f'{current_path}/{name}' if current_path else name
322
+ files.append(next_path)
323
+ return files, classes, functions
324
+
325
+ def post_process(response: str) -> str:
326
+ content = response
327
+ if "◁/thinkβ–·" in content:
328
+ content = content.replace("◁thinkβ–·", "")
329
+ parts = content.split("◁/thinkβ–·")
330
+ content = parts[-1]
331
+ # Extract content between triple backticks (```)
332
+ matches = re.findall(r"```.*?```", content, re.DOTALL)
333
+
334
+ if matches:
335
+ matches = [item.replace("```","") for item in matches]
336
+ return "\n".join(matches) # Return all matched code blocks joined by new lines
337
+ return content # If no match, return the full response
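Taken together, these helpers implement the two-stage flow that app.py drives from Gradio: localize candidate files from the repository tree, then request SEARCH/REPLACE edits over their contents. A rough sketch of that flow outside the UI is shown below; `generate_fn` is a hypothetical stand-in for a model call, and the URL and paths are examples.

```python
# Sketch of the localize-then-repair flow built on kimi_dev.serve.templates.
# `generate_fn(prompt) -> str` is a hypothetical model-call stand-in.
from kimi_dev.serve.templates import (
    build_repo_structure,
    clone_github_repo,
    get_loc_prompt,
    get_repair_prompt,
    post_process,
    show_project_structure,
)


def localize_and_repair(issue_text: str, repo_url: str, repo_path: str, generate_fn):
    # Stage 0: clone the repository and render its tree as text.
    clone_github_repo(repo_url, repo_path)
    structure = build_repo_structure(repo_path)
    tree_text = show_project_structure(structure)

    # Stage 1: ask the model which files need editing for this issue.
    loc_answer = generate_fn(get_loc_prompt(issue_text, tree_text))
    located = post_process(loc_answer)  # drops the ◁think▷ block and ``` fences
    file_paths = [p for p in located.strip().split("\n") if p]

    # Stage 2: feed the located files back and request SEARCH/REPLACE edits.
    contents = ""
    for rel_path in file_paths:
        with open(f"{repo_path}/{rel_path}", "r", encoding="utf-8") as f:
            contents += f"{rel_path}\n{f.read()}\n\n"
    return generate_fn(get_repair_prompt(issue_text, contents))
```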
{kimi_vl → kimi_dev}/serve/utils.py RENAMED
File without changes
kimi_vl/serve/chat_utils.py DELETED
@@ -1,379 +0,0 @@
1
- """
2
- From https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
3
- """
4
-
5
- import dataclasses
6
- import logging
7
- import copy
8
- from enum import IntEnum, auto
9
- from typing import Dict, List
10
- import base64
11
-
12
- import gradio as gr
13
- import torch
14
-
15
- from .utils import pil_to_base64
16
-
17
- IMAGE_TOKEN = "<image>"
18
- logger = logging.getLogger("gradio_logger")
19
-
20
-
21
- class SeparatorStyle(IntEnum):
22
- """Separator styles."""
23
-
24
- PLAIN = auto()
25
- ALIGNMENT = auto()
26
- KIMI_VL = auto()
27
-
28
-
29
- @dataclasses.dataclass
30
- class Conversation:
31
- """A class that manages prompt templates and keeps all conversation history."""
32
-
33
- # The name of this template
34
- name: str
35
- # The template of the system prompt
36
- system_template: str = "{system_message}"
37
- # The system message
38
- system_message: str = ""
39
- # The names of two roles
40
- roles: List[str] = (("USER", "ASSISTANT"),)
41
- # All messages. Each item is (role, message).
42
- messages: List[List[str]] = ()
43
- # The number of few shot examples
44
- offset: int = 0
45
- # The separator style and configurations
46
- sep_style: SeparatorStyle = SeparatorStyle.PLAIN
47
- sep: str = "\n"
48
- sep2: str = None
49
- # Stop criteria (the default one is EOS token)
50
- stop_str: str = None
51
- # Stops generation if meeting any token in this list
52
- stop_token_ids: List[int] = None
53
-
54
- def get_prompt(self) -> str:
55
- """Get the prompt for generation."""
56
- system_prompt = self.system_template.format(system_message=self.system_message)
57
- if self.sep_style == SeparatorStyle.PLAIN:
58
- seps = [self.sep, self.sep2]
59
- ret = ""
60
- for i, (role, message) in enumerate(self.messages):
61
- if message:
62
- if type(message) is tuple:
63
- message = message[0]
64
- if i % 2 == 0:
65
- ret += message + seps[i % 2]
66
- else:
67
- ret += message + seps[i % 2]
68
- else:
69
- ret += ""
70
- return ret
71
- elif self.sep_style == SeparatorStyle.ALIGNMENT:
72
- seps = [self.sep, self.sep2]
73
- ret = ""
74
- for i, (role, message) in enumerate(self.messages):
75
- if message:
76
- if type(message) is tuple:
77
- message, _, _ = message
78
- if i % 2 == 0:
79
- ret += '<image>\n' + seps[i % 2]
80
- else:
81
- ret += message + seps[i % 2]
82
- else:
83
- ret += ""
84
- return ret
85
- elif self.sep_style == SeparatorStyle.KIMI_VL:
86
- seps = [self.sep, self.sep2]
87
- if system_prompt == "" or system_prompt is None:
88
- ret = ""
89
- else:
90
- ret = system_prompt + seps[0]
91
- for i, (role, message) in enumerate(self.messages):
92
- if message:
93
- if type(message) is tuple:
94
- message = message[0]
95
-
96
- if role == "user":
97
- ret += message + self.sep
98
- else:
99
- if self.sep2 is not None:
100
- ret += message + self.sep2
101
- else:
102
- ret += message
103
- else:
104
- ret = ret
105
- return ret
106
- else:
107
- raise ValueError(f"Invalid style: {self.sep_style}")
108
-
109
- def set_system_message(self, system_message: str):
110
- """Set the system message."""
111
- self.system_message = system_message
112
-
113
- def append_message(self, role: str, message: str):
114
- """Append a new message."""
115
- self.messages.append([role, message])
116
-
117
- def update_last_message(self, message: str):
118
- """Update the last output.
119
-
120
- The last message is typically set to be None when constructing the prompt,
121
- so we need to update it in-place after getting the response from a model.
122
- """
123
- self.messages[-1][1] = message
124
-
125
- def reset_message(self):
126
- """Reset a new message."""
127
- self.messages = []
128
-
129
- def to_gradio_chatbot(self):
130
- """Convert the conversation to gradio chatbot format."""
131
- ret = []
132
- for i, (role, msg) in enumerate(self.messages[self.offset :]):
133
- if i % 2 == 0:
134
- ret.append([msg, None])
135
- else:
136
- ret[-1][-1] = msg
137
- return ret
138
-
139
- def to_openai_api_messages(self):
140
- """Convert the conversation to OpenAI chat completion format."""
141
- system_prompt = self.system_template.format(system_message=self.system_message)
142
- ret = [{"role": "system", "content": system_prompt}]
143
-
144
- for i, (_, msg) in enumerate(self.messages[self.offset :]):
145
- if i % 2 == 0:
146
- ret.append({"role": "user", "content": msg})
147
- else:
148
- if msg is not None:
149
- ret.append({"role": "assistant", "content": msg})
150
- return ret
151
-
152
- def copy(self):
153
- return Conversation(
154
- name=self.name,
155
- system_template=self.system_template,
156
- system_message=self.system_message,
157
- roles=self.roles,
158
- messages=[[x, y] for x, y in self.messages],
159
- offset=self.offset,
160
- sep_style=self.sep_style,
161
- sep=self.sep,
162
- sep2=self.sep2,
163
- stop_str=self.stop_str,
164
- stop_token_ids=self.stop_token_ids,
165
- )
166
-
167
- def dict(self):
168
- return {
169
- "template_name": self.name,
170
- "system_message": self.system_message,
171
- "roles": self.roles,
172
- "messages": self.messages,
173
- "offset": self.offset,
174
- }
175
-
176
-
177
- # A global registry for all conversation templates
178
- conv_templates: Dict[str, Conversation] = {}
179
-
180
-
181
- def register_conv_template(template: Conversation, override: bool = False):
182
- """Register a new conversation template."""
183
- if not override:
184
- assert template.name not in conv_templates, f"{template.name} has been registered."
185
-
186
- conv_templates[template.name] = template
187
-
188
-
189
- def get_conv_template(name: str) -> Conversation:
190
- """Get a conversation template."""
191
- return conv_templates[name].copy()
192
-
193
-
194
- register_conv_template(
195
- Conversation(
196
- name="plain",
197
- system_template="",
198
- system_message="",
199
- roles=("", ""),
200
- messages=(),
201
- offset=0,
202
- sep_style=SeparatorStyle.PLAIN,
203
- sep="",
204
- sep2="",
205
- stop_token_ids=[100001],
206
- stop_str=['</s>'],
207
- )
208
- )
209
-
210
-
211
- register_conv_template(
212
- Conversation(
213
- name="alignment",
214
- system_template="",
215
- system_message="",
216
- roles=("", ""),
217
- messages=(),
218
- offset=0,
219
- sep_style=SeparatorStyle.ALIGNMENT,
220
- sep="",
221
- sep2="",
222
- stop_token_ids=[100001],
223
- stop_str=['</s>'],
224
- )
225
- )
226
-
227
- register_conv_template(
228
- Conversation(
229
- name="kimi-vl",
230
- system_template="{system_message}",
231
- system_message="You are a helpful assistant",
232
- roles=("user", "assistant"),
233
- messages=(),
234
- offset=0,
235
- sep_style=SeparatorStyle.KIMI_VL,
236
- sep="<|im_end|>",
237
- sep2=None,
238
- stop_token_ids=None,
239
- stop_str=["<|im_end|>"],
240
- )
241
- )
242
-
243
-
244
- def new_chat_template(sft_format: str = "kimi-vl"):
245
- return get_conv_template(sft_format)
246
-
247
-
248
- def get_prompt(conv: Conversation) -> str:
249
- """Get the prompt for generation."""
250
- return conv.get_prompt()
251
-
252
-
253
- def generate_prompt_with_history(text, images, history, processor, max_length=2048):
254
- """
255
- Generate a prompt with the chat history.
256
-
257
- Args:
258
- text (str): The text prompt.
259
- images (list[PIL.Image.Image]): The image prompt.
260
- history (list): List of previous conversation messages.
261
- processor (KimiVLProcessor): The chat processor used for encoding the prompt.
262
- max_length (int): The maximum length of the prompt.
263
- """
264
- global IMAGE_TOKEN
265
-
266
- user_role_ind = 0
267
- bot_role_ind = 1
268
-
269
- # Initialize conversation
270
- conversation = new_chat_template(sft_format="plain")
271
-
272
- if history:
273
- conversation.messages = history
274
-
275
- if images is not None and len(images) > 0:
276
- # num_image_tags = text.count(IMAGE_TOKEN)
277
- # num_images = len(images)
278
- # if num_images > num_image_tags:
279
- # pad_image_tags = num_images - num_image_tags
280
- # image_tokens = "\n".join([IMAGE_TOKEN] * pad_image_tags)
281
-
282
- # # append the <image> in a new line after the text prompt
283
- # text = image_tokens + "\n" + text
284
- # elif num_images < num_image_tags:
285
- # remove_image_tags = num_image_tags - num_images
286
- # text = text.replace(IMAGE_TOKEN, "", remove_image_tags)
287
-
288
- print(f"prompt = {text}, len(images) = {len(images)}")
289
- text = (text, images)
290
-
291
- conversation.append_message(conversation.roles[user_role_ind], text)
292
- conversation.append_message(conversation.roles[bot_role_ind], "")
293
-
294
- # Create a copy of the conversation to avoid history truncation in the UI
295
- conversation_copy = conversation.copy()
296
- logger.info("=" * 80)
297
- logger.info(get_prompt(conversation))
298
-
299
- rounds = len(conversation.messages) // 2
300
-
301
- for _ in range(rounds):
302
- current_prompt = get_prompt(conversation)
303
- assert isinstance(current_prompt, str) and len(current_prompt) > 0, f"current_prompt = {current_prompt}"
304
- if torch.tensor(processor.tokenizer.encode(current_prompt)).size(-1) <= max_length:
305
- return conversation_copy
306
-
307
- if len(conversation.messages) % 2 != 0:
308
- gr.Error("The messages between user and assistant are not paired.")
309
- return
310
-
311
- try:
312
- for _ in range(2): # pop out two messages in a row
313
- conversation.messages.pop(0)
314
- except IndexError:
315
- gr.Error("Input text processing failed, unable to respond in this round.")
316
- return None
317
-
318
- gr.Error("Prompt could not be generated within max_length limit.")
319
- return None
320
-
321
-
322
- def convert_conversation_to_prompts(conversation: Conversation):
323
- """
324
- Convert the conversation to prompts.
325
- """
326
- conv_prompts = []
327
- last_image = None
328
-
329
- messages = conversation.messages
330
- for i in range(0, len(messages), 2):
331
- if isinstance(messages[i][1], tuple):
332
- text, images = messages[i][1]
333
- last_image = images[-1]
334
- else:
335
- text, images = messages[i][1], []
336
-
337
- prompt = {"role": messages[i][0], "content": text, "images": images}
338
- response = {"role": messages[i + 1][0], "content": messages[i + 1][1]}
339
- conv_prompts.extend([prompt, response])
340
-
341
- return conv_prompts, last_image
342
-
343
-
344
- def to_gradio_chatbot(conversation: Conversation) -> list:
345
- """Convert the conversation to gradio chatbot format."""
346
- ret = []
347
- for i, (_, msg) in enumerate(conversation.messages[conversation.offset :]):
348
- if i % 2 == 0:
349
- if type(msg) is tuple:
350
- msg, images = copy.deepcopy(msg)
351
-
352
- if isinstance(images, list):
353
- img_str = ""
354
- for j, image in enumerate(images):
355
- if isinstance(image, str):
356
- with open(image, "rb") as f:
357
- data = f.read()
358
- img_b64_str = base64.b64encode(data).decode()
359
- image_str = (
360
- f'<img src="data:image/png;base64,{img_b64_str}" '
361
- f'alt="user upload image" style="max-width: 300px; height: auto;" />'
362
- )
363
- else:
364
- image_str = pil_to_base64(image, f"user upload image_{j}", max_size=800, min_size=400)
365
-
366
- img_str += image_str
367
- msg = img_str + msg
368
- else:
369
- pass
370
-
371
- ret.append([msg, None])
372
- else:
373
- ret[-1][-1] = msg
374
- return ret
375
-
376
-
377
- def to_gradio_history(conversation: Conversation):
378
- """Convert the conversation to gradio history format."""
379
- return conversation.messages[conversation.offset :]
kimi_vl/serve/examples.py DELETED
@@ -1,54 +0,0 @@
1
- import os
2
- import io
3
- import base64
4
- from PIL import Image
5
-
6
- EXAMPLES_LIST = [
7
- [
8
- ["images/demo1.jpeg"],
9
- "Where am I?",
10
- ],
11
- [
12
- ["images/demo2.jpeg", "images/demo3.jpeg"],
13
- "Based on the abstract and introduction above, write a concise and elegant Twitter post that highlights key points and figures without sounding overly promotional. Use English, include emojis and hashtags.",
14
- ],
15
- [
16
- ["images/demo6.jpeg"],
17
- "Create a role play modeled after this cat."
18
- ],
19
- # mulit-frames example
20
- [
21
- ["images/demo4.jpeg", "images/demo5.jpeg"],
22
- "Please infer step by step who this manuscript belongs to and what it records."
23
- ]
24
- ]
25
-
26
-
27
- def display_example(image_list, root_dir: str = None):
28
- images_html = ""
29
- for _, img_path in enumerate(image_list):
30
- if root_dir is not None:
31
- img_path = os.path.join(root_dir, img_path)
32
-
33
- image = Image.open(img_path)
34
- buffered = io.BytesIO()
35
- image.save(buffered, format="PNG", quality=100)
36
- img_b64_str = base64.b64encode(buffered.getvalue()).decode()
37
- img_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="{img_path}" style="height:80px; margin-right: 10px;" />'
38
- images_html += img_str
39
-
40
- result_html = f"""
41
- <div style="display: flex; align-items: center; margin-bottom: 10px;">
42
- <div style="flex: 1; margin-right: 10px;">{images_html}</div>
43
- </div>
44
- """
45
-
46
- return result_html
47
-
48
-
49
- def get_examples(root_dir: str = None):
50
- examples = []
51
- for images, texts in EXAMPLES_LIST:
52
- examples.append([images, display_example(images, root_dir), texts])
53
-
54
- return examples
kimi_vl/serve/inference.py DELETED
@@ -1,145 +0,0 @@
1
- import logging
2
- import re
3
- from threading import Thread
4
- from typing import List, Optional
5
-
6
- import torch
7
- import spaces
8
- from transformers import (
9
- AutoModelForCausalLM,
10
- AutoProcessor,
11
- AutoConfig,
12
- StoppingCriteria,
13
- StoppingCriteriaList,
14
- TextIteratorStreamer,
15
- AutoTokenizer
16
- )
17
-
18
- from .chat_utils import Conversation, get_conv_template
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
- def load_model(model_path: str = "moonshotai/Kimi-Dev-72B"):
24
- # hotfix the model to use flash attention 2
25
- config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
26
- # config._attn_implementation = "flash_attention_2"
27
- # config.vision_config._attn_implementation = "flash_attention_2"
28
- # config.text_config._attn_implementation = "flash_attention_2"
29
- # print("Successfully set the attn_implementation to flash_attention_2")
30
-
31
- model = AutoModelForCausalLM.from_pretrained(
32
- model_path,
33
- config=config,
34
- torch_dtype="auto",
35
- device_map="auto",
36
- trust_remote_code=True,
37
- )
38
- # processor = AutoProcessor.from_pretrained(model_path, config=config, trust_remote_code=True)
39
- tokenizer = AutoTokenizer.from_pretrained(model_path)
40
-
41
- return model, tokenizer
42
-
43
-
44
- class StoppingCriteriaSub(StoppingCriteria):
45
- def __init__(self, stops=[], encounters=1):
46
- super().__init__()
47
- self.stops = [stop.to("cuda") for stop in stops]
48
-
49
- def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
50
- for stop in self.stops:
51
- if input_ids.shape[-1] < len(stop):
52
- continue
53
- if torch.all((stop == input_ids[0][-len(stop) :])).item():
54
- return True
55
-
56
- return False
57
-
58
-
59
- def format_messages(
60
- conversations: list[Conversation],
61
- system_prompt: Optional[str] = "",
62
- sft_format: Optional[str] = "kimi-vl",
63
- ):
64
- """
65
- Format the conversations to the input format of the model.
66
- """
67
- converstion = get_conv_template(sft_format)
68
- converstion.set_system_message(system_prompt)
69
- for message in conversations:
70
- converstion.append_message(message["role"], message["content"])
71
- return converstion
72
-
73
-
74
-
75
-
76
- @torch.no_grad()
77
- @torch.inference_mode()
78
- def kimi_dev_generate(
79
- model: torch.nn.Module,
80
- tokenizer,
81
- # processor: AutoProcessor,
82
- conversations: list[Conversation],
83
- stop_words: list,
84
- max_length: int = 256,
85
- temperature: float = 1.0,
86
- top_p: float = 1.0,
87
- chunk_size: int = -1,
88
- ):
89
- # convert conversation to inputs
90
- print(f"conversations = {conversations}")
91
- # inputs = preprocess(conversations)
92
- inputs = tokenizer.tokenize(conversations)
93
- inputs = inputs.to(model.device)
94
-
95
- return generate(
96
- model,
97
- tokenizer,
98
- inputs,
99
- max_gen_len=max_length,
100
- temperature=temperature,
101
- top_p=top_p,
102
- stop_words=stop_words,
103
- chunk_size=chunk_size,
104
- )
105
-
106
-
107
- def generate(
108
- model,
109
- tokenizer,
110
- inputs,
111
- max_gen_len: int = 256,
112
- temperature: float = 0,
113
- top_p: float = 0.95,
114
- stop_words: List[str] = [],
115
- chunk_size: int = -1,
116
- ):
117
- """Stream the text output from the multimodality model with prompt and image inputs."""
118
- stop_words_ids = [torch.tensor(tokenizer.encode(stop_word)) for stop_word in stop_words]
119
- stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
120
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
121
-
122
- kwargs = dict(
123
- **inputs,
124
- max_new_tokens=max_gen_len,
125
- do_sample=True,
126
- use_cache=True,
127
- streamer=streamer,
128
- stopping_criteria=stopping_criteria,
129
- )
130
-
131
- if temperature > 0:
132
- kwargs.update(
133
- {
134
- "do_sample": True,
135
- "top_p": top_p,
136
- "temperature": temperature,
137
- }
138
- )
139
- else:
140
- kwargs["do_sample"] = False
141
-
142
- thread = Thread(target=model.generate, kwargs=kwargs)
143
- thread.start()
144
-
145
- yield from streamer