# Kimi-Dev-72B / app.py
import argparse
import gradio as gr
import os
import spaces
import copy
import time
import subprocess
import openai
from kimi_dev.serve.frontend import reload_javascript
from kimi_dev.serve.utils import (
configure_logger,
)
from kimi_dev.serve.gradio_utils import (
reset_state,
reset_textbox,
transfer_input,
wrap_gen_fn,
)
from kimi_dev.serve.examples import get_examples
from kimi_dev.serve.templates import (
    post_process,
    get_loc_prompt,
    clone_github_repo,
    build_repo_structure,
    show_project_structure,
    get_repair_prompt,
    get_full_file_paths_and_classes_and_functions,
    correct_file_path_in_structure,
    correct_file_paths,
)
TITLE = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with Kimi-Dev-72B🔥 </h1>"""
DESCRIPTION_TOP = """<a href="https://github.com/MoonshotAI/Kimi-Dev" target="_blank">Kimi-Dev-72B</a> is a strong and open-source coding LLM for software engineering tasks."""
USAGE_TOP = """Usage: 1. Input a GitHub URL like "https://github.com/astropy/astropy" and a commit ID, then submit them. \n2. Input your issue description and chat with Kimi-Dev-72B!"""
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DEPLOY_MODELS = dict()
logger = configure_logger()
client = openai.OpenAI(
    base_url="http://localhost:8080/v1",  # local vLLM server address
    api_key="EMPTY",  # no real key is needed; any non-None value works
)
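# NOTE: the vLLM OpenAI-compatible server that `client` talks to is launched by
# serve_vllm.sh in the __main__ block at the bottom of this file.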
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="Kimi-Dev-72B")
parser.add_argument(
"--local-path",
type=str,
default="",
help="huggingface ckpt, optional",
)
parser.add_argument("--ip", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int, default=7860)
return parser.parse_args()
def get_prompt(conversation) -> str:
"""
Get the prompt for the conversation.
"""
system_prompt = conversation.system_template.format(system_message=conversation.system_message)
return system_prompt
def highlight_thinking(msg: str) -> str:
msg = copy.deepcopy(msg)
if "◁think▷" in msg:
msg = msg.replace("◁think▷", "<b style='color:blue;'>🤔Thinking...</b>\n")
if "◁/think▷" in msg:
msg = msg.replace("◁/think▷", "\n<b style='color:purple;'>💡Summary</b>\n")
return msg
@wrap_gen_fn
@spaces.GPU(duration=180)
def predict(
text,
url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
chunk_size: int = 512,
):
"""
Generate a response for the given issue description and GitHub repository.
This function clones a GitHub repository, analyzes its structure, locates relevant files
based on the issue description, and generates a repair solution using the Kimi-Dev model.
Args:
text: The issue description provided by the user
url: The GitHub repository URL
commit_hash: The specific commit hash to checkout
chatbot: The current chatbot conversation state
history: The conversation history
top_p: The top-p sampling parameter for text generation
temperature: The temperature parameter for text generation
max_length_tokens: Maximum number of tokens to generate
chunk_size: Size of chunks for streaming response (default: 512)
Returns:
Generator yielding tuples of (chatbot_state, history_state, status_message) representing
the updated conversation state and generation status
"""
print("running the prediction function")
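    # Overall flow: clone the repo at the given commit, ask the model to locate
    # relevant files from the repo tree (stage 1), then read those files and ask
    # the model for a repair (stage 2), streaming both stages to the chat UI.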
    # The module-level `client` defined above already targets the local vLLM
    # endpoint, so no extra module-level openai configuration is needed here.
    prompt = text
    repo_name = url.split("/")[-1]
    print(url)
    print(commit_hash)
    repo_path = "./local_path/" + repo_name  # local clone path
    clone_github_repo(url, repo_path, commit_hash)
    print("repo cloned")

    # Stage 1 (localization): ask the model which files are relevant to the issue.
    structure = build_repo_structure(repo_path)
    string_structure = show_project_structure(structure)
    loc_prompt = get_loc_prompt(prompt, string_structure)
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": loc_prompt}
]
    response = client.chat.completions.create(
        model="kimi-dev",
        messages=messages,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_length_tokens,
    )
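    # Stream the localization answer token by token and surface it in the chat UI.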
partial_output = "Start Locating...\n"
for chunk in response:
delta = chunk.choices[0].delta
if delta and delta.content:
partial_output += delta.content
highlight_response = highlight_thinking(partial_output)
yield [[prompt, highlight_response]], [["null test", "null test2"]], "Generating file locations..."
    response = partial_output
    raw_answer = post_process(response)
    model_found_files = raw_answer.strip().split("\n")
    files, _, _ = get_full_file_paths_and_classes_and_functions(structure)
    model_found_files = [correct_file_path_in_structure(file, structure) for file in model_found_files]
    found_files = correct_file_paths(model_found_files, files)
    highlight_response = highlight_thinking(response)
    yield [[prompt, highlight_response]], [["null test", "null test2"]], "Generate: Success"
    # Stage 2 (repair): read the located files and build the repair prompt.
    contents = ""
    for file_path in found_files:
        file_name = file_path.replace("```", "")
        print(file_name)
        to_open_path = os.path.join(repo_path, file_name)
        if not os.path.isfile(to_open_path):
            # Skip paths that do not exist at this commit.
            continue
        with open(to_open_path, "r", encoding="utf-8") as f:
            content = f.read()
        contents += f"{file_name}\n{content}\n\n"

    repair_prompt = get_repair_prompt(prompt, contents)
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": repair_prompt}
]
    # Clean up the cloned repo; its contents have already been read into the prompt.
    subprocess.run(["rm", "-rf", repo_path], check=True)
    time.sleep(5)

    response = client.chat.completions.create(
        model="kimi-dev",
        messages=messages,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_length_tokens,
    )
partial_output_repair = "Start Repairing...\n"
for chunk in response:
delta = chunk.choices[0].delta
if delta and delta.content:
partial_output_repair += delta.content
highlight_response_repair = highlight_thinking(partial_output_repair)
            yield [[prompt, highlight_response], [repair_prompt, highlight_response_repair]], [["null test", "null test2"]], "Generating file repairs..."
    yield [[prompt, highlight_response], [repair_prompt, highlight_response_repair]], [["null test", "null test2"]], "Generate: Success"
def retry(
text,
url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
chunk_size: int = 512,
):
"""
Regenerate the response for the previous input.
This function retries the prediction with the same parameters as the last request,
useful when the user wants to get a different response for the same input.
Args:
text: The issue description from the previous request
url: The GitHub repository URL from the previous request
commit_hash: The commit hash from the previous request
chatbot: The current chatbot conversation state
history: The conversation history
top_p: The top-p sampling parameter for text generation
temperature: The temperature parameter for text generation
max_length_tokens: Maximum number of tokens to generate
chunk_size: Size of chunks for streaming response (default: 512)
Returns:
Generator yielding tuples of (chatbot_state, history_state, status_message) or
yields error state if history is empty
"""
if len(history) == 0:
yield (chatbot, history, "Empty context")
return
    if isinstance(text, tuple):
        text, _ = text
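    # Re-run the full locate-and-repair pipeline with the same inputs.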
yield from predict(
text,
url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
chunk_size,
)
def build_demo(args: argparse.Namespace) -> gr.Blocks:
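    """Construct the Gradio Blocks UI and wire up its event handlers."""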
with gr.Blocks(theme=gr.themes.Soft(), delete_cache=(1800, 1800)) as demo:
history = gr.State([])
input_text = gr.State()
upload_url = gr.State()
commit_hash = gr.State()
with gr.Row():
gr.HTML(TITLE)
status_display = gr.Markdown("Success", elem_id="status_display")
gr.Markdown(DESCRIPTION_TOP)
gr.Markdown(USAGE_TOP)
with gr.Row(equal_height=True):
with gr.Column(scale=4):
with gr.Row():
chatbot = gr.Chatbot(
elem_id="Kimi-Dev-72B",
show_share_button=True,
bubble_full_width=False,
height=400,
# render_markdown=False
)
with gr.Row():
with gr.Column(scale=4):
text_box = gr.Textbox(label="Issue Description", placeholder="Enter issue description", container=False)
with gr.Column(min_width=70):
submit_btn = gr.Button("Send")
with gr.Row():
empty_btn = gr.Button("🧹 New Conversation")
retry_btn = gr.Button("🔄 Regenerate")
            def respond(message):
                """
                Handle the submission of URL and commit hash.
                This function provides feedback when the user submits a GitHub URL and commit hash,
                confirming that the information has been received.
                Args:
                    message: The input message (not used in the current implementation)
                Returns:
                    A confirmation message string
                """
                return "URL and commit hash submitted!"
with gr.Column():
                url_box = gr.Textbox(label="Please input a GitHub URL here", placeholder="Input your url", lines=1)
                commit_hash_box = gr.Textbox(label="Please input a commit hash here", placeholder="Input your commit hash", lines=1)
url_submit_btn = gr.Button("Submit")
output = gr.Textbox(label="Submitted url and commit")
url_submit_btn.click(fn=respond, inputs=upload_url, outputs=output)
                # Parameter Setting tab for controlling the generation parameters
                with gr.Tab(label="Parameter Setting"):
                    top_p = gr.Slider(minimum=0, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p")
                    temperature = gr.Slider(
                        minimum=0, maximum=1.0, value=1.0, step=0.1, interactive=True, label="Temperature"
                    )
                    max_length_tokens = gr.Slider(
                        minimum=512, maximum=32768, value=16384, step=64, interactive=True, label="Max Length Tokens"
                    )
gr.Examples(
examples=get_examples(ROOT_DIR),
inputs=[url_box, text_box, commit_hash_box],
)
# gr.Markdown()
input_widgets = [
input_text,
upload_url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
]
output_widgets = [chatbot, history, status_display]
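        # Wiring: submitting the textbox or clicking Send first stores the inputs via
        # transfer_input, then streams predict's output into the chatbot and status display.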
        transfer_input_args = dict(
            fn=transfer_input,
            inputs=[text_box, url_box, commit_hash_box],
            outputs=[input_text, upload_url, text_box, commit_hash, submit_btn],
            show_progress=True,
        )
predict_args = dict(fn=predict, inputs=input_widgets, outputs=output_widgets, show_progress=True)
retry_args = dict(fn=retry, inputs=input_widgets, outputs=output_widgets, show_progress=True)
reset_args = dict(fn=reset_textbox, inputs=[], outputs=[text_box, status_display])
predict_events = [
text_box.submit(**transfer_input_args).then(**predict_args),
submit_btn.click(**transfer_input_args).then(**predict_args),
]
empty_btn.click(reset_state, outputs=output_widgets, show_progress=True)
empty_btn.click(**reset_args)
retry_btn.click(**retry_args)
demo.title = "Kimi-Dev-72B"
return demo
def main(args: argparse.Namespace):
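    """Build the Gradio demo and launch it; the vLLM backend is expected at localhost:8080."""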
demo = build_demo(args)
reload_javascript()
    favicon_path = os.path.join(ROOT_DIR, "kimi_dev/serve/assets/favicon.ico")
demo.queue().launch(
favicon_path=favicon_path,
server_name=args.ip,
server_port=args.port,
share=True,
mcp_server=True
)
if __name__ == "__main__":
print("Start serving vllm...")
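    # Launch the vLLM OpenAI-compatible server in the background and wait a fixed
    # amount of time for the 72B checkpoint to load before starting the Gradio app.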
script_path = os.path.join(os.path.dirname(__file__), "serve_vllm.sh")
subprocess.Popen(["bash", script_path])
time.sleep(450)
print("Served vllm!")
args = parse_args()
print(args)
main(args)