# Kimi-Dev-72B / app.py
import argparse
import gradio as gr
import os
import spaces
import copy
import time
import subprocess
import openai
from kimi_dev.serve.frontend import reload_javascript
from kimi_dev.serve.utils import (
configure_logger,
)
from kimi_dev.serve.gradio_utils import (
reset_state,
reset_textbox,
transfer_input,
wrap_gen_fn,
)
from kimi_dev.serve.examples import get_examples
from kimi_dev.serve.templates import (
    post_process,
    get_loc_prompt,
    clone_github_repo,
    build_repo_structure,
    show_project_structure,
    get_repair_prompt,
    get_full_file_paths_and_classes_and_functions,
    correct_file_path_in_structure,
    correct_file_paths,
)
TITLE = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with Kimi-Dev-72B🔥 </h1>"""
DESCRIPTION_TOP = """<a href="https://github.com/MoonshotAI/Kimi-Dev" target="_blank">Kimi-Dev-72B</a> is a strong and open-source coding LLM for software engineering tasks."""
USAGE_TOP = """Usage: 1. Input a GitHub URL like "https://github.com/astropy/astropy" and a commit ID, then submit them. \n2. Input your issue description and chat with Kimi-Dev-72B!"""
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DEPLOY_MODELS = dict()
logger = configure_logger()
client = openai.OpenAI(
    base_url="http://localhost:8080/v1",  # local vLLM server address
    api_key="EMPTY",  # no real key is needed; any non-None value works
)
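# NOTE: the vLLM OpenAI-compatible server that `client` talks to is launched by
# serve_vllm.sh in the __main__ block at the bottom of this file.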
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="Kimi-Dev-72B")
parser.add_argument(
"--local-path",
type=str,
default="",
help="huggingface ckpt, optional",
)
parser.add_argument("--ip", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int, default=7860)
return parser.parse_args()
def get_prompt(conversation) -> str:
"""
Get the prompt for the conversation.
"""
system_prompt = conversation.system_template.format(system_message=conversation.system_message)
return system_prompt
def highlight_thinking(msg: str) -> str:
msg = copy.deepcopy(msg)
if "◁think▷" in msg:
msg = msg.replace("◁think▷", "<b style='color:blue;'>🤔Thinking...</b>\n")
if "◁/think▷" in msg:
msg = msg.replace("◁/think▷", "\n<b style='color:purple;'>💡Summary</b>\n")
return msg
@wrap_gen_fn
@spaces.GPU(duration=180)
def predict(
text,
url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
chunk_size: int = 512,
):
"""
Generate a response for the given issue description and GitHub repository.
This function clones a GitHub repository, analyzes its structure, locates relevant files
based on the issue description, and generates a repair solution using the Kimi-Dev model.
Args:
text: The issue description provided by the user
url: The GitHub repository URL
commit_hash: The specific commit hash to checkout
chatbot: The current chatbot conversation state
history: The conversation history
top_p: The top-p sampling parameter for text generation
temperature: The temperature parameter for text generation
max_length_tokens: Maximum number of tokens to generate
chunk_size: Size of chunks for streaming response (default: 512)
Returns:
Generator yielding tuples of (chatbot_state, history_state, status_message) representing
the updated conversation state and generation status
"""
print("running the prediction function")
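    # Overall flow: clone the repo at the given commit, ask the model to locate
    # relevant files from the repo tree (stage 1), then read those files and ask
    # the model for a repair (stage 2), streaming both stages to the chat UI.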
    # The module-level `client` defined above already targets the local vLLM
    # endpoint, so no extra module-level openai configuration is needed here.
    prompt = text
    repo_name = url.split("/")[-1]
    print(url)
    print(commit_hash)
    repo_path = "./local_path/" + repo_name  # local clone path
    clone_github_repo(url, repo_path, commit_hash)
    print("repo cloned")

    # Stage 1 (localization): ask the model which files are relevant to the issue.
    structure = build_repo_structure(repo_path)
    string_structure = show_project_structure(structure)
    loc_prompt = get_loc_prompt(prompt, string_structure)
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": loc_prompt}
]
    response = client.chat.completions.create(
        model="kimi-dev",
        messages=messages,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_length_tokens,
    )
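    # Stream the localization answer token by token and surface it in the chat UI.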
partial_output = "Start Locating...\n"
for chunk in response:
delta = chunk.choices[0].delta
if delta and delta.content:
partial_output += delta.content
highlight_response = highlight_thinking(partial_output)
yield [[prompt, highlight_response]], [["null test", "null test2"]], "Generating file locations..."
    response = partial_output
    raw_answer = post_process(response)
    model_found_files = raw_answer.strip().split("\n")
    files, _, _ = get_full_file_paths_and_classes_and_functions(structure)
    model_found_files = [correct_file_path_in_structure(file, structure) for file in model_found_files]
    found_files = correct_file_paths(model_found_files, files)
    highlight_response = highlight_thinking(response)
    yield [[prompt, highlight_response]], [["null test", "null test2"]], "Generate: Success"
    # Stage 2 (repair): read the located files and build the repair prompt.
    contents = ""
    for file_path in found_files:
        file_name = file_path.replace("```", "")
        print(file_name)
        to_open_path = os.path.join(repo_path, file_name)
        if not os.path.isfile(to_open_path):
            # Skip paths that do not exist at this commit.
            continue
        with open(to_open_path, "r", encoding="utf-8") as f:
            content = f.read()
        contents += f"{file_name}\n{content}\n\n"

    repair_prompt = get_repair_prompt(prompt, contents)
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": repair_prompt}
]
    # Clean up the cloned repo; its contents have already been read into the prompt.
    subprocess.run(["rm", "-rf", repo_path], check=True)
    time.sleep(5)

    response = client.chat.completions.create(
        model="kimi-dev",
        messages=messages,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_length_tokens,
    )
partial_output_repair = "Start Repairing...\n"
for chunk in response:
delta = chunk.choices[0].delta
if delta and delta.content:
partial_output_repair += delta.content
highlight_response_repair = highlight_thinking(partial_output_repair)
            yield [[prompt, highlight_response], [repair_prompt, highlight_response_repair]], [["null test", "null test2"]], "Generating file repairs..."
    yield [[prompt, highlight_response], [repair_prompt, highlight_response_repair]], [["null test", "null test2"]], "Generate: Success"
def retry(
text,
url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
chunk_size: int = 512,
):
"""
Regenerate the response for the previous input.
This function retries the prediction with the same parameters as the last request,
useful when the user wants to get a different response for the same input.
Args:
text: The issue description from the previous request
url: The GitHub repository URL from the previous request
commit_hash: The commit hash from the previous request
chatbot: The current chatbot conversation state
history: The conversation history
top_p: The top-p sampling parameter for text generation
temperature: The temperature parameter for text generation
max_length_tokens: Maximum number of tokens to generate
chunk_size: Size of chunks for streaming response (default: 512)
Returns:
Generator yielding tuples of (chatbot_state, history_state, status_message) or
yields error state if history is empty
"""
if len(history) == 0:
yield (chatbot, history, "Empty context")
return
    if isinstance(text, tuple):
        text, _ = text
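    # Re-run the full locate-and-repair pipeline with the same inputs.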
yield from predict(
text,
url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
chunk_size,
)
def build_demo(args: argparse.Namespace) -> gr.Blocks:
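    """Construct the Gradio Blocks UI and wire up its event handlers."""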
with gr.Blocks(theme=gr.themes.Soft(), delete_cache=(1800, 1800)) as demo:
history = gr.State([])
input_text = gr.State()
upload_url = gr.State()
commit_hash = gr.State()
with gr.Row():
gr.HTML(TITLE)
status_display = gr.Markdown("Success", elem_id="status_display")
gr.Markdown(DESCRIPTION_TOP)
gr.Markdown(USAGE_TOP)
with gr.Row(equal_height=True):
with gr.Column(scale=4):
with gr.Row():
chatbot = gr.Chatbot(
elem_id="Kimi-Dev-72B",
show_share_button=True,
bubble_full_width=False,
height=400,
# render_markdown=False
)
with gr.Row():
with gr.Column(scale=4):
text_box = gr.Textbox(label="Issue Description", placeholder="Enter issue description", container=False)
with gr.Column(min_width=70):
submit_btn = gr.Button("Send")
with gr.Row():
empty_btn = gr.Button("🧹 New Conversation")
retry_btn = gr.Button("🔄 Regenerate")
            def respond(message):
                """
                Handle the submission of URL and commit hash.
                This function provides feedback when the user submits a GitHub URL and commit hash,
                confirming that the information has been received.
                Args:
                    message: The input message (not used in the current implementation)
                Returns:
                    A confirmation message string
                """
                return "URL and commit hash submitted!"
with gr.Column():
                url_box = gr.Textbox(label="Please input a GitHub URL here", placeholder="Input your url", lines=1)
                commit_hash_box = gr.Textbox(label="Please input a commit hash here", placeholder="Input your commit hash", lines=1)
url_submit_btn = gr.Button("Submit")
output = gr.Textbox(label="Submitted url and commit")
url_submit_btn.click(fn=respond, inputs=upload_url, outputs=output)
                # Parameter Setting tab for controlling the generation parameters
                with gr.Tab(label="Parameter Setting"):
                    top_p = gr.Slider(minimum=0, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p")
                    temperature = gr.Slider(
                        minimum=0, maximum=1.0, value=1.0, step=0.1, interactive=True, label="Temperature"
                    )
                    max_length_tokens = gr.Slider(
                        minimum=512, maximum=32768, value=16384, step=64, interactive=True, label="Max Length Tokens"
                    )
gr.Examples(
examples=get_examples(ROOT_DIR),
inputs=[url_box, text_box, commit_hash_box],
)
# gr.Markdown()
input_widgets = [
input_text,
upload_url,
commit_hash,
chatbot,
history,
top_p,
temperature,
max_length_tokens,
]
output_widgets = [chatbot, history, status_display]
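        # Wiring: submitting the textbox or clicking Send first stores the inputs via
        # transfer_input, then streams predict's output into the chatbot and status display.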
        transfer_input_args = dict(
            fn=transfer_input,
            inputs=[text_box, url_box, commit_hash_box],
            outputs=[input_text, upload_url, text_box, commit_hash, submit_btn],
            show_progress=True,
        )
predict_args = dict(fn=predict, inputs=input_widgets, outputs=output_widgets, show_progress=True)
retry_args = dict(fn=retry, inputs=input_widgets, outputs=output_widgets, show_progress=True)
reset_args = dict(fn=reset_textbox, inputs=[], outputs=[text_box, status_display])
predict_events = [
text_box.submit(**transfer_input_args).then(**predict_args),
submit_btn.click(**transfer_input_args).then(**predict_args),
]
empty_btn.click(reset_state, outputs=output_widgets, show_progress=True)
empty_btn.click(**reset_args)
retry_btn.click(**retry_args)
demo.title = "Kimi-Dev-72B"
return demo
def main(args: argparse.Namespace):
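    """Build the Gradio demo and launch it; the vLLM backend is expected at localhost:8080."""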
demo = build_demo(args)
reload_javascript()
    favicon_path = os.path.join(ROOT_DIR, "kimi_dev/serve/assets/favicon.ico")
demo.queue().launch(
favicon_path=favicon_path,
server_name=args.ip,
server_port=args.port,
share=True,
mcp_server=True
)
if __name__ == "__main__":
print("Start serving vllm...")
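    # Launch the vLLM OpenAI-compatible server in the background and wait a fixed
    # amount of time for the 72B checkpoint to load before starting the Gradio app.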
script_path = os.path.join(os.path.dirname(__file__), "serve_vllm.sh")
subprocess.Popen(["bash", script_path])
time.sleep(450)
print("Served vllm!")
args = parse_args()
print(args)
main(args)