ArtusDev committed on
Commit b1779f7 · verified · 1 parent: 5095a32

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ RL_scale.jpeg filter=lfs diff=lfs merge=lfs -text
+ close_performance.png filter=lfs diff=lfs merge=lfs -text
+ main_logo.png filter=lfs diff=lfs merge=lfs -text
+ mid_train_scale.jpeg filter=lfs diff=lfs merge=lfs -text
+ open_performance.png filter=lfs diff=lfs merge=lfs -text
+ open_performance_white.png filter=lfs diff=lfs merge=lfs -text
+ test_time_scale.jpeg filter=lfs diff=lfs merge=lfs -text
+ assets/test_time_scale.jpeg filter=lfs diff=lfs merge=lfs -text
+ assets/close_performance.png filter=lfs diff=lfs merge=lfs -text
+ assets/mid_train_scale.jpeg filter=lfs diff=lfs merge=lfs -text
+ assets/open_performance_white.png filter=lfs diff=lfs merge=lfs -text
+ assets/main_logo.png filter=lfs diff=lfs merge=lfs -text
+ assets/RL_scale.jpeg filter=lfs diff=lfs merge=lfs -text
+ assets/open_performance.png filter=lfs diff=lfs merge=lfs -text
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
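The hunk above adds both exact-path entries (e.g. `assets/main_logo.png`) and glob entries (e.g. `*.jsonl`) to `.gitattributes`. A minimal sketch of how such patterns select files, using Python's `fnmatch` as a rough stand-in for gitattributes matching (real gitattributes patterns follow gitignore-style rules, e.g. a bare `*.jsonl` matches in any directory; the pattern list here is a hypothetical subset for illustration):

```python
from fnmatch import fnmatch

# A few of the patterns added to .gitattributes in this commit.
LFS_PATTERNS = [
    "RL_scale.jpeg",
    "assets/main_logo.png",
    "*.jsonl",
]

def tracked_by_lfs(path: str) -> bool:
    """Approximate gitattributes matching with fnmatch.

    Exact-path patterns match the full path; patterns without a slash
    also match against the basename, mimicking gitignore semantics.
    """
    basename = path.rsplit("/", 1)[-1]
    return any(
        fnmatch(path, pat) or ("/" not in pat and fnmatch(basename, pat))
        for pat in LFS_PATTERNS
    )

print(tracked_by_lfs("data/train.jsonl"))      # True: glob matches the basename
print(tracked_by_lfs("assets/main_logo.png"))  # True: exact-path entry
print(tracked_by_lfs("README.md"))             # False: no pattern matches
```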
LICENSE.md ADDED
@@ -0,0 +1,2 @@
+ - Kimi-Dev-72B is built with Qwen-2.5-72B. Qwen-2.5-72B is licensed under the [Qwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen2.5-72B/blob/main/LICENSE), Copyright (c) Alibaba Cloud. All Rights Reserved.
+ - Subject to the Qwen LICENSE AGREEMENT, Kimi-Dev-72B is under the MIT license.
README.md CHANGED
@@ -1,43 +1,102 @@
  ---
- base_model: moonshotai/Kimi-Dev-72B
- base_model_relation: quantized
- quantized_by: ArtusDev
  license: mit
+ base_model:
+ - Qwen/Qwen2.5-72B
  tags:
  - code
  - swebench
  - software
  - issue-resolving
- - exl3
  library_name: transformers
  ---
- 
- ## EXL3 Quants of moonshotai/Kimi-Dev-72B
- 
- EXL3 quants of [moonshotai/Kimi-Dev-72B](https://huggingface.co/moonshotai/Kimi-Dev-72B) using <a href="https://github.com/turboderp-org/exllamav3/">exllamav3</a> for quantization.
- 
- ### Quants
- | Quant (Revision) | Bits per Weight | Head Bits |
- | -------- | ---------- | --------- |
- | [3.5_H6](https://huggingface.co/ArtusDev/moonshotai_Kimi-Dev-72B-EXL3/tree/3.5bpw_H6) | 3.5 | 6 |
- | [5.0_H6](https://huggingface.co/ArtusDev/moonshotai_Kimi-Dev-72B-EXL3/tree/5.0bpw_H6) | 5.0 | 6 |
- | [8.0_H6](https://huggingface.co/ArtusDev/moonshotai_Kimi-Dev-72B-EXL3/tree/8.0bpw_H6) | 8.0 | 6 |
- | [8.0_H8](https://huggingface.co/ArtusDev/moonshotai_Kimi-Dev-72B-EXL3/tree/8.0bpw_H8) | 8.0 | 8 |
- 
- ### Downloading quants with huggingface-cli
- 
- <details>
- <summary>Click to view download instructions</summary>
- 
- Install huggingface-cli:
- 
- ```bash
- pip install -U "huggingface_hub[cli]"
- ```
- 
- Download a quant by targeting the specific quant revision (branch):
- 
- ```bash
- huggingface-cli download ArtusDev/moonshotai_Kimi-Dev-72B-EXL3 --revision "5.0bpw_H6" --local-dir ./
- ```
- </details>
+ <!-- # Kimi-Dev -->
+ 
+ <div align="center">
+   <img src="./assets/main_logo.png" alt="Kimi Logo" width="400" />
+   <h2><a href="https://moonshotai.github.io/Kimi-Dev/">
+   Introducing Kimi-Dev: <br>A Strong and Open-source Coding LLM for Issue Resolution</a></h2>
+   <b>Kimi-Dev Team</b>
+   <br>
+ </div>
+ <div align="center">
+   <a href="">
+     <b>📄 Tech Report (Coming soon...)</b>
+   </a> &nbsp;|&nbsp;
+   <a href="https://github.com/MoonshotAI/Kimi-Dev">
+     <b>📄 GitHub</b>
+   </a> &nbsp;
+ </div>
+ 
+ <br>
+ <br>
+ 
+ <!-- https://github.com/MoonshotAI/Kimi-Dev -->
+ 
+ We introduce Kimi-Dev-72B, our new open-source coding LLM for software engineering tasks. Kimi-Dev-72B achieves a new state-of-the-art on SWE-bench Verified among open-source models.
+ 
+ - Kimi-Dev-72B achieves 60.4% performance on SWE-bench Verified. It surpasses the runner-up, setting a new state-of-the-art result among open-source models.
+ 
+ - Kimi-Dev-72B is optimized via large-scale reinforcement learning. It autonomously patches real repositories in Docker and gains rewards only when the entire test suite passes. This ensures correct and robust solutions, aligning with real-world development standards.
+ 
+ - Kimi-Dev-72B is available for download and deployment on Hugging Face and GitHub. We welcome developers and researchers to explore its capabilities and contribute to development.
+ 
+ <div align="center">
+   <img src="./assets/open_performance_white.png" alt="Performance chart" width="600" />
+   <p><b>Performance of Open-source Models on SWE-bench Verified.</b></p>
+ </div>
+ 
+ ## Quick Start
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ 
+ model_name = "moonshotai/Kimi-Dev-72B"
+ 
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype="auto",
+     device_map="auto"
+ )
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ 
+ prompt = "Give me a short introduction to large language models."
+ messages = [
+     {"role": "system", "content": "You are a helpful assistant."},
+     {"role": "user", "content": prompt}
+ ]
+ text = tokenizer.apply_chat_template(
+     messages,
+     tokenize=False,
+     add_generation_prompt=True
+ )
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+ 
+ generated_ids = model.generate(
+     **model_inputs,
+     max_new_tokens=512
+ )
+ generated_ids = [
+     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+ ]
+ 
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+ ```
+ 
+ ## Citation
+ ```bibtex
+ @misc{kimi_dev_72b_2025,
+   title  = {Introducing Kimi-Dev: A Strong and Open-source Coding LLM for Issue Resolution},
+   author = {{Kimi-Dev Team}},
+   year   = {2025},
+   month  = {June},
+   url    = {https://www.moonshot.cn/Kimi-Dev}
+ }
+ ```
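The slicing idiom in the README's Quick Start (`output_ids[len(input_ids):]`) drops the echoed prompt tokens from each generated sequence before decoding. A minimal sketch of the same step with plain lists, no model or GPU needed (the token ids below are made up for illustration):

```python
# Toy stand-ins for tokenized batches: each generated sequence begins
# with an echo of its input ids, followed by newly generated ids.
input_ids_batch = [[101, 7, 8], [101, 9]]
generated_batch = [[101, 7, 8, 42, 43], [101, 9, 77, 78, 79]]

# Same idiom as the Quick Start: keep only tokens past the prompt length.
new_tokens = [
    out[len(inp):] for inp, out in zip(input_ids_batch, generated_batch)
]
print(new_tokens)  # [[42, 43], [77, 78, 79]]
```

This matters because `generate` returns the prompt and the completion concatenated; decoding without the slice would repeat the prompt in the response.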
config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "initializer_range": 0.02,
+   "intermediate_size": 29568,
+   "max_position_embeddings": 131072,
+   "max_window_layers": 70,
+   "model_type": "qwen2",
+   "num_attention_heads": 64,
+   "num_hidden_layers": 80,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-06,
+   "rope_theta": 1000000.0,
+   "sliding_window": 131072,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.43.1",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 152064,
+   "quantization_config": {
+     "quant_method": "exl3",
+     "version": "0.0.4",
+     "bits": 2.5,
+     "head_bits": 6,
+     "calibration": {
+       "rows": 100,
+       "cols": 2048
+     },
+     "out_scales": "auto"
+   }
+ }
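The `quantization_config` above declares EXL3 at 2.5 bits per weight with a 6-bit head. A rough back-of-envelope sketch (my own arithmetic, not from the repo) of the storage ratio versus the bf16 original, ignoring the higher-precision head layer and metadata overhead:

```python
import json

# A fragment of the config.json above, embedded for a self-contained example.
config = json.loads("""
{
  "torch_dtype": "bfloat16",
  "quantization_config": {"quant_method": "exl3", "bits": 2.5, "head_bits": 6}
}
""")

BF16_BITS = 16
quant_bits = config["quantization_config"]["bits"]

# Storage shrinks roughly in proportion to bits per weight.
ratio = quant_bits / BF16_BITS
print(f"~{ratio:.3f}x of bf16 size")  # ~0.156x of bf16 size
```

For a 72B-parameter model this proportionality is why the three safetensors shards below total roughly 25 GB rather than the ~145 GB a bf16 checkpoint would need.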
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5598248ff366ddee645c65a9d51733e3e86491157afdffeee20b8ca7cf73324
+ size 8530118104
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0eb5ef6b5d91700b931b205218f383231a2f8a7b007d93a9ee9f317a5f943ecb
+ size 8510249848
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:063b393d4ed6af5c340f24d509a2d1a0bc2d6575294a5a353fc24313fa44f537
+ size 8349270328
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quantization_config.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151645": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151646": {
+       "content": "<|object_ref_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151647": {
+       "content": "<|object_ref_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151648": {
+       "content": "<|box_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151649": {
+       "content": "<|box_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151650": {
+       "content": "<|quad_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151651": {
+       "content": "<|quad_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151652": {
+       "content": "<|vision_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151653": {
+       "content": "<|vision_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151654": {
+       "content": "<|vision_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151655": {
+       "content": "<|image_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151656": {
+       "content": "<|video_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151657": {
+       "content": "<tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151658": {
+       "content": "</tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151659": {
+       "content": "<|fim_prefix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151660": {
+       "content": "<|fim_middle|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151661": {
+       "content": "<|fim_suffix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151662": {
+       "content": "<|fim_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151663": {
+       "content": "<|repo_name|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151664": {
+       "content": "<|file_sep|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "bos_token": null,
+   "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
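The `chat_template` above renders the ChatML layout used by Qwen2 tokenizers, wrapping each message in `<|im_start|>`/`<|im_end|>` markers. For the simple no-tools case with an explicit system message, it reduces to roughly the following (a hand-rolled sketch, not the Jinja template itself; the tools branch and tool-call handling are omitted):

```python
def render_chatml(messages, add_generation_prompt=True):
    """Mimic the no-tools branch of the Qwen2 chat template above."""
    parts = []
    for m in messages:
        # Each message becomes an <|im_start|>role ... <|im_end|> block.
        parts.append(f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>\n")
    if add_generation_prompt:
        # Open an assistant turn so generation continues from here.
        parts.append("<|im_start|>assistant\n")
    return "".join(parts)

text = render_chatml([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hi"},
])
print(text)
```

In practice one should call `tokenizer.apply_chat_template(...)` as in the README's Quick Start rather than hand-rolling this; the sketch only shows what that call produces.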
vocab.json ADDED
The diff for this file is too large to render. See raw diff