Training in progress, step 100

Browse files

Files changed (6) hide show

adapter_config.json +3 -3
adapter_model.safetensors +2 -2
chat_template.jinja +1 -14
tokenizer.json +2 -2
tokenizer_config.json +14 -62
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
@@ -24,8 +24,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e10a5b1f5f9d06b9dc019229b974664ae5c6a450903019dc7ac74ff46afe502
-size 30690184

 version https://git-lfs.github.com/spec/v1
+oid sha256:1df3cb6bd90c96e6e4b15847c1e89f5bf305d6044d58e1ec0210426af14247c5
+size 8731128

chat_template.jinja CHANGED Viewed

@@ -1,14 +1 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
-' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<｜User｜>' + content + '<｜Assistant｜>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '
-' + '```json' + '
-' + tool['function']['arguments'] + '
-' + '```' + '<｜tool▁call▁end｜>'}}{%- else %}{{content + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '
-' + '```json' + '
-' + tool['function']['arguments'] + '
-' + '```' + '<｜tool▁call▁end｜>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'
-' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '
-' + '```json' + '
-' + tool['function']['arguments'] + '
-' + '```' + '<｜tool▁call▁end｜>'}}{%- endif %}{%- endfor %}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + content + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'
-<｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<｜Assistant｜>'}}{% endif %}


+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93d5fd6d2f8cf1172ac86cf982e2b88fa6732366b44dc1a32349379a54a6a044
-size 11423346

 version https://git-lfs.github.com/spec/v1
+oid sha256:e51761ae26bab0e3530dd15e9eccf5b959916140e2fe264d45ccc944fa4040c8
+size 11423071

tokenizer_config.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "add_bos_token": false,
   "add_eos_token": false,
   "add_prefix_space": null,
   "added_tokens_decoder": {
     "151643": {
-      "content": "<｜begin▁of▁sentence｜>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -12,23 +12,23 @@
       "special": true
     },
     "151644": {
-      "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
     },
     "151645": {
-      "content": "<｜end▁of▁sentence｜>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
     },
     "151646": {
-      "content": "<|object_ref_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -36,28 +36,28 @@
       "special": true
     },
     "151647": {
-      "content": "<|object_ref_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
     },
     "151648": {
-      "content": "<|box_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
     },
     "151649": {
-      "content": "<|box_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
-      "special": true
     },
     "151650": {
       "content": "<|quad_start|>",
@@ -178,54 +178,6 @@
       "rstrip": false,
       "single_word": false,
       "special": false
-    },
-    "151665": {
-      "content": "<tool_response>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151666": {
-      "content": "</tool_response>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151667": {
-      "content": "<think>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151668": {
-      "content": "</think>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151669": {
-      "content": "<｜User｜>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151670": {
-      "content": "<｜Assistant｜>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
     }
   },
   "bos_token": "<｜begin▁of▁sentence｜>",
@@ -233,7 +185,7 @@
   "eos_token": "<｜end▁of▁sentence｜>",
   "extra_special_tokens": {},
   "legacy": true,
-  "model_max_length": 131072,
   "pad_token": "<｜end▁of▁sentence｜>",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizerFast",

 {
+  "add_bos_token": true,
   "add_eos_token": false,
   "add_prefix_space": null,
   "added_tokens_decoder": {
     "151643": {
+      "content": "<｜end▁of▁sentence｜>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "151644": {
+      "content": "<｜User｜>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
+      "special": false
     },
     "151645": {
+      "content": "<｜Assistant｜>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
+      "special": false
     },
     "151646": {
+      "content": "<｜begin▁of▁sentence｜>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "151647": {
+      "content": "<|EOT|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
+      "special": false
     },
     "151648": {
+      "content": "<think>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
+      "special": false
     },
     "151649": {
+      "content": "</think>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
+      "special": false
     },
     "151650": {
       "content": "<|quad_start|>",
       "rstrip": false,
       "single_word": false,
       "special": false
     }
   },
   "bos_token": "<｜begin▁of▁sentence｜>",
   "eos_token": "<｜end▁of▁sentence｜>",
   "extra_special_tokens": {},
   "legacy": true,
+  "model_max_length": 16384,
   "pad_token": "<｜end▁of▁sentence｜>",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizerFast",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e0dba18c4a672dc09bf531d64edb9630f48f4a5790f3ec82fcc90a7b3fa494a
 size 5777

 version https://git-lfs.github.com/spec/v1
+oid sha256:36d320f4ecd5f44d7b3a86fb3f73f4c175f658ad931311d7cdf1a170127a01f1
 size 5777