End of training

Browse files

Files changed (13) hide show

README.md +2 -2
adapter_config.json +7 -5
adapter_model.safetensors +1 -1
chat_template.jinja +6 -93
runs/Jul05_12-24-11_6f3bc00ffbbb/events.out.tfevents.1751718253.6f3bc00ffbbb.992408.1 +3 -0
runs/Jul05_12-25-31_6f3bc00ffbbb/events.out.tfevents.1751718333.6f3bc00ffbbb.1009849.0 +3 -0
runs/Jul05_12-26-49_6f3bc00ffbbb/events.out.tfevents.1751718411.6f3bc00ffbbb.1010251.0 +3 -0
runs/Jul05_12-33-11_6f3bc00ffbbb/events.out.tfevents.1751718793.6f3bc00ffbbb.1010817.0 +3 -0
runs/Jul05_13-22-52_6f3bc00ffbbb/events.out.tfevents.1751721778.6f3bc00ffbbb.1010817.1 +3 -0
runs/Jul05_13-23-44_6f3bc00ffbbb/events.out.tfevents.1751721828.6f3bc00ffbbb.1020559.0 +3 -0
runs/Jul05_15-03-35_d6f25f79222f/events.out.tfevents.1751727818.d6f25f79222f.1602.0 +3 -0
tokenizer.json +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -4,9 +4,9 @@ library_name: transformers
 model_name: SOC_Query_Generation_Base_Llama_3B
 tags:
 - generated_from_trainer
 - unsloth
 - trl
-- sft
 licence: license
 ---
@@ -36,7 +36,7 @@ This model was trained with SFT.
 ### Framework versions
 - TRL: 0.19.0
-- Transformers: 4.53.0
 - Pytorch: 2.7.0
 - Datasets: 3.6.0
 - Tokenizers: 0.21.2

 model_name: SOC_Query_Generation_Base_Llama_3B
 tags:
 - generated_from_trainer
+- sft
 - unsloth
 - trl
 licence: license
 ---
 ### Framework versions
 - TRL: 0.19.0
+- Transformers: 4.53.1
 - Pytorch: 2.7.0
 - Datasets: 3.6.0
 - Tokenizers: 0.21.2

adapter_config.json CHANGED Viewed

@@ -20,20 +20,22 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "up_proj",
-    "q_proj",
     "v_proj",
     "gate_proj",
-    "o_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": true
 }

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
+    "up_proj",
     "gate_proj",
+    "down_proj",
+    "q_proj",
+    "k_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": true
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2288a4887f6a30f6e0411d0fc4e53474990c0f65d2c729a10aa43a72dcc3bd61
 size 194563400

 version https://git-lfs.github.com/spec/v1
+oid sha256:c052ed4af9e692030976574aed1b06f8ea1d03830d6a390dd0b23be035be2910
 size 194563400

chat_template.jinja CHANGED Viewed

@@ -1,93 +1,6 @@
-{{- bos_token }}
-{%- if custom_tools is defined %}
-    {%- set tools = custom_tools %}
-{%- endif %}
-{%- if not tools_in_user_message is defined %}
-    {%- set tools_in_user_message = true %}
-{%- endif %}
-{%- if not date_string is defined %}
-    {%- if strftime_now is defined %}
-        {%- set date_string = strftime_now("%d %b %Y") %}
-    {%- else %}
-        {%- set date_string = "26 Jul 2024" %}
-    {%- endif %}
-{%- endif %}
-{%- if not tools is defined %}
-    {%- set tools = none %}
-{%- endif %}
-{#- This block extracts the system message, so we can slot it into the right place. #}
-{%- if messages[0]['role'] == 'system' %}
-    {%- set system_message = messages[0]['content']|trim %}
-    {%- set messages = messages[1:] %}
-{%- else %}
-    {%- set system_message = "" %}
-{%- endif %}
-{#- System message #}
-{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
-{%- if tools is not none %}
-    {{- "Environment: ipython\n" }}
-{%- endif %}
-{{- "Cutting Knowledge Date: December 2023\n" }}
-{{- "Today Date: " + date_string + "\n\n" }}
-{%- if tools is not none and not tools_in_user_message %}
-    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
-    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
-    {{- "Do not use variables.\n\n" }}
-    {%- for t in tools %}
-        {{- t | tojson(indent=4) }}
-        {{- "\n\n" }}
-    {%- endfor %}
-{%- endif %}
-{{- system_message }}
-{{- "<|eot_id|>" }}
-{#- Custom tools are passed in a user message with some extra guidance #}
-{%- if tools_in_user_message and not tools is none %}
-    {#- Extract the first user message so we can plug it in here #}
-    {%- if messages | length != 0 %}
-        {%- set first_user_message = messages[0]['content']|trim %}
-        {%- set messages = messages[1:] %}
-    {%- else %}
-        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
-{%- endif %}
-    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
-    {{- "Given the following functions, please respond with a JSON for a function call " }}
-    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
-    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
-    {{- "Do not use variables.\n\n" }}
-    {%- for t in tools %}
-        {{- t | tojson(indent=4) }}
-        {{- "\n\n" }}
-    {%- endfor %}
-    {{- first_user_message + "<|eot_id|>"}}
-{%- endif %}
-{%- for message in messages %}
-    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
-        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
-    {%- elif 'tool_calls' in message %}
-        {%- if not message.tool_calls|length == 1 %}
-            {{- raise_exception("This model only supports single tool-calls at once!") }}
-        {%- endif %}
-        {%- set tool_call = message.tool_calls[0].function %}
-        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
-        {{- '{"name": "' + tool_call.name + '", ' }}
-        {{- '"parameters": ' }}
-        {{- tool_call.arguments | tojson }}
-        {{- "}" }}
-        {{- "<|eot_id|>" }}
-    {%- elif message.role == "tool" or message.role == "ipython" %}
-        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
-        {%- if message.content is mapping or message.content is iterable %}
-            {{- message.content | tojson }}
-        {%- else %}
-            {{- message.content }}
-        {%- endif %}
-        {{- "<|eot_id|>" }}
-    {%- endif %}
-{%- endfor %}
-{%- if add_generation_prompt %}
-    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
-{%- endif %}

+{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ '<|start_header_id|>system<|end_header_id|>
+' + messages[0]['content'] + '<|eot_id|>' }}{% set loop_messages = messages[1:] %}{% else %}{{ '<|start_header_id|>system<|end_header_id|>
+Below are some instructions that describe some tasks. Write responses that appropriately complete each request.<|eot_id|>' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>
+' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'assistant' %}{{ '<|start_header_id|>assistant<|end_header_id|>
+' + message['content'] + '<|eot_id|>' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>
+' }}{% endif %}

runs/Jul05_12-24-11_6f3bc00ffbbb/events.out.tfevents.1751718253.6f3bc00ffbbb.992408.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee351ca97b2b9dea9247922048fe2303ffd78e1710f007794732c382ffef8567
+size 4184

runs/Jul05_12-25-31_6f3bc00ffbbb/events.out.tfevents.1751718333.6f3bc00ffbbb.1009849.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e06d0be584f91a64d562bc48ec83f8ff7a744eeee2fad3c1845d4f16a70bd8be
+size 6385

runs/Jul05_12-26-49_6f3bc00ffbbb/events.out.tfevents.1751718411.6f3bc00ffbbb.1010251.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:361442619de483083ae470392b7c33806820860e1b6ca1e1e40e11de34a6101e
+size 6385

runs/Jul05_12-33-11_6f3bc00ffbbb/events.out.tfevents.1751718793.6f3bc00ffbbb.1010817.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5663ef07d6a11bb9ab955a9741eb8ef9d3758973a6be1787600a1977d16b964e
+size 48224

runs/Jul05_13-22-52_6f3bc00ffbbb/events.out.tfevents.1751721778.6f3bc00ffbbb.1010817.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d27b1876f7b3394da5efcec9c0133d46c4aa75f3376d8aa1beba14eeef7cf45a
+size 4184

runs/Jul05_13-23-44_6f3bc00ffbbb/events.out.tfevents.1751721828.6f3bc00ffbbb.1020559.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd0afb073bec7bf043bbd4471585e62efc3722a0cbc53fe8e41f5e0c6286a97f
+size 45549

runs/Jul05_15-03-35_d6f25f79222f/events.out.tfevents.1751727818.d6f25f79222f.1602.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd971086c32c96ace6b7ba424d34fea7428f5fb590404b31edee33a7459b8980
+size 48224

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
-size 17209920

 version https://git-lfs.github.com/spec/v1
+oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05
+size 17210099

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fd49190ff52b47206fb107f57073cdd9eac5d56c18d3725ab6437df7764c471
 size 6289

 version https://git-lfs.github.com/spec/v1
+oid sha256:00ebda2d38c636e70110ea16843ae22d5f255b0c34dff9dd6f6e7924e5333a50
 size 6289