End of training
Browse files- README.md +2 -2
- adapter_config.json +7 -5
- adapter_model.safetensors +1 -1
- chat_template.jinja +6 -93
- runs/Jul05_12-24-11_6f3bc00ffbbb/events.out.tfevents.1751718253.6f3bc00ffbbb.992408.1 +3 -0
- runs/Jul05_12-25-31_6f3bc00ffbbb/events.out.tfevents.1751718333.6f3bc00ffbbb.1009849.0 +3 -0
- runs/Jul05_12-26-49_6f3bc00ffbbb/events.out.tfevents.1751718411.6f3bc00ffbbb.1010251.0 +3 -0
- runs/Jul05_12-33-11_6f3bc00ffbbb/events.out.tfevents.1751718793.6f3bc00ffbbb.1010817.0 +3 -0
- runs/Jul05_13-22-52_6f3bc00ffbbb/events.out.tfevents.1751721778.6f3bc00ffbbb.1010817.1 +3 -0
- runs/Jul05_13-23-44_6f3bc00ffbbb/events.out.tfevents.1751721828.6f3bc00ffbbb.1020559.0 +3 -0
- runs/Jul05_15-03-35_d6f25f79222f/events.out.tfevents.1751727818.d6f25f79222f.1602.0 +3 -0
- tokenizer.json +2 -2
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -4,9 +4,9 @@ library_name: transformers
|
|
| 4 |
model_name: SOC_Query_Generation_Base_Llama_3B
|
| 5 |
tags:
|
| 6 |
- generated_from_trainer
|
|
|
|
| 7 |
- unsloth
|
| 8 |
- trl
|
| 9 |
-
- sft
|
| 10 |
licence: license
|
| 11 |
---
|
| 12 |
|
|
@@ -36,7 +36,7 @@ This model was trained with SFT.
|
|
| 36 |
### Framework versions
|
| 37 |
|
| 38 |
- TRL: 0.19.0
|
| 39 |
-
- Transformers: 4.53.
|
| 40 |
- Pytorch: 2.7.0
|
| 41 |
- Datasets: 3.6.0
|
| 42 |
- Tokenizers: 0.21.2
|
|
|
|
| 4 |
model_name: SOC_Query_Generation_Base_Llama_3B
|
| 5 |
tags:
|
| 6 |
- generated_from_trainer
|
| 7 |
+
- sft
|
| 8 |
- unsloth
|
| 9 |
- trl
|
|
|
|
| 10 |
licence: license
|
| 11 |
---
|
| 12 |
|
|
|
|
| 36 |
### Framework versions
|
| 37 |
|
| 38 |
- TRL: 0.19.0
|
| 39 |
+
- Transformers: 4.53.1
|
| 40 |
- Pytorch: 2.7.0
|
| 41 |
- Datasets: 3.6.0
|
| 42 |
- Tokenizers: 0.21.2
|
adapter_config.json
CHANGED
|
@@ -20,20 +20,22 @@
|
|
| 20 |
"megatron_core": "megatron.core",
|
| 21 |
"modules_to_save": null,
|
| 22 |
"peft_type": "LORA",
|
|
|
|
| 23 |
"r": 32,
|
| 24 |
"rank_pattern": {},
|
| 25 |
"revision": null,
|
| 26 |
"target_modules": [
|
| 27 |
-
"down_proj",
|
| 28 |
-
"up_proj",
|
| 29 |
-
"q_proj",
|
| 30 |
"v_proj",
|
|
|
|
| 31 |
"gate_proj",
|
| 32 |
-
"
|
| 33 |
-
"
|
|
|
|
|
|
|
| 34 |
],
|
| 35 |
"task_type": "CAUSAL_LM",
|
| 36 |
"trainable_token_indices": null,
|
| 37 |
"use_dora": false,
|
|
|
|
| 38 |
"use_rslora": true
|
| 39 |
}
|
|
|
|
| 20 |
"megatron_core": "megatron.core",
|
| 21 |
"modules_to_save": null,
|
| 22 |
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
"r": 32,
|
| 25 |
"rank_pattern": {},
|
| 26 |
"revision": null,
|
| 27 |
"target_modules": [
|
|
|
|
|
|
|
|
|
|
| 28 |
"v_proj",
|
| 29 |
+
"up_proj",
|
| 30 |
"gate_proj",
|
| 31 |
+
"down_proj",
|
| 32 |
+
"q_proj",
|
| 33 |
+
"k_proj",
|
| 34 |
+
"o_proj"
|
| 35 |
],
|
| 36 |
"task_type": "CAUSAL_LM",
|
| 37 |
"trainable_token_indices": null,
|
| 38 |
"use_dora": false,
|
| 39 |
+
"use_qalora": false,
|
| 40 |
"use_rslora": true
|
| 41 |
}
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 194563400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c052ed4af9e692030976574aed1b06f8ea1d03830d6a390dd0b23be035be2910
|
| 3 |
size 194563400
|
chat_template.jinja
CHANGED
|
@@ -1,93 +1,6 @@
|
|
| 1 |
-
{{
|
| 2 |
-
{
|
| 3 |
-
|
| 4 |
-
{
|
| 5 |
-
{
|
| 6 |
-
|
| 7 |
-
{%- endif %}
|
| 8 |
-
{%- if not date_string is defined %}
|
| 9 |
-
{%- if strftime_now is defined %}
|
| 10 |
-
{%- set date_string = strftime_now("%d %b %Y") %}
|
| 11 |
-
{%- else %}
|
| 12 |
-
{%- set date_string = "26 Jul 2024" %}
|
| 13 |
-
{%- endif %}
|
| 14 |
-
{%- endif %}
|
| 15 |
-
{%- if not tools is defined %}
|
| 16 |
-
{%- set tools = none %}
|
| 17 |
-
{%- endif %}
|
| 18 |
-
|
| 19 |
-
{#- This block extracts the system message, so we can slot it into the right place. #}
|
| 20 |
-
{%- if messages[0]['role'] == 'system' %}
|
| 21 |
-
{%- set system_message = messages[0]['content']|trim %}
|
| 22 |
-
{%- set messages = messages[1:] %}
|
| 23 |
-
{%- else %}
|
| 24 |
-
{%- set system_message = "" %}
|
| 25 |
-
{%- endif %}
|
| 26 |
-
|
| 27 |
-
{#- System message #}
|
| 28 |
-
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
|
| 29 |
-
{%- if tools is not none %}
|
| 30 |
-
{{- "Environment: ipython\n" }}
|
| 31 |
-
{%- endif %}
|
| 32 |
-
{{- "Cutting Knowledge Date: December 2023\n" }}
|
| 33 |
-
{{- "Today Date: " + date_string + "\n\n" }}
|
| 34 |
-
{%- if tools is not none and not tools_in_user_message %}
|
| 35 |
-
{{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
|
| 36 |
-
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
| 37 |
-
{{- "Do not use variables.\n\n" }}
|
| 38 |
-
{%- for t in tools %}
|
| 39 |
-
{{- t | tojson(indent=4) }}
|
| 40 |
-
{{- "\n\n" }}
|
| 41 |
-
{%- endfor %}
|
| 42 |
-
{%- endif %}
|
| 43 |
-
{{- system_message }}
|
| 44 |
-
{{- "<|eot_id|>" }}
|
| 45 |
-
|
| 46 |
-
{#- Custom tools are passed in a user message with some extra guidance #}
|
| 47 |
-
{%- if tools_in_user_message and not tools is none %}
|
| 48 |
-
{#- Extract the first user message so we can plug it in here #}
|
| 49 |
-
{%- if messages | length != 0 %}
|
| 50 |
-
{%- set first_user_message = messages[0]['content']|trim %}
|
| 51 |
-
{%- set messages = messages[1:] %}
|
| 52 |
-
{%- else %}
|
| 53 |
-
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
|
| 54 |
-
{%- endif %}
|
| 55 |
-
{{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
|
| 56 |
-
{{- "Given the following functions, please respond with a JSON for a function call " }}
|
| 57 |
-
{{- "with its proper arguments that best answers the given prompt.\n\n" }}
|
| 58 |
-
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
| 59 |
-
{{- "Do not use variables.\n\n" }}
|
| 60 |
-
{%- for t in tools %}
|
| 61 |
-
{{- t | tojson(indent=4) }}
|
| 62 |
-
{{- "\n\n" }}
|
| 63 |
-
{%- endfor %}
|
| 64 |
-
{{- first_user_message + "<|eot_id|>"}}
|
| 65 |
-
{%- endif %}
|
| 66 |
-
|
| 67 |
-
{%- for message in messages %}
|
| 68 |
-
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
|
| 69 |
-
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
|
| 70 |
-
{%- elif 'tool_calls' in message %}
|
| 71 |
-
{%- if not message.tool_calls|length == 1 %}
|
| 72 |
-
{{- raise_exception("This model only supports single tool-calls at once!") }}
|
| 73 |
-
{%- endif %}
|
| 74 |
-
{%- set tool_call = message.tool_calls[0].function %}
|
| 75 |
-
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
|
| 76 |
-
{{- '{"name": "' + tool_call.name + '", ' }}
|
| 77 |
-
{{- '"parameters": ' }}
|
| 78 |
-
{{- tool_call.arguments | tojson }}
|
| 79 |
-
{{- "}" }}
|
| 80 |
-
{{- "<|eot_id|>" }}
|
| 81 |
-
{%- elif message.role == "tool" or message.role == "ipython" %}
|
| 82 |
-
{{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
|
| 83 |
-
{%- if message.content is mapping or message.content is iterable %}
|
| 84 |
-
{{- message.content | tojson }}
|
| 85 |
-
{%- else %}
|
| 86 |
-
{{- message.content }}
|
| 87 |
-
{%- endif %}
|
| 88 |
-
{{- "<|eot_id|>" }}
|
| 89 |
-
{%- endif %}
|
| 90 |
-
{%- endfor %}
|
| 91 |
-
{%- if add_generation_prompt %}
|
| 92 |
-
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
|
| 93 |
-
{%- endif %}
|
|
|
|
| 1 |
+
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ '<|start_header_id|>system<|end_header_id|>
|
| 2 |
+
' + messages[0]['content'] + '<|eot_id|>' }}{% set loop_messages = messages[1:] %}{% else %}{{ '<|start_header_id|>system<|end_header_id|>
|
| 3 |
+
Below are some instructions that describe some tasks. Write responses that appropriately complete each request.<|eot_id|>' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>
|
| 4 |
+
' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'assistant' %}{{ '<|start_header_id|>assistant<|end_header_id|>
|
| 5 |
+
' + message['content'] + '<|eot_id|>' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>
|
| 6 |
+
' }}{% endif %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
runs/Jul05_12-24-11_6f3bc00ffbbb/events.out.tfevents.1751718253.6f3bc00ffbbb.992408.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee351ca97b2b9dea9247922048fe2303ffd78e1710f007794732c382ffef8567
|
| 3 |
+
size 4184
|
runs/Jul05_12-25-31_6f3bc00ffbbb/events.out.tfevents.1751718333.6f3bc00ffbbb.1009849.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e06d0be584f91a64d562bc48ec83f8ff7a744eeee2fad3c1845d4f16a70bd8be
|
| 3 |
+
size 6385
|
runs/Jul05_12-26-49_6f3bc00ffbbb/events.out.tfevents.1751718411.6f3bc00ffbbb.1010251.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:361442619de483083ae470392b7c33806820860e1b6ca1e1e40e11de34a6101e
|
| 3 |
+
size 6385
|
runs/Jul05_12-33-11_6f3bc00ffbbb/events.out.tfevents.1751718793.6f3bc00ffbbb.1010817.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5663ef07d6a11bb9ab955a9741eb8ef9d3758973a6be1787600a1977d16b964e
|
| 3 |
+
size 48224
|
runs/Jul05_13-22-52_6f3bc00ffbbb/events.out.tfevents.1751721778.6f3bc00ffbbb.1010817.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d27b1876f7b3394da5efcec9c0133d46c4aa75f3376d8aa1beba14eeef7cf45a
|
| 3 |
+
size 4184
|
runs/Jul05_13-23-44_6f3bc00ffbbb/events.out.tfevents.1751721828.6f3bc00ffbbb.1020559.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd0afb073bec7bf043bbd4471585e62efc3722a0cbc53fe8e41f5e0c6286a97f
|
| 3 |
+
size 45549
|
runs/Jul05_15-03-35_d6f25f79222f/events.out.tfevents.1751727818.d6f25f79222f.1602.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd971086c32c96ace6b7ba424d34fea7428f5fb590404b31edee33a7459b8980
|
| 3 |
+
size 48224
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05
|
| 3 |
+
size 17210099
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6289
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00ebda2d38c636e70110ea16843ae22d5f255b0c34dff9dd6f6e7924e5333a50
|
| 3 |
size 6289
|