Upload processor
Browse files
- chat_template.jinja +17 -85
- special_tokens_map.json +0 -2
- tokenizer.json +2 -2
- tokenizer_config.json +2 -4
chat_template.jinja
CHANGED
@@ -6,114 +6,46 @@
|
|
6 |
{%- if messages[0]['role'] == 'system' %}
|
7 |
{%- if messages[0]['content'] is string %}
|
8 |
{%- set system_message = messages[0]['content'] %}
|
9 |
-
{%- set loop_messages = messages[1:] %}
|
10 |
{%- else %}
|
11 |
{%- set system_message = messages[0]['content'][0]['text'] %}
|
12 |
-
{%- set loop_messages = messages[1:] %}
|
13 |
{%- endif %}
|
|
|
14 |
{%- else %}
|
15 |
{%- set system_message = default_system_message %}
|
16 |
{%- set loop_messages = messages %}
|
17 |
{%- endif %}
|
18 |
-
{%- if not tools is defined %}
|
19 |
-
{%- set tools = none %}
|
20 |
-
{%- elif tools is not none %}
|
21 |
-
{%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %}
|
22 |
-
{%- if system_message is defined %}
|
23 |
-
{%- set system_message = system_message + "\n\n" + parallel_tool_prompt %}
|
24 |
-
{%- else %}
|
25 |
-
{%- set system_message = parallel_tool_prompt %}
|
26 |
-
{%- endif %}
|
27 |
-
{%- endif %}
|
28 |
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
29 |
|
30 |
-
{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
|
31 |
-
|
32 |
-
{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %}
|
33 |
-
{%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}
|
34 |
-
{{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
|
35 |
-
{%- endif %}
|
36 |
-
{%- endfor %}
|
37 |
-
|
38 |
{%- for message in loop_messages %}
|
39 |
-
{%- if message[
|
40 |
-
{%- if
|
41 |
-
{{-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
{%-
|
46 |
-
{
|
47 |
-
|
48 |
-
{
|
49 |
-
{{- '"' + key + '": ' + val|tojson }}
|
50 |
-
{%- endif %}
|
51 |
-
{%- if not loop.last %}
|
52 |
-
{{- ", " }}
|
53 |
-
{%- endif %}
|
54 |
-
{%- endfor %}
|
55 |
-
{{- "}}" }}
|
56 |
-
{%- if not loop.last %}
|
57 |
-
{{- ", " }}
|
58 |
{%- else %}
|
59 |
-
{{-
|
60 |
{%- endif %}
|
61 |
{%- endfor %}
|
62 |
-
{{-
|
63 |
{%- endif %}
|
|
|
64 |
{%- if message['content'] is string %}
|
65 |
-
|
66 |
-
{%- else %}
|
67 |
-
{{- '[INST]' }}
|
68 |
-
{%- for block in message['content'] %}
|
69 |
-
{%- if block['type'] == 'text' %}
|
70 |
-
{{- block['text'] }}
|
71 |
-
{%- elif block['type'] == 'image' or block['type'] == 'image_url' %}
|
72 |
-
{{- '[IMG]' }}
|
73 |
-
{%- else %}
|
74 |
-
{{- raise_exception('Only text and image blocks are supported in message content!') }}
|
75 |
-
{%- endif %}
|
76 |
-
{%- endfor %}
|
77 |
-
{{- '[/INST]' }}
|
78 |
-
{%- endif %}
|
79 |
-
{%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}
|
80 |
-
{%- if message.tool_calls is defined %}
|
81 |
-
{%- set tool_calls = message.tool_calls %}
|
82 |
{%- else %}
|
83 |
-
{
|
84 |
{%- endif %}
|
85 |
-
{{- "[TOOL_CALLS] [" }}
|
86 |
-
{%- for tool_call in tool_calls %}
|
87 |
-
{%- set out = tool_call.function|tojson %}
|
88 |
-
{{- out[:-1] }}
|
89 |
-
{%- if not tool_call.id is defined or tool_call.id|length < 9 %}
|
90 |
-
{{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }}
|
91 |
-
{%- endif %}
|
92 |
-
{{- ', "id": "' + tool_call.id[-9:] + '"}' }}
|
93 |
-
{%- if not loop.last %}
|
94 |
-
{{- ", " }}
|
95 |
-
{%- else %}
|
96 |
-
{{- "]" + eos_token }}
|
97 |
-
{%- endif %}
|
98 |
-
{%- endfor %}
|
99 |
{%- elif message['role'] == 'assistant' %}
|
100 |
{%- if message['content'] is string %}
|
101 |
{{- message['content'] + eos_token }}
|
102 |
{%- else %}
|
103 |
{{- message['content'][0]['text'] + eos_token }}
|
104 |
{%- endif %}
|
105 |
-
{%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
|
106 |
-
{%- if message.content is defined and message.content.content is defined %}
|
107 |
-
{%- set content = message.content.content %}
|
108 |
-
{%- else %}
|
109 |
-
{%- set content = message.content %}
|
110 |
-
{%- endif %}
|
111 |
-
{{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
|
112 |
-
{%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %}
|
113 |
-
{{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }}
|
114 |
-
{%- endif %}
|
115 |
-
{{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }}
|
116 |
{%- else %}
|
117 |
-
{{- raise_exception(
|
118 |
{%- endif %}
|
119 |
{%- endfor %}
|
|
|
6 |
{%- if messages[0]['role'] == 'system' %}
|
7 |
{%- if messages[0]['content'] is string %}
|
8 |
{%- set system_message = messages[0]['content'] %}
|
|
|
9 |
{%- else %}
|
10 |
{%- set system_message = messages[0]['content'][0]['text'] %}
|
|
|
11 |
{%- endif %}
|
12 |
+
{%- set loop_messages = messages[1:] %}
|
13 |
{%- else %}
|
14 |
{%- set system_message = default_system_message %}
|
15 |
{%- set loop_messages = messages %}
|
16 |
{%- endif %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
{%- for message in loop_messages %}
|
20 |
+
{%- if message['role'] == 'user' %}
|
21 |
+
{%- if message['content'] is string %}
|
22 |
+
{{- '[INST]' + message['content'] + '[/INST]' }}
|
23 |
+
{%- else %}
|
24 |
+
{{- '[INST]' }}
|
25 |
+
{%- for block in message['content'] %}
|
26 |
+
{%- if block['type'] == 'text' %}
|
27 |
+
{{- block['text'] }}
|
28 |
+
{%- elif block['type'] in ['image', 'image_url'] %}
|
29 |
+
{{- '[IMG]' }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
{%- else %}
|
31 |
+
{{- raise_exception('Only text and image blocks are supported in message content!') }}
|
32 |
{%- endif %}
|
33 |
{%- endfor %}
|
34 |
+
{{- '[/INST]' }}
|
35 |
{%- endif %}
|
36 |
+
{%- elif message['role'] == 'system' %}
|
37 |
{%- if message['content'] is string %}
|
38 |
+
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
{%- else %}
|
40 |
+
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
41 |
{%- endif %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
{%- elif message['role'] == 'assistant' %}
|
43 |
{%- if message['content'] is string %}
|
44 |
{{- message['content'] + eos_token }}
|
45 |
{%- else %}
|
46 |
{{- message['content'][0]['text'] + eos_token }}
|
47 |
{%- endif %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
{%- else %}
|
49 |
+
{{- raise_exception('Only user, system and assistant roles are supported!') }}
|
50 |
{%- endif %}
|
51 |
{%- endfor %}
|
special_tokens_map.json
CHANGED
@@ -20,8 +20,6 @@
|
|
20 |
"[SYSTEM_PROMPT]",
|
21 |
"[/SYSTEM_PROMPT]",
|
22 |
"[TOOL_CONTENT]",
|
23 |
-
"<think>",
|
24 |
-
"</think>",
|
25 |
"<SPECIAL_22>",
|
26 |
"<SPECIAL_23>",
|
27 |
"<SPECIAL_24>",
|
|
|
20 |
"[SYSTEM_PROMPT]",
|
21 |
"[/SYSTEM_PROMPT]",
|
22 |
"[TOOL_CONTENT]",
|
|
|
|
|
23 |
"<SPECIAL_22>",
|
24 |
"<SPECIAL_23>",
|
25 |
"<SPECIAL_24>",
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24f2d27f4699b47d7e5dfa65648c204844bd17b9e81c30733e1cbd55e5385da7
|
3 |
+
size 17078021
|
tokenizer_config.json
CHANGED
@@ -169,7 +169,7 @@
|
|
169 |
"normalized": false,
|
170 |
"rstrip": false,
|
171 |
"single_word": false,
|
172 |
-
"special":
|
173 |
},
|
174 |
"21": {
|
175 |
"content": "</think>",
|
@@ -177,7 +177,7 @@
|
|
177 |
"normalized": false,
|
178 |
"rstrip": false,
|
179 |
"single_word": false,
|
180 |
-
"special":
|
181 |
},
|
182 |
"22": {
|
183 |
"content": "<SPECIAL_22>",
|
@@ -8025,8 +8025,6 @@
|
|
8025 |
"[SYSTEM_PROMPT]",
|
8026 |
"[/SYSTEM_PROMPT]",
|
8027 |
"[TOOL_CONTENT]",
|
8028 |
-
"<think>",
|
8029 |
-
"</think>",
|
8030 |
"<SPECIAL_22>",
|
8031 |
"<SPECIAL_23>",
|
8032 |
"<SPECIAL_24>",
|
|
|
169 |
"normalized": false,
|
170 |
"rstrip": false,
|
171 |
"single_word": false,
|
172 |
+
"special": false
|
173 |
},
|
174 |
"21": {
|
175 |
"content": "</think>",
|
|
|
177 |
"normalized": false,
|
178 |
"rstrip": false,
|
179 |
"single_word": false,
|
180 |
+
"special": false
|
181 |
},
|
182 |
"22": {
|
183 |
"content": "<SPECIAL_22>",
|
|
|
8025 |
"[SYSTEM_PROMPT]",
|
8026 |
"[/SYSTEM_PROMPT]",
|
8027 |
"[TOOL_CONTENT]",
|
|
|
|
|
8028 |
"<SPECIAL_22>",
|
8029 |
"<SPECIAL_23>",
|
8030 |
"<SPECIAL_24>",
|