LGAI-EXAONE
/

EXAONE-4.0-1.2B-GGUF

@@ -94,8 +94,8 @@ You can run EXAONE models locally using llama.cpp by following these steps:
 4. Generate a result with greedy decoding.
     ```bash
-    llama-cli -m EXAONE-4.0-1.2B-GGUF-Q4_K_M.gguf \
-        -fa -ngl 64 \
         --temp 0.0 --top-k 1 \
         -f inputs.txt -no-cnv
     ```
@@ -107,12 +107,12 @@ You can run EXAONE models locally using llama.cpp by following these steps:
 3. Run llama-server with the EXAONE 4.0 Jinja template. You can find the [chat template file](https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-1.2B-GGUF/blob/main/chat_template.jinja) in this repository.
     ```bash
-    llama-server -m EXAONE-4.0-32B-Q4_K_M.gguf \
-        -c 131072 -fa -ngl 64 \
         --temp 0.6 --top-p 0.95 \
         --jinja --chat-template-file chat_template.jinja \
         --host 0.0.0.0 --port 8820 \
-        -a EXAONE-4.0-32B-Q4_K_M
     ```
 4. Use the OpenAI-compatible chat completions API to test the GGUF model.
@@ -122,13 +122,14 @@ You can run EXAONE models locally using llama.cpp by following these steps:
     curl -X POST http://localhost:8820/v1/chat/completions \
         -H "Content-Type: application/json" \
         -d '{
-            "model": "EXAONE-4.0-32B-Q4_K_M",
             "messages": [
                 {"role": "user", "content": "Let'\''s work together on server!"}
             ],
             "max_tokens": 1024,
             "temperature": 0.6,
-            "top_p": 0.95
         }'
     ```

 4. Generate a result with greedy decoding.
     ```bash
+    llama-cli -m EXAONE-4.0-1.2B-Q4_K_M.gguf \
+        -fa -ngl 31 \
         --temp 0.0 --top-k 1 \
         -f inputs.txt -no-cnv
     ```
 3. Run llama-server with the EXAONE 4.0 Jinja template. You can find the [chat template file](https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-1.2B-GGUF/blob/main/chat_template.jinja) in this repository.
     ```bash
+    llama-server -m EXAONE-4.0-1.2B-Q4_K_M.gguf \
+        -c 131072 -fa -ngl 31 \
         --temp 0.6 --top-p 0.95 \
         --jinja --chat-template-file chat_template.jinja \
         --host 0.0.0.0 --port 8820 \
+        -a EXAONE-4.0-1.2B-Q4_K_M
     ```
 4. Use the OpenAI-compatible chat completions API to test the GGUF model.
     curl -X POST http://localhost:8820/v1/chat/completions \
         -H "Content-Type: application/json" \
         -d '{
+            "model": "EXAONE-4.0-1.2B-Q4_K_M",
             "messages": [
                 {"role": "user", "content": "Let'\''s work together on server!"}
             ],
             "max_tokens": 1024,
             "temperature": 0.6,
+            "top_p": 0.95,
+            "chat_template_kwargs": {"enable_thinking": false}
         }'
     ```

chat_template.jinja CHANGED Viewed

@@ -17,7 +17,7 @@
     {{- "\nHere are the tools available to you in JSON format within <tool> and </tool> tags:\n" }}
     {%- for tool in tools %}
         {{- "<tool>" }}
-        {{- tool | safe }}
         {{- "</tool>\n" }}
     {%- endfor %}
@@ -38,7 +38,7 @@
 {%- for i in range(messages | length) %}
     {%- set msg = messages[i] %}
     {%- set role = msg.role %}
-    {% if role is not none and role.class is not none and role not in role_indicators %}
         {{- raise_exception('Unknown role: ' ~ role) }}
     {%- endif %}
@@ -51,17 +51,17 @@
             {%- endif %}
             {{- end_of_turn -}}
             {%- continue %}
-        {%- elif tools is defined and tools %}
             {{- role_indicators['system'] }}
             {{- available_tools(tools) }}
-            {{- end_of_turn -}}
         {%- endif %}
     {%- endif %}
     {%- if role == 'assistant' %}
         {{- role_indicators['assistant'] }}
-        {%- if msg.content %}
             {%- if "</think>" in msg.content %}
                 {%- set content = msg.content.split('</think>')[-1].strip() %}
                 {%- set reasoning_content = msg.content.split('</think>')[0].strip() %}
@@ -105,7 +105,7 @@
                     {{- raise_exception('arguments or parameters are mandatory: ' ~ tool_call) }}
                 {%- endif %}
-                {{- "<tool_call>" }}{"name": "{{- tool_call.name }}", "arguments": {{ arguments | safe }}}{{- "</tool_call>" }}
                 {%- if not loop.last %}
                     {{- "\n" }}
@@ -119,8 +119,8 @@
         {%- if i == 0 or messages[i - 1].role != "tool" %}
             {{- role_indicators['tool'] }}
         {%- endif %}
-        {%- if msg.content is defined %}
-            {{- "<tool_result>" }}{"result": {{ msg.content | safe }}}{{- "</tool_result>" }}
         {%- endif %}
         {%- if loop.last or messages[i + 1].role != "tool" %}
             {{- end_of_turn -}}

     {{- "\nHere are the tools available to you in JSON format within <tool> and </tool> tags:\n" }}
     {%- for tool in tools %}
         {{- "<tool>" }}
+        {{- tool | tojson | safe }}
         {{- "</tool>\n" }}
     {%- endfor %}
 {%- for i in range(messages | length) %}
     {%- set msg = messages[i] %}
     {%- set role = msg.role %}
+    {% if role is not none and role.class is not none and not role in role_indicators %}
         {{- raise_exception('Unknown role: ' ~ role) }}
     {%- endif %}
             {%- endif %}
             {{- end_of_turn -}}
             {%- continue %}
+        {%- elif tools is defined and tools %}
             {{- role_indicators['system'] }}
             {{- available_tools(tools) }}
+            {{- end_of_turn -}}
         {%- endif %}
     {%- endif %}
     {%- if role == 'assistant' %}
         {{- role_indicators['assistant'] }}
+        {%- if msg.content %}
             {%- if "</think>" in msg.content %}
                 {%- set content = msg.content.split('</think>')[-1].strip() %}
                 {%- set reasoning_content = msg.content.split('</think>')[0].strip() %}
                     {{- raise_exception('arguments or parameters are mandatory: ' ~ tool_call) }}
                 {%- endif %}
+                {{- "<tool_call>" }}{"name": "{{- tool_call.name }}", "arguments": {{ arguments | tojson | safe }}}{{- "</tool_call>" }}
                 {%- if not loop.last %}
                     {{- "\n" }}
         {%- if i == 0 or messages[i - 1].role != "tool" %}
             {{- role_indicators['tool'] }}
         {%- endif %}
+        {%- if msg.content is defined %}
+            {{- "<tool_result>" }}{"result": {{ msg.content | tojson | safe }}}{{- "</tool_result>" }}
         {%- endif %}
         {%- if loop.last or messages[i + 1].role != "tool" %}
             {{- end_of_turn -}}