danielhanchen committed
Commit 5ea9902 · verified · 1 Parent(s): 5998403

Add files using upload-large-folder tool
Files changed (5):
1. README.md +5 -3
2. added_tokens.json +4 -0
3. config.json +1 -1
4. tokenizer.json +2 -2
5. tokenizer_config.json +32 -0
README.md CHANGED
@@ -3,8 +3,10 @@ tags:
 - unsloth
 base_model:
 - Qwen/Qwen3-30B-A3B-Base
+license: apache-2.0
+library_name: transformers
 ---
-# Qwen3-30B-A3B
+# Qwen3-30B-A3B-Base
 
 ## Qwen3 Highlights
 
@@ -18,9 +20,9 @@ Building upon extensive advancements in training data, model architecture, and o
 
 ## Model Overview
 
-**Qwen3-30B-A3B** has the following features:
+**Qwen3-30B-A3B-Base** has the following features:
 - Type: Causal Language Models
-- Training Stage: Pretraining & Post-training
+- Training Stage: Pretraining
 - Number of Parameters: 30.5B in total and 3.3B activated
 - Number of Paramaters (Non-Embedding): 29.9B
 - Number of Layers: 48
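The card update adds `library_name: transformers`, so the checkpoint is expected to load through the standard `transformers` auto classes. A minimal loading sketch; the repo id `unsloth/Qwen3-30B-A3B-Base` is an assumption (the repository name is not shown in this commit view):

```python
# Minimal sketch: load the base model per the card's new
# `library_name: transformers` metadata. The repo id below is an
# assumption; substitute the repository this commit belongs to.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "unsloth/Qwen3-30B-A3B-Base"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype="auto",   # use the checkpoint's native dtype
    device_map="auto",    # shard across available devices
)

inputs = tokenizer("The Qwen3 series is", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```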
added_tokens.json CHANGED
@@ -1,6 +1,10 @@
 {
+  "</think>": 151668,
   "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
   "<tool_call>": 151657,
+  "<tool_response>": 151665,
   "<|box_end|>": 151649,
   "<|box_start|>": 151648,
   "<|endoftext|>": 151643,
config.json CHANGED
@@ -10,7 +10,7 @@
   "hidden_act": "silu",
   "hidden_size": 2048,
   "initializer_range": 0.02,
-  "intermediate_size": 8192,
+  "intermediate_size": 6144,
   "max_position_embeddings": 32768,
   "max_window_layers": 48,
   "mlp_only_layers": [],
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
-size 11421896
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654
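`tokenizer.json` is stored through Git LFS, so the diff shows only the pointer file: `oid` is the SHA-256 of the actual file contents and `size` is its byte length. A sketch for verifying a downloaded copy against the new pointer (the local path is an assumption):

```python
# Sketch: verify a local tokenizer.json against the new LFS pointer.
import hashlib

EXPECTED_OID = "aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4"
EXPECTED_SIZE = 11422654

with open("tokenizer.json", "rb") as f:  # assumed local path
    data = f.read()

assert len(data) == EXPECTED_SIZE
assert hashlib.sha256(data).hexdigest() == EXPECTED_OID
```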
tokenizer_config.json CHANGED
@@ -177,6 +177,38 @@
       "rstrip": false,
       "single_word": false,
       "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "additional_special_tokens": [